1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return the index of the given mode in the mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
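
/* Illustrative sketch only (not part of the original tables): the per-mode
   multiply and divide cost arrays in struct processor_costs below are meant
   to be indexed through MODE_INDEX, e.g.

     ix86_cost->mult_init[MODE_INDEX (SImode)]
     ix86_cost->divide[MODE_INDEX (DImode)]

   where mult_init and divide are assumed to be the field names declared for
   struct processor_costs in i386.h; any mode other than QI/HI/SI/DImode
   falls into the last ("other") slot.  */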
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
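
/* Worked example of the scale above (illustrative only): with
   COSTS_N_INSNS (N) == (N) * 4 and an add taking 2 bytes,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a plain add costs the same
   whether measured in instructions (speed tuning) or in bytes (size tuning),
   and a 3-byte lea comes out as COSTS_N_BYTES (3) == 6.  */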
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
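
/* Sketch of how the memcpy/memset descriptors at the end of each cost table
   below are read (struct stringop_algs is declared in i386.h; this is only an
   illustration, not a new definition).  Each descriptor has the shape

     {alg_for_unknown_size, {{max_block_size, alg}, ..., {-1, alg}}}

   so, for example, {libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}
   means: blocks of at most 6 bytes use an inline loop, blocks of at most 14
   bytes an unrolled loop, anything larger (the -1 terminator) rep movsl, and
   a library call when the block size is not known at compile time.
   DUMMY_STRINGOP_ALGS simply says "always use a libcall"; it appears to fill
   the variant (32bit vs. 64bit) that a given tuning does not care about.  */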
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches completely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches completely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
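
/* Illustrative note: the initializer above is only a default. The option
   handling code (override_options, further down in this file) is expected to
   re-point ix86_cost at the table matching the selected tuning, roughly

     ix86_cost = optimize_size ? &size_cost
			       : processor_target_table[ix86_tune].cost;

   (exact names as used elsewhere in this file), so code querying
   ix86_cost->... picks up the per-CPU numbers defined above.  */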
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be a common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro-based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1030
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1032 on simulation results, but after P4 was made no performance benefit
1033 was observed from branch hints, and they also increase code size.
1034 As a result, icc never generates branch hints. */
1035 0,
1036
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1038 ~m_386,
1039
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1043
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1048
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on the Generic32 compilation setting as well. However,
1051 in the current implementation partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro-based chips and conflicts with the partial register
1055 dependencies used by Athlon/P4-based chips, it is better to leave it off
1056 for generic32 for now. */
1057 m_PPRO,
1058
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1061
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1064
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1067
1068 /* X86_TUNE_USE_MOV0 */
1069 m_K6,
1070
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1073
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1075 m_PENT4,
1076
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1078 m_PPRO,
1079
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1081 ~m_PENT,
1082
1083 /* X86_TUNE_READ_MODIFY */
1084 ~(m_PENT | m_PPRO),
1085
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1089
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1092
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1095
1096 /* X86_TUNE_QIMODE_MATH */
1097 ~0,
1098
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls were more effective. */
1103 ~m_PPRO,
1104
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1106 0,
1107
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1109 m_PPRO,
1110
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1113
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1117
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1120
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1124
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1129
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1132
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4 based chips that treat 128bit
1135 SSE registers as single units and K8 based chips that divide SSE
1136 registers into two 64bit halves. This knob promotes all store destinations
1137 to be 128bit to allow register renaming on 128bit SSE units, but usually
1138 results in one extra microop on 64bit SSE units. Experimental results
1139 show that disabling this option on P4 brings an over 20% SPECfp regression,
1140 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1143
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1145 m_AMDFAM10,
1146
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just the lower part of scalar values in the proper format, leaving
1150 the upper part undefined. */
1151 m_ATHLON_K8,
1152
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1155
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1158
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1161
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1164
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SHIFT1 */
1169 ~m_486,
1170
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1173
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1176
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1180
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1183
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1186
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1189
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1192
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1195
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1197 ~m_K8,
1198
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1200 m_K8 | m_GENERIC64,
1201
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1203 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1204 ~(m_386 | m_486),
1205
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
1207 a vector path on AMD machines. */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1209
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on AMD
1211 machines. */
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1213
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1215 than via MOV. */
1216 m_PENT,
1217
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1220 m_PENT,
1221
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1225 m_K6,
1226 };
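
/* Sketch of how the table above is meant to be consulted (illustrative only;
   the real accessor macros live in i386.h).  Each entry is a mask of the m_*
   processor bits defined earlier, so a tuning test conceptually checks
   whether the bit of the CPU selected by -mtune is set, e.g.

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       ... emit the leave instruction in the epilogue ...
*/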
1227
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1231 ~(m_386 | m_486 | m_PENT | m_K6),
1232
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1234 ~m_386,
1235
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1237 ~(m_386 | m_486),
1238
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1240 ~m_386,
1241
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 ~m_386,
1244 };
1245
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1248
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1252
1253 static enum stringop_alg stringop_alg = no_stringop;
1254
1255 /* If the average insn count for a single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1257 epilogue code. */
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1259
1260 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1264
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1267
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1269 {
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1274 /* FP registers */
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1277 /* arg pointer */
1278 NON_Q_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1282 SSE_REGS, SSE_REGS,
1283 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1284 MMX_REGS, MMX_REGS,
1285 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1288 SSE_REGS, SSE_REGS,
1289 };
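
/* Reading example (illustrative): hard register 0 is %eax and register 1 is
   %edx (note the ax, dx, cx, bx ordering above), so REGNO_REG_CLASS (0) is
   AREG and REGNO_REG_CLASS (1) is DREG, while the frame and stack pointers
   map to NON_Q_REGS because they have no QImode part in 32bit mode.  */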
1290
1291 /* The "default" register map used in 32bit mode. */
1292
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1294 {
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1302 };
1303
1304 static int const x86_64_int_parameter_registers[6] =
1305 {
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1308 };
1309
1310 static int const x86_64_ms_abi_int_parameter_registers[4] =
1311 {
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1314 };
1315
1316 static int const x86_64_int_return_registers[4] =
1317 {
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1319 };
1320
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1323 {
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1331 };
1332
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1377 numbers.
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1386 */
1387 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1388 {
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1396 };
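/* Illustrative sketch (not part of the original sources): the map above is
   indexed with a gcc hard register number, in DBX_REGISTER_NUMBER style, to
   obtain the DWARF register number emitted in debug info.  The helper name
   below is hypothetical.  */
#if 0
static int
example_svr4_dwarf_regno (int gcc_regno)
{
  /* E.g. %esi is gcc regno 4; svr4_dbx_register_map[4] is 6, matching the
     SVR4 numbering listed in the comment above.  */
  return svr4_dbx_register_map[gcc_regno];
}
#endif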
1397
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
1400
1401 rtx ix86_compare_op0 = NULL_RTX;
1402 rtx ix86_compare_op1 = NULL_RTX;
1403 rtx ix86_compare_emitted = NULL_RTX;
1404
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
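/* Worked example (values assumed here for illustration): with the usual
   64-bit limits REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8, and
   UNITS_PER_WORD == 8, the register save area is 6*8 + 8*16 = 176 bytes.  */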
1407
1408 /* Define the structure for the machine field in struct function. */
1409
1410 struct stack_local_entry GTY(())
1411 {
1412 unsigned short mode;
1413 unsigned short n;
1414 rtx rtl;
1415 struct stack_local_entry *next;
1416 };
1417
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1420
1421 [arguments]
1422 <- ARG_POINTER
1423 saved pc
1424
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1427 [saved regs]
1428
1429 [padding1] \
1430 )
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1433 [frame] (
1434 )
1435 [padding2] /
1436 */
1437 struct ix86_frame
1438 {
1439 int nregs;
1440 int padding1;
1441 int va_arg_size;
1442 HOST_WIDE_INT frame;
1443 int padding2;
1444 int outgoing_arguments_size;
1445 int red_zone_size;
1446
1447 HOST_WIDE_INT to_allocate;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset;
1450 HOST_WIDE_INT hard_frame_pointer_offset;
1451 HOST_WIDE_INT stack_pointer_offset;
1452
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov;
1456 };
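/* Illustrative sketch (hypothetical numbers, not from the original sources):
   for a 32-bit function with frame_pointer_needed that saves one register
   and uses 24 bytes of locals, the fields above come out roughly as
     hard_frame_pointer_offset = 8    (saved pc + saved frame pointer)
     frame_pointer_offset      = 12   (+ one 4-byte saved register)
     stack_pointer_offset      = 36   (+ 24 bytes of frame, ignoring padding)
   all measured from ARG_POINTER toward lower addresses, with to_allocate
   covering the part the prologue must actually subtract from the stack
   pointer.  */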
1457
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel;
1460 /* Asm dialect. */
1461 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1462 /* TLS dialects. */
1463 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1464
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath;
1467
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune;
1470
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch;
1473
1474 /* True if the SSE prefetch instruction is not a NOP. */
1475 int x86_prefetch_sse;
1476
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm;
1479
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer;
1482 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1483
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary;
1486
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost;
1489
1490 /* Variables which are this size or smaller are put in the data/bss
1491 or ldata/lbss sections. */
1492
1493 int ix86_section_threshold = 65536;
1494
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix[16];
1497 int internal_label_prefix_len;
1498
1499 /* Register class used for passing given 64bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1502 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1503
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (upper half does contain padding). */
1506 enum x86_64_reg_class
1507 {
1508 X86_64_NO_CLASS,
1509 X86_64_INTEGER_CLASS,
1510 X86_64_INTEGERSI_CLASS,
1511 X86_64_SSE_CLASS,
1512 X86_64_SSESF_CLASS,
1513 X86_64_SSEDF_CLASS,
1514 X86_64_SSEUP_CLASS,
1515 X86_64_X87_CLASS,
1516 X86_64_X87UP_CLASS,
1517 X86_64_COMPLEX_X87_CLASS,
1518 X86_64_MEMORY_CLASS
1519 };
1520 static const char * const x86_64_reg_class_name[] =
1521 {
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1524 };
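/* Illustrative example of the classification described above (per the
   x86-64 psABI): a structure such as
     struct s { long l; double d; };
   occupies two eightbytes; the first is classified X86_64_INTEGER_CLASS and
   the second X86_64_SSEDF_CLASS (gcc's DFmode refinement of the SSE class),
   so it is passed in one integer register and one SSE register.  */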
1525
1526 #define MAX_CLASSES 4
1527
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1531
1532 \f
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1538 rtx, rtx, int);
1539
1540 \f
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1542 in memory. */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1545 #endif
1546
1547 /* Bit flags that specify the ISA we are compiling for. */
1548 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1549
1550 /* A mask of ix86_isa_flags that includes bit X if X
1551 was set or cleared on the command line. */
1552 static int ix86_isa_flags_explicit;
1553
1554 /* Implement TARGET_HANDLE_OPTION. */
1555
1556 static bool
1557 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1558 {
1559 switch (code)
1560 {
1561 case OPT_mmmx:
1562 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1563 if (!value)
1564 {
1565 ix86_isa_flags
1566 &= ~(OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A);
1567 ix86_isa_flags_explicit
1568 |= OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A;
1569 }
1570 return true;
1571
1572 case OPT_m3dnow:
1573 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1574 if (!value)
1575 {
1576 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_A;
1577 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_A;
1578 }
1579 return true;
1580
1581 case OPT_m3dnowa:
1582 return false;
1583
1584 case OPT_msse:
1585 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1586 if (!value)
1587 {
1588 ix86_isa_flags
1589 &= ~(OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3
1590 | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
1591 | OPTION_MASK_ISA_SSE4A);
1592 ix86_isa_flags_explicit
1593 |= (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3
1594 | OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
1595 | OPTION_MASK_ISA_SSE4A);
1596 }
1597 return true;
1598
1599 case OPT_msse2:
1600 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1601 if (!value)
1602 {
1603 ix86_isa_flags
1604 &= ~(OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
1605 | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
1606 ix86_isa_flags_explicit
1607 |= (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSSE3
1608 | OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
1609 }
1610 return true;
1611
1612 case OPT_msse3:
1613 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1614 if (!value)
1615 {
1616 ix86_isa_flags
1617 &= ~(OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
1618 | OPTION_MASK_ISA_SSE4A);
1619 ix86_isa_flags_explicit
1620 |= (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1
1621 | OPTION_MASK_ISA_SSE4A);
1622 }
1623 return true;
1624
1625 case OPT_mssse3:
1626 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1627 if (!value)
1628 {
1629 ix86_isa_flags
1630 &= ~(OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A);
1631 ix86_isa_flags_explicit
1632 |= OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4A;
1633 }
1634 return true;
1635
1636 case OPT_msse4_1:
1637 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1638 if (!value)
1639 {
1640 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A;
1641 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1642 }
1643 return true;
1644
1645 case OPT_msse4a:
1646 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1647 if (!value)
1648 {
1649 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1;
1650 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1651 }
1652 return true;
1653
1654 default:
1655 return true;
1656 }
1657 }
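/* Example of the interaction handled above: "-mno-sse2" clears
   OPTION_MASK_ISA_SSE3, OPTION_MASK_ISA_SSSE3, OPTION_MASK_ISA_SSE4_1 and
   OPTION_MASK_ISA_SSE4A from ix86_isa_flags and records all of them in
   ix86_isa_flags_explicit, so that a later -march= default applied in
   override_options cannot silently re-enable them.  */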
1658
1659 /* Sometimes certain combinations of command options do not make
1660 sense on a particular target machine. You can define a macro
1661 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1662 defined, is executed once just after all the command options have
1663 been parsed.
1664
1665 Don't use this macro to turn on various extra optimizations for
1666 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1667
1668 void
1669 override_options (void)
1670 {
1671 int i;
1672 int ix86_tune_defaulted = 0;
1673 unsigned int ix86_arch_mask, ix86_tune_mask;
1674
1675 /* Comes from final.c -- no real reason to change it. */
1676 #define MAX_CODE_ALIGN 16
1677
1678 static struct ptt
1679 {
1680 const struct processor_costs *cost; /* Processor costs */
1681 const int align_loop; /* Default alignments. */
1682 const int align_loop_max_skip;
1683 const int align_jump;
1684 const int align_jump_max_skip;
1685 const int align_func;
1686 }
1687 const processor_target_table[PROCESSOR_max] =
1688 {
1689 {&i386_cost, 4, 3, 4, 3, 4},
1690 {&i486_cost, 16, 15, 16, 15, 16},
1691 {&pentium_cost, 16, 7, 16, 7, 16},
1692 {&pentiumpro_cost, 16, 15, 16, 7, 16},
1693 {&geode_cost, 0, 0, 0, 0, 0},
1694 {&k6_cost, 32, 7, 32, 7, 32},
1695 {&athlon_cost, 16, 7, 16, 7, 16},
1696 {&pentium4_cost, 0, 0, 0, 0, 0},
1697 {&k8_cost, 16, 7, 16, 7, 16},
1698 {&nocona_cost, 0, 0, 0, 0, 0},
1699 {&core2_cost, 16, 7, 16, 7, 16},
1700 {&generic32_cost, 16, 7, 16, 7, 16},
1701 {&generic64_cost, 16, 7, 16, 7, 16},
1702 {&amdfam10_cost, 32, 24, 32, 7, 32}
1703 };
1704
1705 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1706 static struct pta
1707 {
1708 const char *const name; /* processor name or nickname. */
1709 const enum processor_type processor;
1710 const enum pta_flags
1711 {
1712 PTA_SSE = 1 << 0,
1713 PTA_SSE2 = 1 << 1,
1714 PTA_SSE3 = 1 << 2,
1715 PTA_MMX = 1 << 3,
1716 PTA_PREFETCH_SSE = 1 << 4,
1717 PTA_3DNOW = 1 << 5,
1718 PTA_3DNOW_A = 1 << 6,
1719 PTA_64BIT = 1 << 7,
1720 PTA_SSSE3 = 1 << 8,
1721 PTA_CX16 = 1 << 9,
1722 PTA_POPCNT = 1 << 10,
1723 PTA_ABM = 1 << 11,
1724 PTA_SSE4A = 1 << 12,
1725 PTA_NO_SAHF = 1 << 13,
1726 PTA_SSE4_1 = 1 << 14
1727 } flags;
1728 }
1729 const processor_alias_table[] =
1730 {
1731 {"i386", PROCESSOR_I386, 0},
1732 {"i486", PROCESSOR_I486, 0},
1733 {"i586", PROCESSOR_PENTIUM, 0},
1734 {"pentium", PROCESSOR_PENTIUM, 0},
1735 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1736 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1737 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1738 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1739 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1740 {"i686", PROCESSOR_PENTIUMPRO, 0},
1741 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1742 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1743 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1744 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1745 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1746 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1747 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1748 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1749 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1750 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1751 | PTA_CX16 | PTA_NO_SAHF)},
1752 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1753 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1754 | PTA_SSSE3
1755 | PTA_CX16)},
1756 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1757 | PTA_PREFETCH_SSE)},
1758 {"k6", PROCESSOR_K6, PTA_MMX},
1759 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1760 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1761 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1762 | PTA_PREFETCH_SSE)},
1763 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1764 | PTA_PREFETCH_SSE)},
1765 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1766 | PTA_SSE)},
1767 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1768 | PTA_SSE)},
1769 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1770 | PTA_SSE)},
1771 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1772 | PTA_MMX | PTA_SSE | PTA_SSE2
1773 | PTA_NO_SAHF)},
1774 {"k8", PROCESSOR_K8, (PTA_64BIT
1775 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1776 | PTA_SSE | PTA_SSE2
1777 | PTA_NO_SAHF)},
1778 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1779 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1780 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1781 | PTA_NO_SAHF)},
1782 {"opteron", PROCESSOR_K8, (PTA_64BIT
1783 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1784 | PTA_SSE | PTA_SSE2
1785 | PTA_NO_SAHF)},
1786 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1787 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1788 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1789 | PTA_NO_SAHF)},
1790 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1791 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1792 | PTA_SSE | PTA_SSE2
1793 | PTA_NO_SAHF)},
1794 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1795 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1796 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1797 | PTA_NO_SAHF)},
1798 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1799 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1800 | PTA_SSE | PTA_SSE2
1801 | PTA_NO_SAHF)},
1802 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1803 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1804 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1805 | PTA_SSE4A
1806 | PTA_CX16 | PTA_ABM)},
1807 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1808 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1809 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1810 | PTA_SSE4A
1811 | PTA_CX16 | PTA_ABM)},
1812 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1813 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1814 };
1815
1816 int const pta_size = ARRAY_SIZE (processor_alias_table);
1817
1818 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1819 SUBTARGET_OVERRIDE_OPTIONS;
1820 #endif
1821
1822 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1823 SUBSUBTARGET_OVERRIDE_OPTIONS;
1824 #endif
1825
1826 /* -fPIC is the default for x86_64. */
1827 if (TARGET_MACHO && TARGET_64BIT)
1828 flag_pic = 2;
1829
1830 /* Set the default values for switches whose default depends on TARGET_64BIT
1831 in case they weren't overwritten by command line options. */
1832 if (TARGET_64BIT)
1833 {
1834 /* Mach-O doesn't support omitting the frame pointer for now. */
1835 if (flag_omit_frame_pointer == 2)
1836 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1837 if (flag_asynchronous_unwind_tables == 2)
1838 flag_asynchronous_unwind_tables = 1;
1839 if (flag_pcc_struct_return == 2)
1840 flag_pcc_struct_return = 0;
1841 }
1842 else
1843 {
1844 if (flag_omit_frame_pointer == 2)
1845 flag_omit_frame_pointer = 0;
1846 if (flag_asynchronous_unwind_tables == 2)
1847 flag_asynchronous_unwind_tables = 0;
1848 if (flag_pcc_struct_return == 2)
1849 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1850 }
1851
1852 /* Need to check -mtune=generic first. */
1853 if (ix86_tune_string)
1854 {
1855 if (!strcmp (ix86_tune_string, "generic")
1856 || !strcmp (ix86_tune_string, "i686")
1857 /* As special support for cross compilers we read -mtune=native
1858 as -mtune=generic. With native compilers we won't see the
1859 -mtune=native, as it was changed by the driver. */
1860 || !strcmp (ix86_tune_string, "native"))
1861 {
1862 if (TARGET_64BIT)
1863 ix86_tune_string = "generic64";
1864 else
1865 ix86_tune_string = "generic32";
1866 }
1867 else if (!strncmp (ix86_tune_string, "generic", 7))
1868 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1869 }
1870 else
1871 {
1872 if (ix86_arch_string)
1873 ix86_tune_string = ix86_arch_string;
1874 if (!ix86_tune_string)
1875 {
1876 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1877 ix86_tune_defaulted = 1;
1878 }
1879
1880 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1881 need to use a sensible tune option. */
1882 if (!strcmp (ix86_tune_string, "generic")
1883 || !strcmp (ix86_tune_string, "x86-64")
1884 || !strcmp (ix86_tune_string, "i686"))
1885 {
1886 if (TARGET_64BIT)
1887 ix86_tune_string = "generic64";
1888 else
1889 ix86_tune_string = "generic32";
1890 }
1891 }
1892 if (ix86_stringop_string)
1893 {
1894 if (!strcmp (ix86_stringop_string, "rep_byte"))
1895 stringop_alg = rep_prefix_1_byte;
1896 else if (!strcmp (ix86_stringop_string, "libcall"))
1897 stringop_alg = libcall;
1898 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1899 stringop_alg = rep_prefix_4_byte;
1900 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1901 stringop_alg = rep_prefix_8_byte;
1902 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1903 stringop_alg = loop_1_byte;
1904 else if (!strcmp (ix86_stringop_string, "loop"))
1905 stringop_alg = loop;
1906 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1907 stringop_alg = unrolled_loop;
1908 else
1909 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1910 }
1911 if (!strcmp (ix86_tune_string, "x86-64"))
1912 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1913 "-mtune=generic instead as appropriate.");
1914
1915 if (!ix86_arch_string)
1916 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1917 if (!strcmp (ix86_arch_string, "generic"))
1918 error ("generic CPU can be used only for -mtune= switch");
1919 if (!strncmp (ix86_arch_string, "generic", 7))
1920 error ("bad value (%s) for -march= switch", ix86_arch_string);
1921
1922 if (ix86_cmodel_string != 0)
1923 {
1924 if (!strcmp (ix86_cmodel_string, "small"))
1925 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1926 else if (!strcmp (ix86_cmodel_string, "medium"))
1927 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1928 else if (!strcmp (ix86_cmodel_string, "large"))
1929 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1930 else if (flag_pic)
1931 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1932 else if (!strcmp (ix86_cmodel_string, "32"))
1933 ix86_cmodel = CM_32;
1934 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1935 ix86_cmodel = CM_KERNEL;
1936 else
1937 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1938 }
1939 else
1940 {
1941 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1942 use of rip-relative addressing. This eliminates fixups that
1943 would otherwise be needed if this object is to be placed in a
1944 DLL, and is essentially just as efficient as direct addressing. */
1945 if (TARGET_64BIT_MS_ABI)
1946 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1947 else if (TARGET_64BIT)
1948 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1949 else
1950 ix86_cmodel = CM_32;
1951 }
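  /* For example, "-mcmodel=medium -fPIC" selects CM_MEDIUM_PIC above, while
     plain "-mcmodel=medium" selects CM_MEDIUM; with no -mcmodel option the
     64-bit default is CM_SMALL (CM_SMALL_PIC under -fPIC or the MS ABI) and
     the 32-bit default is CM_32.  */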
1952 if (ix86_asm_string != 0)
1953 {
1954 if (! TARGET_MACHO
1955 && !strcmp (ix86_asm_string, "intel"))
1956 ix86_asm_dialect = ASM_INTEL;
1957 else if (!strcmp (ix86_asm_string, "att"))
1958 ix86_asm_dialect = ASM_ATT;
1959 else
1960 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1961 }
1962 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1963 error ("code model %qs not supported in the %s bit mode",
1964 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1965 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
1966 sorry ("%i-bit mode not compiled in",
1967 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
1968
1969 for (i = 0; i < pta_size; i++)
1970 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1971 {
1972 ix86_arch = processor_alias_table[i].processor;
1973 /* Default cpu tuning to the architecture. */
1974 ix86_tune = ix86_arch;
1975
1976 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1977 error ("CPU you selected does not support x86-64 "
1978 "instruction set");
1979
1980 if (processor_alias_table[i].flags & PTA_MMX
1981 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
1982 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
1983 if (processor_alias_table[i].flags & PTA_3DNOW
1984 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
1985 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
1986 if (processor_alias_table[i].flags & PTA_3DNOW_A
1987 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
1988 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
1989 if (processor_alias_table[i].flags & PTA_SSE
1990 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
1991 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
1992 if (processor_alias_table[i].flags & PTA_SSE2
1993 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
1994 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
1995 if (processor_alias_table[i].flags & PTA_SSE3
1996 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
1997 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
1998 if (processor_alias_table[i].flags & PTA_SSSE3
1999 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2000 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2001 if (processor_alias_table[i].flags & PTA_SSE4_1
2002 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2003 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2004 if (processor_alias_table[i].flags & PTA_SSE4A
2005 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2006 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2007
2008 if (processor_alias_table[i].flags & PTA_ABM)
2009 x86_abm = true;
2010 if (processor_alias_table[i].flags & PTA_CX16)
2011 x86_cmpxchg16b = true;
2012 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2013 x86_popcnt = true;
2014 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2015 x86_prefetch_sse = true;
2016 if ((processor_alias_table[i].flags & PTA_NO_SAHF) && !TARGET_64BIT)
2017 x86_sahf = true;
2018
2019 break;
2020 }
2021
2022 if (i == pta_size)
2023 error ("bad value (%s) for -march= switch", ix86_arch_string);
2024
2025 ix86_arch_mask = 1u << ix86_arch;
2026 for (i = 0; i < X86_ARCH_LAST; ++i)
2027 ix86_arch_features[i] &= ix86_arch_mask;
2028
2029 for (i = 0; i < pta_size; i++)
2030 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2031 {
2032 ix86_tune = processor_alias_table[i].processor;
2033 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2034 {
2035 if (ix86_tune_defaulted)
2036 {
2037 ix86_tune_string = "x86-64";
2038 for (i = 0; i < pta_size; i++)
2039 if (! strcmp (ix86_tune_string,
2040 processor_alias_table[i].name))
2041 break;
2042 ix86_tune = processor_alias_table[i].processor;
2043 }
2044 else
2045 error ("CPU you selected does not support x86-64 "
2046 "instruction set");
2047 }
2048 /* Intel CPUs have always interpreted SSE prefetch instructions as
2049 NOPs; so, we can enable SSE prefetch instructions even when
2050 -mtune (rather than -march) points us to a processor that has them.
2051 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2052 higher processors. */
2053 if (TARGET_CMOVE
2054 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2055 x86_prefetch_sse = true;
2056 break;
2057 }
2058 if (i == pta_size)
2059 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2060
2061 ix86_tune_mask = 1u << ix86_tune;
2062 for (i = 0; i < X86_TUNE_LAST; ++i)
2063 ix86_tune_features[i] &= ix86_tune_mask;
2064
2065 if (optimize_size)
2066 ix86_cost = &size_cost;
2067 else
2068 ix86_cost = processor_target_table[ix86_tune].cost;
2069
2070 /* Arrange to set up i386_stack_locals for all functions. */
2071 init_machine_status = ix86_init_machine_status;
2072
2073 /* Validate -mregparm= value. */
2074 if (ix86_regparm_string)
2075 {
2076 if (TARGET_64BIT)
2077 warning (0, "-mregparm is ignored in 64-bit mode");
2078 i = atoi (ix86_regparm_string);
2079 if (i < 0 || i > REGPARM_MAX)
2080 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2081 else
2082 ix86_regparm = i;
2083 }
2084 if (TARGET_64BIT)
2085 ix86_regparm = REGPARM_MAX;
2086
2087 /* If the user has provided any of the -malign-* options,
2088 warn and use that value only if -falign-* is not set.
2089 Remove this code in GCC 3.2 or later. */
2090 if (ix86_align_loops_string)
2091 {
2092 warning (0, "-malign-loops is obsolete, use -falign-loops");
2093 if (align_loops == 0)
2094 {
2095 i = atoi (ix86_align_loops_string);
2096 if (i < 0 || i > MAX_CODE_ALIGN)
2097 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2098 else
2099 align_loops = 1 << i;
2100 }
2101 }
2102
2103 if (ix86_align_jumps_string)
2104 {
2105 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2106 if (align_jumps == 0)
2107 {
2108 i = atoi (ix86_align_jumps_string);
2109 if (i < 0 || i > MAX_CODE_ALIGN)
2110 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2111 else
2112 align_jumps = 1 << i;
2113 }
2114 }
2115
2116 if (ix86_align_funcs_string)
2117 {
2118 warning (0, "-malign-functions is obsolete, use -falign-functions");
2119 if (align_functions == 0)
2120 {
2121 i = atoi (ix86_align_funcs_string);
2122 if (i < 0 || i > MAX_CODE_ALIGN)
2123 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2124 else
2125 align_functions = 1 << i;
2126 }
2127 }
2128
2129 /* Default align_* from the processor table. */
2130 if (align_loops == 0)
2131 {
2132 align_loops = processor_target_table[ix86_tune].align_loop;
2133 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2134 }
2135 if (align_jumps == 0)
2136 {
2137 align_jumps = processor_target_table[ix86_tune].align_jump;
2138 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2139 }
2140 if (align_functions == 0)
2141 {
2142 align_functions = processor_target_table[ix86_tune].align_func;
2143 }
2144
2145 /* Validate -mbranch-cost= value, or provide default. */
2146 ix86_branch_cost = ix86_cost->branch_cost;
2147 if (ix86_branch_cost_string)
2148 {
2149 i = atoi (ix86_branch_cost_string);
2150 if (i < 0 || i > 5)
2151 error ("-mbranch-cost=%d is not between 0 and 5", i);
2152 else
2153 ix86_branch_cost = i;
2154 }
2155 if (ix86_section_threshold_string)
2156 {
2157 i = atoi (ix86_section_threshold_string);
2158 if (i < 0)
2159 error ("-mlarge-data-threshold=%d is negative", i);
2160 else
2161 ix86_section_threshold = i;
2162 }
2163
2164 if (ix86_tls_dialect_string)
2165 {
2166 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2167 ix86_tls_dialect = TLS_DIALECT_GNU;
2168 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2169 ix86_tls_dialect = TLS_DIALECT_GNU2;
2170 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2171 ix86_tls_dialect = TLS_DIALECT_SUN;
2172 else
2173 error ("bad value (%s) for -mtls-dialect= switch",
2174 ix86_tls_dialect_string);
2175 }
2176
2177 if (ix87_precision_string)
2178 {
2179 i = atoi (ix87_precision_string);
2180 if (i != 32 && i != 64 && i != 80)
2181 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2182 }
2183
2184 /* Keep nonleaf frame pointers. */
2185 if (flag_omit_frame_pointer)
2186 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2187 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2188 flag_omit_frame_pointer = 1;
2189
2190 /* If we're doing fast math, we don't care about comparison order
2191 wrt NaNs. This lets us use a shorter comparison sequence. */
2192 if (flag_finite_math_only)
2193 target_flags &= ~MASK_IEEE_FP;
2194
2195 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2196 since the insns won't need emulation. */
2197 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2198 target_flags &= ~MASK_NO_FANCY_MATH_387;
2199
2200 /* Likewise, if the target doesn't have a 387, or we've specified
2201 software floating point, don't use 387 inline intrinsics. */
2202 if (!TARGET_80387)
2203 target_flags |= MASK_NO_FANCY_MATH_387;
2204
2205 /* Turn on SSSE3 builtins for -msse4.1. */
2206 if (TARGET_SSE4_1)
2207 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2208
2209 /* Turn on SSE3 builtins for -mssse3. */
2210 if (TARGET_SSSE3)
2211 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2212
2213 /* Turn on SSE3 builtins for -msse4a. */
2214 if (TARGET_SSE4A)
2215 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2216
2217 /* Turn on SSE2 builtins for -msse3. */
2218 if (TARGET_SSE3)
2219 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2220
2221 /* Turn on SSE builtins for -msse2. */
2222 if (TARGET_SSE2)
2223 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2224
2225 /* Turn on MMX builtins for -msse. */
2226 if (TARGET_SSE)
2227 {
2228 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2229 x86_prefetch_sse = true;
2230 }
2231
2232 /* Turn on MMX builtins for 3Dnow. */
2233 if (TARGET_3DNOW)
2234 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2235
2236 /* Turn on POPCNT builtins for -mabm. */
2237 if (TARGET_ABM)
2238 x86_popcnt = true;
2239
2240 if (TARGET_64BIT)
2241 {
2242 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2243
2244 /* Enable by default the SSE and MMX builtins. Do allow the user to
2245 explicitly disable any of these. In particular, disabling SSE and
2246 MMX for kernel code is extremely useful. */
2247 ix86_isa_flags
2248 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2249 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2250
2251 if (TARGET_RTD)
2252 warning (0, "-mrtd is ignored in 64bit mode");
2253 }
2254 else
2255 {
2256 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2257
2258 ix86_isa_flags
2259 |= TARGET_SUBTARGET32_DEFAULT & ~ix86_isa_flags_explicit;
2260
2261 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2262 when the programmer takes care to keep the stack from being destroyed. */
2263 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2264 target_flags |= MASK_NO_RED_ZONE;
2265 }
2266
2267 /* Validate -mpreferred-stack-boundary= value, or provide default.
2268 The default of 128 bits is for Pentium III's SSE __m128. We can't
2269 lower it just for optimize_size, because then object files compiled
2270 with -Os and -On could not be mixed. */
2271 ix86_preferred_stack_boundary = 128;
2272 if (ix86_preferred_stack_boundary_string)
2273 {
2274 i = atoi (ix86_preferred_stack_boundary_string);
2275 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2276 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2277 TARGET_64BIT ? 4 : 2);
2278 else
2279 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2280 }
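  /* Worked example: -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte default
     chosen above; the smallest accepted value is 2 (4 bytes) in 32-bit mode
     and 4 (16 bytes) in 64-bit mode.  */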
2281
2282 /* Accept -msseregparm only if at least SSE support is enabled. */
2283 if (TARGET_SSEREGPARM
2284 && ! TARGET_SSE)
2285 error ("-msseregparm used without SSE enabled");
2286
2287 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2288 if (ix86_fpmath_string != 0)
2289 {
2290 if (! strcmp (ix86_fpmath_string, "387"))
2291 ix86_fpmath = FPMATH_387;
2292 else if (! strcmp (ix86_fpmath_string, "sse"))
2293 {
2294 if (!TARGET_SSE)
2295 {
2296 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2297 ix86_fpmath = FPMATH_387;
2298 }
2299 else
2300 ix86_fpmath = FPMATH_SSE;
2301 }
2302 else if (! strcmp (ix86_fpmath_string, "387,sse")
2303 || ! strcmp (ix86_fpmath_string, "sse,387"))
2304 {
2305 if (!TARGET_SSE)
2306 {
2307 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2308 ix86_fpmath = FPMATH_387;
2309 }
2310 else if (!TARGET_80387)
2311 {
2312 warning (0, "387 instruction set disabled, using SSE arithmetics");
2313 ix86_fpmath = FPMATH_SSE;
2314 }
2315 else
2316 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2317 }
2318 else
2319 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2320 }
2321
2322 /* If the i387 is disabled, then do not return values in it. */
2323 if (!TARGET_80387)
2324 target_flags &= ~MASK_FLOAT_RETURNS;
2325
2326 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2327 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2328 && !optimize_size)
2329 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2330
2331 /* ??? Unwind info is not correct around the CFG unless either a frame
2332 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2333 unwind info generation to be aware of the CFG and propagating states
2334 around edges. */
2335 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2336 || flag_exceptions || flag_non_call_exceptions)
2337 && flag_omit_frame_pointer
2338 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2339 {
2340 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2341 warning (0, "unwind tables currently require either a frame pointer "
2342 "or -maccumulate-outgoing-args for correctness");
2343 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2344 }
2345
2346 /* For sane SSE instruction set generation we need fcomi instruction.
2347 It is safe to enable all CMOVE instructions. */
2348 if (TARGET_SSE)
2349 TARGET_CMOVE = 1;
2350
2351 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2352 {
2353 char *p;
2354 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2355 p = strchr (internal_label_prefix, 'X');
2356 internal_label_prefix_len = p - internal_label_prefix;
2357 *p = '\0';
2358 }
2359
2360 /* When the scheduling description is not available, disable the scheduler
2361 pass so it won't slow down compilation and make x87 code slower. */
2362 if (!TARGET_SCHEDULE)
2363 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2364
2365 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2366 set_param_value ("simultaneous-prefetches",
2367 ix86_cost->simultaneous_prefetches);
2368 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2369 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2370 }
2371 \f
2372 /* Return true if this goes in large data/bss. */
2373
2374 static bool
2375 ix86_in_large_data_p (tree exp)
2376 {
2377 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2378 return false;
2379
2380 /* Functions are never large data. */
2381 if (TREE_CODE (exp) == FUNCTION_DECL)
2382 return false;
2383
2384 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2385 {
2386 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2387 if (strcmp (section, ".ldata") == 0
2388 || strcmp (section, ".lbss") == 0)
2389 return true;
2390 return false;
2391 }
2392 else
2393 {
2394 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2395
2396 /* If this is an incomplete type with size 0, then we can't put it
2397 in data because it might be too big when completed. */
2398 if (!size || size > ix86_section_threshold)
2399 return true;
2400 }
2401
2402 return false;
2403 }
2404
2405 /* Switch to the appropriate section for output of DECL.
2406 DECL is either a `VAR_DECL' node or a constant of some sort.
2407 RELOC indicates whether forming the initial value of DECL requires
2408 link-time relocations. */
2409
2410 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2411 ATTRIBUTE_UNUSED;
2412
2413 static section *
2414 x86_64_elf_select_section (tree decl, int reloc,
2415 unsigned HOST_WIDE_INT align)
2416 {
2417 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2418 && ix86_in_large_data_p (decl))
2419 {
2420 const char *sname = NULL;
2421 unsigned int flags = SECTION_WRITE;
2422 switch (categorize_decl_for_section (decl, reloc))
2423 {
2424 case SECCAT_DATA:
2425 sname = ".ldata";
2426 break;
2427 case SECCAT_DATA_REL:
2428 sname = ".ldata.rel";
2429 break;
2430 case SECCAT_DATA_REL_LOCAL:
2431 sname = ".ldata.rel.local";
2432 break;
2433 case SECCAT_DATA_REL_RO:
2434 sname = ".ldata.rel.ro";
2435 break;
2436 case SECCAT_DATA_REL_RO_LOCAL:
2437 sname = ".ldata.rel.ro.local";
2438 break;
2439 case SECCAT_BSS:
2440 sname = ".lbss";
2441 flags |= SECTION_BSS;
2442 break;
2443 case SECCAT_RODATA:
2444 case SECCAT_RODATA_MERGE_STR:
2445 case SECCAT_RODATA_MERGE_STR_INIT:
2446 case SECCAT_RODATA_MERGE_CONST:
2447 sname = ".lrodata";
2448 flags = 0;
2449 break;
2450 case SECCAT_SRODATA:
2451 case SECCAT_SDATA:
2452 case SECCAT_SBSS:
2453 gcc_unreachable ();
2454 case SECCAT_TEXT:
2455 case SECCAT_TDATA:
2456 case SECCAT_TBSS:
2457 /* We don't split these for the medium model. Place them into
2458 the default sections and hope for the best. */
2459 break;
2460 }
2461 if (sname)
2462 {
2463 /* We might get called with string constants, but get_named_section
2464 doesn't like them as they are not DECLs. Also, we need to set
2465 flags in that case. */
2466 if (!DECL_P (decl))
2467 return get_section (sname, flags, NULL);
2468 return get_named_section (decl, sname, reloc);
2469 }
2470 }
2471 return default_elf_select_section (decl, reloc, align);
2472 }
2473
2474 /* Build up a unique section name, expressed as a
2475 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2476 RELOC indicates whether the initial value of EXP requires
2477 link-time relocations. */
2478
2479 static void ATTRIBUTE_UNUSED
2480 x86_64_elf_unique_section (tree decl, int reloc)
2481 {
2482 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2483 && ix86_in_large_data_p (decl))
2484 {
2485 const char *prefix = NULL;
2486 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2487 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2488
2489 switch (categorize_decl_for_section (decl, reloc))
2490 {
2491 case SECCAT_DATA:
2492 case SECCAT_DATA_REL:
2493 case SECCAT_DATA_REL_LOCAL:
2494 case SECCAT_DATA_REL_RO:
2495 case SECCAT_DATA_REL_RO_LOCAL:
2496 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2497 break;
2498 case SECCAT_BSS:
2499 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2500 break;
2501 case SECCAT_RODATA:
2502 case SECCAT_RODATA_MERGE_STR:
2503 case SECCAT_RODATA_MERGE_STR_INIT:
2504 case SECCAT_RODATA_MERGE_CONST:
2505 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2506 break;
2507 case SECCAT_SRODATA:
2508 case SECCAT_SDATA:
2509 case SECCAT_SBSS:
2510 gcc_unreachable ();
2511 case SECCAT_TEXT:
2512 case SECCAT_TDATA:
2513 case SECCAT_TBSS:
2514 /* We don't split these for the medium model. Place them into
2515 the default sections and hope for the best. */
2516 break;
2517 }
2518 if (prefix)
2519 {
2520 const char *name;
2521 size_t nlen, plen;
2522 char *string;
2523 plen = strlen (prefix);
2524
2525 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2526 name = targetm.strip_name_encoding (name);
2527 nlen = strlen (name);
2528
2529 string = alloca (nlen + plen + 1);
2530 memcpy (string, prefix, plen);
2531 memcpy (string + plen, name, nlen + 1);
2532
2533 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2534 return;
2535 }
2536 }
2537 default_unique_section (decl, reloc);
2538 }
2539
2540 #ifdef COMMON_ASM_OP
2541 /* This says how to output assembler code to declare an
2542 uninitialized external linkage data object.
2543
2544 For medium model x86-64 we need to use the .largecomm directive for
2545 large objects. */
2546 void
2547 x86_elf_aligned_common (FILE *file,
2548 const char *name, unsigned HOST_WIDE_INT size,
2549 int align)
2550 {
2551 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2552 && size > (unsigned int)ix86_section_threshold)
2553 fprintf (file, ".largecomm\t");
2554 else
2555 fprintf (file, "%s", COMMON_ASM_OP);
2556 assemble_name (file, name);
2557 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2558 size, align / BITS_PER_UNIT);
2559 }
2560 #endif
2561
2562 /* Utility function for targets to use in implementing
2563 ASM_OUTPUT_ALIGNED_BSS. */
2564
2565 void
2566 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2567 const char *name, unsigned HOST_WIDE_INT size,
2568 int align)
2569 {
2570 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2571 && size > (unsigned int)ix86_section_threshold)
2572 switch_to_section (get_named_section (decl, ".lbss", 0));
2573 else
2574 switch_to_section (bss_section);
2575 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2576 #ifdef ASM_DECLARE_OBJECT_NAME
2577 last_assemble_variable_decl = decl;
2578 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2579 #else
2580 /* Standard thing is just output label for the object. */
2581 ASM_OUTPUT_LABEL (file, name);
2582 #endif /* ASM_DECLARE_OBJECT_NAME */
2583 ASM_OUTPUT_SKIP (file, size ? size : 1);
2584 }
2585 \f
2586 void
2587 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2588 {
2589 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2590 make the problem with not enough registers even worse. */
2591 #ifdef INSN_SCHEDULING
2592 if (level > 1)
2593 flag_schedule_insns = 0;
2594 #endif
2595
2596 if (TARGET_MACHO)
2597 /* The Darwin libraries never set errno, so we might as well
2598 avoid calling them when that's the only reason we would. */
2599 flag_errno_math = 0;
2600
2601 /* The default values of these switches depend on TARGET_64BIT, which is
2602 not known at this moment. Mark these values with 2 and let the user
2603 override them. If there is no command line option specifying them,
2604 we will set the defaults in override_options. */
2605 if (optimize >= 1)
2606 flag_omit_frame_pointer = 2;
2607 flag_pcc_struct_return = 2;
2608 flag_asynchronous_unwind_tables = 2;
2609 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2610 SUBTARGET_OPTIMIZATION_OPTIONS;
2611 #endif
2612 }
2613 \f
2614 /* Decide whether we can make a sibling call to a function. DECL is the
2615 declaration of the function being targeted by the call and EXP is the
2616 CALL_EXPR representing the call. */
2617
2618 static bool
2619 ix86_function_ok_for_sibcall (tree decl, tree exp)
2620 {
2621 tree func;
2622 rtx a, b;
2623
2624 /* If we are generating position-independent code, we cannot sibcall
2625 optimize any indirect call, or a direct call to a global function,
2626 as the PLT requires %ebx be live. */
2627 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2628 return false;
2629
2630 if (decl)
2631 func = decl;
2632 else
2633 {
2634 func = TREE_TYPE (CALL_EXPR_FN (exp));
2635 if (POINTER_TYPE_P (func))
2636 func = TREE_TYPE (func);
2637 }
2638
2639 /* Check that the return value locations are the same. For example,
2640 if we are returning floats on the 80387 register stack, we cannot
2641 make a sibcall from a function that doesn't return a float to a
2642 function that does or, conversely, from a function that does return
2643 a float to a function that doesn't; the necessary stack adjustment
2644 would not be executed. This is also the place we notice
2645 differences in the return value ABI. Note that it is ok for one
2646 of the functions to have void return type as long as the return
2647 value of the other is passed in a register. */
2648 a = ix86_function_value (TREE_TYPE (exp), func, false);
2649 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2650 cfun->decl, false);
2651 if (STACK_REG_P (a) || STACK_REG_P (b))
2652 {
2653 if (!rtx_equal_p (a, b))
2654 return false;
2655 }
2656 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2657 ;
2658 else if (!rtx_equal_p (a, b))
2659 return false;
2660
2661 /* If this call is indirect, we'll need to be able to use a call-clobbered
2662 register for the address of the target function. Make sure that not
2663 all such registers are used for passing parameters. */
2664 if (!decl && !TARGET_64BIT)
2665 {
2666 tree type;
2667
2668 /* We're looking at the CALL_EXPR, we need the type of the function. */
2669 type = CALL_EXPR_FN (exp); /* pointer expression */
2670 type = TREE_TYPE (type); /* pointer type */
2671 type = TREE_TYPE (type); /* function type */
2672
2673 if (ix86_function_regparm (type, NULL) >= 3)
2674 {
2675 /* ??? Need to count the actual number of registers to be used,
2676 not the possible number of registers. Fix later. */
2677 return false;
2678 }
2679 }
2680
2681 /* Dllimport'd functions are also called indirectly. */
2682 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2683 && decl && DECL_DLLIMPORT_P (decl)
2684 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2685 return false;
2686
2687 /* If we force-aligned the stack, then sibcalling would unalign the
2688 stack, which may break the called function. */
2689 if (cfun->machine->force_align_arg_pointer)
2690 return false;
2691
2692 /* Otherwise okay. That also includes certain types of indirect calls. */
2693 return true;
2694 }
2695
2696 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2697 calling convention attributes;
2698 arguments as in struct attribute_spec.handler. */
2699
2700 static tree
2701 ix86_handle_cconv_attribute (tree *node, tree name,
2702 tree args,
2703 int flags ATTRIBUTE_UNUSED,
2704 bool *no_add_attrs)
2705 {
2706 if (TREE_CODE (*node) != FUNCTION_TYPE
2707 && TREE_CODE (*node) != METHOD_TYPE
2708 && TREE_CODE (*node) != FIELD_DECL
2709 && TREE_CODE (*node) != TYPE_DECL)
2710 {
2711 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2712 IDENTIFIER_POINTER (name));
2713 *no_add_attrs = true;
2714 return NULL_TREE;
2715 }
2716
2717 /* Can combine regparm with all attributes but fastcall. */
2718 if (is_attribute_p ("regparm", name))
2719 {
2720 tree cst;
2721
2722 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2723 {
2724 error ("fastcall and regparm attributes are not compatible");
2725 }
2726
2727 cst = TREE_VALUE (args);
2728 if (TREE_CODE (cst) != INTEGER_CST)
2729 {
2730 warning (OPT_Wattributes,
2731 "%qs attribute requires an integer constant argument",
2732 IDENTIFIER_POINTER (name));
2733 *no_add_attrs = true;
2734 }
2735 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2736 {
2737 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2738 IDENTIFIER_POINTER (name), REGPARM_MAX);
2739 *no_add_attrs = true;
2740 }
2741
2742 if (!TARGET_64BIT
2743 && lookup_attribute (ix86_force_align_arg_pointer_string,
2744 TYPE_ATTRIBUTES (*node))
2745 && compare_tree_int (cst, REGPARM_MAX-1))
2746 {
2747 error ("%s functions limited to %d register parameters",
2748 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2749 }
2750
2751 return NULL_TREE;
2752 }
2753
2754 if (TARGET_64BIT)
2755 {
2756 /* Do not warn when emulating the MS ABI. */
2757 if (!TARGET_64BIT_MS_ABI)
2758 warning (OPT_Wattributes, "%qs attribute ignored",
2759 IDENTIFIER_POINTER (name));
2760 *no_add_attrs = true;
2761 return NULL_TREE;
2762 }
2763
2764 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2765 if (is_attribute_p ("fastcall", name))
2766 {
2767 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2768 {
2769 error ("fastcall and cdecl attributes are not compatible");
2770 }
2771 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2772 {
2773 error ("fastcall and stdcall attributes are not compatible");
2774 }
2775 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2776 {
2777 error ("fastcall and regparm attributes are not compatible");
2778 }
2779 }
2780
2781 /* Can combine stdcall with fastcall (redundant), regparm and
2782 sseregparm. */
2783 else if (is_attribute_p ("stdcall", name))
2784 {
2785 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2786 {
2787 error ("stdcall and cdecl attributes are not compatible");
2788 }
2789 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2790 {
2791 error ("stdcall and fastcall attributes are not compatible");
2792 }
2793 }
2794
2795 /* Can combine cdecl with regparm and sseregparm. */
2796 else if (is_attribute_p ("cdecl", name))
2797 {
2798 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2799 {
2800 error ("stdcall and cdecl attributes are not compatible");
2801 }
2802 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2803 {
2804 error ("fastcall and cdecl attributes are not compatible");
2805 }
2806 }
2807
2808 /* Can combine sseregparm with all attributes. */
2809
2810 return NULL_TREE;
2811 }
2812
2813 /* Return 0 if the attributes for two types are incompatible, 1 if they
2814 are compatible, and 2 if they are nearly compatible (which causes a
2815 warning to be generated). */
2816
2817 static int
2818 ix86_comp_type_attributes (tree type1, tree type2)
2819 {
2820 /* Check for mismatch of non-default calling convention. */
2821 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2822
2823 if (TREE_CODE (type1) != FUNCTION_TYPE)
2824 return 1;
2825
2826 /* Check for mismatched fastcall/regparm types. */
2827 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2828 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2829 || (ix86_function_regparm (type1, NULL)
2830 != ix86_function_regparm (type2, NULL)))
2831 return 0;
2832
2833 /* Check for mismatched sseregparm types. */
2834 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2835 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2836 return 0;
2837
2838 /* Check for mismatched return types (cdecl vs stdcall). */
2839 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2840 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2841 return 0;
2842
2843 return 1;
2844 }
2845 \f
2846 /* Return the regparm value for a function with the indicated TYPE and DECL.
2847 DECL may be NULL when calling function indirectly
2848 or considering a libcall. */
2849
2850 static int
2851 ix86_function_regparm (tree type, tree decl)
2852 {
2853 tree attr;
2854 int regparm = ix86_regparm;
2855
2856 if (TARGET_64BIT)
2857 return regparm;
2858
2859 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2860 if (attr)
2861 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2862
2863 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2864 return 2;
2865
2866 /* Use register calling convention for local functions when possible. */
2867 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2868 && flag_unit_at_a_time && !profile_flag)
2869 {
2870 struct cgraph_local_info *i = cgraph_local_info (decl);
2871 if (i && i->local)
2872 {
2873 int local_regparm, globals = 0, regno;
2874 struct function *f;
2875
2876 /* Make sure no regparm register is taken by a
2877 global register variable. */
2878 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2879 if (global_regs[local_regparm])
2880 break;
2881
2882 /* We can't use regparm(3) for nested functions as these use the
2883 static chain pointer in the third argument. */
2884 if (local_regparm == 3
2885 && (decl_function_context (decl)
2886 || ix86_force_align_arg_pointer)
2887 && !DECL_NO_STATIC_CHAIN (decl))
2888 local_regparm = 2;
2889
2890 /* If the function realigns its stack pointer, the prologue will
2891 clobber %ecx. If we've already generated code for the callee,
2892 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2893 scanning the attributes for the self-realigning property. */
2894 f = DECL_STRUCT_FUNCTION (decl);
2895 if (local_regparm == 3
2896 && (f ? !!f->machine->force_align_arg_pointer
2897 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2898 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2899 local_regparm = 2;
2900
2901 /* Each global register variable increases register pressure, so the
2902 more global register variables there are, the less the regparm
2903 optimization can be used, unless explicitly requested by the user. */
2904 for (regno = 0; regno < 6; regno++)
2905 if (global_regs[regno])
2906 globals++;
2907 local_regparm
2908 = globals < local_regparm ? local_regparm - globals : 0;
2909
2910 if (local_regparm > regparm)
2911 regparm = local_regparm;
2912 }
2913 }
2914
2915 return regparm;
2916 }
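/* Illustrative example (hypothetical translation unit): for a static
   function whose address never escapes, compiled with -funit-at-a-time and
   without profiling, the code above can raise regparm to 3; declaring a
   global register variable in one of %eax/%edx/%ecx, or needing the static
   chain or stack realignment, lowers that count again as handled above.  */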
2917
2918 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2919 DFmode (2) arguments in SSE registers for a function with the
2920 indicated TYPE and DECL. DECL may be NULL when calling function
2921 indirectly or considering a libcall. Otherwise return 0. */
2922
2923 static int
2924 ix86_function_sseregparm (tree type, tree decl)
2925 {
2926 gcc_assert (!TARGET_64BIT);
2927
2928 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2929 by the sseregparm attribute. */
2930 if (TARGET_SSEREGPARM
2931 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2932 {
2933 if (!TARGET_SSE)
2934 {
2935 if (decl)
2936 error ("Calling %qD with attribute sseregparm without "
2937 "SSE/SSE2 enabled", decl);
2938 else
2939 error ("Calling %qT with attribute sseregparm without "
2940 "SSE/SSE2 enabled", type);
2941 return 0;
2942 }
2943
2944 return 2;
2945 }
2946
2947 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2948 (and DFmode for SSE2) arguments in SSE registers. */
2949 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2950 {
2951 struct cgraph_local_info *i = cgraph_local_info (decl);
2952 if (i && i->local)
2953 return TARGET_SSE2 ? 2 : 1;
2954 }
2955
2956 return 0;
2957 }
2958
2959 /* Return true if EAX is live at the start of the function. Used by
2960 ix86_expand_prologue to determine if we need special help before
2961 calling allocate_stack_worker. */
2962
2963 static bool
2964 ix86_eax_live_at_start_p (void)
2965 {
2966 /* Cheat. Don't bother working forward from ix86_function_regparm
2967 to the function type to whether an actual argument is located in
2968 eax. Instead just look at cfg info, which is still close enough
2969 to correct at this point. This gives false positives for broken
2970 functions that might use uninitialized data that happens to be
2971 allocated in eax, but who cares? */
2972 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2973 }
2974
2975 /* Return true if TYPE has a variable argument list. */
2976
2977 static bool
2978 type_has_variadic_args_p (tree type)
2979 {
2980 tree n, t = TYPE_ARG_TYPES (type);
2981
2982 if (t == NULL)
2983 return false;
2984
2985 while ((n = TREE_CHAIN (t)) != NULL)
2986 t = n;
2987
2988 return TREE_VALUE (t) != void_type_node;
2989 }
2990
2991 /* Value is the number of bytes of arguments automatically
2992 popped when returning from a subroutine call.
2993 FUNDECL is the declaration node of the function (as a tree),
2994 FUNTYPE is the data type of the function (as a tree),
2995 or for a library call it is an identifier node for the subroutine name.
2996 SIZE is the number of bytes of arguments passed on the stack.
2997
2998 On the 80386, the RTD insn may be used to pop them if the number
2999 of args is fixed, but if the number is variable then the caller
3000 must pop them all. RTD can't be used for library calls now
3001 because the library is compiled with the Unix compiler.
3002 Use of RTD is a selectable option, since it is incompatible with
3003 standard Unix calling sequences. If the option is not selected,
3004 the caller must always pop the args.
3005
3006 The attribute stdcall is equivalent to RTD on a per module basis. */
3007
3008 int
3009 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3010 {
3011 int rtd;
3012
3013 /* None of the 64-bit ABIs pop arguments. */
3014 if (TARGET_64BIT)
3015 return 0;
3016
3017 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3018
3019 /* Cdecl functions override -mrtd, and never pop the stack. */
3020 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3021 {
3022 /* Stdcall and fastcall functions will pop the stack if not
3023 variable args. */
3024 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3025 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3026 rtd = 1;
3027
3028 if (rtd && ! type_has_variadic_args_p (funtype))
3029 return size;
3030 }
3031
3032 /* Lose any fake structure return argument if it is passed on the stack. */
3033 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3034 && !KEEP_AGGREGATE_RETURN_POINTER)
3035 {
3036 int nregs = ix86_function_regparm (funtype, fundecl);
3037 if (nregs == 0)
3038 return GET_MODE_SIZE (Pmode);
3039 }
3040
3041 return 0;
3042 }
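/* Illustrative example (hypothetical declaration): for
     void __attribute__ ((stdcall)) f (int a, int b, int c);
   the callee pops its 12 bytes of stack arguments, so the function above
   returns 12; for a cdecl or variadic function it returns 0 and the caller
   pops the arguments instead.  */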
3043 \f
3044 /* Argument support functions. */
3045
3046 /* Return true when register may be used to pass function parameters. */
3047 bool
3048 ix86_function_arg_regno_p (int regno)
3049 {
3050 int i;
3051 const int *parm_regs;
3052
3053 if (!TARGET_64BIT)
3054 {
3055 if (TARGET_MACHO)
3056 return (regno < REGPARM_MAX
3057 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3058 else
3059 return (regno < REGPARM_MAX
3060 || (TARGET_MMX && MMX_REGNO_P (regno)
3061 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3062 || (TARGET_SSE && SSE_REGNO_P (regno)
3063 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3064 }
3065
3066 if (TARGET_MACHO)
3067 {
3068 if (SSE_REGNO_P (regno) && TARGET_SSE)
3069 return true;
3070 }
3071 else
3072 {
3073 if (TARGET_SSE && SSE_REGNO_P (regno)
3074 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3075 return true;
3076 }
3077
3078 /* RAX is used as hidden argument to va_arg functions. */
3079 if (!TARGET_64BIT_MS_ABI && regno == 0)
3080 return true;
3081
3082 if (TARGET_64BIT_MS_ABI)
3083 parm_regs = x86_64_ms_abi_int_parameter_registers;
3084 else
3085 parm_regs = x86_64_int_parameter_registers;
3086 for (i = 0; i < REGPARM_MAX; i++)
3087 if (regno == parm_regs[i])
3088 return true;
3089 return false;
3090 }
3091
3092 /* Return true if we do not know how to pass TYPE solely in registers. */
3093
3094 static bool
3095 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3096 {
3097 if (must_pass_in_stack_var_size_or_pad (mode, type))
3098 return true;
3099
3100 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3101 The layout_type routine is crafty and tries to trick us into passing
3102 currently unsupported vector types on the stack by using TImode. */
3103 return (!TARGET_64BIT && mode == TImode
3104 && type && TREE_CODE (type) != VECTOR_TYPE);
3105 }
3106
3107 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3108 for a call to a function whose data type is FNTYPE.
3109 For a library call, FNTYPE is 0. */
3110
3111 void
3112 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3113 tree fntype, /* tree ptr for function decl */
3114 rtx libname, /* SYMBOL_REF of library name or 0 */
3115 tree fndecl)
3116 {
3117 memset (cum, 0, sizeof (*cum));
3118
3119 /* Set up the number of registers to use for passing arguments. */
3120 cum->nregs = ix86_regparm;
3121 if (TARGET_SSE)
3122 cum->sse_nregs = SSE_REGPARM_MAX;
3123 if (TARGET_MMX)
3124 cum->mmx_nregs = MMX_REGPARM_MAX;
3125 cum->warn_sse = true;
3126 cum->warn_mmx = true;
3127 cum->maybe_vaarg = (fntype
3128 ? (!TYPE_ARG_TYPES (fntype)
3129 || type_has_variadic_args_p (fntype))
3130 : !libname);
3131
3132 if (!TARGET_64BIT)
3133 {
3134 /* If there are variable arguments, then we won't pass anything
3135 in registers in 32-bit mode. */
3136 if (cum->maybe_vaarg)
3137 {
3138 cum->nregs = 0;
3139 cum->sse_nregs = 0;
3140 cum->mmx_nregs = 0;
3141 cum->warn_sse = 0;
3142 cum->warn_mmx = 0;
3143 return;
3144 }
3145
3146 /* Use the ecx and edx registers if the function has the fastcall
3147 attribute, else look for regparm information. */
3148 if (fntype)
3149 {
3150 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3151 {
3152 cum->nregs = 2;
3153 cum->fastcall = 1;
3154 }
3155 else
3156 cum->nregs = ix86_function_regparm (fntype, fndecl);
3157 }
3158
3159 /* Set up the number of SSE registers used for passing SFmode
3160 and DFmode arguments. Warn for mismatching ABI. */
3161 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3162 }
3163 }
3164
3165 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3166 But in the case of vector types, it is some vector mode.
3167
3168 When we have only some of our vector isa extensions enabled, then there
3169 are some modes for which vector_mode_supported_p is false. For these
3170 modes, the generic vector support in gcc will choose some non-vector mode
3171 in order to implement the type. By computing the natural mode, we'll
3172 select the proper ABI location for the operand and not depend on whatever
3173 the middle-end decides to do with these vector types. */
3174
3175 static enum machine_mode
3176 type_natural_mode (tree type)
3177 {
3178 enum machine_mode mode = TYPE_MODE (type);
3179
3180 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3181 {
3182 HOST_WIDE_INT size = int_size_in_bytes (type);
3183 if ((size == 8 || size == 16)
3184 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3185 && TYPE_VECTOR_SUBPARTS (type) > 1)
3186 {
3187 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3188
3189 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3190 mode = MIN_MODE_VECTOR_FLOAT;
3191 else
3192 mode = MIN_MODE_VECTOR_INT;
3193
3194 /* Get the mode which has this inner mode and number of units. */
3195 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3196 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3197 && GET_MODE_INNER (mode) == innermode)
3198 return mode;
3199
3200 gcc_unreachable ();
3201 }
3202 }
3203
3204 return mode;
3205 }
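/* For example (an illustrative sketch, not taken from the code above):
   with -mno-mmx a type declared as

     typedef int v2si __attribute__ ((vector_size (8)));

   gets a non-vector TYPE_MODE from the middle end, but type_natural_mode
   still reports V2SImode, so the argument is given its proper ABI
   location regardless of which vector extensions are enabled.  */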
3206
3207 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3208 this may not agree with the mode that the type system has chosen for the
3209 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3210 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3211
3212 static rtx
3213 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3214 unsigned int regno)
3215 {
3216 rtx tmp;
3217
3218 if (orig_mode != BLKmode)
3219 tmp = gen_rtx_REG (orig_mode, regno);
3220 else
3221 {
3222 tmp = gen_rtx_REG (mode, regno);
3223 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3224 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3225 }
3226
3227 return tmp;
3228 }
3229
3230 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3231 The goal of this code is to classify each eightbyte of an incoming argument
3232 by register class and assign registers accordingly. */
3233
3234 /* Return the union class of CLASS1 and CLASS2.
3235 See the x86-64 PS ABI for details. */
3236
3237 static enum x86_64_reg_class
3238 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3239 {
3240 /* Rule #1: If both classes are equal, this is the resulting class. */
3241 if (class1 == class2)
3242 return class1;
3243
3244 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3245 the other class. */
3246 if (class1 == X86_64_NO_CLASS)
3247 return class2;
3248 if (class2 == X86_64_NO_CLASS)
3249 return class1;
3250
3251 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3252 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3253 return X86_64_MEMORY_CLASS;
3254
3255 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3256 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3257 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3258 return X86_64_INTEGERSI_CLASS;
3259 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3260 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3261 return X86_64_INTEGER_CLASS;
3262
3263 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3264 MEMORY is used. */
3265 if (class1 == X86_64_X87_CLASS
3266 || class1 == X86_64_X87UP_CLASS
3267 || class1 == X86_64_COMPLEX_X87_CLASS
3268 || class2 == X86_64_X87_CLASS
3269 || class2 == X86_64_X87UP_CLASS
3270 || class2 == X86_64_COMPLEX_X87_CLASS)
3271 return X86_64_MEMORY_CLASS;
3272
3273 /* Rule #6: Otherwise class SSE is used. */
3274 return X86_64_SSE_CLASS;
3275 }
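/* A few concrete merges following the rules above (illustration only):

     merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS)        -> X86_64_SSESF_CLASS
     merge_classes (X86_64_INTEGERSI_CLASS, X86_64_SSESF_CLASS) -> X86_64_INTEGERSI_CLASS
     merge_classes (X86_64_SSE_CLASS, X86_64_X87_CLASS)         -> X86_64_MEMORY_CLASS
     merge_classes (X86_64_SSEDF_CLASS, X86_64_SSESF_CLASS)     -> X86_64_SSE_CLASS  */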
3276
3277 /* Classify the argument of type TYPE and mode MODE.
3278 CLASSES will be filled by the register class used to pass each word
3279 of the operand. The number of words is returned. In case the parameter
3280 should be passed in memory, 0 is returned. As a special case for zero
3281 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3282
3283 BIT_OFFSET is used internally for handling records and specifies the
3284 offset in bits, taken modulo 256 to avoid overflow cases.
3285
3286 See the x86-64 PS ABI for details.
3287 */
3288
3289 static int
3290 classify_argument (enum machine_mode mode, tree type,
3291 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3292 {
3293 HOST_WIDE_INT bytes =
3294 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3295 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3296
3297 /* Variable sized entities are always passed/returned in memory. */
3298 if (bytes < 0)
3299 return 0;
3300
3301 if (mode != VOIDmode
3302 && targetm.calls.must_pass_in_stack (mode, type))
3303 return 0;
3304
3305 if (type && AGGREGATE_TYPE_P (type))
3306 {
3307 int i;
3308 tree field;
3309 enum x86_64_reg_class subclasses[MAX_CLASSES];
3310
3311 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3312 if (bytes > 16)
3313 return 0;
3314
3315 for (i = 0; i < words; i++)
3316 classes[i] = X86_64_NO_CLASS;
3317
3318 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3319 signal the memory class, so handle this as a special case. */
3320 if (!words)
3321 {
3322 classes[0] = X86_64_NO_CLASS;
3323 return 1;
3324 }
3325
3326 /* Classify each field of record and merge classes. */
3327 switch (TREE_CODE (type))
3328 {
3329 case RECORD_TYPE:
3330 /* And now merge the fields of structure. */
3331 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3332 {
3333 if (TREE_CODE (field) == FIELD_DECL)
3334 {
3335 int num;
3336
3337 if (TREE_TYPE (field) == error_mark_node)
3338 continue;
3339
3340 /* Bitfields are always classified as integer. Handle them
3341 early, since later code would consider them to be
3342 misaligned integers. */
3343 if (DECL_BIT_FIELD (field))
3344 {
3345 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3346 i < ((int_bit_position (field) + (bit_offset % 64))
3347 + tree_low_cst (DECL_SIZE (field), 0)
3348 + 63) / 8 / 8; i++)
3349 classes[i] =
3350 merge_classes (X86_64_INTEGER_CLASS,
3351 classes[i]);
3352 }
3353 else
3354 {
3355 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3356 TREE_TYPE (field), subclasses,
3357 (int_bit_position (field)
3358 + bit_offset) % 256);
3359 if (!num)
3360 return 0;
3361 for (i = 0; i < num; i++)
3362 {
3363 int pos =
3364 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3365 classes[i + pos] =
3366 merge_classes (subclasses[i], classes[i + pos]);
3367 }
3368 }
3369 }
3370 }
3371 break;
3372
3373 case ARRAY_TYPE:
3374 /* Arrays are handled as small records. */
3375 {
3376 int num;
3377 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3378 TREE_TYPE (type), subclasses, bit_offset);
3379 if (!num)
3380 return 0;
3381
3382 /* The partial classes are now full classes. */
3383 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3384 subclasses[0] = X86_64_SSE_CLASS;
3385 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3386 subclasses[0] = X86_64_INTEGER_CLASS;
3387
3388 for (i = 0; i < words; i++)
3389 classes[i] = subclasses[i % num];
3390
3391 break;
3392 }
3393 case UNION_TYPE:
3394 case QUAL_UNION_TYPE:
3395 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3397 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3398 {
3399 if (TREE_CODE (field) == FIELD_DECL)
3400 {
3401 int num;
3402
3403 if (TREE_TYPE (field) == error_mark_node)
3404 continue;
3405
3406 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3407 TREE_TYPE (field), subclasses,
3408 bit_offset);
3409 if (!num)
3410 return 0;
3411 for (i = 0; i < num; i++)
3412 classes[i] = merge_classes (subclasses[i], classes[i]);
3413 }
3414 }
3415 break;
3416
3417 default:
3418 gcc_unreachable ();
3419 }
3420
3421 /* Final merger cleanup. */
3422 for (i = 0; i < words; i++)
3423 {
3424 /* If one class is MEMORY, everything should be passed in
3425 memory. */
3426 if (classes[i] == X86_64_MEMORY_CLASS)
3427 return 0;
3428
3429 /* The X86_64_SSEUP_CLASS should always be preceded by
3430 X86_64_SSE_CLASS. */
3431 if (classes[i] == X86_64_SSEUP_CLASS
3432 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3433 classes[i] = X86_64_SSE_CLASS;
3434
3435 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3436 if (classes[i] == X86_64_X87UP_CLASS
3437 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3438 classes[i] = X86_64_SSE_CLASS;
3439 }
3440 return words;
3441 }
3442
3443 /* Compute the alignment needed. We align all types to their natural
3444 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3445 if (mode != VOIDmode && mode != BLKmode)
3446 {
3447 int mode_alignment = GET_MODE_BITSIZE (mode);
3448
3449 if (mode == XFmode)
3450 mode_alignment = 128;
3451 else if (mode == XCmode)
3452 mode_alignment = 256;
3453 if (COMPLEX_MODE_P (mode))
3454 mode_alignment /= 2;
3455 /* Misaligned fields are always returned in memory. */
3456 if (bit_offset % mode_alignment)
3457 return 0;
3458 }
3459
3460 /* For V1xx modes, just use the base mode. */
3461 if (VECTOR_MODE_P (mode)
3462 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3463 mode = GET_MODE_INNER (mode);
3464
3465 /* Classification of atomic types. */
3466 switch (mode)
3467 {
3468 case SDmode:
3469 case DDmode:
3470 classes[0] = X86_64_SSE_CLASS;
3471 return 1;
3472 case TDmode:
3473 classes[0] = X86_64_SSE_CLASS;
3474 classes[1] = X86_64_SSEUP_CLASS;
3475 return 2;
3476 case DImode:
3477 case SImode:
3478 case HImode:
3479 case QImode:
3480 case CSImode:
3481 case CHImode:
3482 case CQImode:
3483 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3484 classes[0] = X86_64_INTEGERSI_CLASS;
3485 else
3486 classes[0] = X86_64_INTEGER_CLASS;
3487 return 1;
3488 case CDImode:
3489 case TImode:
3490 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3491 return 2;
3492 case CTImode:
3493 return 0;
3494 case SFmode:
3495 if (!(bit_offset % 64))
3496 classes[0] = X86_64_SSESF_CLASS;
3497 else
3498 classes[0] = X86_64_SSE_CLASS;
3499 return 1;
3500 case DFmode:
3501 classes[0] = X86_64_SSEDF_CLASS;
3502 return 1;
3503 case XFmode:
3504 classes[0] = X86_64_X87_CLASS;
3505 classes[1] = X86_64_X87UP_CLASS;
3506 return 2;
3507 case TFmode:
3508 classes[0] = X86_64_SSE_CLASS;
3509 classes[1] = X86_64_SSEUP_CLASS;
3510 return 2;
3511 case SCmode:
3512 classes[0] = X86_64_SSE_CLASS;
3513 return 1;
3514 case DCmode:
3515 classes[0] = X86_64_SSEDF_CLASS;
3516 classes[1] = X86_64_SSEDF_CLASS;
3517 return 2;
3518 case XCmode:
3519 classes[0] = X86_64_COMPLEX_X87_CLASS;
3520 return 1;
3521 case TCmode:
3522 /* This mode is larger than 16 bytes. */
3523 return 0;
3524 case V4SFmode:
3525 case V4SImode:
3526 case V16QImode:
3527 case V8HImode:
3528 case V2DFmode:
3529 case V2DImode:
3530 classes[0] = X86_64_SSE_CLASS;
3531 classes[1] = X86_64_SSEUP_CLASS;
3532 return 2;
3533 case V2SFmode:
3534 case V2SImode:
3535 case V4HImode:
3536 case V8QImode:
3537 classes[0] = X86_64_SSE_CLASS;
3538 return 1;
3539 case BLKmode:
3540 case VOIDmode:
3541 return 0;
3542 default:
3543 gcc_assert (VECTOR_MODE_P (mode));
3544
3545 if (bytes > 16)
3546 return 0;
3547
3548 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3549
3550 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3551 classes[0] = X86_64_INTEGERSI_CLASS;
3552 else
3553 classes[0] = X86_64_INTEGER_CLASS;
3554 classes[1] = X86_64_INTEGER_CLASS;
3555 return 1 + (bytes > 8);
3556 }
3557 }
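/* A worked example (sketch only): the 16-byte aggregate

     struct s { double d; long l; };

   is classified by the code above into two eightbytes,
   classes[0] == X86_64_SSEDF_CLASS and classes[1] == X86_64_INTEGER_CLASS,
   and classify_argument returns 2.  A 24-byte structure exceeds the
   16-byte limit, so 0 is returned and it is passed in memory.  */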
3558
3559 /* Examine the argument and return the number of registers required in each
3560 class. Return 0 iff the parameter should be passed in memory. */
3561 static int
3562 examine_argument (enum machine_mode mode, tree type, int in_return,
3563 int *int_nregs, int *sse_nregs)
3564 {
3565 enum x86_64_reg_class class[MAX_CLASSES];
3566 int n = classify_argument (mode, type, class, 0);
3567
3568 *int_nregs = 0;
3569 *sse_nregs = 0;
3570 if (!n)
3571 return 0;
3572 for (n--; n >= 0; n--)
3573 switch (class[n])
3574 {
3575 case X86_64_INTEGER_CLASS:
3576 case X86_64_INTEGERSI_CLASS:
3577 (*int_nregs)++;
3578 break;
3579 case X86_64_SSE_CLASS:
3580 case X86_64_SSESF_CLASS:
3581 case X86_64_SSEDF_CLASS:
3582 (*sse_nregs)++;
3583 break;
3584 case X86_64_NO_CLASS:
3585 case X86_64_SSEUP_CLASS:
3586 break;
3587 case X86_64_X87_CLASS:
3588 case X86_64_X87UP_CLASS:
3589 if (!in_return)
3590 return 0;
3591 break;
3592 case X86_64_COMPLEX_X87_CLASS:
3593 return in_return ? 2 : 0;
3594 case X86_64_MEMORY_CLASS:
3595 gcc_unreachable ();
3596 }
3597 return 1;
3598 }
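/* Continuing the example above (illustrative): for struct s { double d;
   long l; }, examine_argument sets *sse_nregs to 1 and *int_nregs to 1
   and returns 1, whereas anything classified as memory yields 0.  */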
3599
3600 /* Construct container for the argument used by GCC interface. See
3601 FUNCTION_ARG for the detailed description. */
3602
3603 static rtx
3604 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3605 tree type, int in_return, int nintregs, int nsseregs,
3606 const int *intreg, int sse_regno)
3607 {
3608 /* The following variables hold the static issued_error state. */
3609 static bool issued_sse_arg_error;
3610 static bool issued_sse_ret_error;
3611 static bool issued_x87_ret_error;
3612
3613 enum machine_mode tmpmode;
3614 int bytes =
3615 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3616 enum x86_64_reg_class class[MAX_CLASSES];
3617 int n;
3618 int i;
3619 int nexps = 0;
3620 int needed_sseregs, needed_intregs;
3621 rtx exp[MAX_CLASSES];
3622 rtx ret;
3623
3624 n = classify_argument (mode, type, class, 0);
3625 if (!n)
3626 return NULL;
3627 if (!examine_argument (mode, type, in_return, &needed_intregs,
3628 &needed_sseregs))
3629 return NULL;
3630 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3631 return NULL;
3632
3633 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3634 some less clueful developer tries to use floating-point anyway. */
3635 if (needed_sseregs && !TARGET_SSE)
3636 {
3637 if (in_return)
3638 {
3639 if (!issued_sse_ret_error)
3640 {
3641 error ("SSE register return with SSE disabled");
3642 issued_sse_ret_error = true;
3643 }
3644 }
3645 else if (!issued_sse_arg_error)
3646 {
3647 error ("SSE register argument with SSE disabled");
3648 issued_sse_arg_error = true;
3649 }
3650 return NULL;
3651 }
3652
3653 /* Likewise, error if the ABI requires us to return values in the
3654 x87 registers and the user specified -mno-80387. */
3655 if (!TARGET_80387 && in_return)
3656 for (i = 0; i < n; i++)
3657 if (class[i] == X86_64_X87_CLASS
3658 || class[i] == X86_64_X87UP_CLASS
3659 || class[i] == X86_64_COMPLEX_X87_CLASS)
3660 {
3661 if (!issued_x87_ret_error)
3662 {
3663 error ("x87 register return with x87 disabled");
3664 issued_x87_ret_error = true;
3665 }
3666 return NULL;
3667 }
3668
3669 /* First construct the simple cases. Avoid SCmode, since we want to use a
3670 single register to pass this type. */
3671 if (n == 1 && mode != SCmode)
3672 switch (class[0])
3673 {
3674 case X86_64_INTEGER_CLASS:
3675 case X86_64_INTEGERSI_CLASS:
3676 return gen_rtx_REG (mode, intreg[0]);
3677 case X86_64_SSE_CLASS:
3678 case X86_64_SSESF_CLASS:
3679 case X86_64_SSEDF_CLASS:
3680 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3681 case X86_64_X87_CLASS:
3682 case X86_64_COMPLEX_X87_CLASS:
3683 return gen_rtx_REG (mode, FIRST_STACK_REG);
3684 case X86_64_NO_CLASS:
3685 /* Zero sized array, struct or class. */
3686 return NULL;
3687 default:
3688 gcc_unreachable ();
3689 }
3690 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3691 && mode != BLKmode)
3692 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3693
3694 if (n == 2
3695 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3696 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3697 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3698 && class[1] == X86_64_INTEGER_CLASS
3699 && (mode == CDImode || mode == TImode || mode == TFmode)
3700 && intreg[0] + 1 == intreg[1])
3701 return gen_rtx_REG (mode, intreg[0]);
3702
3703 /* Otherwise figure out the entries of the PARALLEL. */
3704 for (i = 0; i < n; i++)
3705 {
3706 switch (class[i])
3707 {
3708 case X86_64_NO_CLASS:
3709 break;
3710 case X86_64_INTEGER_CLASS:
3711 case X86_64_INTEGERSI_CLASS:
3712 /* Merge TImodes on aligned occasions here too. */
3713 if (i * 8 + 8 > bytes)
3714 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3715 else if (class[i] == X86_64_INTEGERSI_CLASS)
3716 tmpmode = SImode;
3717 else
3718 tmpmode = DImode;
3719 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3720 if (tmpmode == BLKmode)
3721 tmpmode = DImode;
3722 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3723 gen_rtx_REG (tmpmode, *intreg),
3724 GEN_INT (i*8));
3725 intreg++;
3726 break;
3727 case X86_64_SSESF_CLASS:
3728 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3729 gen_rtx_REG (SFmode,
3730 SSE_REGNO (sse_regno)),
3731 GEN_INT (i*8));
3732 sse_regno++;
3733 break;
3734 case X86_64_SSEDF_CLASS:
3735 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3736 gen_rtx_REG (DFmode,
3737 SSE_REGNO (sse_regno)),
3738 GEN_INT (i*8));
3739 sse_regno++;
3740 break;
3741 case X86_64_SSE_CLASS:
3742 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3743 tmpmode = TImode;
3744 else
3745 tmpmode = DImode;
3746 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3747 gen_rtx_REG (tmpmode,
3748 SSE_REGNO (sse_regno)),
3749 GEN_INT (i*8));
3750 if (tmpmode == TImode)
3751 i++;
3752 sse_regno++;
3753 break;
3754 default:
3755 gcc_unreachable ();
3756 }
3757 }
3758
3759 /* Empty aligned struct, union or class. */
3760 if (nexps == 0)
3761 return NULL;
3762
3763 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3764 for (i = 0; i < nexps; i++)
3765 XVECEXP (ret, 0, i) = exp [i];
3766 return ret;
3767 }
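/* For the struct s example above (illustrative sketch), the result is a
   two-element PARALLEL: a DFmode piece in the next SSE register at byte
   offset 0 and a DImode piece in the next integer register at offset 8,
   matching the SSEDF/INTEGER classification computed earlier.  */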
3768
3769 /* Update the data in CUM to advance over an argument of mode MODE
3770 and data type TYPE. (TYPE is null for libcalls where that information
3771 may not be available.) */
3772
3773 static void
3774 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3775 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3776 {
3777 switch (mode)
3778 {
3779 default:
3780 break;
3781
3782 case BLKmode:
3783 if (bytes < 0)
3784 break;
3785 /* FALLTHRU */
3786
3787 case DImode:
3788 case SImode:
3789 case HImode:
3790 case QImode:
3791 cum->words += words;
3792 cum->nregs -= words;
3793 cum->regno += words;
3794
3795 if (cum->nregs <= 0)
3796 {
3797 cum->nregs = 0;
3798 cum->regno = 0;
3799 }
3800 break;
3801
3802 case DFmode:
3803 if (cum->float_in_sse < 2)
3804 break;
3805 case SFmode:
3806 if (cum->float_in_sse < 1)
3807 break;
3808 /* FALLTHRU */
3809
3810 case TImode:
3811 case V16QImode:
3812 case V8HImode:
3813 case V4SImode:
3814 case V2DImode:
3815 case V4SFmode:
3816 case V2DFmode:
3817 if (!type || !AGGREGATE_TYPE_P (type))
3818 {
3819 cum->sse_words += words;
3820 cum->sse_nregs -= 1;
3821 cum->sse_regno += 1;
3822 if (cum->sse_nregs <= 0)
3823 {
3824 cum->sse_nregs = 0;
3825 cum->sse_regno = 0;
3826 }
3827 }
3828 break;
3829
3830 case V8QImode:
3831 case V4HImode:
3832 case V2SImode:
3833 case V2SFmode:
3834 if (!type || !AGGREGATE_TYPE_P (type))
3835 {
3836 cum->mmx_words += words;
3837 cum->mmx_nregs -= 1;
3838 cum->mmx_regno += 1;
3839 if (cum->mmx_nregs <= 0)
3840 {
3841 cum->mmx_nregs = 0;
3842 cum->mmx_regno = 0;
3843 }
3844 }
3845 break;
3846 }
3847 }
3848
3849 static void
3850 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3851 tree type, HOST_WIDE_INT words)
3852 {
3853 int int_nregs, sse_nregs;
3854
3855 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3856 cum->words += words;
3857 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3858 {
3859 cum->nregs -= int_nregs;
3860 cum->sse_nregs -= sse_nregs;
3861 cum->regno += int_nregs;
3862 cum->sse_regno += sse_nregs;
3863 }
3864 else
3865 cum->words += words;
3866 }
3867
3868 static void
3869 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3870 HOST_WIDE_INT words)
3871 {
3872 /* Otherwise, this should have been passed indirectly. */
3873 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3874
3875 cum->words += words;
3876 if (cum->nregs > 0)
3877 {
3878 cum->nregs -= 1;
3879 cum->regno += 1;
3880 }
3881 }
3882
3883 void
3884 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3885 tree type, int named ATTRIBUTE_UNUSED)
3886 {
3887 HOST_WIDE_INT bytes, words;
3888
3889 if (mode == BLKmode)
3890 bytes = int_size_in_bytes (type);
3891 else
3892 bytes = GET_MODE_SIZE (mode);
3893 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3894
3895 if (type)
3896 mode = type_natural_mode (type);
3897
3898 if (TARGET_64BIT_MS_ABI)
3899 function_arg_advance_ms_64 (cum, bytes, words);
3900 else if (TARGET_64BIT)
3901 function_arg_advance_64 (cum, mode, type, words);
3902 else
3903 function_arg_advance_32 (cum, mode, type, bytes, words);
3904 }
3905
3906 /* Define where to put the arguments to a function.
3907 Value is zero to push the argument on the stack,
3908 or a hard register in which to store the argument.
3909
3910 MODE is the argument's machine mode.
3911 TYPE is the data type of the argument (as a tree).
3912 This is null for libcalls where that information may
3913 not be available.
3914 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3915 the preceding args and about the function being called.
3916 NAMED is nonzero if this argument is a named parameter
3917 (otherwise it is an extra parameter matching an ellipsis). */
3918
3919 static rtx
3920 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3921 enum machine_mode orig_mode, tree type,
3922 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3923 {
3924 static bool warnedsse, warnedmmx;
3925
3926 /* Avoid the AL settings for the Unix64 ABI. */
3927 if (mode == VOIDmode)
3928 return constm1_rtx;
3929
3930 switch (mode)
3931 {
3932 default:
3933 break;
3934
3935 case BLKmode:
3936 if (bytes < 0)
3937 break;
3938 /* FALLTHRU */
3939 case DImode:
3940 case SImode:
3941 case HImode:
3942 case QImode:
3943 if (words <= cum->nregs)
3944 {
3945 int regno = cum->regno;
3946
3947 /* Fastcall allocates the first two DWORD (SImode) or
3948 smaller arguments to ECX and EDX. */
3949 if (cum->fastcall)
3950 {
3951 if (mode == BLKmode || mode == DImode)
3952 break;
3953
3954 /* ECX, not EAX, is the first allocated register. */
3955 if (regno == 0)
3956 regno = 2;
3957 }
3958 return gen_rtx_REG (mode, regno);
3959 }
3960 break;
3961
3962 case DFmode:
3963 if (cum->float_in_sse < 2)
3964 break;
3965 case SFmode:
3966 if (cum->float_in_sse < 1)
3967 break;
3968 /* FALLTHRU */
3969 case TImode:
3970 case V16QImode:
3971 case V8HImode:
3972 case V4SImode:
3973 case V2DImode:
3974 case V4SFmode:
3975 case V2DFmode:
3976 if (!type || !AGGREGATE_TYPE_P (type))
3977 {
3978 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3979 {
3980 warnedsse = true;
3981 warning (0, "SSE vector argument without SSE enabled "
3982 "changes the ABI");
3983 }
3984 if (cum->sse_nregs)
3985 return gen_reg_or_parallel (mode, orig_mode,
3986 cum->sse_regno + FIRST_SSE_REG);
3987 }
3988 break;
3989
3990 case V8QImode:
3991 case V4HImode:
3992 case V2SImode:
3993 case V2SFmode:
3994 if (!type || !AGGREGATE_TYPE_P (type))
3995 {
3996 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3997 {
3998 warnedmmx = true;
3999 warning (0, "MMX vector argument without MMX enabled "
4000 "changes the ABI");
4001 }
4002 if (cum->mmx_nregs)
4003 return gen_reg_or_parallel (mode, orig_mode,
4004 cum->mmx_regno + FIRST_MMX_REG);
4005 }
4006 break;
4007 }
4008
4009 return NULL_RTX;
4010 }
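/* Illustrative sketch of the 32-bit register assignment above: for

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   init_cumulative_args sets nregs to 2, so a and b are passed in
   registers; the first regno of 0 is remapped to 2 here, which names
   %ecx in this port's register numbering, and the second argument lands
   in %edx.  The third argument no longer fits and goes on the stack, as
   do DImode and BLKmode arguments to fastcall functions.  */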
4011
4012 static rtx
4013 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4014 enum machine_mode orig_mode, tree type)
4015 {
4016 /* Handle a hidden AL argument containing the number of SSE registers
4017 used by a varargs x86-64 function. */
4018 if (mode == VOIDmode)
4019 return GEN_INT (cum->maybe_vaarg
4020 ? (cum->sse_nregs < 0
4021 ? SSE_REGPARM_MAX
4022 : cum->sse_regno)
4023 : -1);
4024
4025 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4026 cum->sse_nregs,
4027 &x86_64_int_parameter_registers [cum->regno],
4028 cum->sse_regno);
4029 }
4030
4031 static rtx
4032 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4033 enum machine_mode orig_mode, int named)
4034 {
4035 unsigned int regno;
4036
4037 /* Avoid the AL settings for the Unix64 ABI. */
4038 if (mode == VOIDmode)
4039 return constm1_rtx;
4040
4041 /* If we've run out of registers, it goes on the stack. */
4042 if (cum->nregs == 0)
4043 return NULL_RTX;
4044
4045 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4046
4047 /* Only floating point modes are passed in anything but integer regs. */
4048 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4049 {
4050 if (named)
4051 regno = cum->regno + FIRST_SSE_REG;
4052 else
4053 {
4054 rtx t1, t2;
4055
4056 /* Unnamed floating parameters are passed in both the
4057 SSE and integer registers. */
4058 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4059 t2 = gen_rtx_REG (mode, regno);
4060 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4061 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4062 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4063 }
4064 }
4065
4066 return gen_reg_or_parallel (mode, orig_mode, regno);
4067 }
4068
4069 rtx
4070 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4071 tree type, int named)
4072 {
4073 enum machine_mode mode = omode;
4074 HOST_WIDE_INT bytes, words;
4075
4076 if (mode == BLKmode)
4077 bytes = int_size_in_bytes (type);
4078 else
4079 bytes = GET_MODE_SIZE (mode);
4080 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4081
4082 /* To simplify the code below, represent vector types with a vector mode
4083 even if MMX/SSE are not active. */
4084 if (type && TREE_CODE (type) == VECTOR_TYPE)
4085 mode = type_natural_mode (type);
4086
4087 if (TARGET_64BIT_MS_ABI)
4088 return function_arg_ms_64 (cum, mode, omode, named);
4089 else if (TARGET_64BIT)
4090 return function_arg_64 (cum, mode, omode, type);
4091 else
4092 return function_arg_32 (cum, mode, omode, type, bytes, words);
4093 }
4094
4095 /* A C expression that indicates when an argument must be passed by
4096 reference. If nonzero for an argument, a copy of that argument is
4097 made in memory and a pointer to the argument is passed instead of
4098 the argument itself. The pointer is passed in whatever way is
4099 appropriate for passing a pointer to that type. */
4100
4101 static bool
4102 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4103 enum machine_mode mode ATTRIBUTE_UNUSED,
4104 tree type, bool named ATTRIBUTE_UNUSED)
4105 {
4106 if (TARGET_64BIT_MS_ABI)
4107 {
4108 if (type)
4109 {
4110 /* Arrays are passed by reference. */
4111 if (TREE_CODE (type) == ARRAY_TYPE)
4112 return true;
4113
4114 if (AGGREGATE_TYPE_P (type))
4115 {
4116 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4117 are passed by reference. */
4118 int el2 = exact_log2 (int_size_in_bytes (type));
4119 return !(el2 >= 0 && el2 <= 3);
4120 }
4121 }
4122
4123 /* __m128 is passed by reference. */
4124 /* ??? How to handle complex? For now treat them as structs,
4125 and pass them by reference if they're too large. */
4126 if (GET_MODE_SIZE (mode) > 8)
4127 return true;
4128 }
4129 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4130 return 1;
4131
4132 return 0;
4133 }
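/* Sketch of the MS x64 rule implemented above (illustration only): an
   aggregate of 1, 2, 4 or 8 bytes is passed by value (exact_log2 of the
   size is 0..3), while e.g. a 12-byte struct gives exact_log2 (12) == -1
   and is passed by reference, as are all arrays and anything whose mode
   is wider than 8 bytes, such as __m128.  */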
4134
4135 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4136 ABI. Only called if TARGET_SSE. */
4137 static bool
4138 contains_128bit_aligned_vector_p (tree type)
4139 {
4140 enum machine_mode mode = TYPE_MODE (type);
4141 if (SSE_REG_MODE_P (mode)
4142 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4143 return true;
4144 if (TYPE_ALIGN (type) < 128)
4145 return false;
4146
4147 if (AGGREGATE_TYPE_P (type))
4148 {
4149 /* Walk the aggregates recursively. */
4150 switch (TREE_CODE (type))
4151 {
4152 case RECORD_TYPE:
4153 case UNION_TYPE:
4154 case QUAL_UNION_TYPE:
4155 {
4156 tree field;
4157
4158 /* Walk all the structure fields. */
4159 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4160 {
4161 if (TREE_CODE (field) == FIELD_DECL
4162 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4163 return true;
4164 }
4165 break;
4166 }
4167
4168 case ARRAY_TYPE:
4169 /* Just in case some language passes arrays by value. */
4170 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4171 return true;
4172 break;
4173
4174 default:
4175 gcc_unreachable ();
4176 }
4177 }
4178 return false;
4179 }
4180
4181 /* Gives the alignment boundary, in bits, of an argument with the
4182 specified mode and type. */
4183
4184 int
4185 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4186 {
4187 int align;
4188 if (type)
4189 align = TYPE_ALIGN (type);
4190 else
4191 align = GET_MODE_ALIGNMENT (mode);
4192 if (align < PARM_BOUNDARY)
4193 align = PARM_BOUNDARY;
4194 if (!TARGET_64BIT)
4195 {
4196 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4197 make an exception for SSE modes since these require 128bit
4198 alignment.
4199
4200 The handling here differs from field_alignment. ICC aligns MMX
4201 arguments to 4 byte boundaries, while structure fields are aligned
4202 to 8 byte boundaries. */
4203 if (!TARGET_SSE)
4204 align = PARM_BOUNDARY;
4205 else if (!type)
4206 {
4207 if (!SSE_REG_MODE_P (mode))
4208 align = PARM_BOUNDARY;
4209 }
4210 else
4211 {
4212 if (!contains_128bit_aligned_vector_p (type))
4213 align = PARM_BOUNDARY;
4214 }
4215 }
4216 if (align > 128)
4217 align = 128;
4218 return align;
4219 }
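/* For instance (illustrative): in 32-bit mode a plain int argument is
   aligned to PARM_BOUNDARY, while an __m128 argument, or a structure
   containing a 128-bit aligned vector, keeps 128-bit alignment when SSE
   is enabled.  The result is capped at 128 bits in all cases.  */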
4220
4221 /* Return true if N is a possible register number of function value. */
4222
4223 bool
4224 ix86_function_value_regno_p (int regno)
4225 {
4226 switch (regno)
4227 {
4228 case 0:
4229 return true;
4230
4231 case FIRST_FLOAT_REG:
4232 if (TARGET_64BIT_MS_ABI)
4233 return false;
4234 return TARGET_FLOAT_RETURNS_IN_80387;
4235
4236 case FIRST_SSE_REG:
4237 return TARGET_SSE;
4238
4239 case FIRST_MMX_REG:
4240 if (TARGET_MACHO || TARGET_64BIT)
4241 return false;
4242 return TARGET_MMX;
4243 }
4244
4245 return false;
4246 }
4247
4248 /* Define how to find the value returned by a function.
4249 VALTYPE is the data type of the value (as a tree).
4250 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4251 otherwise, FUNC is 0. */
4252
4253 static rtx
4254 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4255 tree fntype, tree fn)
4256 {
4257 unsigned int regno;
4258
4259 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4260 we normally prevent this case when mmx is not available. However
4261 some ABIs may require the result to be returned like DImode. */
4262 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4263 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4264
4265 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4266 we prevent this case when sse is not available. However some ABIs
4267 may require the result to be returned like integer TImode. */
4268 else if (mode == TImode
4269 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4270 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4271
4272 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4273 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4274 regno = FIRST_FLOAT_REG;
4275 else
4276 /* Most things go in %eax. */
4277 regno = 0;
4278
4279 /* Override FP return register with %xmm0 for local functions when
4280 SSE math is enabled or for functions with sseregparm attribute. */
4281 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4282 {
4283 int sse_level = ix86_function_sseregparm (fntype, fn);
4284 if ((sse_level >= 1 && mode == SFmode)
4285 || (sse_level == 2 && mode == DFmode))
4286 regno = FIRST_SSE_REG;
4287 }
4288
4289 return gen_rtx_REG (orig_mode, regno);
4290 }
4291
4292 static rtx
4293 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4294 tree valtype)
4295 {
4296 rtx ret;
4297
4298 /* Handle libcalls, which don't provide a type node. */
4299 if (valtype == NULL)
4300 {
4301 switch (mode)
4302 {
4303 case SFmode:
4304 case SCmode:
4305 case DFmode:
4306 case DCmode:
4307 case TFmode:
4308 case SDmode:
4309 case DDmode:
4310 case TDmode:
4311 return gen_rtx_REG (mode, FIRST_SSE_REG);
4312 case XFmode:
4313 case XCmode:
4314 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4315 case TCmode:
4316 return NULL;
4317 default:
4318 return gen_rtx_REG (mode, 0);
4319 }
4320 }
4321
4322 ret = construct_container (mode, orig_mode, valtype, 1,
4323 REGPARM_MAX, SSE_REGPARM_MAX,
4324 x86_64_int_return_registers, 0);
4325
4326 /* For zero sized structures, construct_container returns NULL, but we
4327 need to keep the rest of the compiler happy by returning a meaningful value. */
4328 if (!ret)
4329 ret = gen_rtx_REG (orig_mode, 0);
4330
4331 return ret;
4332 }
4333
4334 static rtx
4335 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4336 {
4337 unsigned int regno = 0;
4338
4339 if (TARGET_SSE)
4340 {
4341 if (mode == SFmode || mode == DFmode)
4342 regno = FIRST_SSE_REG;
4343 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4344 regno = FIRST_SSE_REG;
4345 }
4346
4347 return gen_rtx_REG (orig_mode, regno);
4348 }
4349
4350 static rtx
4351 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4352 enum machine_mode orig_mode, enum machine_mode mode)
4353 {
4354 tree fn, fntype;
4355
4356 fn = NULL_TREE;
4357 if (fntype_or_decl && DECL_P (fntype_or_decl))
4358 fn = fntype_or_decl;
4359 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4360
4361 if (TARGET_64BIT_MS_ABI)
4362 return function_value_ms_64 (orig_mode, mode);
4363 else if (TARGET_64BIT)
4364 return function_value_64 (orig_mode, mode, valtype);
4365 else
4366 return function_value_32 (orig_mode, mode, fntype, fn);
4367 }
4368
4369 static rtx
4370 ix86_function_value (tree valtype, tree fntype_or_decl,
4371 bool outgoing ATTRIBUTE_UNUSED)
4372 {
4373 enum machine_mode mode, orig_mode;
4374
4375 orig_mode = TYPE_MODE (valtype);
4376 mode = type_natural_mode (valtype);
4377 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4378 }
4379
4380 rtx
4381 ix86_libcall_value (enum machine_mode mode)
4382 {
4383 return ix86_function_value_1 (NULL, NULL, mode, mode);
4384 }
4385
4386 /* Return true iff type is returned in memory. */
4387
4388 static int
4389 return_in_memory_32 (tree type, enum machine_mode mode)
4390 {
4391 HOST_WIDE_INT size;
4392
4393 if (mode == BLKmode)
4394 return 1;
4395
4396 size = int_size_in_bytes (type);
4397
4398 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4399 return 0;
4400
4401 if (VECTOR_MODE_P (mode) || mode == TImode)
4402 {
4403 /* User-created vectors small enough to fit in EAX. */
4404 if (size < 8)
4405 return 0;
4406
4407 /* MMX/3dNow values are returned in MM0,
4408 except when it doesn't exist. */
4409 if (size == 8)
4410 return (TARGET_MMX ? 0 : 1);
4411
4412 /* SSE values are returned in XMM0, except when it doesn't exist. */
4413 if (size == 16)
4414 return (TARGET_SSE ? 0 : 1);
4415 }
4416
4417 if (mode == XFmode)
4418 return 0;
4419
4420 if (mode == TDmode)
4421 return 1;
4422
4423 if (size > 12)
4424 return 1;
4425 return 0;
4426 }
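/* Examples of the 32-bit rules above (sketch): a long double (XFmode) is
   not returned in memory, an 8-byte vector stays out of memory only when
   MMX is available, a 16-byte vector only when SSE is available, and any
   other value wider than 12 bytes is returned in memory.  */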
4427
4428 static int
4429 return_in_memory_64 (tree type, enum machine_mode mode)
4430 {
4431 int needed_intregs, needed_sseregs;
4432 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4433 }
4434
4435 static int
4436 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4437 {
4438 HOST_WIDE_INT size = int_size_in_bytes (type);
4439
4440 /* __m128 and friends are returned in xmm0. */
4441 if (size == 16 && VECTOR_MODE_P (mode))
4442 return 0;
4443
4444 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
4445 return (size != 1 && size != 2 && size != 4 && size != 8);
4446 }
4447
4448 int
4449 ix86_return_in_memory (tree type)
4450 {
4451 enum machine_mode mode = type_natural_mode (type);
4452
4453 if (TARGET_64BIT_MS_ABI)
4454 return return_in_memory_ms_64 (type, mode);
4455 else if (TARGET_64BIT)
4456 return return_in_memory_64 (type, mode);
4457 else
4458 return return_in_memory_32 (type, mode);
4459 }
4460
4461 /* Return true iff TYPE is returned in memory. This version is used
4462 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4463 but differs notably in that when MMX is available, 8-byte vectors
4464 are returned in memory, rather than in MMX registers. */
4465
4466 int
4467 ix86_sol10_return_in_memory (tree type)
4468 {
4469 int size;
4470 enum machine_mode mode = type_natural_mode (type);
4471
4472 if (TARGET_64BIT)
4473 return return_in_memory_64 (type, mode);
4474
4475 if (mode == BLKmode)
4476 return 1;
4477
4478 size = int_size_in_bytes (type);
4479
4480 if (VECTOR_MODE_P (mode))
4481 {
4482 /* Return in memory only if MMX registers *are* available. This
4483 seems backwards, but it is consistent with the existing
4484 Solaris x86 ABI. */
4485 if (size == 8)
4486 return TARGET_MMX;
4487 if (size == 16)
4488 return !TARGET_SSE;
4489 }
4490 else if (mode == TImode)
4491 return !TARGET_SSE;
4492 else if (mode == XFmode)
4493 return 0;
4494
4495 return size > 12;
4496 }
4497
4498 /* When returning SSE vector types, we have a choice of either
4499 (1) being ABI incompatible with a -march switch, or
4500 (2) generating an error.
4501 Given no good solution, I think the safest thing is one warning.
4502 The user won't be able to use -Werror, but....
4503
4504 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4505 called in response to actually generating a caller or callee that
4506 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4507 via aggregate_value_p for general type probing from tree-ssa. */
4508
4509 static rtx
4510 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4511 {
4512 static bool warnedsse, warnedmmx;
4513
4514 if (!TARGET_64BIT && type)
4515 {
4516 /* Look at the return type of the function, not the function type. */
4517 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4518
4519 if (!TARGET_SSE && !warnedsse)
4520 {
4521 if (mode == TImode
4522 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4523 {
4524 warnedsse = true;
4525 warning (0, "SSE vector return without SSE enabled "
4526 "changes the ABI");
4527 }
4528 }
4529
4530 if (!TARGET_MMX && !warnedmmx)
4531 {
4532 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4533 {
4534 warnedmmx = true;
4535 warning (0, "MMX vector return without MMX enabled "
4536 "changes the ABI");
4537 }
4538 }
4539 }
4540
4541 return NULL;
4542 }
4543
4544 \f
4545 /* Create the va_list data type. */
4546
4547 static tree
4548 ix86_build_builtin_va_list (void)
4549 {
4550 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4551
4552 /* For i386 we use a plain pointer to the argument area. */
4553 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4554 return build_pointer_type (char_type_node);
4555
4556 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4557 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4558
4559 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4560 unsigned_type_node);
4561 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4562 unsigned_type_node);
4563 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4564 ptr_type_node);
4565 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4566 ptr_type_node);
4567
4568 va_list_gpr_counter_field = f_gpr;
4569 va_list_fpr_counter_field = f_fpr;
4570
4571 DECL_FIELD_CONTEXT (f_gpr) = record;
4572 DECL_FIELD_CONTEXT (f_fpr) = record;
4573 DECL_FIELD_CONTEXT (f_ovf) = record;
4574 DECL_FIELD_CONTEXT (f_sav) = record;
4575
4576 TREE_CHAIN (record) = type_decl;
4577 TYPE_NAME (record) = type_decl;
4578 TYPE_FIELDS (record) = f_gpr;
4579 TREE_CHAIN (f_gpr) = f_fpr;
4580 TREE_CHAIN (f_fpr) = f_ovf;
4581 TREE_CHAIN (f_ovf) = f_sav;
4582
4583 layout_type (record);
4584
4585 /* The correct type is an array type of one element. */
4586 return build_array_type (record, build_index_type (size_zero_node));
4587 }
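/* The record built above corresponds to the familiar x86-64 va_list
   layout, shown here only as an illustration:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];

   i.e. an array of one element, so that a va_list decays to a pointer
   when handed to callees such as vfprintf.  */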
4588
4589 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4590
4591 static void
4592 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4593 {
4594 rtx save_area, mem;
4595 rtx label;
4596 rtx label_ref;
4597 rtx tmp_reg;
4598 rtx nsse_reg;
4599 int set;
4600 int i;
4601
4602 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4603 return;
4604
4605 /* Indicate that we need to allocate stack space for the varargs save area. */
4606 ix86_save_varrargs_registers = 1;
4607 cfun->stack_alignment_needed = 128;
4608
4609 save_area = frame_pointer_rtx;
4610 set = get_varargs_alias_set ();
4611
4612 for (i = cum->regno;
4613 i < ix86_regparm
4614 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4615 i++)
4616 {
4617 mem = gen_rtx_MEM (Pmode,
4618 plus_constant (save_area, i * UNITS_PER_WORD));
4619 MEM_NOTRAP_P (mem) = 1;
4620 set_mem_alias_set (mem, set);
4621 emit_move_insn (mem, gen_rtx_REG (Pmode,
4622 x86_64_int_parameter_registers[i]));
4623 }
4624
4625 if (cum->sse_nregs && cfun->va_list_fpr_size)
4626 {
4627 /* Now emit code to save SSE registers. The AX parameter contains the
4628 number of SSE parameter registers used to call this function. We use
4629 the sse_prologue_save insn template, which produces a computed jump
4630 across the SSE saves. We need some preparation work to get this working. */
4631
4632 label = gen_label_rtx ();
4633 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4634
4635 /* Compute address to jump to :
4636 label - 5*eax + nnamed_sse_arguments*5 */
4637 tmp_reg = gen_reg_rtx (Pmode);
4638 nsse_reg = gen_reg_rtx (Pmode);
4639 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4640 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4641 gen_rtx_MULT (Pmode, nsse_reg,
4642 GEN_INT (4))));
4643 if (cum->sse_regno)
4644 emit_move_insn
4645 (nsse_reg,
4646 gen_rtx_CONST (DImode,
4647 gen_rtx_PLUS (DImode,
4648 label_ref,
4649 GEN_INT (cum->sse_regno * 4))));
4650 else
4651 emit_move_insn (nsse_reg, label_ref);
4652 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4653
4654 /* Compute the address of the memory block we save into. We always use a
4655 pointer pointing 127 bytes past the first byte to store; this is needed
4656 to keep the instruction size limited to 4 bytes. */
4657 tmp_reg = gen_reg_rtx (Pmode);
4658 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4659 plus_constant (save_area,
4660 8 * REGPARM_MAX + 127)));
4661 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4662 MEM_NOTRAP_P (mem) = 1;
4663 set_mem_alias_set (mem, set);
4664 set_mem_align (mem, BITS_PER_WORD);
4665
4666 /* And finally do the dirty job! */
4667 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4668 GEN_INT (cum->sse_regno), label));
4669 }
4670 }
4671
4672 static void
4673 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4674 {
4675 int set = get_varargs_alias_set ();
4676 int i;
4677
4678 for (i = cum->regno; i < REGPARM_MAX; i++)
4679 {
4680 rtx reg, mem;
4681
4682 mem = gen_rtx_MEM (Pmode,
4683 plus_constant (virtual_incoming_args_rtx,
4684 i * UNITS_PER_WORD));
4685 MEM_NOTRAP_P (mem) = 1;
4686 set_mem_alias_set (mem, set);
4687
4688 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4689 emit_move_insn (mem, reg);
4690 }
4691 }
4692
4693 static void
4694 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4695 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4696 int no_rtl)
4697 {
4698 CUMULATIVE_ARGS next_cum;
4699 tree fntype;
4700 int stdarg_p;
4701
4702 /* This argument doesn't appear to be used anymore, which is good,
4703 because the old code here didn't suppress rtl generation. */
4704 gcc_assert (!no_rtl);
4705
4706 if (!TARGET_64BIT)
4707 return;
4708
4709 fntype = TREE_TYPE (current_function_decl);
4710 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4711 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4712 != void_type_node));
4713
4714 /* For varargs, we do not want to skip the dummy va_dcl argument.
4715 For stdargs, we do want to skip the last named argument. */
4716 next_cum = *cum;
4717 if (stdarg_p)
4718 function_arg_advance (&next_cum, mode, type, 1);
4719
4720 if (TARGET_64BIT_MS_ABI)
4721 setup_incoming_varargs_ms_64 (&next_cum);
4722 else
4723 setup_incoming_varargs_64 (&next_cum);
4724 }
4725
4726 /* Implement va_start. */
4727
4728 void
4729 ix86_va_start (tree valist, rtx nextarg)
4730 {
4731 HOST_WIDE_INT words, n_gpr, n_fpr;
4732 tree f_gpr, f_fpr, f_ovf, f_sav;
4733 tree gpr, fpr, ovf, sav, t;
4734 tree type;
4735
4736 /* Only the 64-bit target needs something special. */
4737 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4738 {
4739 std_expand_builtin_va_start (valist, nextarg);
4740 return;
4741 }
4742
4743 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4744 f_fpr = TREE_CHAIN (f_gpr);
4745 f_ovf = TREE_CHAIN (f_fpr);
4746 f_sav = TREE_CHAIN (f_ovf);
4747
4748 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4749 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4750 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4751 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4752 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4753
4754 /* Count number of gp and fp argument registers used. */
4755 words = current_function_args_info.words;
4756 n_gpr = current_function_args_info.regno;
4757 n_fpr = current_function_args_info.sse_regno;
4758
4759 if (cfun->va_list_gpr_size)
4760 {
4761 type = TREE_TYPE (gpr);
4762 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4763 build_int_cst (type, n_gpr * 8));
4764 TREE_SIDE_EFFECTS (t) = 1;
4765 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4766 }
4767
4768 if (cfun->va_list_fpr_size)
4769 {
4770 type = TREE_TYPE (fpr);
4771 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4772 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4773 TREE_SIDE_EFFECTS (t) = 1;
4774 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4775 }
4776
4777 /* Find the overflow area. */
4778 type = TREE_TYPE (ovf);
4779 t = make_tree (type, virtual_incoming_args_rtx);
4780 if (words != 0)
4781 t = build2 (PLUS_EXPR, type, t,
4782 build_int_cst (type, words * UNITS_PER_WORD));
4783 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4784 TREE_SIDE_EFFECTS (t) = 1;
4785 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4786
4787 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4788 {
4789 /* Find the register save area.
4790 The prologue of the function saves it right above the stack frame. */
4791 type = TREE_TYPE (sav);
4792 t = make_tree (type, frame_pointer_rtx);
4793 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4794 TREE_SIDE_EFFECTS (t) = 1;
4795 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4796 }
4797 }
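/* As an illustration of the initialization above (a sketch, assuming the
   usual REGPARM_MAX of 6 integer registers): for a variadic function
   whose named arguments consumed two integer registers and one SSE
   register, va_start stores gp_offset = 2 * 8 = 16 and
   fp_offset = 8 * REGPARM_MAX + 1 * 16 = 64, points overflow_arg_area at
   the first stack-passed argument, and points reg_save_area at the block
   laid out by setup_incoming_varargs_64.  */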
4798
4799 /* Implement va_arg. */
4800
4801 static tree
4802 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4803 {
4804 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4805 tree f_gpr, f_fpr, f_ovf, f_sav;
4806 tree gpr, fpr, ovf, sav, t;
4807 int size, rsize;
4808 tree lab_false, lab_over = NULL_TREE;
4809 tree addr, t2;
4810 rtx container;
4811 int indirect_p = 0;
4812 tree ptrtype;
4813 enum machine_mode nat_mode;
4814
4815 /* Only the 64-bit target needs something special. */
4816 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4817 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4818
4819 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4820 f_fpr = TREE_CHAIN (f_gpr);
4821 f_ovf = TREE_CHAIN (f_fpr);
4822 f_sav = TREE_CHAIN (f_ovf);
4823
4824 valist = build_va_arg_indirect_ref (valist);
4825 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4826 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4827 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4828 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4829
4830 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4831 if (indirect_p)
4832 type = build_pointer_type (type);
4833 size = int_size_in_bytes (type);
4834 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4835
4836 nat_mode = type_natural_mode (type);
4837 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4838 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4839
4840 /* Pull the value out of the saved registers. */
4841
4842 addr = create_tmp_var (ptr_type_node, "addr");
4843 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4844
4845 if (container)
4846 {
4847 int needed_intregs, needed_sseregs;
4848 bool need_temp;
4849 tree int_addr, sse_addr;
4850
4851 lab_false = create_artificial_label ();
4852 lab_over = create_artificial_label ();
4853
4854 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4855
4856 need_temp = (!REG_P (container)
4857 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4858 || TYPE_ALIGN (type) > 128));
4859
4860 /* In case we are passing a structure, verify that it forms a consecutive
4861 block in the register save area. If not, we need to do moves. */
4862 if (!need_temp && !REG_P (container))
4863 {
4864 /* Verify that all registers are strictly consecutive. */
4865 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4866 {
4867 int i;
4868
4869 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4870 {
4871 rtx slot = XVECEXP (container, 0, i);
4872 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4873 || INTVAL (XEXP (slot, 1)) != i * 16)
4874 need_temp = 1;
4875 }
4876 }
4877 else
4878 {
4879 int i;
4880
4881 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4882 {
4883 rtx slot = XVECEXP (container, 0, i);
4884 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4885 || INTVAL (XEXP (slot, 1)) != i * 8)
4886 need_temp = 1;
4887 }
4888 }
4889 }
4890 if (!need_temp)
4891 {
4892 int_addr = addr;
4893 sse_addr = addr;
4894 }
4895 else
4896 {
4897 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4898 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4899 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4900 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4901 }
4902
4903 /* First ensure that we fit completely in registers. */
4904 if (needed_intregs)
4905 {
4906 t = build_int_cst (TREE_TYPE (gpr),
4907 (REGPARM_MAX - needed_intregs + 1) * 8);
4908 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4909 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4910 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4911 gimplify_and_add (t, pre_p);
4912 }
4913 if (needed_sseregs)
4914 {
4915 t = build_int_cst (TREE_TYPE (fpr),
4916 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4917 + REGPARM_MAX * 8);
4918 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4919 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4920 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4921 gimplify_and_add (t, pre_p);
4922 }
4923
4924 /* Compute index to start of area used for integer regs. */
4925 if (needed_intregs)
4926 {
4927 /* int_addr = gpr + sav; */
4928 t = fold_convert (ptr_type_node, fold_convert (size_type_node, gpr));
4929 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4930 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4931 gimplify_and_add (t, pre_p);
4932 }
4933 if (needed_sseregs)
4934 {
4935 /* sse_addr = fpr + sav; */
4936 t = fold_convert (ptr_type_node, fold_convert (size_type_node, fpr));
4937 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4938 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4939 gimplify_and_add (t, pre_p);
4940 }
4941 if (need_temp)
4942 {
4943 int i;
4944 tree temp = create_tmp_var (type, "va_arg_tmp");
4945
4946 /* addr = &temp; */
4947 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4948 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4949 gimplify_and_add (t, pre_p);
4950
4951 for (i = 0; i < XVECLEN (container, 0); i++)
4952 {
4953 rtx slot = XVECEXP (container, 0, i);
4954 rtx reg = XEXP (slot, 0);
4955 enum machine_mode mode = GET_MODE (reg);
4956 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4957 tree addr_type = build_pointer_type (piece_type);
4958 tree src_addr, src;
4959 int src_offset;
4960 tree dest_addr, dest;
4961
4962 if (SSE_REGNO_P (REGNO (reg)))
4963 {
4964 src_addr = sse_addr;
4965 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4966 }
4967 else
4968 {
4969 src_addr = int_addr;
4970 src_offset = REGNO (reg) * 8;
4971 }
4972 src_addr = fold_convert (addr_type, src_addr);
4973 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4974 build_int_cst (addr_type, src_offset));
4975 src = build_va_arg_indirect_ref (src_addr);
4976
4977 dest_addr = fold_convert (addr_type, addr);
4978 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4979 build_int_cst (addr_type, INTVAL (XEXP (slot, 1))));
4980 dest = build_va_arg_indirect_ref (dest_addr);
4981
4982 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4983 gimplify_and_add (t, pre_p);
4984 }
4985 }
4986
4987 if (needed_intregs)
4988 {
4989 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4990 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4991 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4992 gimplify_and_add (t, pre_p);
4993 }
4994 if (needed_sseregs)
4995 {
4996 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4997 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4998 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4999 gimplify_and_add (t, pre_p);
5000 }
5001
5002 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5003 gimplify_and_add (t, pre_p);
5004
5005 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5006 append_to_statement_list (t, pre_p);
5007 }
5008
5009 /* ... otherwise out of the overflow area. */
5010
5011 /* Care for on-stack alignment if needed. */
5012 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5013 || integer_zerop (TYPE_SIZE (type)))
5014 t = ovf;
5015 else
5016 {
5017 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5018 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
5019 build_int_cst (TREE_TYPE (ovf), align - 1));
5020 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5021 build_int_cst (TREE_TYPE (t), -align));
5022 }
5023 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5024
5025 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5026 gimplify_and_add (t2, pre_p);
5027
5028 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
5029 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
5030 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5031 gimplify_and_add (t, pre_p);
5032
5033 if (container)
5034 {
5035 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5036 append_to_statement_list (t, pre_p);
5037 }
5038
5039 ptrtype = build_pointer_type (type);
5040 addr = fold_convert (ptrtype, addr);
5041
5042 if (indirect_p)
5043 addr = build_va_arg_indirect_ref (addr);
5044 return build_va_arg_indirect_ref (addr);
5045 }
5046 \f
5047 /* Return nonzero if OPNUM's MEM should be matched
5048 in movabs* patterns. */
5049
5050 int
5051 ix86_check_movabs (rtx insn, int opnum)
5052 {
5053 rtx set, mem;
5054
5055 set = PATTERN (insn);
5056 if (GET_CODE (set) == PARALLEL)
5057 set = XVECEXP (set, 0, 0);
5058 gcc_assert (GET_CODE (set) == SET);
5059 mem = XEXP (set, opnum);
5060 while (GET_CODE (mem) == SUBREG)
5061 mem = SUBREG_REG (mem);
5062 gcc_assert (MEM_P (mem));
5063 return (volatile_ok || !MEM_VOLATILE_P (mem));
5064 }
5065 \f
5066 /* Initialize the table of extra 80387 mathematical constants. */
5067
5068 static void
5069 init_ext_80387_constants (void)
5070 {
5071 static const char * cst[5] =
5072 {
5073 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5074 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5075 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5076 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5077 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5078 };
5079 int i;
5080
5081 for (i = 0; i < 5; i++)
5082 {
5083 real_from_string (&ext_80387_constants_table[i], cst[i]);
5084 /* Ensure each constant is rounded to XFmode precision. */
5085 real_convert (&ext_80387_constants_table[i],
5086 XFmode, &ext_80387_constants_table[i]);
5087 }
5088
5089 ext_80387_constants_init = 1;
5090 }
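/* For reference, the five strings above are (to the precision shown)
   log10(2), ln(2), log2(e), log2(10) and pi -- i.e. exactly the values
   pushed by fldlg2, fldln2, fldl2e, fldl2t and fldpi; for example,
   ext_80387_constants_table[2] ends up holding log2(e) ~= 1.4427.  */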
5091
5092 /* Return true if the constant is something that can be loaded with
5093 a special instruction. */
5094
5095 int
5096 standard_80387_constant_p (rtx x)
5097 {
5098 enum machine_mode mode = GET_MODE (x);
5099
5100 REAL_VALUE_TYPE r;
5101
5102 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5103 return -1;
5104
5105 if (x == CONST0_RTX (mode))
5106 return 1;
5107 if (x == CONST1_RTX (mode))
5108 return 2;
5109
5110 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5111
5112 /* For XFmode constants, try to find a special 80387 instruction when
5113 optimizing for size or on those CPUs that benefit from them. */
5114 if (mode == XFmode
5115 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5116 {
5117 int i;
5118
5119 if (! ext_80387_constants_init)
5120 init_ext_80387_constants ();
5121
5122 for (i = 0; i < 5; i++)
5123 if (real_identical (&r, &ext_80387_constants_table[i]))
5124 return i + 3;
5125 }
5126
5127 /* A load of the constant -0.0 or -1.0 will be split into an
5128 fldz;fchs or fld1;fchs sequence. */
5129 if (real_isnegzero (&r))
5130 return 8;
5131 if (real_identical (&r, &dconstm1))
5132 return 9;
5133
5134 return 0;
5135 }
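/* A quick reference for the encoding returned by standard_80387_constant_p
   (see also standard_80387_constant_opcode below):

     -1  not an 80387 constant at all     0  no special instruction
      1  +0.0 (fldz)                      2  +1.0 (fld1)
      3  log10(2) (fldlg2)                4  ln(2) (fldln2)
      5  log2(e) (fldl2e)                 6  log2(10) (fldl2t)
      7  pi (fldpi)                       8  -0.0 (fldz; fchs)
      9  -1.0 (fld1; fchs)

   For example, an XFmode CONST_DOUBLE holding pi yields 7 when
   optimize_size or TARGET_EXT_80387_CONSTANTS is in effect.  */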
5136
5137 /* Return the opcode of the special instruction to be used to load
5138 the constant X. */
5139
5140 const char *
5141 standard_80387_constant_opcode (rtx x)
5142 {
5143 switch (standard_80387_constant_p (x))
5144 {
5145 case 1:
5146 return "fldz";
5147 case 2:
5148 return "fld1";
5149 case 3:
5150 return "fldlg2";
5151 case 4:
5152 return "fldln2";
5153 case 5:
5154 return "fldl2e";
5155 case 6:
5156 return "fldl2t";
5157 case 7:
5158 return "fldpi";
5159 case 8:
5160 case 9:
5161 return "#";
5162 default:
5163 gcc_unreachable ();
5164 }
5165 }
5166
5167 /* Return the CONST_DOUBLE representing the 80387 constant that is
5168 loaded by the specified special instruction. The argument IDX
5169 matches the return value from standard_80387_constant_p. */
5170
5171 rtx
5172 standard_80387_constant_rtx (int idx)
5173 {
5174 int i;
5175
5176 if (! ext_80387_constants_init)
5177 init_ext_80387_constants ();
5178
5179 switch (idx)
5180 {
5181 case 3:
5182 case 4:
5183 case 5:
5184 case 6:
5185 case 7:
5186 i = idx - 3;
5187 break;
5188
5189 default:
5190 gcc_unreachable ();
5191 }
5192
5193 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5194 XFmode);
5195 }
5196
5197 /* Return 1 if MODE is a valid mode for SSE. */
5198 static int
5199 standard_sse_mode_p (enum machine_mode mode)
5200 {
5201 switch (mode)
5202 {
5203 case V16QImode:
5204 case V8HImode:
5205 case V4SImode:
5206 case V2DImode:
5207 case V4SFmode:
5208 case V2DFmode:
5209 return 1;
5210
5211 default:
5212 return 0;
5213 }
5214 }
5215
5216 /* Return nonzero if X is a constant we can load into an SSE register
5217 without going through memory. */
5218 int
5219 standard_sse_constant_p (rtx x)
5220 {
5221 enum machine_mode mode = GET_MODE (x);
5222
5223 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5224 return 1;
5225 if (vector_all_ones_operand (x, mode)
5226 && standard_sse_mode_p (mode))
5227 return TARGET_SSE2 ? 2 : -1;
5228
5229 return 0;
5230 }
5231
5232 /* Return the opcode of the special instruction to be used to load
5233 the constant X. */
5234
5235 const char *
5236 standard_sse_constant_opcode (rtx insn, rtx x)
5237 {
5238 switch (standard_sse_constant_p (x))
5239 {
5240 case 1:
5241 if (get_attr_mode (insn) == MODE_V4SF)
5242 return "xorps\t%0, %0";
5243 else if (get_attr_mode (insn) == MODE_V2DF)
5244 return "xorpd\t%0, %0";
5245 else
5246 return "pxor\t%0, %0";
5247 case 2:
5248 return "pcmpeqd\t%0, %0";
5249 }
5250 gcc_unreachable ();
5251 }
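/* For illustration: an all-zero constant such as
   (const_vector:V4SF [0.0 0.0 0.0 0.0]) is classified as 1 above and is
   materialized by clearing the destination with xorps/xorpd/pxor
   (chosen from the insn's mode attribute), while an all-ones V4SImode or
   V2DImode constant is classified as 2 and is materialized with pcmpeqd
   when SSE2 is available.  */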
5252
5253 /* Return 1 if OP contains a symbol reference. */
5254
5255 int
5256 symbolic_reference_mentioned_p (rtx op)
5257 {
5258 const char *fmt;
5259 int i;
5260
5261 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5262 return 1;
5263
5264 fmt = GET_RTX_FORMAT (GET_CODE (op));
5265 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5266 {
5267 if (fmt[i] == 'E')
5268 {
5269 int j;
5270
5271 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5272 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5273 return 1;
5274 }
5275
5276 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5277 return 1;
5278 }
5279
5280 return 0;
5281 }
5282
5283 /* Return 1 if it is appropriate to emit `ret' instructions in the
5284 body of a function. Do this only if the epilogue is simple, needing a
5285 couple of insns. Prior to reloading, we can't tell how many registers
5286 must be saved, so return 0 then. Return 0 if there is no frame
5287 marker to de-allocate. */
5288
5289 int
5290 ix86_can_use_return_insn_p (void)
5291 {
5292 struct ix86_frame frame;
5293
5294 if (! reload_completed || frame_pointer_needed)
5295 return 0;
5296
5297 /* Don't allow popping more than 32768 bytes of arguments, since that's
5298 all we can do with one instruction. */
5299 if (current_function_pops_args
5300 && current_function_args_size >= 32768)
5301 return 0;
5302
5303 ix86_compute_frame_layout (&frame);
5304 return frame.to_allocate == 0 && frame.nregs == 0;
5305 }
5306 \f
5307 /* Value should be nonzero if functions must have frame pointers.
5308 Zero means the frame pointer need not be set up (and parms may
5309 be accessed via the stack pointer) in functions that seem suitable. */
5310
5311 int
5312 ix86_frame_pointer_required (void)
5313 {
5314 /* If we accessed previous frames, then the generated code expects
5315 to be able to access the saved ebp value in our frame. */
5316 if (cfun->machine->accesses_prev_frame)
5317 return 1;
5318
5319 /* Several x86 OSes need a frame pointer for other reasons,
5320 usually pertaining to setjmp. */
5321 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5322 return 1;
5323
5324 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5325 the frame pointer by default. Turn it back on now if we've not
5326 got a leaf function. */
5327 if (TARGET_OMIT_LEAF_FRAME_POINTER
5328 && (!current_function_is_leaf
5329 || ix86_current_function_calls_tls_descriptor))
5330 return 1;
5331
5332 if (current_function_profile)
5333 return 1;
5334
5335 return 0;
5336 }
5337
5338 /* Record that the current function accesses previous call frames. */
5339
5340 void
5341 ix86_setup_frame_addresses (void)
5342 {
5343 cfun->machine->accesses_prev_frame = 1;
5344 }
5345 \f
5346 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5347 # define USE_HIDDEN_LINKONCE 1
5348 #else
5349 # define USE_HIDDEN_LINKONCE 0
5350 #endif
5351
5352 static int pic_labels_used;
5353
5354 /* Fills in the label name that should be used for a pc thunk for
5355 the given register. */
5356
5357 static void
5358 get_pc_thunk_name (char name[32], unsigned int regno)
5359 {
5360 gcc_assert (!TARGET_64BIT);
5361
5362 if (USE_HIDDEN_LINKONCE)
5363 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5364 else
5365 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5366 }
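/* For example, the thunk that loads %ebx is named "__i686.get_pc_thunk.bx"
   when hidden linkonce sections are usable; otherwise an internal label
   built from "LPR" and the register number is used instead.  */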
5367
5368
5369 /* Output the PC thunks used by -fpic code. Each thunk loads the address
5370 following its call (found on top of the stack) into its register and returns. */
5371
5372 void
5373 ix86_file_end (void)
5374 {
5375 rtx xops[2];
5376 int regno;
5377
5378 for (regno = 0; regno < 8; ++regno)
5379 {
5380 char name[32];
5381
5382 if (! ((pic_labels_used >> regno) & 1))
5383 continue;
5384
5385 get_pc_thunk_name (name, regno);
5386
5387 #if TARGET_MACHO
5388 if (TARGET_MACHO)
5389 {
5390 switch_to_section (darwin_sections[text_coal_section]);
5391 fputs ("\t.weak_definition\t", asm_out_file);
5392 assemble_name (asm_out_file, name);
5393 fputs ("\n\t.private_extern\t", asm_out_file);
5394 assemble_name (asm_out_file, name);
5395 fputs ("\n", asm_out_file);
5396 ASM_OUTPUT_LABEL (asm_out_file, name);
5397 }
5398 else
5399 #endif
5400 if (USE_HIDDEN_LINKONCE)
5401 {
5402 tree decl;
5403
5404 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5405 error_mark_node);
5406 TREE_PUBLIC (decl) = 1;
5407 TREE_STATIC (decl) = 1;
5408 DECL_ONE_ONLY (decl) = 1;
5409
5410 (*targetm.asm_out.unique_section) (decl, 0);
5411 switch_to_section (get_named_section (decl, NULL, 0));
5412
5413 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5414 fputs ("\t.hidden\t", asm_out_file);
5415 assemble_name (asm_out_file, name);
5416 fputc ('\n', asm_out_file);
5417 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5418 }
5419 else
5420 {
5421 switch_to_section (text_section);
5422 ASM_OUTPUT_LABEL (asm_out_file, name);
5423 }
5424
5425 xops[0] = gen_rtx_REG (SImode, regno);
5426 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5427 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5428 output_asm_insn ("ret", xops);
5429 }
5430
5431 if (NEED_INDICATE_EXEC_STACK)
5432 file_end_indicate_exec_stack ();
5433 }
5434
5435 /* Emit code for the SET_GOT patterns. */
5436
5437 const char *
5438 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5439 {
5440 rtx xops[3];
5441
5442 xops[0] = dest;
5443
5444 if (TARGET_VXWORKS_RTP && flag_pic)
5445 {
5446 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5447 xops[2] = gen_rtx_MEM (Pmode,
5448 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5449 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5450
5451 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5452 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5453 an unadorned address. */
5454 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5455 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5456 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5457 return "";
5458 }
5459
5460 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5461
5462 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5463 {
5464 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5465
5466 if (!flag_pic)
5467 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5468 else
5469 output_asm_insn ("call\t%a2", xops);
5470
5471 #if TARGET_MACHO
5472 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5473 is what will be referenced by the Mach-O PIC subsystem. */
5474 if (!label)
5475 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5476 #endif
5477
5478 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5479 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5480
5481 if (flag_pic)
5482 output_asm_insn ("pop{l}\t%0", xops);
5483 }
5484 else
5485 {
5486 char name[32];
5487 get_pc_thunk_name (name, REGNO (dest));
5488 pic_labels_used |= 1 << REGNO (dest);
5489
5490 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5491 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5492 output_asm_insn ("call\t%X2", xops);
5493 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5494 is what will be referenced by the Mach-O PIC subsystem. */
5495 #if TARGET_MACHO
5496 if (!label)
5497 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5498 else
5499 targetm.asm_out.internal_label (asm_out_file, "L",
5500 CODE_LABEL_NUMBER (label));
5501 #endif
5502 }
5503
5504 if (TARGET_MACHO)
5505 return "";
5506
5507 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5508 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5509 else
5510 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5511
5512 return "";
5513 }
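/* A rough sketch of what the above emits for the classic -fpic case
   without the deep-branch-prediction thunks (AT&T syntax, %ebx as the
   PIC register; exact assembler decoration may differ):

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With TARGET_DEEP_BRANCH_PREDICTION the call/pop pair is replaced by a
   call to the get_pc_thunk helper emitted by ix86_file_end above.  */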
5514
5515 /* Generate a "push" pattern for input ARG. */
5516
5517 static rtx
5518 gen_push (rtx arg)
5519 {
5520 return gen_rtx_SET (VOIDmode,
5521 gen_rtx_MEM (Pmode,
5522 gen_rtx_PRE_DEC (Pmode,
5523 stack_pointer_rtx)),
5524 arg);
5525 }
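/* The RTL produced by gen_push is simply

     (set (mem:SI (pre_dec:SI (reg:SI sp))) ARG)

   (DImode and %rsp in 64-bit mode), which the move patterns in i386.md
   output as a single push instruction.  */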
5526
5527 /* Return the number of a call-clobbered register that is unused for the
5528 entire function, or INVALID_REGNUM if there is none. */
5529
5530 static unsigned int
5531 ix86_select_alt_pic_regnum (void)
5532 {
5533 if (current_function_is_leaf && !current_function_profile
5534 && !ix86_current_function_calls_tls_descriptor)
5535 {
5536 int i;
5537 for (i = 2; i >= 0; --i)
5538 if (!regs_ever_live[i])
5539 return i;
5540 }
5541
5542 return INVALID_REGNUM;
5543 }
5544
5545 /* Return 1 if we need to save REGNO. */
5546 static int
5547 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5548 {
5549 if (pic_offset_table_rtx
5550 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5551 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5552 || current_function_profile
5553 || current_function_calls_eh_return
5554 || current_function_uses_const_pool))
5555 {
5556 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5557 return 0;
5558 return 1;
5559 }
5560
5561 if (current_function_calls_eh_return && maybe_eh_return)
5562 {
5563 unsigned i;
5564 for (i = 0; ; i++)
5565 {
5566 unsigned test = EH_RETURN_DATA_REGNO (i);
5567 if (test == INVALID_REGNUM)
5568 break;
5569 if (test == regno)
5570 return 1;
5571 }
5572 }
5573
5574 if (cfun->machine->force_align_arg_pointer
5575 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5576 return 1;
5577
5578 return (regs_ever_live[regno]
5579 && !call_used_regs[regno]
5580 && !fixed_regs[regno]
5581 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5582 }
5583
5584 /* Return number of registers to be saved on the stack. */
5585
5586 static int
5587 ix86_nsaved_regs (void)
5588 {
5589 int nregs = 0;
5590 int regno;
5591
5592 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5593 if (ix86_save_reg (regno, true))
5594 nregs++;
5595 return nregs;
5596 }
5597
5598 /* Return the offset between two registers, one to be eliminated, and the other
5599 its replacement, at the start of a routine. */
5600
5601 HOST_WIDE_INT
5602 ix86_initial_elimination_offset (int from, int to)
5603 {
5604 struct ix86_frame frame;
5605 ix86_compute_frame_layout (&frame);
5606
5607 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5608 return frame.hard_frame_pointer_offset;
5609 else if (from == FRAME_POINTER_REGNUM
5610 && to == HARD_FRAME_POINTER_REGNUM)
5611 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5612 else
5613 {
5614 gcc_assert (to == STACK_POINTER_REGNUM);
5615
5616 if (from == ARG_POINTER_REGNUM)
5617 return frame.stack_pointer_offset;
5618
5619 gcc_assert (from == FRAME_POINTER_REGNUM);
5620 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5621 }
5622 }
5623
5624 /* Fill the ix86_frame structure with information about the current function's frame. */
5625
5626 static void
5627 ix86_compute_frame_layout (struct ix86_frame *frame)
5628 {
5629 HOST_WIDE_INT total_size;
5630 unsigned int stack_alignment_needed;
5631 HOST_WIDE_INT offset;
5632 unsigned int preferred_alignment;
5633 HOST_WIDE_INT size = get_frame_size ();
5634
5635 frame->nregs = ix86_nsaved_regs ();
5636 total_size = size;
5637
5638 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5639 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5640
5641 /* During reload iteration the number of registers saved can change.
5642 Recompute the value as needed. Do not recompute when the number of
5643 registers did not change, as reload calls this function multiple times
5644 and does not expect the decision to change within a single iteration. */
5645 if (!optimize_size
5646 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5647 {
5648 int count = frame->nregs;
5649
5650 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5651 /* The fast prologue uses moves instead of pushes to save registers. This
5652 is significantly longer, but it also executes faster, as modern hardware
5653 can execute the moves in parallel but cannot do so for push/pop.
5654
5655 Be careful about choosing which prologue to emit: when the function takes
5656 many instructions to execute, we may as well use the slow version, and
5657 likewise when the function is known to be outside any hot spot (which is
5658 known only with profile feedback). Weight the size of the function by the
5659 number of registers to save, as it is cheap to use one or two push
5660 instructions but very slow to use many of them. */
5661 if (count)
5662 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5663 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5664 || (flag_branch_probabilities
5665 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5666 cfun->machine->use_fast_prologue_epilogue = false;
5667 else
5668 cfun->machine->use_fast_prologue_epilogue
5669 = !expensive_function_p (count);
5670 }
5671 if (TARGET_PROLOGUE_USING_MOVE
5672 && cfun->machine->use_fast_prologue_epilogue)
5673 frame->save_regs_using_mov = true;
5674 else
5675 frame->save_regs_using_mov = false;
5676
5677
5678 /* Skip return address and saved base pointer. */
5679 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5680
5681 frame->hard_frame_pointer_offset = offset;
5682
5683 /* Do some sanity checking of stack_alignment_needed and
5684 preferred_alignment, since the i386 port is the only one using these
5685 features, and they may break easily. */
5686
5687 gcc_assert (!size || stack_alignment_needed);
5688 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5689 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5690 gcc_assert (stack_alignment_needed
5691 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5692
5693 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5694 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5695
5696 /* Register save area */
5697 offset += frame->nregs * UNITS_PER_WORD;
5698
5699 /* Va-arg area */
5700 if (ix86_save_varrargs_registers)
5701 {
5702 offset += X86_64_VARARGS_SIZE;
5703 frame->va_arg_size = X86_64_VARARGS_SIZE;
5704 }
5705 else
5706 frame->va_arg_size = 0;
5707
5708 /* Align start of frame for local function. */
5709 frame->padding1 = ((offset + stack_alignment_needed - 1)
5710 & -stack_alignment_needed) - offset;
5711
5712 offset += frame->padding1;
5713
5714 /* Frame pointer points here. */
5715 frame->frame_pointer_offset = offset;
5716
5717 offset += size;
5718
5719 /* Add the outgoing arguments area. It can be skipped if we eliminated
5720 all of the function calls as dead code.
5721 Skipping is however impossible when the function calls alloca: the
5722 alloca expander assumes that the last current_function_outgoing_args_size
5723 bytes of the stack frame are unused. */
5724 if (ACCUMULATE_OUTGOING_ARGS
5725 && (!current_function_is_leaf || current_function_calls_alloca
5726 || ix86_current_function_calls_tls_descriptor))
5727 {
5728 offset += current_function_outgoing_args_size;
5729 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5730 }
5731 else
5732 frame->outgoing_arguments_size = 0;
5733
5734 /* Align stack boundary. Only needed if we're calling another function
5735 or using alloca. */
5736 if (!current_function_is_leaf || current_function_calls_alloca
5737 || ix86_current_function_calls_tls_descriptor)
5738 frame->padding2 = ((offset + preferred_alignment - 1)
5739 & -preferred_alignment) - offset;
5740 else
5741 frame->padding2 = 0;
5742
5743 offset += frame->padding2;
5744
5745 /* We've reached end of stack frame. */
5746 frame->stack_pointer_offset = offset;
5747
5748 /* Size prologue needs to allocate. */
5749 frame->to_allocate =
5750 (size + frame->padding1 + frame->padding2
5751 + frame->outgoing_arguments_size + frame->va_arg_size);
5752
5753 if ((!frame->to_allocate && frame->nregs <= 1)
5754 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5755 frame->save_regs_using_mov = false;
5756
5757 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5758 && current_function_is_leaf
5759 && !ix86_current_function_calls_tls_descriptor)
5760 {
5761 frame->red_zone_size = frame->to_allocate;
5762 if (frame->save_regs_using_mov)
5763 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5764 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5765 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5766 }
5767 else
5768 frame->red_zone_size = 0;
5769 frame->to_allocate -= frame->red_zone_size;
5770 frame->stack_pointer_offset -= frame->red_zone_size;
5771 #if 0
5772 fprintf (stderr, "\n");
5773 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5774 fprintf (stderr, "size: %ld\n", (long)size);
5775 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5776 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5777 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5778 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5779 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5780 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5781 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5782 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5783 (long)frame->hard_frame_pointer_offset);
5784 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5785 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5786 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5787 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5788 #endif
5789 }
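/* A rough worked example of the layout computed above, assuming a 32-bit
   non-leaf function with a frame pointer, two registers saved with
   pushes, 20 bytes of locals, 16-byte stack_alignment_needed and
   preferred_alignment, no va-arg area, no accumulated outgoing args and
   no red zone:

     hard_frame_pointer_offset =  8   (return address + saved %ebp)
     frame_pointer_offset      = 16   (+ 2 * UNITS_PER_WORD regs, padding1 = 0)
     stack_pointer_offset      = 48   (+ 20 bytes of locals, padding2 = 12)
     to_allocate               = 32   (locals + padding2; regs were pushed)

   Illustrative only -- the exact numbers depend on the alignment values
   the middle end requests.  */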
5790
5791 /* Emit code to save registers in the prologue. */
5792
5793 static void
5794 ix86_emit_save_regs (void)
5795 {
5796 unsigned int regno;
5797 rtx insn;
5798
5799 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5800 if (ix86_save_reg (regno, true))
5801 {
5802 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5803 RTX_FRAME_RELATED_P (insn) = 1;
5804 }
5805 }
5806
5807 /* Emit code to save registers using MOV insns. The first register
5808 is saved at POINTER + OFFSET. */
5809 static void
5810 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5811 {
5812 unsigned int regno;
5813 rtx insn;
5814
5815 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5816 if (ix86_save_reg (regno, true))
5817 {
5818 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5819 Pmode, offset),
5820 gen_rtx_REG (Pmode, regno));
5821 RTX_FRAME_RELATED_P (insn) = 1;
5822 offset += UNITS_PER_WORD;
5823 }
5824 }
5825
5826 /* Expand a prologue or epilogue stack adjustment.
5827 The pattern exists to put a dependency on all ebp-based memory accesses.
5828 STYLE should be negative if the instructions should be marked as frame
5829 related, zero if the %r11 register is live and cannot be freely used,
5830 and positive otherwise. */
5831
5832 static void
5833 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5834 {
5835 rtx insn;
5836
5837 if (! TARGET_64BIT)
5838 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5839 else if (x86_64_immediate_operand (offset, DImode))
5840 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5841 else
5842 {
5843 rtx r11;
5844 /* r11 is used by indirect sibcall return as well, set before the
5845 epilogue and used after the epilogue. At the moment an indirect sibcall
5846 shouldn't be used together with huge frame sizes in one
5847 function because of the frame_size check in sibcall.c. */
5848 gcc_assert (style);
5849 r11 = gen_rtx_REG (DImode, R11_REG);
5850 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5851 if (style < 0)
5852 RTX_FRAME_RELATED_P (insn) = 1;
5853 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5854 offset));
5855 }
5856 if (style < 0)
5857 RTX_FRAME_RELATED_P (insn) = 1;
5858 }
5859
5860 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5861
5862 static rtx
5863 ix86_internal_arg_pointer (void)
5864 {
5865 bool has_force_align_arg_pointer =
5866 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5867 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5868 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5869 && DECL_NAME (current_function_decl)
5870 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5871 && DECL_FILE_SCOPE_P (current_function_decl))
5872 || ix86_force_align_arg_pointer
5873 || has_force_align_arg_pointer)
5874 {
5875 /* Nested functions can't realign the stack due to a register
5876 conflict. */
5877 if (DECL_CONTEXT (current_function_decl)
5878 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5879 {
5880 if (ix86_force_align_arg_pointer)
5881 warning (0, "-mstackrealign ignored for nested functions");
5882 if (has_force_align_arg_pointer)
5883 error ("%s not supported for nested functions",
5884 ix86_force_align_arg_pointer_string);
5885 return virtual_incoming_args_rtx;
5886 }
5887 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5888 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5889 }
5890 else
5891 return virtual_incoming_args_rtx;
5892 }
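/* When stack realignment is requested, the hook above designates hard
   register 2 (%ecx -- the register the epilogue code below addresses by
   that name) to hold the incoming argument address; ix86_expand_prologue
   loads it with sp+4 before anding %esp down to a 16-byte boundary, so
   incoming arguments remain reachable afterwards.  */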
5893
5894 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5895 This is called from dwarf2out.c to emit call frame instructions
5896 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5897 static void
5898 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5899 {
5900 rtx unspec = SET_SRC (pattern);
5901 gcc_assert (GET_CODE (unspec) == UNSPEC);
5902
5903 switch (index)
5904 {
5905 case UNSPEC_REG_SAVE:
5906 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5907 SET_DEST (pattern));
5908 break;
5909 case UNSPEC_DEF_CFA:
5910 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5911 INTVAL (XVECEXP (unspec, 0, 0)));
5912 break;
5913 default:
5914 gcc_unreachable ();
5915 }
5916 }
5917
5918 /* Expand the prologue into a bunch of separate insns. */
5919
5920 void
5921 ix86_expand_prologue (void)
5922 {
5923 rtx insn;
5924 bool pic_reg_used;
5925 struct ix86_frame frame;
5926 HOST_WIDE_INT allocate;
5927
5928 ix86_compute_frame_layout (&frame);
5929
5930 if (cfun->machine->force_align_arg_pointer)
5931 {
5932 rtx x, y;
5933
5934 /* Grab the argument pointer. */
5935 x = plus_constant (stack_pointer_rtx, 4);
5936 y = cfun->machine->force_align_arg_pointer;
5937 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5938 RTX_FRAME_RELATED_P (insn) = 1;
5939
5940 /* The unwind info consists of two parts: install the fafp as the cfa,
5941 and record the fafp as the "save register" of the stack pointer.
5942 The latter is there so that the unwinder can see where it should
5943 restore the stack pointer across the and insn that aligns the stack. */
5944 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5945 x = gen_rtx_SET (VOIDmode, y, x);
5946 RTX_FRAME_RELATED_P (x) = 1;
5947 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5948 UNSPEC_REG_SAVE);
5949 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5950 RTX_FRAME_RELATED_P (y) = 1;
5951 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5952 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5953 REG_NOTES (insn) = x;
5954
5955 /* Align the stack. */
5956 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5957 GEN_INT (-16)));
5958
5959 /* And here we cheat like madmen with the unwind info. We force the
5960 cfa register back to sp+4, which is exactly what it was at the
5961 start of the function. Re-pushing the return address leaves the
5962 return address at the same spot relative to the cfa, and thus the
5963 unwind info remains correct. */
5964 x = cfun->machine->force_align_arg_pointer;
5965 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5966 insn = emit_insn (gen_push (x));
5967 RTX_FRAME_RELATED_P (insn) = 1;
5968
5969 x = GEN_INT (4);
5970 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5971 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5972 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5973 REG_NOTES (insn) = x;
5974 }
5975
5976 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5977 slower on all targets. Also sdb doesn't like it. */
5978
5979 if (frame_pointer_needed)
5980 {
5981 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5982 RTX_FRAME_RELATED_P (insn) = 1;
5983
5984 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5985 RTX_FRAME_RELATED_P (insn) = 1;
5986 }
5987
5988 allocate = frame.to_allocate;
5989
5990 if (!frame.save_regs_using_mov)
5991 ix86_emit_save_regs ();
5992 else
5993 allocate += frame.nregs * UNITS_PER_WORD;
5994
5995 /* When using the red zone we may start saving registers before allocating
5996 the stack frame, saving one cycle of the prologue. */
5997 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5998 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5999 : stack_pointer_rtx,
6000 -frame.nregs * UNITS_PER_WORD);
6001
6002 if (allocate == 0)
6003 ;
6004 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6005 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6006 GEN_INT (-allocate), -1);
6007 else
6008 {
6009 /* Only valid for Win32 and the Win64 MS ABI. */
6010 rtx eax = gen_rtx_REG (Pmode, 0);
6011 bool eax_live;
6012 rtx t;
6013
6014 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6015
6016 if (TARGET_64BIT_MS_ABI)
6017 eax_live = false;
6018 else
6019 eax_live = ix86_eax_live_at_start_p ();
6020
6021 if (eax_live)
6022 {
6023 emit_insn (gen_push (eax));
6024 allocate -= UNITS_PER_WORD;
6025 }
6026
6027 emit_move_insn (eax, GEN_INT (allocate));
6028
6029 if (TARGET_64BIT)
6030 insn = gen_allocate_stack_worker_64 (eax);
6031 else
6032 insn = gen_allocate_stack_worker_32 (eax);
6033 insn = emit_insn (insn);
6034 RTX_FRAME_RELATED_P (insn) = 1;
6035 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6036 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6037 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6038 t, REG_NOTES (insn));
6039
6040 if (eax_live)
6041 {
6042 if (frame_pointer_needed)
6043 t = plus_constant (hard_frame_pointer_rtx,
6044 allocate
6045 - frame.to_allocate
6046 - frame.nregs * UNITS_PER_WORD);
6047 else
6048 t = plus_constant (stack_pointer_rtx, allocate);
6049 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6050 }
6051 }
6052
6053 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6054 {
6055 if (!frame_pointer_needed || !frame.to_allocate)
6056 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6057 else
6058 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6059 -frame.nregs * UNITS_PER_WORD);
6060 }
6061
6062 pic_reg_used = false;
6063 if (pic_offset_table_rtx
6064 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6065 || current_function_profile))
6066 {
6067 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6068
6069 if (alt_pic_reg_used != INVALID_REGNUM)
6070 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6071
6072 pic_reg_used = true;
6073 }
6074
6075 if (pic_reg_used)
6076 {
6077 if (TARGET_64BIT)
6078 {
6079 if (ix86_cmodel == CM_LARGE_PIC)
6080 {
6081 rtx tmp_reg = gen_rtx_REG (DImode,
6082 FIRST_REX_INT_REG + 3 /* R11 */);
6083 rtx label = gen_label_rtx ();
6084 emit_label (label);
6085 LABEL_PRESERVE_P (label) = 1;
6086 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6087 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6088 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6089 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6090 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6091 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6092 pic_offset_table_rtx, tmp_reg));
6093 }
6094 else
6095 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6096 }
6097 else
6098 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6099
6100 /* Even with accurate pre-reload life analysis, we can wind up
6101 deleting all references to the pic register after reload.
6102 Consider if cross-jumping unifies two sides of a branch
6103 controlled by a comparison vs the only read from a global.
6104 In which case, allow the set_got to be deleted, though we're
6105 too late to do anything about the ebx save in the prologue. */
6106 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6107 }
6108
6109 /* Prevent function calls from being scheduled before the call to mcount.
6110 In the pic_reg_used case, make sure that the got load isn't deleted. */
6111 if (current_function_profile)
6112 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6113 }
6114
6115 /* Emit code to restore saved registers using MOV insns. First register
6116 is restored from POINTER + OFFSET. */
6117 static void
6118 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6119 int maybe_eh_return)
6120 {
6121 int regno;
6122 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6123
6124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6125 if (ix86_save_reg (regno, maybe_eh_return))
6126 {
6127 /* Ensure that adjust_address won't be forced to produce a pointer
6128 outside the range allowed by the x86-64 instruction set. */
6129 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6130 {
6131 rtx r11;
6132
6133 r11 = gen_rtx_REG (DImode, R11_REG);
6134 emit_move_insn (r11, GEN_INT (offset));
6135 emit_insn (gen_adddi3 (r11, r11, pointer));
6136 base_address = gen_rtx_MEM (Pmode, r11);
6137 offset = 0;
6138 }
6139 emit_move_insn (gen_rtx_REG (Pmode, regno),
6140 adjust_address (base_address, Pmode, offset));
6141 offset += UNITS_PER_WORD;
6142 }
6143 }
6144
6145 /* Restore function stack, frame, and registers. */
6146
6147 void
6148 ix86_expand_epilogue (int style)
6149 {
6150 int regno;
6151 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6152 struct ix86_frame frame;
6153 HOST_WIDE_INT offset;
6154
6155 ix86_compute_frame_layout (&frame);
6156
6157 /* Calculate start of saved registers relative to ebp. Special care
6158 must be taken for the normal return case of a function using
6159 eh_return: the eax and edx registers are marked as saved, but not
6160 restored along this path. */
6161 offset = frame.nregs;
6162 if (current_function_calls_eh_return && style != 2)
6163 offset -= 2;
6164 offset *= -UNITS_PER_WORD;
6165
6166 /* If we're only restoring one register and sp is not valid, then
6167 use a move instruction to restore the register, since it's
6168 less work than reloading sp and popping the register.
6169
6170 The default code results in a stack adjustment using an add/lea instruction,
6171 while this code results in a LEAVE instruction (or a discrete equivalent),
6172 so it is profitable in some other cases as well, especially when there
6173 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6174 and there is exactly one register to pop. This heuristic may need some
6175 tuning in the future. */
6176 if ((!sp_valid && frame.nregs <= 1)
6177 || (TARGET_EPILOGUE_USING_MOVE
6178 && cfun->machine->use_fast_prologue_epilogue
6179 && (frame.nregs > 1 || frame.to_allocate))
6180 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6181 || (frame_pointer_needed && TARGET_USE_LEAVE
6182 && cfun->machine->use_fast_prologue_epilogue
6183 && frame.nregs == 1)
6184 || current_function_calls_eh_return)
6185 {
6186 /* Restore registers. We can use ebp or esp to address the memory
6187 locations. If both are available, default to ebp, since offsets
6188 are known to be small. The only exception is when esp points directly
6189 to the end of the block of saved registers, where we may simplify the
6190 addressing mode. */
6191
6192 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6193 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6194 frame.to_allocate, style == 2);
6195 else
6196 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6197 offset, style == 2);
6198
6199 /* eh_return epilogues need %ecx added to the stack pointer. */
6200 if (style == 2)
6201 {
6202 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6203
6204 if (frame_pointer_needed)
6205 {
6206 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6207 tmp = plus_constant (tmp, UNITS_PER_WORD);
6208 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6209
6210 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6211 emit_move_insn (hard_frame_pointer_rtx, tmp);
6212
6213 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6214 const0_rtx, style);
6215 }
6216 else
6217 {
6218 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6219 tmp = plus_constant (tmp, (frame.to_allocate
6220 + frame.nregs * UNITS_PER_WORD));
6221 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6222 }
6223 }
6224 else if (!frame_pointer_needed)
6225 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6226 GEN_INT (frame.to_allocate
6227 + frame.nregs * UNITS_PER_WORD),
6228 style);
6229 /* If not an i386, mov & pop is faster than "leave". */
6230 else if (TARGET_USE_LEAVE || optimize_size
6231 || !cfun->machine->use_fast_prologue_epilogue)
6232 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6233 else
6234 {
6235 pro_epilogue_adjust_stack (stack_pointer_rtx,
6236 hard_frame_pointer_rtx,
6237 const0_rtx, style);
6238 if (TARGET_64BIT)
6239 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6240 else
6241 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6242 }
6243 }
6244 else
6245 {
6246 /* First step is to deallocate the stack frame so that we can
6247 pop the registers. */
6248 if (!sp_valid)
6249 {
6250 gcc_assert (frame_pointer_needed);
6251 pro_epilogue_adjust_stack (stack_pointer_rtx,
6252 hard_frame_pointer_rtx,
6253 GEN_INT (offset), style);
6254 }
6255 else if (frame.to_allocate)
6256 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6257 GEN_INT (frame.to_allocate), style);
6258
6259 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6260 if (ix86_save_reg (regno, false))
6261 {
6262 if (TARGET_64BIT)
6263 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6264 else
6265 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6266 }
6267 if (frame_pointer_needed)
6268 {
6269 /* Leave results in shorter dependency chains on CPUs that are
6270 able to grok it fast. */
6271 if (TARGET_USE_LEAVE)
6272 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6273 else if (TARGET_64BIT)
6274 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6275 else
6276 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6277 }
6278 }
6279
6280 if (cfun->machine->force_align_arg_pointer)
6281 {
6282 emit_insn (gen_addsi3 (stack_pointer_rtx,
6283 cfun->machine->force_align_arg_pointer,
6284 GEN_INT (-4)));
6285 }
6286
6287 /* Sibcall epilogues don't want a return instruction. */
6288 if (style == 0)
6289 return;
6290
6291 if (current_function_pops_args && current_function_args_size)
6292 {
6293 rtx popc = GEN_INT (current_function_pops_args);
6294
6295 /* The i386 return instruction can only pop 64K bytes. If asked to pop
6296 more, pop the return address, do an explicit add, and jump indirectly
6297 to the caller. */
6298
6299 if (current_function_pops_args >= 65536)
6300 {
6301 rtx ecx = gen_rtx_REG (SImode, 2);
6302
6303 /* There is no "pascal" calling convention in any 64bit ABI. */
6304 gcc_assert (!TARGET_64BIT);
6305
6306 emit_insn (gen_popsi1 (ecx));
6307 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6308 emit_jump_insn (gen_return_indirect_internal (ecx));
6309 }
6310 else
6311 emit_jump_insn (gen_return_pop_internal (popc));
6312 }
6313 else
6314 emit_jump_insn (gen_return_internal ());
6315 }
6316
6317 /* Undo modifications to global state made while outputting the function, such as the hard register used for the PIC pointer. */
6318
6319 static void
6320 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6321 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6322 {
6323 if (pic_offset_table_rtx)
6324 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6325 #if TARGET_MACHO
6326 /* Mach-O doesn't support labels at the end of objects, so if
6327 it looks like we might want one, insert a NOP. */
6328 {
6329 rtx insn = get_last_insn ();
6330 while (insn
6331 && NOTE_P (insn)
6332 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6333 insn = PREV_INSN (insn);
6334 if (insn
6335 && (LABEL_P (insn)
6336 || (NOTE_P (insn)
6337 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6338 fputs ("\tnop\n", file);
6339 }
6340 #endif
6341
6342 }
6343 \f
6344 /* Extract the parts of an RTL expression that is a valid memory address
6345 for an instruction. Return 0 if the structure of the address is
6346 grossly off. Return -1 if the address contains ASHIFT, so it is not
6347 strictly valid, but is still useful for computing lea instruction lengths. */
6348
6349 int
6350 ix86_decompose_address (rtx addr, struct ix86_address *out)
6351 {
6352 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6353 rtx base_reg, index_reg;
6354 HOST_WIDE_INT scale = 1;
6355 rtx scale_rtx = NULL_RTX;
6356 int retval = 1;
6357 enum ix86_address_seg seg = SEG_DEFAULT;
6358
6359 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6360 base = addr;
6361 else if (GET_CODE (addr) == PLUS)
6362 {
6363 rtx addends[4], op;
6364 int n = 0, i;
6365
6366 op = addr;
6367 do
6368 {
6369 if (n >= 4)
6370 return 0;
6371 addends[n++] = XEXP (op, 1);
6372 op = XEXP (op, 0);
6373 }
6374 while (GET_CODE (op) == PLUS);
6375 if (n >= 4)
6376 return 0;
6377 addends[n] = op;
6378
6379 for (i = n; i >= 0; --i)
6380 {
6381 op = addends[i];
6382 switch (GET_CODE (op))
6383 {
6384 case MULT:
6385 if (index)
6386 return 0;
6387 index = XEXP (op, 0);
6388 scale_rtx = XEXP (op, 1);
6389 break;
6390
6391 case UNSPEC:
6392 if (XINT (op, 1) == UNSPEC_TP
6393 && TARGET_TLS_DIRECT_SEG_REFS
6394 && seg == SEG_DEFAULT)
6395 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6396 else
6397 return 0;
6398 break;
6399
6400 case REG:
6401 case SUBREG:
6402 if (!base)
6403 base = op;
6404 else if (!index)
6405 index = op;
6406 else
6407 return 0;
6408 break;
6409
6410 case CONST:
6411 case CONST_INT:
6412 case SYMBOL_REF:
6413 case LABEL_REF:
6414 if (disp)
6415 return 0;
6416 disp = op;
6417 break;
6418
6419 default:
6420 return 0;
6421 }
6422 }
6423 }
6424 else if (GET_CODE (addr) == MULT)
6425 {
6426 index = XEXP (addr, 0); /* index*scale */
6427 scale_rtx = XEXP (addr, 1);
6428 }
6429 else if (GET_CODE (addr) == ASHIFT)
6430 {
6431 rtx tmp;
6432
6433 /* We're called for lea too, which implements ashift on occasion. */
6434 index = XEXP (addr, 0);
6435 tmp = XEXP (addr, 1);
6436 if (!CONST_INT_P (tmp))
6437 return 0;
6438 scale = INTVAL (tmp);
6439 if ((unsigned HOST_WIDE_INT) scale > 3)
6440 return 0;
6441 scale = 1 << scale;
6442 retval = -1;
6443 }
6444 else
6445 disp = addr; /* displacement */
6446
6447 /* Extract the integral value of scale. */
6448 if (scale_rtx)
6449 {
6450 if (!CONST_INT_P (scale_rtx))
6451 return 0;
6452 scale = INTVAL (scale_rtx);
6453 }
6454
6455 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6456 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6457
6458 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
6459 if (base_reg && index_reg && scale == 1
6460 && (index_reg == arg_pointer_rtx
6461 || index_reg == frame_pointer_rtx
6462 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6463 {
6464 rtx tmp;
6465 tmp = base, base = index, index = tmp;
6466 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6467 }
6468
6469 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6470 if ((base_reg == hard_frame_pointer_rtx
6471 || base_reg == frame_pointer_rtx
6472 || base_reg == arg_pointer_rtx) && !disp)
6473 disp = const0_rtx;
6474
6475 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6476 Avoid this by transforming to [%esi+0]. */
6477 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6478 && base_reg && !index_reg && !disp
6479 && REG_P (base_reg)
6480 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6481 disp = const0_rtx;
6482
6483 /* Special case: encode reg+reg instead of reg*2. */
6484 if (!base && index && scale && scale == 2)
6485 base = index, base_reg = index_reg, scale = 1;
6486
6487 /* Special case: scaling cannot be encoded without base or displacement. */
6488 if (!base && !disp && index && scale != 1)
6489 disp = const0_rtx;
6490
6491 out->base = base;
6492 out->index = index;
6493 out->disp = disp;
6494 out->scale = scale;
6495 out->seg = seg;
6496
6497 return retval;
6498 }
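/* A worked example: the address of the memory operand 8(%eax,%ebx,4),

     (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4))
		       (reg:SI ax))
	      (const_int 8))

   decomposes into base = %eax, index = %ebx, scale = 4, disp = 8 and
   seg = SEG_DEFAULT, and the function returns 1.  */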
6499 \f
6500 /* Return the cost of the memory address X.
6501 For i386, it is better to use a complex address than let gcc copy
6502 the address into a reg and make a new pseudo. But not if the address
6503 requires two regs - that would mean more pseudos with longer
6504 lifetimes. */
6505 static int
6506 ix86_address_cost (rtx x)
6507 {
6508 struct ix86_address parts;
6509 int cost = 1;
6510 int ok = ix86_decompose_address (x, &parts);
6511
6512 gcc_assert (ok);
6513
6514 if (parts.base && GET_CODE (parts.base) == SUBREG)
6515 parts.base = SUBREG_REG (parts.base);
6516 if (parts.index && GET_CODE (parts.index) == SUBREG)
6517 parts.index = SUBREG_REG (parts.index);
6518
6519 /* More complex memory references are better. */
6520 if (parts.disp && parts.disp != const0_rtx)
6521 cost--;
6522 if (parts.seg != SEG_DEFAULT)
6523 cost--;
6524
6525 /* Attempt to minimize number of registers in the address. */
6526 if ((parts.base
6527 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6528 || (parts.index
6529 && (!REG_P (parts.index)
6530 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6531 cost++;
6532
6533 if (parts.base
6534 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6535 && parts.index
6536 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6537 && parts.base != parts.index)
6538 cost++;
6539
6540 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6541 since its predecode logic can't detect the length of such instructions
6542 and decoding degenerates to the vector decoder. Increase the cost of
6543 such addresses here. The penalty is at least 2 cycles. It may be
6544 worthwhile to split such addresses or even refuse them altogether.
6545
6546 The following addressing modes are affected:
6547 [base+scale*index]
6548 [scale*index+disp]
6549 [base+index]
6550
6551 The first and last cases may be avoidable by explicitly coding the zero
6552 into the memory address, but I don't have an AMD-K6 machine handy to
6553 check this theory. */
6554
6555 if (TARGET_K6
6556 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6557 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6558 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6559 cost += 10;
6560
6561 return cost;
6562 }
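/* For illustration, with hard registers a plain (reg:SI ax) costs 1 while
   (plus:SI (reg:SI ax) (const_int 4)) -- i.e. 4(%eax) -- costs 0: the
   nonzero displacement subtracts one from the initial cost and no
   register-pressure penalty applies.  The same address built from a
   pseudo gets the penalty back, steering the optimizers toward addresses
   that use fewer new registers.  */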
6563 \f
6564 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6565 this is used to form addresses to local data when -fPIC is in
6566 use. */
6567
6568 static bool
6569 darwin_local_data_pic (rtx disp)
6570 {
6571 if (GET_CODE (disp) == MINUS)
6572 {
6573 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6574 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6575 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6576 {
6577 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6578 if (! strcmp (sym_name, "<pic base>"))
6579 return true;
6580 }
6581 }
6582
6583 return false;
6584 }
6585
6586 /* Determine if a given RTX is a valid constant. We already know this
6587 satisfies CONSTANT_P. */
6588
6589 bool
6590 legitimate_constant_p (rtx x)
6591 {
6592 switch (GET_CODE (x))
6593 {
6594 case CONST:
6595 x = XEXP (x, 0);
6596
6597 if (GET_CODE (x) == PLUS)
6598 {
6599 if (!CONST_INT_P (XEXP (x, 1)))
6600 return false;
6601 x = XEXP (x, 0);
6602 }
6603
6604 if (TARGET_MACHO && darwin_local_data_pic (x))
6605 return true;
6606
6607 /* Only some unspecs are valid as "constants". */
6608 if (GET_CODE (x) == UNSPEC)
6609 switch (XINT (x, 1))
6610 {
6611 case UNSPEC_GOT:
6612 case UNSPEC_GOTOFF:
6613 case UNSPEC_PLTOFF:
6614 return TARGET_64BIT;
6615 case UNSPEC_TPOFF:
6616 case UNSPEC_NTPOFF:
6617 x = XVECEXP (x, 0, 0);
6618 return (GET_CODE (x) == SYMBOL_REF
6619 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6620 case UNSPEC_DTPOFF:
6621 x = XVECEXP (x, 0, 0);
6622 return (GET_CODE (x) == SYMBOL_REF
6623 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6624 default:
6625 return false;
6626 }
6627
6628 /* We must have drilled down to a symbol. */
6629 if (GET_CODE (x) == LABEL_REF)
6630 return true;
6631 if (GET_CODE (x) != SYMBOL_REF)
6632 return false;
6633 /* FALLTHRU */
6634
6635 case SYMBOL_REF:
6636 /* TLS symbols are never valid. */
6637 if (SYMBOL_REF_TLS_MODEL (x))
6638 return false;
6639
6640 /* DLLIMPORT symbols are never valid. */
6641 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6642 && SYMBOL_REF_DLLIMPORT_P (x))
6643 return false;
6644 break;
6645
6646 case CONST_DOUBLE:
6647 if (GET_MODE (x) == TImode
6648 && x != CONST0_RTX (TImode)
6649 && !TARGET_64BIT)
6650 return false;
6651 break;
6652
6653 case CONST_VECTOR:
6654 if (x == CONST0_RTX (GET_MODE (x)))
6655 return true;
6656 return false;
6657
6658 default:
6659 break;
6660 }
6661
6662 /* Otherwise we handle everything else in the move patterns. */
6663 return true;
6664 }
6665
6666 /* Determine if it's legal to put X into the constant pool. This
6667 is not possible for the address of thread-local symbols, which
6668 is checked above. */
6669
6670 static bool
6671 ix86_cannot_force_const_mem (rtx x)
6672 {
6673 /* We can always put integral constants and vectors in memory. */
6674 switch (GET_CODE (x))
6675 {
6676 case CONST_INT:
6677 case CONST_DOUBLE:
6678 case CONST_VECTOR:
6679 return false;
6680
6681 default:
6682 break;
6683 }
6684 return !legitimate_constant_p (x);
6685 }
6686
6687 /* Determine if a given RTX is a valid constant address. */
6688
6689 bool
6690 constant_address_p (rtx x)
6691 {
6692 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6693 }
6694
6695 /* Nonzero if the constant value X is a legitimate general operand
6696 when generating PIC code. It is given that flag_pic is on and
6697 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6698
6699 bool
6700 legitimate_pic_operand_p (rtx x)
6701 {
6702 rtx inner;
6703
6704 switch (GET_CODE (x))
6705 {
6706 case CONST:
6707 inner = XEXP (x, 0);
6708 if (GET_CODE (inner) == PLUS
6709 && CONST_INT_P (XEXP (inner, 1)))
6710 inner = XEXP (inner, 0);
6711
6712 /* Only some unspecs are valid as "constants". */
6713 if (GET_CODE (inner) == UNSPEC)
6714 switch (XINT (inner, 1))
6715 {
6716 case UNSPEC_GOT:
6717 case UNSPEC_GOTOFF:
6718 case UNSPEC_PLTOFF:
6719 return TARGET_64BIT;
6720 case UNSPEC_TPOFF:
6721 x = XVECEXP (inner, 0, 0);
6722 return (GET_CODE (x) == SYMBOL_REF
6723 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6724 default:
6725 return false;
6726 }
6727 /* FALLTHRU */
6728
6729 case SYMBOL_REF:
6730 case LABEL_REF:
6731 return legitimate_pic_address_disp_p (x);
6732
6733 default:
6734 return true;
6735 }
6736 }
6737
6738 /* Determine if a given CONST RTX is a valid memory displacement
6739 in PIC mode. */
6740
6741 int
6742 legitimate_pic_address_disp_p (rtx disp)
6743 {
6744 bool saw_plus;
6745
6746 /* In 64bit mode we can allow direct addresses of symbols and labels
6747 when they are not dynamic symbols. */
6748 if (TARGET_64BIT)
6749 {
6750 rtx op0 = disp, op1;
6751
6752 switch (GET_CODE (disp))
6753 {
6754 case LABEL_REF:
6755 return true;
6756
6757 case CONST:
6758 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6759 break;
6760 op0 = XEXP (XEXP (disp, 0), 0);
6761 op1 = XEXP (XEXP (disp, 0), 1);
6762 if (!CONST_INT_P (op1)
6763 || INTVAL (op1) >= 16*1024*1024
6764 || INTVAL (op1) < -16*1024*1024)
6765 break;
6766 if (GET_CODE (op0) == LABEL_REF)
6767 return true;
6768 if (GET_CODE (op0) != SYMBOL_REF)
6769 break;
6770 /* FALLTHRU */
6771
6772 case SYMBOL_REF:
6773 /* TLS references should always be enclosed in UNSPEC. */
6774 if (SYMBOL_REF_TLS_MODEL (op0))
6775 return false;
6776 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6777 && ix86_cmodel != CM_LARGE_PIC)
6778 return true;
6779 break;
6780
6781 default:
6782 break;
6783 }
6784 }
6785 if (GET_CODE (disp) != CONST)
6786 return 0;
6787 disp = XEXP (disp, 0);
6788
6789 if (TARGET_64BIT)
6790 {
6791 /* It is unsafe to allow PLUS expressions here; that would defeat the
6792 limit on the allowed distance to GOT tables. We should not need them anyway. */
6793 if (GET_CODE (disp) != UNSPEC
6794 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6795 && XINT (disp, 1) != UNSPEC_GOTOFF
6796 && XINT (disp, 1) != UNSPEC_PLTOFF))
6797 return 0;
6798
6799 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6800 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6801 return 0;
6802 return 1;
6803 }
6804
6805 saw_plus = false;
6806 if (GET_CODE (disp) == PLUS)
6807 {
6808 if (!CONST_INT_P (XEXP (disp, 1)))
6809 return 0;
6810 disp = XEXP (disp, 0);
6811 saw_plus = true;
6812 }
6813
6814 if (TARGET_MACHO && darwin_local_data_pic (disp))
6815 return 1;
6816
6817 if (GET_CODE (disp) != UNSPEC)
6818 return 0;
6819
6820 switch (XINT (disp, 1))
6821 {
6822 case UNSPEC_GOT:
6823 if (saw_plus)
6824 return false;
6825 /* We need to check for both symbols and labels because VxWorks loads
6826 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6827 details. */
6828 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6829 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6830 case UNSPEC_GOTOFF:
6831 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6832 While the ABI also specifies a 32bit relocation, we don't produce it
6833 in the small PIC model at all. */
6834 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6835 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6836 && !TARGET_64BIT)
6837 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6838 return false;
6839 case UNSPEC_GOTTPOFF:
6840 case UNSPEC_GOTNTPOFF:
6841 case UNSPEC_INDNTPOFF:
6842 if (saw_plus)
6843 return false;
6844 disp = XVECEXP (disp, 0, 0);
6845 return (GET_CODE (disp) == SYMBOL_REF
6846 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6847 case UNSPEC_NTPOFF:
6848 disp = XVECEXP (disp, 0, 0);
6849 return (GET_CODE (disp) == SYMBOL_REF
6850 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6851 case UNSPEC_DTPOFF:
6852 disp = XVECEXP (disp, 0, 0);
6853 return (GET_CODE (disp) == SYMBOL_REF
6854 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6855 }
6856
6857 return 0;
6858 }
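/* For illustration, in 32-bit PIC code a displacement such as

     (const (unspec [(symbol_ref ("x"))] UNSPEC_GOTOFF))

   is accepted above (x@GOTOFF, subject to gotoff_operand), whereas an
   @GOT reference with an added constant offset is rejected because of
   the saw_plus check.  In 64-bit mode, apart from direct references to
   local symbols and labels, only the UNSPEC_GOTPCREL, UNSPEC_GOTOFF and
   UNSPEC_PLTOFF wrappers are accepted.  */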
6859
6860 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6861 memory address for an instruction. The MODE argument is the machine mode
6862 for the MEM expression that wants to use this address.
6863
6864 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6865 convert common non-canonical forms to canonical form so that they will
6866 be recognized. */
6867
6868 int
6869 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6870 rtx addr, int strict)
6871 {
6872 struct ix86_address parts;
6873 rtx base, index, disp;
6874 HOST_WIDE_INT scale;
6875 const char *reason = NULL;
6876 rtx reason_rtx = NULL_RTX;
6877
6878 if (ix86_decompose_address (addr, &parts) <= 0)
6879 {
6880 reason = "decomposition failed";
6881 goto report_error;
6882 }
6883
6884 base = parts.base;
6885 index = parts.index;
6886 disp = parts.disp;
6887 scale = parts.scale;
6888
6889 /* Validate base register.
6890
6891 Don't allow SUBREGs that span more than a word here. It can lead to spill
6892 failures when the base is one word out of a two word structure, which is
6893 represented internally as a DImode int. */
6894
6895 if (base)
6896 {
6897 rtx reg;
6898 reason_rtx = base;
6899
6900 if (REG_P (base))
6901 reg = base;
6902 else if (GET_CODE (base) == SUBREG
6903 && REG_P (SUBREG_REG (base))
6904 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6905 <= UNITS_PER_WORD)
6906 reg = SUBREG_REG (base);
6907 else
6908 {
6909 reason = "base is not a register";
6910 goto report_error;
6911 }
6912
6913 if (GET_MODE (base) != Pmode)
6914 {
6915 reason = "base is not in Pmode";
6916 goto report_error;
6917 }
6918
6919 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6920 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6921 {
6922 reason = "base is not valid";
6923 goto report_error;
6924 }
6925 }
6926
6927 /* Validate index register.
6928
6929 Don't allow SUBREGs that span more than a word here -- same as above. */
6930
6931 if (index)
6932 {
6933 rtx reg;
6934 reason_rtx = index;
6935
6936 if (REG_P (index))
6937 reg = index;
6938 else if (GET_CODE (index) == SUBREG
6939 && REG_P (SUBREG_REG (index))
6940 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6941 <= UNITS_PER_WORD)
6942 reg = SUBREG_REG (index);
6943 else
6944 {
6945 reason = "index is not a register";
6946 goto report_error;
6947 }
6948
6949 if (GET_MODE (index) != Pmode)
6950 {
6951 reason = "index is not in Pmode";
6952 goto report_error;
6953 }
6954
6955 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6956 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6957 {
6958 reason = "index is not valid";
6959 goto report_error;
6960 }
6961 }
6962
6963 /* Validate scale factor. */
6964 if (scale != 1)
6965 {
6966 reason_rtx = GEN_INT (scale);
6967 if (!index)
6968 {
6969 reason = "scale without index";
6970 goto report_error;
6971 }
6972
6973 if (scale != 2 && scale != 4 && scale != 8)
6974 {
6975 reason = "scale is not a valid multiplier";
6976 goto report_error;
6977 }
6978 }
6979
6980 /* Validate displacement. */
6981 if (disp)
6982 {
6983 reason_rtx = disp;
6984
6985 if (GET_CODE (disp) == CONST
6986 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6987 switch (XINT (XEXP (disp, 0), 1))
6988 {
6989 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6990 used. While the ABI also specifies 32bit relocations, we don't produce
6991 them at all and use IP-relative addressing instead.
6992 case UNSPEC_GOT:
6993 case UNSPEC_GOTOFF:
6994 gcc_assert (flag_pic);
6995 if (!TARGET_64BIT)
6996 goto is_legitimate_pic;
6997 reason = "64bit address unspec";
6998 goto report_error;
6999
7000 case UNSPEC_GOTPCREL:
7001 gcc_assert (flag_pic);
7002 goto is_legitimate_pic;
7003
7004 case UNSPEC_GOTTPOFF:
7005 case UNSPEC_GOTNTPOFF:
7006 case UNSPEC_INDNTPOFF:
7007 case UNSPEC_NTPOFF:
7008 case UNSPEC_DTPOFF:
7009 break;
7010
7011 default:
7012 reason = "invalid address unspec";
7013 goto report_error;
7014 }
7015
7016 else if (SYMBOLIC_CONST (disp)
7017 && (flag_pic
7018 || (TARGET_MACHO
7019 #if TARGET_MACHO
7020 && MACHOPIC_INDIRECT
7021 && !machopic_operand_p (disp)
7022 #endif
7023 )))
7024 {
7025
7026 is_legitimate_pic:
7027 if (TARGET_64BIT && (index || base))
7028 {
7029 /* foo@dtpoff(%rX) is ok. */
7030 if (GET_CODE (disp) != CONST
7031 || GET_CODE (XEXP (disp, 0)) != PLUS
7032 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7033 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7034 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7035 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7036 {
7037 reason = "non-constant pic memory reference";
7038 goto report_error;
7039 }
7040 }
7041 else if (! legitimate_pic_address_disp_p (disp))
7042 {
7043 reason = "displacement is an invalid pic construct";
7044 goto report_error;
7045 }
7046
7047 /* This code used to verify that a symbolic pic displacement
7048 includes the pic_offset_table_rtx register.
7049
7050 While this is a good idea, unfortunately these constructs may
7051 be created by "adds using lea" optimization for incorrect
7052 code like:
7053
7054 int a;
7055 int foo(int i)
7056 {
7057 return *(&a+i);
7058 }
7059
7060 This code is nonsensical, but results in addressing the
7061 GOT table with the pic_offset_table_rtx base. We can't
7062 simply refuse it, since it gets matched by the
7063 "addsi3" pattern, which is later split to lea when the
7064 output register differs from the input. While this
7065 could be handled by a separate addsi pattern for this case
7066 that never results in lea, disabling this test seems to be
7067 the easier and correct fix for the crash. */
7068 }
7069 else if (GET_CODE (disp) != LABEL_REF
7070 && !CONST_INT_P (disp)
7071 && (GET_CODE (disp) != CONST
7072 || !legitimate_constant_p (disp))
7073 && (GET_CODE (disp) != SYMBOL_REF
7074 || !legitimate_constant_p (disp)))
7075 {
7076 reason = "displacement is not constant";
7077 goto report_error;
7078 }
7079 else if (TARGET_64BIT
7080 && !x86_64_immediate_operand (disp, VOIDmode))
7081 {
7082 reason = "displacement is out of range";
7083 goto report_error;
7084 }
7085 }
7086
7087 /* Everything looks valid. */
7088 return TRUE;
7089
7090 report_error:
7091 return FALSE;
7092 }
7093 \f
7094 /* Return a unique alias set for the GOT. */
7095
7096 static HOST_WIDE_INT
7097 ix86_GOT_alias_set (void)
7098 {
7099 static HOST_WIDE_INT set = -1;
7100 if (set == -1)
7101 set = new_alias_set ();
7102 return set;
7103 }
7104
7105 /* Return a legitimate reference for ORIG (an address) using the
7106 register REG. If REG is 0, a new pseudo is generated.
7107
7108 There are two types of references that must be handled:
7109
7110 1. Global data references must load the address from the GOT, via
7111 the PIC reg. An insn is emitted to do this load, and the reg is
7112 returned.
7113
7114 2. Static data references, constant pool addresses, and code labels
7115 compute the address as an offset from the GOT, whose base is in
7116 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7117 differentiate them from global data objects. The returned
7118 address is the PIC reg + an unspec constant.
7119
7120 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7121 reg also appears in the address. */
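/* Roughly, and assuming 32-bit ELF PIC purely for illustration, the two
   cases correspond to code of the form

     movl  foo@GOT(%ebx), %reg     # 1. global data: load address from GOT
     leal  bar@GOTOFF(%ebx), %reg  # 2. local data: GOT base + constant offset

   where %ebx holds pic_offset_table_rtx.  */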
7122
7123 static rtx
7124 legitimize_pic_address (rtx orig, rtx reg)
7125 {
7126 rtx addr = orig;
7127 rtx new = orig;
7128 rtx base;
7129
7130 #if TARGET_MACHO
7131 if (TARGET_MACHO && !TARGET_64BIT)
7132 {
7133 if (reg == 0)
7134 reg = gen_reg_rtx (Pmode);
7135 /* Use the generic Mach-O PIC machinery. */
7136 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7137 }
7138 #endif
7139
7140 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7141 new = addr;
7142 else if (TARGET_64BIT
7143 && ix86_cmodel != CM_SMALL_PIC
7144 && gotoff_operand (addr, Pmode))
7145 {
7146 rtx tmpreg;
7147 /* This symbol may be referenced via a displacement from the PIC
7148 base address (@GOTOFF). */
7149
7150 if (reload_in_progress)
7151 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7152 if (GET_CODE (addr) == CONST)
7153 addr = XEXP (addr, 0);
7154 if (GET_CODE (addr) == PLUS)
7155 {
7156 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7157 UNSPEC_GOTOFF);
7158 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7159 }
7160 else
7161 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7162 new = gen_rtx_CONST (Pmode, new);
7163 if (!reg)
7164 tmpreg = gen_reg_rtx (Pmode);
7165 else
7166 tmpreg = reg;
7167 emit_move_insn (tmpreg, new);
7168
7169 if (reg != 0)
7170 {
7171 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7172 tmpreg, 1, OPTAB_DIRECT);
7173 new = reg;
7174 }
7175 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7176 }
7177 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7178 {
7179 /* This symbol may be referenced via a displacement from the PIC
7180 base address (@GOTOFF). */
7181
7182 if (reload_in_progress)
7183 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7184 if (GET_CODE (addr) == CONST)
7185 addr = XEXP (addr, 0);
7186 if (GET_CODE (addr) == PLUS)
7187 {
7188 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7189 UNSPEC_GOTOFF);
7190 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7191 }
7192 else
7193 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7194 new = gen_rtx_CONST (Pmode, new);
7195 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7196
7197 if (reg != 0)
7198 {
7199 emit_move_insn (reg, new);
7200 new = reg;
7201 }
7202 }
7203 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7204 /* We can't use @GOTOFF for text labels on VxWorks;
7205 see gotoff_operand. */
7206 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7207 {
7208 /* Given that we've already handled dllimport variables separately
7209 in legitimize_address, and all other variables should satisfy
7210 legitimate_pic_address_disp_p, we should never arrive here. */
7211 gcc_assert (!TARGET_64BIT_MS_ABI);
7212
7213 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7214 {
7215 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7216 new = gen_rtx_CONST (Pmode, new);
7217 new = gen_const_mem (Pmode, new);
7218 set_mem_alias_set (new, ix86_GOT_alias_set ());
7219
7220 if (reg == 0)
7221 reg = gen_reg_rtx (Pmode);
7222 /* Use gen_movsi directly; otherwise the address is loaded
7223 into a register for CSE. We don't want to CSE these addresses;
7224 instead we CSE addresses from the GOT table, so skip this. */
7225 emit_insn (gen_movsi (reg, new));
7226 new = reg;
7227 }
7228 else
7229 {
7230 /* This symbol must be referenced via a load from the
7231 Global Offset Table (@GOT). */
7232
7233 if (reload_in_progress)
7234 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7235 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7236 new = gen_rtx_CONST (Pmode, new);
7237 if (TARGET_64BIT)
7238 new = force_reg (Pmode, new);
7239 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7240 new = gen_const_mem (Pmode, new);
7241 set_mem_alias_set (new, ix86_GOT_alias_set ());
7242
7243 if (reg == 0)
7244 reg = gen_reg_rtx (Pmode);
7245 emit_move_insn (reg, new);
7246 new = reg;
7247 }
7248 }
7249 else
7250 {
7251 if (CONST_INT_P (addr)
7252 && !x86_64_immediate_operand (addr, VOIDmode))
7253 {
7254 if (reg)
7255 {
7256 emit_move_insn (reg, addr);
7257 new = reg;
7258 }
7259 else
7260 new = force_reg (Pmode, addr);
7261 }
7262 else if (GET_CODE (addr) == CONST)
7263 {
7264 addr = XEXP (addr, 0);
7265
7266 /* We must match stuff we generated before. Assume the only
7267 unspecs that can get here are ours. Not that we could do
7268 anything with them anyway.... */
7269 if (GET_CODE (addr) == UNSPEC
7270 || (GET_CODE (addr) == PLUS
7271 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7272 return orig;
7273 gcc_assert (GET_CODE (addr) == PLUS);
7274 }
7275 if (GET_CODE (addr) == PLUS)
7276 {
7277 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7278
7279 /* Check first to see if this is a constant offset from a @GOTOFF
7280 symbol reference. */
7281 if (gotoff_operand (op0, Pmode)
7282 && CONST_INT_P (op1))
7283 {
7284 if (!TARGET_64BIT)
7285 {
7286 if (reload_in_progress)
7287 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7288 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7289 UNSPEC_GOTOFF);
7290 new = gen_rtx_PLUS (Pmode, new, op1);
7291 new = gen_rtx_CONST (Pmode, new);
7292 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7293
7294 if (reg != 0)
7295 {
7296 emit_move_insn (reg, new);
7297 new = reg;
7298 }
7299 }
7300 else
7301 {
7302 if (INTVAL (op1) < -16*1024*1024
7303 || INTVAL (op1) >= 16*1024*1024)
7304 {
7305 if (!x86_64_immediate_operand (op1, Pmode))
7306 op1 = force_reg (Pmode, op1);
7307 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7308 }
7309 }
7310 }
7311 else
7312 {
7313 base = legitimize_pic_address (XEXP (addr, 0), reg);
7314 new = legitimize_pic_address (XEXP (addr, 1),
7315 base == reg ? NULL_RTX : reg);
7316
7317 if (CONST_INT_P (new))
7318 new = plus_constant (base, INTVAL (new));
7319 else
7320 {
7321 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7322 {
7323 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7324 new = XEXP (new, 1);
7325 }
7326 new = gen_rtx_PLUS (Pmode, base, new);
7327 }
7328 }
7329 }
7330 }
7331 return new;
7332 }
7333 \f
7334 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7335
7336 static rtx
7337 get_thread_pointer (int to_reg)
7338 {
7339 rtx tp, reg, insn;
7340
7341 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7342 if (!to_reg)
7343 return tp;
7344
7345 reg = gen_reg_rtx (Pmode);
7346 insn = gen_rtx_SET (VOIDmode, reg, tp);
7347 insn = emit_insn (insn);
7348
7349 return reg;
7350 }
7351
7352 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7353 false if we expect this to be used for a memory address and true if
7354 we expect to load the address into a register. */
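/* As a rough sketch (32-bit GNU TLS assumed; details depend on
   -mtls-direct-seg-refs and the exact model), accesses to
   "__thread int x;" end up as something like

     local exec:    movl %gs:x@NTPOFF, %eax
     initial exec:  movl x@GOTNTPOFF(%ebx), %ecx ; movl %gs:(%ecx), %eax

   while the dynamic models call __tls_get_addr.  */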
7355
7356 static rtx
7357 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7358 {
7359 rtx dest, base, off, pic, tp;
7360 int type;
7361
7362 switch (model)
7363 {
7364 case TLS_MODEL_GLOBAL_DYNAMIC:
7365 dest = gen_reg_rtx (Pmode);
7366 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7367
7368 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7369 {
7370 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7371
7372 start_sequence ();
7373 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7374 insns = get_insns ();
7375 end_sequence ();
7376
7377 CONST_OR_PURE_CALL_P (insns) = 1;
7378 emit_libcall_block (insns, dest, rax, x);
7379 }
7380 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7381 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7382 else
7383 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7384
7385 if (TARGET_GNU2_TLS)
7386 {
7387 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7388
7389 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7390 }
7391 break;
7392
7393 case TLS_MODEL_LOCAL_DYNAMIC:
7394 base = gen_reg_rtx (Pmode);
7395 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7396
7397 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7398 {
7399 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7400
7401 start_sequence ();
7402 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7403 insns = get_insns ();
7404 end_sequence ();
7405
7406 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7407 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7408 CONST_OR_PURE_CALL_P (insns) = 1;
7409 emit_libcall_block (insns, base, rax, note);
7410 }
7411 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7412 emit_insn (gen_tls_local_dynamic_base_64 (base));
7413 else
7414 emit_insn (gen_tls_local_dynamic_base_32 (base));
7415
7416 if (TARGET_GNU2_TLS)
7417 {
7418 rtx x = ix86_tls_module_base ();
7419
7420 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7421 gen_rtx_MINUS (Pmode, x, tp));
7422 }
7423
7424 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7425 off = gen_rtx_CONST (Pmode, off);
7426
7427 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7428
7429 if (TARGET_GNU2_TLS)
7430 {
7431 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7432
7433 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7434 }
7435
7436 break;
7437
7438 case TLS_MODEL_INITIAL_EXEC:
7439 if (TARGET_64BIT)
7440 {
7441 pic = NULL;
7442 type = UNSPEC_GOTNTPOFF;
7443 }
7444 else if (flag_pic)
7445 {
7446 if (reload_in_progress)
7447 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7448 pic = pic_offset_table_rtx;
7449 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7450 }
7451 else if (!TARGET_ANY_GNU_TLS)
7452 {
7453 pic = gen_reg_rtx (Pmode);
7454 emit_insn (gen_set_got (pic));
7455 type = UNSPEC_GOTTPOFF;
7456 }
7457 else
7458 {
7459 pic = NULL;
7460 type = UNSPEC_INDNTPOFF;
7461 }
7462
7463 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7464 off = gen_rtx_CONST (Pmode, off);
7465 if (pic)
7466 off = gen_rtx_PLUS (Pmode, pic, off);
7467 off = gen_const_mem (Pmode, off);
7468 set_mem_alias_set (off, ix86_GOT_alias_set ());
7469
7470 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7471 {
7472 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7473 off = force_reg (Pmode, off);
7474 return gen_rtx_PLUS (Pmode, base, off);
7475 }
7476 else
7477 {
7478 base = get_thread_pointer (true);
7479 dest = gen_reg_rtx (Pmode);
7480 emit_insn (gen_subsi3 (dest, base, off));
7481 }
7482 break;
7483
7484 case TLS_MODEL_LOCAL_EXEC:
7485 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7486 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7487 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7488 off = gen_rtx_CONST (Pmode, off);
7489
7490 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7491 {
7492 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7493 return gen_rtx_PLUS (Pmode, base, off);
7494 }
7495 else
7496 {
7497 base = get_thread_pointer (true);
7498 dest = gen_reg_rtx (Pmode);
7499 emit_insn (gen_subsi3 (dest, base, off));
7500 }
7501 break;
7502
7503 default:
7504 gcc_unreachable ();
7505 }
7506
7507 return dest;
7508 }
7509
7510 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7511 to symbol DECL. */
7512
7513 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7514 htab_t dllimport_map;
7515
7516 static tree
7517 get_dllimport_decl (tree decl)
7518 {
7519 struct tree_map *h, in;
7520 void **loc;
7521 const char *name;
7522 const char *prefix;
7523 size_t namelen, prefixlen;
7524 char *imp_name;
7525 tree to;
7526 rtx rtl;
7527
7528 if (!dllimport_map)
7529 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7530
7531 in.hash = htab_hash_pointer (decl);
7532 in.base.from = decl;
7533 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7534 h = *loc;
7535 if (h)
7536 return h->to;
7537
7538 *loc = h = ggc_alloc (sizeof (struct tree_map));
7539 h->hash = in.hash;
7540 h->base.from = decl;
7541 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7542 DECL_ARTIFICIAL (to) = 1;
7543 DECL_IGNORED_P (to) = 1;
7544 DECL_EXTERNAL (to) = 1;
7545 TREE_READONLY (to) = 1;
7546
7547 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7548 name = targetm.strip_name_encoding (name);
7549 if (name[0] == FASTCALL_PREFIX)
7550 {
7551 name++;
7552 prefix = "*__imp_";
7553 }
7554 else
7555 prefix = "*__imp__";
7556
7557 namelen = strlen (name);
7558 prefixlen = strlen (prefix);
7559 imp_name = alloca (namelen + prefixlen + 1);
7560 memcpy (imp_name, prefix, prefixlen);
7561 memcpy (imp_name + prefixlen, name, namelen + 1);
7562
7563 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7564 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7565 SET_SYMBOL_REF_DECL (rtl, to);
7566 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7567
7568 rtl = gen_const_mem (Pmode, rtl);
7569 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7570
7571 SET_DECL_RTL (to, rtl);
7572
7573 return to;
7574 }
7575
7576 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7577 true if we require the result be a register. */
7578
7579 static rtx
7580 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7581 {
7582 tree imp_decl;
7583 rtx x;
7584
7585 gcc_assert (SYMBOL_REF_DECL (symbol));
7586 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7587
7588 x = DECL_RTL (imp_decl);
7589 if (want_reg)
7590 x = force_reg (Pmode, x);
7591 return x;
7592 }
7593
7594 /* Try machine-dependent ways of modifying an illegitimate address
7595 to be legitimate. If we find one, return the new, valid address.
7596 This macro is used in only one place: `memory_address' in explow.c.
7597
7598 OLDX is the address as it was before break_out_memory_refs was called.
7599 In some cases it is useful to look at this to decide what needs to be done.
7600
7601 MODE and WIN are passed so that this macro can use
7602 GO_IF_LEGITIMATE_ADDRESS.
7603
7604 It is always safe for this macro to do nothing. It exists to recognize
7605 opportunities to optimize the output.
7606
7607 For the 80386, we handle X+REG by loading X into a register R and
7608 using R+REG. R will go in a general reg and indexing will be used.
7609 However, if REG is a broken-out memory address or multiplication,
7610 nothing needs to be done because REG can certainly go in a general reg.
7611
7612 When -fpic is used, special handling is needed for symbolic references.
7613 See comments by legitimize_pic_address in i386.c for details. */
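/* A small sketch of the canonicalizations performed below: an address such
   as (plus (ashift (reg B) (const_int 2)) (reg A)) is rewritten into
   (plus (mult (reg B) (const_int 4)) (reg A)), which matches the scaled
   index form (%A,%B,4) accepted by GO_IF_LEGITIMATE_ADDRESS.  */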
7614
7615 rtx
7616 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7617 {
7618 int changed = 0;
7619 unsigned log;
7620
7621 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7622 if (log)
7623 return legitimize_tls_address (x, log, false);
7624 if (GET_CODE (x) == CONST
7625 && GET_CODE (XEXP (x, 0)) == PLUS
7626 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7627 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7628 {
7629 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7630 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7631 }
7632
7633 if (flag_pic && SYMBOLIC_CONST (x))
7634 return legitimize_pic_address (x, 0);
7635
7636 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7637 {
7638 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7639 return legitimize_dllimport_symbol (x, true);
7640 if (GET_CODE (x) == CONST
7641 && GET_CODE (XEXP (x, 0)) == PLUS
7642 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7643 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7644 {
7645 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7646 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7647 }
7648 }
7649
7650 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7651 if (GET_CODE (x) == ASHIFT
7652 && CONST_INT_P (XEXP (x, 1))
7653 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7654 {
7655 changed = 1;
7656 log = INTVAL (XEXP (x, 1));
7657 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7658 GEN_INT (1 << log));
7659 }
7660
7661 if (GET_CODE (x) == PLUS)
7662 {
7663 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7664
7665 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7666 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7667 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7668 {
7669 changed = 1;
7670 log = INTVAL (XEXP (XEXP (x, 0), 1));
7671 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7672 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7673 GEN_INT (1 << log));
7674 }
7675
7676 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7677 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7678 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7679 {
7680 changed = 1;
7681 log = INTVAL (XEXP (XEXP (x, 1), 1));
7682 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7683 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7684 GEN_INT (1 << log));
7685 }
7686
7687 /* Put multiply first if it isn't already. */
7688 if (GET_CODE (XEXP (x, 1)) == MULT)
7689 {
7690 rtx tmp = XEXP (x, 0);
7691 XEXP (x, 0) = XEXP (x, 1);
7692 XEXP (x, 1) = tmp;
7693 changed = 1;
7694 }
7695
7696 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7697 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7698 created by virtual register instantiation, register elimination, and
7699 similar optimizations. */
7700 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7701 {
7702 changed = 1;
7703 x = gen_rtx_PLUS (Pmode,
7704 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7705 XEXP (XEXP (x, 1), 0)),
7706 XEXP (XEXP (x, 1), 1));
7707 }
7708
7709 /* Canonicalize
7710 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7711 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7712 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7713 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7714 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7715 && CONSTANT_P (XEXP (x, 1)))
7716 {
7717 rtx constant;
7718 rtx other = NULL_RTX;
7719
7720 if (CONST_INT_P (XEXP (x, 1)))
7721 {
7722 constant = XEXP (x, 1);
7723 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7724 }
7725 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7726 {
7727 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7728 other = XEXP (x, 1);
7729 }
7730 else
7731 constant = 0;
7732
7733 if (constant)
7734 {
7735 changed = 1;
7736 x = gen_rtx_PLUS (Pmode,
7737 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7738 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7739 plus_constant (other, INTVAL (constant)));
7740 }
7741 }
7742
7743 if (changed && legitimate_address_p (mode, x, FALSE))
7744 return x;
7745
7746 if (GET_CODE (XEXP (x, 0)) == MULT)
7747 {
7748 changed = 1;
7749 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7750 }
7751
7752 if (GET_CODE (XEXP (x, 1)) == MULT)
7753 {
7754 changed = 1;
7755 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7756 }
7757
7758 if (changed
7759 && REG_P (XEXP (x, 1))
7760 && REG_P (XEXP (x, 0)))
7761 return x;
7762
7763 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7764 {
7765 changed = 1;
7766 x = legitimize_pic_address (x, 0);
7767 }
7768
7769 if (changed && legitimate_address_p (mode, x, FALSE))
7770 return x;
7771
7772 if (REG_P (XEXP (x, 0)))
7773 {
7774 rtx temp = gen_reg_rtx (Pmode);
7775 rtx val = force_operand (XEXP (x, 1), temp);
7776 if (val != temp)
7777 emit_move_insn (temp, val);
7778
7779 XEXP (x, 1) = temp;
7780 return x;
7781 }
7782
7783 else if (REG_P (XEXP (x, 1)))
7784 {
7785 rtx temp = gen_reg_rtx (Pmode);
7786 rtx val = force_operand (XEXP (x, 0), temp);
7787 if (val != temp)
7788 emit_move_insn (temp, val);
7789
7790 XEXP (x, 0) = temp;
7791 return x;
7792 }
7793 }
7794
7795 return x;
7796 }
7797 \f
7798 /* Print an integer constant expression in assembler syntax. Addition
7799 and subtraction are the only arithmetic that may appear in these
7800 expressions. FILE is the stdio stream to write to, X is the rtx, and
7801 CODE is the operand print code from the output string. */
7802
7803 static void
7804 output_pic_addr_const (FILE *file, rtx x, int code)
7805 {
7806 char buf[256];
7807
7808 switch (GET_CODE (x))
7809 {
7810 case PC:
7811 gcc_assert (flag_pic);
7812 putc ('.', file);
7813 break;
7814
7815 case SYMBOL_REF:
7816 if (! TARGET_MACHO || TARGET_64BIT)
7817 output_addr_const (file, x);
7818 else
7819 {
7820 const char *name = XSTR (x, 0);
7821
7822 /* Mark the decl as referenced so that cgraph will
7823 output the function. */
7824 if (SYMBOL_REF_DECL (x))
7825 mark_decl_referenced (SYMBOL_REF_DECL (x));
7826
7827 #if TARGET_MACHO
7828 if (MACHOPIC_INDIRECT
7829 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7830 name = machopic_indirection_name (x, /*stub_p=*/true);
7831 #endif
7832 assemble_name (file, name);
7833 }
7834 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7835 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7836 fputs ("@PLT", file);
7837 break;
7838
7839 case LABEL_REF:
7840 x = XEXP (x, 0);
7841 /* FALLTHRU */
7842 case CODE_LABEL:
7843 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7844 assemble_name (asm_out_file, buf);
7845 break;
7846
7847 case CONST_INT:
7848 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7849 break;
7850
7851 case CONST:
7852 /* This used to output parentheses around the expression,
7853 but that does not work on the 386 (either ATT or BSD assembler). */
7854 output_pic_addr_const (file, XEXP (x, 0), code);
7855 break;
7856
7857 case CONST_DOUBLE:
7858 if (GET_MODE (x) == VOIDmode)
7859 {
7860 /* We can use %d if the number is <32 bits and positive. */
7861 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7862 fprintf (file, "0x%lx%08lx",
7863 (unsigned long) CONST_DOUBLE_HIGH (x),
7864 (unsigned long) CONST_DOUBLE_LOW (x));
7865 else
7866 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7867 }
7868 else
7869 /* We can't handle floating point constants;
7870 PRINT_OPERAND must handle them. */
7871 output_operand_lossage ("floating constant misused");
7872 break;
7873
7874 case PLUS:
7875 /* Some assemblers need integer constants to appear first. */
7876 if (CONST_INT_P (XEXP (x, 0)))
7877 {
7878 output_pic_addr_const (file, XEXP (x, 0), code);
7879 putc ('+', file);
7880 output_pic_addr_const (file, XEXP (x, 1), code);
7881 }
7882 else
7883 {
7884 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7885 output_pic_addr_const (file, XEXP (x, 1), code);
7886 putc ('+', file);
7887 output_pic_addr_const (file, XEXP (x, 0), code);
7888 }
7889 break;
7890
7891 case MINUS:
7892 if (!TARGET_MACHO)
7893 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7894 output_pic_addr_const (file, XEXP (x, 0), code);
7895 putc ('-', file);
7896 output_pic_addr_const (file, XEXP (x, 1), code);
7897 if (!TARGET_MACHO)
7898 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7899 break;
7900
7901 case UNSPEC:
7902 gcc_assert (XVECLEN (x, 0) == 1);
7903 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7904 switch (XINT (x, 1))
7905 {
7906 case UNSPEC_GOT:
7907 fputs ("@GOT", file);
7908 break;
7909 case UNSPEC_GOTOFF:
7910 fputs ("@GOTOFF", file);
7911 break;
7912 case UNSPEC_PLTOFF:
7913 fputs ("@PLTOFF", file);
7914 break;
7915 case UNSPEC_GOTPCREL:
7916 fputs ("@GOTPCREL(%rip)", file);
7917 break;
7918 case UNSPEC_GOTTPOFF:
7919 /* FIXME: This might be @TPOFF in Sun ld too. */
7920 fputs ("@GOTTPOFF", file);
7921 break;
7922 case UNSPEC_TPOFF:
7923 fputs ("@TPOFF", file);
7924 break;
7925 case UNSPEC_NTPOFF:
7926 if (TARGET_64BIT)
7927 fputs ("@TPOFF", file);
7928 else
7929 fputs ("@NTPOFF", file);
7930 break;
7931 case UNSPEC_DTPOFF:
7932 fputs ("@DTPOFF", file);
7933 break;
7934 case UNSPEC_GOTNTPOFF:
7935 if (TARGET_64BIT)
7936 fputs ("@GOTTPOFF(%rip)", file);
7937 else
7938 fputs ("@GOTNTPOFF", file);
7939 break;
7940 case UNSPEC_INDNTPOFF:
7941 fputs ("@INDNTPOFF", file);
7942 break;
7943 default:
7944 output_operand_lossage ("invalid UNSPEC as operand");
7945 break;
7946 }
7947 break;
7948
7949 default:
7950 output_operand_lossage ("invalid expression as operand");
7951 }
7952 }
7953
7954 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7955 We need to emit DTP-relative relocations. */
7956
7957 static void ATTRIBUTE_UNUSED
7958 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7959 {
7960 fputs (ASM_LONG, file);
7961 output_addr_const (file, x);
7962 fputs ("@DTPOFF", file);
7963 switch (size)
7964 {
7965 case 4:
7966 break;
7967 case 8:
7968 fputs (", 0", file);
7969 break;
7970 default:
7971 gcc_unreachable ();
7972 }
7973 }
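/* For instance, for SIZE 4 this emits X followed by "@DTPOFF" after
   ASM_LONG, and for SIZE 8 it appends ", 0" so the relocation occupies only
   the low 32 bits; the exact spelling of ASM_LONG is target-dependent.  */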
7974
7975 /* In the name of slightly smaller debug output, and to cater to
7976 general assembler lossage, recognize PIC+GOTOFF and turn it back
7977 into a direct symbol reference.
7978
7979 On Darwin, this is necessary to avoid a crash, because Darwin
7980 has a different PIC label for each routine but the DWARF debugging
7981 information is not associated with any particular routine, so it's
7982 necessary to remove references to the PIC label from RTL stored by
7983 the DWARF output code. */
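/* Concretely (a 32-bit sketch), something like

     (plus (reg:SI ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref "x"), possibly re-adding a constant
   or register addend that was part of the original address.  */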
7984
7985 static rtx
7986 ix86_delegitimize_address (rtx orig_x)
7987 {
7988 rtx x = orig_x;
7989 /* reg_addend is NULL or a multiple of some register. */
7990 rtx reg_addend = NULL_RTX;
7991 /* const_addend is NULL or a const_int. */
7992 rtx const_addend = NULL_RTX;
7993 /* This is the result, or NULL. */
7994 rtx result = NULL_RTX;
7995
7996 if (MEM_P (x))
7997 x = XEXP (x, 0);
7998
7999 if (TARGET_64BIT)
8000 {
8001 if (GET_CODE (x) != CONST
8002 || GET_CODE (XEXP (x, 0)) != UNSPEC
8003 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8004 || !MEM_P (orig_x))
8005 return orig_x;
8006 return XVECEXP (XEXP (x, 0), 0, 0);
8007 }
8008
8009 if (GET_CODE (x) != PLUS
8010 || GET_CODE (XEXP (x, 1)) != CONST)
8011 return orig_x;
8012
8013 if (REG_P (XEXP (x, 0))
8014 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8015 /* %ebx + GOT/GOTOFF */
8016 ;
8017 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8018 {
8019 /* %ebx + %reg * scale + GOT/GOTOFF */
8020 reg_addend = XEXP (x, 0);
8021 if (REG_P (XEXP (reg_addend, 0))
8022 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8023 reg_addend = XEXP (reg_addend, 1);
8024 else if (REG_P (XEXP (reg_addend, 1))
8025 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8026 reg_addend = XEXP (reg_addend, 0);
8027 else
8028 return orig_x;
8029 if (!REG_P (reg_addend)
8030 && GET_CODE (reg_addend) != MULT
8031 && GET_CODE (reg_addend) != ASHIFT)
8032 return orig_x;
8033 }
8034 else
8035 return orig_x;
8036
8037 x = XEXP (XEXP (x, 1), 0);
8038 if (GET_CODE (x) == PLUS
8039 && CONST_INT_P (XEXP (x, 1)))
8040 {
8041 const_addend = XEXP (x, 1);
8042 x = XEXP (x, 0);
8043 }
8044
8045 if (GET_CODE (x) == UNSPEC
8046 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8047 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8048 result = XVECEXP (x, 0, 0);
8049
8050 if (TARGET_MACHO && darwin_local_data_pic (x)
8051 && !MEM_P (orig_x))
8052 result = XEXP (x, 0);
8053
8054 if (! result)
8055 return orig_x;
8056
8057 if (const_addend)
8058 result = gen_rtx_PLUS (Pmode, result, const_addend);
8059 if (reg_addend)
8060 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8061 return result;
8062 }
8063
8064 /* If X is a machine specific address (i.e. a symbol or label being
8065 referenced as a displacement from the GOT implemented using an
8066 UNSPEC), then return the base term. Otherwise return X. */
8067
8068 rtx
8069 ix86_find_base_term (rtx x)
8070 {
8071 rtx term;
8072
8073 if (TARGET_64BIT)
8074 {
8075 if (GET_CODE (x) != CONST)
8076 return x;
8077 term = XEXP (x, 0);
8078 if (GET_CODE (term) == PLUS
8079 && (CONST_INT_P (XEXP (term, 1))
8080 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8081 term = XEXP (term, 0);
8082 if (GET_CODE (term) != UNSPEC
8083 || XINT (term, 1) != UNSPEC_GOTPCREL)
8084 return x;
8085
8086 term = XVECEXP (term, 0, 0);
8087
8088 if (GET_CODE (term) != SYMBOL_REF
8089 && GET_CODE (term) != LABEL_REF)
8090 return x;
8091
8092 return term;
8093 }
8094
8095 term = ix86_delegitimize_address (x);
8096
8097 if (GET_CODE (term) != SYMBOL_REF
8098 && GET_CODE (term) != LABEL_REF)
8099 return x;
8100
8101 return term;
8102 }
8103 \f
8104 static void
8105 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8106 int fp, FILE *file)
8107 {
8108 const char *suffix;
8109
8110 if (mode == CCFPmode || mode == CCFPUmode)
8111 {
8112 enum rtx_code second_code, bypass_code;
8113 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8114 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8115 code = ix86_fp_compare_code_to_integer (code);
8116 mode = CCmode;
8117 }
8118 if (reverse)
8119 code = reverse_condition (code);
8120
8121 switch (code)
8122 {
8123 case EQ:
8124 suffix = "e";
8125 break;
8126 case NE:
8127 suffix = "ne";
8128 break;
8129 case GT:
8130 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8131 suffix = "g";
8132 break;
8133 case GTU:
8134 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8135 Those same assemblers have the same but opposite lossage on cmov. */
8136 gcc_assert (mode == CCmode);
8137 suffix = fp ? "nbe" : "a";
8138 break;
8139 case LT:
8140 switch (mode)
8141 {
8142 case CCNOmode:
8143 case CCGOCmode:
8144 suffix = "s";
8145 break;
8146
8147 case CCmode:
8148 case CCGCmode:
8149 suffix = "l";
8150 break;
8151
8152 default:
8153 gcc_unreachable ();
8154 }
8155 break;
8156 case LTU:
8157 gcc_assert (mode == CCmode);
8158 suffix = "b";
8159 break;
8160 case GE:
8161 switch (mode)
8162 {
8163 case CCNOmode:
8164 case CCGOCmode:
8165 suffix = "ns";
8166 break;
8167
8168 case CCmode:
8169 case CCGCmode:
8170 suffix = "ge";
8171 break;
8172
8173 default:
8174 gcc_unreachable ();
8175 }
8176 break;
8177 case GEU:
8178 /* ??? As above. */
8179 gcc_assert (mode == CCmode);
8180 suffix = fp ? "nb" : "ae";
8181 break;
8182 case LE:
8183 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8184 suffix = "le";
8185 break;
8186 case LEU:
8187 gcc_assert (mode == CCmode);
8188 suffix = "be";
8189 break;
8190 case UNORDERED:
8191 suffix = fp ? "u" : "p";
8192 break;
8193 case ORDERED:
8194 suffix = fp ? "nu" : "np";
8195 break;
8196 default:
8197 gcc_unreachable ();
8198 }
8199 fputs (suffix, file);
8200 }
8201
8202 /* Print the name of register X to FILE based on its machine mode and number.
8203 If CODE is 'w', pretend the mode is HImode.
8204 If CODE is 'b', pretend the mode is QImode.
8205 If CODE is 'k', pretend the mode is SImode.
8206 If CODE is 'q', pretend the mode is DImode.
8207 If CODE is 'h', pretend the reg is the 'high' byte register.
8208 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
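/* For example (illustrative only), when X is the %eax hard register:
   code 'b' prints "%al", 'w' prints "%ax", 'k' prints "%eax",
   'q' prints "%rax" and 'h' prints "%ah".  */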
8209
8210 void
8211 print_reg (rtx x, int code, FILE *file)
8212 {
8213 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8214 && REGNO (x) != FRAME_POINTER_REGNUM
8215 && REGNO (x) != FLAGS_REG
8216 && REGNO (x) != FPSR_REG
8217 && REGNO (x) != FPCR_REG);
8218
8219 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8220 putc ('%', file);
8221
8222 if (code == 'w' || MMX_REG_P (x))
8223 code = 2;
8224 else if (code == 'b')
8225 code = 1;
8226 else if (code == 'k')
8227 code = 4;
8228 else if (code == 'q')
8229 code = 8;
8230 else if (code == 'y')
8231 code = 3;
8232 else if (code == 'h')
8233 code = 0;
8234 else
8235 code = GET_MODE_SIZE (GET_MODE (x));
8236
8237 /* Irritatingly, AMD extended registers use a different naming convention
8238 from the normal registers. */
8239 if (REX_INT_REG_P (x))
8240 {
8241 gcc_assert (TARGET_64BIT);
8242 switch (code)
8243 {
8244 case 0:
8245 error ("extended registers have no high halves");
8246 break;
8247 case 1:
8248 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8249 break;
8250 case 2:
8251 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8252 break;
8253 case 4:
8254 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8255 break;
8256 case 8:
8257 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8258 break;
8259 default:
8260 error ("unsupported operand size for extended register");
8261 break;
8262 }
8263 return;
8264 }
8265 switch (code)
8266 {
8267 case 3:
8268 if (STACK_TOP_P (x))
8269 {
8270 fputs ("st(0)", file);
8271 break;
8272 }
8273 /* FALLTHRU */
8274 case 8:
8275 case 4:
8276 case 12:
8277 if (! ANY_FP_REG_P (x))
8278 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8279 /* FALLTHRU */
8280 case 16:
8281 case 2:
8282 normal:
8283 fputs (hi_reg_name[REGNO (x)], file);
8284 break;
8285 case 1:
8286 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8287 goto normal;
8288 fputs (qi_reg_name[REGNO (x)], file);
8289 break;
8290 case 0:
8291 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8292 goto normal;
8293 fputs (qi_high_reg_name[REGNO (x)], file);
8294 break;
8295 default:
8296 gcc_unreachable ();
8297 }
8298 }
8299
8300 /* Locate some local-dynamic symbol still in use by this function
8301 so that we can print its name in some tls_local_dynamic_base
8302 pattern. */
8303
8304 static int
8305 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8306 {
8307 rtx x = *px;
8308
8309 if (GET_CODE (x) == SYMBOL_REF
8310 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8311 {
8312 cfun->machine->some_ld_name = XSTR (x, 0);
8313 return 1;
8314 }
8315
8316 return 0;
8317 }
8318
8319 static const char *
8320 get_some_local_dynamic_name (void)
8321 {
8322 rtx insn;
8323
8324 if (cfun->machine->some_ld_name)
8325 return cfun->machine->some_ld_name;
8326
8327 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8328 if (INSN_P (insn)
8329 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8330 return cfun->machine->some_ld_name;
8331
8332 gcc_unreachable ();
8333 }
8334
8335 /* Meaning of CODE:
8336 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8337 C -- print opcode suffix for set/cmov insn.
8338 c -- like C, but print reversed condition
8339 F,f -- likewise, but for floating-point.
8340 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8341 otherwise nothing
8342 R -- print the prefix for register names.
8343 z -- print the opcode suffix for the size of the current operand.
8344 * -- print a star (in certain assembler syntax)
8345 A -- print an absolute memory reference.
8346 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8347 s -- print a shift double count, followed by the assembler's argument
8348 delimiter.
8349 b -- print the QImode name of the register for the indicated operand.
8350 %b0 would print %al if operands[0] is reg 0.
8351 w -- likewise, print the HImode name of the register.
8352 k -- likewise, print the SImode name of the register.
8353 q -- likewise, print the DImode name of the register.
8354 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8355 y -- print "st(0)" instead of "st" as a register.
8356 D -- print condition for SSE cmp instruction.
8357 P -- if PIC, print an @PLT suffix.
8358 X -- don't print any sort of PIC '@' suffix for a symbol.
8359 & -- print some in-use local-dynamic symbol name.
8360 H -- print a memory address offset by 8; used for sse high-parts
8361 */
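/* As an illustration (a made-up template, not one taken from i386.md):
   "mov%z0\t{%1, %0|%0, %1}" would print the size suffix of operands[0]
   via 'z' and pick AT&T or Intel operand order from the {att|intel}
   alternatives depending on ASSEMBLER_DIALECT.  */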
8362
8363 void
8364 print_operand (FILE *file, rtx x, int code)
8365 {
8366 if (code)
8367 {
8368 switch (code)
8369 {
8370 case '*':
8371 if (ASSEMBLER_DIALECT == ASM_ATT)
8372 putc ('*', file);
8373 return;
8374
8375 case '&':
8376 assemble_name (file, get_some_local_dynamic_name ());
8377 return;
8378
8379 case 'A':
8380 switch (ASSEMBLER_DIALECT)
8381 {
8382 case ASM_ATT:
8383 putc ('*', file);
8384 break;
8385
8386 case ASM_INTEL:
8387 /* Intel syntax. For absolute addresses, registers should not
8388 be surrounded by brackets. */
8389 if (!REG_P (x))
8390 {
8391 putc ('[', file);
8392 PRINT_OPERAND (file, x, 0);
8393 putc (']', file);
8394 return;
8395 }
8396 break;
8397
8398 default:
8399 gcc_unreachable ();
8400 }
8401
8402 PRINT_OPERAND (file, x, 0);
8403 return;
8404
8405
8406 case 'L':
8407 if (ASSEMBLER_DIALECT == ASM_ATT)
8408 putc ('l', file);
8409 return;
8410
8411 case 'W':
8412 if (ASSEMBLER_DIALECT == ASM_ATT)
8413 putc ('w', file);
8414 return;
8415
8416 case 'B':
8417 if (ASSEMBLER_DIALECT == ASM_ATT)
8418 putc ('b', file);
8419 return;
8420
8421 case 'Q':
8422 if (ASSEMBLER_DIALECT == ASM_ATT)
8423 putc ('l', file);
8424 return;
8425
8426 case 'S':
8427 if (ASSEMBLER_DIALECT == ASM_ATT)
8428 putc ('s', file);
8429 return;
8430
8431 case 'T':
8432 if (ASSEMBLER_DIALECT == ASM_ATT)
8433 putc ('t', file);
8434 return;
8435
8436 case 'z':
8437 /* 387 opcodes don't get size suffixes if the operands are
8438 registers. */
8439 if (STACK_REG_P (x))
8440 return;
8441
8442 /* Likewise if using Intel opcodes. */
8443 if (ASSEMBLER_DIALECT == ASM_INTEL)
8444 return;
8445
8446 /* Derive the size suffix from the size of the operand. */
8447 switch (GET_MODE_SIZE (GET_MODE (x)))
8448 {
8449 case 1:
8450 putc ('b', file);
8451 return;
8452
8453 case 2:
8454 if (MEM_P (x))
8455 {
8456 #ifdef HAVE_GAS_FILDS_FISTS
8457 putc ('s', file);
8458 #endif
8459 return;
8460 }
8461 else
8462 putc ('w', file);
8463 return;
8464
8465 case 4:
8466 if (GET_MODE (x) == SFmode)
8467 {
8468 putc ('s', file);
8469 return;
8470 }
8471 else
8472 putc ('l', file);
8473 return;
8474
8475 case 12:
8476 case 16:
8477 putc ('t', file);
8478 return;
8479
8480 case 8:
8481 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8482 {
8483 #ifdef GAS_MNEMONICS
8484 putc ('q', file);
8485 #else
8486 putc ('l', file);
8487 putc ('l', file);
8488 #endif
8489 }
8490 else
8491 putc ('l', file);
8492 return;
8493
8494 default:
8495 gcc_unreachable ();
8496 }
8497
8498 case 'b':
8499 case 'w':
8500 case 'k':
8501 case 'q':
8502 case 'h':
8503 case 'y':
8504 case 'X':
8505 case 'P':
8506 break;
8507
8508 case 's':
8509 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8510 {
8511 PRINT_OPERAND (file, x, 0);
8512 putc (',', file);
8513 }
8514 return;
8515
8516 case 'D':
8517 /* Little bit of braindamage here. The SSE compare instructions
8518 use completely different names for the comparisons than the
8519 fp conditional moves do. */
8520 switch (GET_CODE (x))
8521 {
8522 case EQ:
8523 case UNEQ:
8524 fputs ("eq", file);
8525 break;
8526 case LT:
8527 case UNLT:
8528 fputs ("lt", file);
8529 break;
8530 case LE:
8531 case UNLE:
8532 fputs ("le", file);
8533 break;
8534 case UNORDERED:
8535 fputs ("unord", file);
8536 break;
8537 case NE:
8538 case LTGT:
8539 fputs ("neq", file);
8540 break;
8541 case UNGE:
8542 case GE:
8543 fputs ("nlt", file);
8544 break;
8545 case UNGT:
8546 case GT:
8547 fputs ("nle", file);
8548 break;
8549 case ORDERED:
8550 fputs ("ord", file);
8551 break;
8552 default:
8553 gcc_unreachable ();
8554 }
8555 return;
8556 case 'O':
8557 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8558 if (ASSEMBLER_DIALECT == ASM_ATT)
8559 {
8560 switch (GET_MODE (x))
8561 {
8562 case HImode: putc ('w', file); break;
8563 case SImode:
8564 case SFmode: putc ('l', file); break;
8565 case DImode:
8566 case DFmode: putc ('q', file); break;
8567 default: gcc_unreachable ();
8568 }
8569 putc ('.', file);
8570 }
8571 #endif
8572 return;
8573 case 'C':
8574 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8575 return;
8576 case 'F':
8577 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8578 if (ASSEMBLER_DIALECT == ASM_ATT)
8579 putc ('.', file);
8580 #endif
8581 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8582 return;
8583
8584 /* Like above, but reverse condition */
8585 case 'c':
8586 /* Check to see if argument to %c is really a constant
8587 and not a condition code which needs to be reversed. */
8588 if (!COMPARISON_P (x))
8589 {
8590 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8591 return;
8592 }
8593 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8594 return;
8595 case 'f':
8596 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8597 if (ASSEMBLER_DIALECT == ASM_ATT)
8598 putc ('.', file);
8599 #endif
8600 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8601 return;
8602
8603 case 'H':
8604 /* It doesn't actually matter what mode we use here, as we're
8605 only going to use this for printing. */
8606 x = adjust_address_nv (x, DImode, 8);
8607 break;
8608
8609 case '+':
8610 {
8611 rtx x;
8612
8613 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8614 return;
8615
8616 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8617 if (x)
8618 {
8619 int pred_val = INTVAL (XEXP (x, 0));
8620
8621 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8622 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8623 {
8624 int taken = pred_val > REG_BR_PROB_BASE / 2;
8625 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8626
8627 /* Emit hints only when the default branch prediction
8628 heuristics would fail. */
8629 if (taken != cputaken)
8630 {
8631 /* We use 3e (DS) prefix for taken branches and
8632 2e (CS) prefix for not taken branches. */
8633 if (taken)
8634 fputs ("ds ; ", file);
8635 else
8636 fputs ("cs ; ", file);
8637 }
8638 }
8639 }
8640 return;
8641 }
8642 default:
8643 output_operand_lossage ("invalid operand code '%c'", code);
8644 }
8645 }
8646
8647 if (REG_P (x))
8648 print_reg (x, code, file);
8649
8650 else if (MEM_P (x))
8651 {
8652 /* No `byte ptr' prefix for call instructions. */
8653 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8654 {
8655 const char * size;
8656 switch (GET_MODE_SIZE (GET_MODE (x)))
8657 {
8658 case 1: size = "BYTE"; break;
8659 case 2: size = "WORD"; break;
8660 case 4: size = "DWORD"; break;
8661 case 8: size = "QWORD"; break;
8662 case 12: size = "XWORD"; break;
8663 case 16: size = "XMMWORD"; break;
8664 default:
8665 gcc_unreachable ();
8666 }
8667
8668 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8669 if (code == 'b')
8670 size = "BYTE";
8671 else if (code == 'w')
8672 size = "WORD";
8673 else if (code == 'k')
8674 size = "DWORD";
8675
8676 fputs (size, file);
8677 fputs (" PTR ", file);
8678 }
8679
8680 x = XEXP (x, 0);
8681 /* Avoid (%rip) for call operands. */
8682 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8683 && !CONST_INT_P (x))
8684 output_addr_const (file, x);
8685 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8686 output_operand_lossage ("invalid constraints for operand");
8687 else
8688 output_address (x);
8689 }
8690
8691 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8692 {
8693 REAL_VALUE_TYPE r;
8694 long l;
8695
8696 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8697 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8698
8699 if (ASSEMBLER_DIALECT == ASM_ATT)
8700 putc ('$', file);
8701 fprintf (file, "0x%08lx", l);
8702 }
8703
8704 /* These float cases don't actually occur as immediate operands. */
8705 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8706 {
8707 char dstr[30];
8708
8709 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8710 fprintf (file, "%s", dstr);
8711 }
8712
8713 else if (GET_CODE (x) == CONST_DOUBLE
8714 && GET_MODE (x) == XFmode)
8715 {
8716 char dstr[30];
8717
8718 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8719 fprintf (file, "%s", dstr);
8720 }
8721
8722 else
8723 {
8724 /* We have patterns that allow zero sets of memory, for instance.
8725 In 64-bit mode, we should probably support all 8-byte vectors,
8726 since we can in fact encode that into an immediate. */
8727 if (GET_CODE (x) == CONST_VECTOR)
8728 {
8729 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8730 x = const0_rtx;
8731 }
8732
8733 if (code != 'P')
8734 {
8735 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8736 {
8737 if (ASSEMBLER_DIALECT == ASM_ATT)
8738 putc ('$', file);
8739 }
8740 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8741 || GET_CODE (x) == LABEL_REF)
8742 {
8743 if (ASSEMBLER_DIALECT == ASM_ATT)
8744 putc ('$', file);
8745 else
8746 fputs ("OFFSET FLAT:", file);
8747 }
8748 }
8749 if (CONST_INT_P (x))
8750 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8751 else if (flag_pic)
8752 output_pic_addr_const (file, x, code);
8753 else
8754 output_addr_const (file, x);
8755 }
8756 }
8757 \f
8758 /* Print a memory operand whose address is ADDR. */
8759
8760 void
8761 print_operand_address (FILE *file, rtx addr)
8762 {
8763 struct ix86_address parts;
8764 rtx base, index, disp;
8765 int scale;
8766 int ok = ix86_decompose_address (addr, &parts);
8767
8768 gcc_assert (ok);
8769
8770 base = parts.base;
8771 index = parts.index;
8772 disp = parts.disp;
8773 scale = parts.scale;
8774
8775 switch (parts.seg)
8776 {
8777 case SEG_DEFAULT:
8778 break;
8779 case SEG_FS:
8780 case SEG_GS:
8781 if (USER_LABEL_PREFIX[0] == 0)
8782 putc ('%', file);
8783 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8784 break;
8785 default:
8786 gcc_unreachable ();
8787 }
8788
8789 if (!base && !index)
8790 {
8791 /* Displacement only requires special attention. */
8792
8793 if (CONST_INT_P (disp))
8794 {
8795 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8796 {
8797 if (USER_LABEL_PREFIX[0] == 0)
8798 putc ('%', file);
8799 fputs ("ds:", file);
8800 }
8801 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8802 }
8803 else if (flag_pic)
8804 output_pic_addr_const (file, disp, 0);
8805 else
8806 output_addr_const (file, disp);
8807
8808 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8809 if (TARGET_64BIT)
8810 {
8811 if (GET_CODE (disp) == CONST
8812 && GET_CODE (XEXP (disp, 0)) == PLUS
8813 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8814 disp = XEXP (XEXP (disp, 0), 0);
8815 if (GET_CODE (disp) == LABEL_REF
8816 || (GET_CODE (disp) == SYMBOL_REF
8817 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8818 fputs ("(%rip)", file);
8819 }
8820 }
8821 else
8822 {
8823 if (ASSEMBLER_DIALECT == ASM_ATT)
8824 {
8825 if (disp)
8826 {
8827 if (flag_pic)
8828 output_pic_addr_const (file, disp, 0);
8829 else if (GET_CODE (disp) == LABEL_REF)
8830 output_asm_label (disp);
8831 else
8832 output_addr_const (file, disp);
8833 }
8834
8835 putc ('(', file);
8836 if (base)
8837 print_reg (base, 0, file);
8838 if (index)
8839 {
8840 putc (',', file);
8841 print_reg (index, 0, file);
8842 if (scale != 1)
8843 fprintf (file, ",%d", scale);
8844 }
8845 putc (')', file);
8846 }
8847 else
8848 {
8849 rtx offset = NULL_RTX;
8850
8851 if (disp)
8852 {
8853 /* Pull out the offset of a symbol; print any symbol itself. */
8854 if (GET_CODE (disp) == CONST
8855 && GET_CODE (XEXP (disp, 0)) == PLUS
8856 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8857 {
8858 offset = XEXP (XEXP (disp, 0), 1);
8859 disp = gen_rtx_CONST (VOIDmode,
8860 XEXP (XEXP (disp, 0), 0));
8861 }
8862
8863 if (flag_pic)
8864 output_pic_addr_const (file, disp, 0);
8865 else if (GET_CODE (disp) == LABEL_REF)
8866 output_asm_label (disp);
8867 else if (CONST_INT_P (disp))
8868 offset = disp;
8869 else
8870 output_addr_const (file, disp);
8871 }
8872
8873 putc ('[', file);
8874 if (base)
8875 {
8876 print_reg (base, 0, file);
8877 if (offset)
8878 {
8879 if (INTVAL (offset) >= 0)
8880 putc ('+', file);
8881 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8882 }
8883 }
8884 else if (offset)
8885 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8886 else
8887 putc ('0', file);
8888
8889 if (index)
8890 {
8891 putc ('+', file);
8892 print_reg (index, 0, file);
8893 if (scale != 1)
8894 fprintf (file, "*%d", scale);
8895 }
8896 putc (']', file);
8897 }
8898 }
8899 }
8900
8901 bool
8902 output_addr_const_extra (FILE *file, rtx x)
8903 {
8904 rtx op;
8905
8906 if (GET_CODE (x) != UNSPEC)
8907 return false;
8908
8909 op = XVECEXP (x, 0, 0);
8910 switch (XINT (x, 1))
8911 {
8912 case UNSPEC_GOTTPOFF:
8913 output_addr_const (file, op);
8914 /* FIXME: This might be @TPOFF in Sun ld. */
8915 fputs ("@GOTTPOFF", file);
8916 break;
8917 case UNSPEC_TPOFF:
8918 output_addr_const (file, op);
8919 fputs ("@TPOFF", file);
8920 break;
8921 case UNSPEC_NTPOFF:
8922 output_addr_const (file, op);
8923 if (TARGET_64BIT)
8924 fputs ("@TPOFF", file);
8925 else
8926 fputs ("@NTPOFF", file);
8927 break;
8928 case UNSPEC_DTPOFF:
8929 output_addr_const (file, op);
8930 fputs ("@DTPOFF", file);
8931 break;
8932 case UNSPEC_GOTNTPOFF:
8933 output_addr_const (file, op);
8934 if (TARGET_64BIT)
8935 fputs ("@GOTTPOFF(%rip)", file);
8936 else
8937 fputs ("@GOTNTPOFF", file);
8938 break;
8939 case UNSPEC_INDNTPOFF:
8940 output_addr_const (file, op);
8941 fputs ("@INDNTPOFF", file);
8942 break;
8943
8944 default:
8945 return false;
8946 }
8947
8948 return true;
8949 }
8950 \f
8951 /* Split one or more DImode RTL references into pairs of SImode
8952 references. The RTL can be REG, offsettable MEM, integer constant, or
8953 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8954 split and "num" is its length. lo_half and hi_half are output arrays
8955 that parallel "operands". */
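/* Typical use (sketch): with operands[0] a DImode register or offsettable
   MEM,

     split_di (operands, 1, lo, hi);

   leaves the low SImode word in lo[0] and the word at offset 4 in hi[0],
   so a 64-bit move can then be emitted as two 32-bit moves.  */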
8956
8957 void
8958 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8959 {
8960 while (num--)
8961 {
8962 rtx op = operands[num];
8963
8964 /* simplify_subreg refuses to split volatile memory addresses,
8965 but we still have to handle them. */
8966 if (MEM_P (op))
8967 {
8968 lo_half[num] = adjust_address (op, SImode, 0);
8969 hi_half[num] = adjust_address (op, SImode, 4);
8970 }
8971 else
8972 {
8973 lo_half[num] = simplify_gen_subreg (SImode, op,
8974 GET_MODE (op) == VOIDmode
8975 ? DImode : GET_MODE (op), 0);
8976 hi_half[num] = simplify_gen_subreg (SImode, op,
8977 GET_MODE (op) == VOIDmode
8978 ? DImode : GET_MODE (op), 4);
8979 }
8980 }
8981 }
8982 /* Split one or more TImode RTL references into pairs of DImode
8983 references. The RTL can be REG, offsettable MEM, integer constant, or
8984 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8985 split and "num" is its length. lo_half and hi_half are output arrays
8986 that parallel "operands". */
8987
8988 void
8989 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8990 {
8991 while (num--)
8992 {
8993 rtx op = operands[num];
8994
8995 /* simplify_subreg refuses to split volatile memory addresses, but we
8996 still have to handle them. */
8997 if (MEM_P (op))
8998 {
8999 lo_half[num] = adjust_address (op, DImode, 0);
9000 hi_half[num] = adjust_address (op, DImode, 8);
9001 }
9002 else
9003 {
9004 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9005 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9006 }
9007 }
9008 }
9009 \f
9010 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9011 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9012 is the expression of the binary operation. The output may either be
9013 emitted here, or returned to the caller, like all output_* functions.
9014
9015 There is no guarantee that the operands are the same mode, as they
9016 might be within FLOAT or FLOAT_EXTEND expressions. */
9017
9018 #ifndef SYSV386_COMPAT
9019 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9020 wants to fix the assemblers because that causes incompatibility
9021 with gcc. No-one wants to fix gcc because that causes
9022 incompatibility with assemblers... You can use the option of
9023 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9024 #define SYSV386_COMPAT 1
9025 #endif
9026
9027 const char *
9028 output_387_binary_op (rtx insn, rtx *operands)
9029 {
9030 static char buf[30];
9031 const char *p;
9032 const char *ssep;
9033 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9034
9035 #ifdef ENABLE_CHECKING
9036 /* Even if we do not want to check the inputs, this documents the input
9037 constraints, which helps in understanding the following code. */
9038 if (STACK_REG_P (operands[0])
9039 && ((REG_P (operands[1])
9040 && REGNO (operands[0]) == REGNO (operands[1])
9041 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9042 || (REG_P (operands[2])
9043 && REGNO (operands[0]) == REGNO (operands[2])
9044 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9045 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9046 ; /* ok */
9047 else
9048 gcc_assert (is_sse);
9049 #endif
9050
9051 switch (GET_CODE (operands[3]))
9052 {
9053 case PLUS:
9054 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9055 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9056 p = "fiadd";
9057 else
9058 p = "fadd";
9059 ssep = "add";
9060 break;
9061
9062 case MINUS:
9063 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9064 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9065 p = "fisub";
9066 else
9067 p = "fsub";
9068 ssep = "sub";
9069 break;
9070
9071 case MULT:
9072 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9073 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9074 p = "fimul";
9075 else
9076 p = "fmul";
9077 ssep = "mul";
9078 break;
9079
9080 case DIV:
9081 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9082 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9083 p = "fidiv";
9084 else
9085 p = "fdiv";
9086 ssep = "div";
9087 break;
9088
9089 default:
9090 gcc_unreachable ();
9091 }
9092
9093 if (is_sse)
9094 {
9095 strcpy (buf, ssep);
9096 if (GET_MODE (operands[0]) == SFmode)
9097 strcat (buf, "ss\t{%2, %0|%0, %2}");
9098 else
9099 strcat (buf, "sd\t{%2, %0|%0, %2}");
9100 return buf;
9101 }
9102 strcpy (buf, p);
9103
9104 switch (GET_CODE (operands[3]))
9105 {
9106 case MULT:
9107 case PLUS:
9108 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9109 {
9110 rtx temp = operands[2];
9111 operands[2] = operands[1];
9112 operands[1] = temp;
9113 }
9114
9115 /* We now know operands[0] == operands[1]. */
9116
9117 if (MEM_P (operands[2]))
9118 {
9119 p = "%z2\t%2";
9120 break;
9121 }
9122
9123 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9124 {
9125 if (STACK_TOP_P (operands[0]))
9126 /* How is it that we are storing to a dead operand[2]?
9127 Well, presumably operands[1] is dead too. We can't
9128 store the result to st(0) as st(0) gets popped on this
9129 instruction. Instead store to operands[2] (which I
9130 think has to be st(1)). st(1) will be popped later.
9131 gcc <= 2.8.1 didn't have this check and generated
9132 assembly code that the Unixware assembler rejected. */
9133 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9134 else
9135 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9136 break;
9137 }
9138
9139 if (STACK_TOP_P (operands[0]))
9140 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9141 else
9142 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9143 break;
9144
9145 case MINUS:
9146 case DIV:
9147 if (MEM_P (operands[1]))
9148 {
9149 p = "r%z1\t%1";
9150 break;
9151 }
9152
9153 if (MEM_P (operands[2]))
9154 {
9155 p = "%z2\t%2";
9156 break;
9157 }
9158
9159 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9160 {
9161 #if SYSV386_COMPAT
9162 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9163 derived assemblers, confusingly reverse the direction of
9164 the operation for fsub{r} and fdiv{r} when the
9165 destination register is not st(0). The Intel assembler
9166 doesn't have this brain damage. Read !SYSV386_COMPAT to
9167 figure out what the hardware really does. */
9168 if (STACK_TOP_P (operands[0]))
9169 p = "{p\t%0, %2|rp\t%2, %0}";
9170 else
9171 p = "{rp\t%2, %0|p\t%0, %2}";
9172 #else
9173 if (STACK_TOP_P (operands[0]))
9174 /* As above for fmul/fadd, we can't store to st(0). */
9175 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9176 else
9177 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9178 #endif
9179 break;
9180 }
9181
9182 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9183 {
9184 #if SYSV386_COMPAT
9185 if (STACK_TOP_P (operands[0]))
9186 p = "{rp\t%0, %1|p\t%1, %0}";
9187 else
9188 p = "{p\t%1, %0|rp\t%0, %1}";
9189 #else
9190 if (STACK_TOP_P (operands[0]))
9191 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9192 else
9193 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9194 #endif
9195 break;
9196 }
9197
9198 if (STACK_TOP_P (operands[0]))
9199 {
9200 if (STACK_TOP_P (operands[1]))
9201 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9202 else
9203 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9204 break;
9205 }
9206 else if (STACK_TOP_P (operands[1]))
9207 {
9208 #if SYSV386_COMPAT
9209 p = "{\t%1, %0|r\t%0, %1}";
9210 #else
9211 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9212 #endif
9213 }
9214 else
9215 {
9216 #if SYSV386_COMPAT
9217 p = "{r\t%2, %0|\t%0, %2}";
9218 #else
9219 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9220 #endif
9221 }
9222 break;
9223
9224 default:
9225 gcc_unreachable ();
9226 }
9227
9228 strcat (buf, p);
9229 return buf;
9230 }
9231
9232 /* Return needed mode for entity in optimize_mode_switching pass. */
9233
9234 int
9235 ix86_mode_needed (int entity, rtx insn)
9236 {
9237 enum attr_i387_cw mode;
9238
9239 /* The mode UNINITIALIZED is used to store the control word after a
9240 function call or ASM pattern. The mode ANY specifies that the function
9241 has no requirements on the control word and makes no changes to the
9242 bits we are interested in. */
9243
9244 if (CALL_P (insn)
9245 || (NONJUMP_INSN_P (insn)
9246 && (asm_noperands (PATTERN (insn)) >= 0
9247 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9248 return I387_CW_UNINITIALIZED;
9249
9250 if (recog_memoized (insn) < 0)
9251 return I387_CW_ANY;
9252
9253 mode = get_attr_i387_cw (insn);
9254
9255 switch (entity)
9256 {
9257 case I387_TRUNC:
9258 if (mode == I387_CW_TRUNC)
9259 return mode;
9260 break;
9261
9262 case I387_FLOOR:
9263 if (mode == I387_CW_FLOOR)
9264 return mode;
9265 break;
9266
9267 case I387_CEIL:
9268 if (mode == I387_CW_CEIL)
9269 return mode;
9270 break;
9271
9272 case I387_MASK_PM:
9273 if (mode == I387_CW_MASK_PM)
9274 return mode;
9275 break;
9276
9277 default:
9278 gcc_unreachable ();
9279 }
9280
9281 return I387_CW_ANY;
9282 }
9283
9284 /* Output code to initialize the control word copies used by trunc?f?i and
9285 rounding patterns. MODE selects the control word contents to set up;
9286 the copy is stored in the corresponding stack slot. */
9287
9288 void
9289 emit_i387_cw_initialization (int mode)
9290 {
9291 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9292 rtx new_mode;
9293
9294 int slot;
9295
9296 rtx reg = gen_reg_rtx (HImode);
9297
9298 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9299 emit_move_insn (reg, copy_rtx (stored_mode));
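/* For reference: the x87 control word keeps its rounding-control field
in bits 10-11 (00 = to nearest, 01 = down, 10 = up, 11 = toward zero)
and the precision-exception mask in bit 5, which is what the 0x0c00,
0x0400, 0x0800 and 0x0020 constants below manipulate. */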
9300
9301 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9302 {
9303 switch (mode)
9304 {
9305 case I387_CW_TRUNC:
9306 /* round toward zero (truncate) */
9307 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9308 slot = SLOT_CW_TRUNC;
9309 break;
9310
9311 case I387_CW_FLOOR:
9312 /* round down toward -oo */
9313 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9314 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9315 slot = SLOT_CW_FLOOR;
9316 break;
9317
9318 case I387_CW_CEIL:
9319 /* round up toward +oo */
9320 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9321 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9322 slot = SLOT_CW_CEIL;
9323 break;
9324
9325 case I387_CW_MASK_PM:
9326 /* mask precision exception for nearbyint() */
9327 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9328 slot = SLOT_CW_MASK_PM;
9329 break;
9330
9331 default:
9332 gcc_unreachable ();
9333 }
9334 }
9335 else
9336 {
9337 switch (mode)
9338 {
9339 case I387_CW_TRUNC:
9340 /* round toward zero (truncate) */
9341 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9342 slot = SLOT_CW_TRUNC;
9343 break;
9344
9345 case I387_CW_FLOOR:
9346 /* round down toward -oo */
9347 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9348 slot = SLOT_CW_FLOOR;
9349 break;
9350
9351 case I387_CW_CEIL:
9352 /* round up toward +oo */
9353 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9354 slot = SLOT_CW_CEIL;
9355 break;
9356
9357 case I387_CW_MASK_PM:
9358 /* mask precision exception for nearbyint() */
9359 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9360 slot = SLOT_CW_MASK_PM;
9361 break;
9362
9363 default:
9364 gcc_unreachable ();
9365 }
9366 }
9367
9368 gcc_assert (slot < MAX_386_STACK_LOCALS);
9369
9370 new_mode = assign_386_stack_local (HImode, slot);
9371 emit_move_insn (new_mode, reg);
9372 }
9373
9374 /* Output code for INSN to convert a float to a signed int. OPERANDS
9375 are the insn operands. The output may be [HSD]Imode and the input
9376 operand may be [SDX]Fmode. */
9377
9378 const char *
9379 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9380 {
9381 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9382 int dimode_p = GET_MODE (operands[0]) == DImode;
9383 int round_mode = get_attr_i387_cw (insn);
9384
9385 /* Jump through a hoop or two for DImode, since the hardware has no
9386 non-popping instruction. We used to do this a different way, but
9387 that was somewhat fragile and broke with post-reload splitters. */
9388 if ((dimode_p || fisttp) && !stack_top_dies)
9389 output_asm_insn ("fld\t%y1", operands);
9390
9391 gcc_assert (STACK_TOP_P (operands[1]));
9392 gcc_assert (MEM_P (operands[0]));
9393 gcc_assert (GET_MODE (operands[1]) != TFmode);
9394
9395 if (fisttp)
9396 output_asm_insn ("fisttp%z0\t%0", operands);
9397 else
9398 {
9399 if (round_mode != I387_CW_ANY)
9400 output_asm_insn ("fldcw\t%3", operands);
9401 if (stack_top_dies || dimode_p)
9402 output_asm_insn ("fistp%z0\t%0", operands);
9403 else
9404 output_asm_insn ("fist%z0\t%0", operands);
9405 if (round_mode != I387_CW_ANY)
9406 output_asm_insn ("fldcw\t%2", operands);
9407 }
9408
9409 return "";
9410 }
9411
9412 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9413 have the values zero or one, indicates the ffreep insn's operand
9414 from the OPERANDS array. */
9415
9416 static const char *
9417 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9418 {
9419 if (TARGET_USE_FFREEP)
9420 #if HAVE_AS_IX86_FFREEP
9421 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9422 #else
9423 {
9424 static char retval[] = ".word\t0xc_df";
9425 int regno = REGNO (operands[opno]);
9426
9427 gcc_assert (FP_REGNO_P (regno));
9428
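/* A note on the encoding: on little-endian x86 ".word 0xc0df" emits the
bytes 0xdf 0xc0, i.e. the two-byte opcode of "ffreep %st(0)"; patching
the digit at retval[9] selects st(0) through st(7). */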
9429 retval[9] = '0' + (regno - FIRST_STACK_REG);
9430 return retval;
9431 }
9432 #endif
9433
9434 return opno ? "fstp\t%y1" : "fstp\t%y0";
9435 }
9436
9437
9438 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9439 should be used. UNORDERED_P is true when fucom should be used. */
9440
9441 const char *
9442 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9443 {
9444 int stack_top_dies;
9445 rtx cmp_op0, cmp_op1;
9446 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9447
9448 if (eflags_p)
9449 {
9450 cmp_op0 = operands[0];
9451 cmp_op1 = operands[1];
9452 }
9453 else
9454 {
9455 cmp_op0 = operands[1];
9456 cmp_op1 = operands[2];
9457 }
9458
9459 if (is_sse)
9460 {
9461 if (GET_MODE (operands[0]) == SFmode)
9462 if (unordered_p)
9463 return "ucomiss\t{%1, %0|%0, %1}";
9464 else
9465 return "comiss\t{%1, %0|%0, %1}";
9466 else
9467 if (unordered_p)
9468 return "ucomisd\t{%1, %0|%0, %1}";
9469 else
9470 return "comisd\t{%1, %0|%0, %1}";
9471 }
9472
9473 gcc_assert (STACK_TOP_P (cmp_op0));
9474
9475 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9476
9477 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9478 {
9479 if (stack_top_dies)
9480 {
9481 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9482 return output_387_ffreep (operands, 1);
9483 }
9484 else
9485 return "ftst\n\tfnstsw\t%0";
9486 }
9487
9488 if (STACK_REG_P (cmp_op1)
9489 && stack_top_dies
9490 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9491 && REGNO (cmp_op1) != FIRST_STACK_REG)
9492 {
9493 /* If the top of the 387 stack dies, and the other operand
9494 is also a stack register that dies, then this must be a
9495 `fcompp' float compare. */
9496
9497 if (eflags_p)
9498 {
9499 /* There is no double popping fcomi variant. Fortunately,
9500 eflags is immune from the fstp's cc clobbering. */
9501 if (unordered_p)
9502 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9503 else
9504 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9505 return output_387_ffreep (operands, 0);
9506 }
9507 else
9508 {
9509 if (unordered_p)
9510 return "fucompp\n\tfnstsw\t%0";
9511 else
9512 return "fcompp\n\tfnstsw\t%0";
9513 }
9514 }
9515 else
9516 {
9517 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9518
9519 static const char * const alt[16] =
9520 {
9521 "fcom%z2\t%y2\n\tfnstsw\t%0",
9522 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9523 "fucom%z2\t%y2\n\tfnstsw\t%0",
9524 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9525
9526 "ficom%z2\t%y2\n\tfnstsw\t%0",
9527 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9528 NULL,
9529 NULL,
9530
9531 "fcomi\t{%y1, %0|%0, %y1}",
9532 "fcomip\t{%y1, %0|%0, %y1}",
9533 "fucomi\t{%y1, %0|%0, %y1}",
9534 "fucomip\t{%y1, %0|%0, %y1}",
9535
9536 NULL,
9537 NULL,
9538 NULL,
9539 NULL
9540 };
9541
9542 int mask;
9543 const char *ret;
9544
9545 mask = eflags_p << 3;
9546 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9547 mask |= unordered_p << 1;
9548 mask |= stack_top_dies;
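/* For example, an ordered fcomi-style compare of two FP stack registers
where the stack top dies gives mask 0b1001 = 9, selecting "fcomip"
from the table above. */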
9549
9550 gcc_assert (mask < 16);
9551 ret = alt[mask];
9552 gcc_assert (ret);
9553
9554 return ret;
9555 }
9556 }
9557
9558 void
9559 ix86_output_addr_vec_elt (FILE *file, int value)
9560 {
9561 const char *directive = ASM_LONG;
9562
9563 #ifdef ASM_QUAD
9564 if (TARGET_64BIT)
9565 directive = ASM_QUAD;
9566 #else
9567 gcc_assert (!TARGET_64BIT);
9568 #endif
9569
9570 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9571 }
9572
9573 void
9574 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9575 {
9576 const char *directive = ASM_LONG;
9577
9578 #ifdef ASM_QUAD
9579 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9580 directive = ASM_QUAD;
9581 #else
9582 gcc_assert (!TARGET_64BIT);
9583 #endif
9584 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9585 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9586 fprintf (file, "%s%s%d-%s%d\n",
9587 directive, LPREFIX, value, LPREFIX, rel);
9588 else if (HAVE_AS_GOTOFF_IN_DATA)
9589 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9590 #if TARGET_MACHO
9591 else if (TARGET_MACHO)
9592 {
9593 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9594 machopic_output_function_base_name (file);
9595 fprintf(file, "\n");
9596 }
9597 #endif
9598 else
9599 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9600 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9601 }
9602 \f
9603 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9604 for the target. */
9605
9606 void
9607 ix86_expand_clear (rtx dest)
9608 {
9609 rtx tmp;
9610
9611 /* We play register width games, which are only valid after reload. */
9612 gcc_assert (reload_completed);
9613
9614 /* Avoid HImode and its attendant prefix byte. */
9615 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9616 dest = gen_rtx_REG (SImode, REGNO (dest));
9617 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9618
9619 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9620 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9621 {
9622 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9623 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9624 }
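/* For reference: "xorl %eax, %eax" is two bytes versus five for
"movl $0, %eax", but unlike mov it clobbers the flags, hence the
explicit CC clobber added above. */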
9625
9626 emit_insn (tmp);
9627 }
9628
9629 /* X is an unchanging MEM. If it is a constant pool reference, return
9630 the constant pool rtx, else NULL. */
9631
9632 rtx
9633 maybe_get_pool_constant (rtx x)
9634 {
9635 x = ix86_delegitimize_address (XEXP (x, 0));
9636
9637 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9638 return get_pool_constant (x);
9639
9640 return NULL_RTX;
9641 }
9642
9643 void
9644 ix86_expand_move (enum machine_mode mode, rtx operands[])
9645 {
9646 int strict = (reload_in_progress || reload_completed);
9647 rtx op0, op1;
9648 enum tls_model model;
9649
9650 op0 = operands[0];
9651 op1 = operands[1];
9652
9653 if (GET_CODE (op1) == SYMBOL_REF)
9654 {
9655 model = SYMBOL_REF_TLS_MODEL (op1);
9656 if (model)
9657 {
9658 op1 = legitimize_tls_address (op1, model, true);
9659 op1 = force_operand (op1, op0);
9660 if (op1 == op0)
9661 return;
9662 }
9663 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9664 && SYMBOL_REF_DLLIMPORT_P (op1))
9665 op1 = legitimize_dllimport_symbol (op1, false);
9666 }
9667 else if (GET_CODE (op1) == CONST
9668 && GET_CODE (XEXP (op1, 0)) == PLUS
9669 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9670 {
9671 rtx addend = XEXP (XEXP (op1, 0), 1);
9672 rtx symbol = XEXP (XEXP (op1, 0), 0);
9673 rtx tmp = NULL;
9674
9675 model = SYMBOL_REF_TLS_MODEL (symbol);
9676 if (model)
9677 tmp = legitimize_tls_address (symbol, model, true);
9678 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9679 && SYMBOL_REF_DLLIMPORT_P (symbol))
9680 tmp = legitimize_dllimport_symbol (symbol, true);
9681
9682 if (tmp)
9683 {
9684 tmp = force_operand (tmp, NULL);
9685 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9686 op0, 1, OPTAB_DIRECT);
9687 if (tmp == op0)
9688 return;
9689 }
9690 }
9691
9692 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9693 {
9694 if (TARGET_MACHO && !TARGET_64BIT)
9695 {
9696 #if TARGET_MACHO
9697 if (MACHOPIC_PURE)
9698 {
9699 rtx temp = ((reload_in_progress
9700 || ((op0 && REG_P (op0))
9701 && mode == Pmode))
9702 ? op0 : gen_reg_rtx (Pmode));
9703 op1 = machopic_indirect_data_reference (op1, temp);
9704 op1 = machopic_legitimize_pic_address (op1, mode,
9705 temp == op1 ? 0 : temp);
9706 }
9707 else if (MACHOPIC_INDIRECT)
9708 op1 = machopic_indirect_data_reference (op1, 0);
9709 if (op0 == op1)
9710 return;
9711 #endif
9712 }
9713 else
9714 {
9715 if (MEM_P (op0))
9716 op1 = force_reg (Pmode, op1);
9717 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9718 {
9719 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9720 op1 = legitimize_pic_address (op1, reg);
9721 if (op0 == op1)
9722 return;
9723 }
9724 }
9725 }
9726 else
9727 {
9728 if (MEM_P (op0)
9729 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9730 || !push_operand (op0, mode))
9731 && MEM_P (op1))
9732 op1 = force_reg (mode, op1);
9733
9734 if (push_operand (op0, mode)
9735 && ! general_no_elim_operand (op1, mode))
9736 op1 = copy_to_mode_reg (mode, op1);
9737
9738 /* Force large constants in 64-bit compilation into a register
9739 to get them CSEed. */
9740 if (TARGET_64BIT && mode == DImode
9741 && immediate_operand (op1, mode)
9742 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9743 && !register_operand (op0, mode)
9744 && optimize && !reload_completed && !reload_in_progress)
9745 op1 = copy_to_mode_reg (mode, op1);
9746
9747 if (FLOAT_MODE_P (mode))
9748 {
9749 /* If we are loading a floating point constant to a register,
9750 force the value to memory now, since we'll get better code
9751 out the back end. */
9752
9753 if (strict)
9754 ;
9755 else if (GET_CODE (op1) == CONST_DOUBLE)
9756 {
9757 op1 = validize_mem (force_const_mem (mode, op1));
9758 if (!register_operand (op0, mode))
9759 {
9760 rtx temp = gen_reg_rtx (mode);
9761 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9762 emit_move_insn (op0, temp);
9763 return;
9764 }
9765 }
9766 }
9767 }
9768
9769 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9770 }
9771
9772 void
9773 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9774 {
9775 rtx op0 = operands[0], op1 = operands[1];
9776 unsigned int align = GET_MODE_ALIGNMENT (mode);
9777
9778 /* Force constants other than zero into memory. We do not know how
9779 the instructions used to build constants modify the upper 64 bits
9780 of the register; once we have that information we may be able
9781 to handle some of them more efficiently. */
9782 if ((reload_in_progress | reload_completed) == 0
9783 && register_operand (op0, mode)
9784 && (CONSTANT_P (op1)
9785 || (GET_CODE (op1) == SUBREG
9786 && CONSTANT_P (SUBREG_REG (op1))))
9787 && standard_sse_constant_p (op1) <= 0)
9788 op1 = validize_mem (force_const_mem (mode, op1));
9789
9790 /* TDmode values are passed as TImode on the stack. TImode values
9791 are moved via xmm registers, and moving them to the stack can result in
9792 an unaligned memory access. Use ix86_expand_vector_move_misalign()
9793 if the memory operand is not aligned correctly. */
9794 if (!no_new_pseudos
9795 && (mode == TImode) && !TARGET_64BIT
9796 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9797 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9798 {
9799 rtx tmp[2];
9800
9801 /* ix86_expand_vector_move_misalign() does not like constants ... */
9802 if (CONSTANT_P (op1)
9803 || (GET_CODE (op1) == SUBREG
9804 && CONSTANT_P (SUBREG_REG (op1))))
9805 op1 = validize_mem (force_const_mem (mode, op1));
9806
9807 /* ... nor both arguments in memory. */
9808 if (!register_operand (op0, mode)
9809 && !register_operand (op1, mode))
9810 op1 = force_reg (mode, op1);
9811
9812 tmp[0] = op0; tmp[1] = op1;
9813 ix86_expand_vector_move_misalign (mode, tmp);
9814 return;
9815 }
9816
9817 /* Make operand1 a register if it isn't already. */
9818 if (!no_new_pseudos
9819 && !register_operand (op0, mode)
9820 && !register_operand (op1, mode))
9821 {
9822 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9823 return;
9824 }
9825
9826 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9827 }
9828
9829 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9830 straight to ix86_expand_vector_move. */
9831 /* Code generation for scalar reg-reg moves of single and double precision data:
9832 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9833 movaps reg, reg
9834 else
9835 movss reg, reg
9836 if (x86_sse_partial_reg_dependency == true)
9837 movapd reg, reg
9838 else
9839 movsd reg, reg
9840
9841 Code generation for scalar loads of double precision data:
9842 if (x86_sse_split_regs == true)
9843 movlpd mem, reg (gas syntax)
9844 else
9845 movsd mem, reg
9846
9847 Code generation for unaligned packed loads of single precision data
9848 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9849 if (x86_sse_unaligned_move_optimal)
9850 movups mem, reg
9851
9852 if (x86_sse_partial_reg_dependency == true)
9853 {
9854 xorps reg, reg
9855 movlps mem, reg
9856 movhps mem+8, reg
9857 }
9858 else
9859 {
9860 movlps mem, reg
9861 movhps mem+8, reg
9862 }
9863
9864 Code generation for unaligned packed loads of double precision data
9865 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9866 if (x86_sse_unaligned_move_optimal)
9867 movupd mem, reg
9868
9869 if (x86_sse_split_regs == true)
9870 {
9871 movlpd mem, reg
9872 movhpd mem+8, reg
9873 }
9874 else
9875 {
9876 movsd mem, reg
9877 movhpd mem+8, reg
9878 }
9879 */
9880
9881 void
9882 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9883 {
9884 rtx op0, op1, m;
9885
9886 op0 = operands[0];
9887 op1 = operands[1];
9888
9889 if (MEM_P (op1))
9890 {
9891 /* If we're optimizing for size, movups is the smallest. */
9892 if (optimize_size)
9893 {
9894 op0 = gen_lowpart (V4SFmode, op0);
9895 op1 = gen_lowpart (V4SFmode, op1);
9896 emit_insn (gen_sse_movups (op0, op1));
9897 return;
9898 }
9899
9900 /* ??? If we have typed data, then it would appear that using
9901 movdqu is the only way to get unaligned data loaded with
9902 integer type. */
9903 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9904 {
9905 op0 = gen_lowpart (V16QImode, op0);
9906 op1 = gen_lowpart (V16QImode, op1);
9907 emit_insn (gen_sse2_movdqu (op0, op1));
9908 return;
9909 }
9910
9911 if (TARGET_SSE2 && mode == V2DFmode)
9912 {
9913 rtx zero;
9914
9915 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9916 {
9917 op0 = gen_lowpart (V2DFmode, op0);
9918 op1 = gen_lowpart (V2DFmode, op1);
9919 emit_insn (gen_sse2_movupd (op0, op1));
9920 return;
9921 }
9922
9923 /* When SSE registers are split into halves, we can avoid
9924 writing to the top half twice. */
9925 if (TARGET_SSE_SPLIT_REGS)
9926 {
9927 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9928 zero = op0;
9929 }
9930 else
9931 {
9932 /* ??? Not sure about the best option for the Intel chips.
9933 The following would seem to satisfy; the register is
9934 entirely cleared, breaking the dependency chain. We
9935 then store to the upper half, with a dependency depth
9936 of one. A rumor has it that Intel recommends two movsd
9937 followed by an unpacklpd, but this is unconfirmed. And
9938 given that the dependency depth of the unpacklpd would
9939 still be one, I'm not sure why this would be better. */
9940 zero = CONST0_RTX (V2DFmode);
9941 }
9942
9943 m = adjust_address (op1, DFmode, 0);
9944 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9945 m = adjust_address (op1, DFmode, 8);
9946 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9947 }
9948 else
9949 {
9950 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9951 {
9952 op0 = gen_lowpart (V4SFmode, op0);
9953 op1 = gen_lowpart (V4SFmode, op1);
9954 emit_insn (gen_sse_movups (op0, op1));
9955 return;
9956 }
9957
9958 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9959 emit_move_insn (op0, CONST0_RTX (mode));
9960 else
9961 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9962
9963 if (mode != V4SFmode)
9964 op0 = gen_lowpart (V4SFmode, op0);
9965 m = adjust_address (op1, V2SFmode, 0);
9966 emit_insn (gen_sse_loadlps (op0, op0, m));
9967 m = adjust_address (op1, V2SFmode, 8);
9968 emit_insn (gen_sse_loadhps (op0, op0, m));
9969 }
9970 }
9971 else if (MEM_P (op0))
9972 {
9973 /* If we're optimizing for size, movups is the smallest. */
9974 if (optimize_size)
9975 {
9976 op0 = gen_lowpart (V4SFmode, op0);
9977 op1 = gen_lowpart (V4SFmode, op1);
9978 emit_insn (gen_sse_movups (op0, op1));
9979 return;
9980 }
9981
9982 /* ??? Similar to above, only less clear because of quote
9983 typeless stores unquote. */
9984 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9985 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9986 {
9987 op0 = gen_lowpart (V16QImode, op0);
9988 op1 = gen_lowpart (V16QImode, op1);
9989 emit_insn (gen_sse2_movdqu (op0, op1));
9990 return;
9991 }
9992
9993 if (TARGET_SSE2 && mode == V2DFmode)
9994 {
9995 m = adjust_address (op0, DFmode, 0);
9996 emit_insn (gen_sse2_storelpd (m, op1));
9997 m = adjust_address (op0, DFmode, 8);
9998 emit_insn (gen_sse2_storehpd (m, op1));
9999 }
10000 else
10001 {
10002 if (mode != V4SFmode)
10003 op1 = gen_lowpart (V4SFmode, op1);
10004 m = adjust_address (op0, V2SFmode, 0);
10005 emit_insn (gen_sse_storelps (m, op1));
10006 m = adjust_address (op0, V2SFmode, 8);
10007 emit_insn (gen_sse_storehps (m, op1));
10008 }
10009 }
10010 else
10011 gcc_unreachable ();
10012 }
10013
10014 /* Expand a push in MODE. This is some mode for which we do not support
10015 proper push instructions, at least from the registers that we expect
10016 the value to live in. */
10017
10018 void
10019 ix86_expand_push (enum machine_mode mode, rtx x)
10020 {
10021 rtx tmp;
10022
10023 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10024 GEN_INT (-GET_MODE_SIZE (mode)),
10025 stack_pointer_rtx, 1, OPTAB_DIRECT);
10026 if (tmp != stack_pointer_rtx)
10027 emit_move_insn (stack_pointer_rtx, tmp);
10028
10029 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10030 emit_move_insn (tmp, x);
10031 }
10032
10033 /* Helper function of ix86_fixup_binary_operands to canonicalize
10034 operand order. Returns true if the operands should be swapped. */
10035
10036 static bool
10037 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10038 rtx operands[])
10039 {
10040 rtx dst = operands[0];
10041 rtx src1 = operands[1];
10042 rtx src2 = operands[2];
10043
10044 /* If the operation is not commutative, we can't do anything. */
10045 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10046 return false;
10047
10048 /* Highest priority is that src1 should match dst. */
10049 if (rtx_equal_p (dst, src1))
10050 return false;
10051 if (rtx_equal_p (dst, src2))
10052 return true;
10053
10054 /* Next highest priority is that immediate constants come second. */
10055 if (immediate_operand (src2, mode))
10056 return false;
10057 if (immediate_operand (src1, mode))
10058 return true;
10059
10060 /* Lowest priority is that memory references should come second. */
10061 if (MEM_P (src2))
10062 return false;
10063 if (MEM_P (src1))
10064 return true;
10065
10066 return false;
10067 }
10068
10069
10070 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10071 destination to use for the operation. If different from the true
10072 destination in operands[0], a copy operation will be required. */
10073
10074 rtx
10075 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10076 rtx operands[])
10077 {
10078 rtx dst = operands[0];
10079 rtx src1 = operands[1];
10080 rtx src2 = operands[2];
10081
10082 /* Canonicalize operand order. */
10083 if (ix86_swap_binary_operands_p (code, mode, operands))
10084 {
10085 rtx temp = src1;
10086 src1 = src2;
10087 src2 = temp;
10088 }
10089
10090 /* Both source operands cannot be in memory. */
10091 if (MEM_P (src1) && MEM_P (src2))
10092 {
10093 /* Optimization: Only read from memory once. */
10094 if (rtx_equal_p (src1, src2))
10095 {
10096 src2 = force_reg (mode, src2);
10097 src1 = src2;
10098 }
10099 else
10100 src2 = force_reg (mode, src2);
10101 }
10102
10103 /* If the destination is memory, and we do not have matching source
10104 operands, do things in registers. */
10105 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10106 dst = gen_reg_rtx (mode);
10107
10108 /* Source 1 cannot be a constant. */
10109 if (CONSTANT_P (src1))
10110 src1 = force_reg (mode, src1);
10111
10112 /* Source 1 cannot be a non-matching memory. */
10113 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10114 src1 = force_reg (mode, src1);
10115
10116 operands[1] = src1;
10117 operands[2] = src2;
10118 return dst;
10119 }
10120
10121 /* Similarly, but assume that the destination has already been
10122 set up properly. */
10123
10124 void
10125 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10126 enum machine_mode mode, rtx operands[])
10127 {
10128 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10129 gcc_assert (dst == operands[0]);
10130 }
10131
10132 /* Attempt to expand a binary operator. Make the expansion closer to the
10133 actual machine, than just general_operand, which would allow 3 separate
10134 memory references (one output, two input) in a single insn. */
10135
10136 void
10137 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10138 rtx operands[])
10139 {
10140 rtx src1, src2, dst, op, clob;
10141
10142 dst = ix86_fixup_binary_operands (code, mode, operands);
10143 src1 = operands[1];
10144 src2 = operands[2];
10145
10146 /* Emit the instruction. */
10147
10148 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10149 if (reload_in_progress)
10150 {
10151 /* Reload doesn't know about the flags register, and doesn't know that
10152 it doesn't want to clobber it. We can only do this with PLUS. */
10153 gcc_assert (code == PLUS);
10154 emit_insn (op);
10155 }
10156 else
10157 {
10158 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10159 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10160 }
10161
10162 /* Fix up the destination if needed. */
10163 if (dst != operands[0])
10164 emit_move_insn (operands[0], dst);
10165 }
10166
10167 /* Return TRUE or FALSE depending on whether the binary operator meets the
10168 appropriate constraints. */
10169
10170 int
10171 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10172 rtx operands[3])
10173 {
10174 rtx dst = operands[0];
10175 rtx src1 = operands[1];
10176 rtx src2 = operands[2];
10177
10178 /* Both source operands cannot be in memory. */
10179 if (MEM_P (src1) && MEM_P (src2))
10180 return 0;
10181
10182 /* Canonicalize operand order for commutative operators. */
10183 if (ix86_swap_binary_operands_p (code, mode, operands))
10184 {
10185 rtx temp = src1;
10186 src1 = src2;
10187 src2 = temp;
10188 }
10189
10190 /* If the destination is memory, we must have a matching source operand. */
10191 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10192 return 0;
10193
10194 /* Source 1 cannot be a constant. */
10195 if (CONSTANT_P (src1))
10196 return 0;
10197
10198 /* Source 1 cannot be a non-matching memory. */
10199 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10200 return 0;
10201
10202 return 1;
10203 }
10204
10205 /* Attempt to expand a unary operator. Make the expansion closer to the
10206 actual machine, than just general_operand, which would allow 2 separate
10207 memory references (one output, one input) in a single insn. */
10208
10209 void
10210 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10211 rtx operands[])
10212 {
10213 int matching_memory;
10214 rtx src, dst, op, clob;
10215
10216 dst = operands[0];
10217 src = operands[1];
10218
10219 /* If the destination is memory, and we do not have matching source
10220 operands, do things in registers. */
10221 matching_memory = 0;
10222 if (MEM_P (dst))
10223 {
10224 if (rtx_equal_p (dst, src))
10225 matching_memory = 1;
10226 else
10227 dst = gen_reg_rtx (mode);
10228 }
10229
10230 /* When source operand is memory, destination must match. */
10231 if (MEM_P (src) && !matching_memory)
10232 src = force_reg (mode, src);
10233
10234 /* Emit the instruction. */
10235
10236 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10237 if (reload_in_progress || code == NOT)
10238 {
10239 /* Reload doesn't know about the flags register, and doesn't know that
10240 it doesn't want to clobber it. */
10241 gcc_assert (code == NOT);
10242 emit_insn (op);
10243 }
10244 else
10245 {
10246 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10247 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10248 }
10249
10250 /* Fix up the destination if needed. */
10251 if (dst != operands[0])
10252 emit_move_insn (operands[0], dst);
10253 }
10254
10255 /* Return TRUE or FALSE depending on whether the unary operator meets the
10256 appropriate constraints. */
10257
10258 int
10259 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10260 enum machine_mode mode ATTRIBUTE_UNUSED,
10261 rtx operands[2] ATTRIBUTE_UNUSED)
10262 {
10263 /* If one of operands is memory, source and destination must match. */
10264 if ((MEM_P (operands[0])
10265 || MEM_P (operands[1]))
10266 && ! rtx_equal_p (operands[0], operands[1]))
10267 return FALSE;
10268 return TRUE;
10269 }
10270
10271 /* Post-reload splitter for converting an SF or DFmode value in an
10272 SSE register into an unsigned SImode. */
10273
10274 void
10275 ix86_split_convert_uns_si_sse (rtx operands[])
10276 {
10277 enum machine_mode vecmode;
10278 rtx value, large, zero_or_two31, input, two31, x;
10279
10280 large = operands[1];
10281 zero_or_two31 = operands[2];
10282 input = operands[3];
10283 two31 = operands[4];
10284 vecmode = GET_MODE (large);
10285 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10286
10287 /* Load up the value into the low element. We must ensure that the other
10288 elements are valid floats -- zero is the easiest such value. */
10289 if (MEM_P (input))
10290 {
10291 if (vecmode == V4SFmode)
10292 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10293 else
10294 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10295 }
10296 else
10297 {
10298 input = gen_rtx_REG (vecmode, REGNO (input));
10299 emit_move_insn (value, CONST0_RTX (vecmode));
10300 if (vecmode == V4SFmode)
10301 emit_insn (gen_sse_movss (value, value, input));
10302 else
10303 emit_insn (gen_sse2_movsd (value, value, input));
10304 }
10305
10306 emit_move_insn (large, two31);
10307 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10308
10309 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10310 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10311
10312 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10313 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10314
10315 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10316 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10317
10318 large = gen_rtx_REG (V4SImode, REGNO (large));
10319 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10320
10321 x = gen_rtx_REG (V4SImode, REGNO (value));
10322 if (vecmode == V4SFmode)
10323 emit_insn (gen_sse2_cvttps2dq (x, value));
10324 else
10325 emit_insn (gen_sse2_cvttpd2dq (x, value));
10326 value = x;
10327
10328 emit_insn (gen_xorv4si3 (value, value, large));
10329 }
10330
10331 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10332 Expects the 64-bit DImode to be supplied in a pair of integral
10333 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10334 -mfpmath=sse, !optimize_size only. */
10335
10336 void
10337 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10338 {
10339 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10340 rtx int_xmm, fp_xmm;
10341 rtx biases, exponents;
10342 rtx x;
10343
10344 int_xmm = gen_reg_rtx (V4SImode);
10345 if (TARGET_INTER_UNIT_MOVES)
10346 emit_insn (gen_movdi_to_sse (int_xmm, input));
10347 else if (TARGET_SSE_SPLIT_REGS)
10348 {
10349 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10350 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10351 }
10352 else
10353 {
10354 x = gen_reg_rtx (V2DImode);
10355 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10356 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10357 }
10358
10359 x = gen_rtx_CONST_VECTOR (V4SImode,
10360 gen_rtvec (4, GEN_INT (0x43300000UL),
10361 GEN_INT (0x45300000UL),
10362 const0_rtx, const0_rtx));
10363 exponents = validize_mem (force_const_mem (V4SImode, x));
10364
10365 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10366 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10367
10368 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10369 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10370 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10371 (0x1.0p84 + double(fp_value_hi_xmm)).
10372 Note these exponents differ by 32. */
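/* A worked example: for input 5 the low lane becomes the double
0x1.0p52 + 5 and the high lane 0x1.0p84 + 0; after the bias
subtraction below they hold 5.0 and 0.0, and the final add yields
5.0, the desired result. */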
10373
10374 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10375
10376 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10377 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10378 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10379 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10380 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10381 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10382 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10383 biases = validize_mem (force_const_mem (V2DFmode, biases));
10384 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10385
10386 /* Add the upper and lower DFmode values together. */
10387 if (TARGET_SSE3)
10388 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10389 else
10390 {
10391 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10392 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10393 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10394 }
10395
10396 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10397 }
10398
10399 /* Convert an unsigned SImode value into a DFmode. Only currently used
10400 for SSE, but applicable anywhere. */
10401
10402 void
10403 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10404 {
10405 REAL_VALUE_TYPE TWO31r;
10406 rtx x, fp;
10407
10408 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10409 NULL, 1, OPTAB_DIRECT);
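/* The wrap-around is intentional: e.g. for input 0xffffffff the
addition yields 0x7fffffff = 2147483647; converting that as a signed
value and adding 0x1.0p31 back below gives 4294967295.0. */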
10410
10411 fp = gen_reg_rtx (DFmode);
10412 emit_insn (gen_floatsidf2 (fp, x));
10413
10414 real_ldexp (&TWO31r, &dconst1, 31);
10415 x = const_double_from_real_value (TWO31r, DFmode);
10416
10417 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10418 if (x != target)
10419 emit_move_insn (target, x);
10420 }
10421
10422 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10423 32-bit mode; otherwise we have a direct convert instruction. */
10424
10425 void
10426 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10427 {
10428 REAL_VALUE_TYPE TWO32r;
10429 rtx fp_lo, fp_hi, x;
10430
10431 fp_lo = gen_reg_rtx (DFmode);
10432 fp_hi = gen_reg_rtx (DFmode);
10433
10434 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10435
10436 real_ldexp (&TWO32r, &dconst1, 32);
10437 x = const_double_from_real_value (TWO32r, DFmode);
10438 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10439
10440 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10441
10442 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10443 0, OPTAB_DIRECT);
10444 if (x != target)
10445 emit_move_insn (target, x);
10446 }
10447
10448 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10449 For x86_32, -mfpmath=sse, !optimize_size only. */
10450 void
10451 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10452 {
10453 REAL_VALUE_TYPE ONE16r;
10454 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10455
10456 real_ldexp (&ONE16r, &dconst1, 16);
10457 x = const_double_from_real_value (ONE16r, SFmode);
10458 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10459 NULL, 0, OPTAB_DIRECT);
10460 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10461 NULL, 0, OPTAB_DIRECT);
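/* For example, input 0x12345678 splits into int_hi = 0x1234 and
int_lo = 0x5678; each half converts to SFmode exactly, and the
multiply-add below produces the correctly rounded single precision
value of 0x1234 * 65536 + 0x5678 = 305419896. */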
10462 fp_hi = gen_reg_rtx (SFmode);
10463 fp_lo = gen_reg_rtx (SFmode);
10464 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10465 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10466 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10467 0, OPTAB_DIRECT);
10468 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10469 0, OPTAB_DIRECT);
10470 if (!rtx_equal_p (target, fp_hi))
10471 emit_move_insn (target, fp_hi);
10472 }
10473
10474 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10475 then replicate the value for all elements of the vector
10476 register. */
10477
10478 rtx
10479 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10480 {
10481 rtvec v;
10482 switch (mode)
10483 {
10484 case SFmode:
10485 if (vect)
10486 v = gen_rtvec (4, value, value, value, value);
10487 else
10488 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10489 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10490 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10491
10492 case DFmode:
10493 if (vect)
10494 v = gen_rtvec (2, value, value);
10495 else
10496 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10497 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10498
10499 default:
10500 gcc_unreachable ();
10501 }
10502 }
10503
10504 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10505 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10506 true, then replicate the mask for all elements of the vector register.
10507 If INVERT is true, then create a mask excluding the sign bit. */
10508
10509 rtx
10510 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10511 {
10512 enum machine_mode vec_mode;
10513 HOST_WIDE_INT hi, lo;
10514 int shift = 63;
10515 rtx v;
10516 rtx mask;
10517
10518 /* Find the sign bit, sign extended to 2*HWI. */
10519 if (mode == SFmode)
10520 lo = 0x80000000, hi = lo < 0;
10521 else if (HOST_BITS_PER_WIDE_INT >= 64)
10522 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10523 else
10524 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10525
10526 if (invert)
10527 lo = ~lo, hi = ~hi;
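/* The constant built here is just the IEEE sign bit: 0x80000000 for
SFmode and 0x8000000000000000 for DFmode, or their complements when
INVERT is true. */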
10528
10529 /* Force this value into the low part of a fp vector constant. */
10530 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10531 mask = gen_lowpart (mode, mask);
10532
10533 v = ix86_build_const_vector (mode, vect, mask);
10534 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10535 return force_reg (vec_mode, v);
10536 }
10537
10538 /* Generate code for floating point ABS or NEG. */
10539
10540 void
10541 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10542 rtx operands[])
10543 {
10544 rtx mask, set, use, clob, dst, src;
10545 bool matching_memory;
10546 bool use_sse = false;
10547 bool vector_mode = VECTOR_MODE_P (mode);
10548 enum machine_mode elt_mode = mode;
10549
10550 if (vector_mode)
10551 {
10552 elt_mode = GET_MODE_INNER (mode);
10553 use_sse = true;
10554 }
10555 else if (TARGET_SSE_MATH)
10556 use_sse = SSE_FLOAT_MODE_P (mode);
10557
10558 /* NEG and ABS performed with SSE use bitwise mask operations.
10559 Create the appropriate mask now. */
10560 if (use_sse)
10561 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10562 else
10563 mask = NULL_RTX;
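/* With SSE, negation becomes an XOR with the sign-bit mask and ABS an
AND with its complement; the vector case below builds that rtl
directly, while the scalar case leaves it to the insn patterns via
the USE of the mask. */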
10564
10565 dst = operands[0];
10566 src = operands[1];
10567
10568 /* If the destination is memory, and we don't have matching source
10569 operands or we're using the x87, do things in registers. */
10570 matching_memory = false;
10571 if (MEM_P (dst))
10572 {
10573 if (use_sse && rtx_equal_p (dst, src))
10574 matching_memory = true;
10575 else
10576 dst = gen_reg_rtx (mode);
10577 }
10578 if (MEM_P (src) && !matching_memory)
10579 src = force_reg (mode, src);
10580
10581 if (vector_mode)
10582 {
10583 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10584 set = gen_rtx_SET (VOIDmode, dst, set);
10585 emit_insn (set);
10586 }
10587 else
10588 {
10589 set = gen_rtx_fmt_e (code, mode, src);
10590 set = gen_rtx_SET (VOIDmode, dst, set);
10591 if (mask)
10592 {
10593 use = gen_rtx_USE (VOIDmode, mask);
10594 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10595 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10596 gen_rtvec (3, set, use, clob)));
10597 }
10598 else
10599 emit_insn (set);
10600 }
10601
10602 if (dst != operands[0])
10603 emit_move_insn (operands[0], dst);
10604 }
10605
10606 /* Expand a copysign operation. Special case operand 0 being a constant. */
10607
10608 void
10609 ix86_expand_copysign (rtx operands[])
10610 {
10611 enum machine_mode mode, vmode;
10612 rtx dest, op0, op1, mask, nmask;
10613
10614 dest = operands[0];
10615 op0 = operands[1];
10616 op1 = operands[2];
10617
10618 mode = GET_MODE (dest);
10619 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10620
10621 if (GET_CODE (op0) == CONST_DOUBLE)
10622 {
10623 rtvec v;
10624
10625 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10626 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10627
10628 if (op0 == CONST0_RTX (mode))
10629 op0 = CONST0_RTX (vmode);
10630 else
10631 {
10632 if (mode == SFmode)
10633 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10634 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10635 else
10636 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10637 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10638 }
10639
10640 mask = ix86_build_signbit_mask (mode, 0, 0);
10641
10642 if (mode == SFmode)
10643 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10644 else
10645 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10646 }
10647 else
10648 {
10649 nmask = ix86_build_signbit_mask (mode, 0, 1);
10650 mask = ix86_build_signbit_mask (mode, 0, 0);
10651
10652 if (mode == SFmode)
10653 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10654 else
10655 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10656 }
10657 }
10658
10659 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10660 be a constant, and so has already been expanded into a vector constant. */
10661
10662 void
10663 ix86_split_copysign_const (rtx operands[])
10664 {
10665 enum machine_mode mode, vmode;
10666 rtx dest, op0, op1, mask, x;
10667
10668 dest = operands[0];
10669 op0 = operands[1];
10670 op1 = operands[2];
10671 mask = operands[3];
10672
10673 mode = GET_MODE (dest);
10674 vmode = GET_MODE (mask);
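/* OP1 is not referenced below because the insn apparently ties it to
DEST (a "0"-style constraint), so DEST already holds OP1's value:
the AND with the sign-bit mask extracts OP1's sign and the IOR
merges in the already non-negative constant OP0. */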
10675
10676 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10677 x = gen_rtx_AND (vmode, dest, mask);
10678 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10679
10680 if (op0 != CONST0_RTX (vmode))
10681 {
10682 x = gen_rtx_IOR (vmode, dest, op0);
10683 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10684 }
10685 }
10686
10687 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10688 so we have to do two masks. */
10689
10690 void
10691 ix86_split_copysign_var (rtx operands[])
10692 {
10693 enum machine_mode mode, vmode;
10694 rtx dest, scratch, op0, op1, mask, nmask, x;
10695
10696 dest = operands[0];
10697 scratch = operands[1];
10698 op0 = operands[2];
10699 op1 = operands[3];
10700 nmask = operands[4];
10701 mask = operands[5];
10702
10703 mode = GET_MODE (dest);
10704 vmode = GET_MODE (mask);
10705
10706 if (rtx_equal_p (op0, op1))
10707 {
10708 /* Shouldn't happen often (it's useless, obviously), but when it does
10709 we'd generate incorrect code if we continue below. */
10710 emit_move_insn (dest, op0);
10711 return;
10712 }
10713
10714 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10715 {
10716 gcc_assert (REGNO (op1) == REGNO (scratch));
10717
10718 x = gen_rtx_AND (vmode, scratch, mask);
10719 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10720
10721 dest = mask;
10722 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10723 x = gen_rtx_NOT (vmode, dest);
10724 x = gen_rtx_AND (vmode, x, op0);
10725 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10726 }
10727 else
10728 {
10729 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10730 {
10731 x = gen_rtx_AND (vmode, scratch, mask);
10732 }
10733 else /* alternative 2,4 */
10734 {
10735 gcc_assert (REGNO (mask) == REGNO (scratch));
10736 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10737 x = gen_rtx_AND (vmode, scratch, op1);
10738 }
10739 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10740
10741 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10742 {
10743 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10744 x = gen_rtx_AND (vmode, dest, nmask);
10745 }
10746 else /* alternative 3,4 */
10747 {
10748 gcc_assert (REGNO (nmask) == REGNO (dest));
10749 dest = nmask;
10750 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10751 x = gen_rtx_AND (vmode, dest, op0);
10752 }
10753 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10754 }
10755
10756 x = gen_rtx_IOR (vmode, dest, scratch);
10757 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10758 }
10759
10760 /* Return TRUE or FALSE depending on whether the first SET in INSN
10761 has a source and destination with matching CC modes, and whether the
10762 CC mode is at least as constrained as REQ_MODE. */
10763
10764 int
10765 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10766 {
10767 rtx set;
10768 enum machine_mode set_mode;
10769
10770 set = PATTERN (insn);
10771 if (GET_CODE (set) == PARALLEL)
10772 set = XVECEXP (set, 0, 0);
10773 gcc_assert (GET_CODE (set) == SET);
10774 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10775
10776 set_mode = GET_MODE (SET_DEST (set));
10777 switch (set_mode)
10778 {
10779 case CCNOmode:
10780 if (req_mode != CCNOmode
10781 && (req_mode != CCmode
10782 || XEXP (SET_SRC (set), 1) != const0_rtx))
10783 return 0;
10784 break;
10785 case CCmode:
10786 if (req_mode == CCGCmode)
10787 return 0;
10788 /* FALLTHRU */
10789 case CCGCmode:
10790 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10791 return 0;
10792 /* FALLTHRU */
10793 case CCGOCmode:
10794 if (req_mode == CCZmode)
10795 return 0;
10796 /* FALLTHRU */
10797 case CCZmode:
10798 break;
10799
10800 default:
10801 gcc_unreachable ();
10802 }
10803
10804 return (GET_MODE (SET_SRC (set)) == set_mode);
10805 }
10806
10807 /* Generate insn patterns to do an integer compare of OPERANDS. */
10808
10809 static rtx
10810 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10811 {
10812 enum machine_mode cmpmode;
10813 rtx tmp, flags;
10814
10815 cmpmode = SELECT_CC_MODE (code, op0, op1);
10816 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10817
10818 /* This is very simple, but making the interface the same as in the
10819 FP case makes the rest of the code easier. */
10820 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10821 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10822
10823 /* Return the test that should be put into the flags user, i.e.
10824 the bcc, scc, or cmov instruction. */
10825 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10826 }
10827
10828 /* Figure out whether to use ordered or unordered fp comparisons.
10829 Return the appropriate mode to use. */
10830
10831 enum machine_mode
10832 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10833 {
10834 /* ??? In order to make all comparisons reversible, we do all comparisons
10835 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10836 all forms of trapping and nontrapping comparisons, we can make inequality
10837 comparisons trapping again, since it results in better code when using
10838 FCOM based compares. */
10839 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10840 }
10841
10842 enum machine_mode
10843 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10844 {
10845 enum machine_mode mode = GET_MODE (op0);
10846
10847 if (SCALAR_FLOAT_MODE_P (mode))
10848 {
10849 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10850 return ix86_fp_compare_mode (code);
10851 }
10852
10853 switch (code)
10854 {
10855 /* Only zero flag is needed. */
10856 case EQ: /* ZF=0 */
10857 case NE: /* ZF!=0 */
10858 return CCZmode;
10859 /* Codes needing carry flag. */
10860 case GEU: /* CF=0 */
10861 case GTU: /* CF=0 & ZF=0 */
10862 case LTU: /* CF=1 */
10863 case LEU: /* CF=1 | ZF=1 */
10864 return CCmode;
10865 /* Codes possibly doable only with sign flag when
10866 comparing against zero. */
10867 case GE: /* SF=OF or SF=0 */
10868 case LT: /* SF<>OF or SF=1 */
10869 if (op1 == const0_rtx)
10870 return CCGOCmode;
10871 else
10872 /* For other cases Carry flag is not required. */
10873 return CCGCmode;
10874 /* Codes doable only with the sign flag when comparing
10875 against zero, but we lack a jump instruction for it,
10876 so we need to use relational tests against the overflow
10877 flag, which thus needs to be zero. */
10878 case GT: /* ZF=0 & SF=OF */
10879 case LE: /* ZF=1 | SF<>OF */
10880 if (op1 == const0_rtx)
10881 return CCNOmode;
10882 else
10883 return CCGCmode;
10884 /* The strcmp pattern does (use flags), and combine may ask us for a
10885 proper mode. */
10886 case USE:
10887 return CCmode;
10888 default:
10889 gcc_unreachable ();
10890 }
10891 }
10892
10893 /* Return the fixed registers used for condition codes. */
10894
10895 static bool
10896 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10897 {
10898 *p1 = FLAGS_REG;
10899 *p2 = FPSR_REG;
10900 return true;
10901 }
10902
10903 /* If two condition code modes are compatible, return a condition code
10904 mode which is compatible with both. Otherwise, return
10905 VOIDmode. */
10906
10907 static enum machine_mode
10908 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10909 {
10910 if (m1 == m2)
10911 return m1;
10912
10913 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10914 return VOIDmode;
10915
10916 if ((m1 == CCGCmode && m2 == CCGOCmode)
10917 || (m1 == CCGOCmode && m2 == CCGCmode))
10918 return CCGCmode;
10919
10920 switch (m1)
10921 {
10922 default:
10923 gcc_unreachable ();
10924
10925 case CCmode:
10926 case CCGCmode:
10927 case CCGOCmode:
10928 case CCNOmode:
10929 case CCZmode:
10930 switch (m2)
10931 {
10932 default:
10933 return VOIDmode;
10934
10935 case CCmode:
10936 case CCGCmode:
10937 case CCGOCmode:
10938 case CCNOmode:
10939 case CCZmode:
10940 return CCmode;
10941 }
10942
10943 case CCFPmode:
10944 case CCFPUmode:
10945 /* These are only compatible with themselves, which we already
10946 checked above. */
10947 return VOIDmode;
10948 }
10949 }
10950
10951 /* Split comparison code CODE into comparisons we can do using branch
10952 instructions. BYPASS_CODE is the comparison code for the branch that
10953 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10954 is not required, its code is set to UNKNOWN.
10955 We never require more than two branches. */
10956
10957 void
10958 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10959 enum rtx_code *first_code,
10960 enum rtx_code *second_code)
10961 {
10962 *first_code = code;
10963 *bypass_code = UNKNOWN;
10964 *second_code = UNKNOWN;
10965
10966 /* The fcomi comparison sets flags as follows:
10967
10968 cmp ZF PF CF
10969 > 0 0 0
10970 < 0 0 1
10971 = 1 0 0
10972 un 1 1 1 */
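/* When the operands are unordered, ZF, PF and CF are all set at once, so a
single CF/ZF based branch cannot by itself give IEEE-correct results for
codes such as LT, LE, EQ, NE, UNGE and UNGT. The cases below therefore add
an UNORDERED bypass branch or a second UNORDERED branch (dropped again when
!TARGET_IEEE_FP). */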
10973
10974 switch (code)
10975 {
10976 case GT: /* GTU - CF=0 & ZF=0 */
10977 case GE: /* GEU - CF=0 */
10978 case ORDERED: /* PF=0 */
10979 case UNORDERED: /* PF=1 */
10980 case UNEQ: /* EQ - ZF=1 */
10981 case UNLT: /* LTU - CF=1 */
10982 case UNLE: /* LEU - CF=1 | ZF=1 */
10983 case LTGT: /* EQ - ZF=0 */
10984 break;
10985 case LT: /* LTU - CF=1 - fails on unordered */
10986 *first_code = UNLT;
10987 *bypass_code = UNORDERED;
10988 break;
10989 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10990 *first_code = UNLE;
10991 *bypass_code = UNORDERED;
10992 break;
10993 case EQ: /* EQ - ZF=1 - fails on unordered */
10994 *first_code = UNEQ;
10995 *bypass_code = UNORDERED;
10996 break;
10997 case NE: /* NE - ZF=0 - fails on unordered */
10998 *first_code = LTGT;
10999 *second_code = UNORDERED;
11000 break;
11001 case UNGE: /* GEU - CF=0 - fails on unordered */
11002 *first_code = GE;
11003 *second_code = UNORDERED;
11004 break;
11005 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11006 *first_code = GT;
11007 *second_code = UNORDERED;
11008 break;
11009 default:
11010 gcc_unreachable ();
11011 }
11012 if (!TARGET_IEEE_FP)
11013 {
11014 *second_code = UNKNOWN;
11015 *bypass_code = UNKNOWN;
11016 }
11017 }
11018
11019 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11020 All of the following functions use the number of instructions as the cost metric.
11021 In the future this should be tweaked to compute bytes for optimize_size and
11022 take into account the performance of various instructions on various CPUs. */
11023 static int
11024 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11025 {
11026 if (!TARGET_IEEE_FP)
11027 return 4;
11028 /* The cost of code output by ix86_expand_fp_compare. */
11029 switch (code)
11030 {
11031 case UNLE:
11032 case UNLT:
11033 case LTGT:
11034 case GT:
11035 case GE:
11036 case UNORDERED:
11037 case ORDERED:
11038 case UNEQ:
11039 return 4;
11041 case LT:
11042 case NE:
11043 case EQ:
11044 case UNGE:
11045 return 5;
11047 case LE:
11048 case UNGT:
11049 return 6;
11051 default:
11052 gcc_unreachable ();
11053 }
11054 }
11055
11056 /* Return cost of comparison done using fcomi operation.
11057 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11058 static int
11059 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11060 {
11061 enum rtx_code bypass_code, first_code, second_code;
11062 /* Return an arbitrarily high cost when the instruction is not supported;
11063 this prevents gcc from using it. */
11064 if (!TARGET_CMOVE)
11065 return 1024;
11066 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11067 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11068 }
11069
11070 /* Return cost of comparison done using sahf operation.
11071 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11072 static int
11073 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11074 {
11075 enum rtx_code bypass_code, first_code, second_code;
11076 /* Return an arbitrarily high cost when the instruction is not preferred;
11077 this keeps gcc from using it. */
11078 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11079 return 1024;
11080 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11081 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11082 }
11083
11084 /* Compute cost of the comparison done using any method.
11085 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11086 static int
11087 ix86_fp_comparison_cost (enum rtx_code code)
11088 {
11089 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11090 int min;
11091
11092 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11093 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11094
11095 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11096 if (min > sahf_cost)
11097 min = sahf_cost;
11098 if (min > fcomi_cost)
11099 min = fcomi_cost;
11100 return min;
11101 }
11102
11103 /* Return true if we should use an FCOMI instruction for this
11104 fp comparison. */
11105
11106 int
11107 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11108 {
11109 enum rtx_code swapped_code = swap_condition (code);
11110
11111 return ((ix86_fp_comparison_cost (code)
11112 == ix86_fp_comparison_fcomi_cost (code))
11113 || (ix86_fp_comparison_cost (swapped_code)
11114 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11115 }
11116
11117 /* Swap, force into registers, or otherwise massage the two operands
11118 to a fp comparison. The operands are updated in place; the new
11119 comparison code is returned. */
11120
11121 static enum rtx_code
11122 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11123 {
11124 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11125 rtx op0 = *pop0, op1 = *pop1;
11126 enum machine_mode op_mode = GET_MODE (op0);
11127 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11128
11129 /* All of the unordered compare instructions only work on registers.
11130 The same is true of the fcomi compare instructions. The XFmode
11131 compare instructions require registers except when comparing
11132 against zero or when converting operand 1 from fixed point to
11133 floating point. */
11134
11135 if (!is_sse
11136 && (fpcmp_mode == CCFPUmode
11137 || (op_mode == XFmode
11138 && ! (standard_80387_constant_p (op0) == 1
11139 || standard_80387_constant_p (op1) == 1)
11140 && GET_CODE (op1) != FLOAT)
11141 || ix86_use_fcomi_compare (code)))
11142 {
11143 op0 = force_reg (op_mode, op0);
11144 op1 = force_reg (op_mode, op1);
11145 }
11146 else
11147 {
11148 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11149 things around if they appear profitable, otherwise force op0
11150 into a register. */
11151
11152 if (standard_80387_constant_p (op0) == 0
11153 || (MEM_P (op0)
11154 && ! (standard_80387_constant_p (op1) == 0
11155 || MEM_P (op1))))
11156 {
11157 rtx tmp;
11158 tmp = op0, op0 = op1, op1 = tmp;
11159 code = swap_condition (code);
11160 }
11161
11162 if (!REG_P (op0))
11163 op0 = force_reg (op_mode, op0);
11164
11165 if (CONSTANT_P (op1))
11166 {
11167 int tmp = standard_80387_constant_p (op1);
11168 if (tmp == 0)
11169 op1 = validize_mem (force_const_mem (op_mode, op1));
11170 else if (tmp == 1)
11171 {
11172 if (TARGET_CMOVE)
11173 op1 = force_reg (op_mode, op1);
11174 }
11175 else
11176 op1 = force_reg (op_mode, op1);
11177 }
11178 }
11179
11180 /* Try to rearrange the comparison to make it cheaper. */
11181 if (ix86_fp_comparison_cost (code)
11182 > ix86_fp_comparison_cost (swap_condition (code))
11183 && (REG_P (op1) || !no_new_pseudos))
11184 {
11185 rtx tmp;
11186 tmp = op0, op0 = op1, op1 = tmp;
11187 code = swap_condition (code);
11188 if (!REG_P (op0))
11189 op0 = force_reg (op_mode, op0);
11190 }
11191
11192 *pop0 = op0;
11193 *pop1 = op1;
11194 return code;
11195 }
11196
11197 /* Convert the comparison codes we use to represent FP comparisons into the
11198 integer code that will result in a proper branch. Return UNKNOWN if no
11199 such code is available. */
11200
11201 enum rtx_code
11202 ix86_fp_compare_code_to_integer (enum rtx_code code)
11203 {
11204 switch (code)
11205 {
11206 case GT:
11207 return GTU;
11208 case GE:
11209 return GEU;
11210 case ORDERED:
11211 case UNORDERED:
11212 return code;
11214 case UNEQ:
11215 return EQ;
11217 case UNLT:
11218 return LTU;
11220 case UNLE:
11221 return LEU;
11223 case LTGT:
11224 return NE;
11226 default:
11227 return UNKNOWN;
11228 }
11229 }
11230
11231 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11232
11233 static rtx
11234 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11235 rtx *second_test, rtx *bypass_test)
11236 {
11237 enum machine_mode fpcmp_mode, intcmp_mode;
11238 rtx tmp, tmp2;
11239 int cost = ix86_fp_comparison_cost (code);
11240 enum rtx_code bypass_code, first_code, second_code;
11241
11242 fpcmp_mode = ix86_fp_compare_mode (code);
11243 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11244
11245 if (second_test)
11246 *second_test = NULL_RTX;
11247 if (bypass_test)
11248 *bypass_test = NULL_RTX;
11249
11250 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11251
11252 /* Do fcomi/sahf based test when profitable. */
11253 if ((TARGET_CMOVE || TARGET_SAHF)
11254 && (bypass_code == UNKNOWN || bypass_test)
11255 && (second_code == UNKNOWN || second_test)
11256 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11257 {
11258 if (TARGET_CMOVE)
11259 {
11260 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11261 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11262 tmp);
11263 emit_insn (tmp);
11264 }
11265 else
11266 {
11267 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11268 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11269 if (!scratch)
11270 scratch = gen_reg_rtx (HImode);
11271 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11272 emit_insn (gen_x86_sahf_1 (scratch));
11273 }
11274
11275 /* The FP codes work out to act like unsigned. */
11276 intcmp_mode = fpcmp_mode;
11277 code = first_code;
11278 if (bypass_code != UNKNOWN)
11279 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11280 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11281 const0_rtx);
11282 if (second_code != UNKNOWN)
11283 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11284 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11285 const0_rtx);
11286 }
11287 else
11288 {
11289 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11290 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11291 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11292 if (!scratch)
11293 scratch = gen_reg_rtx (HImode);
11294 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11295
11296 /* In the unordered case, we have to check C2 for NaN's, which
11297 doesn't happen to work out to anything nice combination-wise.
11298 So do some bit twiddling on the value we've got in AH to come
11299 up with an appropriate set of condition codes. */
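/* For reference: after fnstsw, the relevant status word bits land in AH as
C0 = 0x01, C2 = 0x04 and C3 = 0x40, so the 0x45 masks used below test
C0|C2|C3 at once. */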
11300
11301 intcmp_mode = CCNOmode;
11302 switch (code)
11303 {
11304 case GT:
11305 case UNGT:
11306 if (code == GT || !TARGET_IEEE_FP)
11307 {
11308 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11309 code = EQ;
11310 }
11311 else
11312 {
11313 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11314 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11315 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11316 intcmp_mode = CCmode;
11317 code = GEU;
11318 }
11319 break;
11320 case LT:
11321 case UNLT:
11322 if (code == LT && TARGET_IEEE_FP)
11323 {
11324 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11325 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11326 intcmp_mode = CCmode;
11327 code = EQ;
11328 }
11329 else
11330 {
11331 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11332 code = NE;
11333 }
11334 break;
11335 case GE:
11336 case UNGE:
11337 if (code == GE || !TARGET_IEEE_FP)
11338 {
11339 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11340 code = EQ;
11341 }
11342 else
11343 {
11344 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11345 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11346 GEN_INT (0x01)));
11347 code = NE;
11348 }
11349 break;
11350 case LE:
11351 case UNLE:
11352 if (code == LE && TARGET_IEEE_FP)
11353 {
11354 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11355 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11356 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11357 intcmp_mode = CCmode;
11358 code = LTU;
11359 }
11360 else
11361 {
11362 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11363 code = NE;
11364 }
11365 break;
11366 case EQ:
11367 case UNEQ:
11368 if (code == EQ && TARGET_IEEE_FP)
11369 {
11370 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11371 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11372 intcmp_mode = CCmode;
11373 code = EQ;
11374 }
11375 else
11376 {
11377 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11378 code = NE;
11379 break;
11380 }
11381 break;
11382 case NE:
11383 case LTGT:
11384 if (code == NE && TARGET_IEEE_FP)
11385 {
11386 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11387 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11388 GEN_INT (0x40)));
11389 code = NE;
11390 }
11391 else
11392 {
11393 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11394 code = EQ;
11395 }
11396 break;
11397
11398 case UNORDERED:
11399 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11400 code = NE;
11401 break;
11402 case ORDERED:
11403 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11404 code = EQ;
11405 break;
11406
11407 default:
11408 gcc_unreachable ();
11409 }
11410 }
11411
11412 /* Return the test that should be put into the flags user, i.e.
11413 the bcc, scc, or cmov instruction. */
11414 return gen_rtx_fmt_ee (code, VOIDmode,
11415 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11416 const0_rtx);
11417 }
11418
11419 rtx
11420 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11421 {
11422 rtx op0, op1, ret;
11423 op0 = ix86_compare_op0;
11424 op1 = ix86_compare_op1;
11425
11426 if (second_test)
11427 *second_test = NULL_RTX;
11428 if (bypass_test)
11429 *bypass_test = NULL_RTX;
11430
11431 if (ix86_compare_emitted)
11432 {
11433 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11434 ix86_compare_emitted = NULL_RTX;
11435 }
11436 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11437 {
11438 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11439 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11440 second_test, bypass_test);
11441 }
11442 else
11443 ret = ix86_expand_int_compare (code, op0, op1);
11444
11445 return ret;
11446 }
11447
11448 /* Return true if CODE will result in a nontrivial jump sequence. */
11449 bool
11450 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11451 {
11452 enum rtx_code bypass_code, first_code, second_code;
11453 if (!TARGET_CMOVE)
11454 return true;
11455 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11456 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11457 }
11458
11459 void
11460 ix86_expand_branch (enum rtx_code code, rtx label)
11461 {
11462 rtx tmp;
11463
11464 /* If we have already emitted a compare insn, go straight to simple.
11465 ix86_expand_compare won't emit anything if ix86_compare_emitted
11466 is non-NULL. */
11467 if (ix86_compare_emitted)
11468 goto simple;
11469
11470 switch (GET_MODE (ix86_compare_op0))
11471 {
11472 case QImode:
11473 case HImode:
11474 case SImode:
11475 simple:
11476 tmp = ix86_expand_compare (code, NULL, NULL);
11477 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11478 gen_rtx_LABEL_REF (VOIDmode, label),
11479 pc_rtx);
11480 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11481 return;
11482
11483 case SFmode:
11484 case DFmode:
11485 case XFmode:
11486 {
11487 rtvec vec;
11488 int use_fcomi;
11489 enum rtx_code bypass_code, first_code, second_code;
11490
11491 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11492 &ix86_compare_op1);
11493
11494 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11495
11496 /* Check whether we will use the natural sequence with one jump. If
11497 so, we can expand the jump early. Otherwise delay expansion by
11498 creating a compound insn so as not to confuse the optimizers. */
11499 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11500 && TARGET_CMOVE)
11501 {
11502 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11503 gen_rtx_LABEL_REF (VOIDmode, label),
11504 pc_rtx, NULL_RTX, NULL_RTX);
11505 }
11506 else
11507 {
11508 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11509 ix86_compare_op0, ix86_compare_op1);
11510 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11511 gen_rtx_LABEL_REF (VOIDmode, label),
11512 pc_rtx);
11513 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11514
11515 use_fcomi = ix86_use_fcomi_compare (code);
11516 vec = rtvec_alloc (3 + !use_fcomi);
11517 RTVEC_ELT (vec, 0) = tmp;
11518 RTVEC_ELT (vec, 1)
11519 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11520 RTVEC_ELT (vec, 2)
11521 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11522 if (! use_fcomi)
11523 RTVEC_ELT (vec, 3)
11524 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11525
11526 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11527 }
11528 return;
11529 }
11530
11531 case DImode:
11532 if (TARGET_64BIT)
11533 goto simple;
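/* FALLTHRU */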
11534 case TImode:
11535 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
11536 {
11537 rtx lo[2], hi[2], label2;
11538 enum rtx_code code1, code2, code3;
11539 enum machine_mode submode;
11540
11541 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11542 {
11543 tmp = ix86_compare_op0;
11544 ix86_compare_op0 = ix86_compare_op1;
11545 ix86_compare_op1 = tmp;
11546 code = swap_condition (code);
11547 }
11548 if (GET_MODE (ix86_compare_op0) == DImode)
11549 {
11550 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11551 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11552 submode = SImode;
11553 }
11554 else
11555 {
11556 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11557 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11558 submode = DImode;
11559 }
11560
11561 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11562 avoid two branches. This costs one extra insn, so disable when
11563 optimizing for size. */
11564
11565 if ((code == EQ || code == NE)
11566 && (!optimize_size
11567 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11568 {
11569 rtx xor0, xor1;
11570
11571 xor1 = hi[0];
11572 if (hi[1] != const0_rtx)
11573 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11574 NULL_RTX, 0, OPTAB_WIDEN);
11575
11576 xor0 = lo[0];
11577 if (lo[1] != const0_rtx)
11578 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11579 NULL_RTX, 0, OPTAB_WIDEN);
11580
11581 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11582 NULL_RTX, 0, OPTAB_WIDEN);
11583
11584 ix86_compare_op0 = tmp;
11585 ix86_compare_op1 = const0_rtx;
11586 ix86_expand_branch (code, label);
11587 return;
11588 }
11589
11590 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11591 comparison, op1 is a constant and the low word is zero, then we
11592 can just examine the high word. */
11593
11594 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11595 switch (code)
11596 {
11597 case LT: case LTU: case GE: case GEU:
11598 ix86_compare_op0 = hi[0];
11599 ix86_compare_op1 = hi[1];
11600 ix86_expand_branch (code, label);
11601 return;
11602 default:
11603 break;
11604 }
11605
11606 /* Otherwise, we need two or three jumps. */
11607
11608 label2 = gen_label_rtx ();
11609
11610 code1 = code;
11611 code2 = swap_condition (code);
11612 code3 = unsigned_condition (code);
11613
11614 switch (code)
11615 {
11616 case LT: case GT: case LTU: case GTU:
11617 break;
11618
11619 case LE: code1 = LT; code2 = GT; break;
11620 case GE: code1 = GT; code2 = LT; break;
11621 case LEU: code1 = LTU; code2 = GTU; break;
11622 case GEU: code1 = GTU; code2 = LTU; break;
11623
11624 case EQ: code1 = UNKNOWN; code2 = NE; break;
11625 case NE: code2 = UNKNOWN; break;
11626
11627 default:
11628 gcc_unreachable ();
11629 }
11630
11631 /*
11632 * a < b =>
11633 * if (hi(a) < hi(b)) goto true;
11634 * if (hi(a) > hi(b)) goto false;
11635 * if (lo(a) < lo(b)) goto true;
11636 * false:
11637 */
11638
11639 ix86_compare_op0 = hi[0];
11640 ix86_compare_op1 = hi[1];
11641
11642 if (code1 != UNKNOWN)
11643 ix86_expand_branch (code1, label);
11644 if (code2 != UNKNOWN)
11645 ix86_expand_branch (code2, label2);
11646
11647 ix86_compare_op0 = lo[0];
11648 ix86_compare_op1 = lo[1];
11649 ix86_expand_branch (code3, label);
11650
11651 if (code2 != UNKNOWN)
11652 emit_label (label2);
11653 return;
11654 }
11655
11656 default:
11657 gcc_unreachable ();
11658 }
11659 }
11660
11661 /* Split branch based on floating point condition. */
11662 void
11663 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11664 rtx target1, rtx target2, rtx tmp, rtx pushed)
11665 {
11666 rtx second, bypass;
11667 rtx label = NULL_RTX;
11668 rtx condition;
11669 int bypass_probability = -1, second_probability = -1, probability = -1;
11670 rtx i;
11671
11672 if (target2 != pc_rtx)
11673 {
11674 rtx tmp = target2;
11675 code = reverse_condition_maybe_unordered (code);
11676 target2 = target1;
11677 target1 = tmp;
11678 }
11679
11680 condition = ix86_expand_fp_compare (code, op1, op2,
11681 tmp, &second, &bypass);
11682
11683 /* Remove pushed operand from stack. */
11684 if (pushed)
11685 ix86_free_from_memory (GET_MODE (pushed));
11686
11687 if (split_branch_probability >= 0)
11688 {
11689 /* Distribute the probabilities across the jumps.
11690 Assume that the BYPASS and SECOND branches always
11691 test for UNORDERED. */
11692 probability = split_branch_probability;
11693
11694 /* A value of 1 is low enough that there is no need to update
11695 the probability. Later we may run some experiments and see
11696 whether unordered values are more frequent in practice. */
11697 if (bypass)
11698 bypass_probability = 1;
11699 if (second)
11700 second_probability = 1;
11701 }
11702 if (bypass != NULL_RTX)
11703 {
11704 label = gen_label_rtx ();
11705 i = emit_jump_insn (gen_rtx_SET
11706 (VOIDmode, pc_rtx,
11707 gen_rtx_IF_THEN_ELSE (VOIDmode,
11708 bypass,
11709 gen_rtx_LABEL_REF (VOIDmode,
11710 label),
11711 pc_rtx)));
11712 if (bypass_probability >= 0)
11713 REG_NOTES (i)
11714 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11715 GEN_INT (bypass_probability),
11716 REG_NOTES (i));
11717 }
11718 i = emit_jump_insn (gen_rtx_SET
11719 (VOIDmode, pc_rtx,
11720 gen_rtx_IF_THEN_ELSE (VOIDmode,
11721 condition, target1, target2)));
11722 if (probability >= 0)
11723 REG_NOTES (i)
11724 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11725 GEN_INT (probability),
11726 REG_NOTES (i));
11727 if (second != NULL_RTX)
11728 {
11729 i = emit_jump_insn (gen_rtx_SET
11730 (VOIDmode, pc_rtx,
11731 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11732 target2)));
11733 if (second_probability >= 0)
11734 REG_NOTES (i)
11735 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11736 GEN_INT (second_probability),
11737 REG_NOTES (i));
11738 }
11739 if (label != NULL_RTX)
11740 emit_label (label);
11741 }
11742
11743 int
11744 ix86_expand_setcc (enum rtx_code code, rtx dest)
11745 {
11746 rtx ret, tmp, tmpreg, equiv;
11747 rtx second_test, bypass_test;
11748
11749 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11750 return 0; /* FAIL */
11751
11752 gcc_assert (GET_MODE (dest) == QImode);
11753
11754 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11755 PUT_MODE (ret, QImode);
11756
11757 tmp = dest;
11758 tmpreg = dest;
11759
11760 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11761 if (bypass_test || second_test)
11762 {
11763 rtx test = second_test;
11764 int bypass = 0;
11765 rtx tmp2 = gen_reg_rtx (QImode);
11766 if (bypass_test)
11767 {
11768 gcc_assert (!second_test);
11769 test = bypass_test;
11770 bypass = 1;
11771 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11772 }
11773 PUT_MODE (test, QImode);
11774 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11775
11776 if (bypass)
11777 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11778 else
11779 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11780 }
11781
11782 /* Attach a REG_EQUAL note describing the comparison result. */
11783 if (ix86_compare_op0 && ix86_compare_op1)
11784 {
11785 equiv = simplify_gen_relational (code, QImode,
11786 GET_MODE (ix86_compare_op0),
11787 ix86_compare_op0, ix86_compare_op1);
11788 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11789 }
11790
11791 return 1; /* DONE */
11792 }
11793
11794 /* Expand a comparison that sets or clears the carry flag. Return true
11795 when successful and set *POP to the resulting comparison. */
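/* Only LTU/GEU style results qualify: after a cmp, the carry flag alone
encodes "unsigned below", which the adc/sbb based sequences built by the
callers can consume directly, without needing a setcc. */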
11796 static bool
11797 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11798 {
11799 enum machine_mode mode =
11800 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11801
11802 /* Do not handle double-word compares; those go through a special path.
11803 FP compares are handled further below. */
11804 if (mode == (TARGET_64BIT ? TImode : DImode))
11805 return false;
11806
11807 if (SCALAR_FLOAT_MODE_P (mode))
11808 {
11809 rtx second_test = NULL, bypass_test = NULL;
11810 rtx compare_op, compare_seq;
11811
11812 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11813
11814 /* Shortcut: the following common codes never translate
11815 into carry flag compares. */
11816 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11817 || code == ORDERED || code == UNORDERED)
11818 return false;
11819
11820 /* These comparisons require the zero flag; swap the operands so they won't. */
11821 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11822 && !TARGET_IEEE_FP)
11823 {
11824 rtx tmp = op0;
11825 op0 = op1;
11826 op1 = tmp;
11827 code = swap_condition (code);
11828 }
11829
11830 /* Try to expand the comparison and verify that we end up with a carry flag
11831 based comparison. This fails to be true only when we decide to expand the
11832 comparison using arithmetic, which is not a common scenario. */
11833 start_sequence ();
11834 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11835 &second_test, &bypass_test);
11836 compare_seq = get_insns ();
11837 end_sequence ();
11838
11839 if (second_test || bypass_test)
11840 return false;
11841 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11842 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11843 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11844 else
11845 code = GET_CODE (compare_op);
11846 if (code != LTU && code != GEU)
11847 return false;
11848 emit_insn (compare_seq);
11849 *pop = compare_op;
11850 return true;
11851 }
11852 if (!INTEGRAL_MODE_P (mode))
11853 return false;
11854 switch (code)
11855 {
11856 case LTU:
11857 case GEU:
11858 break;
11859
11860 /* Convert a==0 into (unsigned)a<1. */
11861 case EQ:
11862 case NE:
11863 if (op1 != const0_rtx)
11864 return false;
11865 op1 = const1_rtx;
11866 code = (code == EQ ? LTU : GEU);
11867 break;
11868
11869 /* Convert a>b into b<a or a>=b+1. */
11870 case GTU:
11871 case LEU:
11872 if (CONST_INT_P (op1))
11873 {
11874 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11875 /* Bail out on overflow. We could still swap the operands, but that
11876 would force loading the constant into a register. */
11877 if (op1 == const0_rtx
11878 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11879 return false;
11880 code = (code == GTU ? GEU : LTU);
11881 }
11882 else
11883 {
11884 rtx tmp = op1;
11885 op1 = op0;
11886 op0 = tmp;
11887 code = (code == GTU ? LTU : GEU);
11888 }
11889 break;
11890
11891 /* Convert a>=0 into (unsigned)a<0x80000000. */
11892 case LT:
11893 case GE:
11894 if (mode == DImode || op1 != const0_rtx)
11895 return false;
11896 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11897 code = (code == LT ? GEU : LTU);
11898 break;
11899 case LE:
11900 case GT:
11901 if (mode == DImode || op1 != constm1_rtx)
11902 return false;
11903 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11904 code = (code == LE ? GEU : LTU);
11905 break;
11906
11907 default:
11908 return false;
11909 }
11910 /* Swapping operands may cause a constant to appear as the first operand. */
11911 if (!nonimmediate_operand (op0, VOIDmode))
11912 {
11913 if (no_new_pseudos)
11914 return false;
11915 op0 = force_reg (mode, op0);
11916 }
11917 ix86_compare_op0 = op0;
11918 ix86_compare_op1 = op1;
11919 *pop = ix86_expand_compare (code, NULL, NULL);
11920 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11921 return true;
11922 }
11923
11924 int
11925 ix86_expand_int_movcc (rtx operands[])
11926 {
11927 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11928 rtx compare_seq, compare_op;
11929 rtx second_test, bypass_test;
11930 enum machine_mode mode = GET_MODE (operands[0]);
11931 bool sign_bit_compare_p = false;
11932
11933 start_sequence ();
11934 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11935 compare_seq = get_insns ();
11936 end_sequence ();
11937
11938 compare_code = GET_CODE (compare_op);
11939
11940 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11941 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11942 sign_bit_compare_p = true;
11943
11944 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11945 HImode insns, we'd be swallowed in word prefix ops. */
11946
11947 if ((mode != HImode || TARGET_FAST_PREFIX)
11948 && (mode != (TARGET_64BIT ? TImode : DImode))
11949 && CONST_INT_P (operands[2])
11950 && CONST_INT_P (operands[3]))
11951 {
11952 rtx out = operands[0];
11953 HOST_WIDE_INT ct = INTVAL (operands[2]);
11954 HOST_WIDE_INT cf = INTVAL (operands[3]);
11955 HOST_WIDE_INT diff;
11956
11957 diff = ct - cf;
11958 /* Sign bit compares are better done using shifts than using
11959 sbb. */
11960 if (sign_bit_compare_p
11961 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11962 ix86_compare_op1, &compare_op))
11963 {
11964 /* Detect overlap between destination and compare sources. */
11965 rtx tmp = out;
11966
11967 if (!sign_bit_compare_p)
11968 {
11969 bool fpcmp = false;
11970
11971 compare_code = GET_CODE (compare_op);
11972
11973 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11974 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11975 {
11976 fpcmp = true;
11977 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11978 }
11979
11980 /* To simplify the rest of the code, restrict to the GEU case. */
11981 if (compare_code == LTU)
11982 {
11983 HOST_WIDE_INT tmp = ct;
11984 ct = cf;
11985 cf = tmp;
11986 compare_code = reverse_condition (compare_code);
11987 code = reverse_condition (code);
11988 }
11989 else
11990 {
11991 if (fpcmp)
11992 PUT_CODE (compare_op,
11993 reverse_condition_maybe_unordered
11994 (GET_CODE (compare_op)));
11995 else
11996 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11997 }
11998 diff = ct - cf;
11999
12000 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12001 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12002 tmp = gen_reg_rtx (mode);
12003
12004 if (mode == DImode)
12005 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12006 else
12007 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12008 }
12009 else
12010 {
12011 if (code == GT || code == GE)
12012 code = reverse_condition (code);
12013 else
12014 {
12015 HOST_WIDE_INT tmp = ct;
12016 ct = cf;
12017 cf = tmp;
12018 diff = ct - cf;
12019 }
12020 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12021 ix86_compare_op1, VOIDmode, 0, -1);
12022 }
12023
12024 if (diff == 1)
12025 {
12026 /*
12027 * cmpl op0,op1
12028 * sbbl dest,dest
12029 * [addl dest, ct]
12030 *
12031 * Size 5 - 8.
12032 */
12033 if (ct)
12034 tmp = expand_simple_binop (mode, PLUS,
12035 tmp, GEN_INT (ct),
12036 copy_rtx (tmp), 1, OPTAB_DIRECT);
12037 }
12038 else if (cf == -1)
12039 {
12040 /*
12041 * cmpl op0,op1
12042 * sbbl dest,dest
12043 * orl $ct, dest
12044 *
12045 * Size 8.
12046 */
12047 tmp = expand_simple_binop (mode, IOR,
12048 tmp, GEN_INT (ct),
12049 copy_rtx (tmp), 1, OPTAB_DIRECT);
12050 }
12051 else if (diff == -1 && ct)
12052 {
12053 /*
12054 * cmpl op0,op1
12055 * sbbl dest,dest
12056 * notl dest
12057 * [addl dest, cf]
12058 *
12059 * Size 8 - 11.
12060 */
12061 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12062 if (cf)
12063 tmp = expand_simple_binop (mode, PLUS,
12064 copy_rtx (tmp), GEN_INT (cf),
12065 copy_rtx (tmp), 1, OPTAB_DIRECT);
12066 }
12067 else
12068 {
12069 /*
12070 * cmpl op0,op1
12071 * sbbl dest,dest
12072 * [notl dest]
12073 * andl cf - ct, dest
12074 * [addl dest, ct]
12075 *
12076 * Size 8 - 11.
12077 */
12078
12079 if (cf == 0)
12080 {
12081 cf = ct;
12082 ct = 0;
12083 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12084 }
12085
12086 tmp = expand_simple_binop (mode, AND,
12087 copy_rtx (tmp),
12088 gen_int_mode (cf - ct, mode),
12089 copy_rtx (tmp), 1, OPTAB_DIRECT);
12090 if (ct)
12091 tmp = expand_simple_binop (mode, PLUS,
12092 copy_rtx (tmp), GEN_INT (ct),
12093 copy_rtx (tmp), 1, OPTAB_DIRECT);
12094 }
12095
12096 if (!rtx_equal_p (tmp, out))
12097 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12098
12099 return 1; /* DONE */
12100 }
12101
12102 if (diff < 0)
12103 {
12104 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12105
12106 HOST_WIDE_INT tmp;
12107 tmp = ct, ct = cf, cf = tmp;
12108 diff = -diff;
12109
12110 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12111 {
12112 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12113
12114 /* We may be reversing an unordered compare to a normal compare, which
12115 is not valid in general (we may convert a non-trapping condition
12116 to a trapping one); however, on i386 we currently emit all
12117 comparisons unordered. */
12118 compare_code = reverse_condition_maybe_unordered (compare_code);
12119 code = reverse_condition_maybe_unordered (code);
12120 }
12121 else
12122 {
12123 compare_code = reverse_condition (compare_code);
12124 code = reverse_condition (code);
12125 }
12126 }
12127
12128 compare_code = UNKNOWN;
12129 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12130 && CONST_INT_P (ix86_compare_op1))
12131 {
12132 if (ix86_compare_op1 == const0_rtx
12133 && (code == LT || code == GE))
12134 compare_code = code;
12135 else if (ix86_compare_op1 == constm1_rtx)
12136 {
12137 if (code == LE)
12138 compare_code = LT;
12139 else if (code == GT)
12140 compare_code = GE;
12141 }
12142 }
12143
12144 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12145 if (compare_code != UNKNOWN
12146 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12147 && (cf == -1 || ct == -1))
12148 {
12149 /* If lea code below could be used, only optimize
12150 if it results in a 2 insn sequence. */
12151
12152 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12153 || diff == 3 || diff == 5 || diff == 9)
12154 || (compare_code == LT && ct == -1)
12155 || (compare_code == GE && cf == -1))
12156 {
12157 /*
12158 * notl op1 (if necessary)
12159 * sarl $31, op1
12160 * orl cf, op1
12161 */
12162 if (ct != -1)
12163 {
12164 cf = ct;
12165 ct = -1;
12166 code = reverse_condition (code);
12167 }
12168
12169 out = emit_store_flag (out, code, ix86_compare_op0,
12170 ix86_compare_op1, VOIDmode, 0, -1);
12171
12172 out = expand_simple_binop (mode, IOR,
12173 out, GEN_INT (cf),
12174 out, 1, OPTAB_DIRECT);
12175 if (out != operands[0])
12176 emit_move_insn (operands[0], out);
12177
12178 return 1; /* DONE */
12179 }
12180 }
12181
12182
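/* The diff values accepted below are exactly the multipliers an lea can
synthesize: 2, 4 and 8 via the index scale, 3, 5 and 9 via base plus a
scaled index, and 1 trivially. */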
12183 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12184 || diff == 3 || diff == 5 || diff == 9)
12185 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12186 && (mode != DImode
12187 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12188 {
12189 /*
12190 * xorl dest,dest
12191 * cmpl op1,op2
12192 * setcc dest
12193 * lea cf(dest*(ct-cf)),dest
12194 *
12195 * Size 14.
12196 *
12197 * This also catches the degenerate setcc-only case.
12198 */
12199
12200 rtx tmp;
12201 int nops;
12202
12203 out = emit_store_flag (out, code, ix86_compare_op0,
12204 ix86_compare_op1, VOIDmode, 0, 1);
12205
12206 nops = 0;
12207 /* On x86_64 the lea instruction operates on Pmode, so we need
12208 to get the arithmetic done in the proper mode to match. */
12209 if (diff == 1)
12210 tmp = copy_rtx (out);
12211 else
12212 {
12213 rtx out1;
12214 out1 = copy_rtx (out);
12215 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12216 nops++;
12217 if (diff & 1)
12218 {
12219 tmp = gen_rtx_PLUS (mode, tmp, out1);
12220 nops++;
12221 }
12222 }
12223 if (cf != 0)
12224 {
12225 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12226 nops++;
12227 }
12228 if (!rtx_equal_p (tmp, out))
12229 {
12230 if (nops == 1)
12231 out = force_operand (tmp, copy_rtx (out));
12232 else
12233 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12234 }
12235 if (!rtx_equal_p (out, operands[0]))
12236 emit_move_insn (operands[0], copy_rtx (out));
12237
12238 return 1; /* DONE */
12239 }
12240
12241 /*
12242 * General case: Jumpful:
12243 * xorl dest,dest cmpl op1, op2
12244 * cmpl op1, op2 movl ct, dest
12245 * setcc dest jcc 1f
12246 * decl dest movl cf, dest
12247 * andl (cf-ct),dest 1:
12248 * addl ct,dest
12249 *
12250 * Size 20. Size 14.
12251 *
12252 * This is reasonably steep, but branch mispredict costs are
12253 * high on modern cpus, so consider failing only if optimizing
12254 * for space.
12255 */
12256
12257 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12258 && BRANCH_COST >= 2)
12259 {
12260 if (cf == 0)
12261 {
12262 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12263
12264 cf = ct;
12265 ct = 0;
12266
12267 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12268 {
12269 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12270
12271 /* We may be reversing an unordered compare to a normal compare,
12272 which is not valid in general (we may convert a non-trapping
12273 condition to a trapping one); however, on i386 we currently
12274 emit all comparisons unordered. */
12275 code = reverse_condition_maybe_unordered (code);
12276 }
12277 else
12278 {
12279 code = reverse_condition (code);
12280 if (compare_code != UNKNOWN)
12281 compare_code = reverse_condition (compare_code);
12282 }
12283 }
12284
12285 if (compare_code != UNKNOWN)
12286 {
12287 /* notl op1 (if needed)
12288 sarl $31, op1
12289 andl (cf-ct), op1
12290 addl ct, op1
12291
12292 For x < 0 (resp. x <= -1) there will be no notl,
12293 so if possible swap the constants to get rid of the
12294 complement.
12295 True/false will be -1/0 while code below (store flag
12296 followed by decrement) is 0/-1, so the constants need
12297 to be exchanged once more. */
12298
12299 if (compare_code == GE || !cf)
12300 {
12301 code = reverse_condition (code);
12302 compare_code = LT;
12303 }
12304 else
12305 {
12306 HOST_WIDE_INT tmp = cf;
12307 cf = ct;
12308 ct = tmp;
12309 }
12310
12311 out = emit_store_flag (out, code, ix86_compare_op0,
12312 ix86_compare_op1, VOIDmode, 0, -1);
12313 }
12314 else
12315 {
12316 out = emit_store_flag (out, code, ix86_compare_op0,
12317 ix86_compare_op1, VOIDmode, 0, 1);
12318
12319 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12320 copy_rtx (out), 1, OPTAB_DIRECT);
12321 }
12322
12323 out = expand_simple_binop (mode, AND, copy_rtx (out),
12324 gen_int_mode (cf - ct, mode),
12325 copy_rtx (out), 1, OPTAB_DIRECT);
12326 if (ct)
12327 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12328 copy_rtx (out), 1, OPTAB_DIRECT);
12329 if (!rtx_equal_p (out, operands[0]))
12330 emit_move_insn (operands[0], copy_rtx (out));
12331
12332 return 1; /* DONE */
12333 }
12334 }
12335
12336 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12337 {
12338 /* Try a few things more with specific constants and a variable. */
12339
12340 optab op;
12341 rtx var, orig_out, out, tmp;
12342
12343 if (BRANCH_COST <= 2)
12344 return 0; /* FAIL */
12345
12346 /* If one of the two operands is an interesting constant, load a
12347 constant with the above and mask it in with a logical operation. */
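/* For example, "dest = cond ? 0 : var" is rewritten as
"tmp = cond ? 0 : -1; dest = tmp & var", and "dest = cond ? -1 : var"
as "tmp = cond ? -1 : 0; dest = tmp | var". */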
12348
12349 if (CONST_INT_P (operands[2]))
12350 {
12351 var = operands[3];
12352 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12353 operands[3] = constm1_rtx, op = and_optab;
12354 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12355 operands[3] = const0_rtx, op = ior_optab;
12356 else
12357 return 0; /* FAIL */
12358 }
12359 else if (CONST_INT_P (operands[3]))
12360 {
12361 var = operands[2];
12362 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12363 operands[2] = constm1_rtx, op = and_optab;
12364 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12365 operands[2] = const0_rtx, op = ior_optab;
12366 else
12367 return 0; /* FAIL */
12368 }
12369 else
12370 return 0; /* FAIL */
12371
12372 orig_out = operands[0];
12373 tmp = gen_reg_rtx (mode);
12374 operands[0] = tmp;
12375
12376 /* Recurse to get the constant loaded. */
12377 if (ix86_expand_int_movcc (operands) == 0)
12378 return 0; /* FAIL */
12379
12380 /* Mask in the interesting variable. */
12381 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12382 OPTAB_WIDEN);
12383 if (!rtx_equal_p (out, orig_out))
12384 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12385
12386 return 1; /* DONE */
12387 }
12388
12389 /*
12390 * For comparison with above,
12391 *
12392 * movl cf,dest
12393 * movl ct,tmp
12394 * cmpl op1,op2
12395 * cmovcc tmp,dest
12396 *
12397 * Size 15.
12398 */
12399
12400 if (! nonimmediate_operand (operands[2], mode))
12401 operands[2] = force_reg (mode, operands[2]);
12402 if (! nonimmediate_operand (operands[3], mode))
12403 operands[3] = force_reg (mode, operands[3]);
12404
12405 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12406 {
12407 rtx tmp = gen_reg_rtx (mode);
12408 emit_move_insn (tmp, operands[3]);
12409 operands[3] = tmp;
12410 }
12411 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12412 {
12413 rtx tmp = gen_reg_rtx (mode);
12414 emit_move_insn (tmp, operands[2]);
12415 operands[2] = tmp;
12416 }
12417
12418 if (! register_operand (operands[2], VOIDmode)
12419 && (mode == QImode
12420 || ! register_operand (operands[3], VOIDmode)))
12421 operands[2] = force_reg (mode, operands[2]);
12422
12423 if (mode == QImode
12424 && ! register_operand (operands[3], VOIDmode))
12425 operands[3] = force_reg (mode, operands[3]);
12426
12427 emit_insn (compare_seq);
12428 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12429 gen_rtx_IF_THEN_ELSE (mode,
12430 compare_op, operands[2],
12431 operands[3])));
12432 if (bypass_test)
12433 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12434 gen_rtx_IF_THEN_ELSE (mode,
12435 bypass_test,
12436 copy_rtx (operands[3]),
12437 copy_rtx (operands[0]))));
12438 if (second_test)
12439 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12440 gen_rtx_IF_THEN_ELSE (mode,
12441 second_test,
12442 copy_rtx (operands[2]),
12443 copy_rtx (operands[0]))));
12444
12445 return 1; /* DONE */
12446 }
12447
12448 /* Swap, force into registers, or otherwise massage the two operands
12449 to an sse comparison with a mask result. Thus we differ a bit from
12450 ix86_prepare_fp_compare_args which expects to produce a flags result.
12451
12452 The DEST operand exists to help determine whether to commute commutative
12453 operators. The POP0/POP1 operands are updated in place. The new
12454 comparison code is returned, or UNKNOWN if not implementable. */
12455
12456 static enum rtx_code
12457 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12458 rtx *pop0, rtx *pop1)
12459 {
12460 rtx tmp;
12461
12462 switch (code)
12463 {
12464 case LTGT:
12465 case UNEQ:
12466 /* We have no LTGT as an operator. We could implement it with
12467 NE & ORDERED, but this requires an extra temporary. It's
12468 not clear that it's worth it. */
12469 return UNKNOWN;
12470
12471 case LT:
12472 case LE:
12473 case UNGT:
12474 case UNGE:
12475 /* These are supported directly. */
12476 break;
12477
12478 case EQ:
12479 case NE:
12480 case UNORDERED:
12481 case ORDERED:
12482 /* For commutative operators, try to canonicalize the destination
12483 operand to be first in the comparison - this helps reload to
12484 avoid extra moves. */
12485 if (!dest || !rtx_equal_p (dest, *pop1))
12486 break;
12487 /* FALLTHRU */
12488
12489 case GE:
12490 case GT:
12491 case UNLE:
12492 case UNLT:
12493 /* These are not supported directly. Swap the comparison operands
12494 to transform into something that is supported. */
12495 tmp = *pop0;
12496 *pop0 = *pop1;
12497 *pop1 = tmp;
12498 code = swap_condition (code);
12499 break;
12500
12501 default:
12502 gcc_unreachable ();
12503 }
12504
12505 return code;
12506 }
12507
12508 /* Detect conditional moves that exactly match min/max operational
12509 semantics. Note that this is IEEE safe, as long as we don't
12510 interchange the operands.
12511
12512 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12513 and TRUE if the operation is successful and instructions are emitted. */
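/* Only the LT and UNGE forms are recognized: both reduce, without touching
the operand order, to the canonical "a < b ? a : b" / "a < b ? b : a"
shapes, while other codes would require interchanging the operands, which
the comment above notes is not IEEE safe. */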
12514
12515 static bool
12516 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12517 rtx cmp_op1, rtx if_true, rtx if_false)
12518 {
12519 enum machine_mode mode;
12520 bool is_min;
12521 rtx tmp;
12522
12523 if (code == LT)
12524 ;
12525 else if (code == UNGE)
12526 {
12527 tmp = if_true;
12528 if_true = if_false;
12529 if_false = tmp;
12530 }
12531 else
12532 return false;
12533
12534 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12535 is_min = true;
12536 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12537 is_min = false;
12538 else
12539 return false;
12540
12541 mode = GET_MODE (dest);
12542
12543 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12544 but MODE may be a vector mode and thus not appropriate. */
12545 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12546 {
12547 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12548 rtvec v;
12549
12550 if_true = force_reg (mode, if_true);
12551 v = gen_rtvec (2, if_true, if_false);
12552 tmp = gen_rtx_UNSPEC (mode, v, u);
12553 }
12554 else
12555 {
12556 code = is_min ? SMIN : SMAX;
12557 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12558 }
12559
12560 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12561 return true;
12562 }
12563
12564 /* Expand an sse vector comparison. Return the register with the result. */
12565
12566 static rtx
12567 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12568 rtx op_true, rtx op_false)
12569 {
12570 enum machine_mode mode = GET_MODE (dest);
12571 rtx x;
12572
12573 cmp_op0 = force_reg (mode, cmp_op0);
12574 if (!nonimmediate_operand (cmp_op1, mode))
12575 cmp_op1 = force_reg (mode, cmp_op1);
12576
12577 if (optimize
12578 || reg_overlap_mentioned_p (dest, op_true)
12579 || reg_overlap_mentioned_p (dest, op_false))
12580 dest = gen_reg_rtx (mode);
12581
12582 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12583 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12584
12585 return dest;
12586 }
12587
12588 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12589 operations. This is used for both scalar and vector conditional moves. */
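/* CMP is expected to be an all-ones / all-zeros per-element mask, as
produced by the SSE compare instructions, so the general case below
computes dest = (cmp & op_true) | (~cmp & op_false). */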
12590
12591 static void
12592 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12593 {
12594 enum machine_mode mode = GET_MODE (dest);
12595 rtx t2, t3, x;
12596
12597 if (op_false == CONST0_RTX (mode))
12598 {
12599 op_true = force_reg (mode, op_true);
12600 x = gen_rtx_AND (mode, cmp, op_true);
12601 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12602 }
12603 else if (op_true == CONST0_RTX (mode))
12604 {
12605 op_false = force_reg (mode, op_false);
12606 x = gen_rtx_NOT (mode, cmp);
12607 x = gen_rtx_AND (mode, x, op_false);
12608 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12609 }
12610 else
12611 {
12612 op_true = force_reg (mode, op_true);
12613 op_false = force_reg (mode, op_false);
12614
12615 t2 = gen_reg_rtx (mode);
12616 if (optimize)
12617 t3 = gen_reg_rtx (mode);
12618 else
12619 t3 = dest;
12620
12621 x = gen_rtx_AND (mode, op_true, cmp);
12622 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12623
12624 x = gen_rtx_NOT (mode, cmp);
12625 x = gen_rtx_AND (mode, x, op_false);
12626 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12627
12628 x = gen_rtx_IOR (mode, t3, t2);
12629 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12630 }
12631 }
12632
12633 /* Expand a floating-point conditional move. Return true if successful. */
12634
12635 int
12636 ix86_expand_fp_movcc (rtx operands[])
12637 {
12638 enum machine_mode mode = GET_MODE (operands[0]);
12639 enum rtx_code code = GET_CODE (operands[1]);
12640 rtx tmp, compare_op, second_test, bypass_test;
12641
12642 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12643 {
12644 enum machine_mode cmode;
12645
12646 /* Since we have no cmove for sse registers, don't force bad register
12647 allocation just to gain access to it. Deny movcc when the
12648 comparison mode doesn't match the move mode. */
12649 cmode = GET_MODE (ix86_compare_op0);
12650 if (cmode == VOIDmode)
12651 cmode = GET_MODE (ix86_compare_op1);
12652 if (cmode != mode)
12653 return 0;
12654
12655 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12656 &ix86_compare_op0,
12657 &ix86_compare_op1);
12658 if (code == UNKNOWN)
12659 return 0;
12660
12661 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12662 ix86_compare_op1, operands[2],
12663 operands[3]))
12664 return 1;
12665
12666 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12667 ix86_compare_op1, operands[2], operands[3]);
12668 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12669 return 1;
12670 }
12671
12672 /* The floating point conditional move instructions don't directly
12673 support conditions resulting from a signed integer comparison. */
12674
12675 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12676
12680 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12681 {
12682 gcc_assert (!second_test && !bypass_test);
12683 tmp = gen_reg_rtx (QImode);
12684 ix86_expand_setcc (code, tmp);
12685 code = NE;
12686 ix86_compare_op0 = tmp;
12687 ix86_compare_op1 = const0_rtx;
12688 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12689 }
12690 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12691 {
12692 tmp = gen_reg_rtx (mode);
12693 emit_move_insn (tmp, operands[3]);
12694 operands[3] = tmp;
12695 }
12696 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12697 {
12698 tmp = gen_reg_rtx (mode);
12699 emit_move_insn (tmp, operands[2]);
12700 operands[2] = tmp;
12701 }
12702
12703 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12704 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12705 operands[2], operands[3])));
12706 if (bypass_test)
12707 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12708 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12709 operands[3], operands[0])));
12710 if (second_test)
12711 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12712 gen_rtx_IF_THEN_ELSE (mode, second_test,
12713 operands[2], operands[0])));
12714
12715 return 1;
12716 }
12717
12718 /* Expand a floating-point vector conditional move; a vcond operation
12719 rather than a movcc operation. */
12720
12721 bool
12722 ix86_expand_fp_vcond (rtx operands[])
12723 {
12724 enum rtx_code code = GET_CODE (operands[3]);
12725 rtx cmp;
12726
12727 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12728 &operands[4], &operands[5]);
12729 if (code == UNKNOWN)
12730 return false;
12731
12732 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12733 operands[5], operands[1], operands[2]))
12734 return true;
12735
12736 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12737 operands[1], operands[2]);
12738 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12739 return true;
12740 }
12741
12742 /* Expand a signed integral vector conditional move. */
12743
12744 bool
12745 ix86_expand_int_vcond (rtx operands[])
12746 {
12747 enum machine_mode mode = GET_MODE (operands[0]);
12748 enum rtx_code code = GET_CODE (operands[3]);
12749 bool negate = false;
12750 rtx x, cop0, cop1;
12751
12752 cop0 = operands[4];
12753 cop1 = operands[5];
12754
12755 /* Canonicalize the comparison to EQ, GT, GTU. */
12756 switch (code)
12757 {
12758 case EQ:
12759 case GT:
12760 case GTU:
12761 break;
12762
12763 case NE:
12764 case LE:
12765 case LEU:
12766 code = reverse_condition (code);
12767 negate = true;
12768 break;
12769
12770 case GE:
12771 case GEU:
12772 code = reverse_condition (code);
12773 negate = true;
12774 /* FALLTHRU */
12775
12776 case LT:
12777 case LTU:
12778 code = swap_condition (code);
12779 x = cop0, cop0 = cop1, cop1 = x;
12780 break;
12781
12782 default:
12783 gcc_unreachable ();
12784 }
12785
12786 /* Unsigned parallel compare is not supported by the hardware. Play some
12787 tricks to turn this into a signed comparison against 0. */
12788 if (code == GTU)
12789 {
12790 cop0 = force_reg (mode, cop0);
12791
12792 switch (mode)
12793 {
12794 case V4SImode:
12795 {
12796 rtx t1, t2, mask;
12797
12798 /* Perform a parallel modulo subtraction. */
12799 t1 = gen_reg_rtx (mode);
12800 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12801
12802 /* Extract the original sign bit of op0. */
12803 mask = GEN_INT (-0x80000000);
12804 mask = gen_rtx_CONST_VECTOR (mode,
12805 gen_rtvec (4, mask, mask, mask, mask));
12806 mask = force_reg (mode, mask);
12807 t2 = gen_reg_rtx (mode);
12808 emit_insn (gen_andv4si3 (t2, cop0, mask));
12809
12810 /* XOR it back into the result of the subtraction. This results
12811 in the sign bit set iff we saw unsigned underflow. */
12812 x = gen_reg_rtx (mode);
12813 emit_insn (gen_xorv4si3 (x, t1, t2));
12814
12815 code = GT;
12816 }
12817 break;
12818
12819 case V16QImode:
12820 case V8HImode:
12821 /* Perform a parallel unsigned saturating subtraction. */
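/* The saturating difference cop0 - cop1 is nonzero exactly when
cop0 > cop1 as unsigned values, so compare the result against zero
and invert the sense of the selection. */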
12822 x = gen_reg_rtx (mode);
12823 emit_insn (gen_rtx_SET (VOIDmode, x,
12824 gen_rtx_US_MINUS (mode, cop0, cop1)));
12825
12826 code = EQ;
12827 negate = !negate;
12828 break;
12829
12830 default:
12831 gcc_unreachable ();
12832 }
12833
12834 cop0 = x;
12835 cop1 = CONST0_RTX (mode);
12836 }
12837
12838 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12839 operands[1+negate], operands[2-negate]);
12840
12841 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12842 operands[2-negate]);
12843 return true;
12844 }
12845
12846 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12847 true if we should do zero extension, else sign extension. HIGH_P is
12848 true if we want the N/2 high elements, else the low elements. */
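/* The widening works by interleaving the source with either a zero vector
(zero extension) or with a vector holding, for each element, all ones if
that element is negative and zero otherwise, obtained by comparing
0 > operand (sign extension). */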
12849
12850 void
12851 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12852 {
12853 enum machine_mode imode = GET_MODE (operands[1]);
12854 rtx (*unpack)(rtx, rtx, rtx);
12855 rtx se, dest;
12856
12857 switch (imode)
12858 {
12859 case V16QImode:
12860 if (high_p)
12861 unpack = gen_vec_interleave_highv16qi;
12862 else
12863 unpack = gen_vec_interleave_lowv16qi;
12864 break;
12865 case V8HImode:
12866 if (high_p)
12867 unpack = gen_vec_interleave_highv8hi;
12868 else
12869 unpack = gen_vec_interleave_lowv8hi;
12870 break;
12871 case V4SImode:
12872 if (high_p)
12873 unpack = gen_vec_interleave_highv4si;
12874 else
12875 unpack = gen_vec_interleave_lowv4si;
12876 break;
12877 default:
12878 gcc_unreachable ();
12879 }
12880
12881 dest = gen_lowpart (imode, operands[0]);
12882
12883 if (unsigned_p)
12884 se = force_reg (imode, CONST0_RTX (imode));
12885 else
12886 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12887 operands[1], pc_rtx, pc_rtx);
12888
12889 emit_insn (unpack (dest, operands[1], se));
12890 }
12891
12892 /* This function performs the same task as ix86_expand_sse_unpack,
12893 but with SSE4.1 instructions. */
12894
12895 void
12896 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12897 {
12898 enum machine_mode imode = GET_MODE (operands[1]);
12899 rtx (*unpack)(rtx, rtx);
12900 rtx src, dest;
12901
12902 switch (imode)
12903 {
12904 case V16QImode:
12905 if (unsigned_p)
12906 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
12907 else
12908 unpack = gen_sse4_1_extendv8qiv8hi2;
12909 break;
12910 case V8HImode:
12911 if (unsigned_p)
12912 unpack = gen_sse4_1_zero_extendv4hiv4si2;
12913 else
12914 unpack = gen_sse4_1_extendv4hiv4si2;
12915 break;
12916 case V4SImode:
12917 if (unsigned_p)
12918 unpack = gen_sse4_1_zero_extendv2siv2di2;
12919 else
12920 unpack = gen_sse4_1_extendv2siv2di2;
12921 break;
12922 default:
12923 gcc_unreachable ();
12924 }
12925
12926 dest = operands[0];
12927 if (high_p)
12928 {
12929 /* Shift higher 8 bytes to lower 8 bytes. */
12930 src = gen_reg_rtx (imode);
12931 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
12932 gen_lowpart (TImode, operands[1]),
12933 GEN_INT (64)));
12934 }
12935 else
12936 src = operands[1];
12937
12938 emit_insn (unpack (dest, src));
12939 }
12940
12941 /* Expand conditional increment or decrement using adc/sbb instructions.
12942 The default case using setcc followed by the conditional move can be
12943 done by generic code. */
12944 int
12945 ix86_expand_int_addcc (rtx operands[])
12946 {
12947 enum rtx_code code = GET_CODE (operands[1]);
12948 rtx compare_op;
12949 rtx val = const0_rtx;
12950 bool fpcmp = false;
12951 enum machine_mode mode = GET_MODE (operands[0]);
12952
12953 if (operands[3] != const1_rtx
12954 && operands[3] != constm1_rtx)
12955 return 0;
12956 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12957 ix86_compare_op1, &compare_op))
12958 return 0;
12959 code = GET_CODE (compare_op);
12960
12961 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12962 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12963 {
12964 fpcmp = true;
12965 code = ix86_fp_compare_code_to_integer (code);
12966 }
12967
12968 if (code != LTU)
12969 {
12970 val = constm1_rtx;
12971 if (fpcmp)
12972 PUT_CODE (compare_op,
12973 reverse_condition_maybe_unordered
12974 (GET_CODE (compare_op)));
12975 else
12976 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12977 }
12978 PUT_MODE (compare_op, mode);
12979
12980 /* Construct either adc or sbb insn. */
12981 if ((code == LTU) == (operands[3] == constm1_rtx))
12982 {
12983 switch (GET_MODE (operands[0]))
12984 {
12985 case QImode:
12986 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12987 break;
12988 case HImode:
12989 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12990 break;
12991 case SImode:
12992 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12993 break;
12994 case DImode:
12995 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12996 break;
12997 default:
12998 gcc_unreachable ();
12999 }
13000 }
13001 else
13002 {
13003 switch (GET_MODE (operands[0]))
13004 {
13005 case QImode:
13006 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13007 break;
13008 case HImode:
13009 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13010 break;
13011 case SImode:
13012 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13013 break;
13014 case DImode:
13015 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13016 break;
13017 default:
13018 gcc_unreachable ();
13019 }
13020 }
13021 return 1; /* DONE */
13022 }
13023
13024
13025 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13026 works for floating point parameters and non-offsettable memories.
13027 For pushes, it returns just stack offsets; the values will be saved
13028 in the right order. Maximally three parts are generated. */
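/* As a rough illustration of the resulting split (based on the code below):
   on a 32-bit target a DImode or DFmode operand yields two SImode parts and
   an XFmode operand yields three; on a 64-bit target an XFmode or TFmode
   operand yields a DImode part plus either an SImode (XFmode) or DImode
   (TFmode) part for the upper half.  */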
13029
13030 static int
13031 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13032 {
13033 int size;
13034
13035 if (!TARGET_64BIT)
13036 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13037 else
13038 size = (GET_MODE_SIZE (mode) + 4) / 8;
13039
13040 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13041 gcc_assert (size >= 2 && size <= 3);
13042
13043 /* Optimize constant pool references to immediates. This is used by fp
13044 moves, which force all constants to memory to allow combining. */
13045 if (MEM_P (operand) && MEM_READONLY_P (operand))
13046 {
13047 rtx tmp = maybe_get_pool_constant (operand);
13048 if (tmp)
13049 operand = tmp;
13050 }
13051
13052 if (MEM_P (operand) && !offsettable_memref_p (operand))
13053 {
13054 /* The only non-offsettable memories we handle are pushes. */
13055 int ok = push_operand (operand, VOIDmode);
13056
13057 gcc_assert (ok);
13058
13059 operand = copy_rtx (operand);
13060 PUT_MODE (operand, Pmode);
13061 parts[0] = parts[1] = parts[2] = operand;
13062 return size;
13063 }
13064
13065 if (GET_CODE (operand) == CONST_VECTOR)
13066 {
13067 enum machine_mode imode = int_mode_for_mode (mode);
13068 /* Caution: if we looked through a constant pool memory above,
13069 the operand may actually have a different mode now. That's
13070 ok, since we want to pun this all the way back to an integer. */
13071 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13072 gcc_assert (operand != NULL);
13073 mode = imode;
13074 }
13075
13076 if (!TARGET_64BIT)
13077 {
13078 if (mode == DImode)
13079 split_di (&operand, 1, &parts[0], &parts[1]);
13080 else
13081 {
13082 if (REG_P (operand))
13083 {
13084 gcc_assert (reload_completed);
13085 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13086 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13087 if (size == 3)
13088 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13089 }
13090 else if (offsettable_memref_p (operand))
13091 {
13092 operand = adjust_address (operand, SImode, 0);
13093 parts[0] = operand;
13094 parts[1] = adjust_address (operand, SImode, 4);
13095 if (size == 3)
13096 parts[2] = adjust_address (operand, SImode, 8);
13097 }
13098 else if (GET_CODE (operand) == CONST_DOUBLE)
13099 {
13100 REAL_VALUE_TYPE r;
13101 long l[4];
13102
13103 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13104 switch (mode)
13105 {
13106 case XFmode:
13107 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13108 parts[2] = gen_int_mode (l[2], SImode);
13109 break;
13110 case DFmode:
13111 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13112 break;
13113 default:
13114 gcc_unreachable ();
13115 }
13116 parts[1] = gen_int_mode (l[1], SImode);
13117 parts[0] = gen_int_mode (l[0], SImode);
13118 }
13119 else
13120 gcc_unreachable ();
13121 }
13122 }
13123 else
13124 {
13125 if (mode == TImode)
13126 split_ti (&operand, 1, &parts[0], &parts[1]);
13127 if (mode == XFmode || mode == TFmode)
13128 {
13129 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13130 if (REG_P (operand))
13131 {
13132 gcc_assert (reload_completed);
13133 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13134 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13135 }
13136 else if (offsettable_memref_p (operand))
13137 {
13138 operand = adjust_address (operand, DImode, 0);
13139 parts[0] = operand;
13140 parts[1] = adjust_address (operand, upper_mode, 8);
13141 }
13142 else if (GET_CODE (operand) == CONST_DOUBLE)
13143 {
13144 REAL_VALUE_TYPE r;
13145 long l[4];
13146
13147 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13148 real_to_target (l, &r, mode);
13149
13150 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13151 if (HOST_BITS_PER_WIDE_INT >= 64)
13152 parts[0]
13153 = gen_int_mode
13154 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13155 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13156 DImode);
13157 else
13158 parts[0] = immed_double_const (l[0], l[1], DImode);
13159
13160 if (upper_mode == SImode)
13161 parts[1] = gen_int_mode (l[2], SImode);
13162 else if (HOST_BITS_PER_WIDE_INT >= 64)
13163 parts[1]
13164 = gen_int_mode
13165 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13166 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13167 DImode);
13168 else
13169 parts[1] = immed_double_const (l[2], l[3], DImode);
13170 }
13171 else
13172 gcc_unreachable ();
13173 }
13174 }
13175
13176 return size;
13177 }
13178
13179 /* Emit insns to perform a move or push of DI, DF, and XF values.
13180 Return false when normal moves are needed; true when all required
13181 insns have been emitted. Operands 2-4 contain the input values
13182 in the correct order; operands 5-7 contain the output values. */
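/* For instance, a DImode move on a 32-bit target becomes two SImode moves;
   the operand order chosen below makes sure a destination register that is
   also used as a source (or as part of a source address) is written last,
   so no source part is clobbered before it has been read.  */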
13183
13184 void
13185 ix86_split_long_move (rtx operands[])
13186 {
13187 rtx part[2][3];
13188 int nparts;
13189 int push = 0;
13190 int collisions = 0;
13191 enum machine_mode mode = GET_MODE (operands[0]);
13192
13193 /* The DFmode expanders may ask us to move a double.
13194 For a 64-bit target this is a single move. By hiding that fact
13195 here we simplify the i386.md splitters. */
13196 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13197 {
13198 /* Optimize constant pool references to immediates. This is used by
13199 fp moves, which force all constants to memory to allow combining. */
13200
13201 if (MEM_P (operands[1])
13202 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13203 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13204 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13205 if (push_operand (operands[0], VOIDmode))
13206 {
13207 operands[0] = copy_rtx (operands[0]);
13208 PUT_MODE (operands[0], Pmode);
13209 }
13210 else
13211 operands[0] = gen_lowpart (DImode, operands[0]);
13212 operands[1] = gen_lowpart (DImode, operands[1]);
13213 emit_move_insn (operands[0], operands[1]);
13214 return;
13215 }
13216
13217 /* The only non-offsettable memory we handle is push. */
13218 if (push_operand (operands[0], VOIDmode))
13219 push = 1;
13220 else
13221 gcc_assert (!MEM_P (operands[0])
13222 || offsettable_memref_p (operands[0]));
13223
13224 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13225 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13226
13227 /* When emitting push, take care for source operands on the stack. */
13228 if (push && MEM_P (operands[1])
13229 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13230 {
13231 if (nparts == 3)
13232 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13233 XEXP (part[1][2], 0));
13234 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13235 XEXP (part[1][1], 0));
13236 }
13237
13238 /* We need to do the copy in the right order in case an address register
13239 of the source overlaps the destination. */
13240 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13241 {
13242 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13243 collisions++;
13244 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13245 collisions++;
13246 if (nparts == 3
13247 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13248 collisions++;
13249
13250 /* Collision in the middle part can be handled by reordering. */
13251 if (collisions == 1 && nparts == 3
13252 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13253 {
13254 rtx tmp;
13255 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13256 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13257 }
13258
13259 /* If there are more collisions, we can't handle it by reordering.
13260 Do an lea to the last part and use only one colliding move. */
13261 else if (collisions > 1)
13262 {
13263 rtx base;
13264
13265 collisions = 1;
13266
13267 base = part[0][nparts - 1];
13268
13269 /* Handle the case when the last part isn't valid for lea.
13270 Happens in 64-bit mode storing the 12-byte XFmode. */
13271 if (GET_MODE (base) != Pmode)
13272 base = gen_rtx_REG (Pmode, REGNO (base));
13273
13274 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13275 part[1][0] = replace_equiv_address (part[1][0], base);
13276 part[1][1] = replace_equiv_address (part[1][1],
13277 plus_constant (base, UNITS_PER_WORD));
13278 if (nparts == 3)
13279 part[1][2] = replace_equiv_address (part[1][2],
13280 plus_constant (base, 8));
13281 }
13282 }
13283
13284 if (push)
13285 {
13286 if (!TARGET_64BIT)
13287 {
13288 if (nparts == 3)
13289 {
13290 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13291 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13292 emit_move_insn (part[0][2], part[1][2]);
13293 }
13294 }
13295 else
13296 {
13297 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
13298 register, it is OK - we will just use the larger counterpart. We also
13299 retype memory - this comes from an attempt to avoid the REX prefix when
13300 moving the second half of a TFmode value. */
13301 if (GET_MODE (part[1][1]) == SImode)
13302 {
13303 switch (GET_CODE (part[1][1]))
13304 {
13305 case MEM:
13306 part[1][1] = adjust_address (part[1][1], DImode, 0);
13307 break;
13308
13309 case REG:
13310 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13311 break;
13312
13313 default:
13314 gcc_unreachable ();
13315 }
13316
13317 if (GET_MODE (part[1][0]) == SImode)
13318 part[1][0] = part[1][1];
13319 }
13320 }
13321 emit_move_insn (part[0][1], part[1][1]);
13322 emit_move_insn (part[0][0], part[1][0]);
13323 return;
13324 }
13325
13326 /* Choose correct order to not overwrite the source before it is copied. */
13327 if ((REG_P (part[0][0])
13328 && REG_P (part[1][1])
13329 && (REGNO (part[0][0]) == REGNO (part[1][1])
13330 || (nparts == 3
13331 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13332 || (collisions > 0
13333 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13334 {
13335 if (nparts == 3)
13336 {
13337 operands[2] = part[0][2];
13338 operands[3] = part[0][1];
13339 operands[4] = part[0][0];
13340 operands[5] = part[1][2];
13341 operands[6] = part[1][1];
13342 operands[7] = part[1][0];
13343 }
13344 else
13345 {
13346 operands[2] = part[0][1];
13347 operands[3] = part[0][0];
13348 operands[5] = part[1][1];
13349 operands[6] = part[1][0];
13350 }
13351 }
13352 else
13353 {
13354 if (nparts == 3)
13355 {
13356 operands[2] = part[0][0];
13357 operands[3] = part[0][1];
13358 operands[4] = part[0][2];
13359 operands[5] = part[1][0];
13360 operands[6] = part[1][1];
13361 operands[7] = part[1][2];
13362 }
13363 else
13364 {
13365 operands[2] = part[0][0];
13366 operands[3] = part[0][1];
13367 operands[5] = part[1][0];
13368 operands[6] = part[1][1];
13369 }
13370 }
13371
13372 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13373 if (optimize_size)
13374 {
13375 if (CONST_INT_P (operands[5])
13376 && operands[5] != const0_rtx
13377 && REG_P (operands[2]))
13378 {
13379 if (CONST_INT_P (operands[6])
13380 && INTVAL (operands[6]) == INTVAL (operands[5]))
13381 operands[6] = operands[2];
13382
13383 if (nparts == 3
13384 && CONST_INT_P (operands[7])
13385 && INTVAL (operands[7]) == INTVAL (operands[5]))
13386 operands[7] = operands[2];
13387 }
13388
13389 if (nparts == 3
13390 && CONST_INT_P (operands[6])
13391 && operands[6] != const0_rtx
13392 && REG_P (operands[3])
13393 && CONST_INT_P (operands[7])
13394 && INTVAL (operands[7]) == INTVAL (operands[6]))
13395 operands[7] = operands[3];
13396 }
13397
13398 emit_move_insn (operands[2], operands[5]);
13399 emit_move_insn (operands[3], operands[6]);
13400 if (nparts == 3)
13401 emit_move_insn (operands[4], operands[7]);
13402
13403 return;
13404 }
13405
13406 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13407 left shift by a constant, either using a single shift or
13408 a sequence of add instructions. */
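/* A scalar sketch of the trade-off (illustration only):

     x <<= 3;     can become     x += x; x += x; x += x;

   The add sequence is used below whenever COUNT times the add cost does not
   exceed the constant-shift cost and we are not optimizing for size; a shift
   by 1 is always done with a single add.  */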
13409
13410 static void
13411 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13412 {
13413 if (count == 1)
13414 {
13415 emit_insn ((mode == DImode
13416 ? gen_addsi3
13417 : gen_adddi3) (operand, operand, operand));
13418 }
13419 else if (!optimize_size
13420 && count * ix86_cost->add <= ix86_cost->shift_const)
13421 {
13422 int i;
13423 for (i=0; i<count; i++)
13424 {
13425 emit_insn ((mode == DImode
13426 ? gen_addsi3
13427 : gen_adddi3) (operand, operand, operand));
13428 }
13429 }
13430 else
13431 emit_insn ((mode == DImode
13432 ? gen_ashlsi3
13433 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13434 }
13435
13436 void
13437 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13438 {
13439 rtx low[2], high[2];
13440 int count;
13441 const int single_width = mode == DImode ? 32 : 64;
13442
13443 if (CONST_INT_P (operands[2]))
13444 {
13445 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13446 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13447
13448 if (count >= single_width)
13449 {
13450 emit_move_insn (high[0], low[1]);
13451 emit_move_insn (low[0], const0_rtx);
13452
13453 if (count > single_width)
13454 ix86_expand_ashl_const (high[0], count - single_width, mode);
13455 }
13456 else
13457 {
13458 if (!rtx_equal_p (operands[0], operands[1]))
13459 emit_move_insn (operands[0], operands[1]);
13460 emit_insn ((mode == DImode
13461 ? gen_x86_shld_1
13462 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13463 ix86_expand_ashl_const (low[0], count, mode);
13464 }
13465 return;
13466 }
13467
13468 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13469
13470 if (operands[1] == const1_rtx)
13471 {
13472 /* Assuming we've chosen QImode-capable registers, 1 << N
13473 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13474 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13475 {
13476 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13477
13478 ix86_expand_clear (low[0]);
13479 ix86_expand_clear (high[0]);
13480 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13481
13482 d = gen_lowpart (QImode, low[0]);
13483 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13484 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13485 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13486
13487 d = gen_lowpart (QImode, high[0]);
13488 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13489 s = gen_rtx_NE (QImode, flags, const0_rtx);
13490 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13491 }
13492
13493 /* Otherwise, we can get the same results by manually performing
13494 a bit extract operation on bit 5/6, and then performing the two
13495 shifts. The two methods of getting 0/1 into low/high are exactly
13496 the same size. Avoiding the shift in the bit extract case helps
13497 pentium4 a bit; no one else seems to care much either way. */
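/* Sketch of this path for DImode on a 32-bit target (single_width == 32),
   written as scalar pseudo-C:

     hi = (count >> 5) & 1;      extract bit 5 of the shift count
     lo = hi ^ 1;
     lo <<= (count & 31);        the 32-bit shifter masks the count
     hi <<= (count & 31);

   which yields 1 << count with no branches and no cmoves.  */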
13498 else
13499 {
13500 rtx x;
13501
13502 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13503 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13504 else
13505 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13506 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13507
13508 emit_insn ((mode == DImode
13509 ? gen_lshrsi3
13510 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13511 emit_insn ((mode == DImode
13512 ? gen_andsi3
13513 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13514 emit_move_insn (low[0], high[0]);
13515 emit_insn ((mode == DImode
13516 ? gen_xorsi3
13517 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13518 }
13519
13520 emit_insn ((mode == DImode
13521 ? gen_ashlsi3
13522 : gen_ashldi3) (low[0], low[0], operands[2]));
13523 emit_insn ((mode == DImode
13524 ? gen_ashlsi3
13525 : gen_ashldi3) (high[0], high[0], operands[2]));
13526 return;
13527 }
13528
13529 if (operands[1] == constm1_rtx)
13530 {
13531 /* For -1 << N, we can avoid the shld instruction, because we
13532 know that we're shifting 0...31/63 ones into a -1. */
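/* Illustration: with both halves preset to all ones, the shld that would
   normally move bits from the low half into the high half is redundant -
   shifting ones into a word that is already all ones leaves it unchanged -
   so only the plain shift of the low half and the generic shift-count
   adjustment below are needed.  */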
13533 emit_move_insn (low[0], constm1_rtx);
13534 if (optimize_size)
13535 emit_move_insn (high[0], low[0]);
13536 else
13537 emit_move_insn (high[0], constm1_rtx);
13538 }
13539 else
13540 {
13541 if (!rtx_equal_p (operands[0], operands[1]))
13542 emit_move_insn (operands[0], operands[1]);
13543
13544 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13545 emit_insn ((mode == DImode
13546 ? gen_x86_shld_1
13547 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13548 }
13549
13550 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13551
13552 if (TARGET_CMOVE && scratch)
13553 {
13554 ix86_expand_clear (scratch);
13555 emit_insn ((mode == DImode
13556 ? gen_x86_shift_adj_1
13557 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13558 }
13559 else
13560 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13561 }
13562
13563 void
13564 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13565 {
13566 rtx low[2], high[2];
13567 int count;
13568 const int single_width = mode == DImode ? 32 : 64;
13569
13570 if (CONST_INT_P (operands[2]))
13571 {
13572 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13573 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13574
13575 if (count == single_width * 2 - 1)
13576 {
13577 emit_move_insn (high[0], high[1]);
13578 emit_insn ((mode == DImode
13579 ? gen_ashrsi3
13580 : gen_ashrdi3) (high[0], high[0],
13581 GEN_INT (single_width - 1)));
13582 emit_move_insn (low[0], high[0]);
13583
13584 }
13585 else if (count >= single_width)
13586 {
13587 emit_move_insn (low[0], high[1]);
13588 emit_move_insn (high[0], low[0]);
13589 emit_insn ((mode == DImode
13590 ? gen_ashrsi3
13591 : gen_ashrdi3) (high[0], high[0],
13592 GEN_INT (single_width - 1)));
13593 if (count > single_width)
13594 emit_insn ((mode == DImode
13595 ? gen_ashrsi3
13596 : gen_ashrdi3) (low[0], low[0],
13597 GEN_INT (count - single_width)));
13598 }
13599 else
13600 {
13601 if (!rtx_equal_p (operands[0], operands[1]))
13602 emit_move_insn (operands[0], operands[1]);
13603 emit_insn ((mode == DImode
13604 ? gen_x86_shrd_1
13605 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13606 emit_insn ((mode == DImode
13607 ? gen_ashrsi3
13608 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13609 }
13610 }
13611 else
13612 {
13613 if (!rtx_equal_p (operands[0], operands[1]))
13614 emit_move_insn (operands[0], operands[1]);
13615
13616 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13617
13618 emit_insn ((mode == DImode
13619 ? gen_x86_shrd_1
13620 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13621 emit_insn ((mode == DImode
13622 ? gen_ashrsi3
13623 : gen_ashrdi3) (high[0], high[0], operands[2]));
13624
13625 if (TARGET_CMOVE && scratch)
13626 {
13627 emit_move_insn (scratch, high[0]);
13628 emit_insn ((mode == DImode
13629 ? gen_ashrsi3
13630 : gen_ashrdi3) (scratch, scratch,
13631 GEN_INT (single_width - 1)));
13632 emit_insn ((mode == DImode
13633 ? gen_x86_shift_adj_1
13634 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13635 scratch));
13636 }
13637 else
13638 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13639 }
13640 }
13641
13642 void
13643 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13644 {
13645 rtx low[2], high[2];
13646 int count;
13647 const int single_width = mode == DImode ? 32 : 64;
13648
13649 if (CONST_INT_P (operands[2]))
13650 {
13651 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13652 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13653
13654 if (count >= single_width)
13655 {
13656 emit_move_insn (low[0], high[1]);
13657 ix86_expand_clear (high[0]);
13658
13659 if (count > single_width)
13660 emit_insn ((mode == DImode
13661 ? gen_lshrsi3
13662 : gen_lshrdi3) (low[0], low[0],
13663 GEN_INT (count - single_width)));
13664 }
13665 else
13666 {
13667 if (!rtx_equal_p (operands[0], operands[1]))
13668 emit_move_insn (operands[0], operands[1]);
13669 emit_insn ((mode == DImode
13670 ? gen_x86_shrd_1
13671 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13672 emit_insn ((mode == DImode
13673 ? gen_lshrsi3
13674 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13675 }
13676 }
13677 else
13678 {
13679 if (!rtx_equal_p (operands[0], operands[1]))
13680 emit_move_insn (operands[0], operands[1]);
13681
13682 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13683
13684 emit_insn ((mode == DImode
13685 ? gen_x86_shrd_1
13686 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13687 emit_insn ((mode == DImode
13688 ? gen_lshrsi3
13689 : gen_lshrdi3) (high[0], high[0], operands[2]));
13690
13691 /* Heh. By reversing the arguments, we can reuse this pattern. */
13692 if (TARGET_CMOVE && scratch)
13693 {
13694 ix86_expand_clear (scratch);
13695 emit_insn ((mode == DImode
13696 ? gen_x86_shift_adj_1
13697 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13698 scratch));
13699 }
13700 else
13701 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13702 }
13703 }
13704
13705 /* Predict just emitted jump instruction to be taken with probability PROB. */
13706 static void
13707 predict_jump (int prob)
13708 {
13709 rtx insn = get_last_insn ();
13710 gcc_assert (JUMP_P (insn));
13711 REG_NOTES (insn)
13712 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13713 GEN_INT (prob),
13714 REG_NOTES (insn));
13715 }
13716
13717 /* Helper function for the string operations below. Test VARIABLE whether
13718 it is aligned to VALUE bytes. If so, jump to the label. */
13719 static rtx
13720 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13721 {
13722 rtx label = gen_label_rtx ();
13723 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13724 if (GET_MODE (variable) == DImode)
13725 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13726 else
13727 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13728 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13729 1, label);
13730 if (epilogue)
13731 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13732 else
13733 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13734 return label;
13735 }
13736
13737 /* Adjust COUNTREG by subtracting VALUE. */
13738 static void
13739 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13740 {
13741 if (GET_MODE (countreg) == DImode)
13742 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13743 else
13744 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13745 }
13746
13747 /* Zero extend possibly SImode EXP to Pmode register. */
13748 rtx
13749 ix86_zero_extend_to_Pmode (rtx exp)
13750 {
13751 rtx r;
13752 if (GET_MODE (exp) == VOIDmode)
13753 return force_reg (Pmode, exp);
13754 if (GET_MODE (exp) == Pmode)
13755 return copy_to_mode_reg (Pmode, exp);
13756 r = gen_reg_rtx (Pmode);
13757 emit_insn (gen_zero_extendsidi2 (r, exp));
13758 return r;
13759 }
13760
13761 /* Divide COUNTREG by SCALE. */
13762 static rtx
13763 scale_counter (rtx countreg, int scale)
13764 {
13765 rtx sc;
13766 rtx piece_size_mask;
13767
13768 if (scale == 1)
13769 return countreg;
13770 if (CONST_INT_P (countreg))
13771 return GEN_INT (INTVAL (countreg) / scale);
13772 gcc_assert (REG_P (countreg));
13773
13774 piece_size_mask = GEN_INT (scale - 1);
13775 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13776 GEN_INT (exact_log2 (scale)),
13777 NULL, 1, OPTAB_DIRECT);
13778 return sc;
13779 }
13780
13781 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13782 DImode for constant loop counts. */
13783
13784 static enum machine_mode
13785 counter_mode (rtx count_exp)
13786 {
13787 if (GET_MODE (count_exp) != VOIDmode)
13788 return GET_MODE (count_exp);
13789 if (GET_CODE (count_exp) != CONST_INT)
13790 return Pmode;
13791 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13792 return DImode;
13793 return SImode;
13794 }
13795
13796 /* When SRCPTR is non-NULL, output a simple loop to move memory from
13797 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13798 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13799 equivalent loop to set memory to VALUE (assumed to be in MODE).
13800
13801 The size is rounded down to a whole number of the chunk size moved at once.
13802 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
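/* Rough shape of the code emitted below, as pseudo-C (the real output is
   rtl; the compare and labels correspond to top_label/out_label):

     size = count & ~(chunk * unroll - 1);
     iter = 0;
   top:
     copy (or store VALUE into) UNROLL chunks at dest + iter, src + iter;
     iter += chunk * unroll;
     if (iter < size) goto top;
     destptr += iter;  srcptr += iter;   (srcptr only when copying)
   out:
*/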
13803
13804
13805 static void
13806 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13807 rtx destptr, rtx srcptr, rtx value,
13808 rtx count, enum machine_mode mode, int unroll,
13809 int expected_size)
13810 {
13811 rtx out_label, top_label, iter, tmp;
13812 enum machine_mode iter_mode = counter_mode (count);
13813 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13814 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13815 rtx size;
13816 rtx x_addr;
13817 rtx y_addr;
13818 int i;
13819
13820 top_label = gen_label_rtx ();
13821 out_label = gen_label_rtx ();
13822 iter = gen_reg_rtx (iter_mode);
13823
13824 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13825 NULL, 1, OPTAB_DIRECT);
13826 /* Those two should combine. */
13827 if (piece_size == const1_rtx)
13828 {
13829 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13830 true, out_label);
13831 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13832 }
13833 emit_move_insn (iter, const0_rtx);
13834
13835 emit_label (top_label);
13836
13837 tmp = convert_modes (Pmode, iter_mode, iter, true);
13838 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13839 destmem = change_address (destmem, mode, x_addr);
13840
13841 if (srcmem)
13842 {
13843 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13844 srcmem = change_address (srcmem, mode, y_addr);
13845
13846 /* When unrolling for chips that reorder memory reads and writes,
13847 we can save registers by using a single temporary.
13848 Also, using 4 temporaries is overkill in 32-bit mode. */
13849 if (!TARGET_64BIT && 0)
13850 {
13851 for (i = 0; i < unroll; i++)
13852 {
13853 if (i)
13854 {
13855 destmem =
13856 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13857 srcmem =
13858 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13859 }
13860 emit_move_insn (destmem, srcmem);
13861 }
13862 }
13863 else
13864 {
13865 rtx tmpreg[4];
13866 gcc_assert (unroll <= 4);
13867 for (i = 0; i < unroll; i++)
13868 {
13869 tmpreg[i] = gen_reg_rtx (mode);
13870 if (i)
13871 {
13872 srcmem =
13873 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13874 }
13875 emit_move_insn (tmpreg[i], srcmem);
13876 }
13877 for (i = 0; i < unroll; i++)
13878 {
13879 if (i)
13880 {
13881 destmem =
13882 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13883 }
13884 emit_move_insn (destmem, tmpreg[i]);
13885 }
13886 }
13887 }
13888 else
13889 for (i = 0; i < unroll; i++)
13890 {
13891 if (i)
13892 destmem =
13893 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13894 emit_move_insn (destmem, value);
13895 }
13896
13897 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13898 true, OPTAB_LIB_WIDEN);
13899 if (tmp != iter)
13900 emit_move_insn (iter, tmp);
13901
13902 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13903 true, top_label);
13904 if (expected_size != -1)
13905 {
13906 expected_size /= GET_MODE_SIZE (mode) * unroll;
13907 if (expected_size == 0)
13908 predict_jump (0);
13909 else if (expected_size > REG_BR_PROB_BASE)
13910 predict_jump (REG_BR_PROB_BASE - 1);
13911 else
13912 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13913 }
13914 else
13915 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13916 iter = ix86_zero_extend_to_Pmode (iter);
13917 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13918 true, OPTAB_LIB_WIDEN);
13919 if (tmp != destptr)
13920 emit_move_insn (destptr, tmp);
13921 if (srcptr)
13922 {
13923 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13924 true, OPTAB_LIB_WIDEN);
13925 if (tmp != srcptr)
13926 emit_move_insn (srcptr, tmp);
13927 }
13928 emit_label (out_label);
13929 }
13930
13931 /* Output a "rep; mov" instruction.
13932 Arguments have the same meaning as for the previous function. */
13933 static void
13934 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13935 rtx destptr, rtx srcptr,
13936 rtx count,
13937 enum machine_mode mode)
13938 {
13939 rtx destexp;
13940 rtx srcexp;
13941 rtx countreg;
13942
13943 /* If the size is known, it is shorter to use rep movs. */
13944 if (mode == QImode && CONST_INT_P (count)
13945 && !(INTVAL (count) & 3))
13946 mode = SImode;
13947
13948 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13949 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13950 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13951 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13952 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13953 if (mode != QImode)
13954 {
13955 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13956 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13957 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13958 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13959 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13960 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13961 }
13962 else
13963 {
13964 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13965 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13966 }
13967 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13968 destexp, srcexp));
13969 }
13970
13971 /* Output a "rep; stos" instruction.
13972 Arguments have the same meaning as for the previous function. */
13973 static void
13974 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13975 rtx count,
13976 enum machine_mode mode)
13977 {
13978 rtx destexp;
13979 rtx countreg;
13980
13981 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13982 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13983 value = force_reg (mode, gen_lowpart (mode, value));
13984 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13985 if (mode != QImode)
13986 {
13987 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13988 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13989 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13990 }
13991 else
13992 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13993 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13994 }
13995
13996 static void
13997 emit_strmov (rtx destmem, rtx srcmem,
13998 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13999 {
14000 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14001 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14002 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14003 }
14004
14005 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
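/* For a constant COUNT the tail is expanded bit by bit; e.g. with
   countval == 7 and max_size == 8 this emits one SImode, one HImode and
   one QImode move (4 + 2 + 1 bytes) at increasing offsets.  */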
14006 static void
14007 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14008 rtx destptr, rtx srcptr, rtx count, int max_size)
14009 {
14010 rtx src, dest;
14011 if (CONST_INT_P (count))
14012 {
14013 HOST_WIDE_INT countval = INTVAL (count);
14014 int offset = 0;
14015
14016 if ((countval & 0x10) && max_size > 16)
14017 {
14018 if (TARGET_64BIT)
14019 {
14020 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14021 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14022 }
14023 else
14024 gcc_unreachable ();
14025 offset += 16;
14026 }
14027 if ((countval & 0x08) && max_size > 8)
14028 {
14029 if (TARGET_64BIT)
14030 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14031 else
14032 {
14033 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14034 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14035 }
14036 offset += 8;
14037 }
14038 if ((countval & 0x04) && max_size > 4)
14039 {
14040 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14041 offset += 4;
14042 }
14043 if ((countval & 0x02) && max_size > 2)
14044 {
14045 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14046 offset += 2;
14047 }
14048 if ((countval & 0x01) && max_size > 1)
14049 {
14050 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14051 offset += 1;
14052 }
14053 return;
14054 }
14055 if (max_size > 8)
14056 {
14057 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14058 count, 1, OPTAB_DIRECT);
14059 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14060 count, QImode, 1, 4);
14061 return;
14062 }
14063
14064 /* When single-instruction stringops are available, we can cheaply advance
14065 the dest and src pointers. Otherwise we save code size by maintaining an
14066 offset (zero is readily available from the preceding rep operation) and
14067 using x86 addressing modes. */
14068 if (TARGET_SINGLE_STRINGOP)
14069 {
14070 if (max_size > 4)
14071 {
14072 rtx label = ix86_expand_aligntest (count, 4, true);
14073 src = change_address (srcmem, SImode, srcptr);
14074 dest = change_address (destmem, SImode, destptr);
14075 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14076 emit_label (label);
14077 LABEL_NUSES (label) = 1;
14078 }
14079 if (max_size > 2)
14080 {
14081 rtx label = ix86_expand_aligntest (count, 2, true);
14082 src = change_address (srcmem, HImode, srcptr);
14083 dest = change_address (destmem, HImode, destptr);
14084 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14085 emit_label (label);
14086 LABEL_NUSES (label) = 1;
14087 }
14088 if (max_size > 1)
14089 {
14090 rtx label = ix86_expand_aligntest (count, 1, true);
14091 src = change_address (srcmem, QImode, srcptr);
14092 dest = change_address (destmem, QImode, destptr);
14093 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14094 emit_label (label);
14095 LABEL_NUSES (label) = 1;
14096 }
14097 }
14098 else
14099 {
14100 rtx offset = force_reg (Pmode, const0_rtx);
14101 rtx tmp;
14102
14103 if (max_size > 4)
14104 {
14105 rtx label = ix86_expand_aligntest (count, 4, true);
14106 src = change_address (srcmem, SImode, srcptr);
14107 dest = change_address (destmem, SImode, destptr);
14108 emit_move_insn (dest, src);
14109 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14110 true, OPTAB_LIB_WIDEN);
14111 if (tmp != offset)
14112 emit_move_insn (offset, tmp);
14113 emit_label (label);
14114 LABEL_NUSES (label) = 1;
14115 }
14116 if (max_size > 2)
14117 {
14118 rtx label = ix86_expand_aligntest (count, 2, true);
14119 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14120 src = change_address (srcmem, HImode, tmp);
14121 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14122 dest = change_address (destmem, HImode, tmp);
14123 emit_move_insn (dest, src);
14124 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14125 true, OPTAB_LIB_WIDEN);
14126 if (tmp != offset)
14127 emit_move_insn (offset, tmp);
14128 emit_label (label);
14129 LABEL_NUSES (label) = 1;
14130 }
14131 if (max_size > 1)
14132 {
14133 rtx label = ix86_expand_aligntest (count, 1, true);
14134 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14135 src = change_address (srcmem, QImode, tmp);
14136 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14137 dest = change_address (destmem, QImode, tmp);
14138 emit_move_insn (dest, src);
14139 emit_label (label);
14140 LABEL_NUSES (label) = 1;
14141 }
14142 }
14143 }
14144
14145 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14146 static void
14147 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14148 rtx count, int max_size)
14149 {
14150 count =
14151 expand_simple_binop (counter_mode (count), AND, count,
14152 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14153 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14154 gen_lowpart (QImode, value), count, QImode,
14155 1, max_size / 2);
14156 }
14157
14158 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14159 static void
14160 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14161 {
14162 rtx dest;
14163
14164 if (CONST_INT_P (count))
14165 {
14166 HOST_WIDE_INT countval = INTVAL (count);
14167 int offset = 0;
14168
14169 if ((countval & 0x10) && max_size > 16)
14170 {
14171 if (TARGET_64BIT)
14172 {
14173 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14174 emit_insn (gen_strset (destptr, dest, value));
14175 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14176 emit_insn (gen_strset (destptr, dest, value));
14177 }
14178 else
14179 gcc_unreachable ();
14180 offset += 16;
14181 }
14182 if ((countval & 0x08) && max_size > 8)
14183 {
14184 if (TARGET_64BIT)
14185 {
14186 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14187 emit_insn (gen_strset (destptr, dest, value));
14188 }
14189 else
14190 {
14191 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14192 emit_insn (gen_strset (destptr, dest, value));
14193 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14194 emit_insn (gen_strset (destptr, dest, value));
14195 }
14196 offset += 8;
14197 }
14198 if ((countval & 0x04) && max_size > 4)
14199 {
14200 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14201 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14202 offset += 4;
14203 }
14204 if ((countval & 0x02) && max_size > 2)
14205 {
14206 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14207 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14208 offset += 2;
14209 }
14210 if ((countval & 0x01) && max_size > 1)
14211 {
14212 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14213 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14214 offset += 1;
14215 }
14216 return;
14217 }
14218 if (max_size > 32)
14219 {
14220 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14221 return;
14222 }
14223 if (max_size > 16)
14224 {
14225 rtx label = ix86_expand_aligntest (count, 16, true);
14226 if (TARGET_64BIT)
14227 {
14228 dest = change_address (destmem, DImode, destptr);
14229 emit_insn (gen_strset (destptr, dest, value));
14230 emit_insn (gen_strset (destptr, dest, value));
14231 }
14232 else
14233 {
14234 dest = change_address (destmem, SImode, destptr);
14235 emit_insn (gen_strset (destptr, dest, value));
14236 emit_insn (gen_strset (destptr, dest, value));
14237 emit_insn (gen_strset (destptr, dest, value));
14238 emit_insn (gen_strset (destptr, dest, value));
14239 }
14240 emit_label (label);
14241 LABEL_NUSES (label) = 1;
14242 }
14243 if (max_size > 8)
14244 {
14245 rtx label = ix86_expand_aligntest (count, 8, true);
14246 if (TARGET_64BIT)
14247 {
14248 dest = change_address (destmem, DImode, destptr);
14249 emit_insn (gen_strset (destptr, dest, value));
14250 }
14251 else
14252 {
14253 dest = change_address (destmem, SImode, destptr);
14254 emit_insn (gen_strset (destptr, dest, value));
14255 emit_insn (gen_strset (destptr, dest, value));
14256 }
14257 emit_label (label);
14258 LABEL_NUSES (label) = 1;
14259 }
14260 if (max_size > 4)
14261 {
14262 rtx label = ix86_expand_aligntest (count, 4, true);
14263 dest = change_address (destmem, SImode, destptr);
14264 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14265 emit_label (label);
14266 LABEL_NUSES (label) = 1;
14267 }
14268 if (max_size > 2)
14269 {
14270 rtx label = ix86_expand_aligntest (count, 2, true);
14271 dest = change_address (destmem, HImode, destptr);
14272 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14273 emit_label (label);
14274 LABEL_NUSES (label) = 1;
14275 }
14276 if (max_size > 1)
14277 {
14278 rtx label = ix86_expand_aligntest (count, 1, true);
14279 dest = change_address (destmem, QImode, destptr);
14280 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14281 emit_label (label);
14282 LABEL_NUSES (label) = 1;
14283 }
14284 }
14285
14286 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14287 DESIRED_ALIGNMENT. */
14288 static void
14289 expand_movmem_prologue (rtx destmem, rtx srcmem,
14290 rtx destptr, rtx srcptr, rtx count,
14291 int align, int desired_alignment)
14292 {
14293 if (align <= 1 && desired_alignment > 1)
14294 {
14295 rtx label = ix86_expand_aligntest (destptr, 1, false);
14296 srcmem = change_address (srcmem, QImode, srcptr);
14297 destmem = change_address (destmem, QImode, destptr);
14298 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14299 ix86_adjust_counter (count, 1);
14300 emit_label (label);
14301 LABEL_NUSES (label) = 1;
14302 }
14303 if (align <= 2 && desired_alignment > 2)
14304 {
14305 rtx label = ix86_expand_aligntest (destptr, 2, false);
14306 srcmem = change_address (srcmem, HImode, srcptr);
14307 destmem = change_address (destmem, HImode, destptr);
14308 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14309 ix86_adjust_counter (count, 2);
14310 emit_label (label);
14311 LABEL_NUSES (label) = 1;
14312 }
14313 if (align <= 4 && desired_alignment > 4)
14314 {
14315 rtx label = ix86_expand_aligntest (destptr, 4, false);
14316 srcmem = change_address (srcmem, SImode, srcptr);
14317 destmem = change_address (destmem, SImode, destptr);
14318 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14319 ix86_adjust_counter (count, 4);
14320 emit_label (label);
14321 LABEL_NUSES (label) = 1;
14322 }
14323 gcc_assert (desired_alignment <= 8);
14324 }
14325
14326 /* Store enough into DEST to align DEST, known to be aligned by ALIGN, to
14327 DESIRED_ALIGNMENT. */
14328 static void
14329 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14330 int align, int desired_alignment)
14331 {
14332 if (align <= 1 && desired_alignment > 1)
14333 {
14334 rtx label = ix86_expand_aligntest (destptr, 1, false);
14335 destmem = change_address (destmem, QImode, destptr);
14336 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14337 ix86_adjust_counter (count, 1);
14338 emit_label (label);
14339 LABEL_NUSES (label) = 1;
14340 }
14341 if (align <= 2 && desired_alignment > 2)
14342 {
14343 rtx label = ix86_expand_aligntest (destptr, 2, false);
14344 destmem = change_address (destmem, HImode, destptr);
14345 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14346 ix86_adjust_counter (count, 2);
14347 emit_label (label);
14348 LABEL_NUSES (label) = 1;
14349 }
14350 if (align <= 4 && desired_alignment > 4)
14351 {
14352 rtx label = ix86_expand_aligntest (destptr, 4, false);
14353 destmem = change_address (destmem, SImode, destptr);
14354 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14355 ix86_adjust_counter (count, 4);
14356 emit_label (label);
14357 LABEL_NUSES (label) = 1;
14358 }
14359 gcc_assert (desired_alignment <= 8);
14360 }
14361
14362 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14363 static enum stringop_alg
14364 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14365 int *dynamic_check)
14366 {
14367 const struct stringop_algs * algs;
14368
14369 *dynamic_check = -1;
14370 if (memset)
14371 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14372 else
14373 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14374 if (stringop_alg != no_stringop)
14375 return stringop_alg;
14376 /* rep; movq or rep; movl is the smallest variant. */
14377 else if (optimize_size)
14378 {
14379 if (!count || (count & 3))
14380 return rep_prefix_1_byte;
14381 else
14382 return rep_prefix_4_byte;
14383 }
14384 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14385 */
14386 else if (expected_size != -1 && expected_size < 4)
14387 return loop_1_byte;
14388 else if (expected_size != -1)
14389 {
14390 unsigned int i;
14391 enum stringop_alg alg = libcall;
14392 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14393 {
14394 gcc_assert (algs->size[i].max);
14395 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14396 {
14397 if (algs->size[i].alg != libcall)
14398 alg = algs->size[i].alg;
14399 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14400 last non-libcall inline algorithm. */
14401 if (TARGET_INLINE_ALL_STRINGOPS)
14402 {
14403 /* When the current size is best copied by a libcall,
14404 but we are still forced to inline, run the heuristic below
14405 that will pick code for medium-sized blocks. */
14406 if (alg != libcall)
14407 return alg;
14408 break;
14409 }
14410 else
14411 return algs->size[i].alg;
14412 }
14413 }
14414 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14415 }
14416 /* When asked to inline the call anyway, try to pick a meaningful choice.
14417 We look for the maximal size of a block that is faster to copy by hand and
14418 take blocks of at most that size, guessing that the average size will
14419 be roughly half of the block.
14420
14421 If this turns out to be bad, we might simply specify the preferred
14422 choice in ix86_costs. */
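/* Hypothetical example of this fallback: if the cost table lists
   {256, rep_prefix_4_byte} followed by {-1, libcall}, then with
   -minline-stringops-dynamically we take max = 256, re-run decide_alg
   with an expected size of 128 (picking rep_prefix_4_byte), and set
   *dynamic_check = 256 so the expander emits a run-time size test that
   falls back to the library call for larger blocks.  */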
14423 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14424 && algs->unknown_size == libcall)
14425 {
14426 int max = -1;
14427 enum stringop_alg alg;
14428 int i;
14429
14430 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14431 if (algs->size[i].alg != libcall && algs->size[i].alg)
14432 max = algs->size[i].max;
14433 if (max == -1)
14434 max = 4096;
14435 alg = decide_alg (count, max / 2, memset, dynamic_check);
14436 gcc_assert (*dynamic_check == -1);
14437 gcc_assert (alg != libcall);
14438 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14439 *dynamic_check = max;
14440 return alg;
14441 }
14442 return algs->unknown_size;
14443 }
14444
14445 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14446 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14447 static int
14448 decide_alignment (int align,
14449 enum stringop_alg alg,
14450 int expected_size)
14451 {
14452 int desired_align = 0;
14453 switch (alg)
14454 {
14455 case no_stringop:
14456 gcc_unreachable ();
14457 case loop:
14458 case unrolled_loop:
14459 desired_align = GET_MODE_SIZE (Pmode);
14460 break;
14461 case rep_prefix_8_byte:
14462 desired_align = 8;
14463 break;
14464 case rep_prefix_4_byte:
14465 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14466 copying a whole cacheline at once. */
14467 if (TARGET_PENTIUMPRO)
14468 desired_align = 8;
14469 else
14470 desired_align = 4;
14471 break;
14472 case rep_prefix_1_byte:
14473 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14474 copying a whole cacheline at once. */
14475 if (TARGET_PENTIUMPRO)
14476 desired_align = 8;
14477 else
14478 desired_align = 1;
14479 break;
14480 case loop_1_byte:
14481 desired_align = 1;
14482 break;
14483 case libcall:
14484 return 0;
14485 }
14486
14487 if (optimize_size)
14488 desired_align = 1;
14489 if (desired_align < align)
14490 desired_align = align;
14491 if (expected_size != -1 && expected_size < 4)
14492 desired_align = align;
14493 return desired_align;
14494 }
14495
14496 /* Return the smallest power of 2 greater than VAL. */
14497 static int
14498 smallest_pow2_greater_than (int val)
14499 {
14500 int ret = 1;
14501 while (ret <= val)
14502 ret <<= 1;
14503 return ret;
14504 }
14505
14506 /* Expand string move (memcpy) operation. Use i386 string operations when
14507 profitable. expand_clrmem contains similar code. The code depends upon
14508 architecture, block size and alignment, but always has the same
14509 overall structure:
14510
14511 1) Prologue guard: Conditional that jumps up to epilogues for small
14512 blocks that can be handled by epilogue alone. This is faster but
14513 also needed for correctness, since the prologue assumes the block is larger
14514 than the desired alignment.
14515
14516 Optional dynamic check for size and libcall for large
14517 blocks is emitted here too, with -minline-stringops-dynamically.
14518
14519 2) Prologue: copy first few bytes in order to get destination aligned
14520 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14521 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14522 We emit either a jump tree on power of two sized blocks, or a byte loop.
14523
14524 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14525 with specified algorithm.
14526
14527 4) Epilogue: code copying tail of the block that is too small to be
14528 handled by main body (or up to size guarded by prologue guard). */
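/* A rough picture of the emitted sequence for a non-constant count
   (illustrative pseudo-code only):

     if (count < epilogue_size_needed) goto epilogue;               (1)
     [with -minline-stringops-dynamically and count >= dynamic_check,
      call the library memcpy instead and skip the rest]
     while (dest is not aligned to desired_align) copy 1/2/4 bytes;  (2)
     copy the bulk in size_needed-byte chunks (loop or rep mov);     (3)
   epilogue:
     copy the remaining count & (epilogue_size_needed - 1) bytes;    (4)
*/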
14529
14530 int
14531 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14532 rtx expected_align_exp, rtx expected_size_exp)
14533 {
14534 rtx destreg;
14535 rtx srcreg;
14536 rtx label = NULL;
14537 rtx tmp;
14538 rtx jump_around_label = NULL;
14539 HOST_WIDE_INT align = 1;
14540 unsigned HOST_WIDE_INT count = 0;
14541 HOST_WIDE_INT expected_size = -1;
14542 int size_needed = 0, epilogue_size_needed;
14543 int desired_align = 0;
14544 enum stringop_alg alg;
14545 int dynamic_check;
14546
14547 if (CONST_INT_P (align_exp))
14548 align = INTVAL (align_exp);
14549 /* i386 can do misaligned access at a reasonably increased cost. */
14550 if (CONST_INT_P (expected_align_exp)
14551 && INTVAL (expected_align_exp) > align)
14552 align = INTVAL (expected_align_exp);
14553 if (CONST_INT_P (count_exp))
14554 count = expected_size = INTVAL (count_exp);
14555 if (CONST_INT_P (expected_size_exp) && count == 0)
14556 expected_size = INTVAL (expected_size_exp);
14557
14558 /* Step 0: Decide on preferred algorithm, desired alignment and
14559 size of chunks to be copied by main loop. */
14560
14561 alg = decide_alg (count, expected_size, false, &dynamic_check);
14562 desired_align = decide_alignment (align, alg, expected_size);
14563
14564 if (!TARGET_ALIGN_STRINGOPS)
14565 align = desired_align;
14566
14567 if (alg == libcall)
14568 return 0;
14569 gcc_assert (alg != no_stringop);
14570 if (!count)
14571 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14572 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14573 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14574 switch (alg)
14575 {
14576 case libcall:
14577 case no_stringop:
14578 gcc_unreachable ();
14579 case loop:
14580 size_needed = GET_MODE_SIZE (Pmode);
14581 break;
14582 case unrolled_loop:
14583 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14584 break;
14585 case rep_prefix_8_byte:
14586 size_needed = 8;
14587 break;
14588 case rep_prefix_4_byte:
14589 size_needed = 4;
14590 break;
14591 case rep_prefix_1_byte:
14592 case loop_1_byte:
14593 size_needed = 1;
14594 break;
14595 }
14596
14597 epilogue_size_needed = size_needed;
14598
14599 /* Step 1: Prologue guard. */
14600
14601 /* Alignment code needs count to be in register. */
14602 if (CONST_INT_P (count_exp) && desired_align > align)
14603 {
14604 enum machine_mode mode = SImode;
14605 if (TARGET_64BIT && (count & ~0xffffffff))
14606 mode = DImode;
14607 count_exp = force_reg (mode, count_exp);
14608 }
14609 gcc_assert (desired_align >= 1 && align >= 1);
14610
14611 /* Ensure that alignment prologue won't copy past end of block. */
14612 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14613 {
14614 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14615 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14616 Make sure it is a power of 2. */
14617 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14618
14619 label = gen_label_rtx ();
14620 emit_cmp_and_jump_insns (count_exp,
14621 GEN_INT (epilogue_size_needed),
14622 LTU, 0, counter_mode (count_exp), 1, label);
14623 if (GET_CODE (count_exp) == CONST_INT)
14624 ;
14625 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14626 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14627 else
14628 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14629 }
14630 /* Emit code to decide at runtime whether a library call or the inline code
14631 should be used. */
14632 if (dynamic_check != -1)
14633 {
14634 rtx hot_label = gen_label_rtx ();
14635 jump_around_label = gen_label_rtx ();
14636 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14637 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14638 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14639 emit_block_move_via_libcall (dst, src, count_exp, false);
14640 emit_jump (jump_around_label);
14641 emit_label (hot_label);
14642 }
14643
14644 /* Step 2: Alignment prologue. */
14645
14646 if (desired_align > align)
14647 {
14648 /* Except for the first move in the epilogue, we no longer know
14649 the constant offset in the aliasing info. It doesn't seem worth
14650 the pain to maintain it for the first move, so throw away
14651 the info early. */
14652 src = change_address (src, BLKmode, srcreg);
14653 dst = change_address (dst, BLKmode, destreg);
14654 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14655 desired_align);
14656 }
14657 if (label && size_needed == 1)
14658 {
14659 emit_label (label);
14660 LABEL_NUSES (label) = 1;
14661 label = NULL;
14662 }
14663
14664 /* Step 3: Main loop. */
14665
14666 switch (alg)
14667 {
14668 case libcall:
14669 case no_stringop:
14670 gcc_unreachable ();
14671 case loop_1_byte:
14672 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14673 count_exp, QImode, 1, expected_size);
14674 break;
14675 case loop:
14676 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14677 count_exp, Pmode, 1, expected_size);
14678 break;
14679 case unrolled_loop:
14680 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14681 registers for 4 temporaries anyway.  */
14682 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14683 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14684 expected_size);
14685 break;
14686 case rep_prefix_8_byte:
14687 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14688 DImode);
14689 break;
14690 case rep_prefix_4_byte:
14691 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14692 SImode);
14693 break;
14694 case rep_prefix_1_byte:
14695 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14696 QImode);
14697 break;
14698 }
14699 /* Properly adjust the offsets of src and dest memory for aliasing.  */
14700 if (CONST_INT_P (count_exp))
14701 {
14702 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14703 (count / size_needed) * size_needed);
14704 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14705 (count / size_needed) * size_needed);
14706 }
14707 else
14708 {
14709 src = change_address (src, BLKmode, srcreg);
14710 dst = change_address (dst, BLKmode, destreg);
14711 }
14712
14713 /* Step 4: Epilogue to copy the remaining bytes. */
14714
14715 if (label)
14716 {
14717 /* When the main loop is done, COUNT_EXP might hold the original count,
14718 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14719 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14720 bytes.  Compensate if needed.  */
14721
14722 if (size_needed < epilogue_size_needed)
14723 {
14724 tmp =
14725 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14726 GEN_INT (size_needed - 1), count_exp, 1,
14727 OPTAB_DIRECT);
14728 if (tmp != count_exp)
14729 emit_move_insn (count_exp, tmp);
14730 }
14731 emit_label (label);
14732 LABEL_NUSES (label) = 1;
14733 }
14734
14735 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14736 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14737 epilogue_size_needed);
14738 if (jump_around_label)
14739 emit_label (jump_around_label);
14740 return 1;
14741 }
14742
14743 /* Helper function for memset (ix86_expand_setmem).  For a QImode value
14744 0xXY produce 0xXYXYXYXY of the width specified by MODE.  This is
14745 essentially VAL * 0x01010101, but we can do slightly better than
14746 synth_mult by unwinding the sequence by hand on CPUs with
14747 slow multiply.  */
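/* For illustration (not part of the expander itself): with VAL = 0x5A and
   MODE = SImode the constant path below computes
       v  = 0x5A;
       v |= v << 8;       now 0x00005A5A
       v |= v << 16;      now 0x5A5A5A5A
   which is exactly 0x5A * 0x01010101; the register paths reach the same
   value either via a multiply by the promoted 0x01010101 constant or via
   the shift-and-IOR sequence further down.  */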
14748 static rtx
14749 promote_duplicated_reg (enum machine_mode mode, rtx val)
14750 {
14751 enum machine_mode valmode = GET_MODE (val);
14752 rtx tmp;
14753 int nops = mode == DImode ? 3 : 2;
14754
14755 gcc_assert (mode == SImode || mode == DImode);
14756 if (val == const0_rtx)
14757 return copy_to_mode_reg (mode, const0_rtx);
14758 if (CONST_INT_P (val))
14759 {
14760 HOST_WIDE_INT v = INTVAL (val) & 255;
14761
14762 v |= v << 8;
14763 v |= v << 16;
14764 if (mode == DImode)
14765 v |= (v << 16) << 16;
14766 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14767 }
14768
14769 if (valmode == VOIDmode)
14770 valmode = QImode;
14771 if (valmode != QImode)
14772 val = gen_lowpart (QImode, val);
14773 if (mode == QImode)
14774 return val;
14775 if (!TARGET_PARTIAL_REG_STALL)
14776 nops--;
14777 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14778 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14779 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14780 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14781 {
14782 rtx reg = convert_modes (mode, QImode, val, true);
14783 tmp = promote_duplicated_reg (mode, const1_rtx);
14784 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14785 OPTAB_DIRECT);
14786 }
14787 else
14788 {
14789 rtx reg = convert_modes (mode, QImode, val, true);
14790
14791 /* Without partial register stalls, copy the low byte into bits 8..15
14792 with a single insv-style move; otherwise use shift and IOR below.  */
14793 if (!TARGET_PARTIAL_REG_STALL)
14794 emit_insn (mode == SImode ? gen_movsi_insv_1 (reg, reg)
14795 : gen_movdi_insv_1_rex64 (reg, reg));
14796 else
14797 {
14798 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14799 NULL, 1, OPTAB_DIRECT);
14800 reg =
14801 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14802 }
14803 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14804 NULL, 1, OPTAB_DIRECT);
14805 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14806 if (mode == SImode)
14807 return reg;
14808 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14809 NULL, 1, OPTAB_DIRECT);
14810 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14811 return reg;
14812 }
14813 }
14814
14815 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
14816 will be needed by the main loop copying SIZE_NEEDED chunks and by the
14817 prologue raising alignment from ALIGN to DESIRED_ALIGN.  */
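/* For instance, a rep_prefix_8_byte clear on x86-64 (SIZE_NEEDED == 8) gets
   VAL promoted to DImode, while a pure byte loop with no extra alignment
   work (SIZE_NEEDED == 1, DESIRED_ALIGN == ALIGN) keeps VAL unchanged.  */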
14818 static rtx
14819 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14820 {
14821 rtx promoted_val;
14822
14823 if (TARGET_64BIT
14824 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14825 promoted_val = promote_duplicated_reg (DImode, val);
14826 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14827 promoted_val = promote_duplicated_reg (SImode, val);
14828 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14829 promoted_val = promote_duplicated_reg (HImode, val);
14830 else
14831 promoted_val = val;
14832
14833 return promoted_val;
14834 }
14835
14836 /* Expand string clear operation (bzero). Use i386 string operations when
14837 profitable. See expand_movmem comment for explanation of individual
14838 steps performed. */
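/* Roughly, the steps below mirror the movmem expander: Step 0 picks the
   algorithm and alignment, Step 1 guards small counts and cheaply promotes
   VAL, Step 2 aligns the destination, Step 3 emits the main loop or rep
   stos, and Step 4 stores the remaining tail bytes.  */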
14839 int
14840 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14841 rtx expected_align_exp, rtx expected_size_exp)
14842 {
14843 rtx destreg;
14844 rtx label = NULL;
14845 rtx tmp;
14846 rtx jump_around_label = NULL;
14847 HOST_WIDE_INT align = 1;
14848 unsigned HOST_WIDE_INT count = 0;
14849 HOST_WIDE_INT expected_size = -1;
14850 int size_needed = 0, epilogue_size_needed;
14851 int desired_align = 0;
14852 enum stringop_alg alg;
14853 rtx promoted_val = NULL;
14854 bool force_loopy_epilogue = false;
14855 int dynamic_check;
14856
14857 if (CONST_INT_P (align_exp))
14858 align = INTVAL (align_exp);
14859 /* i386 can do misaligned access at reasonably increased cost.  */
14860 if (CONST_INT_P (expected_align_exp)
14861 && INTVAL (expected_align_exp) > align)
14862 align = INTVAL (expected_align_exp);
14863 if (CONST_INT_P (count_exp))
14864 count = expected_size = INTVAL (count_exp);
14865 if (CONST_INT_P (expected_size_exp) && count == 0)
14866 expected_size = INTVAL (expected_size_exp);
14867
14868 /* Step 0: Decide on preferred algorithm, desired alignment and
14869 size of chunks to be copied by main loop. */
14870
14871 alg = decide_alg (count, expected_size, true, &dynamic_check);
14872 desired_align = decide_alignment (align, alg, expected_size);
14873
14874 if (!TARGET_ALIGN_STRINGOPS)
14875 align = desired_align;
14876
14877 if (alg == libcall)
14878 return 0;
14879 gcc_assert (alg != no_stringop);
14880 if (!count)
14881 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14882 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14883 switch (alg)
14884 {
14885 case libcall:
14886 case no_stringop:
14887 gcc_unreachable ();
14888 case loop:
14889 size_needed = GET_MODE_SIZE (Pmode);
14890 break;
14891 case unrolled_loop:
14892 size_needed = GET_MODE_SIZE (Pmode) * 4;
14893 break;
14894 case rep_prefix_8_byte:
14895 size_needed = 8;
14896 break;
14897 case rep_prefix_4_byte:
14898 size_needed = 4;
14899 break;
14900 case rep_prefix_1_byte:
14901 case loop_1_byte:
14902 size_needed = 1;
14903 break;
14904 }
14905 epilogue_size_needed = size_needed;
14906
14907 /* Step 1: Prologue guard. */
14908
14909 /* Alignment code needs count to be in register. */
14910 if (CONST_INT_P (count_exp) && desired_align > align)
14911 {
14912 enum machine_mode mode = SImode;
14913 if (TARGET_64BIT && (count & ~0xffffffff))
14914 mode = DImode;
14915 count_exp = force_reg (mode, count_exp);
14916 }
14917 /* Do the cheap promotion to allow better CSE across the
14918 main loop and epilogue (i.e. one load of the big constant in
14919 front of all the code).  */
14920 if (CONST_INT_P (val_exp))
14921 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14922 desired_align, align);
14923 /* Ensure that alignment prologue won't copy past end of block. */
14924 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14925 {
14926 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14927 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14928 Make sure it is a power of 2.  */
14929 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14930
14931 /* To improve performance of small blocks, we jump around the VAL
14932 promoting code.  This means that if the promoted VAL is not constant,
14933 we might not use it in the epilogue and have to use the byte
14934 loop variant.  */
14935 if (epilogue_size_needed > 2 && !promoted_val)
14936 force_loopy_epilogue = true;
14937 label = gen_label_rtx ();
14938 emit_cmp_and_jump_insns (count_exp,
14939 GEN_INT (epilogue_size_needed),
14940 LTU, 0, counter_mode (count_exp), 1, label);
14941 if (GET_CODE (count_exp) == CONST_INT)
14942 ;
14943 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14944 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14945 else
14946 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14947 }
14948 if (dynamic_check != -1)
14949 {
14950 rtx hot_label = gen_label_rtx ();
14951 jump_around_label = gen_label_rtx ();
14952 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14953 LEU, 0, counter_mode (count_exp), 1, hot_label);
14954 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14955 set_storage_via_libcall (dst, count_exp, val_exp, false);
14956 emit_jump (jump_around_label);
14957 emit_label (hot_label);
14958 }
14959
14960 /* Step 2: Alignment prologue. */
14961
14962 /* Do the expensive promotion once we branched off the small blocks. */
14963 if (!promoted_val)
14964 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14965 desired_align, align);
14966 gcc_assert (desired_align >= 1 && align >= 1);
14967
14968 if (desired_align > align)
14969 {
14970 /* Except for the first move in the epilogue, we no longer know
14971 the constant offset in aliasing info.  It does not seem worth
14972 the pain to maintain it for the first move, so throw away
14973 the info early.  */
14974 dst = change_address (dst, BLKmode, destreg);
14975 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14976 desired_align);
14977 }
14978 if (label && size_needed == 1)
14979 {
14980 emit_label (label);
14981 LABEL_NUSES (label) = 1;
14982 label = NULL;
14983 }
14984
14985 /* Step 3: Main loop. */
14986
14987 switch (alg)
14988 {
14989 case libcall:
14990 case no_stringop:
14991 gcc_unreachable ();
14992 case loop_1_byte:
14993 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14994 count_exp, QImode, 1, expected_size);
14995 break;
14996 case loop:
14997 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14998 count_exp, Pmode, 1, expected_size);
14999 break;
15000 case unrolled_loop:
15001 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15002 count_exp, Pmode, 4, expected_size);
15003 break;
15004 case rep_prefix_8_byte:
15005 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15006 DImode);
15007 break;
15008 case rep_prefix_4_byte:
15009 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15010 SImode);
15011 break;
15012 case rep_prefix_1_byte:
15013 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15014 QImode);
15015 break;
15016 }
15017 /* Properly adjust the offset of the dst memory for aliasing.  */
15018 if (CONST_INT_P (count_exp))
15019 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15020 (count / size_needed) * size_needed);
15021 else
15022 dst = change_address (dst, BLKmode, destreg);
15023
15024 /* Step 4: Epilogue to copy the remaining bytes. */
15025
15026 if (label)
15027 {
15028 /* When the main loop is done, COUNT_EXP might hold the original count,
15029 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15030 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15031 bytes.  Compensate if needed.  */
15032
15033 if (size_needed < desired_align - align)
15034 {
15035 tmp =
15036 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15037 GEN_INT (size_needed - 1), count_exp, 1,
15038 OPTAB_DIRECT);
15039 size_needed = desired_align - align + 1;
15040 if (tmp != count_exp)
15041 emit_move_insn (count_exp, tmp);
15042 }
15043 emit_label (label);
15044 LABEL_NUSES (label) = 1;
15045 }
15046 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15047 {
15048 if (force_loopy_epilogue)
15049 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15050 size_needed);
15051 else
15052 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15053 size_needed);
15054 }
15055 if (jump_around_label)
15056 emit_label (jump_around_label);
15057 return 1;
15058 }
15059
15060 /* Expand the appropriate insns for doing strlen if not just doing
15061 repnz; scasb
15062
15063 out = result, initialized with the start address
15064 align_rtx = alignment of the address.
15065 scratch = scratch register, initialized with the start address when
15066 not aligned, otherwise undefined
15067
15068 This is just the body. It needs the initializations mentioned above and
15069 some address computing at the end. These things are done in i386.md. */
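/* As a rough sketch, the code emitted below behaves like

     while ((uintptr_t) p & 3)            at most 3 single-byte checks
       { if (*p == 0) goto done;  p++; }
     do { x = *(unsigned int *) p; p += 4; }
     while (((x - 0x01010101) & ~x & 0x80808080) == 0);
     step p back to the exact zero byte;
   done:

   with OUT playing the role of P.  */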
15070
15071 static void
15072 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15073 {
15074 int align;
15075 rtx tmp;
15076 rtx align_2_label = NULL_RTX;
15077 rtx align_3_label = NULL_RTX;
15078 rtx align_4_label = gen_label_rtx ();
15079 rtx end_0_label = gen_label_rtx ();
15080 rtx mem;
15081 rtx tmpreg = gen_reg_rtx (SImode);
15082 rtx scratch = gen_reg_rtx (SImode);
15083 rtx cmp;
15084
15085 align = 0;
15086 if (CONST_INT_P (align_rtx))
15087 align = INTVAL (align_rtx);
15088
15089 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15090
15091 /* Is there a known alignment and is it less than 4? */
15092 if (align < 4)
15093 {
15094 rtx scratch1 = gen_reg_rtx (Pmode);
15095 emit_move_insn (scratch1, out);
15096 /* Is there a known alignment and is it not 2? */
15097 if (align != 2)
15098 {
15099 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15100 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15101
15102 /* Leave just the 3 lower bits. */
15103 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15104 NULL_RTX, 0, OPTAB_WIDEN);
15105
15106 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15107 Pmode, 1, align_4_label);
15108 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15109 Pmode, 1, align_2_label);
15110 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15111 Pmode, 1, align_3_label);
15112 }
15113 else
15114 {
15115 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15116 check whether it is aligned to a 4-byte boundary.  */
15117
15118 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15119 NULL_RTX, 0, OPTAB_WIDEN);
15120
15121 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15122 Pmode, 1, align_4_label);
15123 }
15124
15125 mem = change_address (src, QImode, out);
15126
15127 /* Now compare the bytes. */
15128
15129 /* Compare the first n unaligned bytes one byte at a time.  */
15130 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15131 QImode, 1, end_0_label);
15132
15133 /* Increment the address. */
15134 if (TARGET_64BIT)
15135 emit_insn (gen_adddi3 (out, out, const1_rtx));
15136 else
15137 emit_insn (gen_addsi3 (out, out, const1_rtx));
15138
15139 /* Not needed with an alignment of 2 */
15140 if (align != 2)
15141 {
15142 emit_label (align_2_label);
15143
15144 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15145 end_0_label);
15146
15147 if (TARGET_64BIT)
15148 emit_insn (gen_adddi3 (out, out, const1_rtx));
15149 else
15150 emit_insn (gen_addsi3 (out, out, const1_rtx));
15151
15152 emit_label (align_3_label);
15153 }
15154
15155 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15156 end_0_label);
15157
15158 if (TARGET_64BIT)
15159 emit_insn (gen_adddi3 (out, out, const1_rtx));
15160 else
15161 emit_insn (gen_addsi3 (out, out, const1_rtx));
15162 }
15163
15164 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
15165 align this loop; it only makes the program larger and does not help
15166 to speed it up.  */
15167 emit_label (align_4_label);
15168
15169 mem = change_address (src, SImode, out);
15170 emit_move_insn (scratch, mem);
15171 if (TARGET_64BIT)
15172 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15173 else
15174 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15175
15176 /* This formula yields a nonzero result iff one of the bytes is zero.
15177 This saves three branches inside the loop and many cycles.  */
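/* For illustration, the test built below computes
   (x - 0x01010101) & ~x & 0x80808080:
     x = 0x41424344 (no zero byte):  (x - 0x01010101) & ~x = 0x00010003,
       masked with 0x80808080 gives 0, so we loop again;
     x = 0x41004242 (a zero byte):   (x - 0x01010101) & ~x = 0x3eff0101,
       masked with 0x80808080 gives 0x00800000, so we fall through.  */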
15178
15179 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15180 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15181 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15182 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15183 gen_int_mode (0x80808080, SImode)));
15184 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15185 align_4_label);
15186
15187 if (TARGET_CMOVE)
15188 {
15189 rtx reg = gen_reg_rtx (SImode);
15190 rtx reg2 = gen_reg_rtx (Pmode);
15191 emit_move_insn (reg, tmpreg);
15192 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15193
15194 /* If zero is not in the first two bytes, move two bytes forward. */
15195 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15196 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15197 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15198 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15199 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15200 reg,
15201 tmpreg)));
15202 /* Emit lea manually to avoid clobbering of flags. */
15203 emit_insn (gen_rtx_SET (SImode, reg2,
15204 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15205
15206 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15207 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15208 emit_insn (gen_rtx_SET (VOIDmode, out,
15209 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15210 reg2,
15211 out)));
15212
15213 }
15214 else
15215 {
15216 rtx end_2_label = gen_label_rtx ();
15217 /* Is zero in the first two bytes? */
15218
15219 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15220 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15221 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15222 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15223 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15224 pc_rtx);
15225 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15226 JUMP_LABEL (tmp) = end_2_label;
15227
15228 /* Not in the first two. Move two bytes forward. */
15229 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15230 if (TARGET_64BIT)
15231 emit_insn (gen_adddi3 (out, out, const2_rtx));
15232 else
15233 emit_insn (gen_addsi3 (out, out, const2_rtx));
15234
15235 emit_label (end_2_label);
15236
15237 }
15238
15239 /* Avoid branch in fixing the byte. */
15240 tmpreg = gen_lowpart (QImode, tmpreg);
15241 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15242 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15243 if (TARGET_64BIT)
15244 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15245 else
15246 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15247
15248 emit_label (end_0_label);
15249 }
15250
15251 /* Expand strlen. */
15252
15253 int
15254 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15255 {
15256 rtx addr, scratch1, scratch2, scratch3, scratch4;
15257
15258 /* The generic case of the strlen expander is long.  Avoid expanding it
15259 unless TARGET_INLINE_ALL_STRINGOPS.  */
15260
15261 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15262 && !TARGET_INLINE_ALL_STRINGOPS
15263 && !optimize_size
15264 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15265 return 0;
15266
15267 addr = force_reg (Pmode, XEXP (src, 0));
15268 scratch1 = gen_reg_rtx (Pmode);
15269
15270 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15271 && !optimize_size)
15272 {
15273 /* It seems that some optimizers do not combine a call like
15274 foo(strlen(bar), strlen(bar));
15275 when the move and the subtraction are done here.  The length is
15276 calculated just once when these instructions are done inside
15277 output_strlen_unroll().  But since &bar[strlen(bar)] is often
15278 used and this uses one fewer register for the lifetime of
15279 output_strlen_unroll(), this is better.  */
15280
15281 emit_move_insn (out, addr);
15282
15283 ix86_expand_strlensi_unroll_1 (out, src, align);
15284
15285 /* strlensi_unroll_1 returns the address of the zero at the end of
15286 the string, like memchr(), so compute the length by subtracting
15287 the start address. */
15288 if (TARGET_64BIT)
15289 emit_insn (gen_subdi3 (out, out, addr));
15290 else
15291 emit_insn (gen_subsi3 (out, out, addr));
15292 }
15293 else
15294 {
15295 rtx unspec;
15296 scratch2 = gen_reg_rtx (Pmode);
15297 scratch3 = gen_reg_rtx (Pmode);
15298 scratch4 = force_reg (Pmode, constm1_rtx);
15299
15300 emit_move_insn (scratch3, addr);
15301 eoschar = force_reg (QImode, eoschar);
15302
15303 src = replace_equiv_address_nv (src, scratch3);
15304
15305 /* If .md starts supporting :P, this can be done in .md. */
15306 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15307 scratch4), UNSPEC_SCAS);
15308 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15309 if (TARGET_64BIT)
15310 {
15311 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15312 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15313 }
15314 else
15315 {
15316 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15317 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15318 }
15319 }
15320 return 1;
15321 }
15322
15323 /* For a given symbol (function), construct code to compute the address of
15324 its PLT entry in the large x86-64 PIC model.  */
15325 rtx
15326 construct_plt_address (rtx symbol)
15327 {
15328 rtx tmp = gen_reg_rtx (Pmode);
15329 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15330
15331 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15332 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15333
15334 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15335 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15336 return tmp;
15337 }
15338
15339 void
15340 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15341 rtx callarg2 ATTRIBUTE_UNUSED,
15342 rtx pop, int sibcall)
15343 {
15344 rtx use = NULL, call;
15345
15346 if (pop == const0_rtx)
15347 pop = NULL;
15348 gcc_assert (!TARGET_64BIT || !pop);
15349
15350 if (TARGET_MACHO && !TARGET_64BIT)
15351 {
15352 #if TARGET_MACHO
15353 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15354 fnaddr = machopic_indirect_call_target (fnaddr);
15355 #endif
15356 }
15357 else
15358 {
15359 /* Static functions and indirect calls don't need the pic register. */
15360 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15361 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15362 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15363 use_reg (&use, pic_offset_table_rtx);
15364 }
15365
15366 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15367 {
15368 rtx al = gen_rtx_REG (QImode, 0);
15369 emit_move_insn (al, callarg2);
15370 use_reg (&use, al);
15371 }
15372
15373 if (ix86_cmodel == CM_LARGE_PIC
15374 && GET_CODE (fnaddr) == MEM
15375 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15376 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15377 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15378 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15379 {
15380 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15381 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15382 }
15383 if (sibcall && TARGET_64BIT
15384 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15385 {
15386 rtx addr;
15387 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15388 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15389 emit_move_insn (fnaddr, addr);
15390 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15391 }
15392
15393 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15394 if (retval)
15395 call = gen_rtx_SET (VOIDmode, retval, call);
15396 if (pop)
15397 {
15398 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15399 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15400 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15401 }
15402
15403 call = emit_call_insn (call);
15404 if (use)
15405 CALL_INSN_FUNCTION_USAGE (call) = use;
15406 }
15407
15408 \f
15409 /* Clear stack slot assignments remembered from previous functions.
15410 This is called from INIT_EXPANDERS once before RTL is emitted for each
15411 function. */
15412
15413 static struct machine_function *
15414 ix86_init_machine_status (void)
15415 {
15416 struct machine_function *f;
15417
15418 f = ggc_alloc_cleared (sizeof (struct machine_function));
15419 f->use_fast_prologue_epilogue_nregs = -1;
15420 f->tls_descriptor_call_expanded_p = 0;
15421
15422 return f;
15423 }
15424
15425 /* Return a MEM corresponding to a stack slot with mode MODE.
15426 Allocate a new slot if necessary.
15427
15428 The RTL for a function can have several slots available: N is
15429 which slot to use. */
15430
15431 rtx
15432 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15433 {
15434 struct stack_local_entry *s;
15435
15436 gcc_assert (n < MAX_386_STACK_LOCALS);
15437
15438 for (s = ix86_stack_locals; s; s = s->next)
15439 if (s->mode == mode && s->n == n)
15440 return copy_rtx (s->rtl);
15441
15442 s = (struct stack_local_entry *)
15443 ggc_alloc (sizeof (struct stack_local_entry));
15444 s->n = n;
15445 s->mode = mode;
15446 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15447
15448 s->next = ix86_stack_locals;
15449 ix86_stack_locals = s;
15450 return s->rtl;
15451 }
15452
15453 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15454
15455 static GTY(()) rtx ix86_tls_symbol;
15456 rtx
15457 ix86_tls_get_addr (void)
15458 {
15459
15460 if (!ix86_tls_symbol)
15461 {
15462 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15463 (TARGET_ANY_GNU_TLS
15464 && !TARGET_64BIT)
15465 ? "___tls_get_addr"
15466 : "__tls_get_addr");
15467 }
15468
15469 return ix86_tls_symbol;
15470 }
15471
15472 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15473
15474 static GTY(()) rtx ix86_tls_module_base_symbol;
15475 rtx
15476 ix86_tls_module_base (void)
15477 {
15478
15479 if (!ix86_tls_module_base_symbol)
15480 {
15481 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15482 "_TLS_MODULE_BASE_");
15483 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15484 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15485 }
15486
15487 return ix86_tls_module_base_symbol;
15488 }
15489 \f
15490 /* Calculate the length of the memory address in the instruction
15491 encoding. Does not include the one-byte modrm, opcode, or prefix. */
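/* For illustration, typical byte counts returned for 32-bit addresses:
     (%eax)           -> 0
     (%esp)           -> 1   (needs a SIB byte)
     8(%ebp)          -> 1   (disp8)
     1024(%eax)       -> 4   (disp32)
     8(%eax,%ebx,2)   -> 2   (SIB byte + disp8)
     symbol           -> 4   (32-bit absolute displacement)  */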
15492
15493 int
15494 memory_address_length (rtx addr)
15495 {
15496 struct ix86_address parts;
15497 rtx base, index, disp;
15498 int len;
15499 int ok;
15500
15501 if (GET_CODE (addr) == PRE_DEC
15502 || GET_CODE (addr) == POST_INC
15503 || GET_CODE (addr) == PRE_MODIFY
15504 || GET_CODE (addr) == POST_MODIFY)
15505 return 0;
15506
15507 ok = ix86_decompose_address (addr, &parts);
15508 gcc_assert (ok);
15509
15510 if (parts.base && GET_CODE (parts.base) == SUBREG)
15511 parts.base = SUBREG_REG (parts.base);
15512 if (parts.index && GET_CODE (parts.index) == SUBREG)
15513 parts.index = SUBREG_REG (parts.index);
15514
15515 base = parts.base;
15516 index = parts.index;
15517 disp = parts.disp;
15518 len = 0;
15519
15520 /* Rule of thumb:
15521 - esp as the base always wants an index,
15522 - ebp as the base always wants a displacement. */
15523
15524 /* Register Indirect. */
15525 if (base && !index && !disp)
15526 {
15527 /* esp (for its index) and ebp (for its displacement) need
15528 the two-byte modrm form. */
15529 if (addr == stack_pointer_rtx
15530 || addr == arg_pointer_rtx
15531 || addr == frame_pointer_rtx
15532 || addr == hard_frame_pointer_rtx)
15533 len = 1;
15534 }
15535
15536 /* Direct Addressing. */
15537 else if (disp && !base && !index)
15538 len = 4;
15539
15540 else
15541 {
15542 /* Find the length of the displacement constant. */
15543 if (disp)
15544 {
15545 if (base && satisfies_constraint_K (disp))
15546 len = 1;
15547 else
15548 len = 4;
15549 }
15550 /* ebp always wants a displacement. */
15551 else if (base == hard_frame_pointer_rtx)
15552 len = 1;
15553
15554 /* An index requires the two-byte modrm form.... */
15555 if (index
15556 /* ...like esp, which always wants an index. */
15557 || base == stack_pointer_rtx
15558 || base == arg_pointer_rtx
15559 || base == frame_pointer_rtx)
15560 len += 1;
15561 }
15562
15563 return len;
15564 }
15565
15566 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15567 is set, expect that the insn has an 8-bit immediate alternative.  */
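/* For example, "addl $3, %eax" can use the sign-extended 8-bit immediate
   form (length 1), while "addl $300, %eax" needs a full 32-bit immediate
   (length 4).  DImode immediates also count as 4 bytes here because they
   are encoded as 32-bit sign-extended values.  */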
15568 int
15569 ix86_attr_length_immediate_default (rtx insn, int shortform)
15570 {
15571 int len = 0;
15572 int i;
15573 extract_insn_cached (insn);
15574 for (i = recog_data.n_operands - 1; i >= 0; --i)
15575 if (CONSTANT_P (recog_data.operand[i]))
15576 {
15577 gcc_assert (!len);
15578 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15579 len = 1;
15580 else
15581 {
15582 switch (get_attr_mode (insn))
15583 {
15584 case MODE_QI:
15585 len+=1;
15586 break;
15587 case MODE_HI:
15588 len+=2;
15589 break;
15590 case MODE_SI:
15591 len+=4;
15592 break;
15593 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values.  */
15594 case MODE_DI:
15595 len+=4;
15596 break;
15597 default:
15598 fatal_insn ("unknown insn mode", insn);
15599 }
15600 }
15601 }
15602 return len;
15603 }
15604 /* Compute default value for "length_address" attribute. */
15605 int
15606 ix86_attr_length_address_default (rtx insn)
15607 {
15608 int i;
15609
15610 if (get_attr_type (insn) == TYPE_LEA)
15611 {
15612 rtx set = PATTERN (insn);
15613
15614 if (GET_CODE (set) == PARALLEL)
15615 set = XVECEXP (set, 0, 0);
15616
15617 gcc_assert (GET_CODE (set) == SET);
15618
15619 return memory_address_length (SET_SRC (set));
15620 }
15621
15622 extract_insn_cached (insn);
15623 for (i = recog_data.n_operands - 1; i >= 0; --i)
15624 if (MEM_P (recog_data.operand[i]))
15625 {
15626 return memory_address_length (XEXP (recog_data.operand[i], 0));
15627 break;
15628 }
15629 return 0;
15630 }
15631 \f
15632 /* Return the maximum number of instructions a cpu can issue. */
15633
15634 static int
15635 ix86_issue_rate (void)
15636 {
15637 switch (ix86_tune)
15638 {
15639 case PROCESSOR_PENTIUM:
15640 case PROCESSOR_K6:
15641 return 2;
15642
15643 case PROCESSOR_PENTIUMPRO:
15644 case PROCESSOR_PENTIUM4:
15645 case PROCESSOR_ATHLON:
15646 case PROCESSOR_K8:
15647 case PROCESSOR_AMDFAM10:
15648 case PROCESSOR_NOCONA:
15649 case PROCESSOR_GENERIC32:
15650 case PROCESSOR_GENERIC64:
15651 return 3;
15652
15653 case PROCESSOR_CORE2:
15654 return 4;
15655
15656 default:
15657 return 1;
15658 }
15659 }
15660
15661 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15662 by DEP_INSN and nothing else set by DEP_INSN.  */
15663
15664 static int
15665 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15666 {
15667 rtx set, set2;
15668
15669 /* Simplify the test for uninteresting insns. */
15670 if (insn_type != TYPE_SETCC
15671 && insn_type != TYPE_ICMOV
15672 && insn_type != TYPE_FCMOV
15673 && insn_type != TYPE_IBR)
15674 return 0;
15675
15676 if ((set = single_set (dep_insn)) != 0)
15677 {
15678 set = SET_DEST (set);
15679 set2 = NULL_RTX;
15680 }
15681 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15682 && XVECLEN (PATTERN (dep_insn), 0) == 2
15683 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15684 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15685 {
15686 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15687 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15688 }
15689 else
15690 return 0;
15691
15692 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15693 return 0;
15694
15695 /* This test is true if the dependent insn reads the flags but
15696 not any other potentially set register. */
15697 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15698 return 0;
15699
15700 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15701 return 0;
15702
15703 return 1;
15704 }
15705
15706 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15707 address with operands set by DEP_INSN. */
15708
15709 static int
15710 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15711 {
15712 rtx addr;
15713
15714 if (insn_type == TYPE_LEA
15715 && TARGET_PENTIUM)
15716 {
15717 addr = PATTERN (insn);
15718
15719 if (GET_CODE (addr) == PARALLEL)
15720 addr = XVECEXP (addr, 0, 0);
15721
15722 gcc_assert (GET_CODE (addr) == SET);
15723
15724 addr = SET_SRC (addr);
15725 }
15726 else
15727 {
15728 int i;
15729 extract_insn_cached (insn);
15730 for (i = recog_data.n_operands - 1; i >= 0; --i)
15731 if (MEM_P (recog_data.operand[i]))
15732 {
15733 addr = XEXP (recog_data.operand[i], 0);
15734 goto found;
15735 }
15736 return 0;
15737 found:;
15738 }
15739
15740 return modified_in_p (addr, dep_insn);
15741 }
15742
15743 static int
15744 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15745 {
15746 enum attr_type insn_type, dep_insn_type;
15747 enum attr_memory memory;
15748 rtx set, set2;
15749 int dep_insn_code_number;
15750
15751 /* Anti and output dependencies have zero cost on all CPUs. */
15752 if (REG_NOTE_KIND (link) != 0)
15753 return 0;
15754
15755 dep_insn_code_number = recog_memoized (dep_insn);
15756
15757 /* If we can't recognize the insns, we can't really do anything. */
15758 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15759 return cost;
15760
15761 insn_type = get_attr_type (insn);
15762 dep_insn_type = get_attr_type (dep_insn);
15763
15764 switch (ix86_tune)
15765 {
15766 case PROCESSOR_PENTIUM:
15767 /* Address Generation Interlock adds a cycle of latency. */
15768 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15769 cost += 1;
15770
15771 /* ??? Compares pair with jump/setcc. */
15772 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15773 cost = 0;
15774
15775 /* Floating point stores require value to be ready one cycle earlier. */
15776 if (insn_type == TYPE_FMOV
15777 && get_attr_memory (insn) == MEMORY_STORE
15778 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15779 cost += 1;
15780 break;
15781
15782 case PROCESSOR_PENTIUMPRO:
15783 memory = get_attr_memory (insn);
15784
15785 /* INT->FP conversion is expensive. */
15786 if (get_attr_fp_int_src (dep_insn))
15787 cost += 5;
15788
15789 /* There is one cycle extra latency between an FP op and a store. */
15790 if (insn_type == TYPE_FMOV
15791 && (set = single_set (dep_insn)) != NULL_RTX
15792 && (set2 = single_set (insn)) != NULL_RTX
15793 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15794 && MEM_P (SET_DEST (set2)))
15795 cost += 1;
15796
15797 /* Model the ability of the reorder buffer to hide the latency of a load
15798 by executing it in parallel with the previous instruction when the
15799 previous instruction is not needed to compute the address.  */
15800 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15801 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15802 {
15803 /* Claim moves take one cycle, as the core can issue one load
15804 at a time and the next load can start a cycle later.  */
15805 if (dep_insn_type == TYPE_IMOV
15806 || dep_insn_type == TYPE_FMOV)
15807 cost = 1;
15808 else if (cost > 1)
15809 cost--;
15810 }
15811 break;
15812
15813 case PROCESSOR_K6:
15814 memory = get_attr_memory (insn);
15815
15816 /* The esp dependency is resolved before the instruction is really
15817 finished. */
15818 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15819 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15820 return 1;
15821
15822 /* INT->FP conversion is expensive. */
15823 if (get_attr_fp_int_src (dep_insn))
15824 cost += 5;
15825
15826 /* Model the ability of the reorder buffer to hide the latency of a load
15827 by executing it in parallel with the previous instruction when the
15828 previous instruction is not needed to compute the address.  */
15829 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15830 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15831 {
15832 /* Claim moves take one cycle, as the core can issue one load
15833 at a time and the next load can start a cycle later.  */
15834 if (dep_insn_type == TYPE_IMOV
15835 || dep_insn_type == TYPE_FMOV)
15836 cost = 1;
15837 else if (cost > 2)
15838 cost -= 2;
15839 else
15840 cost = 1;
15841 }
15842 break;
15843
15844 case PROCESSOR_ATHLON:
15845 case PROCESSOR_K8:
15846 case PROCESSOR_AMDFAM10:
15847 case PROCESSOR_GENERIC32:
15848 case PROCESSOR_GENERIC64:
15849 memory = get_attr_memory (insn);
15850
15851 /* Model the ability of the reorder buffer to hide the latency of a load
15852 by executing it in parallel with the previous instruction when the
15853 previous instruction is not needed to compute the address.  */
15854 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15855 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15856 {
15857 enum attr_unit unit = get_attr_unit (insn);
15858 int loadcost = 3;
15859
15860 /* Because of the difference between the length of integer and
15861 floating unit pipeline preparation stages, the memory operands
15862 for floating point are cheaper.
15863
15864 ??? For Athlon the difference is most probably 2.  */
15865 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15866 loadcost = 3;
15867 else
15868 loadcost = TARGET_ATHLON ? 2 : 0;
15869
15870 if (cost >= loadcost)
15871 cost -= loadcost;
15872 else
15873 cost = 0;
15874 }
15875
15876 default:
15877 break;
15878 }
15879
15880 return cost;
15881 }
15882
15883 /* How many alternative schedules to try. This should be as wide as the
15884 scheduling freedom in the DFA, but no wider. Making this value too
15885 large results in extra work for the scheduler.  */
15886
15887 static int
15888 ia32_multipass_dfa_lookahead (void)
15889 {
15890 if (ix86_tune == PROCESSOR_PENTIUM)
15891 return 2;
15892
15893 if (ix86_tune == PROCESSOR_PENTIUMPRO
15894 || ix86_tune == PROCESSOR_K6)
15895 return 1;
15896
15897 else
15898 return 0;
15899 }
15900
15901 \f
15902 /* Compute the alignment given to a constant that is being placed in memory.
15903 EXP is the constant and ALIGN is the alignment that the object would
15904 ordinarily have.
15905 The value of this function is used instead of that alignment to align
15906 the object. */
15907
15908 int
15909 ix86_constant_alignment (tree exp, int align)
15910 {
15911 if (TREE_CODE (exp) == REAL_CST)
15912 {
15913 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15914 return 64;
15915 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15916 return 128;
15917 }
15918 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15919 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15920 return BITS_PER_WORD;
15921
15922 return align;
15923 }
15924
15925 /* Compute the alignment for a static variable.
15926 TYPE is the data type, and ALIGN is the alignment that
15927 the object would ordinarily have. The value of this function is used
15928 instead of that alignment to align the object. */
15929
15930 int
15931 ix86_data_alignment (tree type, int align)
15932 {
15933 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15934
15935 if (AGGREGATE_TYPE_P (type)
15936 && TYPE_SIZE (type)
15937 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15938 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15939 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15940 && align < max_align)
15941 align = max_align;
15942
15943 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15944 to a 16-byte boundary.  */
15945 if (TARGET_64BIT)
15946 {
15947 if (AGGREGATE_TYPE_P (type)
15948 && TYPE_SIZE (type)
15949 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15950 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15951 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15952 return 128;
15953 }
15954
15955 if (TREE_CODE (type) == ARRAY_TYPE)
15956 {
15957 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15958 return 64;
15959 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15960 return 128;
15961 }
15962 else if (TREE_CODE (type) == COMPLEX_TYPE)
15963 {
15964
15965 if (TYPE_MODE (type) == DCmode && align < 64)
15966 return 64;
15967 if (TYPE_MODE (type) == XCmode && align < 128)
15968 return 128;
15969 }
15970 else if ((TREE_CODE (type) == RECORD_TYPE
15971 || TREE_CODE (type) == UNION_TYPE
15972 || TREE_CODE (type) == QUAL_UNION_TYPE)
15973 && TYPE_FIELDS (type))
15974 {
15975 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15976 return 64;
15977 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15978 return 128;
15979 }
15980 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15981 || TREE_CODE (type) == INTEGER_TYPE)
15982 {
15983 if (TYPE_MODE (type) == DFmode && align < 64)
15984 return 64;
15985 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15986 return 128;
15987 }
15988
15989 return align;
15990 }
15991
15992 /* Compute the alignment for a local variable.
15993 TYPE is the data type, and ALIGN is the alignment that
15994 the object would ordinarily have. The value of this macro is used
15995 instead of that alignment to align the object. */
15996
15997 int
15998 ix86_local_alignment (tree type, int align)
15999 {
16000 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16001 to a 16-byte boundary.  */
16002 if (TARGET_64BIT)
16003 {
16004 if (AGGREGATE_TYPE_P (type)
16005 && TYPE_SIZE (type)
16006 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16007 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16008 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16009 return 128;
16010 }
16011 if (TREE_CODE (type) == ARRAY_TYPE)
16012 {
16013 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16014 return 64;
16015 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16016 return 128;
16017 }
16018 else if (TREE_CODE (type) == COMPLEX_TYPE)
16019 {
16020 if (TYPE_MODE (type) == DCmode && align < 64)
16021 return 64;
16022 if (TYPE_MODE (type) == XCmode && align < 128)
16023 return 128;
16024 }
16025 else if ((TREE_CODE (type) == RECORD_TYPE
16026 || TREE_CODE (type) == UNION_TYPE
16027 || TREE_CODE (type) == QUAL_UNION_TYPE)
16028 && TYPE_FIELDS (type))
16029 {
16030 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16031 return 64;
16032 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16033 return 128;
16034 }
16035 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16036 || TREE_CODE (type) == INTEGER_TYPE)
16037 {
16038
16039 if (TYPE_MODE (type) == DFmode && align < 64)
16040 return 64;
16041 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16042 return 128;
16043 }
16044 return align;
16045 }
16046 \f
16047 /* Emit RTL insns to initialize the variable parts of a trampoline.
16048 FNADDR is an RTX for the address of the function's pure code.
16049 CXT is an RTX for the static chain value for the function. */
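/* For illustration, the bytes stored below decode roughly as:

   !TARGET_64BIT:  b9 <cxt32>         movl   $CXT, %ecx
                   e9 <rel32>         jmp    FNADDR   (rel32 = FNADDR - (TRAMP + 10))

   TARGET_64BIT:   49 bb <fnaddr64>   movabs $FNADDR, %r11   (or 41 bb <imm32>)
                   49 ba <cxt64>      movabs $CXT, %r10
                   49 ff e3           jmp    *%r11  */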
16050 void
16051 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16052 {
16053 if (!TARGET_64BIT)
16054 {
16055 /* Compute offset from the end of the jmp to the target function. */
16056 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16057 plus_constant (tramp, 10),
16058 NULL_RTX, 1, OPTAB_DIRECT);
16059 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16060 gen_int_mode (0xb9, QImode));
16061 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16062 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16063 gen_int_mode (0xe9, QImode));
16064 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16065 }
16066 else
16067 {
16068 int offset = 0;
16069 /* Try to load address using shorter movl instead of movabs.
16070 We may want to support movq for kernel mode, but kernel does not use
16071 trampolines at the moment. */
16072 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16073 {
16074 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16075 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16076 gen_int_mode (0xbb41, HImode));
16077 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16078 gen_lowpart (SImode, fnaddr));
16079 offset += 6;
16080 }
16081 else
16082 {
16083 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16084 gen_int_mode (0xbb49, HImode));
16085 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16086 fnaddr);
16087 offset += 10;
16088 }
16089 /* Load static chain using movabs to r10. */
16090 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16091 gen_int_mode (0xba49, HImode));
16092 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16093 cxt);
16094 offset += 10;
16095 /* Jump to r11.  */
16096 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16097 gen_int_mode (0xff49, HImode));
16098 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16099 gen_int_mode (0xe3, QImode));
16100 offset += 3;
16101 gcc_assert (offset <= TRAMPOLINE_SIZE);
16102 }
16103
16104 #ifdef ENABLE_EXECUTE_STACK
16105 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16106 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16107 #endif
16108 }
16109 \f
16110 /* Codes for all the SSE/MMX builtins. */
16111 enum ix86_builtins
16112 {
16113 IX86_BUILTIN_ADDPS,
16114 IX86_BUILTIN_ADDSS,
16115 IX86_BUILTIN_DIVPS,
16116 IX86_BUILTIN_DIVSS,
16117 IX86_BUILTIN_MULPS,
16118 IX86_BUILTIN_MULSS,
16119 IX86_BUILTIN_SUBPS,
16120 IX86_BUILTIN_SUBSS,
16121
16122 IX86_BUILTIN_CMPEQPS,
16123 IX86_BUILTIN_CMPLTPS,
16124 IX86_BUILTIN_CMPLEPS,
16125 IX86_BUILTIN_CMPGTPS,
16126 IX86_BUILTIN_CMPGEPS,
16127 IX86_BUILTIN_CMPNEQPS,
16128 IX86_BUILTIN_CMPNLTPS,
16129 IX86_BUILTIN_CMPNLEPS,
16130 IX86_BUILTIN_CMPNGTPS,
16131 IX86_BUILTIN_CMPNGEPS,
16132 IX86_BUILTIN_CMPORDPS,
16133 IX86_BUILTIN_CMPUNORDPS,
16134 IX86_BUILTIN_CMPEQSS,
16135 IX86_BUILTIN_CMPLTSS,
16136 IX86_BUILTIN_CMPLESS,
16137 IX86_BUILTIN_CMPNEQSS,
16138 IX86_BUILTIN_CMPNLTSS,
16139 IX86_BUILTIN_CMPNLESS,
16140 IX86_BUILTIN_CMPNGTSS,
16141 IX86_BUILTIN_CMPNGESS,
16142 IX86_BUILTIN_CMPORDSS,
16143 IX86_BUILTIN_CMPUNORDSS,
16144
16145 IX86_BUILTIN_COMIEQSS,
16146 IX86_BUILTIN_COMILTSS,
16147 IX86_BUILTIN_COMILESS,
16148 IX86_BUILTIN_COMIGTSS,
16149 IX86_BUILTIN_COMIGESS,
16150 IX86_BUILTIN_COMINEQSS,
16151 IX86_BUILTIN_UCOMIEQSS,
16152 IX86_BUILTIN_UCOMILTSS,
16153 IX86_BUILTIN_UCOMILESS,
16154 IX86_BUILTIN_UCOMIGTSS,
16155 IX86_BUILTIN_UCOMIGESS,
16156 IX86_BUILTIN_UCOMINEQSS,
16157
16158 IX86_BUILTIN_CVTPI2PS,
16159 IX86_BUILTIN_CVTPS2PI,
16160 IX86_BUILTIN_CVTSI2SS,
16161 IX86_BUILTIN_CVTSI642SS,
16162 IX86_BUILTIN_CVTSS2SI,
16163 IX86_BUILTIN_CVTSS2SI64,
16164 IX86_BUILTIN_CVTTPS2PI,
16165 IX86_BUILTIN_CVTTSS2SI,
16166 IX86_BUILTIN_CVTTSS2SI64,
16167
16168 IX86_BUILTIN_MAXPS,
16169 IX86_BUILTIN_MAXSS,
16170 IX86_BUILTIN_MINPS,
16171 IX86_BUILTIN_MINSS,
16172
16173 IX86_BUILTIN_LOADUPS,
16174 IX86_BUILTIN_STOREUPS,
16175 IX86_BUILTIN_MOVSS,
16176
16177 IX86_BUILTIN_MOVHLPS,
16178 IX86_BUILTIN_MOVLHPS,
16179 IX86_BUILTIN_LOADHPS,
16180 IX86_BUILTIN_LOADLPS,
16181 IX86_BUILTIN_STOREHPS,
16182 IX86_BUILTIN_STORELPS,
16183
16184 IX86_BUILTIN_MASKMOVQ,
16185 IX86_BUILTIN_MOVMSKPS,
16186 IX86_BUILTIN_PMOVMSKB,
16187
16188 IX86_BUILTIN_MOVNTPS,
16189 IX86_BUILTIN_MOVNTQ,
16190
16191 IX86_BUILTIN_LOADDQU,
16192 IX86_BUILTIN_STOREDQU,
16193
16194 IX86_BUILTIN_PACKSSWB,
16195 IX86_BUILTIN_PACKSSDW,
16196 IX86_BUILTIN_PACKUSWB,
16197
16198 IX86_BUILTIN_PADDB,
16199 IX86_BUILTIN_PADDW,
16200 IX86_BUILTIN_PADDD,
16201 IX86_BUILTIN_PADDQ,
16202 IX86_BUILTIN_PADDSB,
16203 IX86_BUILTIN_PADDSW,
16204 IX86_BUILTIN_PADDUSB,
16205 IX86_BUILTIN_PADDUSW,
16206 IX86_BUILTIN_PSUBB,
16207 IX86_BUILTIN_PSUBW,
16208 IX86_BUILTIN_PSUBD,
16209 IX86_BUILTIN_PSUBQ,
16210 IX86_BUILTIN_PSUBSB,
16211 IX86_BUILTIN_PSUBSW,
16212 IX86_BUILTIN_PSUBUSB,
16213 IX86_BUILTIN_PSUBUSW,
16214
16215 IX86_BUILTIN_PAND,
16216 IX86_BUILTIN_PANDN,
16217 IX86_BUILTIN_POR,
16218 IX86_BUILTIN_PXOR,
16219
16220 IX86_BUILTIN_PAVGB,
16221 IX86_BUILTIN_PAVGW,
16222
16223 IX86_BUILTIN_PCMPEQB,
16224 IX86_BUILTIN_PCMPEQW,
16225 IX86_BUILTIN_PCMPEQD,
16226 IX86_BUILTIN_PCMPGTB,
16227 IX86_BUILTIN_PCMPGTW,
16228 IX86_BUILTIN_PCMPGTD,
16229
16230 IX86_BUILTIN_PMADDWD,
16231
16232 IX86_BUILTIN_PMAXSW,
16233 IX86_BUILTIN_PMAXUB,
16234 IX86_BUILTIN_PMINSW,
16235 IX86_BUILTIN_PMINUB,
16236
16237 IX86_BUILTIN_PMULHUW,
16238 IX86_BUILTIN_PMULHW,
16239 IX86_BUILTIN_PMULLW,
16240
16241 IX86_BUILTIN_PSADBW,
16242 IX86_BUILTIN_PSHUFW,
16243
16244 IX86_BUILTIN_PSLLW,
16245 IX86_BUILTIN_PSLLD,
16246 IX86_BUILTIN_PSLLQ,
16247 IX86_BUILTIN_PSRAW,
16248 IX86_BUILTIN_PSRAD,
16249 IX86_BUILTIN_PSRLW,
16250 IX86_BUILTIN_PSRLD,
16251 IX86_BUILTIN_PSRLQ,
16252 IX86_BUILTIN_PSLLWI,
16253 IX86_BUILTIN_PSLLDI,
16254 IX86_BUILTIN_PSLLQI,
16255 IX86_BUILTIN_PSRAWI,
16256 IX86_BUILTIN_PSRADI,
16257 IX86_BUILTIN_PSRLWI,
16258 IX86_BUILTIN_PSRLDI,
16259 IX86_BUILTIN_PSRLQI,
16260
16261 IX86_BUILTIN_PUNPCKHBW,
16262 IX86_BUILTIN_PUNPCKHWD,
16263 IX86_BUILTIN_PUNPCKHDQ,
16264 IX86_BUILTIN_PUNPCKLBW,
16265 IX86_BUILTIN_PUNPCKLWD,
16266 IX86_BUILTIN_PUNPCKLDQ,
16267
16268 IX86_BUILTIN_SHUFPS,
16269
16270 IX86_BUILTIN_RCPPS,
16271 IX86_BUILTIN_RCPSS,
16272 IX86_BUILTIN_RSQRTPS,
16273 IX86_BUILTIN_RSQRTSS,
16274 IX86_BUILTIN_SQRTPS,
16275 IX86_BUILTIN_SQRTSS,
16276
16277 IX86_BUILTIN_UNPCKHPS,
16278 IX86_BUILTIN_UNPCKLPS,
16279
16280 IX86_BUILTIN_ANDPS,
16281 IX86_BUILTIN_ANDNPS,
16282 IX86_BUILTIN_ORPS,
16283 IX86_BUILTIN_XORPS,
16284
16285 IX86_BUILTIN_EMMS,
16286 IX86_BUILTIN_LDMXCSR,
16287 IX86_BUILTIN_STMXCSR,
16288 IX86_BUILTIN_SFENCE,
16289
16290 /* 3DNow! Original */
16291 IX86_BUILTIN_FEMMS,
16292 IX86_BUILTIN_PAVGUSB,
16293 IX86_BUILTIN_PF2ID,
16294 IX86_BUILTIN_PFACC,
16295 IX86_BUILTIN_PFADD,
16296 IX86_BUILTIN_PFCMPEQ,
16297 IX86_BUILTIN_PFCMPGE,
16298 IX86_BUILTIN_PFCMPGT,
16299 IX86_BUILTIN_PFMAX,
16300 IX86_BUILTIN_PFMIN,
16301 IX86_BUILTIN_PFMUL,
16302 IX86_BUILTIN_PFRCP,
16303 IX86_BUILTIN_PFRCPIT1,
16304 IX86_BUILTIN_PFRCPIT2,
16305 IX86_BUILTIN_PFRSQIT1,
16306 IX86_BUILTIN_PFRSQRT,
16307 IX86_BUILTIN_PFSUB,
16308 IX86_BUILTIN_PFSUBR,
16309 IX86_BUILTIN_PI2FD,
16310 IX86_BUILTIN_PMULHRW,
16311
16312 /* 3DNow! Athlon Extensions */
16313 IX86_BUILTIN_PF2IW,
16314 IX86_BUILTIN_PFNACC,
16315 IX86_BUILTIN_PFPNACC,
16316 IX86_BUILTIN_PI2FW,
16317 IX86_BUILTIN_PSWAPDSI,
16318 IX86_BUILTIN_PSWAPDSF,
16319
16320 /* SSE2 */
16321 IX86_BUILTIN_ADDPD,
16322 IX86_BUILTIN_ADDSD,
16323 IX86_BUILTIN_DIVPD,
16324 IX86_BUILTIN_DIVSD,
16325 IX86_BUILTIN_MULPD,
16326 IX86_BUILTIN_MULSD,
16327 IX86_BUILTIN_SUBPD,
16328 IX86_BUILTIN_SUBSD,
16329
16330 IX86_BUILTIN_CMPEQPD,
16331 IX86_BUILTIN_CMPLTPD,
16332 IX86_BUILTIN_CMPLEPD,
16333 IX86_BUILTIN_CMPGTPD,
16334 IX86_BUILTIN_CMPGEPD,
16335 IX86_BUILTIN_CMPNEQPD,
16336 IX86_BUILTIN_CMPNLTPD,
16337 IX86_BUILTIN_CMPNLEPD,
16338 IX86_BUILTIN_CMPNGTPD,
16339 IX86_BUILTIN_CMPNGEPD,
16340 IX86_BUILTIN_CMPORDPD,
16341 IX86_BUILTIN_CMPUNORDPD,
16342 IX86_BUILTIN_CMPEQSD,
16343 IX86_BUILTIN_CMPLTSD,
16344 IX86_BUILTIN_CMPLESD,
16345 IX86_BUILTIN_CMPNEQSD,
16346 IX86_BUILTIN_CMPNLTSD,
16347 IX86_BUILTIN_CMPNLESD,
16348 IX86_BUILTIN_CMPORDSD,
16349 IX86_BUILTIN_CMPUNORDSD,
16350
16351 IX86_BUILTIN_COMIEQSD,
16352 IX86_BUILTIN_COMILTSD,
16353 IX86_BUILTIN_COMILESD,
16354 IX86_BUILTIN_COMIGTSD,
16355 IX86_BUILTIN_COMIGESD,
16356 IX86_BUILTIN_COMINEQSD,
16357 IX86_BUILTIN_UCOMIEQSD,
16358 IX86_BUILTIN_UCOMILTSD,
16359 IX86_BUILTIN_UCOMILESD,
16360 IX86_BUILTIN_UCOMIGTSD,
16361 IX86_BUILTIN_UCOMIGESD,
16362 IX86_BUILTIN_UCOMINEQSD,
16363
16364 IX86_BUILTIN_MAXPD,
16365 IX86_BUILTIN_MAXSD,
16366 IX86_BUILTIN_MINPD,
16367 IX86_BUILTIN_MINSD,
16368
16369 IX86_BUILTIN_ANDPD,
16370 IX86_BUILTIN_ANDNPD,
16371 IX86_BUILTIN_ORPD,
16372 IX86_BUILTIN_XORPD,
16373
16374 IX86_BUILTIN_SQRTPD,
16375 IX86_BUILTIN_SQRTSD,
16376
16377 IX86_BUILTIN_UNPCKHPD,
16378 IX86_BUILTIN_UNPCKLPD,
16379
16380 IX86_BUILTIN_SHUFPD,
16381
16382 IX86_BUILTIN_LOADUPD,
16383 IX86_BUILTIN_STOREUPD,
16384 IX86_BUILTIN_MOVSD,
16385
16386 IX86_BUILTIN_LOADHPD,
16387 IX86_BUILTIN_LOADLPD,
16388
16389 IX86_BUILTIN_CVTDQ2PD,
16390 IX86_BUILTIN_CVTDQ2PS,
16391
16392 IX86_BUILTIN_CVTPD2DQ,
16393 IX86_BUILTIN_CVTPD2PI,
16394 IX86_BUILTIN_CVTPD2PS,
16395 IX86_BUILTIN_CVTTPD2DQ,
16396 IX86_BUILTIN_CVTTPD2PI,
16397
16398 IX86_BUILTIN_CVTPI2PD,
16399 IX86_BUILTIN_CVTSI2SD,
16400 IX86_BUILTIN_CVTSI642SD,
16401
16402 IX86_BUILTIN_CVTSD2SI,
16403 IX86_BUILTIN_CVTSD2SI64,
16404 IX86_BUILTIN_CVTSD2SS,
16405 IX86_BUILTIN_CVTSS2SD,
16406 IX86_BUILTIN_CVTTSD2SI,
16407 IX86_BUILTIN_CVTTSD2SI64,
16408
16409 IX86_BUILTIN_CVTPS2DQ,
16410 IX86_BUILTIN_CVTPS2PD,
16411 IX86_BUILTIN_CVTTPS2DQ,
16412
16413 IX86_BUILTIN_MOVNTI,
16414 IX86_BUILTIN_MOVNTPD,
16415 IX86_BUILTIN_MOVNTDQ,
16416
16417 /* SSE2 MMX */
16418 IX86_BUILTIN_MASKMOVDQU,
16419 IX86_BUILTIN_MOVMSKPD,
16420 IX86_BUILTIN_PMOVMSKB128,
16421
16422 IX86_BUILTIN_PACKSSWB128,
16423 IX86_BUILTIN_PACKSSDW128,
16424 IX86_BUILTIN_PACKUSWB128,
16425
16426 IX86_BUILTIN_PADDB128,
16427 IX86_BUILTIN_PADDW128,
16428 IX86_BUILTIN_PADDD128,
16429 IX86_BUILTIN_PADDQ128,
16430 IX86_BUILTIN_PADDSB128,
16431 IX86_BUILTIN_PADDSW128,
16432 IX86_BUILTIN_PADDUSB128,
16433 IX86_BUILTIN_PADDUSW128,
16434 IX86_BUILTIN_PSUBB128,
16435 IX86_BUILTIN_PSUBW128,
16436 IX86_BUILTIN_PSUBD128,
16437 IX86_BUILTIN_PSUBQ128,
16438 IX86_BUILTIN_PSUBSB128,
16439 IX86_BUILTIN_PSUBSW128,
16440 IX86_BUILTIN_PSUBUSB128,
16441 IX86_BUILTIN_PSUBUSW128,
16442
16443 IX86_BUILTIN_PAND128,
16444 IX86_BUILTIN_PANDN128,
16445 IX86_BUILTIN_POR128,
16446 IX86_BUILTIN_PXOR128,
16447
16448 IX86_BUILTIN_PAVGB128,
16449 IX86_BUILTIN_PAVGW128,
16450
16451 IX86_BUILTIN_PCMPEQB128,
16452 IX86_BUILTIN_PCMPEQW128,
16453 IX86_BUILTIN_PCMPEQD128,
16454 IX86_BUILTIN_PCMPGTB128,
16455 IX86_BUILTIN_PCMPGTW128,
16456 IX86_BUILTIN_PCMPGTD128,
16457
16458 IX86_BUILTIN_PMADDWD128,
16459
16460 IX86_BUILTIN_PMAXSW128,
16461 IX86_BUILTIN_PMAXUB128,
16462 IX86_BUILTIN_PMINSW128,
16463 IX86_BUILTIN_PMINUB128,
16464
16465 IX86_BUILTIN_PMULUDQ,
16466 IX86_BUILTIN_PMULUDQ128,
16467 IX86_BUILTIN_PMULHUW128,
16468 IX86_BUILTIN_PMULHW128,
16469 IX86_BUILTIN_PMULLW128,
16470
16471 IX86_BUILTIN_PSADBW128,
16472 IX86_BUILTIN_PSHUFHW,
16473 IX86_BUILTIN_PSHUFLW,
16474 IX86_BUILTIN_PSHUFD,
16475
16476 IX86_BUILTIN_PSLLDQI128,
16477 IX86_BUILTIN_PSLLWI128,
16478 IX86_BUILTIN_PSLLDI128,
16479 IX86_BUILTIN_PSLLQI128,
16480 IX86_BUILTIN_PSRAWI128,
16481 IX86_BUILTIN_PSRADI128,
16482 IX86_BUILTIN_PSRLDQI128,
16483 IX86_BUILTIN_PSRLWI128,
16484 IX86_BUILTIN_PSRLDI128,
16485 IX86_BUILTIN_PSRLQI128,
16486
16487 IX86_BUILTIN_PSLLDQ128,
16488 IX86_BUILTIN_PSLLW128,
16489 IX86_BUILTIN_PSLLD128,
16490 IX86_BUILTIN_PSLLQ128,
16491 IX86_BUILTIN_PSRAW128,
16492 IX86_BUILTIN_PSRAD128,
16493 IX86_BUILTIN_PSRLW128,
16494 IX86_BUILTIN_PSRLD128,
16495 IX86_BUILTIN_PSRLQ128,
16496
16497 IX86_BUILTIN_PUNPCKHBW128,
16498 IX86_BUILTIN_PUNPCKHWD128,
16499 IX86_BUILTIN_PUNPCKHDQ128,
16500 IX86_BUILTIN_PUNPCKHQDQ128,
16501 IX86_BUILTIN_PUNPCKLBW128,
16502 IX86_BUILTIN_PUNPCKLWD128,
16503 IX86_BUILTIN_PUNPCKLDQ128,
16504 IX86_BUILTIN_PUNPCKLQDQ128,
16505
16506 IX86_BUILTIN_CLFLUSH,
16507 IX86_BUILTIN_MFENCE,
16508 IX86_BUILTIN_LFENCE,
16509
16510 /* SSE3 (Prescott New Instructions). */
16511 IX86_BUILTIN_ADDSUBPS,
16512 IX86_BUILTIN_HADDPS,
16513 IX86_BUILTIN_HSUBPS,
16514 IX86_BUILTIN_MOVSHDUP,
16515 IX86_BUILTIN_MOVSLDUP,
16516 IX86_BUILTIN_ADDSUBPD,
16517 IX86_BUILTIN_HADDPD,
16518 IX86_BUILTIN_HSUBPD,
16519 IX86_BUILTIN_LDDQU,
16520
16521 IX86_BUILTIN_MONITOR,
16522 IX86_BUILTIN_MWAIT,
16523
16524 /* SSSE3. */
16525 IX86_BUILTIN_PHADDW,
16526 IX86_BUILTIN_PHADDD,
16527 IX86_BUILTIN_PHADDSW,
16528 IX86_BUILTIN_PHSUBW,
16529 IX86_BUILTIN_PHSUBD,
16530 IX86_BUILTIN_PHSUBSW,
16531 IX86_BUILTIN_PMADDUBSW,
16532 IX86_BUILTIN_PMULHRSW,
16533 IX86_BUILTIN_PSHUFB,
16534 IX86_BUILTIN_PSIGNB,
16535 IX86_BUILTIN_PSIGNW,
16536 IX86_BUILTIN_PSIGND,
16537 IX86_BUILTIN_PALIGNR,
16538 IX86_BUILTIN_PABSB,
16539 IX86_BUILTIN_PABSW,
16540 IX86_BUILTIN_PABSD,
16541
16542 IX86_BUILTIN_PHADDW128,
16543 IX86_BUILTIN_PHADDD128,
16544 IX86_BUILTIN_PHADDSW128,
16545 IX86_BUILTIN_PHSUBW128,
16546 IX86_BUILTIN_PHSUBD128,
16547 IX86_BUILTIN_PHSUBSW128,
16548 IX86_BUILTIN_PMADDUBSW128,
16549 IX86_BUILTIN_PMULHRSW128,
16550 IX86_BUILTIN_PSHUFB128,
16551 IX86_BUILTIN_PSIGNB128,
16552 IX86_BUILTIN_PSIGNW128,
16553 IX86_BUILTIN_PSIGND128,
16554 IX86_BUILTIN_PALIGNR128,
16555 IX86_BUILTIN_PABSB128,
16556 IX86_BUILTIN_PABSW128,
16557 IX86_BUILTIN_PABSD128,
16558
16559 /* AMDFAM10 - SSE4A New Instructions. */
16560 IX86_BUILTIN_MOVNTSD,
16561 IX86_BUILTIN_MOVNTSS,
16562 IX86_BUILTIN_EXTRQI,
16563 IX86_BUILTIN_EXTRQ,
16564 IX86_BUILTIN_INSERTQI,
16565 IX86_BUILTIN_INSERTQ,
16566
16567 /* SSE4.1. */
16568 IX86_BUILTIN_BLENDPD,
16569 IX86_BUILTIN_BLENDPS,
16570 IX86_BUILTIN_BLENDVPD,
16571 IX86_BUILTIN_BLENDVPS,
16572 IX86_BUILTIN_PBLENDVB128,
16573 IX86_BUILTIN_PBLENDW128,
16574
16575 IX86_BUILTIN_DPPD,
16576 IX86_BUILTIN_DPPS,
16577
16578 IX86_BUILTIN_INSERTPS128,
16579
16580 IX86_BUILTIN_MOVNTDQA,
16581 IX86_BUILTIN_MPSADBW128,
16582 IX86_BUILTIN_PACKUSDW128,
16583 IX86_BUILTIN_PCMPEQQ,
16584 IX86_BUILTIN_PHMINPOSUW128,
16585
16586 IX86_BUILTIN_PMAXSB128,
16587 IX86_BUILTIN_PMAXSD128,
16588 IX86_BUILTIN_PMAXUD128,
16589 IX86_BUILTIN_PMAXUW128,
16590
16591 IX86_BUILTIN_PMINSB128,
16592 IX86_BUILTIN_PMINSD128,
16593 IX86_BUILTIN_PMINUD128,
16594 IX86_BUILTIN_PMINUW128,
16595
16596 IX86_BUILTIN_PMOVSXBW128,
16597 IX86_BUILTIN_PMOVSXBD128,
16598 IX86_BUILTIN_PMOVSXBQ128,
16599 IX86_BUILTIN_PMOVSXWD128,
16600 IX86_BUILTIN_PMOVSXWQ128,
16601 IX86_BUILTIN_PMOVSXDQ128,
16602
16603 IX86_BUILTIN_PMOVZXBW128,
16604 IX86_BUILTIN_PMOVZXBD128,
16605 IX86_BUILTIN_PMOVZXBQ128,
16606 IX86_BUILTIN_PMOVZXWD128,
16607 IX86_BUILTIN_PMOVZXWQ128,
16608 IX86_BUILTIN_PMOVZXDQ128,
16609
16610 IX86_BUILTIN_PMULDQ128,
16611 IX86_BUILTIN_PMULLD128,
16612
16613 IX86_BUILTIN_ROUNDPD,
16614 IX86_BUILTIN_ROUNDPS,
16615 IX86_BUILTIN_ROUNDSD,
16616 IX86_BUILTIN_ROUNDSS,
16617
16618 IX86_BUILTIN_PTESTZ,
16619 IX86_BUILTIN_PTESTC,
16620 IX86_BUILTIN_PTESTNZC,
16621
16622 IX86_BUILTIN_VEC_INIT_V2SI,
16623 IX86_BUILTIN_VEC_INIT_V4HI,
16624 IX86_BUILTIN_VEC_INIT_V8QI,
16625 IX86_BUILTIN_VEC_EXT_V2DF,
16626 IX86_BUILTIN_VEC_EXT_V2DI,
16627 IX86_BUILTIN_VEC_EXT_V4SF,
16628 IX86_BUILTIN_VEC_EXT_V4SI,
16629 IX86_BUILTIN_VEC_EXT_V8HI,
16630 IX86_BUILTIN_VEC_EXT_V2SI,
16631 IX86_BUILTIN_VEC_EXT_V4HI,
16632 IX86_BUILTIN_VEC_EXT_V16QI,
16633 IX86_BUILTIN_VEC_SET_V2DI,
16634 IX86_BUILTIN_VEC_SET_V4SF,
16635 IX86_BUILTIN_VEC_SET_V4SI,
16636 IX86_BUILTIN_VEC_SET_V8HI,
16637 IX86_BUILTIN_VEC_SET_V4HI,
16638 IX86_BUILTIN_VEC_SET_V16QI,
16639
16640 IX86_BUILTIN_MAX
16641 };
16642
16643 /* Table for the ix86 builtin decls. */
16644 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16645
16646 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
16647 * only if MASK matches one of the ISA flags in ix86_isa_flags (and, for
16648 * 64-bit-only builtins, only if TARGET_64BIT).  Stores the function decl in
16649 * the ix86_builtins array.  Returns the decl, or NULL_TREE if not added.  */
16650
16651 static inline tree
16652 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16653 {
16654 tree decl = NULL_TREE;
16655
16656 if (mask & ix86_isa_flags
16657 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16658 {
16659 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16660 NULL, NULL_TREE);
16661 ix86_builtins[(int) code] = decl;
16662 }
16663
16664 return decl;
16665 }
16666
16667 /* Like def_builtin, but also marks the function decl "const". */
16668
16669 static inline tree
16670 def_builtin_const (int mask, const char *name, tree type,
16671 enum ix86_builtins code)
16672 {
16673 tree decl = def_builtin (mask, name, type, code);
16674 if (decl)
16675 TREE_READONLY (decl) = 1;
16676 return decl;
16677 }
16678
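/* Illustrative sketch (not part of the original source): this is roughly how
   the two helpers above are used further down in this file, tying a builtin
   name, its function type node and its IX86_BUILTIN_* code together.  The
   call below is only an example; it assumes a type node such as
   v4sf_ftype_v4sf_v4sf, which is built inside ix86_init_mmx_sse_builtins,
   and in the actual code most registrations are driven by the bdesc_*
   tables rather than written out by hand.  */
#if 0
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                     v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);
#endif
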
16679 /* Bits for builtin_description.flag. */
16680
16681 /* Set when we don't support the comparison natively, and should
16682 swap the comparison operands in order to support it.  */
16683 #define BUILTIN_DESC_SWAP_OPERANDS 1
16684
16685 struct builtin_description
16686 {
16687 const unsigned int mask;
16688 const enum insn_code icode;
16689 const char *const name;
16690 const enum ix86_builtins code;
16691 const enum rtx_code comparison;
16692 const unsigned int flag;
16693 };
16694
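/* Reading a table entry (explanatory note, not new data): for example the
   first bdesc_comi entry below,

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi,
       "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

   says that __builtin_ia32_comieq is available whenever the SSE ISA bit is
   enabled, expands through the sse_comi insn pattern, and uses UNEQ as its
   rtx comparison code; the final 0 is the flag word, which may carry
   BUILTIN_DESC_SWAP_OPERANDS.  */
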
16695 static const struct builtin_description bdesc_comi[] =
16696 {
16697 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16698 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16699 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16700 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16701 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16702 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16703 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16704 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16705 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16706 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16707 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16708 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16709 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16710 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16711 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16712 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16713 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16714 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16715 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16716 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16717 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16718 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16719 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16720 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16721 };
16722
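/* Note on the comparison codes above (explanatory, not from the original
   source): comiss/ucomiss report unordered operands by setting ZF, PF and
   CF, so the "eq" builtins use UNEQ (equal or unordered) and the "neq"
   builtins use LTGT (less or greater, i.e. ordered and unequal), while
   "gt"/"ge" use the plain GT/GE codes.  For instance,
   __builtin_ia32_comieq (a, b) is expected to return nonzero when a and b
   compare equal or unordered.  */
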
16723 static const struct builtin_description bdesc_ptest[] =
16724 {
16725 /* SSE4.1 */
16726 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16727 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16728 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16729 };
16730
16731 /* SSE builtins with 3 arguments whose last argument must be an 8-bit
16732 immediate constant or xmm0.  */
16733 static const struct builtin_description bdesc_sse_3arg[] =
16734 {
16735 /* SSE4.1 */
16736 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, 0, 0 },
16737 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, 0, 0 },
16738 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, 0, 0 },
16739 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, 0, 0 },
16740 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, 0, 0 },
16741 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, 0, 0 },
16742 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, 0, 0 },
16743 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, 0, 0 },
16744 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, 0, 0 },
16745 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, 0, 0 },
16746 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, 0, 0 },
16747 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, 0, 0 },
16748 };
16749
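/* Illustrative usage sketch (user-level code, not part of this file): the
   three-operand builtins above take an 8-bit immediate (or xmm0 for the
   blendv forms) as their last argument, e.g. roughly

     __v2df r = __builtin_ia32_blendpd (a, b, 0x1);

   The SSE4.1 intrinsic headers are expected to wrap these builtins; the
   exact wrapper names live in the headers, not here.  */
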
16750 static const struct builtin_description bdesc_2arg[] =
16751 {
16752 /* SSE */
16753 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16754 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16755 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16756 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16757 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16758 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16759 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16760 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16761
16762 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16763 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16764 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16765 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
16766 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
16767 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16768 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16769 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16770 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16771 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16772 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16773 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16774 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16775 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16776 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16777 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16778 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16779 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16780 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16781 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16782 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16783 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16784
16785 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16786 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16787 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16788 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16789
16790 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16791 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16792 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16793 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16794
16795 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16796 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16797 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16798 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16799 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16800
16801 /* MMX */
16802 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16803 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16804 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16805 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16806 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16807 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16808 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16809 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16810
16811 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16812 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16813 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16814 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16815 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16816 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16817 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16818 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16819
16820 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16821 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16822 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16823
16824 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16825 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16826 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16827 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16828
16829 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16830 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16831
16832 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16833 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16834 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16835 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16836 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16837 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16838
16839 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16840 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16841 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16842 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16843
16844 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16845 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16846 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16847 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16848 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16849 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16850
16851 /* Special. */
16852 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16853 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16854 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16855
16856 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16857 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16858 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16859
16860 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16861 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16862 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16863 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16864 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16865 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16866
16867 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16868 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16869 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16870 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16871 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16872 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16873
16874 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16875 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16876 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16877 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16878
16879 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16880 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16881
16882 /* SSE2 */
16883 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16884 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16885 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16886 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16887 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16888 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16889 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16890 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16891
16892 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16893 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16894 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16895 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
16896 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
16897 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16898 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16899 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16900 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16901 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
16902 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
16903 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16904 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16905 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16906 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16907 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16908 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16909 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16910 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16911 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16912
16913 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16914 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16915 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16916 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16917
16918 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16919 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16920 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16921 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16922
16923 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16924 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16925 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16926
16927 /* SSE2 MMX */
16928 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16929 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16930 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16931 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16932 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16933 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16934 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16935 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16936
16937 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16938 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16939 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16940 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16941 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16942 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16943 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16944 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16945
16946 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16947 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16948
16949 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16950 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16951 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16952 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16953
16954 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16955 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16956
16957 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16958 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16959 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16960 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16961 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16962 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16963
16964 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16965 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16966 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16967 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16968
16969 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16970 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16971 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16972 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16973 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16974 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16975 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16976 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16977
16978 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16979 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16980 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16981
16982 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16983 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16984
16985 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16986 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16987
16988 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16989 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16990 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16991
16992 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16993 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16994 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16995
16996 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16997 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16998
16999 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
17000
17001 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
17002 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
17003 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
17004 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
17005
17006 /* SSE3 MMX */
17007 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
17008 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
17009 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
17010 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
17011 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
17012 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
17013
17014 /* SSSE3 */
17015 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
17016 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
17017 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
17018 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
17019 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
17020 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
17021 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
17022 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
17023 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
17024 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
17025 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
17026 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
17027 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
17028 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
17029 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
17030 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
17031 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
17032 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
17033 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
17034 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
17035 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
17036 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
17037 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
17038 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
17039
17040 /* SSE4.1 */
17041 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, 0, 0 },
17042 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, 0, 0 },
17043 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, 0, 0 },
17044 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, 0, 0 },
17045 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, 0, 0 },
17046 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, 0, 0 },
17047 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, 0, 0 },
17048 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, 0, 0 },
17049 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, 0, 0 },
17050 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
17051 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
17052 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
17053 };
17054
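/* Illustrative usage sketch (not part of this file): every named entry in
   bdesc_2arg becomes a two-operand target builtin whose prototype is built
   from the *_ftype_* nodes below, e.g. roughly

     __v4sf sum  = __builtin_ia32_addps (x, y);
     __v4sf mask = __builtin_ia32_cmpltps (x, y);

   All of the cmp* entries share one insn pattern and differ only in the rtx
   comparison code (and possibly BUILTIN_DESC_SWAP_OPERANDS) recorded in the
   table.  */
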
17055 static const struct builtin_description bdesc_1arg[] =
17056 {
17057 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
17058 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
17059
17060 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
17061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
17062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
17063
17064 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
17065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
17066 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
17067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
17068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
17069 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
17070
17071 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
17072 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
17073
17074 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
17075
17076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
17077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
17078
17079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
17080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
17081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
17082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
17083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
17084
17085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
17086
17087 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
17088 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
17089 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
17090 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
17091
17092 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
17093 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
17094 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
17095
17096 /* SSE3 */
17097 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
17098 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
17099
17100 /* SSSE3 */
17101 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
17102 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
17103 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
17104 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
17105 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
17106 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
17107
17108 /* SSE4.1 */
17109 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, 0, 0 },
17110 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, 0, 0 },
17111 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, 0, 0 },
17112 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, 0, 0 },
17113 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, 0, 0 },
17114 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, 0, 0 },
17115 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, 0, 0 },
17116 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, 0, 0 },
17117 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, 0, 0 },
17118 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, 0, 0 },
17119 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, 0, 0 },
17120 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, 0, 0 },
17121 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, 0, 0 },
17122
17123 /* Fake 1-arg builtins that take a constant smaller than 8 bits as the 2nd arg. */
17124 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 },
17125 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 },
17126 };
17127
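/* Illustrative usage sketch (not part of this file): the entries above are
   single-operand builtins, e.g. roughly

     __v4sf dup = __builtin_ia32_movshdup (x);

   Entries whose name field is 0 are not registered from this table; their
   user-visible names, if any, are defined separately elsewhere in this
   file.  */
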
17128 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
17129 is zero.  Otherwise, if TARGET_SSE is not set, only define the MMX
17130 builtins.  */
17131 static void
17132 ix86_init_mmx_sse_builtins (void)
17133 {
17134 const struct builtin_description * d;
17135 size_t i;
17136
17137 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17138 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17139 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17140 tree V2DI_type_node
17141 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17142 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17143 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17144 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17145 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17146 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17147 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17148
17149 tree pchar_type_node = build_pointer_type (char_type_node);
17150 tree pcchar_type_node = build_pointer_type (
17151 build_type_variant (char_type_node, 1, 0));
17152 tree pfloat_type_node = build_pointer_type (float_type_node);
17153 tree pcfloat_type_node = build_pointer_type (
17154 build_type_variant (float_type_node, 1, 0));
17155 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17156 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17157 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17158
17159 /* Comparisons. */
17160 tree int_ftype_v4sf_v4sf
17161 = build_function_type_list (integer_type_node,
17162 V4SF_type_node, V4SF_type_node, NULL_TREE);
17163 tree v4si_ftype_v4sf_v4sf
17164 = build_function_type_list (V4SI_type_node,
17165 V4SF_type_node, V4SF_type_node, NULL_TREE);
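  /* For reference (explanatory note, not from the original source): each of
     these *_ftype_* nodes simply describes a C prototype built out of the
     vector type nodes above, e.g. int_ftype_v4sf_v4sf corresponds to
     int f (__v4sf, __v4sf), and v4si_ftype_v4sf_v4sf to
     __v4si f (__v4sf, __v4sf).  */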
17166 /* MMX/SSE/integer conversions. */
17167 tree int_ftype_v4sf
17168 = build_function_type_list (integer_type_node,
17169 V4SF_type_node, NULL_TREE);
17170 tree int64_ftype_v4sf
17171 = build_function_type_list (long_long_integer_type_node,
17172 V4SF_type_node, NULL_TREE);
17173 tree int_ftype_v8qi
17174 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17175 tree v4sf_ftype_v4sf_int
17176 = build_function_type_list (V4SF_type_node,
17177 V4SF_type_node, integer_type_node, NULL_TREE);
17178 tree v4sf_ftype_v4sf_int64
17179 = build_function_type_list (V4SF_type_node,
17180 V4SF_type_node, long_long_integer_type_node,
17181 NULL_TREE);
17182 tree v4sf_ftype_v4sf_v2si
17183 = build_function_type_list (V4SF_type_node,
17184 V4SF_type_node, V2SI_type_node, NULL_TREE);
17185
17186 /* Miscellaneous. */
17187 tree v8qi_ftype_v4hi_v4hi
17188 = build_function_type_list (V8QI_type_node,
17189 V4HI_type_node, V4HI_type_node, NULL_TREE);
17190 tree v4hi_ftype_v2si_v2si
17191 = build_function_type_list (V4HI_type_node,
17192 V2SI_type_node, V2SI_type_node, NULL_TREE);
17193 tree v4sf_ftype_v4sf_v4sf_int
17194 = build_function_type_list (V4SF_type_node,
17195 V4SF_type_node, V4SF_type_node,
17196 integer_type_node, NULL_TREE);
17197 tree v2si_ftype_v4hi_v4hi
17198 = build_function_type_list (V2SI_type_node,
17199 V4HI_type_node, V4HI_type_node, NULL_TREE);
17200 tree v4hi_ftype_v4hi_int
17201 = build_function_type_list (V4HI_type_node,
17202 V4HI_type_node, integer_type_node, NULL_TREE);
17203 tree v4hi_ftype_v4hi_di
17204 = build_function_type_list (V4HI_type_node,
17205 V4HI_type_node, long_long_unsigned_type_node,
17206 NULL_TREE);
17207 tree v2si_ftype_v2si_di
17208 = build_function_type_list (V2SI_type_node,
17209 V2SI_type_node, long_long_unsigned_type_node,
17210 NULL_TREE);
17211 tree void_ftype_void
17212 = build_function_type (void_type_node, void_list_node);
17213 tree void_ftype_unsigned
17214 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17215 tree void_ftype_unsigned_unsigned
17216 = build_function_type_list (void_type_node, unsigned_type_node,
17217 unsigned_type_node, NULL_TREE);
17218 tree void_ftype_pcvoid_unsigned_unsigned
17219 = build_function_type_list (void_type_node, const_ptr_type_node,
17220 unsigned_type_node, unsigned_type_node,
17221 NULL_TREE);
17222 tree unsigned_ftype_void
17223 = build_function_type (unsigned_type_node, void_list_node);
17224 tree v2si_ftype_v4sf
17225 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17226 /* Loads/stores. */
17227 tree void_ftype_v8qi_v8qi_pchar
17228 = build_function_type_list (void_type_node,
17229 V8QI_type_node, V8QI_type_node,
17230 pchar_type_node, NULL_TREE);
17231 tree v4sf_ftype_pcfloat
17232 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17233 /* @@@ the type is bogus */
17234 tree v4sf_ftype_v4sf_pv2si
17235 = build_function_type_list (V4SF_type_node,
17236 V4SF_type_node, pv2si_type_node, NULL_TREE);
17237 tree void_ftype_pv2si_v4sf
17238 = build_function_type_list (void_type_node,
17239 pv2si_type_node, V4SF_type_node, NULL_TREE);
17240 tree void_ftype_pfloat_v4sf
17241 = build_function_type_list (void_type_node,
17242 pfloat_type_node, V4SF_type_node, NULL_TREE);
17243 tree void_ftype_pdi_di
17244 = build_function_type_list (void_type_node,
17245 pdi_type_node, long_long_unsigned_type_node,
17246 NULL_TREE);
17247 tree void_ftype_pv2di_v2di
17248 = build_function_type_list (void_type_node,
17249 pv2di_type_node, V2DI_type_node, NULL_TREE);
17250 /* Normal vector unops. */
17251 tree v4sf_ftype_v4sf
17252 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17253 tree v16qi_ftype_v16qi
17254 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17255 tree v8hi_ftype_v8hi
17256 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17257 tree v4si_ftype_v4si
17258 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17259 tree v8qi_ftype_v8qi
17260 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17261 tree v4hi_ftype_v4hi
17262 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17263
17264 /* Normal vector binops. */
17265 tree v4sf_ftype_v4sf_v4sf
17266 = build_function_type_list (V4SF_type_node,
17267 V4SF_type_node, V4SF_type_node, NULL_TREE);
17268 tree v8qi_ftype_v8qi_v8qi
17269 = build_function_type_list (V8QI_type_node,
17270 V8QI_type_node, V8QI_type_node, NULL_TREE);
17271 tree v4hi_ftype_v4hi_v4hi
17272 = build_function_type_list (V4HI_type_node,
17273 V4HI_type_node, V4HI_type_node, NULL_TREE);
17274 tree v2si_ftype_v2si_v2si
17275 = build_function_type_list (V2SI_type_node,
17276 V2SI_type_node, V2SI_type_node, NULL_TREE);
17277 tree di_ftype_di_di
17278 = build_function_type_list (long_long_unsigned_type_node,
17279 long_long_unsigned_type_node,
17280 long_long_unsigned_type_node, NULL_TREE);
17281
17282 tree di_ftype_di_di_int
17283 = build_function_type_list (long_long_unsigned_type_node,
17284 long_long_unsigned_type_node,
17285 long_long_unsigned_type_node,
17286 integer_type_node, NULL_TREE);
17287
17288 tree v2si_ftype_v2sf
17289 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17290 tree v2sf_ftype_v2si
17291 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17292 tree v2si_ftype_v2si
17293 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17294 tree v2sf_ftype_v2sf
17295 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17296 tree v2sf_ftype_v2sf_v2sf
17297 = build_function_type_list (V2SF_type_node,
17298 V2SF_type_node, V2SF_type_node, NULL_TREE);
17299 tree v2si_ftype_v2sf_v2sf
17300 = build_function_type_list (V2SI_type_node,
17301 V2SF_type_node, V2SF_type_node, NULL_TREE);
17302 tree pint_type_node = build_pointer_type (integer_type_node);
17303 tree pdouble_type_node = build_pointer_type (double_type_node);
17304 tree pcdouble_type_node = build_pointer_type (
17305 build_type_variant (double_type_node, 1, 0));
17306 tree int_ftype_v2df_v2df
17307 = build_function_type_list (integer_type_node,
17308 V2DF_type_node, V2DF_type_node, NULL_TREE);
17309
17310 tree void_ftype_pcvoid
17311 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17312 tree v4sf_ftype_v4si
17313 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17314 tree v4si_ftype_v4sf
17315 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17316 tree v2df_ftype_v4si
17317 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17318 tree v4si_ftype_v2df
17319 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17320 tree v2si_ftype_v2df
17321 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17322 tree v4sf_ftype_v2df
17323 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17324 tree v2df_ftype_v2si
17325 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17326 tree v2df_ftype_v4sf
17327 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17328 tree int_ftype_v2df
17329 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17330 tree int64_ftype_v2df
17331 = build_function_type_list (long_long_integer_type_node,
17332 V2DF_type_node, NULL_TREE);
17333 tree v2df_ftype_v2df_int
17334 = build_function_type_list (V2DF_type_node,
17335 V2DF_type_node, integer_type_node, NULL_TREE);
17336 tree v2df_ftype_v2df_int64
17337 = build_function_type_list (V2DF_type_node,
17338 V2DF_type_node, long_long_integer_type_node,
17339 NULL_TREE);
17340 tree v4sf_ftype_v4sf_v2df
17341 = build_function_type_list (V4SF_type_node,
17342 V4SF_type_node, V2DF_type_node, NULL_TREE);
17343 tree v2df_ftype_v2df_v4sf
17344 = build_function_type_list (V2DF_type_node,
17345 V2DF_type_node, V4SF_type_node, NULL_TREE);
17346 tree v2df_ftype_v2df_v2df_int
17347 = build_function_type_list (V2DF_type_node,
17348 V2DF_type_node, V2DF_type_node,
17349 integer_type_node,
17350 NULL_TREE);
17351 tree v2df_ftype_v2df_pcdouble
17352 = build_function_type_list (V2DF_type_node,
17353 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17354 tree void_ftype_pdouble_v2df
17355 = build_function_type_list (void_type_node,
17356 pdouble_type_node, V2DF_type_node, NULL_TREE);
17357 tree void_ftype_pint_int
17358 = build_function_type_list (void_type_node,
17359 pint_type_node, integer_type_node, NULL_TREE);
17360 tree void_ftype_v16qi_v16qi_pchar
17361 = build_function_type_list (void_type_node,
17362 V16QI_type_node, V16QI_type_node,
17363 pchar_type_node, NULL_TREE);
17364 tree v2df_ftype_pcdouble
17365 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17366 tree v2df_ftype_v2df_v2df
17367 = build_function_type_list (V2DF_type_node,
17368 V2DF_type_node, V2DF_type_node, NULL_TREE);
17369 tree v16qi_ftype_v16qi_v16qi
17370 = build_function_type_list (V16QI_type_node,
17371 V16QI_type_node, V16QI_type_node, NULL_TREE);
17372 tree v8hi_ftype_v8hi_v8hi
17373 = build_function_type_list (V8HI_type_node,
17374 V8HI_type_node, V8HI_type_node, NULL_TREE);
17375 tree v4si_ftype_v4si_v4si
17376 = build_function_type_list (V4SI_type_node,
17377 V4SI_type_node, V4SI_type_node, NULL_TREE);
17378 tree v2di_ftype_v2di_v2di
17379 = build_function_type_list (V2DI_type_node,
17380 V2DI_type_node, V2DI_type_node, NULL_TREE);
17381 tree v2di_ftype_v2df_v2df
17382 = build_function_type_list (V2DI_type_node,
17383 V2DF_type_node, V2DF_type_node, NULL_TREE);
17384 tree v2df_ftype_v2df
17385 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17386 tree v2di_ftype_v2di_int
17387 = build_function_type_list (V2DI_type_node,
17388 V2DI_type_node, integer_type_node, NULL_TREE);
17389 tree v2di_ftype_v2di_v2di_int
17390 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17391 V2DI_type_node, integer_type_node, NULL_TREE);
17392 tree v4si_ftype_v4si_int
17393 = build_function_type_list (V4SI_type_node,
17394 V4SI_type_node, integer_type_node, NULL_TREE);
17395 tree v8hi_ftype_v8hi_int
17396 = build_function_type_list (V8HI_type_node,
17397 V8HI_type_node, integer_type_node, NULL_TREE);
17398 tree v4si_ftype_v8hi_v8hi
17399 = build_function_type_list (V4SI_type_node,
17400 V8HI_type_node, V8HI_type_node, NULL_TREE);
17401 tree di_ftype_v8qi_v8qi
17402 = build_function_type_list (long_long_unsigned_type_node,
17403 V8QI_type_node, V8QI_type_node, NULL_TREE);
17404 tree di_ftype_v2si_v2si
17405 = build_function_type_list (long_long_unsigned_type_node,
17406 V2SI_type_node, V2SI_type_node, NULL_TREE);
17407 tree v2di_ftype_v16qi_v16qi
17408 = build_function_type_list (V2DI_type_node,
17409 V16QI_type_node, V16QI_type_node, NULL_TREE);
17410 tree v2di_ftype_v4si_v4si
17411 = build_function_type_list (V2DI_type_node,
17412 V4SI_type_node, V4SI_type_node, NULL_TREE);
17413 tree int_ftype_v16qi
17414 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17415 tree v16qi_ftype_pcchar
17416 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17417 tree void_ftype_pchar_v16qi
17418 = build_function_type_list (void_type_node,
17419 pchar_type_node, V16QI_type_node, NULL_TREE);
17420
17421 tree v2di_ftype_v2di_unsigned_unsigned
17422 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17423 unsigned_type_node, unsigned_type_node,
17424 NULL_TREE);
17425 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17426 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17427 unsigned_type_node, unsigned_type_node,
17428 NULL_TREE);
17429 tree v2di_ftype_v2di_v16qi
17430 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17431 NULL_TREE);
17432 tree v2df_ftype_v2df_v2df_v2df
17433 = build_function_type_list (V2DF_type_node,
17434 V2DF_type_node, V2DF_type_node,
17435 V2DF_type_node, NULL_TREE);
17436 tree v4sf_ftype_v4sf_v4sf_v4sf
17437 = build_function_type_list (V4SF_type_node,
17438 V4SF_type_node, V4SF_type_node,
17439 V4SF_type_node, NULL_TREE);
17440 tree v8hi_ftype_v16qi
17441 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17442 NULL_TREE);
17443 tree v4si_ftype_v16qi
17444 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17445 NULL_TREE);
17446 tree v2di_ftype_v16qi
17447 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17448 NULL_TREE);
17449 tree v4si_ftype_v8hi
17450 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17451 NULL_TREE);
17452 tree v2di_ftype_v8hi
17453 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17454 NULL_TREE);
17455 tree v2di_ftype_v4si
17456 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17457 NULL_TREE);
17458 tree v2di_ftype_pv2di
17459 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17460 NULL_TREE);
17461 tree v16qi_ftype_v16qi_v16qi_int
17462 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17463 V16QI_type_node, integer_type_node,
17464 NULL_TREE);
17465 tree v16qi_ftype_v16qi_v16qi_v16qi
17466 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17467 V16QI_type_node, V16QI_type_node,
17468 NULL_TREE);
17469 tree v8hi_ftype_v8hi_v8hi_int
17470 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17471 V8HI_type_node, integer_type_node,
17472 NULL_TREE);
17473 tree v4si_ftype_v4si_v4si_int
17474 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17475 V4SI_type_node, integer_type_node,
17476 NULL_TREE);
17477 tree int_ftype_v2di_v2di
17478 = build_function_type_list (integer_type_node,
17479 V2DI_type_node, V2DI_type_node,
17480 NULL_TREE);
17481
17482 tree float80_type;
17483 tree float128_type;
17484 tree ftype;
17485
17486 /* The __float80 type. */
17487 if (TYPE_MODE (long_double_type_node) == XFmode)
17488 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17489 "__float80");
17490 else
17491 {
17492 /* The __float80 type. */
17493 float80_type = make_node (REAL_TYPE);
17494 TYPE_PRECISION (float80_type) = 80;
17495 layout_type (float80_type);
17496 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17497 }
17498
17499 if (TARGET_64BIT)
17500 {
17501 float128_type = make_node (REAL_TYPE);
17502 TYPE_PRECISION (float128_type) = 128;
17503 layout_type (float128_type);
17504 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17505 }
17506
17507 /* Add all SSE builtins that are more or less simple operations on
17508 three operands. */
17509 for (i = 0, d = bdesc_sse_3arg;
17510 i < ARRAY_SIZE (bdesc_sse_3arg);
17511 i++, d++)
17512 {
17513 /* Use one of the operands; the target can have a different mode for
17514 mask-generating compares. */
17515 enum machine_mode mode;
17516 tree type;
17517
17518 if (d->name == 0)
17519 continue;
17520 mode = insn_data[d->icode].operand[1].mode;
17521
17522 switch (mode)
17523 {
17524 case V16QImode:
17525 type = v16qi_ftype_v16qi_v16qi_int;
17526 break;
17527 case V8HImode:
17528 type = v8hi_ftype_v8hi_v8hi_int;
17529 break;
17530 case V4SImode:
17531 type = v4si_ftype_v4si_v4si_int;
17532 break;
17533 case V2DImode:
17534 type = v2di_ftype_v2di_v2di_int;
17535 break;
17536 case V2DFmode:
17537 type = v2df_ftype_v2df_v2df_int;
17538 break;
17539 case V4SFmode:
17540 type = v4sf_ftype_v4sf_v4sf_int;
17541 break;
17542 default:
17543 gcc_unreachable ();
17544 }
17545
17546 /* Override for variable blends. */
17547 switch (d->icode)
17548 {
17549 case CODE_FOR_sse4_1_blendvpd:
17550 type = v2df_ftype_v2df_v2df_v2df;
17551 break;
17552 case CODE_FOR_sse4_1_blendvps:
17553 type = v4sf_ftype_v4sf_v4sf_v4sf;
17554 break;
17555 case CODE_FOR_sse4_1_pblendvb:
17556 type = v16qi_ftype_v16qi_v16qi_v16qi;
17557 break;
17558 default:
17559 break;
17560 }
17561
17562 def_builtin (d->mask, d->name, type, d->code);
17563 }
17564
17565 /* Add all builtins that are more or less simple operations on two
17566 operands. */
17567 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17568 {
17569 /* Use one of the operands; the target can have a different mode for
17570 mask-generating compares. */
17571 enum machine_mode mode;
17572 tree type;
17573
17574 if (d->name == 0)
17575 continue;
17576 mode = insn_data[d->icode].operand[1].mode;
17577
17578 switch (mode)
17579 {
17580 case V16QImode:
17581 type = v16qi_ftype_v16qi_v16qi;
17582 break;
17583 case V8HImode:
17584 type = v8hi_ftype_v8hi_v8hi;
17585 break;
17586 case V4SImode:
17587 type = v4si_ftype_v4si_v4si;
17588 break;
17589 case V2DImode:
17590 type = v2di_ftype_v2di_v2di;
17591 break;
17592 case V2DFmode:
17593 type = v2df_ftype_v2df_v2df;
17594 break;
17595 case V4SFmode:
17596 type = v4sf_ftype_v4sf_v4sf;
17597 break;
17598 case V8QImode:
17599 type = v8qi_ftype_v8qi_v8qi;
17600 break;
17601 case V4HImode:
17602 type = v4hi_ftype_v4hi_v4hi;
17603 break;
17604 case V2SImode:
17605 type = v2si_ftype_v2si_v2si;
17606 break;
17607 case DImode:
17608 type = di_ftype_di_di;
17609 break;
17610
17611 default:
17612 gcc_unreachable ();
17613 }
17614
17615 /* Override for comparisons. */
17616 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17617 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17618 type = v4si_ftype_v4sf_v4sf;
17619
17620 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17621 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17622 type = v2di_ftype_v2df_v2df;
17623
17624 def_builtin (d->mask, d->name, type, d->code);
17625 }
17626
17627 /* Add all builtins that are more or less simple operations on one operand. */
17628 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17629 {
17630 enum machine_mode mode;
17631 tree type;
17632
17633 if (d->name == 0)
17634 continue;
17635 mode = insn_data[d->icode].operand[1].mode;
17636
17637 switch (mode)
17638 {
17639 case V16QImode:
17640 type = v16qi_ftype_v16qi;
17641 break;
17642 case V8HImode:
17643 type = v8hi_ftype_v8hi;
17644 break;
17645 case V4SImode:
17646 type = v4si_ftype_v4si;
17647 break;
17648 case V2DFmode:
17649 type = v2df_ftype_v2df;
17650 break;
17651 case V4SFmode:
17652 type = v4sf_ftype_v4sf;
17653 break;
17654 case V8QImode:
17655 type = v8qi_ftype_v8qi;
17656 break;
17657 case V4HImode:
17658 type = v4hi_ftype_v4hi;
17659 break;
17660 case V2SImode:
17661 type = v2si_ftype_v2si;
17662 break;
17663
17664 default:
17665 gcc_unreachable ();
17666 }
17667
17668 def_builtin (d->mask, d->name, type, d->code);
17669 }
17670
17671 /* Add the remaining MMX insns with somewhat more complicated types. */
17672 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17673 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17674 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17675 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17676
17677 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17678 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17679 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17680
17681 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17682 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17683
17684 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17685 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17686
17687 /* comi/ucomi insns. */
17688 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17689 if (d->mask == OPTION_MASK_ISA_SSE2)
17690 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17691 else
17692 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17693
17694 /* ptest insns. */
17695 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17696 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17697
17698 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17699 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17700 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17701
17702 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17703 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17704 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17705 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17706 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17707 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17708 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17709 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17710 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17711 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17712 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17713
17714 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17715
17716 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17717 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17718
17719 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17720 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17721 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17722 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17723
17724 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17725 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17726 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17727 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17728
17729 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17730
17731 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17732
17733 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17734 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17735 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17736 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17737 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17738 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17739
17740 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17741
17742 /* Original 3DNow! */
17743 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17744 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17745 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17746 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17747 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17748 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17749 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17750 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17751 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17752 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17753 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17754 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17755 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17756 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17757 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17758 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17759 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17760 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17761 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17762 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17763
17764 /* 3DNow! extension as used in the Athlon CPU. */
17765 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17766 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17767 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17768 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17769 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17770 def_builtin (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17771
17772 /* SSE2 */
17773 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17774
17775 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17776 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17777
17778 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17779 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17780
17781 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17782 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17783 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17784 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17785 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17786
17787 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17788 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17789 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17790 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17791
17792 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17793 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17794
17795 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17796
17797 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17798 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17799
17800 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17801 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17802 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17803 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17804 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17805
17806 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17807
17808 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17809 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17810 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17811 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17812
17813 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17814 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17815 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17816
17817 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17818 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17819 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17820 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17821
17822 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17823 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17824 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17825
17826 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17827 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17828
17829 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17830 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17831
17832 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17833 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17834 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17835 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17836 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17837 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17838 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17839
17840 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17841 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17842 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17843 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17844 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17845 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17846 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17847
17848 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17849 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17850 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17851 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17852
17853 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17854
17855 /* Prescott New Instructions. */
17856 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
17857 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
17858 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17859
17860 /* SSSE3. */
17861 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17862 def_builtin (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
17863
17864 /* SSE4.1. */
17865 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
17866 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
17867 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
17868 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
17869 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
17870 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
17871 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
17872 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
17873 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
17874 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
17875 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
17876 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
17877 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
17878 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
17879 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
17880 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
17881 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
17882 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
17883
17884 /* AMDFAM10 SSE4A new built-ins. */
17885 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17886 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17887 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17888 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17889 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17890 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17891
17892 /* Access to the vec_init patterns. */
17893 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17894 integer_type_node, NULL_TREE);
17895 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17896
17897 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17898 short_integer_type_node,
17899 short_integer_type_node,
17900 short_integer_type_node, NULL_TREE);
17901 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17902
17903 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17904 char_type_node, char_type_node,
17905 char_type_node, char_type_node,
17906 char_type_node, char_type_node,
17907 char_type_node, NULL_TREE);
17908 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17909
17910 /* Access to the vec_extract patterns. */
17911 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17912 integer_type_node, NULL_TREE);
17913 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17914
17915 ftype = build_function_type_list (long_long_integer_type_node,
17916 V2DI_type_node, integer_type_node,
17917 NULL_TREE);
17918 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17919
17920 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17921 integer_type_node, NULL_TREE);
17922 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17923
17924 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17925 integer_type_node, NULL_TREE);
17926 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17927
17928 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17929 integer_type_node, NULL_TREE);
17930 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17931
17932 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17933 integer_type_node, NULL_TREE);
17934 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17935
17936 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17937 integer_type_node, NULL_TREE);
17938 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17939
17940 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17941 integer_type_node, NULL_TREE);
17942 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
17943
17944 /* Access to the vec_set patterns. */
17945 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17946 intDI_type_node,
17947 integer_type_node, NULL_TREE);
17948 def_builtin (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
17949
17950 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17951 float_type_node,
17952 integer_type_node, NULL_TREE);
17953 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
17954
17955 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17956 intSI_type_node,
17957 integer_type_node, NULL_TREE);
17958 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
17959
17960 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17961 intHI_type_node,
17962 integer_type_node, NULL_TREE);
17963 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
17964
17965 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17966 intHI_type_node,
17967 integer_type_node, NULL_TREE);
17968 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
17969
17970 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17971 intQI_type_node,
17972 integer_type_node, NULL_TREE);
17973 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
17974 }
17975
17976 static void
17977 ix86_init_builtins (void)
17978 {
17979 if (TARGET_MMX)
17980 ix86_init_mmx_sse_builtins ();
17981 }
17982
17983 /* Errors in the source file can cause expand_expr to return const0_rtx
17984 where we expect a vector. To avoid crashing, use one of the vector
17985 clear instructions. */
17986 static rtx
17987 safe_vector_operand (rtx x, enum machine_mode mode)
17988 {
17989 if (x == const0_rtx)
17990 x = CONST0_RTX (mode);
17991 return x;
17992 }
17993
17994 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
17995 4 operands. The third argument must be an 8-bit immediate
17996 constant, or xmm0 for the variable blend insns. */
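/* Illustrative sketch, not part of the original source: a builtin routed
   through this expander, e.g. __builtin_ia32_blendpd, is normally reached
   from an intrinsic header wrapper along the lines of

       extern __inline __m128d
       _mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
       {
         return (__m128d) __builtin_ia32_blendpd ((__v2df) __X,
                                                  (__v2df) __Y, __M);
       }

   so the third argument is a compile-time constant, while the variable
   blend forms (blendvpd/blendvps/pblendvb) pass their mask in xmm0.  */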
17997
17998 static rtx
17999 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18000 rtx target)
18001 {
18002 rtx pat;
18003 tree arg0 = CALL_EXPR_ARG (exp, 0);
18004 tree arg1 = CALL_EXPR_ARG (exp, 1);
18005 tree arg2 = CALL_EXPR_ARG (exp, 2);
18006 rtx op0 = expand_normal (arg0);
18007 rtx op1 = expand_normal (arg1);
18008 rtx op2 = expand_normal (arg2);
18009 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18010 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18011 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18012 enum machine_mode mode2;
18013 rtx xmm0;
18014
18015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18016 op0 = copy_to_mode_reg (mode0, op0);
18017 if ((optimize && !register_operand (op1, mode1))
18018 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18019 op1 = copy_to_mode_reg (mode1, op1);
18020
18021 switch (icode)
18022 {
18023 case CODE_FOR_sse4_1_blendvpd:
18024 case CODE_FOR_sse4_1_blendvps:
18025 case CODE_FOR_sse4_1_pblendvb:
18026 /* The third argument of variable blends must be xmm0. */
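          /* The hardware encodes xmm0 as an implicit operand of these
             variable blend instructions, so the mask is physically moved
             into that register before the pattern is emitted.  */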
18027 xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
18028 emit_move_insn (xmm0, op2);
18029 op2 = xmm0;
18030 break;
18031 default:
18032 mode2 = insn_data[icode].operand[2].mode;
18033 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18034 {
18035 switch (icode)
18036 {
18037 case CODE_FOR_sse4_1_roundsd:
18038 case CODE_FOR_sse4_1_roundss:
18039 error ("the third argument must be a 4-bit immediate");
18040 break;
18041 default:
18042 error ("the third argument must be an 8-bit immediate");
18043 break;
18044 }
18045 return const0_rtx;
18046 }
18047 break;
18048 }
18049
18050 if (optimize
18051 || target == 0
18052 || GET_MODE (target) != tmode
18053 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18054 target = gen_reg_rtx (tmode);
18055 pat = GEN_FCN (icode) (target, op0, op1, op2);
18056 if (! pat)
18057 return 0;
18058 emit_insn (pat);
18059 return target;
18060 }
18061
18062 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18063
18064 static rtx
18065 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18066 {
18067 rtx pat, xops[3];
18068 tree arg0 = CALL_EXPR_ARG (exp, 0);
18069 tree arg1 = CALL_EXPR_ARG (exp, 1);
18070 rtx op0 = expand_normal (arg0);
18071 rtx op1 = expand_normal (arg1);
18072 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18073 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18074 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18075
18076 if (VECTOR_MODE_P (mode0))
18077 op0 = safe_vector_operand (op0, mode0);
18078 if (VECTOR_MODE_P (mode1))
18079 op1 = safe_vector_operand (op1, mode1);
18080
18081 if (optimize || !target
18082 || GET_MODE (target) != tmode
18083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18084 target = gen_reg_rtx (tmode);
18085
18086 if (GET_MODE (op1) == SImode && mode1 == TImode)
18087 {
18088 rtx x = gen_reg_rtx (V4SImode);
18089 emit_insn (gen_sse2_loadd (x, op1));
18090 op1 = gen_lowpart (TImode, x);
18091 }
18092
18093 /* The insn must want input operands in the same modes as the
18094 result. */
18095 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18096 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18097
18098 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18099 op0 = copy_to_mode_reg (mode0, op0);
18100 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18101 op1 = copy_to_mode_reg (mode1, op1);
18102
18103 /* ??? Using ix86_fixup_binary_operands is problematic when
18104 we've got mismatched modes. Fake it. */
18105
18106 xops[0] = target;
18107 xops[1] = op0;
18108 xops[2] = op1;
18109
18110 if (tmode == mode0 && tmode == mode1)
18111 {
18112 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18113 op0 = xops[1];
18114 op1 = xops[2];
18115 }
18116 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18117 {
18118 op0 = force_reg (mode0, op0);
18119 op1 = force_reg (mode1, op1);
18120 target = gen_reg_rtx (tmode);
18121 }
18122
18123 pat = GEN_FCN (icode) (target, op0, op1);
18124 if (! pat)
18125 return 0;
18126 emit_insn (pat);
18127 return target;
18128 }
18129
18130 /* Subroutine of ix86_expand_builtin to take care of stores. */
18131
18132 static rtx
18133 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18134 {
18135 rtx pat;
18136 tree arg0 = CALL_EXPR_ARG (exp, 0);
18137 tree arg1 = CALL_EXPR_ARG (exp, 1);
18138 rtx op0 = expand_normal (arg0);
18139 rtx op1 = expand_normal (arg1);
18140 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18141 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18142
18143 if (VECTOR_MODE_P (mode1))
18144 op1 = safe_vector_operand (op1, mode1);
18145
18146 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18147 op1 = copy_to_mode_reg (mode1, op1);
18148
18149 pat = GEN_FCN (icode) (op0, op1);
18150 if (pat)
18151 emit_insn (pat);
18152 return 0;
18153 }
18154
18155 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18156
18157 static rtx
18158 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18159 rtx target, int do_load)
18160 {
18161 rtx pat;
18162 tree arg0 = CALL_EXPR_ARG (exp, 0);
18163 rtx op0 = expand_normal (arg0);
18164 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18165 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18166
18167 if (optimize || !target
18168 || GET_MODE (target) != tmode
18169 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18170 target = gen_reg_rtx (tmode);
18171 if (do_load)
18172 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18173 else
18174 {
18175 if (VECTOR_MODE_P (mode0))
18176 op0 = safe_vector_operand (op0, mode0);
18177
18178 if ((optimize && !register_operand (op0, mode0))
18179 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18180 op0 = copy_to_mode_reg (mode0, op0);
18181 }
18182
18183 switch (icode)
18184 {
18185 case CODE_FOR_sse4_1_roundpd:
18186 case CODE_FOR_sse4_1_roundps:
18187 {
18188 tree arg1 = CALL_EXPR_ARG (exp, 1);
18189 rtx op1 = expand_normal (arg1);
18190 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18191
18192 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18193 {
18194 error ("the second argument must be a 4-bit immediate");
18195 return const0_rtx;
18196 }
18197 pat = GEN_FCN (icode) (target, op0, op1);
18198 }
18199 break;
18200 default:
18201 pat = GEN_FCN (icode) (target, op0);
18202 break;
18203 }
18204
18205 if (! pat)
18206 return 0;
18207 emit_insn (pat);
18208 return target;
18209 }
18210
18211 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18212 sqrtss, rsqrtss, rcpss. */
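/* These vm* scalar patterns (e.g. sse_vmsqrtv4sf2) take two vector
   operands and merge the upper elements of the second operand into the
   result, so the single builtin argument is reused for both operand
   slots below.  */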
18213
18214 static rtx
18215 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18216 {
18217 rtx pat;
18218 tree arg0 = CALL_EXPR_ARG (exp, 0);
18219 rtx op1, op0 = expand_normal (arg0);
18220 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18221 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18222
18223 if (optimize || !target
18224 || GET_MODE (target) != tmode
18225 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18226 target = gen_reg_rtx (tmode);
18227
18228 if (VECTOR_MODE_P (mode0))
18229 op0 = safe_vector_operand (op0, mode0);
18230
18231 if ((optimize && !register_operand (op0, mode0))
18232 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18233 op0 = copy_to_mode_reg (mode0, op0);
18234
18235 op1 = op0;
18236 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18237 op1 = copy_to_mode_reg (mode0, op1);
18238
18239 pat = GEN_FCN (icode) (target, op0, op1);
18240 if (! pat)
18241 return 0;
18242 emit_insn (pat);
18243 return target;
18244 }
18245
18246 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18247
18248 static rtx
18249 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18250 rtx target)
18251 {
18252 rtx pat;
18253 tree arg0 = CALL_EXPR_ARG (exp, 0);
18254 tree arg1 = CALL_EXPR_ARG (exp, 1);
18255 rtx op0 = expand_normal (arg0);
18256 rtx op1 = expand_normal (arg1);
18257 rtx op2;
18258 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18259 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18260 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18261 enum rtx_code comparison = d->comparison;
18262
18263 if (VECTOR_MODE_P (mode0))
18264 op0 = safe_vector_operand (op0, mode0);
18265 if (VECTOR_MODE_P (mode1))
18266 op1 = safe_vector_operand (op1, mode1);
18267
18268 /* Swap operands if we have a comparison that isn't available in
18269 hardware. */
18270 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18271 {
18272 rtx tmp = gen_reg_rtx (mode1);
18273 emit_move_insn (tmp, op1);
18274 op1 = op0;
18275 op0 = tmp;
18276 }
18277
18278 if (optimize || !target
18279 || GET_MODE (target) != tmode
18280 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18281 target = gen_reg_rtx (tmode);
18282
18283 if ((optimize && !register_operand (op0, mode0))
18284 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18285 op0 = copy_to_mode_reg (mode0, op0);
18286 if ((optimize && !register_operand (op1, mode1))
18287 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18288 op1 = copy_to_mode_reg (mode1, op1);
18289
18290 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18291 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18292 if (! pat)
18293 return 0;
18294 emit_insn (pat);
18295 return target;
18296 }
18297
18298 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18299
18300 static rtx
18301 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18302 rtx target)
18303 {
18304 rtx pat;
18305 tree arg0 = CALL_EXPR_ARG (exp, 0);
18306 tree arg1 = CALL_EXPR_ARG (exp, 1);
18307 rtx op0 = expand_normal (arg0);
18308 rtx op1 = expand_normal (arg1);
18309 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18310 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18311 enum rtx_code comparison = d->comparison;
18312
18313 if (VECTOR_MODE_P (mode0))
18314 op0 = safe_vector_operand (op0, mode0);
18315 if (VECTOR_MODE_P (mode1))
18316 op1 = safe_vector_operand (op1, mode1);
18317
18318 /* Swap operands if we have a comparison that isn't available in
18319 hardware. */
18320 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18321 {
18322 rtx tmp = op1;
18323 op1 = op0;
18324 op0 = tmp;
18325 }
18326
18327 target = gen_reg_rtx (SImode);
18328 emit_move_insn (target, const0_rtx);
18329 target = gen_rtx_SUBREG (QImode, target, 0);
18330
18331 if ((optimize && !register_operand (op0, mode0))
18332 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18333 op0 = copy_to_mode_reg (mode0, op0);
18334 if ((optimize && !register_operand (op1, mode1))
18335 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18336 op1 = copy_to_mode_reg (mode1, op1);
18337
18338 pat = GEN_FCN (d->icode) (op0, op1);
18339 if (! pat)
18340 return 0;
18341 emit_insn (pat);
18342 emit_insn (gen_rtx_SET (VOIDmode,
18343 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18344 gen_rtx_fmt_ee (comparison, QImode,
18345 SET_DEST (pat),
18346 const0_rtx)));
18347
18348 return SUBREG_REG (target);
18349 }
18350
18351 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18352
18353 static rtx
18354 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18355 rtx target)
18356 {
18357 rtx pat;
18358 tree arg0 = CALL_EXPR_ARG (exp, 0);
18359 tree arg1 = CALL_EXPR_ARG (exp, 1);
18360 rtx op0 = expand_normal (arg0);
18361 rtx op1 = expand_normal (arg1);
18362 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18363 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18364 enum rtx_code comparison = d->comparison;
18365
18366 if (VECTOR_MODE_P (mode0))
18367 op0 = safe_vector_operand (op0, mode0);
18368 if (VECTOR_MODE_P (mode1))
18369 op1 = safe_vector_operand (op1, mode1);
18370
18371 target = gen_reg_rtx (SImode);
18372 emit_move_insn (target, const0_rtx);
18373 target = gen_rtx_SUBREG (QImode, target, 0);
18374
18375 if ((optimize && !register_operand (op0, mode0))
18376 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18377 op0 = copy_to_mode_reg (mode0, op0);
18378 if ((optimize && !register_operand (op1, mode1))
18379 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18380 op1 = copy_to_mode_reg (mode1, op1);
18381
18382 pat = GEN_FCN (d->icode) (op0, op1);
18383 if (! pat)
18384 return 0;
18385 emit_insn (pat);
18386 emit_insn (gen_rtx_SET (VOIDmode,
18387 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18388 gen_rtx_fmt_ee (comparison, QImode,
18389 SET_DEST (pat),
18390 const0_rtx)));
18391
18392 return SUBREG_REG (target);
18393 }
18394
18395 /* Return the integer constant in ARG. Constrain it to be in the range
18396 of the subparts of VEC_TYPE; issue an error if not. */
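/* For example, for a V4SF vector the valid selectors are 0..3, so a call
   such as __builtin_ia32_vec_ext_v4sf (x, 4) is diagnosed with the error
   below.  */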
18397
18398 static int
18399 get_element_number (tree vec_type, tree arg)
18400 {
18401 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18402
18403 if (!host_integerp (arg, 1)
18404 || (elt = tree_low_cst (arg, 1), elt > max))
18405 {
18406 error ("selector must be an integer constant in the range 0..%wi", max);
18407 return 0;
18408 }
18409
18410 return elt;
18411 }
18412
18413 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18414 ix86_expand_vector_init. We DO have language-level syntax for this, in
18415 the form of (type){ init-list }. Except that since we can't place emms
18416 instructions from inside the compiler, we can't allow the use of MMX
18417 registers unless the user explicitly asks for it. So we do *not* define
18418 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18419 we have builtins invoked by mmintrin.h that give us license to emit
18420 these sorts of instructions. */
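/* Illustrative sketch, not from the original source: mmintrin.h-style
   wrappers reach this expander through calls such as

       __builtin_ia32_vec_init_v2si (a, b)

   which builds the same value as the vector literal (__v2si) { a, b }
   would, without requiring the MMX vector modes to be enabled for
   ordinary code generation.  */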
18421
18422 static rtx
18423 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18424 {
18425 enum machine_mode tmode = TYPE_MODE (type);
18426 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18427 int i, n_elt = GET_MODE_NUNITS (tmode);
18428 rtvec v = rtvec_alloc (n_elt);
18429
18430 gcc_assert (VECTOR_MODE_P (tmode));
18431 gcc_assert (call_expr_nargs (exp) == n_elt);
18432
18433 for (i = 0; i < n_elt; ++i)
18434 {
18435 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18436 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18437 }
18438
18439 if (!target || !register_operand (target, tmode))
18440 target = gen_reg_rtx (tmode);
18441
18442 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18443 return target;
18444 }
18445
18446 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18447 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18448 had a language-level syntax for referencing vector elements. */
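/* Illustrative sketch, not from the original source: for instance
   __builtin_ia32_vec_ext_v4sf (x, 2) yields element 2 of the V4SF value
   X; the selector is range-checked by get_element_number above.  */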
18449
18450 static rtx
18451 ix86_expand_vec_ext_builtin (tree exp, rtx target)
18452 {
18453 enum machine_mode tmode, mode0;
18454 tree arg0, arg1;
18455 int elt;
18456 rtx op0;
18457
18458 arg0 = CALL_EXPR_ARG (exp, 0);
18459 arg1 = CALL_EXPR_ARG (exp, 1);
18460
18461 op0 = expand_normal (arg0);
18462 elt = get_element_number (TREE_TYPE (arg0), arg1);
18463
18464 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18465 mode0 = TYPE_MODE (TREE_TYPE (arg0));
18466 gcc_assert (VECTOR_MODE_P (mode0));
18467
18468 op0 = force_reg (mode0, op0);
18469
18470 if (optimize || !target || !register_operand (target, tmode))
18471 target = gen_reg_rtx (tmode);
18472
18473 ix86_expand_vector_extract (true, target, op0, elt);
18474
18475 return target;
18476 }
18477
18478 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18479 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18480 a language-level syntax for referencing vector elements. */
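/* Illustrative sketch, not from the original source: a call such as
   __builtin_ia32_vec_set_v8hi (x, 99, 3) produces a copy of X with
   element 3 replaced by 99; the copy made below keeps the builtin's
   first argument unmodified.  */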
18481
18482 static rtx
18483 ix86_expand_vec_set_builtin (tree exp)
18484 {
18485 enum machine_mode tmode, mode1;
18486 tree arg0, arg1, arg2;
18487 int elt;
18488 rtx op0, op1, target;
18489
18490 arg0 = CALL_EXPR_ARG (exp, 0);
18491 arg1 = CALL_EXPR_ARG (exp, 1);
18492 arg2 = CALL_EXPR_ARG (exp, 2);
18493
18494 tmode = TYPE_MODE (TREE_TYPE (arg0));
18495 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18496 gcc_assert (VECTOR_MODE_P (tmode));
18497
18498 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
18499 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
18500 elt = get_element_number (TREE_TYPE (arg0), arg2);
18501
18502 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
18503 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
18504
18505 op0 = force_reg (tmode, op0);
18506 op1 = force_reg (mode1, op1);
18507
18508 /* OP0 is the source of these builtin functions and shouldn't be
18509 modified. Create a copy, modify it and return it as the target. */
18510 target = gen_reg_rtx (tmode);
18511 emit_move_insn (target, op0);
18512 ix86_expand_vector_set (true, target, op1, elt);
18513
18514 return target;
18515 }
18516
18517 /* Expand an expression EXP that calls a built-in function,
18518 with result going to TARGET if that's convenient
18519 (and in mode MODE if that's convenient).
18520 SUBTARGET may be used as the target for computing one of EXP's operands.
18521 IGNORE is nonzero if the value is to be ignored. */
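/* Descriptive note (an assumption about the overall structure, not from
   the original comment): the switch below special-cases builtins that
   need bespoke expansion; builtins not matched here are handled by the
   generic bdesc_* table loops later in this function.  */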
18522
18523 static rtx
18524 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
18525 enum machine_mode mode ATTRIBUTE_UNUSED,
18526 int ignore ATTRIBUTE_UNUSED)
18527 {
18528 const struct builtin_description *d;
18529 size_t i;
18530 enum insn_code icode;
18531 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18532 tree arg0, arg1, arg2, arg3;
18533 rtx op0, op1, op2, op3, pat;
18534 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
18535 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18536
18537 switch (fcode)
18538 {
18539 case IX86_BUILTIN_EMMS:
18540 emit_insn (gen_mmx_emms ());
18541 return 0;
18542
18543 case IX86_BUILTIN_SFENCE:
18544 emit_insn (gen_sse_sfence ());
18545 return 0;
18546
18547 case IX86_BUILTIN_MASKMOVQ:
18548 case IX86_BUILTIN_MASKMOVDQU:
18549 icode = (fcode == IX86_BUILTIN_MASKMOVQ
18550 ? CODE_FOR_mmx_maskmovq
18551 : CODE_FOR_sse2_maskmovdqu);
18552 /* Note the arg order is different from the operand order. */
18553 arg1 = CALL_EXPR_ARG (exp, 0);
18554 arg2 = CALL_EXPR_ARG (exp, 1);
18555 arg0 = CALL_EXPR_ARG (exp, 2);
18556 op0 = expand_normal (arg0);
18557 op1 = expand_normal (arg1);
18558 op2 = expand_normal (arg2);
18559 mode0 = insn_data[icode].operand[0].mode;
18560 mode1 = insn_data[icode].operand[1].mode;
18561 mode2 = insn_data[icode].operand[2].mode;
18562
18563 op0 = force_reg (Pmode, op0);
18564 op0 = gen_rtx_MEM (mode1, op0);
18565
18566 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
18567 op0 = copy_to_mode_reg (mode0, op0);
18568 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
18569 op1 = copy_to_mode_reg (mode1, op1);
18570 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
18571 op2 = copy_to_mode_reg (mode2, op2);
18572 pat = GEN_FCN (icode) (op0, op1, op2);
18573 if (! pat)
18574 return 0;
18575 emit_insn (pat);
18576 return 0;
18577
18578 case IX86_BUILTIN_SQRTSS:
18579 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
18580 case IX86_BUILTIN_RSQRTSS:
18581 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18582 case IX86_BUILTIN_RCPSS:
18583 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18584
18585 case IX86_BUILTIN_LOADUPS:
18586 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18587
18588 case IX86_BUILTIN_STOREUPS:
18589 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18590
18591 case IX86_BUILTIN_LOADHPS:
18592 case IX86_BUILTIN_LOADLPS:
18593 case IX86_BUILTIN_LOADHPD:
18594 case IX86_BUILTIN_LOADLPD:
18595 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18596 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18597 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18598 : CODE_FOR_sse2_loadlpd);
18599 arg0 = CALL_EXPR_ARG (exp, 0);
18600 arg1 = CALL_EXPR_ARG (exp, 1);
18601 op0 = expand_normal (arg0);
18602 op1 = expand_normal (arg1);
18603 tmode = insn_data[icode].operand[0].mode;
18604 mode0 = insn_data[icode].operand[1].mode;
18605 mode1 = insn_data[icode].operand[2].mode;
18606
18607 op0 = force_reg (mode0, op0);
18608 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18609 if (optimize || target == 0
18610 || GET_MODE (target) != tmode
18611 || !register_operand (target, tmode))
18612 target = gen_reg_rtx (tmode);
18613 pat = GEN_FCN (icode) (target, op0, op1);
18614 if (! pat)
18615 return 0;
18616 emit_insn (pat);
18617 return target;
18618
18619 case IX86_BUILTIN_STOREHPS:
18620 case IX86_BUILTIN_STORELPS:
18621 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18622 : CODE_FOR_sse_storelps);
18623 arg0 = CALL_EXPR_ARG (exp, 0);
18624 arg1 = CALL_EXPR_ARG (exp, 1);
18625 op0 = expand_normal (arg0);
18626 op1 = expand_normal (arg1);
18627 mode0 = insn_data[icode].operand[0].mode;
18628 mode1 = insn_data[icode].operand[1].mode;
18629
18630 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18631 op1 = force_reg (mode1, op1);
18632
18633 pat = GEN_FCN (icode) (op0, op1);
18634 if (! pat)
18635 return 0;
18636 emit_insn (pat);
18637 return const0_rtx;
18638
18639 case IX86_BUILTIN_MOVNTPS:
18640 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18641 case IX86_BUILTIN_MOVNTQ:
18642 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18643
18644 case IX86_BUILTIN_LDMXCSR:
18645 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18646 target = assign_386_stack_local (SImode, SLOT_TEMP);
18647 emit_move_insn (target, op0);
18648 emit_insn (gen_sse_ldmxcsr (target));
18649 return 0;
18650
18651 case IX86_BUILTIN_STMXCSR:
18652 target = assign_386_stack_local (SImode, SLOT_TEMP);
18653 emit_insn (gen_sse_stmxcsr (target));
18654 return copy_to_mode_reg (SImode, target);
18655
18656 case IX86_BUILTIN_SHUFPS:
18657 case IX86_BUILTIN_SHUFPD:
18658 icode = (fcode == IX86_BUILTIN_SHUFPS
18659 ? CODE_FOR_sse_shufps
18660 : CODE_FOR_sse2_shufpd);
18661 arg0 = CALL_EXPR_ARG (exp, 0);
18662 arg1 = CALL_EXPR_ARG (exp, 1);
18663 arg2 = CALL_EXPR_ARG (exp, 2);
18664 op0 = expand_normal (arg0);
18665 op1 = expand_normal (arg1);
18666 op2 = expand_normal (arg2);
18667 tmode = insn_data[icode].operand[0].mode;
18668 mode0 = insn_data[icode].operand[1].mode;
18669 mode1 = insn_data[icode].operand[2].mode;
18670 mode2 = insn_data[icode].operand[3].mode;
18671
18672 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18673 op0 = copy_to_mode_reg (mode0, op0);
18674 if ((optimize && !register_operand (op1, mode1))
18675 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18676 op1 = copy_to_mode_reg (mode1, op1);
18677 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18678 {
18679 /* @@@ better error message */
18680 error ("mask must be an immediate");
18681 return gen_reg_rtx (tmode);
18682 }
18683 if (optimize || target == 0
18684 || GET_MODE (target) != tmode
18685 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18686 target = gen_reg_rtx (tmode);
18687 pat = GEN_FCN (icode) (target, op0, op1, op2);
18688 if (! pat)
18689 return 0;
18690 emit_insn (pat);
18691 return target;
18692
18693 case IX86_BUILTIN_PSHUFW:
18694 case IX86_BUILTIN_PSHUFD:
18695 case IX86_BUILTIN_PSHUFHW:
18696 case IX86_BUILTIN_PSHUFLW:
18697 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18698 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18699 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18700 : CODE_FOR_mmx_pshufw);
18701 arg0 = CALL_EXPR_ARG (exp, 0);
18702 arg1 = CALL_EXPR_ARG (exp, 1);
18703 op0 = expand_normal (arg0);
18704 op1 = expand_normal (arg1);
18705 tmode = insn_data[icode].operand[0].mode;
18706 mode1 = insn_data[icode].operand[1].mode;
18707 mode2 = insn_data[icode].operand[2].mode;
18708
18709 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18710 op0 = copy_to_mode_reg (mode1, op0);
18711 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18712 {
18713 /* @@@ better error message */
18714 error ("mask must be an immediate");
18715 return const0_rtx;
18716 }
18717 if (target == 0
18718 || GET_MODE (target) != tmode
18719 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18720 target = gen_reg_rtx (tmode);
18721 pat = GEN_FCN (icode) (target, op0, op1);
18722 if (! pat)
18723 return 0;
18724 emit_insn (pat);
18725 return target;
18726
18727 case IX86_BUILTIN_PSLLWI128:
18728 icode = CODE_FOR_ashlv8hi3;
18729 goto do_pshifti;
18730 case IX86_BUILTIN_PSLLDI128:
18731 icode = CODE_FOR_ashlv4si3;
18732 goto do_pshifti;
18733 case IX86_BUILTIN_PSLLQI128:
18734 icode = CODE_FOR_ashlv2di3;
18735 goto do_pshifti;
18736 case IX86_BUILTIN_PSRAWI128:
18737 icode = CODE_FOR_ashrv8hi3;
18738 goto do_pshifti;
18739 case IX86_BUILTIN_PSRADI128:
18740 icode = CODE_FOR_ashrv4si3;
18741 goto do_pshifti;
18742 case IX86_BUILTIN_PSRLWI128:
18743 icode = CODE_FOR_lshrv8hi3;
18744 goto do_pshifti;
18745 case IX86_BUILTIN_PSRLDI128:
18746 icode = CODE_FOR_lshrv4si3;
18747 goto do_pshifti;
18748 case IX86_BUILTIN_PSRLQI128:
18749 icode = CODE_FOR_lshrv2di3;
18750 goto do_pshifti;
18751 do_pshifti:
18752 arg0 = CALL_EXPR_ARG (exp, 0);
18753 arg1 = CALL_EXPR_ARG (exp, 1);
18754 op0 = expand_normal (arg0);
18755 op1 = expand_normal (arg1);
18756
18757 if (!CONST_INT_P (op1))
18758 {
18759 error ("shift must be an immediate");
18760 return const0_rtx;
18761 }
18762 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18763 op1 = GEN_INT (255);
18764
18765 tmode = insn_data[icode].operand[0].mode;
18766 mode1 = insn_data[icode].operand[1].mode;
18767 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18768 op0 = copy_to_reg (op0);
18769
18770 target = gen_reg_rtx (tmode);
18771 pat = GEN_FCN (icode) (target, op0, op1);
18772 if (!pat)
18773 return 0;
18774 emit_insn (pat);
18775 return target;
18776
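/* Illustrative example for the do_pshifti path above (the intrinsic
   spelling is the usual <emmintrin.h> one and is assumed here, not
   defined in this file):

	__m128i v, w;
	w = _mm_slli_epi16 (v, 3);

   expands to __builtin_ia32_psllwi128.  Because the count 3 is a
   CONST_INT, the generic ashlv8hi3 pattern is emitted directly;
   counts outside 0..255 are replaced by 255 as done above.  */
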
18777 case IX86_BUILTIN_PSLLW128:
18778 icode = CODE_FOR_ashlv8hi3;
18779 goto do_pshift;
18780 case IX86_BUILTIN_PSLLD128:
18781 icode = CODE_FOR_ashlv4si3;
18782 goto do_pshift;
18783 case IX86_BUILTIN_PSLLQ128:
18784 icode = CODE_FOR_ashlv2di3;
18785 goto do_pshift;
18786 case IX86_BUILTIN_PSRAW128:
18787 icode = CODE_FOR_ashrv8hi3;
18788 goto do_pshift;
18789 case IX86_BUILTIN_PSRAD128:
18790 icode = CODE_FOR_ashrv4si3;
18791 goto do_pshift;
18792 case IX86_BUILTIN_PSRLW128:
18793 icode = CODE_FOR_lshrv8hi3;
18794 goto do_pshift;
18795 case IX86_BUILTIN_PSRLD128:
18796 icode = CODE_FOR_lshrv4si3;
18797 goto do_pshift;
18798 case IX86_BUILTIN_PSRLQ128:
18799 icode = CODE_FOR_lshrv2di3;
18800 goto do_pshift;
18801 do_pshift:
18802 arg0 = CALL_EXPR_ARG (exp, 0);
18803 arg1 = CALL_EXPR_ARG (exp, 1);
18804 op0 = expand_normal (arg0);
18805 op1 = expand_normal (arg1);
18806
18807 tmode = insn_data[icode].operand[0].mode;
18808 mode1 = insn_data[icode].operand[1].mode;
18809
18810 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18811 op0 = copy_to_reg (op0);
18812
18813 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18814 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18815 op1 = copy_to_reg (op1);
18816
18817 target = gen_reg_rtx (tmode);
18818 pat = GEN_FCN (icode) (target, op0, op1);
18819 if (!pat)
18820 return 0;
18821 emit_insn (pat);
18822 return target;
18823
18824 case IX86_BUILTIN_PSLLDQI128:
18825 case IX86_BUILTIN_PSRLDQI128:
18826 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18827 : CODE_FOR_sse2_lshrti3);
18828 arg0 = CALL_EXPR_ARG (exp, 0);
18829 arg1 = CALL_EXPR_ARG (exp, 1);
18830 op0 = expand_normal (arg0);
18831 op1 = expand_normal (arg1);
18832 tmode = insn_data[icode].operand[0].mode;
18833 mode1 = insn_data[icode].operand[1].mode;
18834 mode2 = insn_data[icode].operand[2].mode;
18835
18836 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18837 {
18838 op0 = copy_to_reg (op0);
18839 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18840 }
18841 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18842 {
18843 error ("shift must be an immediate");
18844 return const0_rtx;
18845 }
18846 target = gen_reg_rtx (V2DImode);
18847 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18848 op0, op1);
18849 if (! pat)
18850 return 0;
18851 emit_insn (pat);
18852 return target;
18853
18854 case IX86_BUILTIN_FEMMS:
18855 emit_insn (gen_mmx_femms ());
18856 return NULL_RTX;
18857
18858 case IX86_BUILTIN_PAVGUSB:
18859 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18860
18861 case IX86_BUILTIN_PF2ID:
18862 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18863
18864 case IX86_BUILTIN_PFACC:
18865 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18866
18867 case IX86_BUILTIN_PFADD:
18868 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18869
18870 case IX86_BUILTIN_PFCMPEQ:
18871 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18872
18873 case IX86_BUILTIN_PFCMPGE:
18874 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18875
18876 case IX86_BUILTIN_PFCMPGT:
18877 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18878
18879 case IX86_BUILTIN_PFMAX:
18880 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18881
18882 case IX86_BUILTIN_PFMIN:
18883 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18884
18885 case IX86_BUILTIN_PFMUL:
18886 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18887
18888 case IX86_BUILTIN_PFRCP:
18889 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18890
18891 case IX86_BUILTIN_PFRCPIT1:
18892 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18893
18894 case IX86_BUILTIN_PFRCPIT2:
18895 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18896
18897 case IX86_BUILTIN_PFRSQIT1:
18898 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18899
18900 case IX86_BUILTIN_PFRSQRT:
18901 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18902
18903 case IX86_BUILTIN_PFSUB:
18904 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18905
18906 case IX86_BUILTIN_PFSUBR:
18907 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18908
18909 case IX86_BUILTIN_PI2FD:
18910 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18911
18912 case IX86_BUILTIN_PMULHRW:
18913 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18914
18915 case IX86_BUILTIN_PF2IW:
18916 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18917
18918 case IX86_BUILTIN_PFNACC:
18919 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18920
18921 case IX86_BUILTIN_PFPNACC:
18922 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18923
18924 case IX86_BUILTIN_PI2FW:
18925 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18926
18927 case IX86_BUILTIN_PSWAPDSI:
18928 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18929
18930 case IX86_BUILTIN_PSWAPDSF:
18931 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18932
18933 case IX86_BUILTIN_SQRTSD:
18934 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18935 case IX86_BUILTIN_LOADUPD:
18936 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18937 case IX86_BUILTIN_STOREUPD:
18938 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18939
18940 case IX86_BUILTIN_MFENCE:
18941 emit_insn (gen_sse2_mfence ());
18942 return 0;
18943 case IX86_BUILTIN_LFENCE:
18944 emit_insn (gen_sse2_lfence ());
18945 return 0;
18946
18947 case IX86_BUILTIN_CLFLUSH:
18948 arg0 = CALL_EXPR_ARG (exp, 0);
18949 op0 = expand_normal (arg0);
18950 icode = CODE_FOR_sse2_clflush;
18951 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18952 op0 = copy_to_mode_reg (Pmode, op0);
18953
18954 emit_insn (gen_sse2_clflush (op0));
18955 return 0;
18956
18957 case IX86_BUILTIN_MOVNTPD:
18958 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18959 case IX86_BUILTIN_MOVNTDQ:
18960 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18961 case IX86_BUILTIN_MOVNTI:
18962 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18963
18964 case IX86_BUILTIN_LOADDQU:
18965 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18966 case IX86_BUILTIN_STOREDQU:
18967 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18968
18969 case IX86_BUILTIN_MONITOR:
18970 arg0 = CALL_EXPR_ARG (exp, 0);
18971 arg1 = CALL_EXPR_ARG (exp, 1);
18972 arg2 = CALL_EXPR_ARG (exp, 2);
18973 op0 = expand_normal (arg0);
18974 op1 = expand_normal (arg1);
18975 op2 = expand_normal (arg2);
18976 if (!REG_P (op0))
18977 op0 = copy_to_mode_reg (Pmode, op0);
18978 if (!REG_P (op1))
18979 op1 = copy_to_mode_reg (SImode, op1);
18980 if (!REG_P (op2))
18981 op2 = copy_to_mode_reg (SImode, op2);
18982 if (!TARGET_64BIT)
18983 emit_insn (gen_sse3_monitor (op0, op1, op2));
18984 else
18985 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18986 return 0;
18987
18988 case IX86_BUILTIN_MWAIT:
18989 arg0 = CALL_EXPR_ARG (exp, 0);
18990 arg1 = CALL_EXPR_ARG (exp, 1);
18991 op0 = expand_normal (arg0);
18992 op1 = expand_normal (arg1);
18993 if (!REG_P (op0))
18994 op0 = copy_to_mode_reg (SImode, op0);
18995 if (!REG_P (op1))
18996 op1 = copy_to_mode_reg (SImode, op1);
18997 emit_insn (gen_sse3_mwait (op0, op1));
18998 return 0;
18999
19000 case IX86_BUILTIN_LDDQU:
19001 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19002 target, 1);
19003
19004 case IX86_BUILTIN_PALIGNR:
19005 case IX86_BUILTIN_PALIGNR128:
19006 if (fcode == IX86_BUILTIN_PALIGNR)
19007 {
19008 icode = CODE_FOR_ssse3_palignrdi;
19009 mode = DImode;
19010 }
19011 else
19012 {
19013 icode = CODE_FOR_ssse3_palignrti;
19014 mode = V2DImode;
19015 }
19016 arg0 = CALL_EXPR_ARG (exp, 0);
19017 arg1 = CALL_EXPR_ARG (exp, 1);
19018 arg2 = CALL_EXPR_ARG (exp, 2);
19019 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
19020 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
19021 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
19022 tmode = insn_data[icode].operand[0].mode;
19023 mode1 = insn_data[icode].operand[1].mode;
19024 mode2 = insn_data[icode].operand[2].mode;
19025 mode3 = insn_data[icode].operand[3].mode;
19026
19027 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19028 {
19029 op0 = copy_to_reg (op0);
19030 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19031 }
19032 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19033 {
19034 op1 = copy_to_reg (op1);
19035 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19036 }
19037 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19038 {
19039 error ("shift must be an immediate");
19040 return const0_rtx;
19041 }
19042 target = gen_reg_rtx (mode);
19043 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19044 op0, op1, op2);
19045 if (! pat)
19046 return 0;
19047 emit_insn (pat);
19048 return target;
19049
19050 case IX86_BUILTIN_MOVNTDQA:
19051 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19052 target, 1);
19053
19054 case IX86_BUILTIN_MOVNTSD:
19055 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19056
19057 case IX86_BUILTIN_MOVNTSS:
19058 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19059
19060 case IX86_BUILTIN_INSERTQ:
19061 case IX86_BUILTIN_EXTRQ:
19062 icode = (fcode == IX86_BUILTIN_EXTRQ
19063 ? CODE_FOR_sse4a_extrq
19064 : CODE_FOR_sse4a_insertq);
19065 arg0 = CALL_EXPR_ARG (exp, 0);
19066 arg1 = CALL_EXPR_ARG (exp, 1);
19067 op0 = expand_normal (arg0);
19068 op1 = expand_normal (arg1);
19069 tmode = insn_data[icode].operand[0].mode;
19070 mode1 = insn_data[icode].operand[1].mode;
19071 mode2 = insn_data[icode].operand[2].mode;
19072 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19073 op0 = copy_to_mode_reg (mode1, op0);
19074 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19075 op1 = copy_to_mode_reg (mode2, op1);
19076 if (optimize || target == 0
19077 || GET_MODE (target) != tmode
19078 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19079 target = gen_reg_rtx (tmode);
19080 pat = GEN_FCN (icode) (target, op0, op1);
19081 if (! pat)
19082 return NULL_RTX;
19083 emit_insn (pat);
19084 return target;
19085
19086 case IX86_BUILTIN_EXTRQI:
19087 icode = CODE_FOR_sse4a_extrqi;
19088 arg0 = CALL_EXPR_ARG (exp, 0);
19089 arg1 = CALL_EXPR_ARG (exp, 1);
19090 arg2 = CALL_EXPR_ARG (exp, 2);
19091 op0 = expand_normal (arg0);
19092 op1 = expand_normal (arg1);
19093 op2 = expand_normal (arg2);
19094 tmode = insn_data[icode].operand[0].mode;
19095 mode1 = insn_data[icode].operand[1].mode;
19096 mode2 = insn_data[icode].operand[2].mode;
19097 mode3 = insn_data[icode].operand[3].mode;
19098 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19099 op0 = copy_to_mode_reg (mode1, op0);
19100 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19101 {
19102 error ("index mask must be an immediate");
19103 return gen_reg_rtx (tmode);
19104 }
19105 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19106 {
19107 error ("length mask must be an immediate");
19108 return gen_reg_rtx (tmode);
19109 }
19110 if (optimize || target == 0
19111 || GET_MODE (target) != tmode
19112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19113 target = gen_reg_rtx (tmode);
19114 pat = GEN_FCN (icode) (target, op0, op1, op2);
19115 if (! pat)
19116 return NULL_RTX;
19117 emit_insn (pat);
19118 return target;
19119
19120 case IX86_BUILTIN_INSERTQI:
19121 icode = CODE_FOR_sse4a_insertqi;
19122 arg0 = CALL_EXPR_ARG (exp, 0);
19123 arg1 = CALL_EXPR_ARG (exp, 1);
19124 arg2 = CALL_EXPR_ARG (exp, 2);
19125 arg3 = CALL_EXPR_ARG (exp, 3);
19126 op0 = expand_normal (arg0);
19127 op1 = expand_normal (arg1);
19128 op2 = expand_normal (arg2);
19129 op3 = expand_normal (arg3);
19130 tmode = insn_data[icode].operand[0].mode;
19131 mode1 = insn_data[icode].operand[1].mode;
19132 mode2 = insn_data[icode].operand[2].mode;
19133 mode3 = insn_data[icode].operand[3].mode;
19134 mode4 = insn_data[icode].operand[4].mode;
19135
19136 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19137 op0 = copy_to_mode_reg (mode1, op0);
19138
19139 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19140 op1 = copy_to_mode_reg (mode2, op1);
19141
19142 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19143 {
19144 error ("index mask must be an immediate");
19145 return gen_reg_rtx (tmode);
19146 }
19147 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19148 {
19149 error ("length mask must be an immediate");
19150 return gen_reg_rtx (tmode);
19151 }
19152 if (optimize || target == 0
19153 || GET_MODE (target) != tmode
19154 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19155 target = gen_reg_rtx (tmode);
19156 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19157 if (! pat)
19158 return NULL_RTX;
19159 emit_insn (pat);
19160 return target;
19161
19162 case IX86_BUILTIN_VEC_INIT_V2SI:
19163 case IX86_BUILTIN_VEC_INIT_V4HI:
19164 case IX86_BUILTIN_VEC_INIT_V8QI:
19165 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19166
19167 case IX86_BUILTIN_VEC_EXT_V2DF:
19168 case IX86_BUILTIN_VEC_EXT_V2DI:
19169 case IX86_BUILTIN_VEC_EXT_V4SF:
19170 case IX86_BUILTIN_VEC_EXT_V4SI:
19171 case IX86_BUILTIN_VEC_EXT_V8HI:
19172 case IX86_BUILTIN_VEC_EXT_V2SI:
19173 case IX86_BUILTIN_VEC_EXT_V4HI:
19174 case IX86_BUILTIN_VEC_EXT_V16QI:
19175 return ix86_expand_vec_ext_builtin (exp, target);
19176
19177 case IX86_BUILTIN_VEC_SET_V2DI:
19178 case IX86_BUILTIN_VEC_SET_V4SF:
19179 case IX86_BUILTIN_VEC_SET_V4SI:
19180 case IX86_BUILTIN_VEC_SET_V8HI:
19181 case IX86_BUILTIN_VEC_SET_V4HI:
19182 case IX86_BUILTIN_VEC_SET_V16QI:
19183 return ix86_expand_vec_set_builtin (exp);
19184
19185 default:
19186 break;
19187 }
19188
19189 for (i = 0, d = bdesc_sse_3arg;
19190 i < ARRAY_SIZE (bdesc_sse_3arg);
19191 i++, d++)
19192 if (d->code == fcode)
19193 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19194 target);
19195
19196 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19197 if (d->code == fcode)
19198 {
19199 /* Compares are treated specially. */
19200 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19201 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19202 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19203 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19204 return ix86_expand_sse_compare (d, exp, target);
19205
19206 return ix86_expand_binop_builtin (d->icode, exp, target);
19207 }
19208
19209 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19210 if (d->code == fcode)
19211 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19212
19213 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19214 if (d->code == fcode)
19215 return ix86_expand_sse_comi (d, exp, target);
19216
19217 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19218 if (d->code == fcode)
19219 return ix86_expand_sse_ptest (d, exp, target);
19220
19221 gcc_unreachable ();
19222 }
19223
19226 19224 /* Returns a function decl for a vectorized version of the builtin function
19227 19225 with builtin function code FN, returning vectors of type TYPE_OUT and
19228 19226 taking arguments of vector type TYPE_IN, or NULL_TREE if it is not available. */
19227
19228 static tree
19229 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
19230 tree type_in)
19231 {
19232 enum machine_mode in_mode, out_mode;
19233 int in_n, out_n;
19234
19235 if (TREE_CODE (type_out) != VECTOR_TYPE
19236 || TREE_CODE (type_in) != VECTOR_TYPE)
19237 return NULL_TREE;
19238
19239 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19240 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19241 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19242 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19243
19244 switch (fn)
19245 {
19246 case BUILT_IN_SQRT:
19247 if (out_mode == DFmode && out_n == 2
19248 && in_mode == DFmode && in_n == 2)
19249 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19250 return NULL_TREE;
19251
19252 case BUILT_IN_SQRTF:
19253 if (out_mode == SFmode && out_n == 4
19254 && in_mode == SFmode && in_n == 4)
19255 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19256 return NULL_TREE;
19257
19258 case BUILT_IN_LRINTF:
19259 if (out_mode == SImode && out_n == 4
19260 && in_mode == SFmode && in_n == 4)
19261 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19262 return NULL_TREE;
19263
19264 default:
19265 ;
19266 }
19267
19268 return NULL_TREE;
19269 }
19270
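/* Worked example for the hook above: when the vectorizer looks for a
   vector variant of sqrt () in a loop over doubles, it passes
   BUILT_IN_SQRT with TYPE_OUT and TYPE_IN both two-element vectors of
   DFmode, and receives the decl of __builtin_ia32_sqrtpd, i.e. one
   SQRTPD instruction per two scalar iterations.  Any other mode or
   element-count combination falls through and yields NULL_TREE.  */
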
19271 /* Returns a decl of a function that implements conversion of the
19272 input vector of type TYPE, or NULL_TREE if it is not available. */
19273
19274 static tree
19275 ix86_builtin_conversion (enum tree_code code, tree type)
19276 {
19277 if (TREE_CODE (type) != VECTOR_TYPE)
19278 return NULL_TREE;
19279
19280 switch (code)
19281 {
19282 case FLOAT_EXPR:
19283 switch (TYPE_MODE (type))
19284 {
19285 case V4SImode:
19286 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19287 default:
19288 return NULL_TREE;
19289 }
19290
19291 case FIX_TRUNC_EXPR:
19292 switch (TYPE_MODE (type))
19293 {
19294 case V4SFmode:
19295 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19296 default:
19297 return NULL_TREE;
19298 }
19299 default:
19300 return NULL_TREE;
19301
19302 }
19303 }
19304
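/* Worked example for the hook above: vectorizing the int-to-float loop

	int src[256]; float dst[256];
	for (i = 0; i < 256; i++) dst[i] = src[i];

   needs a FLOAT_EXPR on a V4SImode source, for which the hook returns
   __builtin_ia32_cvtdq2ps (one CVTDQ2PS per four elements).  The
   float-to-int truncation direction goes through FIX_TRUNC_EXPR and
   __builtin_ia32_cvttps2dq.  */
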
19307 19305 /* Store OPERAND to memory after reload is completed. This means
19306 that we can't easily use assign_stack_local. */
19307 rtx
19308 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19309 {
19310 rtx result;
19311
19312 gcc_assert (reload_completed);
19313 if (TARGET_RED_ZONE)
19314 {
19315 result = gen_rtx_MEM (mode,
19316 gen_rtx_PLUS (Pmode,
19317 stack_pointer_rtx,
19318 GEN_INT (-RED_ZONE_SIZE)));
19319 emit_move_insn (result, operand);
19320 }
19321 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19322 {
19323 switch (mode)
19324 {
19325 case HImode:
19326 case SImode:
19327 operand = gen_lowpart (DImode, operand);
19328 /* FALLTHRU */
19329 case DImode:
19330 emit_insn (
19331 gen_rtx_SET (VOIDmode,
19332 gen_rtx_MEM (DImode,
19333 gen_rtx_PRE_DEC (DImode,
19334 stack_pointer_rtx)),
19335 operand));
19336 break;
19337 default:
19338 gcc_unreachable ();
19339 }
19340 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19341 }
19342 else
19343 {
19344 switch (mode)
19345 {
19346 case DImode:
19347 {
19348 rtx operands[2];
19349 split_di (&operand, 1, operands, operands + 1);
19350 emit_insn (
19351 gen_rtx_SET (VOIDmode,
19352 gen_rtx_MEM (SImode,
19353 gen_rtx_PRE_DEC (Pmode,
19354 stack_pointer_rtx)),
19355 operands[1]));
19356 emit_insn (
19357 gen_rtx_SET (VOIDmode,
19358 gen_rtx_MEM (SImode,
19359 gen_rtx_PRE_DEC (Pmode,
19360 stack_pointer_rtx)),
19361 operands[0]));
19362 }
19363 break;
19364 case HImode:
19365 /* Store HImodes as SImodes. */
19366 operand = gen_lowpart (SImode, operand);
19367 /* FALLTHRU */
19368 case SImode:
19369 emit_insn (
19370 gen_rtx_SET (VOIDmode,
19371 gen_rtx_MEM (GET_MODE (operand),
19372 gen_rtx_PRE_DEC (SImode,
19373 stack_pointer_rtx)),
19374 operand));
19375 break;
19376 default:
19377 gcc_unreachable ();
19378 }
19379 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19380 }
19381 return result;
19382 }
19383
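/* Example of the 32-bit DImode path above: split_di leaves the low
   word in operands[0] and the high word in operands[1].  The high word
   is pushed first, the low word second, so after both pre-decrements
   the low word sits at the lower address and the returned
   (mem:DI (reg sp)) reads the value back in the usual little-endian
   layout:

	push	<high word>	; ends up at 4(%esp)
	push	<low word>	; ends up at 0(%esp)
   */
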
19386 19384 /* Free the operand from memory. */
19385 void
19386 ix86_free_from_memory (enum machine_mode mode)
19387 {
19388 if (!TARGET_RED_ZONE)
19389 {
19390 int size;
19391
19392 if (mode == DImode || TARGET_64BIT)
19393 size = 8;
19394 else
19395 size = 4;
19396 /* Use LEA to deallocate stack space. In peephole2 it will be converted
19397 to pop or add instruction if registers are available. */
19398 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19399 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19400 GEN_INT (size))));
19401 }
19402 }
19403
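/* Typical pairing with ix86_force_to_memory above, e.g. in a
   post-reload splitter that needs a scratch stack slot:

	mem = ix86_force_to_memory (DImode, op);
	... use mem ...
	ix86_free_from_memory (DImode);

   Without a red zone this releases the 8 bytes pushed above with a
   single stack-pointer adjustment; with a red zone nothing was pushed,
   so nothing needs to be released.  */
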
19404 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19405 QImode must go into class Q_REGS.
19406 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
19407 movdf to do mem-to-mem moves through integer regs. */
19408 enum reg_class
19409 ix86_preferred_reload_class (rtx x, enum reg_class class)
19410 {
19411 enum machine_mode mode = GET_MODE (x);
19412
19413 /* We're only allowed to return a subclass of CLASS. Many of the
19414 following checks fail for NO_REGS, so eliminate that early. */
19415 if (class == NO_REGS)
19416 return NO_REGS;
19417
19418 /* All classes can load zeros. */
19419 if (x == CONST0_RTX (mode))
19420 return class;
19421
19422 /* Force constants into memory if we are loading a (nonzero) constant into
19423 an MMX or SSE register. This is because there are no MMX/SSE instructions
19424 to load from a constant. */
19425 if (CONSTANT_P (x)
19426 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
19427 return NO_REGS;
19428
19429 /* Prefer SSE regs only, if we can use them for math. */
19430 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
19431 return SSE_CLASS_P (class) ? class : NO_REGS;
19432
19433 /* Floating-point constants need more complex checks. */
19434 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
19435 {
19436 /* General regs can load everything. */
19437 if (reg_class_subset_p (class, GENERAL_REGS))
19438 return class;
19439
19440 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19441 zero above. We only want to wind up preferring 80387 registers if
19442 we plan on doing computation with them. */
19443 if (TARGET_80387
19444 && standard_80387_constant_p (x))
19445 {
19446 /* Limit class to non-sse. */
19447 if (class == FLOAT_SSE_REGS)
19448 return FLOAT_REGS;
19449 if (class == FP_TOP_SSE_REGS)
19450 return FP_TOP_REG;
19451 if (class == FP_SECOND_SSE_REGS)
19452 return FP_SECOND_REG;
19453 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
19454 return class;
19455 }
19456
19457 return NO_REGS;
19458 }
19459
19460 /* Generally when we see PLUS here, it's the function invariant
19461 (plus soft-fp const_int). Which can only be computed into general
19462 regs. */
19463 if (GET_CODE (x) == PLUS)
19464 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
19465
19466 /* QImode constants are easy to load, but non-constant QImode data
19467 must go into Q_REGS. */
19468 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19469 {
19470 if (reg_class_subset_p (class, Q_REGS))
19471 return class;
19472 if (reg_class_subset_p (Q_REGS, class))
19473 return Q_REGS;
19474 return NO_REGS;
19475 }
19476
19477 return class;
19478 }
19479
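/* Two worked examples for the function above, assuming x87 math
   (no -mfpmath=sse):

   - (const_double:DF 1.0) with CLASS == FLOAT_REGS: 1.0 is a standard
     80387 constant, so the class is kept and the value can be produced
     with fld1.  An arbitrary constant such as 3.75 is not, so NO_REGS
     is returned and reload reads it from the constant pool instead.

   - Any nonzero constant with a class that may contain SSE or MMX
     registers: there is no SSE/MMX instruction that loads an
     immediate, so NO_REGS is returned immediately.  */
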
19480 /* Discourage putting floating-point values in SSE registers unless
19481 SSE math is being used, and likewise for the 387 registers. */
19482 enum reg_class
19483 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
19484 {
19485 enum machine_mode mode = GET_MODE (x);
19486
19487 /* Restrict the output reload class to the register bank that we are doing
19488 math on. If we would like not to return a subset of CLASS, reject this
19489 alternative: if reload cannot do this, it will still use its choice. */
19490 mode = GET_MODE (x);
19491 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19492 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
19493
19494 if (X87_FLOAT_MODE_P (mode))
19495 {
19496 if (class == FP_TOP_SSE_REGS)
19497 return FP_TOP_REG;
19498 else if (class == FP_SECOND_SSE_REGS)
19499 return FP_SECOND_REG;
19500 else
19501 return FLOAT_CLASS_P (class) ? class : NO_REGS;
19502 }
19503
19504 return class;
19505 }
19506
19507 /* If we are copying between general and FP registers, we need a memory
19508 location. The same is true for SSE and MMX registers.
19509
19512 19510 The macro can't work reliably when one of the CLASSES is a class containing
19513 19511 registers from multiple units (SSE, MMX, integer). We avoid this by never
19514 19512 combining those units in a single alternative in the machine description.
19513 Ensure that this constraint holds to avoid unexpected surprises.
19514
19515 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
19516 enforce these sanity checks. */
19517
19518 int
19519 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
19520 enum machine_mode mode, int strict)
19521 {
19522 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19523 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19524 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19525 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19526 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19527 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
19528 {
19529 gcc_assert (!strict);
19530 return true;
19531 }
19532
19533 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19534 return true;
19535
19538 19536 /* ??? This is a lie. We do have moves between mmx/general and
19539 19537 between mmx/sse2. But by saying we need secondary memory we discourage the
19538 register allocator from using the mmx registers unless needed. */
19539 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19540 return true;
19541
19542 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19543 {
19544 /* SSE1 doesn't have any direct moves from other classes. */
19545 if (!TARGET_SSE2)
19546 return true;
19547
19548 /* If the target says that inter-unit moves are more expensive
19549 than moving through memory, then don't generate them. */
19550 if (!TARGET_INTER_UNIT_MOVES)
19551 return true;
19552
19553 /* Between SSE and general, we have moves no larger than word size. */
19554 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19555 return true;
19556 }
19557
19558 return false;
19559 }
19560
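/* Example for the function above: copying an SImode value between
   GENERAL_REGS and SSE_REGS.  With only SSE1 enabled the answer is
   always "yes", so reload bounces the value through a stack slot.
   With SSE2 and TARGET_INTER_UNIT_MOVES the copy is no larger than a
   word, every check falls through, and a direct movd is allowed.  */
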
19561 /* Return true if the registers in CLASS cannot represent the change from
19562 modes FROM to TO. */
19563
19564 bool
19565 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
19566 enum reg_class class)
19567 {
19568 if (from == to)
19569 return false;
19570
19571 /* x87 registers can't do subreg at all, as all values are reformatted
19572 to extended precision. */
19573 if (MAYBE_FLOAT_CLASS_P (class))
19574 return true;
19575
19576 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19577 {
19578 /* Vector registers do not support QI or HImode loads. If we don't
19579 disallow a change to these modes, reload will assume it's ok to
19580 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19581 the vec_dupv4hi pattern. */
19582 if (GET_MODE_SIZE (from) < 4)
19583 return true;
19584
19585 /* Vector registers do not support subreg with nonzero offsets, which
19586 are otherwise valid for integer registers. Since we can't see
19587 whether we have a nonzero offset from here, prohibit all
19588 nonparadoxical subregs changing size. */
19589 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
19590 return true;
19591 }
19592
19593 return false;
19594 }
19595
19596 /* Return the cost of moving data from a register in class CLASS1 to
19597 one in class CLASS2.
19598
19599 It is not required that the cost always equal 2 when FROM is the same as TO;
19600 on some machines it is expensive to move between registers if they are not
19601 general registers. */
19602
19603 int
19604 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19605 enum reg_class class2)
19606 {
19607 /* In case we require secondary memory, compute cost of the store followed
19608 by load. In order to avoid bad register allocation choices, we need
19609 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19610
19611 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19612 {
19613 int cost = 1;
19614
19615 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19616 MEMORY_MOVE_COST (mode, class1, 1));
19617 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19618 MEMORY_MOVE_COST (mode, class2, 1));
19619
19622 19620 /* In case of copying from a general purpose register we may emit multiple
19623 19621 stores followed by a single load, causing a memory size mismatch stall.
19624 19622 Count this as an arbitrarily high cost of 20. */
19623 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19624 cost += 20;
19625
19626 /* In the case of FP/MMX moves, the registers actually overlap, and we
19627 have to switch modes in order to treat them differently. */
19628 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19629 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19630 cost += 20;
19631
19632 return cost;
19633 }
19634
19635 /* Moves between SSE/MMX and integer unit are expensive. */
19636 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19637 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19638 return ix86_cost->mmxsse_to_integer;
19639 if (MAYBE_FLOAT_CLASS_P (class1))
19640 return ix86_cost->fp_move;
19641 if (MAYBE_SSE_CLASS_P (class1))
19642 return ix86_cost->sse_move;
19643 if (MAYBE_MMX_CLASS_P (class1))
19644 return ix86_cost->mmx_move;
19645 return 2;
19646 }
19647
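/* Examples for the function above: an SImode copy between SSE_REGS and
   GENERAL_REGS that needs no secondary memory costs
   ix86_cost->mmxsse_to_integer.  A DFmode copy between FLOAT_REGS and
   SSE_REGS does need secondary memory, so its cost is

	1 + MAX (fp load, fp store) + MAX (SSE load, SSE store)

   taken from the MEMORY_MOVE_COST entries of the current tuning, which
   keeps it at least as expensive as an explicit trip through memory.  */
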
19648 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19649
19650 bool
19651 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19652 {
19653 /* Flags and only flags can only hold CCmode values. */
19654 if (CC_REGNO_P (regno))
19655 return GET_MODE_CLASS (mode) == MODE_CC;
19656 if (GET_MODE_CLASS (mode) == MODE_CC
19657 || GET_MODE_CLASS (mode) == MODE_RANDOM
19658 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19659 return 0;
19660 if (FP_REGNO_P (regno))
19661 return VALID_FP_MODE_P (mode);
19662 if (SSE_REGNO_P (regno))
19663 {
19664 /* We implement the move patterns for all vector modes into and
19665 out of SSE registers, even when no operation instructions
19666 are available. */
19667 return (VALID_SSE_REG_MODE (mode)
19668 || VALID_SSE2_REG_MODE (mode)
19669 || VALID_MMX_REG_MODE (mode)
19670 || VALID_MMX_REG_MODE_3DNOW (mode));
19671 }
19672 if (MMX_REGNO_P (regno))
19673 {
19674 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19675 so if the register is available at all, then we can move data of
19676 the given mode into or out of it. */
19677 return (VALID_MMX_REG_MODE (mode)
19678 || VALID_MMX_REG_MODE_3DNOW (mode));
19679 }
19680
19681 if (mode == QImode)
19682 {
19683 /* Take care for QImode values - they can be in non-QI regs,
19684 but then they do cause partial register stalls. */
19685 if (regno < 4 || TARGET_64BIT)
19686 return 1;
19687 if (!TARGET_PARTIAL_REG_STALL)
19688 return 1;
19689 return reload_in_progress || reload_completed;
19690 }
19691 /* We handle both integer and floats in the general purpose registers. */
19692 else if (VALID_INT_MODE_P (mode))
19693 return 1;
19694 else if (VALID_FP_MODE_P (mode))
19695 return 1;
19696 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19697 on to use that value in smaller contexts, this can easily force a
19698 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19699 supporting DImode, allow it. */
19700 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19701 return 1;
19702
19703 return 0;
19704 }
19705
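/* Examples for the function above: V4SFmode is accepted only in SSE
   registers; CCmode lives only in the flags register; and in 32-bit
   code a QImode pseudo is kept out of registers other than
   %eax/%ebx/%ecx/%edx before reload when TARGET_PARTIAL_REG_STALL is
   set, per the comment in the QImode case.  */
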
19706 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19707 tieable integer mode. */
19708
19709 static bool
19710 ix86_tieable_integer_mode_p (enum machine_mode mode)
19711 {
19712 switch (mode)
19713 {
19714 case HImode:
19715 case SImode:
19716 return true;
19717
19718 case QImode:
19719 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19720
19721 case DImode:
19722 return TARGET_64BIT;
19723
19724 default:
19725 return false;
19726 }
19727 }
19728
19729 /* Return true if MODE1 is accessible in a register that can hold MODE2
19730 without copying. That is, all register classes that can hold MODE2
19731 can also hold MODE1. */
19732
19733 bool
19734 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19735 {
19736 if (mode1 == mode2)
19737 return true;
19738
19739 if (ix86_tieable_integer_mode_p (mode1)
19740 && ix86_tieable_integer_mode_p (mode2))
19741 return true;
19742
19743 /* MODE2 being XFmode implies fp stack or general regs, which means we
19744 can tie any smaller floating point modes to it. Note that we do not
19745 tie this with TFmode. */
19746 if (mode2 == XFmode)
19747 return mode1 == SFmode || mode1 == DFmode;
19748
19749 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19750 that we can tie it with SFmode. */
19751 if (mode2 == DFmode)
19752 return mode1 == SFmode;
19753
19754 /* If MODE2 is only appropriate for an SSE register, then tie with
19755 any other mode acceptable to SSE registers. */
19756 if (GET_MODE_SIZE (mode2) == 16
19757 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19758 return (GET_MODE_SIZE (mode1) == 16
19759 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19760
19761 /* If MODE2 is appropriate for an MMX register, then tie
19762 with any other mode acceptable to MMX registers. */
19763 if (GET_MODE_SIZE (mode2) == 8
19764 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19765 return (GET_MODE_SIZE (mode1) == 8
19766 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19767
19768 return false;
19769 }
19770
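/* Examples for the predicate above: SFmode ties with both DFmode and
   XFmode, since every register class that can hold the wider float
   mode can also hold SFmode.  SImode ties with DImode only on 64-bit
   targets, where DImode is itself a tieable integer mode.  V4SFmode
   and V2DImode tie with each other because both are 16-byte modes
   valid in SSE registers.  */
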
19771 /* Return the cost of moving data of mode M between a
19772 register and memory. A value of 2 is the default; this cost is
19773 relative to those in `REGISTER_MOVE_COST'.
19774
19775 If moving between registers and memory is more expensive than
19776 between two registers, you should define this macro to express the
19777 relative cost.
19778
19781 19779 Also model the increased cost of moving QImode values in
19782 19780 non-Q_REGS classes.
19781 */
19782 int
19783 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19784 {
19785 if (FLOAT_CLASS_P (class))
19786 {
19787 int index;
19788 switch (mode)
19789 {
19790 case SFmode:
19791 index = 0;
19792 break;
19793 case DFmode:
19794 index = 1;
19795 break;
19796 case XFmode:
19797 index = 2;
19798 break;
19799 default:
19800 return 100;
19801 }
19802 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19803 }
19804 if (SSE_CLASS_P (class))
19805 {
19806 int index;
19807 switch (GET_MODE_SIZE (mode))
19808 {
19809 case 4:
19810 index = 0;
19811 break;
19812 case 8:
19813 index = 1;
19814 break;
19815 case 16:
19816 index = 2;
19817 break;
19818 default:
19819 return 100;
19820 }
19821 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19822 }
19823 if (MMX_CLASS_P (class))
19824 {
19825 int index;
19826 switch (GET_MODE_SIZE (mode))
19827 {
19828 case 4:
19829 index = 0;
19830 break;
19831 case 8:
19832 index = 1;
19833 break;
19834 default:
19835 return 100;
19836 }
19837 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19838 }
19839 switch (GET_MODE_SIZE (mode))
19840 {
19841 case 1:
19842 if (in)
19843 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19844 : ix86_cost->movzbl_load);
19845 else
19846 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19847 : ix86_cost->int_store[0] + 4);
19848 break;
19849 case 2:
19850 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19851 default:
19852 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19853 if (mode == TFmode)
19854 mode = XFmode;
19855 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19856 * (((int) GET_MODE_SIZE (mode)
19857 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19858 }
19859 }
19860
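/* Example for the function above with the size-tuned table (size_cost,
   used when optimizing for size): loading or storing an SFmode value
   in FLOAT_REGS costs fp_load[0] / fp_store[0], i.e. 2, the same as a
   register-register move.  Storing a QImode value from a class without
   byte registers is charged int_store[0] + 4; the extra 4 is the
   penalty for QImode values outside Q_REGS mentioned in the comment
   before the function.  */
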
19861 /* Compute a (partial) cost for rtx X. Return true if the complete
19862 cost has been computed, and false if subexpressions should be
19863 scanned. In either case, *TOTAL contains the cost result. */
19864
19865 static bool
19866 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19867 {
19868 enum machine_mode mode = GET_MODE (x);
19869
19870 switch (code)
19871 {
19872 case CONST_INT:
19873 case CONST:
19874 case LABEL_REF:
19875 case SYMBOL_REF:
19876 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19877 *total = 3;
19878 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19879 *total = 2;
19880 else if (flag_pic && SYMBOLIC_CONST (x)
19881 && (!TARGET_64BIT
19884 19882 || (GET_CODE (x) != LABEL_REF
19883 && (GET_CODE (x) != SYMBOL_REF
19884 || !SYMBOL_REF_LOCAL_P (x)))))
19885 *total = 1;
19886 else
19887 *total = 0;
19888 return true;
19889
19890 case CONST_DOUBLE:
19891 if (mode == VOIDmode)
19892 *total = 0;
19893 else
19894 switch (standard_80387_constant_p (x))
19895 {
19896 case 1: /* 0.0 */
19897 *total = 1;
19898 break;
19899 default: /* Other constants */
19900 *total = 2;
19901 break;
19902 case 0:
19903 case -1:
19904 /* Start with (MEM (SYMBOL_REF)), since that's where
19905 it'll probably end up. Add a penalty for size. */
19906 *total = (COSTS_N_INSNS (1)
19907 + (flag_pic != 0 && !TARGET_64BIT)
19908 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19909 break;
19910 }
19911 return true;
19912
19913 case ZERO_EXTEND:
19916 19914 /* Zero extension is often completely free on x86_64, so make
19917 19915 it as cheap as possible. */
19916 if (TARGET_64BIT && mode == DImode
19917 && GET_MODE (XEXP (x, 0)) == SImode)
19918 *total = 1;
19919 else if (TARGET_ZERO_EXTEND_WITH_AND)
19920 *total = ix86_cost->add;
19921 else
19922 *total = ix86_cost->movzx;
19923 return false;
19924
19925 case SIGN_EXTEND:
19926 *total = ix86_cost->movsx;
19927 return false;
19928
19929 case ASHIFT:
19930 if (CONST_INT_P (XEXP (x, 1))
19931 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19932 {
19933 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19934 if (value == 1)
19935 {
19936 *total = ix86_cost->add;
19937 return false;
19938 }
19939 if ((value == 2 || value == 3)
19940 && ix86_cost->lea <= ix86_cost->shift_const)
19941 {
19942 *total = ix86_cost->lea;
19943 return false;
19944 }
19945 }
19946 /* FALLTHRU */
19947
19948 case ROTATE:
19949 case ASHIFTRT:
19950 case LSHIFTRT:
19951 case ROTATERT:
19952 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19953 {
19954 if (CONST_INT_P (XEXP (x, 1)))
19955 {
19956 if (INTVAL (XEXP (x, 1)) > 32)
19957 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19958 else
19959 *total = ix86_cost->shift_const * 2;
19960 }
19961 else
19962 {
19963 if (GET_CODE (XEXP (x, 1)) == AND)
19964 *total = ix86_cost->shift_var * 2;
19965 else
19966 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19967 }
19968 }
19969 else
19970 {
19971 if (CONST_INT_P (XEXP (x, 1)))
19972 *total = ix86_cost->shift_const;
19973 else
19974 *total = ix86_cost->shift_var;
19975 }
19976 return false;
19977
19978 case MULT:
19979 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19980 {
19981 /* ??? SSE scalar cost should be used here. */
19982 *total = ix86_cost->fmul;
19983 return false;
19984 }
19985 else if (X87_FLOAT_MODE_P (mode))
19986 {
19987 *total = ix86_cost->fmul;
19988 return false;
19989 }
19990 else if (FLOAT_MODE_P (mode))
19991 {
19992 /* ??? SSE vector cost should be used here. */
19993 *total = ix86_cost->fmul;
19994 return false;
19995 }
19996 else
19997 {
19998 rtx op0 = XEXP (x, 0);
19999 rtx op1 = XEXP (x, 1);
20000 int nbits;
20001 if (CONST_INT_P (XEXP (x, 1)))
20002 {
20003 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20004 for (nbits = 0; value != 0; value &= value - 1)
20005 nbits++;
20006 }
20007 else
20008 /* This is arbitrary. */
20009 nbits = 7;
20010
20011 /* Compute costs correctly for widening multiplication. */
20014 20012 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20013 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20014 == GET_MODE_SIZE (mode))
20015 {
20016 int is_mulwiden = 0;
20017 enum machine_mode inner_mode = GET_MODE (op0);
20018
20019 if (GET_CODE (op0) == GET_CODE (op1))
20020 is_mulwiden = 1, op1 = XEXP (op1, 0);
20021 else if (CONST_INT_P (op1))
20022 {
20023 if (GET_CODE (op0) == SIGN_EXTEND)
20024 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20025 == INTVAL (op1);
20026 else
20027 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20028 }
20029
20030 if (is_mulwiden)
20031 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20032 }
20033
20034 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20035 + nbits * ix86_cost->mult_bit
20036 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20037
20038 return true;
20039 }
20040
20041 case DIV:
20042 case UDIV:
20043 case MOD:
20044 case UMOD:
20045 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20046 /* ??? SSE cost should be used here. */
20047 *total = ix86_cost->fdiv;
20048 else if (X87_FLOAT_MODE_P (mode))
20049 *total = ix86_cost->fdiv;
20050 else if (FLOAT_MODE_P (mode))
20051 /* ??? SSE vector cost should be used here. */
20052 *total = ix86_cost->fdiv;
20053 else
20054 *total = ix86_cost->divide[MODE_INDEX (mode)];
20055 return false;
20056
20057 case PLUS:
20058 if (GET_MODE_CLASS (mode) == MODE_INT
20059 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20060 {
20061 if (GET_CODE (XEXP (x, 0)) == PLUS
20062 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20063 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20064 && CONSTANT_P (XEXP (x, 1)))
20065 {
20066 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20067 if (val == 2 || val == 4 || val == 8)
20068 {
20069 *total = ix86_cost->lea;
20070 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20071 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20072 outer_code);
20073 *total += rtx_cost (XEXP (x, 1), outer_code);
20074 return true;
20075 }
20076 }
20077 else if (GET_CODE (XEXP (x, 0)) == MULT
20078 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20079 {
20080 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20081 if (val == 2 || val == 4 || val == 8)
20082 {
20083 *total = ix86_cost->lea;
20084 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20085 *total += rtx_cost (XEXP (x, 1), outer_code);
20086 return true;
20087 }
20088 }
20089 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20090 {
20091 *total = ix86_cost->lea;
20092 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20093 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20094 *total += rtx_cost (XEXP (x, 1), outer_code);
20095 return true;
20096 }
20097 }
20098 /* FALLTHRU */
20099
20100 case MINUS:
20101 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20102 {
20103 /* ??? SSE cost should be used here. */
20104 *total = ix86_cost->fadd;
20105 return false;
20106 }
20107 else if (X87_FLOAT_MODE_P (mode))
20108 {
20109 *total = ix86_cost->fadd;
20110 return false;
20111 }
20112 else if (FLOAT_MODE_P (mode))
20113 {
20114 /* ??? SSE vector cost should be used here. */
20115 *total = ix86_cost->fadd;
20116 return false;
20117 }
20118 /* FALLTHRU */
20119
20120 case AND:
20121 case IOR:
20122 case XOR:
20123 if (!TARGET_64BIT && mode == DImode)
20124 {
20125 *total = (ix86_cost->add * 2
20126 + (rtx_cost (XEXP (x, 0), outer_code)
20127 << (GET_MODE (XEXP (x, 0)) != DImode))
20128 + (rtx_cost (XEXP (x, 1), outer_code)
20129 << (GET_MODE (XEXP (x, 1)) != DImode)));
20130 return true;
20131 }
20132 /* FALLTHRU */
20133
20134 case NEG:
20135 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20136 {
20137 /* ??? SSE cost should be used here. */
20138 *total = ix86_cost->fchs;
20139 return false;
20140 }
20141 else if (X87_FLOAT_MODE_P (mode))
20142 {
20143 *total = ix86_cost->fchs;
20144 return false;
20145 }
20146 else if (FLOAT_MODE_P (mode))
20147 {
20148 /* ??? SSE vector cost should be used here. */
20149 *total = ix86_cost->fchs;
20150 return false;
20151 }
20152 /* FALLTHRU */
20153
20154 case NOT:
20155 if (!TARGET_64BIT && mode == DImode)
20156 *total = ix86_cost->add * 2;
20157 else
20158 *total = ix86_cost->add;
20159 return false;
20160
20161 case COMPARE:
20162 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20163 && XEXP (XEXP (x, 0), 1) == const1_rtx
20164 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20165 && XEXP (x, 1) == const0_rtx)
20166 {
20167 /* This kind of construct is implemented using test[bwl].
20168 Treat it as if we had an AND. */
20169 *total = (ix86_cost->add
20170 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20171 + rtx_cost (const1_rtx, outer_code));
20172 return true;
20173 }
20174 return false;
20175
20176 case FLOAT_EXTEND:
20177 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20178 *total = 0;
20179 return false;
20180
20181 case ABS:
20182 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20183 /* ??? SSE cost should be used here. */
20184 *total = ix86_cost->fabs;
20185 else if (X87_FLOAT_MODE_P (mode))
20186 *total = ix86_cost->fabs;
20187 else if (FLOAT_MODE_P (mode))
20188 /* ??? SSE vector cost should be used here. */
20189 *total = ix86_cost->fabs;
20190 return false;
20191
20192 case SQRT:
20193 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20194 /* ??? SSE cost should be used here. */
20195 *total = ix86_cost->fsqrt;
20196 else if (X87_FLOAT_MODE_P (mode))
20197 *total = ix86_cost->fsqrt;
20198 else if (FLOAT_MODE_P (mode))
20199 /* ??? SSE vector cost should be used here. */
20200 *total = ix86_cost->fsqrt;
20201 return false;
20202
20203 case UNSPEC:
20204 if (XINT (x, 1) == UNSPEC_TP)
20205 *total = 0;
20206 return false;
20207
20208 default:
20209 return false;
20210 }
20211 }
20212
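/* Two worked examples for ix86_rtx_costs above:

   - (mult:SI (reg) (const_int 10)): 10 has two bits set, so nbits is 2
     and the total is mult_init[MODE_INDEX (SImode)] + 2 * mult_bit
     plus the costs of the two operands.

   - (ashift:SI (reg) (const_int 3)): when the lea cost does not exceed
     the constant-shift cost, the shift is priced as an lea, since a
     shift by 2 or 3 can be done with a scaled-index address.  */
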
20213 #if TARGET_MACHO
20214
20215 static int current_machopic_label_num;
20216
20217 /* Given a symbol name and its associated stub, write out the
20218 definition of the stub. */
20219
20220 void
20221 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20222 {
20223 unsigned int length;
20224 char *binder_name, *symbol_name, lazy_ptr_name[32];
20225 int label = ++current_machopic_label_num;
20226
20227 /* For 64-bit we shouldn't get here. */
20228 gcc_assert (!TARGET_64BIT);
20229
20230 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20231 symb = (*targetm.strip_name_encoding) (symb);
20232
20233 length = strlen (stub);
20234 binder_name = alloca (length + 32);
20235 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20236
20237 length = strlen (symb);
20238 symbol_name = alloca (length + 32);
20239 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20240
20241 sprintf (lazy_ptr_name, "L%d$lz", label);
20242
20243 if (MACHOPIC_PURE)
20244 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20245 else
20246 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20247
20248 fprintf (file, "%s:\n", stub);
20249 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20250
20251 if (MACHOPIC_PURE)
20252 {
20253 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20254 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20255 fprintf (file, "\tjmp\t*%%edx\n");
20256 }
20257 else
20258 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20259
20260 fprintf (file, "%s:\n", binder_name);
20261
20262 if (MACHOPIC_PURE)
20263 {
20264 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20265 fprintf (file, "\tpushl\t%%eax\n");
20266 }
20267 else
20268 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20269
20270 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20271
20272 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20273 fprintf (file, "%s:\n", lazy_ptr_name);
20274 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20275 fprintf (file, "\t.long %s\n", binder_name);
20276 }
20277
20278 void
20279 darwin_x86_file_end (void)
20280 {
20281 darwin_file_end ();
20282 ix86_file_end ();
20283 }
20284 #endif /* TARGET_MACHO */
20285
20286 /* Order the registers for register allocator. */
20287
20288 void
20289 x86_order_regs_for_local_alloc (void)
20290 {
20291 int pos = 0;
20292 int i;
20293
20294 /* First allocate the local general purpose registers. */
20295 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20296 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20297 reg_alloc_order [pos++] = i;
20298
20299 /* Global general purpose registers. */
20300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20301 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20302 reg_alloc_order [pos++] = i;
20303
20304 /* x87 registers come first in case we are doing FP math
20305 using them. */
20306 if (!TARGET_SSE_MATH)
20307 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20308 reg_alloc_order [pos++] = i;
20309
20310 /* SSE registers. */
20311 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20312 reg_alloc_order [pos++] = i;
20313 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20314 reg_alloc_order [pos++] = i;
20315
20316 /* x87 registers. */
20317 if (TARGET_SSE_MATH)
20318 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20319 reg_alloc_order [pos++] = i;
20320
20321 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20322 reg_alloc_order [pos++] = i;
20323
20326 20324 /* Initialize the rest of the array, as we do not allocate some registers
20325 at all. */
20326 while (pos < FIRST_PSEUDO_REGISTER)
20327 reg_alloc_order [pos++] = 0;
20328 }
20329
20330 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20331 struct attribute_spec.handler. */
20332 static tree
20333 ix86_handle_struct_attribute (tree *node, tree name,
20334 tree args ATTRIBUTE_UNUSED,
20335 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20336 {
20337 tree *type = NULL;
20338 if (DECL_P (*node))
20339 {
20340 if (TREE_CODE (*node) == TYPE_DECL)
20341 type = &TREE_TYPE (*node);
20342 }
20343 else
20344 type = node;
20345
20346 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20347 || TREE_CODE (*type) == UNION_TYPE)))
20348 {
20349 warning (OPT_Wattributes, "%qs attribute ignored",
20350 IDENTIFIER_POINTER (name));
20351 *no_add_attrs = true;
20352 }
20353
20354 else if ((is_attribute_p ("ms_struct", name)
20355 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20356 || ((is_attribute_p ("gcc_struct", name)
20357 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20358 {
20359 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20360 IDENTIFIER_POINTER (name));
20361 *no_add_attrs = true;
20362 }
20363
20364 return NULL_TREE;
20365 }
20366
20367 static bool
20368 ix86_ms_bitfield_layout_p (tree record_type)
20369 {
20370 return (TARGET_MS_BITFIELD_LAYOUT &&
20371 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20372 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20373 }
20374
20375 /* Returns an expression indicating where the this parameter is
20376 located on entry to the FUNCTION. */
20377
20378 static rtx
20379 x86_this_parameter (tree function)
20380 {
20381 tree type = TREE_TYPE (function);
20382 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20383
20384 if (TARGET_64BIT)
20385 {
20386 const int *parm_regs;
20387
20388 if (TARGET_64BIT_MS_ABI)
20389 parm_regs = x86_64_ms_abi_int_parameter_registers;
20390 else
20391 parm_regs = x86_64_int_parameter_registers;
20392 return gen_rtx_REG (DImode, parm_regs[aggr]);
20393 }
20394
20395 if (ix86_function_regparm (type, function) > 0
20396 && !type_has_variadic_args_p (type))
20397 {
20398 int regno = 0;
20399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20400 regno = 2;
20401 return gen_rtx_REG (SImode, regno);
20402 }
20403
20404 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20405 }
20406
20407 /* Determine whether x86_output_mi_thunk can succeed. */
20408
20409 static bool
20410 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
20411 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
20412 HOST_WIDE_INT vcall_offset, tree function)
20413 {
20414 /* 64-bit can handle anything. */
20415 if (TARGET_64BIT)
20416 return true;
20417
20418 /* For 32-bit, everything's fine if we have one free register. */
20419 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20420 return true;
20421
20422 /* Need a free register for vcall_offset. */
20423 if (vcall_offset)
20424 return false;
20425
20426 /* Need a free register for GOT references. */
20427 if (flag_pic && !(*targetm.binds_local_p) (function))
20428 return false;
20429
20430 /* Otherwise ok. */
20431 return true;
20432 }
20433
20434 /* Output the assembler code for a thunk function. THUNK_DECL is the
20435 declaration for the thunk function itself, FUNCTION is the decl for
20436 the target function. DELTA is an immediate constant offset to be
20437 added to THIS. If VCALL_OFFSET is nonzero, the word at
20438 *(*this + vcall_offset) should be added to THIS. */
20439
20440 static void
20441 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
20442 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
20443 HOST_WIDE_INT vcall_offset, tree function)
20444 {
20445 rtx xops[3];
20446 rtx this = x86_this_parameter (function);
20447 rtx this_reg, tmp;
20448
20449 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20450 pull it in now and let DELTA benefit. */
20451 if (REG_P (this))
20452 this_reg = this;
20453 else if (vcall_offset)
20454 {
20455 /* Put the this parameter into %eax. */
20456 xops[0] = this;
20457 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
20458 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20459 }
20460 else
20461 this_reg = NULL_RTX;
20462
20463 /* Adjust the this parameter by a fixed constant. */
20464 if (delta)
20465 {
20466 xops[0] = GEN_INT (delta);
20467 xops[1] = this_reg ? this_reg : this;
20468 if (TARGET_64BIT)
20469 {
20470 if (!x86_64_general_operand (xops[0], DImode))
20471 {
20472 tmp = gen_rtx_REG (DImode, R10_REG);
20473 xops[1] = tmp;
20474 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
20475 xops[0] = tmp;
20476 xops[1] = this;
20477 }
20478 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20479 }
20480 else
20481 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20482 }
20483
20484 /* Adjust the this parameter by a value stored in the vtable. */
20485 if (vcall_offset)
20486 {
20487 if (TARGET_64BIT)
20488 tmp = gen_rtx_REG (DImode, R10_REG);
20489 else
20490 {
20491 int tmp_regno = 2 /* ECX */;
20492 if (lookup_attribute ("fastcall",
20493 TYPE_ATTRIBUTES (TREE_TYPE (function))))
20494 tmp_regno = 0 /* EAX */;
20495 tmp = gen_rtx_REG (SImode, tmp_regno);
20496 }
20497
20498 xops[0] = gen_rtx_MEM (Pmode, this_reg);
20499 xops[1] = tmp;
20500 if (TARGET_64BIT)
20501 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20502 else
20503 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20504
20505 /* Adjust the this parameter. */
20506 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
20507 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
20508 {
20509 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
20510 xops[0] = GEN_INT (vcall_offset);
20511 xops[1] = tmp2;
20512 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20513 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
20514 }
20515 xops[1] = this_reg;
20516 if (TARGET_64BIT)
20517 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20518 else
20519 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20520 }
20521
20522 /* If necessary, drop THIS back to its stack slot. */
20523 if (this_reg && this_reg != this)
20524 {
20525 xops[0] = this_reg;
20526 xops[1] = this;
20527 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20528 }
20529
20530 xops[0] = XEXP (DECL_RTL (function), 0);
20531 if (TARGET_64BIT)
20532 {
20533 if (!flag_pic || (*targetm.binds_local_p) (function))
20534 output_asm_insn ("jmp\t%P0", xops);
20535 /* All thunks should be in the same object as their target,
20536 and thus binds_local_p should be true. */
20537 else if (TARGET_64BIT_MS_ABI)
20538 gcc_unreachable ();
20539 else
20540 {
20541 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
20542 tmp = gen_rtx_CONST (Pmode, tmp);
20543 tmp = gen_rtx_MEM (QImode, tmp);
20544 xops[0] = tmp;
20545 output_asm_insn ("jmp\t%A0", xops);
20546 }
20547 }
20548 else
20549 {
20550 if (!flag_pic || (*targetm.binds_local_p) (function))
20551 output_asm_insn ("jmp\t%P0", xops);
20552 else
20553 #if TARGET_MACHO
20554 if (TARGET_MACHO)
20555 {
20556 rtx sym_ref = XEXP (DECL_RTL (function), 0);
20557 tmp = (gen_rtx_SYMBOL_REF
20558 (Pmode,
20559 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
20560 tmp = gen_rtx_MEM (QImode, tmp);
20561 xops[0] = tmp;
20562 output_asm_insn ("jmp\t%0", xops);
20563 }
20564 else
20565 #endif /* TARGET_MACHO */
20566 {
20567 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20568 output_set_got (tmp, NULL_RTX);
20569
20570 xops[1] = tmp;
20571 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
20572 output_asm_insn ("jmp\t{*}%1", xops);
20573 }
20574 }
20575 }
20576
20577 static void
20578 x86_file_start (void)
20579 {
20580 default_file_start ();
20581 #if TARGET_MACHO
20582 darwin_file_start ();
20583 #endif
20584 if (X86_FILE_START_VERSION_DIRECTIVE)
20585 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20586 if (X86_FILE_START_FLTUSED)
20587 fputs ("\t.global\t__fltused\n", asm_out_file);
20588 if (ix86_asm_dialect == ASM_INTEL)
20589 fputs ("\t.intel_syntax\n", asm_out_file);
20590 }
20591
20592 int
20593 x86_field_alignment (tree field, int computed)
20594 {
20595 enum machine_mode mode;
20596 tree type = TREE_TYPE (field);
20597
20598 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20599 return computed;
20600 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20601 ? get_inner_array_type (type) : type);
20602 if (mode == DFmode || mode == DCmode
20603 || GET_MODE_CLASS (mode) == MODE_INT
20604 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20605 return MIN (32, computed);
20606 return computed;
20607 }
20608
20609 /* Output assembler code to FILE to increment profiler label # LABELNO
20610 for profiling a function entry. */
20611 void
20612 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20613 {
20614 if (TARGET_64BIT)
20615 {
20616 #ifndef NO_PROFILE_COUNTERS
20617 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
20618 #endif
20619
20620 if (!TARGET_64BIT_MS_ABI && flag_pic)
20621 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20622 else
20623 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20624 }
20625 else if (flag_pic)
20626 {
20627 #ifndef NO_PROFILE_COUNTERS
20628 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20629 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20630 #endif
20631 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20632 }
20633 else
20634 {
20635 #ifndef NO_PROFILE_COUNTERS
20636 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20637 PROFILE_COUNT_REGISTER);
20638 #endif
20639 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20640 }
20641 }
20642
20643 /* We don't have exact information about the insn sizes, but we may assume
20644 quite safely that we are informed about all 1 byte insns and memory
20645 address sizes. This is enough to eliminate unnecessary padding in
20646 99% of cases. */
20647
20648 static int
20649 min_insn_size (rtx insn)
20650 {
20651 int l = 0;
20652
20653 if (!INSN_P (insn) || !active_insn_p (insn))
20654 return 0;
20655
20656 /* Discard alignments we've emitted and jump instructions. */
20657 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20658 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20659 return 0;
20660 if (JUMP_P (insn)
20661 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20662 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20663 return 0;
20664
20665 /* Important case - calls are always 5 bytes.
20666 It is common to have many calls in a row. */
20667 if (CALL_P (insn)
20668 && symbolic_reference_mentioned_p (PATTERN (insn))
20669 && !SIBLING_CALL_P (insn))
20670 return 5;
20671 if (get_attr_length (insn) <= 1)
20672 return 1;
20673
20674 /* For normal instructions we may rely on the sizes of addresses
20675 and the presence of a symbol to require 4 bytes of encoding.
20676 This is not the case for jumps where references are PC relative. */
20677 if (!JUMP_P (insn))
20678 {
20679 l = get_attr_length_address (insn);
20680 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20681 l = 4;
20682 }
20683 if (l)
20684 return 1+l;
20685 else
20686 return 2;
20687 }
20688
20689 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
20690 window. */
20691
20692 static void
20693 ix86_avoid_jump_misspredicts (void)
20694 {
20695 rtx insn, start = get_insns ();
20696 int nbytes = 0, njumps = 0;
20697 int isjump = 0;
20698
20699 /* Look for all minimal intervals of instructions containing 4 jumps.
20700 The intervals are bounded by START and INSN. NBYTES is the total
20701 size of instructions in the interval including INSN and not including
20702 START.  When NBYTES is smaller than 16 bytes, it is possible
20703 that the ends of START and INSN fall into the same 16 byte page.
20704
20705 The smallest offset in the page at which INSN can start is the case where
20706 START ends at offset 0.  The offset of INSN is then NBYTES - sizeof (INSN).
20707 We add a p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
20708 */
20709 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20710 {
20711
20712 nbytes += min_insn_size (insn);
20713 if (dump_file)
20714 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20715 INSN_UID (insn), min_insn_size (insn));
20716 if ((JUMP_P (insn)
20717 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20718 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20719 || CALL_P (insn))
20720 njumps++;
20721 else
20722 continue;
20723
20724 while (njumps > 3)
20725 {
20726 start = NEXT_INSN (start);
20727 if ((JUMP_P (start)
20728 && GET_CODE (PATTERN (start)) != ADDR_VEC
20729 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20730 || CALL_P (start))
20731 njumps--, isjump = 1;
20732 else
20733 isjump = 0;
20734 nbytes -= min_insn_size (start);
20735 }
20736 gcc_assert (njumps >= 0);
20737 if (dump_file)
20738 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20739 INSN_UID (start), INSN_UID (insn), nbytes);
20740
20741 if (njumps == 3 && isjump && nbytes < 16)
20742 {
20743 int padsize = 15 - nbytes + min_insn_size (insn);
20744
20745 if (dump_file)
20746 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20747 INSN_UID (insn), padsize);
20748 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
20749 }
20750 }
20751 }
20752
20753 /* AMD Athlon works faster
20754 when RET is not the destination of a conditional jump nor directly preceded
20755 by another jump instruction.  We avoid the penalty by inserting a NOP just
20756 before the RET instructions in such cases. */
20757 static void
20758 ix86_pad_returns (void)
20759 {
20760 edge e;
20761 edge_iterator ei;
20762
20763 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20764 {
20765 basic_block bb = e->src;
20766 rtx ret = BB_END (bb);
20767 rtx prev;
20768 bool replace = false;
20769
20770 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20771 || !maybe_hot_bb_p (bb))
20772 continue;
20773 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20774 if (active_insn_p (prev) || LABEL_P (prev))
20775 break;
20776 if (prev && LABEL_P (prev))
20777 {
20778 edge e;
20779 edge_iterator ei;
20780
20781 FOR_EACH_EDGE (e, ei, bb->preds)
20782 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20783 && !(e->flags & EDGE_FALLTHRU))
20784 replace = true;
20785 }
20786 if (!replace)
20787 {
20788 prev = prev_active_insn (ret);
20789 if (prev
20790 && ((JUMP_P (prev) && any_condjump_p (prev))
20791 || CALL_P (prev)))
20792 replace = true;
20793 /* Empty functions get a branch mispredict even when the jump destination
20794 is not visible to us. */
20795 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20796 replace = true;
20797 }
20798 if (replace)
20799 {
20800 emit_insn_before (gen_return_internal_long (), ret);
20801 delete_insn (ret);
20802 }
20803 }
20804 }
20805
20806 /* Implement machine specific optimizations. We implement padding of returns
20807 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
20808 static void
20809 ix86_reorg (void)
20810 {
20811 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20812 ix86_pad_returns ();
20813 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20814 ix86_avoid_jump_misspredicts ();
20815 }
20816
20817 /* Return nonzero when a QImode register that must be represented via a REX
20818 prefix is used. */
20819 bool
20820 x86_extended_QIreg_mentioned_p (rtx insn)
20821 {
20822 int i;
20823 extract_insn_cached (insn);
20824 for (i = 0; i < recog_data.n_operands; i++)
20825 if (REG_P (recog_data.operand[i])
20826 && REGNO (recog_data.operand[i]) >= 4)
20827 return true;
20828 return false;
20829 }
20830
20831 /* Return nonzero when P points to a register encoded via a REX prefix.
20832 Called via for_each_rtx. */
20833 static int
20834 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20835 {
20836 unsigned int regno;
20837 if (!REG_P (*p))
20838 return 0;
20839 regno = REGNO (*p);
20840 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20841 }
20842
20843 /* Return true when INSN mentions a register that must be encoded using a REX
20844 prefix. */
20845 bool
20846 x86_extended_reg_mentioned_p (rtx insn)
20847 {
20848 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20849 }
20850
20851 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20852 optabs would emit if we didn't have TFmode patterns. */
20853
20854 void
20855 x86_emit_floatuns (rtx operands[2])
20856 {
20857 rtx neglab, donelab, i0, i1, f0, in, out;
20858 enum machine_mode mode, inmode;
20859
20860 inmode = GET_MODE (operands[1]);
20861 gcc_assert (inmode == SImode || inmode == DImode);
20862
20863 out = operands[0];
20864 in = force_reg (inmode, operands[1]);
20865 mode = GET_MODE (out);
20866 neglab = gen_label_rtx ();
20867 donelab = gen_label_rtx ();
20868 f0 = gen_reg_rtx (mode);
20869
20870 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20871
20872 expand_float (out, in, 0);
20873
20874 emit_jump_insn (gen_jump (donelab));
20875 emit_barrier ();
20876
20877 emit_label (neglab);
20878
20879 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20880 1, OPTAB_DIRECT);
20881 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20882 1, OPTAB_DIRECT);
20883 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20884
20885 expand_float (f0, i0, 0);
20886
20887 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20888
20889 emit_label (donelab);
20890 }
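
/* A standalone sketch (not part of GCC) of the conversion expanded above,
   assuming a 64-bit unsigned input (uint64_t from <stdint.h>):

     static double u64_to_double (uint64_t u)
     {
       if ((int64_t) u >= 0)
         return (double) (int64_t) u;
       return (double) (int64_t) ((u >> 1) | (u & 1)) * 2.0;
     }

   For inputs with the sign bit set, the value is halved with the low bit
   folded back in (so the final rounding is unchanged), converted as a
   signed integer, and then doubled.  */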
20891 \f
20892 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20893 with all elements equal to VAR. Return true if successful. */
20894
20895 static bool
20896 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20897 rtx target, rtx val)
20898 {
20899 enum machine_mode smode, wsmode, wvmode;
20900 rtx x;
20901
20902 switch (mode)
20903 {
20904 case V2SImode:
20905 case V2SFmode:
20906 if (!mmx_ok)
20907 return false;
20908 /* FALLTHRU */
20909
20910 case V2DFmode:
20911 case V2DImode:
20912 case V4SFmode:
20913 case V4SImode:
20914 val = force_reg (GET_MODE_INNER (mode), val);
20915 x = gen_rtx_VEC_DUPLICATE (mode, val);
20916 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20917 return true;
20918
20919 case V4HImode:
20920 if (!mmx_ok)
20921 return false;
20922 if (TARGET_SSE || TARGET_3DNOW_A)
20923 {
20924 val = gen_lowpart (SImode, val);
20925 x = gen_rtx_TRUNCATE (HImode, val);
20926 x = gen_rtx_VEC_DUPLICATE (mode, x);
20927 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20928 return true;
20929 }
20930 else
20931 {
20932 smode = HImode;
20933 wsmode = SImode;
20934 wvmode = V2SImode;
20935 goto widen;
20936 }
20937
20938 case V8QImode:
20939 if (!mmx_ok)
20940 return false;
20941 smode = QImode;
20942 wsmode = HImode;
20943 wvmode = V4HImode;
20944 goto widen;
20945 case V8HImode:
20946 if (TARGET_SSE2)
20947 {
20948 rtx tmp1, tmp2;
20949 /* Extend HImode to SImode using a paradoxical SUBREG. */
20950 tmp1 = gen_reg_rtx (SImode);
20951 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20952 /* Insert the SImode value as low element of V4SImode vector. */
20953 tmp2 = gen_reg_rtx (V4SImode);
20954 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20955 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20956 CONST0_RTX (V4SImode),
20957 const1_rtx);
20958 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20959 /* Cast the V4SImode vector back to a V8HImode vector. */
20960 tmp1 = gen_reg_rtx (V8HImode);
20961 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20962 /* Duplicate the low short through the whole low SImode word. */
20963 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20964 /* Cast the V8HImode vector back to a V4SImode vector. */
20965 tmp2 = gen_reg_rtx (V4SImode);
20966 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20967 /* Replicate the low element of the V4SImode vector. */
20968 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20969 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20970 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20971 return true;
20972 }
20973 smode = HImode;
20974 wsmode = SImode;
20975 wvmode = V4SImode;
20976 goto widen;
20977 case V16QImode:
20978 if (TARGET_SSE2)
20979 {
20980 rtx tmp1, tmp2;
20981 /* Extend QImode to SImode using a paradoxical SUBREG. */
20982 tmp1 = gen_reg_rtx (SImode);
20983 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20984 /* Insert the SImode value as low element of V4SImode vector. */
20985 tmp2 = gen_reg_rtx (V4SImode);
20986 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20987 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20988 CONST0_RTX (V4SImode),
20989 const1_rtx);
20990 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20991 /* Cast the V4SImode vector back to a V16QImode vector. */
20992 tmp1 = gen_reg_rtx (V16QImode);
20993 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20994 /* Duplicate the low byte through the whole low SImode word. */
20995 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20996 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20997 /* Cast the V16QImode vector back to a V4SImode vector. */
20998 tmp2 = gen_reg_rtx (V4SImode);
20999 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21000 /* Replicate the low element of the V4SImode vector. */
21001 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21002 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21003 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21004 return true;
21005 }
21006 smode = QImode;
21007 wsmode = HImode;
21008 wvmode = V8HImode;
21009 goto widen;
21010 widen:
21011 /* Replicate the value once into the next wider mode and recurse. */
21012 val = convert_modes (wsmode, smode, val, true);
21013 x = expand_simple_binop (wsmode, ASHIFT, val,
21014 GEN_INT (GET_MODE_BITSIZE (smode)),
21015 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21016 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21017
21018 x = gen_reg_rtx (wvmode);
21019 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21020 gcc_unreachable ();
21021 emit_move_insn (target, gen_lowpart (mode, x));
21022 return true;
21023
21024 default:
21025 return false;
21026 }
21027 }
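
/* The "widen" case above replicates the value into the next wider scalar
   mode with a shift and an IOR, then recurses on the wider vector mode.
   A standalone sketch (not GCC code) of the scalar steps, broadcasting a
   byte into a 32-bit word:

     static unsigned int broadcast_byte (unsigned char b)
     {
       unsigned int x = b;
       x |= x << 8;
       x |= x << 16;
       return x;
     }

   Each step doubles the number of copies, mirroring the recursion from a
   QImode element through HImode up to an SImode word.  */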
21028
21029 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21030 whose ONE_VAR element is VAR, and other elements are zero. Return true
21031 if successful. */
21032
21033 static bool
21034 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21035 rtx target, rtx var, int one_var)
21036 {
21037 enum machine_mode vsimode;
21038 rtx new_target;
21039 rtx x, tmp;
21040
21041 switch (mode)
21042 {
21043 case V2SFmode:
21044 case V2SImode:
21045 if (!mmx_ok)
21046 return false;
21047 /* FALLTHRU */
21048
21049 case V2DFmode:
21050 case V2DImode:
21051 if (one_var != 0)
21052 return false;
21053 var = force_reg (GET_MODE_INNER (mode), var);
21054 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21055 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21056 return true;
21057
21058 case V4SFmode:
21059 case V4SImode:
21060 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21061 new_target = gen_reg_rtx (mode);
21062 else
21063 new_target = target;
21064 var = force_reg (GET_MODE_INNER (mode), var);
21065 x = gen_rtx_VEC_DUPLICATE (mode, var);
21066 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21067 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21068 if (one_var != 0)
21069 {
21070 /* We need to shuffle the value to the correct position, so
21071 create a new pseudo to store the intermediate result. */
21072
21073 /* With SSE2, we can use the integer shuffle insns. */
21074 if (mode != V4SFmode && TARGET_SSE2)
21075 {
21076 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21077 GEN_INT (1),
21078 GEN_INT (one_var == 1 ? 0 : 1),
21079 GEN_INT (one_var == 2 ? 0 : 1),
21080 GEN_INT (one_var == 3 ? 0 : 1)));
21081 if (target != new_target)
21082 emit_move_insn (target, new_target);
21083 return true;
21084 }
21085
21086 /* Otherwise convert the intermediate result to V4SFmode and
21087 use the SSE1 shuffle instructions. */
21088 if (mode != V4SFmode)
21089 {
21090 tmp = gen_reg_rtx (V4SFmode);
21091 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21092 }
21093 else
21094 tmp = new_target;
21095
21096 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21097 GEN_INT (1),
21098 GEN_INT (one_var == 1 ? 0 : 1),
21099 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21100 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21101
21102 if (mode != V4SFmode)
21103 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21104 else if (tmp != target)
21105 emit_move_insn (target, tmp);
21106 }
21107 else if (target != new_target)
21108 emit_move_insn (target, new_target);
21109 return true;
21110
21111 case V8HImode:
21112 case V16QImode:
21113 vsimode = V4SImode;
21114 goto widen;
21115 case V4HImode:
21116 case V8QImode:
21117 if (!mmx_ok)
21118 return false;
21119 vsimode = V2SImode;
21120 goto widen;
21121 widen:
21122 if (one_var != 0)
21123 return false;
21124
21125 /* Zero extend the variable element to SImode and recurse. */
21126 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21127
21128 x = gen_reg_rtx (vsimode);
21129 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21130 var, one_var))
21131 gcc_unreachable ();
21132
21133 emit_move_insn (target, gen_lowpart (mode, x));
21134 return true;
21135
21136 default:
21137 return false;
21138 }
21139 }
21140
21141 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21142 consisting of the values in VALS. It is known that all elements
21143 except ONE_VAR are constants. Return true if successful. */
21144
21145 static bool
21146 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21147 rtx target, rtx vals, int one_var)
21148 {
21149 rtx var = XVECEXP (vals, 0, one_var);
21150 enum machine_mode wmode;
21151 rtx const_vec, x;
21152
21153 const_vec = copy_rtx (vals);
21154 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21155 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21156
21157 switch (mode)
21158 {
21159 case V2DFmode:
21160 case V2DImode:
21161 case V2SFmode:
21162 case V2SImode:
21163 /* For the two element vectors, it's just as easy to use
21164 the general case. */
21165 return false;
21166
21167 case V4SFmode:
21168 case V4SImode:
21169 case V8HImode:
21170 case V4HImode:
21171 break;
21172
21173 case V16QImode:
21174 wmode = V8HImode;
21175 goto widen;
21176 case V8QImode:
21177 wmode = V4HImode;
21178 goto widen;
21179 widen:
21180 /* There's no way to set one QImode entry easily. Combine
21181 the variable value with its adjacent constant value, and
21182 promote to an HImode set. */
21183 x = XVECEXP (vals, 0, one_var ^ 1);
21184 if (one_var & 1)
21185 {
21186 var = convert_modes (HImode, QImode, var, true);
21187 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21188 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21189 x = GEN_INT (INTVAL (x) & 0xff);
21190 }
21191 else
21192 {
21193 var = convert_modes (HImode, QImode, var, true);
21194 x = gen_int_mode (INTVAL (x) << 8, HImode);
21195 }
21196 if (x != const0_rtx)
21197 var = expand_simple_binop (HImode, IOR, var, x, var,
21198 1, OPTAB_LIB_WIDEN);
21199
21200 x = gen_reg_rtx (wmode);
21201 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21202 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21203
21204 emit_move_insn (target, gen_lowpart (mode, x));
21205 return true;
21206
21207 default:
21208 return false;
21209 }
21210
21211 emit_move_insn (target, const_vec);
21212 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21213 return true;
21214 }
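
/* A standalone sketch (not GCC code) of the QImode combining step above:
   the variable byte V and its constant neighbour C are merged into one
   halfword so that a single HImode element insert suffices.  For a
   variable byte at an odd index:

     unsigned short hw = ((unsigned short) v << 8) | (c & 0xff);

   and for an even index the roles are swapped, with V in the low byte and
   C shifted into the high byte.  */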
21215
21216 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21217 all values variable, and none identical. */
21218
21219 static void
21220 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21221 rtx target, rtx vals)
21222 {
21223 enum machine_mode half_mode = GET_MODE_INNER (mode);
21224 rtx op0 = NULL, op1 = NULL;
21225 bool use_vec_concat = false;
21226
21227 switch (mode)
21228 {
21229 case V2SFmode:
21230 case V2SImode:
21231 if (!mmx_ok && !TARGET_SSE)
21232 break;
21233 /* FALLTHRU */
21234
21235 case V2DFmode:
21236 case V2DImode:
21237 /* For the two element vectors, we always implement VEC_CONCAT. */
21238 op0 = XVECEXP (vals, 0, 0);
21239 op1 = XVECEXP (vals, 0, 1);
21240 use_vec_concat = true;
21241 break;
21242
21243 case V4SFmode:
21244 half_mode = V2SFmode;
21245 goto half;
21246 case V4SImode:
21247 half_mode = V2SImode;
21248 goto half;
21249 half:
21250 {
21251 rtvec v;
21252
21253 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21254 Recurse to load the two halves. */
21255
21256 op0 = gen_reg_rtx (half_mode);
21257 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21258 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21259
21260 op1 = gen_reg_rtx (half_mode);
21261 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21262 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21263
21264 use_vec_concat = true;
21265 }
21266 break;
21267
21268 case V8HImode:
21269 case V16QImode:
21270 case V4HImode:
21271 case V8QImode:
21272 break;
21273
21274 default:
21275 gcc_unreachable ();
21276 }
21277
21278 if (use_vec_concat)
21279 {
21280 if (!register_operand (op0, half_mode))
21281 op0 = force_reg (half_mode, op0);
21282 if (!register_operand (op1, half_mode))
21283 op1 = force_reg (half_mode, op1);
21284
21285 emit_insn (gen_rtx_SET (VOIDmode, target,
21286 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21287 }
21288 else
21289 {
21290 int i, j, n_elts, n_words, n_elt_per_word;
21291 enum machine_mode inner_mode;
21292 rtx words[4], shift;
21293
21294 inner_mode = GET_MODE_INNER (mode);
21295 n_elts = GET_MODE_NUNITS (mode);
21296 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21297 n_elt_per_word = n_elts / n_words;
21298 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21299
21300 for (i = 0; i < n_words; ++i)
21301 {
21302 rtx word = NULL_RTX;
21303
21304 for (j = 0; j < n_elt_per_word; ++j)
21305 {
21306 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21307 elt = convert_modes (word_mode, inner_mode, elt, true);
21308
21309 if (j == 0)
21310 word = elt;
21311 else
21312 {
21313 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21314 word, 1, OPTAB_LIB_WIDEN);
21315 word = expand_simple_binop (word_mode, IOR, word, elt,
21316 word, 1, OPTAB_LIB_WIDEN);
21317 }
21318 }
21319
21320 words[i] = word;
21321 }
21322
21323 if (n_words == 1)
21324 emit_move_insn (target, gen_lowpart (mode, words[0]));
21325 else if (n_words == 2)
21326 {
21327 rtx tmp = gen_reg_rtx (mode);
21328 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21329 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21330 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21331 emit_move_insn (target, tmp);
21332 }
21333 else if (n_words == 4)
21334 {
21335 rtx tmp = gen_reg_rtx (V4SImode);
21336 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21337 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21338 emit_move_insn (target, gen_lowpart (mode, tmp));
21339 }
21340 else
21341 gcc_unreachable ();
21342 }
21343 }
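
/* The fallback path above packs the vector elements into word-sized
   integers with shifts and IORs before assembling the words into the
   vector register.  A standalone sketch (not GCC code) for packing two
   16-bit elements e0 and e1 into one 32-bit word, with e0 in the low
   half to match little-endian element order:

     unsigned int word = ((unsigned int) e1 << 16) | (unsigned short) e0;
*/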
21344
21345 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21346 instructions unless MMX_OK is true. */
21347
21348 void
21349 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21350 {
21351 enum machine_mode mode = GET_MODE (target);
21352 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21353 int n_elts = GET_MODE_NUNITS (mode);
21354 int n_var = 0, one_var = -1;
21355 bool all_same = true, all_const_zero = true;
21356 int i;
21357 rtx x;
21358
21359 for (i = 0; i < n_elts; ++i)
21360 {
21361 x = XVECEXP (vals, 0, i);
21362 if (!CONSTANT_P (x))
21363 n_var++, one_var = i;
21364 else if (x != CONST0_RTX (inner_mode))
21365 all_const_zero = false;
21366 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21367 all_same = false;
21368 }
21369
21370 /* Constants are best loaded from the constant pool. */
21371 if (n_var == 0)
21372 {
21373 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21374 return;
21375 }
21376
21377 /* If all values are identical, broadcast the value. */
21378 if (all_same
21379 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21380 XVECEXP (vals, 0, 0)))
21381 return;
21382
21383 /* Values where only one field is non-constant are best loaded from
21384 the pool and overwritten via move later. */
21385 if (n_var == 1)
21386 {
21387 if (all_const_zero
21388 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21389 XVECEXP (vals, 0, one_var),
21390 one_var))
21391 return;
21392
21393 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21394 return;
21395 }
21396
21397 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21398 }
21399
21400 void
21401 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21402 {
21403 enum machine_mode mode = GET_MODE (target);
21404 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21405 bool use_vec_merge = false;
21406 rtx tmp;
21407
21408 switch (mode)
21409 {
21410 case V2SFmode:
21411 case V2SImode:
21412 if (mmx_ok)
21413 {
21414 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
21415 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
21416 if (elt == 0)
21417 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
21418 else
21419 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
21420 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21421 return;
21422 }
21423 break;
21424
21425 case V2DImode:
21426 use_vec_merge = TARGET_SSE4_1;
21427 if (use_vec_merge)
21428 break;
21429
21430 case V2DFmode:
21431 {
21432 rtx op0, op1;
21433
21434 /* For the two element vectors, we implement a VEC_CONCAT with
21435 the extraction of the other element. */
21436
21437 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
21438 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
21439
21440 if (elt == 0)
21441 op0 = val, op1 = tmp;
21442 else
21443 op0 = tmp, op1 = val;
21444
21445 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
21446 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21447 }
21448 return;
21449
21450 case V4SFmode:
21451 use_vec_merge = TARGET_SSE4_1;
21452 if (use_vec_merge)
21453 break;
21454
21455 switch (elt)
21456 {
21457 case 0:
21458 use_vec_merge = true;
21459 break;
21460
21461 case 1:
21462 /* tmp = target = A B C D */
21463 tmp = copy_to_reg (target);
21464 /* target = A A B B */
21465 emit_insn (gen_sse_unpcklps (target, target, target));
21466 /* target = X A B B */
21467 ix86_expand_vector_set (false, target, val, 0);
21468 /* target = A X C D */
21469 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21470 GEN_INT (1), GEN_INT (0),
21471 GEN_INT (2+4), GEN_INT (3+4)));
21472 return;
21473
21474 case 2:
21475 /* tmp = target = A B C D */
21476 tmp = copy_to_reg (target);
21477 /* tmp = X B C D */
21478 ix86_expand_vector_set (false, tmp, val, 0);
21479 /* target = A B X D */
21480 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21481 GEN_INT (0), GEN_INT (1),
21482 GEN_INT (0+4), GEN_INT (3+4)));
21483 return;
21484
21485 case 3:
21486 /* tmp = target = A B C D */
21487 tmp = copy_to_reg (target);
21488 /* tmp = X B C D */
21489 ix86_expand_vector_set (false, tmp, val, 0);
21490 /* target = A B C X */
21491 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21492 GEN_INT (0), GEN_INT (1),
21493 GEN_INT (2+4), GEN_INT (0+4)));
21494 return;
21495
21496 default:
21497 gcc_unreachable ();
21498 }
21499 break;
21500
21501 case V4SImode:
21502 use_vec_merge = TARGET_SSE4_1;
21503 if (use_vec_merge)
21504 break;
21505
21506 /* Element 0 handled by vec_merge below. */
21507 if (elt == 0)
21508 {
21509 use_vec_merge = true;
21510 break;
21511 }
21512
21513 if (TARGET_SSE2)
21514 {
21515 /* With SSE2, use integer shuffles to swap element 0 and ELT,
21516 store into element 0, then shuffle them back. */
21517
21518 rtx order[4];
21519
21520 order[0] = GEN_INT (elt);
21521 order[1] = const1_rtx;
21522 order[2] = const2_rtx;
21523 order[3] = GEN_INT (3);
21524 order[elt] = const0_rtx;
21525
21526 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21527 order[1], order[2], order[3]));
21528
21529 ix86_expand_vector_set (false, target, val, 0);
21530
21531 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21532 order[1], order[2], order[3]));
21533 }
21534 else
21535 {
21536 /* For SSE1, we have to reuse the V4SF code. */
21537 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
21538 gen_lowpart (SFmode, val), elt);
21539 }
21540 return;
21541
21542 case V8HImode:
21543 use_vec_merge = TARGET_SSE2;
21544 break;
21545 case V4HImode:
21546 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21547 break;
21548
21549 case V16QImode:
21550 use_vec_merge = TARGET_SSE4_1;
21551 break;
21552
21553 case V8QImode:
21554 default:
21555 break;
21556 }
21557
21558 if (use_vec_merge)
21559 {
21560 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
21561 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
21562 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21563 }
21564 else
21565 {
21566 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21567
21568 emit_move_insn (mem, target);
21569
21570 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21571 emit_move_insn (tmp, val);
21572
21573 emit_move_insn (target, mem);
21574 }
21575 }
21576
21577 void
21578 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
21579 {
21580 enum machine_mode mode = GET_MODE (vec);
21581 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21582 bool use_vec_extr = false;
21583 rtx tmp;
21584
21585 switch (mode)
21586 {
21587 case V2SImode:
21588 case V2SFmode:
21589 if (!mmx_ok)
21590 break;
21591 /* FALLTHRU */
21592
21593 case V2DFmode:
21594 case V2DImode:
21595 use_vec_extr = true;
21596 break;
21597
21598 case V4SFmode:
21599 use_vec_extr = TARGET_SSE4_1;
21600 if (use_vec_extr)
21601 break;
21602
21603 switch (elt)
21604 {
21605 case 0:
21606 tmp = vec;
21607 break;
21608
21609 case 1:
21610 case 3:
21611 tmp = gen_reg_rtx (mode);
21612 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
21613 GEN_INT (elt), GEN_INT (elt),
21614 GEN_INT (elt+4), GEN_INT (elt+4)));
21615 break;
21616
21617 case 2:
21618 tmp = gen_reg_rtx (mode);
21619 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21620 break;
21621
21622 default:
21623 gcc_unreachable ();
21624 }
21625 vec = tmp;
21626 use_vec_extr = true;
21627 elt = 0;
21628 break;
21629
21630 case V4SImode:
21631 use_vec_extr = TARGET_SSE4_1;
21632 if (use_vec_extr)
21633 break;
21634
21635 if (TARGET_SSE2)
21636 {
21637 switch (elt)
21638 {
21639 case 0:
21640 tmp = vec;
21641 break;
21642
21643 case 1:
21644 case 3:
21645 tmp = gen_reg_rtx (mode);
21646 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21647 GEN_INT (elt), GEN_INT (elt),
21648 GEN_INT (elt), GEN_INT (elt)));
21649 break;
21650
21651 case 2:
21652 tmp = gen_reg_rtx (mode);
21653 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21654 break;
21655
21656 default:
21657 gcc_unreachable ();
21658 }
21659 vec = tmp;
21660 use_vec_extr = true;
21661 elt = 0;
21662 }
21663 else
21664 {
21665 /* For SSE1, we have to reuse the V4SF code. */
21666 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21667 gen_lowpart (V4SFmode, vec), elt);
21668 return;
21669 }
21670 break;
21671
21672 case V8HImode:
21673 use_vec_extr = TARGET_SSE2;
21674 break;
21675 case V4HImode:
21676 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21677 break;
21678
21679 case V16QImode:
21680 use_vec_extr = TARGET_SSE4_1;
21681 break;
21682
21683 case V8QImode:
21684 /* ??? Could extract the appropriate HImode element and shift. */
21685 default:
21686 break;
21687 }
21688
21689 if (use_vec_extr)
21690 {
21691 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21692 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21693
21694 /* Let the rtl optimizers know about the zero extension performed. */
21695 if (inner_mode == QImode || inner_mode == HImode)
21696 {
21697 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21698 target = gen_lowpart (SImode, target);
21699 }
21700
21701 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21702 }
21703 else
21704 {
21705 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21706
21707 emit_move_insn (mem, vec);
21708
21709 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21710 emit_move_insn (target, tmp);
21711 }
21712 }
21713
21714 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21715 pattern to reduce; DEST is the destination; IN is the input vector. */
21716
21717 void
21718 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21719 {
21720 rtx tmp1, tmp2, tmp3;
21721
21722 tmp1 = gen_reg_rtx (V4SFmode);
21723 tmp2 = gen_reg_rtx (V4SFmode);
21724 tmp3 = gen_reg_rtx (V4SFmode);
21725
21726 emit_insn (gen_sse_movhlps (tmp1, in, in));
21727 emit_insn (fn (tmp2, tmp1, in));
21728
21729 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21730 GEN_INT (1), GEN_INT (1),
21731 GEN_INT (1+4), GEN_INT (1+4)));
21732 emit_insn (fn (dest, tmp2, tmp3));
21733 }
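
/* In scalar terms the shuffle sequence above computes (a sketch, not GCC
   code, where v holds the four input lanes):

     dest = fn (fn (v[2], v[0]), fn (v[3], v[1]));

   movhlps brings the high pair of lanes down next to the low pair, the
   first FN combines them pairwise, the shufps replicates lane 1 of that
   partial result, and the final FN combines the two partial results.  */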
21734 \f
21735 /* Target hook for scalar_mode_supported_p. */
21736 static bool
21737 ix86_scalar_mode_supported_p (enum machine_mode mode)
21738 {
21739 if (DECIMAL_FLOAT_MODE_P (mode))
21740 return true;
21741 else if (mode == TFmode)
21742 return TARGET_64BIT;
21743 else
21744 return default_scalar_mode_supported_p (mode);
21745 }
21746
21747 /* Implements target hook vector_mode_supported_p. */
21748 static bool
21749 ix86_vector_mode_supported_p (enum machine_mode mode)
21750 {
21751 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21752 return true;
21753 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21754 return true;
21755 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21756 return true;
21757 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21758 return true;
21759 return false;
21760 }
21761
21762 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21763
21764 We do this in the new i386 backend to maintain source compatibility
21765 with the old cc0-based compiler. */
21766
21767 static tree
21768 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21769 tree inputs ATTRIBUTE_UNUSED,
21770 tree clobbers)
21771 {
21772 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21773 clobbers);
21774 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21775 clobbers);
21776 return clobbers;
21777 }
21778
21779 /* Implements target vector targetm.asm.encode_section_info. This
21780 is not used by NetWare. */
21781
21782 static void ATTRIBUTE_UNUSED
21783 ix86_encode_section_info (tree decl, rtx rtl, int first)
21784 {
21785 default_encode_section_info (decl, rtl, first);
21786
21787 if (TREE_CODE (decl) == VAR_DECL
21788 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21789 && ix86_in_large_data_p (decl))
21790 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21791 }
21792
21793 /* Worker function for REVERSE_CONDITION. */
21794
21795 enum rtx_code
21796 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21797 {
21798 return (mode != CCFPmode && mode != CCFPUmode
21799 ? reverse_condition (code)
21800 : reverse_condition_maybe_unordered (code));
21801 }
21802
21803 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21804 to OPERANDS[0]. */
21805
21806 const char *
21807 output_387_reg_move (rtx insn, rtx *operands)
21808 {
21809 if (REG_P (operands[0]))
21810 {
21811 if (REG_P (operands[1])
21812 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21813 {
21814 if (REGNO (operands[0]) == FIRST_STACK_REG)
21815 return output_387_ffreep (operands, 0);
21816 return "fstp\t%y0";
21817 }
21818 if (STACK_TOP_P (operands[0]))
21819 return "fld%z1\t%y1";
21820 return "fst\t%y0";
21821 }
21822 else if (MEM_P (operands[0]))
21823 {
21824 gcc_assert (REG_P (operands[1]));
21825 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21826 return "fstp%z0\t%y0";
21827 else
21828 {
21829 /* There is no non-popping store to memory for XFmode.
21830 So if we need one, follow the store with a load. */
21831 if (GET_MODE (operands[0]) == XFmode)
21832 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21833 else
21834 return "fst%z0\t%y0";
21835 }
21836 }
21837 else
21838 gcc_unreachable ();
21839 }
21840
21841 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
21842 the FP status register is set. */
21843
21844 void
21845 ix86_emit_fp_unordered_jump (rtx label)
21846 {
21847 rtx reg = gen_reg_rtx (HImode);
21848 rtx temp;
21849
21850 emit_insn (gen_x86_fnstsw_1 (reg));
21851
21852 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21853 {
21854 emit_insn (gen_x86_sahf_1 (reg));
21855
21856 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21857 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21858 }
21859 else
21860 {
21861 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21862
21863 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21864 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21865 }
21866
21867 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21868 gen_rtx_LABEL_REF (VOIDmode, label),
21869 pc_rtx);
21870 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21871
21872 emit_jump_insn (temp);
21873 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21874 }
21875
21876 /* Output code to perform a log1p XFmode calculation. */
21877
21878 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21879 {
21880 rtx label1 = gen_label_rtx ();
21881 rtx label2 = gen_label_rtx ();
21882
21883 rtx tmp = gen_reg_rtx (XFmode);
21884 rtx tmp2 = gen_reg_rtx (XFmode);
21885
21886 emit_insn (gen_absxf2 (tmp, op1));
21887 emit_insn (gen_cmpxf (tmp,
21888 CONST_DOUBLE_FROM_REAL_VALUE (
21889 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21890 XFmode)));
21891 emit_jump_insn (gen_bge (label1));
21892
21893 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21894 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21895 emit_jump (label2);
21896
21897 emit_label (label1);
21898 emit_move_insn (tmp, CONST1_RTX (XFmode));
21899 emit_insn (gen_addxf3 (tmp, op1, tmp));
21900 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21901 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21902
21903 emit_label (label2);
21904 }
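
/* A standalone sketch (not GCC code) of the selection above.  The
   fyl2xp1 instruction is only valid for arguments of small magnitude, so
   inputs at or above 1 - sqrt(2)/2 ~= 0.29289 are handled by forming
   1 + x explicitly and using fyl2x instead:

     static double log1p_sketch (double x)
     {
       if (fabs (x) < 0.29289321881345247561810596348408353)
         return log1p (x);
       return log (1.0 + x);
     }

   In both cases the fldln2 constant supplies the ln(2) factor that turns
   the base-2 logarithm computed by the hardware into a natural log.  */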
21905
21906 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21907
21908 static void ATTRIBUTE_UNUSED
21909 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21910 tree decl)
21911 {
21912 /* With Binutils 2.15, the "@unwind" marker must be specified on
21913 every occurrence of the ".eh_frame" section, not just the first
21914 one. */
21915 if (TARGET_64BIT
21916 && strcmp (name, ".eh_frame") == 0)
21917 {
21918 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21919 flags & SECTION_WRITE ? "aw" : "a");
21920 return;
21921 }
21922 default_elf_asm_named_section (name, flags, decl);
21923 }
21924
21925 /* Return the mangling of TYPE if it is an extended fundamental type. */
21926
21927 static const char *
21928 ix86_mangle_fundamental_type (tree type)
21929 {
21930 switch (TYPE_MODE (type))
21931 {
21932 case TFmode:
21933 /* __float128 is "g". */
21934 return "g";
21935 case XFmode:
21936 /* "long double" or __float80 is "e". */
21937 return "e";
21938 default:
21939 return NULL;
21940 }
21941 }
21942
21943 /* For 32-bit code we can save the PIC register setup by using the
21944 __stack_chk_fail_local hidden function instead of calling
21945 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
21946 register, so it is better to call __stack_chk_fail directly. */
21947
21948 static tree
21949 ix86_stack_protect_fail (void)
21950 {
21951 return TARGET_64BIT
21952 ? default_external_stack_protect_fail ()
21953 : default_hidden_stack_protect_fail ();
21954 }
21955
21956 /* Select a format to encode pointers in exception handling data. CODE
21957 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21958 true if the symbol may be affected by dynamic relocations.
21959
21960 ??? All x86 object file formats are capable of representing this.
21961 After all, the relocation needed is the same as for the call insn.
21962 Whether or not a particular assembler allows us to enter such, I
21963 guess we'll have to see. */
21964 int
21965 asm_preferred_eh_data_format (int code, int global)
21966 {
21967 if (flag_pic)
21968 {
21969 int type = DW_EH_PE_sdata8;
21970 if (!TARGET_64BIT
21971 || ix86_cmodel == CM_SMALL_PIC
21972 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21973 type = DW_EH_PE_sdata4;
21974 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21975 }
21976 if (ix86_cmodel == CM_SMALL
21977 || (ix86_cmodel == CM_MEDIUM && code))
21978 return DW_EH_PE_udata4;
21979 return DW_EH_PE_absptr;
21980 }
21981 \f
21982 /* Expand copysign from SIGN to the positive value ABS_VALUE
21983 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21984 the sign-bit. */
21985 static void
21986 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21987 {
21988 enum machine_mode mode = GET_MODE (sign);
21989 rtx sgn = gen_reg_rtx (mode);
21990 if (mask == NULL_RTX)
21991 {
21992 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21993 if (!VECTOR_MODE_P (mode))
21994 {
21995 /* We need to generate a scalar mode mask in this case. */
21996 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21997 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21998 mask = gen_reg_rtx (mode);
21999 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22000 }
22001 }
22002 else
22003 mask = gen_rtx_NOT (mode, mask);
22004 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22005 gen_rtx_AND (mode, mask, sign)));
22006 emit_insn (gen_rtx_SET (VOIDmode, result,
22007 gen_rtx_IOR (mode, abs_value, sgn)));
22008 }
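
/* A standalone sketch (not GCC code) of the bit-level operation above,
   assuming IEEE doubles reinterpreted as 64-bit integers (e.g. via
   memcpy from <string.h>):

     result_bits = abs_value_bits | (sign_bits & 0x8000000000000000ull);

   ABS_VALUE is known to be nonnegative here, so a single OR of the
   extracted sign bit reproduces copysign.  */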
22009
22010 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22011 mask for masking out the sign-bit is stored in *SMASK, if that is
22012 non-null. */
22013 static rtx
22014 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22015 {
22016 enum machine_mode mode = GET_MODE (op0);
22017 rtx xa, mask;
22018
22019 xa = gen_reg_rtx (mode);
22020 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22021 if (!VECTOR_MODE_P (mode))
22022 {
22023 /* We need to generate a scalar mode mask in this case. */
22024 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22025 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22026 mask = gen_reg_rtx (mode);
22027 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22028 }
22029 emit_insn (gen_rtx_SET (VOIDmode, xa,
22030 gen_rtx_AND (mode, op0, mask)));
22031
22032 if (smask)
22033 *smask = mask;
22034
22035 return xa;
22036 }
22037
22038 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22039 swapping the operands if SWAP_OPERANDS is true. The expanded
22040 code is a forward jump to a newly created label in case the
22041 comparison is true. The generated label rtx is returned. */
22042 static rtx
22043 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22044 bool swap_operands)
22045 {
22046 rtx label, tmp;
22047
22048 if (swap_operands)
22049 {
22050 tmp = op0;
22051 op0 = op1;
22052 op1 = tmp;
22053 }
22054
22055 label = gen_label_rtx ();
22056 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22057 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22058 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22059 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22060 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22061 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22062 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22063 JUMP_LABEL (tmp) = label;
22064
22065 return label;
22066 }
22067
22068 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22069 using comparison code CODE. Operands are swapped for the comparison if
22070 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22071 static rtx
22072 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22073 bool swap_operands)
22074 {
22075 enum machine_mode mode = GET_MODE (op0);
22076 rtx mask = gen_reg_rtx (mode);
22077
22078 if (swap_operands)
22079 {
22080 rtx tmp = op0;
22081 op0 = op1;
22082 op1 = tmp;
22083 }
22084
22085 if (mode == DFmode)
22086 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22087 gen_rtx_fmt_ee (code, mode, op0, op1)));
22088 else
22089 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22090 gen_rtx_fmt_ee (code, mode, op0, op1)));
22091
22092 return mask;
22093 }
22094
22095 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22096 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
22097 static rtx
22098 ix86_gen_TWO52 (enum machine_mode mode)
22099 {
22100 REAL_VALUE_TYPE TWO52r;
22101 rtx TWO52;
22102
22103 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22104 TWO52 = const_double_from_real_value (TWO52r, mode);
22105 TWO52 = force_reg (mode, TWO52);
22106
22107 return TWO52;
22108 }
22109
22110 /* Expand SSE sequence for computing lround from OP1 storing
22111 into OP0. */
22112 void
22113 ix86_expand_lround (rtx op0, rtx op1)
22114 {
22115 /* C code for the stuff we're doing below:
22116 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22117 return (long)tmp;
22118 */
22119 enum machine_mode mode = GET_MODE (op1);
22120 const struct real_format *fmt;
22121 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22122 rtx adj;
22123
22124 /* load nextafter (0.5, 0.0) */
22125 fmt = REAL_MODE_FORMAT (mode);
22126 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22127 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22128
22129 /* adj = copysign (0.5, op1) */
22130 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22131 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22132
22133 /* adj = op1 + adj */
22134 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22135
22136 /* op0 = (imode)adj */
22137 expand_fix (op0, adj, 0);
22138 }
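
/* Why nextafter (0.5, 0.0) rather than 0.5: a standalone sketch (not GCC
   code), assuming the default round-to-nearest-even mode:

     static long lround_sketch (double x)
     {
       double adj = copysign (nextafter (0.5, 0.0), x);
       return (long) (x + adj);
     }

   With a plain 0.5, an input just below 0.5 (the predecessor of 0.5)
   would sum to a value that rounds up to 1.0 and then truncate to 1,
   which is wrong.  With the predecessor of 0.5 as the adjustment, such
   inputs stay below 1.0, while an input of exactly 0.5 still sums to a
   value that rounds up to 1.0, preserving round-half-away-from-zero.  */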
22139
22140 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
22141 into OP0. */
22142 void
22143 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22144 {
22145 /* C code for the stuff we're doing below (for do_floor):
22146 xi = (long)op1;
22147 xi -= (double)xi > op1 ? 1 : 0;
22148 return xi;
22149 */
22150 enum machine_mode fmode = GET_MODE (op1);
22151 enum machine_mode imode = GET_MODE (op0);
22152 rtx ireg, freg, label, tmp;
22153
22154 /* reg = (long)op1 */
22155 ireg = gen_reg_rtx (imode);
22156 expand_fix (ireg, op1, 0);
22157
22158 /* freg = (double)reg */
22159 freg = gen_reg_rtx (fmode);
22160 expand_float (freg, ireg, 0);
22161
22162 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22163 label = ix86_expand_sse_compare_and_jump (UNLE,
22164 freg, op1, !do_floor);
22165 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22166 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22167 emit_move_insn (ireg, tmp);
22168
22169 emit_label (label);
22170 LABEL_NUSES (label) = 1;
22171
22172 emit_move_insn (op0, ireg);
22173 }
22174
22175 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22176 result in OPERAND0. */
22177 void
22178 ix86_expand_rint (rtx operand0, rtx operand1)
22179 {
22180 /* C code for the stuff we're doing below:
22181 xa = fabs (operand1);
22182 if (!isless (xa, 2**52))
22183 return operand1;
22184 xa = xa + 2**52 - 2**52;
22185 return copysign (xa, operand1);
22186 */
22187 enum machine_mode mode = GET_MODE (operand0);
22188 rtx res, xa, label, TWO52, mask;
22189
22190 res = gen_reg_rtx (mode);
22191 emit_move_insn (res, operand1);
22192
22193 /* xa = abs (operand1) */
22194 xa = ix86_expand_sse_fabs (res, &mask);
22195
22196 /* if (!isless (xa, TWO52)) goto label; */
22197 TWO52 = ix86_gen_TWO52 (mode);
22198 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22199
22200 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22201 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22202
22203 ix86_sse_copysign_to_positive (res, xa, res, mask);
22204
22205 emit_label (label);
22206 LABEL_NUSES (label) = 1;
22207
22208 emit_move_insn (operand0, res);
22209 }
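
/* A standalone sketch (not GCC code) of the TWO52 trick used above and in
   the expanders that follow, assuming IEEE double and the default
   round-to-nearest mode:

     static double rint_sketch (double x)
     {
       const double two52 = 4503599627370496.0;
       double xa = fabs (x);
       if (!(xa < two52))
         return x;
       xa = (xa + two52) - two52;
       return copysign (xa, x);
     }

   Adding 2**52 pushes the fraction bits below the units place out of the
   mantissa, so the addition itself performs the rounding; subtracting
   2**52 recovers the rounded value.  Working on the absolute value and
   restoring the sign afterwards keeps -0.0 and negative inputs correct.  */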
22210
22211 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22212 into OPERAND0. */
22213 void
22214 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22215 {
22216 /* C code for the stuff we expand below.
22217 double xa = fabs (x), x2;
22218 if (!isless (xa, TWO52))
22219 return x;
22220 xa = xa + TWO52 - TWO52;
22221 x2 = copysign (xa, x);
22222 Compensate. Floor:
22223 if (x2 > x)
22224 x2 -= 1;
22225 Compensate. Ceil:
22226 if (x2 < x)
22227 x2 -= -1;
22228 return x2;
22229 */
22230 enum machine_mode mode = GET_MODE (operand0);
22231 rtx xa, TWO52, tmp, label, one, res, mask;
22232
22233 TWO52 = ix86_gen_TWO52 (mode);
22234
22235 /* Temporary for holding the result, initialized to the input
22236 operand to ease control flow. */
22237 res = gen_reg_rtx (mode);
22238 emit_move_insn (res, operand1);
22239
22240 /* xa = abs (operand1) */
22241 xa = ix86_expand_sse_fabs (res, &mask);
22242
22243 /* if (!isless (xa, TWO52)) goto label; */
22244 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22245
22246 /* xa = xa + TWO52 - TWO52; */
22247 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22248 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22249
22250 /* xa = copysign (xa, operand1) */
22251 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22252
22253 /* generate 1.0 or -1.0 */
22254 one = force_reg (mode,
22255 const_double_from_real_value (do_floor
22256 ? dconst1 : dconstm1, mode));
22257
22258 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22259 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22260 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22261 gen_rtx_AND (mode, one, tmp)));
22262 /* We always need to subtract here to preserve signed zero. */
22263 tmp = expand_simple_binop (mode, MINUS,
22264 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22265 emit_move_insn (res, tmp);
22266
22267 emit_label (label);
22268 LABEL_NUSES (label) = 1;
22269
22270 emit_move_insn (operand0, res);
22271 }
22272
22273 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22274 into OPERAND0. */
22275 void
22276 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22277 {
22278 /* C code for the stuff we expand below.
22279 double xa = fabs (x), x2;
22280 if (!isless (xa, TWO52))
22281 return x;
22282 x2 = (double)(long)x;
22283 Compensate. Floor:
22284 if (x2 > x)
22285 x2 -= 1;
22286 Compensate. Ceil:
22287 if (x2 < x)
22288 x2 += 1;
22289 if (HONOR_SIGNED_ZEROS (mode))
22290 return copysign (x2, x);
22291 return x2;
22292 */
22293 enum machine_mode mode = GET_MODE (operand0);
22294 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22295
22296 TWO52 = ix86_gen_TWO52 (mode);
22297
22298 /* Temporary for holding the result, initialized to the input
22299 operand to ease control flow. */
22300 res = gen_reg_rtx (mode);
22301 emit_move_insn (res, operand1);
22302
22303 /* xa = abs (operand1) */
22304 xa = ix86_expand_sse_fabs (res, &mask);
22305
22306 /* if (!isless (xa, TWO52)) goto label; */
22307 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22308
22309 /* xa = (double)(long)x */
22310 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22311 expand_fix (xi, res, 0);
22312 expand_float (xa, xi, 0);
22313
22314 /* generate 1.0 */
22315 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22316
22317 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22318 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22319 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22320 gen_rtx_AND (mode, one, tmp)));
22321 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22322 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22323 emit_move_insn (res, tmp);
22324
22325 if (HONOR_SIGNED_ZEROS (mode))
22326 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22327
22328 emit_label (label);
22329 LABEL_NUSES (label) = 1;
22330
22331 emit_move_insn (operand0, res);
22332 }
22333
22334 /* Expand SSE sequence for computing round from OPERAND1 storing
22335 into OPERAND0. Sequence that works without relying on DImode truncation
22336 via cvttsd2siq, which is only available on 64-bit targets. */
22337 void
22338 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22339 {
22340 /* C code for the stuff we expand below.
22341 double xa = fabs (x), xa2, x2;
22342 if (!isless (xa, TWO52))
22343 return x;
22344 Using the absolute value and copying back sign makes
22345 -0.0 -> -0.0 correct.
22346 xa2 = xa + TWO52 - TWO52;
22347 Compensate.
22348 dxa = xa2 - xa;
22349 if (dxa <= -0.5)
22350 xa2 += 1;
22351 else if (dxa > 0.5)
22352 xa2 -= 1;
22353 x2 = copysign (xa2, x);
22354 return x2;
22355 */
22356 enum machine_mode mode = GET_MODE (operand0);
22357 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22358
22359 TWO52 = ix86_gen_TWO52 (mode);
22360
22361 /* Temporary for holding the result, initialized to the input
22362 operand to ease control flow. */
22363 res = gen_reg_rtx (mode);
22364 emit_move_insn (res, operand1);
22365
22366 /* xa = abs (operand1) */
22367 xa = ix86_expand_sse_fabs (res, &mask);
22368
22369 /* if (!isless (xa, TWO52)) goto label; */
22370 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22371
22372 /* xa2 = xa + TWO52 - TWO52; */
22373 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22374 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22375
22376 /* dxa = xa2 - xa; */
22377 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22378
22379 /* generate 0.5, 1.0 and -0.5 */
22380 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22381 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22382 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22383 0, OPTAB_DIRECT);
22384
22385 /* Compensate. */
22386 tmp = gen_reg_rtx (mode);
22387 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22388 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22389 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22390 gen_rtx_AND (mode, one, tmp)));
22391 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22392 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22393 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22394 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22395 gen_rtx_AND (mode, one, tmp)));
22396 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22397
22398 /* res = copysign (xa2, operand1) */
22399 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22400
22401 emit_label (label);
22402 LABEL_NUSES (label) = 1;
22403
22404 emit_move_insn (operand0, res);
22405 }
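
/* A minimal illustrative sketch, compile-time disabled, of the algorithm
   above (the example_* name and the |x| < 2**52 assumption are ours).
   Adding and subtracting 2**52 rounds to the nearest integer with ties
   to even, so a tie such as xa = 2.5 first yields xa2 = 2.0 and
   dxa = -0.5; the compensation then bumps xa2 to 3.0, giving the
   half-away-from-zero behaviour round () requires.  */
#if 0
static double
example_round_df_32 (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = __builtin_fabs (x);
  double xa2 = xa + two52 - two52;	/* nearest integer, ties to even */
  double dxa = xa2 - xa;
  if (dxa <= -0.5)
    xa2 += 1.0;				/* rounded down through a tie */
  else if (dxa > 0.5)
    xa2 -= 1.0;				/* only under non-default rounding modes */
  return __builtin_copysign (xa2, x);
}
#endif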
22406
22407 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22408 into OPERAND0. */
22409 void
22410 ix86_expand_trunc (rtx operand0, rtx operand1)
22411 {
22412 /* C code for SSE variant we expand below.
22413 double xa = fabs (x), x2;
22414 if (!isless (xa, TWO52))
22415 return x;
22416 x2 = (double)(long)x;
22417 if (HONOR_SIGNED_ZEROS (mode))
22418 return copysign (x2, x);
22419 return x2;
22420 */
22421 enum machine_mode mode = GET_MODE (operand0);
22422 rtx xa, xi, TWO52, label, res, mask;
22423
22424 TWO52 = ix86_gen_TWO52 (mode);
22425
22426 /* Temporary for holding the result, initialized to the input
22427 operand to ease control flow. */
22428 res = gen_reg_rtx (mode);
22429 emit_move_insn (res, operand1);
22430
22431 /* xa = abs (operand1) */
22432 xa = ix86_expand_sse_fabs (res, &mask);
22433
22434 /* if (!isless (xa, TWO52)) goto label; */
22435 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22436
22437 /* x = (double)(long)x */
22438 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22439 expand_fix (xi, res, 0);
22440 expand_float (res, xi, 0);
22441
22442 if (HONOR_SIGNED_ZEROS (mode))
22443 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22444
22445 emit_label (label);
22446 LABEL_NUSES (label) = 1;
22447
22448 emit_move_insn (operand0, res);
22449 }
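
/* A minimal illustrative sketch, compile-time disabled (the example_*
   name and the |x| < 2**52 assumption are ours).  The integer round
   trip by itself already truncates toward zero; the copysign step only
   matters for signed zeros: (double)(long)-0.3 is +0.0, and copysign
   restores the -0.0 that trunc (-0.3) must return.  */
#if 0
static double
example_trunc (double x)
{
  double x2 = (double) (long long) x;	/* truncation toward zero */
  return __builtin_copysign (x2, x);	/* fixes -0.0 for -1.0 < x < 0.0 */
}
#endif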
22450
22451 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22452 into OPERAND0, avoiding the 64-bit-only cvttsd2siq truncation that ix86_expand_trunc relies on. */
22453 void
22454 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
22455 {
22456 enum machine_mode mode = GET_MODE (operand0);
22457 rtx xa, mask, TWO52, label, one, res, smask, tmp;
22458
22459 /* C code for SSE variant we expand below.
22460 double xa = fabs (x), xa2, x2;
22461 if (!isless (xa, TWO52))
22462 return x;
22463 xa2 = xa + TWO52 - TWO52;
22464 Compensate:
22465 if (xa2 > xa)
22466 xa2 -= 1.0;
22467 x2 = copysign (xa2, x);
22468 return x2;
22469 */
22470
22471 TWO52 = ix86_gen_TWO52 (mode);
22472
22473 /* Temporary for holding the result, initialized to the input
22474 operand to ease control flow. */
22475 res = gen_reg_rtx (mode);
22476 emit_move_insn (res, operand1);
22477
22478 /* xa = abs (operand1) */
22479 xa = ix86_expand_sse_fabs (res, &smask);
22480
22481 /* if (!isless (xa, TWO52)) goto label; */
22482 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22483
22484 /* res = xa + TWO52 - TWO52; */
22485 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22486 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
22487 emit_move_insn (res, tmp);
22488
22489 /* generate 1.0 */
22490 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22491
22492 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
22493 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
22494 emit_insn (gen_rtx_SET (VOIDmode, mask,
22495 gen_rtx_AND (mode, mask, one)));
22496 tmp = expand_simple_binop (mode, MINUS,
22497 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
22498 emit_move_insn (res, tmp);
22499
22500 /* res = copysign (res, operand1) */
22501 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
22502
22503 emit_label (label);
22504 LABEL_NUSES (label) = 1;
22505
22506 emit_move_insn (operand0, res);
22507 }
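
/* A minimal illustrative sketch, compile-time disabled (example_* name,
   round-to-nearest and |x| < 2**52 assumed).  The 2**52 trick rounds to
   the nearest integer; subtracting one whenever that overshot the
   magnitude turns it into truncation: xa = 2.7 gives xa2 = 3.0 > 2.7,
   hence 2.0, while xa = 2.3 gives xa2 = 2.0 directly.  */
#if 0
static double
example_trunc_df_32 (double x)
{
  const double two52 = 4503599627370496.0;	/* 2**52 */
  double xa = __builtin_fabs (x);
  double xa2 = xa + two52 - two52;	/* nearest integer */
  if (xa2 > xa)
    xa2 -= 1.0;				/* rounded up: step back down */
  return __builtin_copysign (xa2, x);
}
#endif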
22508
22509 /* Expand SSE sequence for computing round from OPERAND1 storing
22510 into OPERAND0. */
22511 void
22512 ix86_expand_round (rtx operand0, rtx operand1)
22513 {
22514 /* C code for the stuff we're doing below:
22515 double xa = fabs (x);
22516 if (!isless (xa, TWO52))
22517 return x;
22518 xa = (double)(long)(xa + nextafter (0.5, 0.0));
22519 return copysign (xa, x);
22520 */
22521 enum machine_mode mode = GET_MODE (operand0);
22522 rtx res, TWO52, xa, label, xi, half, mask;
22523 const struct real_format *fmt;
22524 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22525
22526 /* Temporary for holding the result, initialized to the input
22527 operand to ease control flow. */
22528 res = gen_reg_rtx (mode);
22529 emit_move_insn (res, operand1);
22530
22531 TWO52 = ix86_gen_TWO52 (mode);
22532 xa = ix86_expand_sse_fabs (res, &mask);
22533 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22534
22535 /* load nextafter (0.5, 0.0) */
22536 fmt = REAL_MODE_FORMAT (mode);
22537 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22538 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22539
22540 /* xa = xa + 0.5 */
22541 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
22542 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
22543
22544 /* xa = (double)(long)xa */
22545 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22546 expand_fix (xi, xa, 0);
22547 expand_float (xa, xi, 0);
22548
22549 /* res = copysign (xa, operand1) */
22550 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
22551
22552 emit_label (label);
22553 LABEL_NUSES (label) = 1;
22554
22555 emit_move_insn (operand0, res);
22556 }
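
/* A minimal illustrative sketch, compile-time disabled (example_* name,
   |x| < 2**52 and round-to-nearest assumed).  Adding exactly 0.5 before
   truncating would mis-round the largest double below 0.5:
   0.49999999999999994 + 0.5 rounds up to 1.0 and truncates to 1 instead
   of 0.  Adding nextafter (0.5, 0.0) = 0.5 - 2**-54 keeps that sum just
   below 1.0, while halfway cases such as 0.5 or 2.5 still reach the
   next integer.  */
#if 0
static double
example_round (double x)
{
  const double pred_half = 0.5 - 0x1p-54;	/* nextafter (0.5, 0.0) */
  double xa = __builtin_fabs (x) + pred_half;
  xa = (double) (long long) xa;		/* truncation toward zero */
  return __builtin_copysign (xa, x);
}
#endif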
22557
22558 \f
22559 /* Table of valid machine attributes. */
22560 static const struct attribute_spec ix86_attribute_table[] =
22561 {
22562 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
22563 /* Stdcall attribute says callee is responsible for popping arguments
22564 if they are not variable. */
22565 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22566 /* Fastcall attribute says callee is responsible for popping arguments
22567 if they are not variable. */
22568 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22569 /* Cdecl attribute says the callee is a normal C declaration */
22570 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22571 /* Regparm attribute specifies how many integer arguments are to be
22572 passed in registers. */
22573 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
22574 /* Sseregparm attribute says we are using x86_64 calling conventions
22575 for FP arguments. */
22576 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22577 /* force_align_arg_pointer says this function realigns the stack at entry. */
22578 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
22579 false, true, true, ix86_handle_cconv_attribute },
22580 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22581 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
22582 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
22583 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
22584 #endif
22585 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22586 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22587 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22588 SUBTARGET_ATTRIBUTE_TABLE,
22589 #endif
22590 { NULL, 0, 0, false, false, false, NULL }
22591 };
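
/* Illustrative only, compile-time disabled: how user code would request
   the calling-convention and struct-layout attributes registered in the
   table above (the declaration names are invented for this sketch).  */
#if 0
extern int __attribute__ ((fastcall)) fast_fn (int a, int b);
extern int __attribute__ ((stdcall)) std_fn (int a, int b);
extern int __attribute__ ((regparm (3))) reg_fn (int a, int b, int c);

struct __attribute__ ((ms_struct)) ms_layout
{
  char c;
  int i;			/* laid out with the MS struct rules */
};
#endif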
22592
22593 /* Initialize the GCC target structure. */
22594 #undef TARGET_ATTRIBUTE_TABLE
22595 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22596 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22597 # undef TARGET_MERGE_DECL_ATTRIBUTES
22598 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22599 #endif
22600
22601 #undef TARGET_COMP_TYPE_ATTRIBUTES
22602 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22603
22604 #undef TARGET_INIT_BUILTINS
22605 #define TARGET_INIT_BUILTINS ix86_init_builtins
22606 #undef TARGET_EXPAND_BUILTIN
22607 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22608
22609 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22610 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22611 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22612 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22613
22614 #undef TARGET_ASM_FUNCTION_EPILOGUE
22615 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22616
22617 #undef TARGET_ENCODE_SECTION_INFO
22618 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22619 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22620 #else
22621 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22622 #endif
22623
22624 #undef TARGET_ASM_OPEN_PAREN
22625 #define TARGET_ASM_OPEN_PAREN ""
22626 #undef TARGET_ASM_CLOSE_PAREN
22627 #define TARGET_ASM_CLOSE_PAREN ""
22628
22629 #undef TARGET_ASM_ALIGNED_HI_OP
22630 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22631 #undef TARGET_ASM_ALIGNED_SI_OP
22632 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22633 #ifdef ASM_QUAD
22634 #undef TARGET_ASM_ALIGNED_DI_OP
22635 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22636 #endif
22637
22638 #undef TARGET_ASM_UNALIGNED_HI_OP
22639 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22640 #undef TARGET_ASM_UNALIGNED_SI_OP
22641 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22642 #undef TARGET_ASM_UNALIGNED_DI_OP
22643 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22644
22645 #undef TARGET_SCHED_ADJUST_COST
22646 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22647 #undef TARGET_SCHED_ISSUE_RATE
22648 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22649 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22650 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22651 ia32_multipass_dfa_lookahead
22652
22653 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22654 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22655
22656 #ifdef HAVE_AS_TLS
22657 #undef TARGET_HAVE_TLS
22658 #define TARGET_HAVE_TLS true
22659 #endif
22660 #undef TARGET_CANNOT_FORCE_CONST_MEM
22661 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22662 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22663 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22664
22665 #undef TARGET_DELEGITIMIZE_ADDRESS
22666 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22667
22668 #undef TARGET_MS_BITFIELD_LAYOUT_P
22669 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22670
22671 #if TARGET_MACHO
22672 #undef TARGET_BINDS_LOCAL_P
22673 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22674 #endif
22675 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22676 #undef TARGET_BINDS_LOCAL_P
22677 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22678 #endif
22679
22680 #undef TARGET_ASM_OUTPUT_MI_THUNK
22681 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22682 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22683 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22684
22685 #undef TARGET_ASM_FILE_START
22686 #define TARGET_ASM_FILE_START x86_file_start
22687
22688 #undef TARGET_DEFAULT_TARGET_FLAGS
22689 #define TARGET_DEFAULT_TARGET_FLAGS \
22690 (TARGET_DEFAULT \
22691 | TARGET_SUBTARGET_DEFAULT \
22692 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22693
22694 #undef TARGET_HANDLE_OPTION
22695 #define TARGET_HANDLE_OPTION ix86_handle_option
22696
22697 #undef TARGET_RTX_COSTS
22698 #define TARGET_RTX_COSTS ix86_rtx_costs
22699 #undef TARGET_ADDRESS_COST
22700 #define TARGET_ADDRESS_COST ix86_address_cost
22701
22702 #undef TARGET_FIXED_CONDITION_CODE_REGS
22703 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22704 #undef TARGET_CC_MODES_COMPATIBLE
22705 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22706
22707 #undef TARGET_MACHINE_DEPENDENT_REORG
22708 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22709
22710 #undef TARGET_BUILD_BUILTIN_VA_LIST
22711 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22712
22713 #undef TARGET_MD_ASM_CLOBBERS
22714 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22715
22716 #undef TARGET_PROMOTE_PROTOTYPES
22717 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22718 #undef TARGET_STRUCT_VALUE_RTX
22719 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22720 #undef TARGET_SETUP_INCOMING_VARARGS
22721 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22722 #undef TARGET_MUST_PASS_IN_STACK
22723 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22724 #undef TARGET_PASS_BY_REFERENCE
22725 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22726 #undef TARGET_INTERNAL_ARG_POINTER
22727 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22728 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22729 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22730 #undef TARGET_STRICT_ARGUMENT_NAMING
22731 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22732
22733 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22734 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22735
22736 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22737 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22738
22739 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22740 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22741
22742 #ifdef HAVE_AS_TLS
22743 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22744 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22745 #endif
22746
22747 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22748 #undef TARGET_INSERT_ATTRIBUTES
22749 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22750 #endif
22751
22752 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22753 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22754
22755 #undef TARGET_STACK_PROTECT_FAIL
22756 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22757
22758 #undef TARGET_FUNCTION_VALUE
22759 #define TARGET_FUNCTION_VALUE ix86_function_value
22760
22761 struct gcc_target targetm = TARGET_INITIALIZER;
22762 \f
22763 #include "gt-i386.h"