[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
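A quick sketch of how this index is used to pick a slot in the per-mode cost arrays below (the mult_init/divide field names are assumed from the processor_costs declaration in i386.h):

/* Illustrative lookup (assumed field names):
     cost->mult_init[MODE_INDEX (mode)]   -- multiply start cost for MODE
     cost->divide[MODE_INDEX (mode)]      -- divide/mod cost for MODE
   QImode/HImode/SImode/DImode map to slots 0..3; any other mode (e.g.
   TImode or a float mode) falls into the catch-all "other" slot 4.  */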
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
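A worked instance of that scaling, as a sanity check:

/* With COSTS_N_INSNS (N) == (N) * 4, a one-insn add costs 4 on the speed
   scale; the same add is about 2 bytes long, so on the size scale it is
   COSTS_N_BYTES (2) == 4 as well, and the two scales agree for the baseline
   add.  A 3-byte lea is likewise COSTS_N_BYTES (3) == 6 in size_cost.  */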
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time, but past 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
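A sketch of how the stringop table above decodes, assuming the usual {max_size, algorithm} reading of stringop_algs (with -1 meaning no upper bound) and that the second member of each pair is the 64-bit variant:

/* Illustrative decoding of pentiumpro_cost's memcpy strategy:
     size unknown at compile time -> rep_prefix_4_byte
     known size <=  128 bytes     -> inline loop
     known size <= 1024 bytes     -> unrolled loop
     known size <= 8192 bytes     -> rep movsl (rep_prefix_4_byte)
     anything larger (max == -1)  -> rep movsb (rep_prefix_1_byte)
   The 64-bit slot is just DUMMY_STRINGOP_ALGS for this 32-bit-only CPU.  */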
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches completely unlimited, as their execution also
588 takes some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches completely unlimited, as their execution also
661 takes some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
908 value is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be the common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
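A rough sketch of how these masks are consumed (an assumption; the exact TARGET_* macro plumbing lives in i386.h):

/* Each m_FOO is the single bit (1 << PROCESSOR_FOO), so an entry such as
   "m_PENT4 | m_NOCONA" in the tables below simply lists the CPUs a tuning
   applies to.  A feature test against the active -mtune target then looks
   roughly like
     (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)) != 0  */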
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code-size
1011 tradeoff. We can't enable it for 32-bit generic because it does not
1012 work well with PPro-based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1031
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into the P4 based
1033 on simulation results, but after the P4 was produced no performance benefit
1034 was observed from branch hints, and they also increase code size.
1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
1043 /* | m_GENERIC | m_ATHLON_K8 ? */
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls in the Generic32 compilation setting as well. However,
1052 in the current implementation partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro-based chips and conflicts with the partial-register
1056 dependencies used by Athlon/P4-based chips, it is better to leave it off
1057 for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls were more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4-based chips that treat 128-bit
1136 SSE registers as single units and K8-based chips that divide SSE
1137 registers into two 64-bit halves. This knob promotes all store destinations
1138 to 128 bits to allow register renaming on 128-bit SSE units, but usually
1139 results in one extra micro-op on 64-bit SSE units. Experimental results
1140 show that disabling this option on P4 causes an over-20% SPECfp regression,
1141 while enabling it on K8 causes roughly a 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just the lower part of scalar values in the proper format, leaving
1151 the upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1196 };
1197
1198 /* Feature tests against the various architecture variations. */
1199 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1200 /* X86_ARCH_CMOVE */
1201 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1202
1203 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1204 ~m_386,
1205
1206 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1207 ~(m_386 | m_486),
1208
1209 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1210 ~m_386,
1211
1212 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1213 ~m_386,
1214 };
1215
1216 static const unsigned int x86_accumulate_outgoing_args
1217 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1218
1219 static const unsigned int x86_arch_always_fancy_math_387
1220 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1221 | m_NOCONA | m_CORE2 | m_GENERIC;
1222
1223 static enum stringop_alg stringop_alg = no_stringop;
1224
1225 /* If the average insn count for a single function invocation is
1226 lower than this constant, emit fast (but longer) prologue and
1227 epilogue code. */
1228 #define FAST_PROLOGUE_INSN_COUNT 20
1229
1230 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1231 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1232 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1233 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1234
1235 /* Array of the smallest class containing reg number REGNO, indexed by
1236 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1237
1238 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1239 {
1240 /* ax, dx, cx, bx */
1241 AREG, DREG, CREG, BREG,
1242 /* si, di, bp, sp */
1243 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1244 /* FP registers */
1245 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1246 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1247 /* arg pointer */
1248 NON_Q_REGS,
1249 /* flags, fpsr, fpcr, frame */
1250 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1251 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1252 SSE_REGS, SSE_REGS,
1253 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1254 MMX_REGS, MMX_REGS,
1255 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1256 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1257 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1258 SSE_REGS, SSE_REGS,
1259 };
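A spot-check of the table, read row by row against the hard register order:

/* REGNO_REG_CLASS (0) is AREG (%eax), REGNO_REG_CLASS (1) is DREG (%edx),
   %esp (regno 7) falls in NON_Q_REGS, the eight x87 stack registers map to
   FP_TOP_REG/FP_SECOND_REG/FLOAT_REGS, and the SSE and MMX hard registers
   map to SSE_REGS and MMX_REGS respectively.  */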
1260
1261 /* The "default" register map used in 32bit mode. */
1262
1263 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1264 {
1265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1272 };
1273
1274 static int const x86_64_int_parameter_registers[6] =
1275 {
1276 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1277 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1278 };
1279
1280 static int const x86_64_int_return_registers[4] =
1281 {
1282 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1283 };
1284
1285 /* The "default" register map used in 64bit mode. */
1286 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1287 {
1288 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1289 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1290 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1291 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1292 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1293 8,9,10,11,12,13,14,15, /* extended integer registers */
1294 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1295 };
1296
1297 /* Define the register numbers to be used in Dwarf debugging information.
1298 The SVR4 reference port C compiler uses the following register numbers
1299 in its Dwarf output code:
1300 0 for %eax (gcc regno = 0)
1301 1 for %ecx (gcc regno = 2)
1302 2 for %edx (gcc regno = 1)
1303 3 for %ebx (gcc regno = 3)
1304 4 for %esp (gcc regno = 7)
1305 5 for %ebp (gcc regno = 6)
1306 6 for %esi (gcc regno = 4)
1307 7 for %edi (gcc regno = 5)
1308 The following three DWARF register numbers are never generated by
1309 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1310 believes these numbers have these meanings.
1311 8 for %eip (no gcc equivalent)
1312 9 for %eflags (gcc regno = 17)
1313 10 for %trapno (no gcc equivalent)
1314 It is not at all clear how we should number the FP stack registers
1315 for the x86 architecture. If the version of SDB on x86/svr4 were
1316 a bit less brain dead with respect to floating-point then we would
1317 have a precedent to follow with respect to DWARF register numbers
1318 for x86 FP registers, but the SDB on x86/svr4 is so completely
1319 broken with respect to FP registers that it is hardly worth thinking
1320 of it as something to strive for compatibility with.
1321 The version of x86/svr4 SDB I have at the moment does (partially)
1322 seem to believe that DWARF register number 11 is associated with
1323 the x86 register %st(0), but that's about all. Higher DWARF
1324 register numbers don't seem to be associated with anything in
1325 particular, and even for DWARF regno 11, SDB only seems to under-
1326 stand that it should say that a variable lives in %st(0) (when
1327 asked via an `=' command) if we said it was in DWARF regno 11,
1328 but SDB still prints garbage when asked for the value of the
1329 variable in question (via a `/' command).
1330 (Also note that the labels SDB prints for various FP stack regs
1331 when doing an `x' command are all wrong.)
1332 Note that these problems generally don't affect the native SVR4
1333 C compiler because it doesn't allow the use of -O with -g and
1334 because when it is *not* optimizing, it allocates a memory
1335 location for each floating-point variable, and the memory
1336 location is what gets described in the DWARF AT_location
1337 attribute for the variable in question.
1338 Regardless of the severe mental illness of the x86/svr4 SDB, we
1339 do something sensible here and we use the following DWARF
1340 register numbers. Note that these are all stack-top-relative
1341 numbers.
1342 11 for %st(0) (gcc regno = 8)
1343 12 for %st(1) (gcc regno = 9)
1344 13 for %st(2) (gcc regno = 10)
1345 14 for %st(3) (gcc regno = 11)
1346 15 for %st(4) (gcc regno = 12)
1347 16 for %st(5) (gcc regno = 13)
1348 17 for %st(6) (gcc regno = 14)
1349 18 for %st(7) (gcc regno = 15)
1350 */
1351 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1352 {
1353 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1354 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1355 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1360 };
1361
1362 /* Test and compare insns in i386.md store the information needed to
1363 generate branch and scc insns here. */
1364
1365 rtx ix86_compare_op0 = NULL_RTX;
1366 rtx ix86_compare_op1 = NULL_RTX;
1367 rtx ix86_compare_emitted = NULL_RTX;
1368
1369 /* Size of the register save area. */
1370 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
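For concreteness, assuming the usual x86-64 values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8:

/* X86_64_VARARGS_SIZE works out to 6 * 8 + 8 * 16 == 176 bytes: room for
   the six integer argument registers plus the eight SSE argument registers
   that a varargs function may need to spill into its register save area.  */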
1371
1372 /* Define the structure for the machine field in struct function. */
1373
1374 struct stack_local_entry GTY(())
1375 {
1376 unsigned short mode;
1377 unsigned short n;
1378 rtx rtl;
1379 struct stack_local_entry *next;
1380 };
1381
1382 /* Structure describing stack frame layout.
1383 Stack grows downward:
1384
1385 [arguments]
1386 <- ARG_POINTER
1387 saved pc
1388
1389 saved frame pointer if frame_pointer_needed
1390 <- HARD_FRAME_POINTER
1391 [saved regs]
1392
1393 [padding1] \
1394 )
1395 [va_arg registers] (
1396 > to_allocate <- FRAME_POINTER
1397 [frame] (
1398 )
1399 [padding2] /
1400 */
1401 struct ix86_frame
1402 {
1403 int nregs;
1404 int padding1;
1405 int va_arg_size;
1406 HOST_WIDE_INT frame;
1407 int padding2;
1408 int outgoing_arguments_size;
1409 int red_zone_size;
1410
1411 HOST_WIDE_INT to_allocate;
1412 /* The offsets relative to ARG_POINTER. */
1413 HOST_WIDE_INT frame_pointer_offset;
1414 HOST_WIDE_INT hard_frame_pointer_offset;
1415 HOST_WIDE_INT stack_pointer_offset;
1416
1417 /* When save_regs_using_mov is set, emit prologue using
1418 move instead of push instructions. */
1419 bool save_regs_using_mov;
1420 };
1421
1422 /* Code model option. */
1423 enum cmodel ix86_cmodel;
1424 /* Asm dialect. */
1425 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1426 /* TLS dialects. */
1427 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1428
1429 /* Which unit we are generating floating point math for. */
1430 enum fpmath_unit ix86_fpmath;
1431
1432 /* Which cpu are we scheduling for. */
1433 enum processor_type ix86_tune;
1434
1435 /* Which instruction set architecture to use. */
1436 enum processor_type ix86_arch;
1437
1438 /* true if sse prefetch instruction is not NOOP. */
1439 int x86_prefetch_sse;
1440
1441 /* true if cmpxchg16b is supported. */
1442 int x86_cmpxchg16b;
1443
1444 /* ix86_regparm_string as a number */
1445 static int ix86_regparm;
1446
1447 /* -mstackrealign option */
1448 extern int ix86_force_align_arg_pointer;
1449 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1450
1451 /* Preferred alignment for stack boundary in bits. */
1452 unsigned int ix86_preferred_stack_boundary;
1453
1454 /* Values 1-5: see jump.c */
1455 int ix86_branch_cost;
1456
1457 /* Variables which are this size or smaller are put in the data/bss
1458 or ldata/lbss sections. */
1459
1460 int ix86_section_threshold = 65536;
1461
1462 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1463 char internal_label_prefix[16];
1464 int internal_label_prefix_len;
1465 \f
1466 static bool ix86_handle_option (size_t, const char *, int);
1467 static void output_pic_addr_const (FILE *, rtx, int);
1468 static void put_condition_code (enum rtx_code, enum machine_mode,
1469 int, int, FILE *);
1470 static const char *get_some_local_dynamic_name (void);
1471 static int get_some_local_dynamic_name_1 (rtx *, void *);
1472 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1473 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1474 rtx *);
1475 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1476 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1477 enum machine_mode);
1478 static rtx get_thread_pointer (int);
1479 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1480 static void get_pc_thunk_name (char [32], unsigned int);
1481 static rtx gen_push (rtx);
1482 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1483 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1484 static struct machine_function * ix86_init_machine_status (void);
1485 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1486 static int ix86_nsaved_regs (void);
1487 static void ix86_emit_save_regs (void);
1488 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1489 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1490 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1491 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1492 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1493 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1494 static int ix86_issue_rate (void);
1495 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1496 static int ia32_multipass_dfa_lookahead (void);
1497 static void ix86_init_mmx_sse_builtins (void);
1498 static rtx x86_this_parameter (tree);
1499 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1500 HOST_WIDE_INT, tree);
1501 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1502 static void x86_file_start (void);
1503 static void ix86_reorg (void);
1504 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1505 static tree ix86_build_builtin_va_list (void);
1506 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1507 tree, int *, int);
1508 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1509 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1510 static bool ix86_vector_mode_supported_p (enum machine_mode);
1511
1512 static int ix86_address_cost (rtx);
1513 static bool ix86_cannot_force_const_mem (rtx);
1514 static rtx ix86_delegitimize_address (rtx);
1515
1516 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1517
1518 struct builtin_description;
1519 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1520 tree, rtx);
1521 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1522 tree, rtx);
1523 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1524 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1525 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1526 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1527 static rtx safe_vector_operand (rtx, enum machine_mode);
1528 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1529 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1530 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1531 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1532 static int ix86_fp_comparison_cost (enum rtx_code code);
1533 static unsigned int ix86_select_alt_pic_regnum (void);
1534 static int ix86_save_reg (unsigned int, int);
1535 static void ix86_compute_frame_layout (struct ix86_frame *);
1536 static int ix86_comp_type_attributes (tree, tree);
1537 static int ix86_function_regparm (tree, tree);
1538 const struct attribute_spec ix86_attribute_table[];
1539 static bool ix86_function_ok_for_sibcall (tree, tree);
1540 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1541 static int ix86_value_regno (enum machine_mode, tree, tree);
1542 static bool contains_128bit_aligned_vector_p (tree);
1543 static rtx ix86_struct_value_rtx (tree, int);
1544 static bool ix86_ms_bitfield_layout_p (tree);
1545 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1546 static int extended_reg_mentioned_1 (rtx *, void *);
1547 static bool ix86_rtx_costs (rtx, int, int, int *);
1548 static int min_insn_size (rtx);
1549 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1550 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1551 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1552 tree, bool);
1553 static void ix86_init_builtins (void);
1554 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1555 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1556 static tree ix86_builtin_conversion (enum tree_code, tree);
1557 static const char *ix86_mangle_fundamental_type (tree);
1558 static tree ix86_stack_protect_fail (void);
1559 static rtx ix86_internal_arg_pointer (void);
1560 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1561 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1562 rtx, rtx, int);
1563
1564 /* This function is only used on Solaris. */
1565 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1566 ATTRIBUTE_UNUSED;
1567
1568 /* Register class used for passing a given 64-bit part of the argument.
1569 These represent classes as documented by the psABI, with the exception
1570 of the SSESF and SSEDF classes, which are basically the SSE class,
1571 except that gcc uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1572
1573 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1574 whenever possible (upper half does contain padding).
1575 */
1576 enum x86_64_reg_class
1577 {
1578 X86_64_NO_CLASS,
1579 X86_64_INTEGER_CLASS,
1580 X86_64_INTEGERSI_CLASS,
1581 X86_64_SSE_CLASS,
1582 X86_64_SSESF_CLASS,
1583 X86_64_SSEDF_CLASS,
1584 X86_64_SSEUP_CLASS,
1585 X86_64_X87_CLASS,
1586 X86_64_X87UP_CLASS,
1587 X86_64_COMPLEX_X87_CLASS,
1588 X86_64_MEMORY_CLASS
1589 };
1590 static const char * const x86_64_reg_class_name[] = {
1591 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1592 "sseup", "x87", "x87up", "cplx87", "no"
1593 };
1594
1595 #define MAX_CLASSES 4
1596
1597 /* Table of constants used by fldpi, fldln2, etc.... */
1598 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1599 static bool ext_80387_constants_init = 0;
1600 static void init_ext_80387_constants (void);
1601 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1602 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1603 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1604 static section *x86_64_elf_select_section (tree decl, int reloc,
1605 unsigned HOST_WIDE_INT align)
1606 ATTRIBUTE_UNUSED;
1607 \f
1608 /* Initialize the GCC target structure. */
1609 #undef TARGET_ATTRIBUTE_TABLE
1610 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1611 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1612 # undef TARGET_MERGE_DECL_ATTRIBUTES
1613 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1614 #endif
1615
1616 #undef TARGET_COMP_TYPE_ATTRIBUTES
1617 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1618
1619 #undef TARGET_INIT_BUILTINS
1620 #define TARGET_INIT_BUILTINS ix86_init_builtins
1621 #undef TARGET_EXPAND_BUILTIN
1622 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1623
1624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1626 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1627 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1628
1629 #undef TARGET_ASM_FUNCTION_EPILOGUE
1630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1631
1632 #undef TARGET_ENCODE_SECTION_INFO
1633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1635 #else
1636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1637 #endif
1638
1639 #undef TARGET_ASM_OPEN_PAREN
1640 #define TARGET_ASM_OPEN_PAREN ""
1641 #undef TARGET_ASM_CLOSE_PAREN
1642 #define TARGET_ASM_CLOSE_PAREN ""
1643
1644 #undef TARGET_ASM_ALIGNED_HI_OP
1645 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1646 #undef TARGET_ASM_ALIGNED_SI_OP
1647 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1648 #ifdef ASM_QUAD
1649 #undef TARGET_ASM_ALIGNED_DI_OP
1650 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1651 #endif
1652
1653 #undef TARGET_ASM_UNALIGNED_HI_OP
1654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1655 #undef TARGET_ASM_UNALIGNED_SI_OP
1656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1657 #undef TARGET_ASM_UNALIGNED_DI_OP
1658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1659
1660 #undef TARGET_SCHED_ADJUST_COST
1661 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1662 #undef TARGET_SCHED_ISSUE_RATE
1663 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1666 ia32_multipass_dfa_lookahead
1667
1668 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1669 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1670
1671 #ifdef HAVE_AS_TLS
1672 #undef TARGET_HAVE_TLS
1673 #define TARGET_HAVE_TLS true
1674 #endif
1675 #undef TARGET_CANNOT_FORCE_CONST_MEM
1676 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1677 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1678 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1679
1680 #undef TARGET_DELEGITIMIZE_ADDRESS
1681 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1682
1683 #undef TARGET_MS_BITFIELD_LAYOUT_P
1684 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1685
1686 #if TARGET_MACHO
1687 #undef TARGET_BINDS_LOCAL_P
1688 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1689 #endif
1690
1691 #undef TARGET_ASM_OUTPUT_MI_THUNK
1692 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1693 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1694 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1695
1696 #undef TARGET_ASM_FILE_START
1697 #define TARGET_ASM_FILE_START x86_file_start
1698
1699 #undef TARGET_DEFAULT_TARGET_FLAGS
1700 #define TARGET_DEFAULT_TARGET_FLAGS \
1701 (TARGET_DEFAULT \
1702 | TARGET_64BIT_DEFAULT \
1703 | TARGET_SUBTARGET_DEFAULT \
1704 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1705
1706 #undef TARGET_HANDLE_OPTION
1707 #define TARGET_HANDLE_OPTION ix86_handle_option
1708
1709 #undef TARGET_RTX_COSTS
1710 #define TARGET_RTX_COSTS ix86_rtx_costs
1711 #undef TARGET_ADDRESS_COST
1712 #define TARGET_ADDRESS_COST ix86_address_cost
1713
1714 #undef TARGET_FIXED_CONDITION_CODE_REGS
1715 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1716 #undef TARGET_CC_MODES_COMPATIBLE
1717 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1718
1719 #undef TARGET_MACHINE_DEPENDENT_REORG
1720 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1721
1722 #undef TARGET_BUILD_BUILTIN_VA_LIST
1723 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1724
1725 #undef TARGET_MD_ASM_CLOBBERS
1726 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1727
1728 #undef TARGET_PROMOTE_PROTOTYPES
1729 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1730 #undef TARGET_STRUCT_VALUE_RTX
1731 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1732 #undef TARGET_SETUP_INCOMING_VARARGS
1733 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1734 #undef TARGET_MUST_PASS_IN_STACK
1735 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1736 #undef TARGET_PASS_BY_REFERENCE
1737 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1738 #undef TARGET_INTERNAL_ARG_POINTER
1739 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1740 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1741 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1742
1743 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1744 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1745
1746 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1747 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1748
1749 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1750 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1751
1752 #ifdef HAVE_AS_TLS
1753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1755 #endif
1756
1757 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1758 #undef TARGET_INSERT_ATTRIBUTES
1759 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1760 #endif
1761
1762 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1763 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1764
1765 #undef TARGET_STACK_PROTECT_FAIL
1766 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1767
1768 #undef TARGET_FUNCTION_VALUE
1769 #define TARGET_FUNCTION_VALUE ix86_function_value
1770
1771 struct gcc_target targetm = TARGET_INITIALIZER;
1772
1773 \f
1774 /* The svr4 ABI for the i386 says that records and unions are returned
1775 in memory. */
1776 #ifndef DEFAULT_PCC_STRUCT_RETURN
1777 #define DEFAULT_PCC_STRUCT_RETURN 1
1778 #endif
1779
1780 /* Implement TARGET_HANDLE_OPTION. */
1781
1782 static bool
1783 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1784 {
1785 switch (code)
1786 {
1787 case OPT_m3dnow:
1788 if (!value)
1789 {
1790 target_flags &= ~MASK_3DNOW_A;
1791 target_flags_explicit |= MASK_3DNOW_A;
1792 }
1793 return true;
1794
1795 case OPT_mmmx:
1796 if (!value)
1797 {
1798 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1799 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1800 }
1801 return true;
1802
1803 case OPT_msse:
1804 if (!value)
1805 {
1806 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1807 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1808 }
1809 return true;
1810
1811 case OPT_msse2:
1812 if (!value)
1813 {
1814 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1815 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1816 }
1817 return true;
1818
1819 case OPT_msse3:
1820 if (!value)
1821 {
1822 target_flags &= ~MASK_SSE4A;
1823 target_flags_explicit |= MASK_SSE4A;
1824 }
1825 return true;
1826
1827 default:
1828 return true;
1829 }
1830 }
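/* Illustrative example (not part of the original sources): because the
   handlers above mark the implied masks as explicit, "-mno-sse" does not
   just clear MASK_SSE; it also clears MASK_SSE2, MASK_SSE3 and MASK_SSE4A
   and records them in target_flags_explicit, so a later -march selection in
   override_options cannot silently re-enable them.  */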
1831
1832 /* Sometimes certain combinations of command options do not make
1833 sense on a particular target machine. You can define a macro
1834 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1835 defined, is executed once just after all the command options have
1836 been parsed.
1837
1838 Don't use this macro to turn on various extra optimizations for
1839 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1840
1841 void
1842 override_options (void)
1843 {
1844 int i;
1845 int ix86_tune_defaulted = 0;
1846 unsigned int ix86_arch_mask, ix86_tune_mask;
1847
1848 /* Comes from final.c -- no real reason to change it. */
1849 #define MAX_CODE_ALIGN 16
1850
1851 static struct ptt
1852 {
1853 const struct processor_costs *cost; /* Processor costs */
1854 const int target_enable; /* Target flags to enable. */
1855 const int target_disable; /* Target flags to disable. */
1856 const int align_loop; /* Default alignments. */
1857 const int align_loop_max_skip;
1858 const int align_jump;
1859 const int align_jump_max_skip;
1860 const int align_func;
1861 }
1862 const processor_target_table[PROCESSOR_max] =
1863 {
1864 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1865 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1866 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1867 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1868 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1869 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1870 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1871 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1872 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1873 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1874 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1875 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1876 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1877 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1878 };
1879
1880 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1881 static struct pta
1882 {
1883 const char *const name; /* processor name or nickname. */
1884 const enum processor_type processor;
1885 const enum pta_flags
1886 {
1887 PTA_SSE = 1,
1888 PTA_SSE2 = 2,
1889 PTA_SSE3 = 4,
1890 PTA_MMX = 8,
1891 PTA_PREFETCH_SSE = 16,
1892 PTA_3DNOW = 32,
1893 PTA_3DNOW_A = 64,
1894 PTA_64BIT = 128,
1895 PTA_SSSE3 = 256,
1896 PTA_CX16 = 512,
1897 PTA_POPCNT = 1024,
1898 PTA_ABM = 2048,
1899 PTA_SSE4A = 4096
1900 } flags;
1901 }
1902 const processor_alias_table[] =
1903 {
1904 {"i386", PROCESSOR_I386, 0},
1905 {"i486", PROCESSOR_I486, 0},
1906 {"i586", PROCESSOR_PENTIUM, 0},
1907 {"pentium", PROCESSOR_PENTIUM, 0},
1908 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1909 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1910 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1911 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1912 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1913 {"i686", PROCESSOR_PENTIUMPRO, 0},
1914 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1915 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1916 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1917 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1918 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1920 | PTA_MMX | PTA_PREFETCH_SSE},
1921 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1922 | PTA_MMX | PTA_PREFETCH_SSE},
1923 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1924 | PTA_MMX | PTA_PREFETCH_SSE},
1925 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1926 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1927 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1928 | PTA_64BIT | PTA_MMX
1929 | PTA_PREFETCH_SSE | PTA_CX16},
1930 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1931 | PTA_3DNOW_A},
1932 {"k6", PROCESSOR_K6, PTA_MMX},
1933 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1934 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1935 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1936 | PTA_3DNOW_A},
1937 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1938 | PTA_3DNOW | PTA_3DNOW_A},
1939 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1940 | PTA_3DNOW_A | PTA_SSE},
1941 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1942 | PTA_3DNOW_A | PTA_SSE},
1943 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1944 | PTA_3DNOW_A | PTA_SSE},
1945 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1946 | PTA_SSE | PTA_SSE2 },
1947 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1948 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1949 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1950 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1951 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1952 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1953 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1954 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1955 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1956 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1957 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1958 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1959 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1960 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1961 };
1962
1963 int const pta_size = ARRAY_SIZE (processor_alias_table);
1964
1965 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1966 SUBTARGET_OVERRIDE_OPTIONS;
1967 #endif
1968
1969 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1970 SUBSUBTARGET_OVERRIDE_OPTIONS;
1971 #endif
1972
1973 /* 64-bit Mach-O (Darwin) requires PIC, so make -fPIC the default there. */
1974 if (TARGET_MACHO && TARGET_64BIT)
1975 flag_pic = 2;
1976
1977 /* Set the default values for switches whose default depends on TARGET_64BIT
1978 in case they weren't overwritten by command line options. */
1979 if (TARGET_64BIT)
1980 {
1981 /* Mach-O doesn't support omitting the frame pointer for now. */
1982 if (flag_omit_frame_pointer == 2)
1983 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1984 if (flag_asynchronous_unwind_tables == 2)
1985 flag_asynchronous_unwind_tables = 1;
1986 if (flag_pcc_struct_return == 2)
1987 flag_pcc_struct_return = 0;
1988 }
1989 else
1990 {
1991 if (flag_omit_frame_pointer == 2)
1992 flag_omit_frame_pointer = 0;
1993 if (flag_asynchronous_unwind_tables == 2)
1994 flag_asynchronous_unwind_tables = 0;
1995 if (flag_pcc_struct_return == 2)
1996 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1997 }
1998
1999 /* Need to check -mtune=generic first. */
2000 if (ix86_tune_string)
2001 {
2002 if (!strcmp (ix86_tune_string, "generic")
2003 || !strcmp (ix86_tune_string, "i686")
2004 /* As special support for cross compilers we read -mtune=native
2005 as -mtune=generic. With native compilers we won't see the
2006 -mtune=native, as it was changed by the driver. */
2007 || !strcmp (ix86_tune_string, "native"))
2008 {
2009 if (TARGET_64BIT)
2010 ix86_tune_string = "generic64";
2011 else
2012 ix86_tune_string = "generic32";
2013 }
2014 else if (!strncmp (ix86_tune_string, "generic", 7))
2015 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2016 }
2017 else
2018 {
2019 if (ix86_arch_string)
2020 ix86_tune_string = ix86_arch_string;
2021 if (!ix86_tune_string)
2022 {
2023 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2024 ix86_tune_defaulted = 1;
2025 }
2026
2027 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2028 need to use a sensible tune option. */
2029 if (!strcmp (ix86_tune_string, "generic")
2030 || !strcmp (ix86_tune_string, "x86-64")
2031 || !strcmp (ix86_tune_string, "i686"))
2032 {
2033 if (TARGET_64BIT)
2034 ix86_tune_string = "generic64";
2035 else
2036 ix86_tune_string = "generic32";
2037 }
2038 }
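/* Illustrative examples of the defaulting above (not part of the original
   sources): "gcc -march=k8" with no -mtune also tunes for k8, while
   "-mtune=native" as seen by a cross compiler, or an explicit
   -mtune=generic, is rewritten to generic32 or generic64 depending on
   TARGET_64BIT.  */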
2039 if (ix86_stringop_string)
2040 {
2041 if (!strcmp (ix86_stringop_string, "rep_byte"))
2042 stringop_alg = rep_prefix_1_byte;
2043 else if (!strcmp (ix86_stringop_string, "libcall"))
2044 stringop_alg = libcall;
2045 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2046 stringop_alg = rep_prefix_4_byte;
2047 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2048 stringop_alg = rep_prefix_8_byte;
2049 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2050 stringop_alg = loop_1_byte;
2051 else if (!strcmp (ix86_stringop_string, "loop"))
2052 stringop_alg = loop;
2053 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2054 stringop_alg = unrolled_loop;
2055 else
2056 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2057 }
2058 if (!strcmp (ix86_tune_string, "x86-64"))
2059 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2060 "-mtune=generic instead as appropriate.");
2061
2062 if (!ix86_arch_string)
2063 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2064 if (!strcmp (ix86_arch_string, "generic"))
2065 error ("generic CPU can be used only for -mtune= switch");
2066 if (!strncmp (ix86_arch_string, "generic", 7))
2067 error ("bad value (%s) for -march= switch", ix86_arch_string);
2068
2069 if (ix86_cmodel_string != 0)
2070 {
2071 if (!strcmp (ix86_cmodel_string, "small"))
2072 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2073 else if (!strcmp (ix86_cmodel_string, "medium"))
2074 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2075 else if (!strcmp (ix86_cmodel_string, "large"))
2076 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2077 else if (flag_pic)
2078 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2079 else if (!strcmp (ix86_cmodel_string, "32"))
2080 ix86_cmodel = CM_32;
2081 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2082 ix86_cmodel = CM_KERNEL;
2083 else
2084 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2085 }
2086 else
2087 {
2088 ix86_cmodel = CM_32;
2089 if (TARGET_64BIT)
2090 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2091 }
2092 if (ix86_asm_string != 0)
2093 {
2094 if (! TARGET_MACHO
2095 && !strcmp (ix86_asm_string, "intel"))
2096 ix86_asm_dialect = ASM_INTEL;
2097 else if (!strcmp (ix86_asm_string, "att"))
2098 ix86_asm_dialect = ASM_ATT;
2099 else
2100 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2101 }
2102 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2103 error ("code model %qs not supported in the %s bit mode",
2104 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2105 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2106 sorry ("%i-bit mode not compiled in",
2107 (target_flags & MASK_64BIT) ? 64 : 32);
2108
2109 for (i = 0; i < pta_size; i++)
2110 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2111 {
2112 ix86_arch = processor_alias_table[i].processor;
2113 /* Default cpu tuning to the architecture. */
2114 ix86_tune = ix86_arch;
2115 if (processor_alias_table[i].flags & PTA_MMX
2116 && !(target_flags_explicit & MASK_MMX))
2117 target_flags |= MASK_MMX;
2118 if (processor_alias_table[i].flags & PTA_3DNOW
2119 && !(target_flags_explicit & MASK_3DNOW))
2120 target_flags |= MASK_3DNOW;
2121 if (processor_alias_table[i].flags & PTA_3DNOW_A
2122 && !(target_flags_explicit & MASK_3DNOW_A))
2123 target_flags |= MASK_3DNOW_A;
2124 if (processor_alias_table[i].flags & PTA_SSE
2125 && !(target_flags_explicit & MASK_SSE))
2126 target_flags |= MASK_SSE;
2127 if (processor_alias_table[i].flags & PTA_SSE2
2128 && !(target_flags_explicit & MASK_SSE2))
2129 target_flags |= MASK_SSE2;
2130 if (processor_alias_table[i].flags & PTA_SSE3
2131 && !(target_flags_explicit & MASK_SSE3))
2132 target_flags |= MASK_SSE3;
2133 if (processor_alias_table[i].flags & PTA_SSSE3
2134 && !(target_flags_explicit & MASK_SSSE3))
2135 target_flags |= MASK_SSSE3;
2136 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2137 x86_prefetch_sse = true;
2138 if (processor_alias_table[i].flags & PTA_CX16)
2139 x86_cmpxchg16b = true;
2140 if (processor_alias_table[i].flags & PTA_POPCNT
2141 && !(target_flags_explicit & MASK_POPCNT))
2142 target_flags |= MASK_POPCNT;
2143 if (processor_alias_table[i].flags & PTA_ABM
2144 && !(target_flags_explicit & MASK_ABM))
2145 target_flags |= MASK_ABM;
2146 if (processor_alias_table[i].flags & PTA_SSE4A
2147 && !(target_flags_explicit & MASK_SSE4A))
2148 target_flags |= MASK_SSE4A;
2149 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2150 error ("CPU you selected does not support x86-64 "
2151 "instruction set");
2152 break;
2153 }
2154
2155 if (i == pta_size)
2156 error ("bad value (%s) for -march= switch", ix86_arch_string);
2157
2158 ix86_arch_mask = 1u << ix86_arch;
2159 for (i = 0; i < X86_ARCH_LAST; ++i)
2160 ix86_arch_features[i] &= ix86_arch_mask;
2161
2162 for (i = 0; i < pta_size; i++)
2163 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2164 {
2165 ix86_tune = processor_alias_table[i].processor;
2166 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2167 {
2168 if (ix86_tune_defaulted)
2169 {
2170 ix86_tune_string = "x86-64";
2171 for (i = 0; i < pta_size; i++)
2172 if (! strcmp (ix86_tune_string,
2173 processor_alias_table[i].name))
2174 break;
2175 ix86_tune = processor_alias_table[i].processor;
2176 }
2177 else
2178 error ("CPU you selected does not support x86-64 "
2179 "instruction set");
2180 }
2181 /* Intel CPUs have always interpreted SSE prefetch instructions as
2182 NOPs; so, we can enable SSE prefetch instructions even when
2183 -mtune (rather than -march) points us to a processor that has them.
2184 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2185 higher processors. */
2186 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2187 x86_prefetch_sse = true;
2188 break;
2189 }
2190 if (i == pta_size)
2191 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2192
2193 ix86_tune_mask = 1u << ix86_tune;
2194 for (i = 0; i < X86_TUNE_LAST; ++i)
2195 ix86_tune_features[i] &= ix86_tune_mask;
2196
2197 if (optimize_size)
2198 ix86_cost = &size_cost;
2199 else
2200 ix86_cost = processor_target_table[ix86_tune].cost;
2201 target_flags |= processor_target_table[ix86_tune].target_enable;
2202 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2203
2204 /* Arrange to set up i386_stack_locals for all functions. */
2205 init_machine_status = ix86_init_machine_status;
2206
2207 /* Validate -mregparm= value. */
2208 if (ix86_regparm_string)
2209 {
2210 i = atoi (ix86_regparm_string);
2211 if (i < 0 || i > REGPARM_MAX)
2212 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2213 else
2214 ix86_regparm = i;
2215 }
2216 else
2217 if (TARGET_64BIT)
2218 ix86_regparm = REGPARM_MAX;
2219
2220 /* If the user has provided any of the -malign-* options,
2221 warn and use that value only if -falign-* is not set.
2222 Remove this code in GCC 3.2 or later. */
2223 if (ix86_align_loops_string)
2224 {
2225 warning (0, "-malign-loops is obsolete, use -falign-loops");
2226 if (align_loops == 0)
2227 {
2228 i = atoi (ix86_align_loops_string);
2229 if (i < 0 || i > MAX_CODE_ALIGN)
2230 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2231 else
2232 align_loops = 1 << i;
2233 }
2234 }
2235
2236 if (ix86_align_jumps_string)
2237 {
2238 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2239 if (align_jumps == 0)
2240 {
2241 i = atoi (ix86_align_jumps_string);
2242 if (i < 0 || i > MAX_CODE_ALIGN)
2243 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2244 else
2245 align_jumps = 1 << i;
2246 }
2247 }
2248
2249 if (ix86_align_funcs_string)
2250 {
2251 warning (0, "-malign-functions is obsolete, use -falign-functions");
2252 if (align_functions == 0)
2253 {
2254 i = atoi (ix86_align_funcs_string);
2255 if (i < 0 || i > MAX_CODE_ALIGN)
2256 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2257 else
2258 align_functions = 1 << i;
2259 }
2260 }
2261
2262 /* Default align_* from the processor table. */
2263 if (align_loops == 0)
2264 {
2265 align_loops = processor_target_table[ix86_tune].align_loop;
2266 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2267 }
2268 if (align_jumps == 0)
2269 {
2270 align_jumps = processor_target_table[ix86_tune].align_jump;
2271 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2272 }
2273 if (align_functions == 0)
2274 {
2275 align_functions = processor_target_table[ix86_tune].align_func;
2276 }
2277
2278 /* Validate -mbranch-cost= value, or provide default. */
2279 ix86_branch_cost = ix86_cost->branch_cost;
2280 if (ix86_branch_cost_string)
2281 {
2282 i = atoi (ix86_branch_cost_string);
2283 if (i < 0 || i > 5)
2284 error ("-mbranch-cost=%d is not between 0 and 5", i);
2285 else
2286 ix86_branch_cost = i;
2287 }
2288 if (ix86_section_threshold_string)
2289 {
2290 i = atoi (ix86_section_threshold_string);
2291 if (i < 0)
2292 error ("-mlarge-data-threshold=%d is negative", i);
2293 else
2294 ix86_section_threshold = i;
2295 }
2296
2297 if (ix86_tls_dialect_string)
2298 {
2299 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2300 ix86_tls_dialect = TLS_DIALECT_GNU;
2301 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2302 ix86_tls_dialect = TLS_DIALECT_GNU2;
2303 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2304 ix86_tls_dialect = TLS_DIALECT_SUN;
2305 else
2306 error ("bad value (%s) for -mtls-dialect= switch",
2307 ix86_tls_dialect_string);
2308 }
2309
2310 /* Keep nonleaf frame pointers. */
2311 if (flag_omit_frame_pointer)
2312 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2313 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2314 flag_omit_frame_pointer = 1;
2315
2316 /* If we're doing fast math, we don't care about comparison order
2317 wrt NaNs. This lets us use a shorter comparison sequence. */
2318 if (flag_finite_math_only)
2319 target_flags &= ~MASK_IEEE_FP;
2320
2321 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2322 since the insns won't need emulation. */
2323 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2324 target_flags &= ~MASK_NO_FANCY_MATH_387;
2325
2326 /* Likewise, if the target doesn't have a 387, or we've specified
2327 software floating point, don't use 387 inline intrinsics. */
2328 if (!TARGET_80387)
2329 target_flags |= MASK_NO_FANCY_MATH_387;
2330
2331 /* Turn on SSE3 builtins for -mssse3. */
2332 if (TARGET_SSSE3)
2333 target_flags |= MASK_SSE3;
2334
2335 /* Turn on SSE3 builtins for -msse4a. */
2336 if (TARGET_SSE4A)
2337 target_flags |= MASK_SSE3;
2338
2339 /* Turn on SSE2 builtins for -msse3. */
2340 if (TARGET_SSE3)
2341 target_flags |= MASK_SSE2;
2342
2343 /* Turn on SSE builtins for -msse2. */
2344 if (TARGET_SSE2)
2345 target_flags |= MASK_SSE;
2346
2347 /* Turn on MMX builtins for -msse. */
2348 if (TARGET_SSE)
2349 {
2350 target_flags |= MASK_MMX & ~target_flags_explicit;
2351 x86_prefetch_sse = true;
2352 }
2353
2354 /* Turn on MMX builtins for 3Dnow. */
2355 if (TARGET_3DNOW)
2356 target_flags |= MASK_MMX;
2357
2358 /* Turn on POPCNT builtins for -mabm. */
2359 if (TARGET_ABM)
2360 target_flags |= MASK_POPCNT;
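/* Summary of the implications above (illustrative, not part of the original
   sources): a plain -mssse3 ends up enabling SSSE3, SSE3, SSE2, SSE and,
   unless MMX was explicitly disabled, MMX as well; likewise -msse4a pulls
   in SSE3/SSE2/SSE, and -mabm enables the POPCNT builtins.  */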
2361
2362 if (TARGET_64BIT)
2363 {
2364 if (TARGET_ALIGN_DOUBLE)
2365 error ("-malign-double makes no sense in the 64bit mode");
2366 if (TARGET_RTD)
2367 error ("-mrtd calling convention not supported in the 64bit mode");
2368
2369 /* Enable by default the SSE and MMX builtins. Do allow the user to
2370 explicitly disable any of these. In particular, disabling SSE and
2371 MMX for kernel code is extremely useful. */
2372 target_flags
2373 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2374 & ~target_flags_explicit);
2375 }
2376 else
2377 {
2378 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2379 when the programmer takes care to keep the stack from being destroyed. */
2380 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2381 target_flags |= MASK_NO_RED_ZONE;
2382 }
2383
2384 /* Validate -mpreferred-stack-boundary= value, or provide default.
2385 The default of 128 bits is for the Pentium III's SSE __m128. We can't
2386 lower it for optimize_size, because then object files compiled with -Os
2387 could not be mixed with ones compiled at other -O levels. */
2388 ix86_preferred_stack_boundary = 128;
2389 if (ix86_preferred_stack_boundary_string)
2390 {
2391 i = atoi (ix86_preferred_stack_boundary_string);
2392 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2393 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2394 TARGET_64BIT ? 4 : 2);
2395 else
2396 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2397 }
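/* Worked example (illustrative, not part of the original sources): the
   option value is log2 of the boundary in bytes, so
   -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. the 16-byte default chosen above.  */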
2398
2399 /* Accept -msseregparm only if at least SSE support is enabled. */
2400 if (TARGET_SSEREGPARM
2401 && ! TARGET_SSE)
2402 error ("-msseregparm used without SSE enabled");
2403
2404 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2405 if (ix86_fpmath_string != 0)
2406 {
2407 if (! strcmp (ix86_fpmath_string, "387"))
2408 ix86_fpmath = FPMATH_387;
2409 else if (! strcmp (ix86_fpmath_string, "sse"))
2410 {
2411 if (!TARGET_SSE)
2412 {
2413 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2414 ix86_fpmath = FPMATH_387;
2415 }
2416 else
2417 ix86_fpmath = FPMATH_SSE;
2418 }
2419 else if (! strcmp (ix86_fpmath_string, "387,sse")
2420 || ! strcmp (ix86_fpmath_string, "sse,387"))
2421 {
2422 if (!TARGET_SSE)
2423 {
2424 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2425 ix86_fpmath = FPMATH_387;
2426 }
2427 else if (!TARGET_80387)
2428 {
2429 warning (0, "387 instruction set disabled, using SSE arithmetics");
2430 ix86_fpmath = FPMATH_SSE;
2431 }
2432 else
2433 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2434 }
2435 else
2436 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2437 }
2438
2439 /* If the i387 is disabled, then do not return values in it. */
2440 if (!TARGET_80387)
2441 target_flags &= ~MASK_FLOAT_RETURNS;
2442
2443 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2444 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2445 && !optimize_size)
2446 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2447
2448 /* ??? Unwind info is not correct around the CFG unless either a frame
2449 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2450 unwind info generation to be aware of the CFG and propagating states
2451 around edges. */
2452 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2453 || flag_exceptions || flag_non_call_exceptions)
2454 && flag_omit_frame_pointer
2455 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2456 {
2457 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2458 warning (0, "unwind tables currently require either a frame pointer "
2459 "or -maccumulate-outgoing-args for correctness");
2460 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2461 }
2462
2463 /* For sane SSE instruction set generation we need the fcomi instruction.
2464 It is safe to enable all CMOVE instructions. */
2465 if (TARGET_SSE)
2466 TARGET_CMOVE = 1;
2467
2468 /* ??? Any idea why this is unconditionally disabled for 64-bit? */
2469 if (TARGET_64BIT)
2470 TARGET_USE_SAHF = 0;
2471
2472 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2473 {
2474 char *p;
2475 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2476 p = strchr (internal_label_prefix, 'X');
2477 internal_label_prefix_len = p - internal_label_prefix;
2478 *p = '\0';
2479 }
2480
2481 /* When no scheduling description is available, disable the scheduler passes
2482 so they won't slow down compilation or make x87 code slower. */
2483 if (!TARGET_SCHEDULE)
2484 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2485
2486 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2487 set_param_value ("simultaneous-prefetches",
2488 ix86_cost->simultaneous_prefetches);
2489 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2490 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2491 }
2492 \f
2493 /* Switch to the appropriate section for output of DECL.
2494 DECL is either a `VAR_DECL' node or a constant of some sort.
2495 RELOC indicates whether forming the initial value of DECL requires
2496 link-time relocations. */
2497
2498 static section *
2499 x86_64_elf_select_section (tree decl, int reloc,
2500 unsigned HOST_WIDE_INT align)
2501 {
2502 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2503 && ix86_in_large_data_p (decl))
2504 {
2505 const char *sname = NULL;
2506 unsigned int flags = SECTION_WRITE;
2507 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2508 {
2509 case SECCAT_DATA:
2510 sname = ".ldata";
2511 break;
2512 case SECCAT_DATA_REL:
2513 sname = ".ldata.rel";
2514 break;
2515 case SECCAT_DATA_REL_LOCAL:
2516 sname = ".ldata.rel.local";
2517 break;
2518 case SECCAT_DATA_REL_RO:
2519 sname = ".ldata.rel.ro";
2520 break;
2521 case SECCAT_DATA_REL_RO_LOCAL:
2522 sname = ".ldata.rel.ro.local";
2523 break;
2524 case SECCAT_BSS:
2525 sname = ".lbss";
2526 flags |= SECTION_BSS;
2527 break;
2528 case SECCAT_RODATA:
2529 case SECCAT_RODATA_MERGE_STR:
2530 case SECCAT_RODATA_MERGE_STR_INIT:
2531 case SECCAT_RODATA_MERGE_CONST:
2532 sname = ".lrodata";
2533 flags = 0;
2534 break;
2535 case SECCAT_SRODATA:
2536 case SECCAT_SDATA:
2537 case SECCAT_SBSS:
2538 gcc_unreachable ();
2539 case SECCAT_TEXT:
2540 case SECCAT_TDATA:
2541 case SECCAT_TBSS:
2542 /* We don't split these for the medium model. Place them into
2543 the default sections and hope for the best. */
2544 break;
2545 }
2546 if (sname)
2547 {
2548 /* We might get called with string constants, but get_named_section
2549 doesn't like them as they are not DECLs. Also, we need to set
2550 flags in that case. */
2551 if (!DECL_P (decl))
2552 return get_section (sname, flags, NULL);
2553 return get_named_section (decl, sname, reloc);
2554 }
2555 }
2556 return default_elf_select_section (decl, reloc, align);
2557 }
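/* Illustrative example (not part of the original sources): with
   -mcmodel=medium and the default -mlarge-data-threshold=65536, a definition
   such as the one below is routed roughly to .ldata by the code above, while
   objects at or under the threshold stay in the normal data sections.  */
#if 0 /* example only */
static char big_table[1 << 20] = { 1 }; /* 1 MB > threshold: placed in .ldata */
#endif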
2558
2559 /* Build up a unique section name, expressed as a
2560 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2561 RELOC indicates whether the initial value of EXP requires
2562 link-time relocations. */
2563
2564 static void
2565 x86_64_elf_unique_section (tree decl, int reloc)
2566 {
2567 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2568 && ix86_in_large_data_p (decl))
2569 {
2570 const char *prefix = NULL;
2571 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2572 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2573
2574 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2575 {
2576 case SECCAT_DATA:
2577 case SECCAT_DATA_REL:
2578 case SECCAT_DATA_REL_LOCAL:
2579 case SECCAT_DATA_REL_RO:
2580 case SECCAT_DATA_REL_RO_LOCAL:
2581 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2582 break;
2583 case SECCAT_BSS:
2584 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2585 break;
2586 case SECCAT_RODATA:
2587 case SECCAT_RODATA_MERGE_STR:
2588 case SECCAT_RODATA_MERGE_STR_INIT:
2589 case SECCAT_RODATA_MERGE_CONST:
2590 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2591 break;
2592 case SECCAT_SRODATA:
2593 case SECCAT_SDATA:
2594 case SECCAT_SBSS:
2595 gcc_unreachable ();
2596 case SECCAT_TEXT:
2597 case SECCAT_TDATA:
2598 case SECCAT_TBSS:
2599 /* We don't split these for the medium model. Place them into
2600 the default sections and hope for the best. */
2601 break;
2602 }
2603 if (prefix)
2604 {
2605 const char *name;
2606 size_t nlen, plen;
2607 char *string;
2608 plen = strlen (prefix);
2609
2610 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2611 name = targetm.strip_name_encoding (name);
2612 nlen = strlen (name);
2613
2614 string = alloca (nlen + plen + 1);
2615 memcpy (string, prefix, plen);
2616 memcpy (string + plen, name, nlen + 1);
2617
2618 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2619 return;
2620 }
2621 }
2622 default_unique_section (decl, reloc);
2623 }
2624
2625 #ifdef COMMON_ASM_OP
2626 /* This says how to output assembler code to declare an
2627 uninitialized external linkage data object.
2628
2629 For the x86-64 medium model we need to use the .largecomm directive for
2630 large objects. */
2631 void
2632 x86_elf_aligned_common (FILE *file,
2633 const char *name, unsigned HOST_WIDE_INT size,
2634 int align)
2635 {
2636 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2637 && size > (unsigned int)ix86_section_threshold)
2638 fprintf (file, ".largecomm\t");
2639 else
2640 fprintf (file, "%s", COMMON_ASM_OP);
2641 assemble_name (file, name);
2642 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2643 size, align / BITS_PER_UNIT);
2644 }
2645 #endif
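/* Illustrative output (not part of the original sources): for a 1 MB common
   symbol with 32-byte alignment in the medium model the routine above emits
   roughly
       .largecomm      big_table,1048576,32
   whereas objects at or below -mlarge-data-threshold get the ordinary
   COMMON_ASM_OP (".comm") directive.  */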
2646 /* Utility function for targets to use in implementing
2647 ASM_OUTPUT_ALIGNED_BSS. */
2648
2649 void
2650 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2651 const char *name, unsigned HOST_WIDE_INT size,
2652 int align)
2653 {
2654 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2655 && size > (unsigned int)ix86_section_threshold)
2656 switch_to_section (get_named_section (decl, ".lbss", 0));
2657 else
2658 switch_to_section (bss_section);
2659 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2660 #ifdef ASM_DECLARE_OBJECT_NAME
2661 last_assemble_variable_decl = decl;
2662 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2663 #else
2664 /* Standard thing is just output label for the object. */
2665 ASM_OUTPUT_LABEL (file, name);
2666 #endif /* ASM_DECLARE_OBJECT_NAME */
2667 ASM_OUTPUT_SKIP (file, size ? size : 1);
2668 }
2669 \f
2670 void
2671 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2672 {
2673 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2674 make the problem with not enough registers even worse. */
2675 #ifdef INSN_SCHEDULING
2676 if (level > 1)
2677 flag_schedule_insns = 0;
2678 #endif
2679
2680 if (TARGET_MACHO)
2681 /* The Darwin libraries never set errno, so we might as well
2682 avoid calling them when that's the only reason we would. */
2683 flag_errno_math = 0;
2684
2685 /* The default values of these switches depend on TARGET_64BIT,
2686 which is not known at this moment. Mark these values with 2 and
2687 let the user override them. If there is no command line option
2688 specifying them, we will set the defaults in override_options. */
2689 if (optimize >= 1)
2690 flag_omit_frame_pointer = 2;
2691 flag_pcc_struct_return = 2;
2692 flag_asynchronous_unwind_tables = 2;
2693 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2694 SUBTARGET_OPTIMIZATION_OPTIONS;
2695 #endif
2696 }
2697 \f
2698 /* Table of valid machine attributes. */
2699 const struct attribute_spec ix86_attribute_table[] =
2700 {
2701 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2702 /* Stdcall attribute says callee is responsible for popping arguments
2703 if they are not variable. */
2704 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2705 /* Fastcall attribute says callee is responsible for popping arguments
2706 if they are not variable. */
2707 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2708 /* Cdecl attribute says the callee is a normal C declaration */
2709 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2710 /* Regparm attribute specifies how many integer arguments are to be
2711 passed in registers. */
2712 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2713 /* Sseregparm attribute says we are using x86_64 calling conventions
2714 for FP arguments. */
2715 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2716 /* force_align_arg_pointer says this function realigns the stack at entry. */
2717 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2718 false, true, true, ix86_handle_cconv_attribute },
2719 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2720 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2721 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2722 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2723 #endif
2724 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2725 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2726 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2727 SUBTARGET_ATTRIBUTE_TABLE,
2728 #endif
2729 { NULL, 0, 0, false, false, false, NULL }
2730 };
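/* Illustrative example (not part of the original sources): typical ia32 uses
   of the calling-convention attributes listed above, all of which are routed
   through ix86_handle_cconv_attribute.  */
#if 0 /* example only */
int __attribute__ ((regparm (3))) add3 (int a, int b, int c);
int __attribute__ ((fastcall)) hash (const char *s, int len);
int __attribute__ ((stdcall)) win_callback (void *ctx);
double __attribute__ ((sseregparm)) dot2 (double x, double y);
#endif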
2731
2732 /* Decide whether we can make a sibling call to a function. DECL is the
2733 declaration of the function being targeted by the call and EXP is the
2734 CALL_EXPR representing the call. */
2735
2736 static bool
2737 ix86_function_ok_for_sibcall (tree decl, tree exp)
2738 {
2739 tree func;
2740 rtx a, b;
2741
2742 /* If we are generating position-independent code, we cannot sibcall
2743 optimize any indirect call, or a direct call to a global function,
2744 as the PLT requires %ebx be live. */
2745 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2746 return false;
2747
2748 if (decl)
2749 func = decl;
2750 else
2751 {
2752 func = TREE_TYPE (CALL_EXPR_FN (exp));
2753 if (POINTER_TYPE_P (func))
2754 func = TREE_TYPE (func);
2755 }
2756
2757 /* Check that the return value locations are the same. For example,
2758 if we are returning floats on the 80387 register stack, we cannot
2759 make a sibcall from a function that doesn't return a float to a
2760 function that does or, conversely, from a function that does return
2761 a float to a function that doesn't; the necessary stack adjustment
2762 would not be executed. This is also the place we notice
2763 differences in the return value ABI. Note that it is ok for one
2764 of the functions to have void return type as long as the return
2765 value of the other is passed in a register. */
2766 a = ix86_function_value (TREE_TYPE (exp), func, false);
2767 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2768 cfun->decl, false);
2769 if (STACK_REG_P (a) || STACK_REG_P (b))
2770 {
2771 if (!rtx_equal_p (a, b))
2772 return false;
2773 }
2774 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2775 ;
2776 else if (!rtx_equal_p (a, b))
2777 return false;
2778
2779 /* If this call is indirect, we'll need to be able to use a call-clobbered
2780 register for the address of the target function. Make sure that all
2781 such registers are not used for passing parameters. */
2782 if (!decl && !TARGET_64BIT)
2783 {
2784 tree type;
2785
2786 /* We're looking at the CALL_EXPR, we need the type of the function. */
2787 type = CALL_EXPR_FN (exp); /* pointer expression */
2788 type = TREE_TYPE (type); /* pointer type */
2789 type = TREE_TYPE (type); /* function type */
2790
2791 if (ix86_function_regparm (type, NULL) >= 3)
2792 {
2793 /* ??? Need to count the actual number of registers to be used,
2794 not the possible number of registers. Fix later. */
2795 return false;
2796 }
2797 }
2798
2799 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2800 /* Dllimport'd functions are also called indirectly. */
2801 if (decl && DECL_DLLIMPORT_P (decl)
2802 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2803 return false;
2804 #endif
2805
2806 /* If we forced aligned the stack, then sibcalling would unalign the
2807 stack, which may break the called function. */
2808 if (cfun->machine->force_align_arg_pointer)
2809 return false;
2810
2811 /* Otherwise okay. That also includes certain types of indirect calls. */
2812 return true;
2813 }
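/* Illustrative example (not part of the original sources): with -fPIC on
   ia32, the first check above rejects sibcall optimization of a tail call to
   a global function, because calling through the PLT needs %ebx live.  */
#if 0 /* example only */
extern int bar (int);
int foo (int x) { return bar (x); } /* emitted as call + ret, not a jmp */
#endif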
2814
2815 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2816 calling convention attributes;
2817 arguments as in struct attribute_spec.handler. */
2818
2819 static tree
2820 ix86_handle_cconv_attribute (tree *node, tree name,
2821 tree args,
2822 int flags ATTRIBUTE_UNUSED,
2823 bool *no_add_attrs)
2824 {
2825 if (TREE_CODE (*node) != FUNCTION_TYPE
2826 && TREE_CODE (*node) != METHOD_TYPE
2827 && TREE_CODE (*node) != FIELD_DECL
2828 && TREE_CODE (*node) != TYPE_DECL)
2829 {
2830 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2831 IDENTIFIER_POINTER (name));
2832 *no_add_attrs = true;
2833 return NULL_TREE;
2834 }
2835
2836 /* Can combine regparm with all attributes but fastcall. */
2837 if (is_attribute_p ("regparm", name))
2838 {
2839 tree cst;
2840
2841 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2842 {
2843 error ("fastcall and regparm attributes are not compatible");
2844 }
2845
2846 cst = TREE_VALUE (args);
2847 if (TREE_CODE (cst) != INTEGER_CST)
2848 {
2849 warning (OPT_Wattributes,
2850 "%qs attribute requires an integer constant argument",
2851 IDENTIFIER_POINTER (name));
2852 *no_add_attrs = true;
2853 }
2854 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2855 {
2856 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2857 IDENTIFIER_POINTER (name), REGPARM_MAX);
2858 *no_add_attrs = true;
2859 }
2860
2861 if (!TARGET_64BIT
2862 && lookup_attribute (ix86_force_align_arg_pointer_string,
2863 TYPE_ATTRIBUTES (*node))
2864 && compare_tree_int (cst, REGPARM_MAX-1))
2865 {
2866 error ("%s functions limited to %d register parameters",
2867 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2868 }
2869
2870 return NULL_TREE;
2871 }
2872
2873 if (TARGET_64BIT)
2874 {
2875 warning (OPT_Wattributes, "%qs attribute ignored",
2876 IDENTIFIER_POINTER (name));
2877 *no_add_attrs = true;
2878 return NULL_TREE;
2879 }
2880
2881 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2882 if (is_attribute_p ("fastcall", name))
2883 {
2884 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2885 {
2886 error ("fastcall and cdecl attributes are not compatible");
2887 }
2888 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2889 {
2890 error ("fastcall and stdcall attributes are not compatible");
2891 }
2892 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2893 {
2894 error ("fastcall and regparm attributes are not compatible");
2895 }
2896 }
2897
2898 /* Can combine stdcall with fastcall (redundant), regparm and
2899 sseregparm. */
2900 else if (is_attribute_p ("stdcall", name))
2901 {
2902 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2903 {
2904 error ("stdcall and cdecl attributes are not compatible");
2905 }
2906 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2907 {
2908 error ("stdcall and fastcall attributes are not compatible");
2909 }
2910 }
2911
2912 /* Can combine cdecl with regparm and sseregparm. */
2913 else if (is_attribute_p ("cdecl", name))
2914 {
2915 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2916 {
2917 error ("stdcall and cdecl attributes are not compatible");
2918 }
2919 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2920 {
2921 error ("fastcall and cdecl attributes are not compatible");
2922 }
2923 }
2924
2925 /* Can combine sseregparm with all attributes. */
2926
2927 return NULL_TREE;
2928 }
2929
2930 /* Return 0 if the attributes for two types are incompatible, 1 if they
2931 are compatible, and 2 if they are nearly compatible (which causes a
2932 warning to be generated). */
2933
2934 static int
2935 ix86_comp_type_attributes (tree type1, tree type2)
2936 {
2937 /* Check for mismatch of non-default calling convention. */
2938 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2939
2940 if (TREE_CODE (type1) != FUNCTION_TYPE)
2941 return 1;
2942
2943 /* Check for mismatched fastcall/regparm types. */
2944 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2945 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2946 || (ix86_function_regparm (type1, NULL)
2947 != ix86_function_regparm (type2, NULL)))
2948 return 0;
2949
2950 /* Check for mismatched sseregparm types. */
2951 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2952 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2953 return 0;
2954
2955 /* Check for mismatched return types (cdecl vs stdcall). */
2956 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2957 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2958 return 0;
2959
2960 return 1;
2961 }
2962 \f
2963 /* Return the regparm value for a function with the indicated TYPE and DECL.
2964 DECL may be NULL when calling function indirectly
2965 or considering a libcall. */
2966
2967 static int
2968 ix86_function_regparm (tree type, tree decl)
2969 {
2970 tree attr;
2971 int regparm = ix86_regparm;
2972 bool user_convention = false;
2973
2974 if (!TARGET_64BIT)
2975 {
2976 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2977 if (attr)
2978 {
2979 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2980 user_convention = true;
2981 }
2982
2983 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2984 {
2985 regparm = 2;
2986 user_convention = true;
2987 }
2988
2989 /* Use register calling convention for local functions when possible. */
2990 if (!TARGET_64BIT && !user_convention && decl
2991 && flag_unit_at_a_time && !profile_flag)
2992 {
2993 struct cgraph_local_info *i = cgraph_local_info (decl);
2994 if (i && i->local)
2995 {
2996 int local_regparm, globals = 0, regno;
2997
2998 /* Make sure no regparm register is taken by a global register
2999 variable. */
3000 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3001 if (global_regs[local_regparm])
3002 break;
3003 /* We can't use regparm(3) for nested functions as these use
3004 static chain pointer in third argument. */
3005 if (local_regparm == 3
3006 && decl_function_context (decl)
3007 && !DECL_NO_STATIC_CHAIN (decl))
3008 local_regparm = 2;
3009 /* If the function realigns its stack pointer, the
3010 prologue will clobber %ecx. If we've already
3011 generated code for the callee, the callee
3012 DECL_STRUCT_FUNCTION is gone, so we fall back to
3013 scanning the attributes for the self-realigning
3014 property. */
3015 if ((DECL_STRUCT_FUNCTION (decl)
3016 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3017 || (!DECL_STRUCT_FUNCTION (decl)
3018 && lookup_attribute (ix86_force_align_arg_pointer_string,
3019 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3020 local_regparm = 2;
3021 /* Each global register variable increases register pressure, so the
3022 more global register variables there are, the less profitable the
3023 regparm optimization is, unless the user requests it explicitly. */
3024 for (regno = 0; regno < 6; regno++)
3025 if (global_regs[regno])
3026 globals++;
3027 local_regparm
3028 = globals < local_regparm ? local_regparm - globals : 0;
3029
3030 if (local_regparm > regparm)
3031 regparm = local_regparm;
3032 }
3033 }
3034 }
3035 return regparm;
3036 }
3037
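/* A minimal sketch of the effect of the attribute (hypothetical user code):

     int __attribute__ ((regparm (2))) add (int a, int b);

   makes ix86_function_regparm return 2, so A and B are passed in %eax and
   %edx instead of on the stack.  A static function local to the unit may
   likewise be promoted up to regparm (3) by the optimization above, subject
   to the global-register, nested-function and stack-realignment checks.  */
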
3038 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3039 DFmode (2) arguments in SSE registers for a function with the
3040 indicated TYPE and DECL. DECL may be NULL when calling function
3041 indirectly or considering a libcall. Otherwise return 0. */
3042
3043 static int
3044 ix86_function_sseregparm (tree type, tree decl)
3045 {
3046 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3047 by the sseregparm attribute. */
3048 if (TARGET_SSEREGPARM
3049 || (type
3050 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3051 {
3052 if (!TARGET_SSE)
3053 {
3054 if (decl)
3055 error ("Calling %qD with attribute sseregparm without "
3056 "SSE/SSE2 enabled", decl);
3057 else
3058 error ("Calling %qT with attribute sseregparm without "
3059 "SSE/SSE2 enabled", type);
3060 return 0;
3061 }
3062
3063 return 2;
3064 }
3065
3066 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3067 (and DFmode for SSE2) arguments in SSE registers,
3068 even for 32-bit targets. */
3069 if (!TARGET_64BIT && decl
3070 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3071 {
3072 struct cgraph_local_info *i = cgraph_local_info (decl);
3073 if (i && i->local)
3074 return TARGET_SSE2 ? 2 : 1;
3075 }
3076
3077 return 0;
3078 }
3079
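/* For example (hypothetical user code), with SSE2 enabled the declaration

     double __attribute__ ((sseregparm)) scale (double x, double y);

   makes this function return 2, so X and Y are passed in SSE registers
   instead of on the stack; without SSE enabled, the same attribute triggers
   the error above.  */
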
3080 /* Return true if EAX is live at the start of the function. Used by
3081 ix86_expand_prologue to determine if we need special help before
3082 calling allocate_stack_worker. */
3083
3084 static bool
3085 ix86_eax_live_at_start_p (void)
3086 {
3087 /* Cheat. Don't bother working forward from ix86_function_regparm
3088 to the function type to whether an actual argument is located in
3089 eax. Instead just look at cfg info, which is still close enough
3090 to correct at this point. This gives false positives for broken
3091 functions that might use uninitialized data that happens to be
3092 allocated in eax, but who cares? */
3093 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3094 }
3095
3096 /* Value is the number of bytes of arguments automatically
3097 popped when returning from a subroutine call.
3098 FUNDECL is the declaration node of the function (as a tree),
3099 FUNTYPE is the data type of the function (as a tree),
3100 or for a library call it is an identifier node for the subroutine name.
3101 SIZE is the number of bytes of arguments passed on the stack.
3102
3103 On the 80386, the RTD insn may be used to pop them if the number
3104 of args is fixed, but if the number is variable then the caller
3105 must pop them all. RTD can't be used for library calls now
3106 because the library is compiled with the Unix compiler.
3107 Use of RTD is a selectable option, since it is incompatible with
3108 standard Unix calling sequences. If the option is not selected,
3109 the caller must always pop the args.
3110
3111 The attribute stdcall is equivalent to RTD on a per module basis. */
3112
3113 int
3114 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3115 {
3116 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3117
3118 /* Cdecl functions override -mrtd, and never pop the stack. */
3119 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3120
3121 /* Stdcall and fastcall functions will pop the stack if not
3122 variable args. */
3123 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3124 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3125 rtd = 1;
3126
3127 if (rtd
3128 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3129 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3130 == void_type_node)))
3131 return size;
3132 }
3133
3134 /* Lose any fake structure return argument if it is passed on the stack. */
3135 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3136 && !TARGET_64BIT
3137 && !KEEP_AGGREGATE_RETURN_POINTER)
3138 {
3139 int nregs = ix86_function_regparm (funtype, fundecl);
3140
3141 if (!nregs)
3142 return GET_MODE_SIZE (Pmode);
3143 }
3144
3145 return 0;
3146 }
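
/* A short example of the conventions above (hypothetical user code):

     void __attribute__ ((stdcall)) f (int a, int b);

   has a fixed argument list, so ix86_return_pops_args returns 8 and the
   callee pops its own arguments (ret $8), while a cdecl or variadic
   function returns 0 here and leaves the cleanup to the caller.  */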
3147 \f
3148 /* Argument support functions. */
3149
3150 /* Return true when register may be used to pass function parameters. */
3151 bool
3152 ix86_function_arg_regno_p (int regno)
3153 {
3154 int i;
3155 if (!TARGET_64BIT)
3156 {
3157 if (TARGET_MACHO)
3158 return (regno < REGPARM_MAX
3159 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3160 else
3161 return (regno < REGPARM_MAX
3162 || (TARGET_MMX && MMX_REGNO_P (regno)
3163 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3164 || (TARGET_SSE && SSE_REGNO_P (regno)
3165 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3166 }
3167
3168 if (TARGET_MACHO)
3169 {
3170 if (SSE_REGNO_P (regno) && TARGET_SSE)
3171 return true;
3172 }
3173 else
3174 {
3175 if (TARGET_SSE && SSE_REGNO_P (regno)
3176 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3177 return true;
3178 }
3179 /* RAX is used as hidden argument to va_arg functions. */
3180 if (!regno)
3181 return true;
3182 for (i = 0; i < REGPARM_MAX; i++)
3183 if (regno == x86_64_int_parameter_registers[i])
3184 return true;
3185 return false;
3186 }
3187
3188 /* Return true if we do not know how to pass TYPE solely in registers. */
3189
3190 static bool
3191 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3192 {
3193 if (must_pass_in_stack_var_size_or_pad (mode, type))
3194 return true;
3195
3196 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3197 The layout_type routine is crafty and tries to trick us into passing
3198 currently unsupported vector types on the stack by using TImode. */
3199 return (!TARGET_64BIT && mode == TImode
3200 && type && TREE_CODE (type) != VECTOR_TYPE);
3201 }
3202
3203 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3204 for a call to a function whose data type is FNTYPE.
3205 For a library call, FNTYPE is 0. */
3206
3207 void
3208 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3209 tree fntype, /* tree ptr for function decl */
3210 rtx libname, /* SYMBOL_REF of library name or 0 */
3211 tree fndecl)
3212 {
3213 static CUMULATIVE_ARGS zero_cum;
3214 tree param, next_param;
3215
3216 if (TARGET_DEBUG_ARG)
3217 {
3218 fprintf (stderr, "\ninit_cumulative_args (");
3219 if (fntype)
3220 fprintf (stderr, "fntype code = %s, ret code = %s",
3221 tree_code_name[(int) TREE_CODE (fntype)],
3222 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3223 else
3224 fprintf (stderr, "no fntype");
3225
3226 if (libname)
3227 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3228 }
3229
3230 *cum = zero_cum;
3231
3232 /* Set up the number of registers to use for passing arguments. */
3233 cum->nregs = ix86_regparm;
3234 if (TARGET_SSE)
3235 cum->sse_nregs = SSE_REGPARM_MAX;
3236 if (TARGET_MMX)
3237 cum->mmx_nregs = MMX_REGPARM_MAX;
3238 cum->warn_sse = true;
3239 cum->warn_mmx = true;
3240 cum->maybe_vaarg = false;
3241
3242 /* Use ecx and edx registers if function has fastcall attribute,
3243 else look for regparm information. */
3244 if (fntype && !TARGET_64BIT)
3245 {
3246 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3247 {
3248 cum->nregs = 2;
3249 cum->fastcall = 1;
3250 }
3251 else
3252 cum->nregs = ix86_function_regparm (fntype, fndecl);
3253 }
3254
3255 /* Set up the number of SSE registers used for passing SFmode
3256 and DFmode arguments. Warn for mismatching ABI. */
3257 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3258
3259 /* Determine if this function has variable arguments. This is
3260 indicated by the last argument being 'void_type_node' if there
3261 are no variable arguments. If there are variable arguments, then
3262 we won't pass anything in registers in 32-bit mode. */
3263
3264 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3265 {
3266 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3267 param != 0; param = next_param)
3268 {
3269 next_param = TREE_CHAIN (param);
3270 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3271 {
3272 if (!TARGET_64BIT)
3273 {
3274 cum->nregs = 0;
3275 cum->sse_nregs = 0;
3276 cum->mmx_nregs = 0;
3277 cum->warn_sse = 0;
3278 cum->warn_mmx = 0;
3279 cum->fastcall = 0;
3280 cum->float_in_sse = 0;
3281 }
3282 cum->maybe_vaarg = true;
3283 }
3284 }
3285 }
3286 if ((!fntype && !libname)
3287 || (fntype && !TYPE_ARG_TYPES (fntype)))
3288 cum->maybe_vaarg = true;
3289
3290 if (TARGET_DEBUG_ARG)
3291 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3292
3293 return;
3294 }
3295
3296 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3297 But in the case of vector types, it is some vector mode.
3298
3299 When we have only some of our vector ISA extensions enabled, there
3300 are some modes for which vector_mode_supported_p is false. For these
3301 modes, the generic vector support in GCC will choose some non-vector mode
3302 in order to implement the type. By computing the natural mode, we'll
3303 select the proper ABI location for the operand and not depend on whatever
3304 the middle-end decides to do with these vector types. */
3305
3306 static enum machine_mode
3307 type_natural_mode (tree type)
3308 {
3309 enum machine_mode mode = TYPE_MODE (type);
3310
3311 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3312 {
3313 HOST_WIDE_INT size = int_size_in_bytes (type);
3314 if ((size == 8 || size == 16)
3315 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3316 && TYPE_VECTOR_SUBPARTS (type) > 1)
3317 {
3318 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3319
3320 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3321 mode = MIN_MODE_VECTOR_FLOAT;
3322 else
3323 mode = MIN_MODE_VECTOR_INT;
3324
3325 /* Get the mode which has this inner mode and number of units. */
3326 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3327 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3328 && GET_MODE_INNER (mode) == innermode)
3329 return mode;
3330
3331 gcc_unreachable ();
3332 }
3333 }
3334
3335 return mode;
3336 }
3337
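/* For instance (hypothetical user code), given

     typedef int v2si __attribute__ ((vector_size (8)));

   type_natural_mode returns V2SImode even when MMX is disabled and the
   middle-end would otherwise implement the type in an integer mode, so the
   argument is still assigned its proper ABI location.  */
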
3338 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3339 this may not agree with the mode that the type system has chosen for the
3340 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3341 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3342
3343 static rtx
3344 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3345 unsigned int regno)
3346 {
3347 rtx tmp;
3348
3349 if (orig_mode != BLKmode)
3350 tmp = gen_rtx_REG (orig_mode, regno);
3351 else
3352 {
3353 tmp = gen_rtx_REG (mode, regno);
3354 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3355 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3356 }
3357
3358 return tmp;
3359 }
3360
3361 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3362 of this code is to classify each 8 bytes of the incoming argument by register
3363 class and assign registers accordingly. */
3364
3365 /* Return the union class of CLASS1 and CLASS2.
3366 See the x86-64 PS ABI for details. */
3367
3368 static enum x86_64_reg_class
3369 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3370 {
3371 /* Rule #1: If both classes are equal, this is the resulting class. */
3372 if (class1 == class2)
3373 return class1;
3374
3375 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3376 the other class. */
3377 if (class1 == X86_64_NO_CLASS)
3378 return class2;
3379 if (class2 == X86_64_NO_CLASS)
3380 return class1;
3381
3382 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3383 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3384 return X86_64_MEMORY_CLASS;
3385
3386 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3387 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3388 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3389 return X86_64_INTEGERSI_CLASS;
3390 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3391 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3392 return X86_64_INTEGER_CLASS;
3393
3394 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3395 MEMORY is used. */
3396 if (class1 == X86_64_X87_CLASS
3397 || class1 == X86_64_X87UP_CLASS
3398 || class1 == X86_64_COMPLEX_X87_CLASS
3399 || class2 == X86_64_X87_CLASS
3400 || class2 == X86_64_X87UP_CLASS
3401 || class2 == X86_64_COMPLEX_X87_CLASS)
3402 return X86_64_MEMORY_CLASS;
3403
3404 /* Rule #6: Otherwise class SSE is used. */
3405 return X86_64_SSE_CLASS;
3406 }
3407
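/* A worked example of the rules above: for a hypothetical

     union u { int i; float f; };

   the INT member classifies as X86_64_INTEGERSI_CLASS and the FLOAT member
   as X86_64_SSESF_CLASS; rule #4 merges them into X86_64_INTEGERSI_CLASS,
   so the union is passed in a general-purpose register.  */
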
3408 /* Classify the argument of type TYPE and mode MODE.
3409 CLASSES will be filled by the register class used to pass each word
3410 of the operand. The number of words is returned. In case the parameter
3411 should be passed in memory, 0 is returned. As a special case for zero
3412 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3413
3414 BIT_OFFSET is used internally for handling records; it specifies the
3415 current offset within the record, in bits modulo 256, to avoid overflow cases.
3416
3417 See the x86-64 PS ABI for details.
3418 */
3419
3420 static int
3421 classify_argument (enum machine_mode mode, tree type,
3422 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3423 {
3424 HOST_WIDE_INT bytes =
3425 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3426 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3427
3428 /* Variable sized entities are always passed/returned in memory. */
3429 if (bytes < 0)
3430 return 0;
3431
3432 if (mode != VOIDmode
3433 && targetm.calls.must_pass_in_stack (mode, type))
3434 return 0;
3435
3436 if (type && AGGREGATE_TYPE_P (type))
3437 {
3438 int i;
3439 tree field;
3440 enum x86_64_reg_class subclasses[MAX_CLASSES];
3441
3442 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3443 if (bytes > 16)
3444 return 0;
3445
3446 for (i = 0; i < words; i++)
3447 classes[i] = X86_64_NO_CLASS;
3448
3449 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3450 signal the memory class, so handle it as a special case. */
3451 if (!words)
3452 {
3453 classes[0] = X86_64_NO_CLASS;
3454 return 1;
3455 }
3456
3457 /* Classify each field of record and merge classes. */
3458 switch (TREE_CODE (type))
3459 {
3460 case RECORD_TYPE:
3461 /* And now merge the fields of structure. */
3462 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3463 {
3464 if (TREE_CODE (field) == FIELD_DECL)
3465 {
3466 int num;
3467
3468 if (TREE_TYPE (field) == error_mark_node)
3469 continue;
3470
3471 /* Bitfields are always classified as integer. Handle them
3472 early, since later code would consider them to be
3473 misaligned integers. */
3474 if (DECL_BIT_FIELD (field))
3475 {
3476 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3477 i < ((int_bit_position (field) + (bit_offset % 64))
3478 + tree_low_cst (DECL_SIZE (field), 0)
3479 + 63) / 8 / 8; i++)
3480 classes[i] =
3481 merge_classes (X86_64_INTEGER_CLASS,
3482 classes[i]);
3483 }
3484 else
3485 {
3486 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3487 TREE_TYPE (field), subclasses,
3488 (int_bit_position (field)
3489 + bit_offset) % 256);
3490 if (!num)
3491 return 0;
3492 for (i = 0; i < num; i++)
3493 {
3494 int pos =
3495 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3496 classes[i + pos] =
3497 merge_classes (subclasses[i], classes[i + pos]);
3498 }
3499 }
3500 }
3501 }
3502 break;
3503
3504 case ARRAY_TYPE:
3505 /* Arrays are handled as small records. */
3506 {
3507 int num;
3508 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3509 TREE_TYPE (type), subclasses, bit_offset);
3510 if (!num)
3511 return 0;
3512
3513 /* The partial classes are now full classes. */
3514 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3515 subclasses[0] = X86_64_SSE_CLASS;
3516 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3517 subclasses[0] = X86_64_INTEGER_CLASS;
3518
3519 for (i = 0; i < words; i++)
3520 classes[i] = subclasses[i % num];
3521
3522 break;
3523 }
3524 case UNION_TYPE:
3525 case QUAL_UNION_TYPE:
3526 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3528 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3529 {
3530 if (TREE_CODE (field) == FIELD_DECL)
3531 {
3532 int num;
3533
3534 if (TREE_TYPE (field) == error_mark_node)
3535 continue;
3536
3537 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3538 TREE_TYPE (field), subclasses,
3539 bit_offset);
3540 if (!num)
3541 return 0;
3542 for (i = 0; i < num; i++)
3543 classes[i] = merge_classes (subclasses[i], classes[i]);
3544 }
3545 }
3546 break;
3547
3548 default:
3549 gcc_unreachable ();
3550 }
3551
3552 /* Final merger cleanup. */
3553 for (i = 0; i < words; i++)
3554 {
3555 /* If one class is MEMORY, everything should be passed in
3556 memory. */
3557 if (classes[i] == X86_64_MEMORY_CLASS)
3558 return 0;
3559
3560 /* The X86_64_SSEUP_CLASS should always be preceded by
3561 X86_64_SSE_CLASS. */
3562 if (classes[i] == X86_64_SSEUP_CLASS
3563 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3564 classes[i] = X86_64_SSE_CLASS;
3565
3566 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3567 if (classes[i] == X86_64_X87UP_CLASS
3568 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3569 classes[i] = X86_64_SSE_CLASS;
3570 }
3571 return words;
3572 }
3573
3574 /* Compute the alignment needed. We align all types to their natural
3575 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3576 if (mode != VOIDmode && mode != BLKmode)
3577 {
3578 int mode_alignment = GET_MODE_BITSIZE (mode);
3579
3580 if (mode == XFmode)
3581 mode_alignment = 128;
3582 else if (mode == XCmode)
3583 mode_alignment = 256;
3584 if (COMPLEX_MODE_P (mode))
3585 mode_alignment /= 2;
3586 /* Misaligned fields are always returned in memory. */
3587 if (bit_offset % mode_alignment)
3588 return 0;
3589 }
3590
3591 /* For V1xx modes, just use the base mode. */
3592 if (VECTOR_MODE_P (mode)
3593 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3594 mode = GET_MODE_INNER (mode);
3595
3596 /* Classification of atomic types. */
3597 switch (mode)
3598 {
3599 case SDmode:
3600 case DDmode:
3601 classes[0] = X86_64_SSE_CLASS;
3602 return 1;
3603 case TDmode:
3604 classes[0] = X86_64_SSE_CLASS;
3605 classes[1] = X86_64_SSEUP_CLASS;
3606 return 2;
3607 case DImode:
3608 case SImode:
3609 case HImode:
3610 case QImode:
3611 case CSImode:
3612 case CHImode:
3613 case CQImode:
3614 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3615 classes[0] = X86_64_INTEGERSI_CLASS;
3616 else
3617 classes[0] = X86_64_INTEGER_CLASS;
3618 return 1;
3619 case CDImode:
3620 case TImode:
3621 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3622 return 2;
3623 case CTImode:
3624 return 0;
3625 case SFmode:
3626 if (!(bit_offset % 64))
3627 classes[0] = X86_64_SSESF_CLASS;
3628 else
3629 classes[0] = X86_64_SSE_CLASS;
3630 return 1;
3631 case DFmode:
3632 classes[0] = X86_64_SSEDF_CLASS;
3633 return 1;
3634 case XFmode:
3635 classes[0] = X86_64_X87_CLASS;
3636 classes[1] = X86_64_X87UP_CLASS;
3637 return 2;
3638 case TFmode:
3639 classes[0] = X86_64_SSE_CLASS;
3640 classes[1] = X86_64_SSEUP_CLASS;
3641 return 2;
3642 case SCmode:
3643 classes[0] = X86_64_SSE_CLASS;
3644 return 1;
3645 case DCmode:
3646 classes[0] = X86_64_SSEDF_CLASS;
3647 classes[1] = X86_64_SSEDF_CLASS;
3648 return 2;
3649 case XCmode:
3650 classes[0] = X86_64_COMPLEX_X87_CLASS;
3651 return 1;
3652 case TCmode:
3653 /* This mode is larger than 16 bytes. */
3654 return 0;
3655 case V4SFmode:
3656 case V4SImode:
3657 case V16QImode:
3658 case V8HImode:
3659 case V2DFmode:
3660 case V2DImode:
3661 classes[0] = X86_64_SSE_CLASS;
3662 classes[1] = X86_64_SSEUP_CLASS;
3663 return 2;
3664 case V2SFmode:
3665 case V2SImode:
3666 case V4HImode:
3667 case V8QImode:
3668 classes[0] = X86_64_SSE_CLASS;
3669 return 1;
3670 case BLKmode:
3671 case VOIDmode:
3672 return 0;
3673 default:
3674 gcc_assert (VECTOR_MODE_P (mode));
3675
3676 if (bytes > 16)
3677 return 0;
3678
3679 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3680
3681 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3682 classes[0] = X86_64_INTEGERSI_CLASS;
3683 else
3684 classes[0] = X86_64_INTEGER_CLASS;
3685 classes[1] = X86_64_INTEGER_CLASS;
3686 return 1 + (bytes > 8);
3687 }
3688 }
3689
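/* A sketch of the classification in practice (hypothetical user code):

     struct s { double d; long l; };

   is 16 bytes, so classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS,
   and the structure is passed partly in an SSE register and partly in a
   general-purpose register, as the x86-64 ABI requires.  */
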
3690 /* Examine the argument and set the number of registers required in each
3691 class. Return 0 iff the parameter should be passed in memory. */
3692 static int
3693 examine_argument (enum machine_mode mode, tree type, int in_return,
3694 int *int_nregs, int *sse_nregs)
3695 {
3696 enum x86_64_reg_class class[MAX_CLASSES];
3697 int n = classify_argument (mode, type, class, 0);
3698
3699 *int_nregs = 0;
3700 *sse_nregs = 0;
3701 if (!n)
3702 return 0;
3703 for (n--; n >= 0; n--)
3704 switch (class[n])
3705 {
3706 case X86_64_INTEGER_CLASS:
3707 case X86_64_INTEGERSI_CLASS:
3708 (*int_nregs)++;
3709 break;
3710 case X86_64_SSE_CLASS:
3711 case X86_64_SSESF_CLASS:
3712 case X86_64_SSEDF_CLASS:
3713 (*sse_nregs)++;
3714 break;
3715 case X86_64_NO_CLASS:
3716 case X86_64_SSEUP_CLASS:
3717 break;
3718 case X86_64_X87_CLASS:
3719 case X86_64_X87UP_CLASS:
3720 if (!in_return)
3721 return 0;
3722 break;
3723 case X86_64_COMPLEX_X87_CLASS:
3724 return in_return ? 2 : 0;
3725 case X86_64_MEMORY_CLASS:
3726 gcc_unreachable ();
3727 }
3728 return 1;
3729 }
3730
3731 /* Construct container for the argument used by GCC interface. See
3732 FUNCTION_ARG for the detailed description. */
3733
3734 static rtx
3735 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3736 tree type, int in_return, int nintregs, int nsseregs,
3737 const int *intreg, int sse_regno)
3738 {
3739 /* The following variables hold the static issued_error state. */
3740 static bool issued_sse_arg_error;
3741 static bool issued_sse_ret_error;
3742 static bool issued_x87_ret_error;
3743
3744 enum machine_mode tmpmode;
3745 int bytes =
3746 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3747 enum x86_64_reg_class class[MAX_CLASSES];
3748 int n;
3749 int i;
3750 int nexps = 0;
3751 int needed_sseregs, needed_intregs;
3752 rtx exp[MAX_CLASSES];
3753 rtx ret;
3754
3755 n = classify_argument (mode, type, class, 0);
3756 if (TARGET_DEBUG_ARG)
3757 {
3758 if (!n)
3759 fprintf (stderr, "Memory class\n");
3760 else
3761 {
3762 fprintf (stderr, "Classes:");
3763 for (i = 0; i < n; i++)
3764 {
3765 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3766 }
3767 fprintf (stderr, "\n");
3768 }
3769 }
3770 if (!n)
3771 return NULL;
3772 if (!examine_argument (mode, type, in_return, &needed_intregs,
3773 &needed_sseregs))
3774 return NULL;
3775 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3776 return NULL;
3777
3778 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3779 some less clueful developer tries to use floating-point anyway. */
3780 if (needed_sseregs && !TARGET_SSE)
3781 {
3782 if (in_return)
3783 {
3784 if (!issued_sse_ret_error)
3785 {
3786 error ("SSE register return with SSE disabled");
3787 issued_sse_ret_error = true;
3788 }
3789 }
3790 else if (!issued_sse_arg_error)
3791 {
3792 error ("SSE register argument with SSE disabled");
3793 issued_sse_arg_error = true;
3794 }
3795 return NULL;
3796 }
3797
3798 /* Likewise, error if the ABI requires us to return values in the
3799 x87 registers and the user specified -mno-80387. */
3800 if (!TARGET_80387 && in_return)
3801 for (i = 0; i < n; i++)
3802 if (class[i] == X86_64_X87_CLASS
3803 || class[i] == X86_64_X87UP_CLASS
3804 || class[i] == X86_64_COMPLEX_X87_CLASS)
3805 {
3806 if (!issued_x87_ret_error)
3807 {
3808 error ("x87 register return with x87 disabled");
3809 issued_x87_ret_error = true;
3810 }
3811 return NULL;
3812 }
3813
3814 /* First construct simple cases. Avoid SCmode, since we want to use
3815 single register to pass this type. */
3816 if (n == 1 && mode != SCmode)
3817 switch (class[0])
3818 {
3819 case X86_64_INTEGER_CLASS:
3820 case X86_64_INTEGERSI_CLASS:
3821 return gen_rtx_REG (mode, intreg[0]);
3822 case X86_64_SSE_CLASS:
3823 case X86_64_SSESF_CLASS:
3824 case X86_64_SSEDF_CLASS:
3825 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3826 case X86_64_X87_CLASS:
3827 case X86_64_COMPLEX_X87_CLASS:
3828 return gen_rtx_REG (mode, FIRST_STACK_REG);
3829 case X86_64_NO_CLASS:
3830 /* Zero sized array, struct or class. */
3831 return NULL;
3832 default:
3833 gcc_unreachable ();
3834 }
3835 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3836 && mode != BLKmode)
3837 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3838 if (n == 2
3839 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3840 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3841 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3842 && class[1] == X86_64_INTEGER_CLASS
3843 && (mode == CDImode || mode == TImode || mode == TFmode)
3844 && intreg[0] + 1 == intreg[1])
3845 return gen_rtx_REG (mode, intreg[0]);
3846
3847 /* Otherwise figure out the entries of the PARALLEL. */
3848 for (i = 0; i < n; i++)
3849 {
3850 switch (class[i])
3851 {
3852 case X86_64_NO_CLASS:
3853 break;
3854 case X86_64_INTEGER_CLASS:
3855 case X86_64_INTEGERSI_CLASS:
3856 /* Merge TImodes on aligned occasions here too. */
3857 if (i * 8 + 8 > bytes)
3858 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3859 else if (class[i] == X86_64_INTEGERSI_CLASS)
3860 tmpmode = SImode;
3861 else
3862 tmpmode = DImode;
3863 /* We've requested 24 bytes we don't have mode for. Use DImode. */
3864 if (tmpmode == BLKmode)
3865 tmpmode = DImode;
3866 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3867 gen_rtx_REG (tmpmode, *intreg),
3868 GEN_INT (i*8));
3869 intreg++;
3870 break;
3871 case X86_64_SSESF_CLASS:
3872 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3873 gen_rtx_REG (SFmode,
3874 SSE_REGNO (sse_regno)),
3875 GEN_INT (i*8));
3876 sse_regno++;
3877 break;
3878 case X86_64_SSEDF_CLASS:
3879 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3880 gen_rtx_REG (DFmode,
3881 SSE_REGNO (sse_regno)),
3882 GEN_INT (i*8));
3883 sse_regno++;
3884 break;
3885 case X86_64_SSE_CLASS:
3886 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3887 tmpmode = TImode;
3888 else
3889 tmpmode = DImode;
3890 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3891 gen_rtx_REG (tmpmode,
3892 SSE_REGNO (sse_regno)),
3893 GEN_INT (i*8));
3894 if (tmpmode == TImode)
3895 i++;
3896 sse_regno++;
3897 break;
3898 default:
3899 gcc_unreachable ();
3900 }
3901 }
3902
3903 /* Empty aligned struct, union or class. */
3904 if (nexps == 0)
3905 return NULL;
3906
3907 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3908 for (i = 0; i < nexps; i++)
3909 XVECEXP (ret, 0, i) = exp [i];
3910 return ret;
3911 }
3912
3913 /* Update the data in CUM to advance over an argument
3914 of mode MODE and data type TYPE.
3915 (TYPE is null for libcalls where that information may not be available.) */
3916
3917 void
3918 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3919 tree type, int named)
3920 {
3921 int bytes =
3922 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3923 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3924
3925 if (type)
3926 mode = type_natural_mode (type);
3927
3928 if (TARGET_DEBUG_ARG)
3929 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3930 "mode=%s, named=%d)\n\n",
3931 words, cum->words, cum->nregs, cum->sse_nregs,
3932 GET_MODE_NAME (mode), named);
3933
3934 if (TARGET_64BIT)
3935 {
3936 int int_nregs, sse_nregs;
3937 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3938 cum->words += words;
3939 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3940 {
3941 cum->nregs -= int_nregs;
3942 cum->sse_nregs -= sse_nregs;
3943 cum->regno += int_nregs;
3944 cum->sse_regno += sse_nregs;
3945 }
3946 else
3947 cum->words += words;
3948 }
3949 else
3950 {
3951 switch (mode)
3952 {
3953 default:
3954 break;
3955
3956 case BLKmode:
3957 if (bytes < 0)
3958 break;
3959 /* FALLTHRU */
3960
3961 case DImode:
3962 case SImode:
3963 case HImode:
3964 case QImode:
3965 cum->words += words;
3966 cum->nregs -= words;
3967 cum->regno += words;
3968
3969 if (cum->nregs <= 0)
3970 {
3971 cum->nregs = 0;
3972 cum->regno = 0;
3973 }
3974 break;
3975
3976 case DFmode:
3977 if (cum->float_in_sse < 2)
3978 break;
3979 case SFmode:
3980 if (cum->float_in_sse < 1)
3981 break;
3982 /* FALLTHRU */
3983
3984 case TImode:
3985 case V16QImode:
3986 case V8HImode:
3987 case V4SImode:
3988 case V2DImode:
3989 case V4SFmode:
3990 case V2DFmode:
3991 if (!type || !AGGREGATE_TYPE_P (type))
3992 {
3993 cum->sse_words += words;
3994 cum->sse_nregs -= 1;
3995 cum->sse_regno += 1;
3996 if (cum->sse_nregs <= 0)
3997 {
3998 cum->sse_nregs = 0;
3999 cum->sse_regno = 0;
4000 }
4001 }
4002 break;
4003
4004 case V8QImode:
4005 case V4HImode:
4006 case V2SImode:
4007 case V2SFmode:
4008 if (!type || !AGGREGATE_TYPE_P (type))
4009 {
4010 cum->mmx_words += words;
4011 cum->mmx_nregs -= 1;
4012 cum->mmx_regno += 1;
4013 if (cum->mmx_nregs <= 0)
4014 {
4015 cum->mmx_nregs = 0;
4016 cum->mmx_regno = 0;
4017 }
4018 }
4019 break;
4020 }
4021 }
4022 }
4023
4024 /* Define where to put the arguments to a function.
4025 Value is zero to push the argument on the stack,
4026 or a hard register in which to store the argument.
4027
4028 MODE is the argument's machine mode.
4029 TYPE is the data type of the argument (as a tree).
4030 This is null for libcalls where that information may
4031 not be available.
4032 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4033 the preceding args and about the function being called.
4034 NAMED is nonzero if this argument is a named parameter
4035 (otherwise it is an extra parameter matching an ellipsis). */
4036
4037 rtx
4038 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
4039 tree type, int named)
4040 {
4041 enum machine_mode mode = orig_mode;
4042 rtx ret = NULL_RTX;
4043 int bytes =
4044 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4045 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4046 static bool warnedsse, warnedmmx;
4047
4048 /* To simplify the code below, represent vector types with a vector mode
4049 even if MMX/SSE are not active. */
4050 if (type && TREE_CODE (type) == VECTOR_TYPE)
4051 mode = type_natural_mode (type);
4052
4053 /* Handle a hidden AL argument containing the number of SSE registers used
4054 when calling a varargs x86-64 function. For the i386 ABI, just return
4055 constm1_rtx to avoid any AL settings. */
4056 if (mode == VOIDmode)
4057 {
4058 if (TARGET_64BIT)
4059 return GEN_INT (cum->maybe_vaarg
4060 ? (cum->sse_nregs < 0
4061 ? SSE_REGPARM_MAX
4062 : cum->sse_regno)
4063 : -1);
4064 else
4065 return constm1_rtx;
4066 }
4067 if (TARGET_64BIT)
4068 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4069 cum->sse_nregs,
4070 &x86_64_int_parameter_registers [cum->regno],
4071 cum->sse_regno);
4072 else
4073 switch (mode)
4074 {
4075 /* For now, pass fp/complex values on the stack. */
4076 default:
4077 break;
4078
4079 case BLKmode:
4080 if (bytes < 0)
4081 break;
4082 /* FALLTHRU */
4083 case DImode:
4084 case SImode:
4085 case HImode:
4086 case QImode:
4087 if (words <= cum->nregs)
4088 {
4089 int regno = cum->regno;
4090
4091 /* Fastcall allocates the first two DWORD (SImode) or
4092 smaller arguments to ECX and EDX. */
4093 if (cum->fastcall)
4094 {
4095 if (mode == BLKmode || mode == DImode)
4096 break;
4097
4098 /* ECX, not EAX, is the first allocated register. */
4099 if (regno == 0)
4100 regno = 2;
4101 }
4102 ret = gen_rtx_REG (mode, regno);
4103 }
4104 break;
4105 case DFmode:
4106 if (cum->float_in_sse < 2)
4107 break;
4108 case SFmode:
4109 if (cum->float_in_sse < 1)
4110 break;
4111 /* FALLTHRU */
4112 case TImode:
4113 case V16QImode:
4114 case V8HImode:
4115 case V4SImode:
4116 case V2DImode:
4117 case V4SFmode:
4118 case V2DFmode:
4119 if (!type || !AGGREGATE_TYPE_P (type))
4120 {
4121 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4122 {
4123 warnedsse = true;
4124 warning (0, "SSE vector argument without SSE enabled "
4125 "changes the ABI");
4126 }
4127 if (cum->sse_nregs)
4128 ret = gen_reg_or_parallel (mode, orig_mode,
4129 cum->sse_regno + FIRST_SSE_REG);
4130 }
4131 break;
4132 case V8QImode:
4133 case V4HImode:
4134 case V2SImode:
4135 case V2SFmode:
4136 if (!type || !AGGREGATE_TYPE_P (type))
4137 {
4138 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4139 {
4140 warnedmmx = true;
4141 warning (0, "MMX vector argument without MMX enabled "
4142 "changes the ABI");
4143 }
4144 if (cum->mmx_nregs)
4145 ret = gen_reg_or_parallel (mode, orig_mode,
4146 cum->mmx_regno + FIRST_MMX_REG);
4147 }
4148 break;
4149 }
4150
4151 if (TARGET_DEBUG_ARG)
4152 {
4153 fprintf (stderr,
4154 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4155 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4156
4157 if (ret)
4158 print_simple_rtl (stderr, ret);
4159 else
4160 fprintf (stderr, ", stack");
4161
4162 fprintf (stderr, " )\n");
4163 }
4164
4165 return ret;
4166 }
4167
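/* A brief illustration of the hidden AL argument handled above: for a
   hypothetical x86-64 varargs call such as

     printf ("%f\n", 3.0);

   the value returned for the VOIDmode "argument" is the number of SSE
   registers carrying arguments (here 1), and the caller loads that count
   into %al before the call so the callee's register-save prologue knows
   how many vector registers to spill.  */
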
4168 /* A C expression that indicates when an argument must be passed by
4169 reference. If nonzero for an argument, a copy of that argument is
4170 made in memory and a pointer to the argument is passed instead of
4171 the argument itself. The pointer is passed in whatever way is
4172 appropriate for passing a pointer to that type. */
4173
4174 static bool
4175 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4176 enum machine_mode mode ATTRIBUTE_UNUSED,
4177 tree type, bool named ATTRIBUTE_UNUSED)
4178 {
4179 if (!TARGET_64BIT)
4180 return 0;
4181
4182 if (type && int_size_in_bytes (type) == -1)
4183 {
4184 if (TARGET_DEBUG_ARG)
4185 fprintf (stderr, "function_arg_pass_by_reference\n");
4186 return 1;
4187 }
4188
4189 return 0;
4190 }
4191
4192 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4193 ABI. Only called if TARGET_SSE. */
4194 static bool
4195 contains_128bit_aligned_vector_p (tree type)
4196 {
4197 enum machine_mode mode = TYPE_MODE (type);
4198 if (SSE_REG_MODE_P (mode)
4199 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4200 return true;
4201 if (TYPE_ALIGN (type) < 128)
4202 return false;
4203
4204 if (AGGREGATE_TYPE_P (type))
4205 {
4206 /* Walk the aggregates recursively. */
4207 switch (TREE_CODE (type))
4208 {
4209 case RECORD_TYPE:
4210 case UNION_TYPE:
4211 case QUAL_UNION_TYPE:
4212 {
4213 tree field;
4214
4215 /* Walk all the structure fields. */
4216 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4217 {
4218 if (TREE_CODE (field) == FIELD_DECL
4219 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4220 return true;
4221 }
4222 break;
4223 }
4224
4225 case ARRAY_TYPE:
4226 /* Just in case some language passes arrays by value. */
4227 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4228 return true;
4229 break;
4230
4231 default:
4232 gcc_unreachable ();
4233 }
4234 }
4235 return false;
4236 }
4237
4238 /* Gives the alignment boundary, in bits, of an argument with the
4239 specified mode and type. */
4240
4241 int
4242 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4243 {
4244 int align;
4245 if (type)
4246 align = TYPE_ALIGN (type);
4247 else
4248 align = GET_MODE_ALIGNMENT (mode);
4249 if (align < PARM_BOUNDARY)
4250 align = PARM_BOUNDARY;
4251 if (!TARGET_64BIT)
4252 {
4253 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4254 make an exception for SSE modes since these require 128bit
4255 alignment.
4256
4257 The handling here differs from field_alignment. ICC aligns MMX
4258 arguments to 4 byte boundaries, while structure fields are aligned
4259 to 8 byte boundaries. */
4260 if (!TARGET_SSE)
4261 align = PARM_BOUNDARY;
4262 else if (!type)
4263 {
4264 if (!SSE_REG_MODE_P (mode))
4265 align = PARM_BOUNDARY;
4266 }
4267 else
4268 {
4269 if (!contains_128bit_aligned_vector_p (type))
4270 align = PARM_BOUNDARY;
4271 }
4272 }
4273 if (align > 128)
4274 align = 128;
4275 return align;
4276 }
4277
4278 /* Return true if N is a possible register number of function value. */
4279 bool
4280 ix86_function_value_regno_p (int regno)
4281 {
4282 if (TARGET_MACHO)
4283 {
4284 if (!TARGET_64BIT)
4285 {
4286 return ((regno) == 0
4287 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4288 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4289 }
4290 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4291 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4292 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4293 }
4294 else
4295 {
4296 if (regno == 0
4297 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4298 || (regno == FIRST_SSE_REG && TARGET_SSE))
4299 return true;
4300
4301 if (!TARGET_64BIT
4302 && (regno == FIRST_MMX_REG && TARGET_MMX))
4303 return true;
4304
4305 return false;
4306 }
4307 }
4308
4309 /* Define how to find the value returned by a function.
4310 VALTYPE is the data type of the value (as a tree).
4311 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4312 otherwise, FUNC is 0. */
4313 rtx
4314 ix86_function_value (tree valtype, tree fntype_or_decl,
4315 bool outgoing ATTRIBUTE_UNUSED)
4316 {
4317 enum machine_mode natmode = type_natural_mode (valtype);
4318
4319 if (TARGET_64BIT)
4320 {
4321 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4322 1, REGPARM_MAX, SSE_REGPARM_MAX,
4323 x86_64_int_return_registers, 0);
4324 /* For zero-sized structures, construct_container returns NULL, but we
4325 need to keep the rest of the compiler happy by returning a meaningful value. */
4326 if (!ret)
4327 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4328 return ret;
4329 }
4330 else
4331 {
4332 tree fn = NULL_TREE, fntype;
4333 if (fntype_or_decl
4334 && DECL_P (fntype_or_decl))
4335 fn = fntype_or_decl;
4336 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4337 return gen_rtx_REG (TYPE_MODE (valtype),
4338 ix86_value_regno (natmode, fn, fntype));
4339 }
4340 }
4341
4342 /* Return true iff type is returned in memory. */
4343 int
4344 ix86_return_in_memory (tree type)
4345 {
4346 int needed_intregs, needed_sseregs, size;
4347 enum machine_mode mode = type_natural_mode (type);
4348
4349 if (TARGET_64BIT)
4350 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4351
4352 if (mode == BLKmode)
4353 return 1;
4354
4355 size = int_size_in_bytes (type);
4356
4357 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4358 return 0;
4359
4360 if (VECTOR_MODE_P (mode) || mode == TImode)
4361 {
4362 /* User-created vectors small enough to fit in EAX. */
4363 if (size < 8)
4364 return 0;
4365
4366 /* MMX/3dNow values are returned in MM0,
4367 except when it doesn't exist. */
4368 if (size == 8)
4369 return (TARGET_MMX ? 0 : 1);
4370
4371 /* SSE values are returned in XMM0, except when it doesn't exist. */
4372 if (size == 16)
4373 return (TARGET_SSE ? 0 : 1);
4374 }
4375
4376 if (mode == XFmode)
4377 return 0;
4378
4379 if (mode == TDmode)
4380 return 1;
4381
4382 if (size > 12)
4383 return 1;
4384 return 0;
4385 }
4386
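/* Two quick 32-bit examples of the rules above (hypothetical user code):
   a 16-byte structure such as

     struct big { int a, b, c, d; };

   is returned in memory through a hidden pointer (it is BLKmode and larger
   than 12 bytes), while a long double (XFmode) result is returned in %st(0).  */
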
4387 /* When returning SSE vector types, we have a choice of either
4388 (1) being ABI incompatible with a -march switch, or
4389 (2) generating an error.
4390 Given no good solution, I think the safest thing is one warning.
4391 The user won't be able to use -Werror, but....
4392
4393 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4394 called in response to actually generating a caller or callee that
4395 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4396 via aggregate_value_p for general type probing from tree-ssa. */
4397
4398 static rtx
4399 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4400 {
4401 static bool warnedsse, warnedmmx;
4402
4403 if (type)
4404 {
4405 /* Look at the return type of the function, not the function type. */
4406 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4407
4408 if (!TARGET_SSE && !warnedsse)
4409 {
4410 if (mode == TImode
4411 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4412 {
4413 warnedsse = true;
4414 warning (0, "SSE vector return without SSE enabled "
4415 "changes the ABI");
4416 }
4417 }
4418
4419 if (!TARGET_MMX && !warnedmmx)
4420 {
4421 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4422 {
4423 warnedmmx = true;
4424 warning (0, "MMX vector return without MMX enabled "
4425 "changes the ABI");
4426 }
4427 }
4428 }
4429
4430 return NULL;
4431 }
4432
4433 /* Define how to find the value returned by a library function
4434 assuming the value has mode MODE. */
4435 rtx
4436 ix86_libcall_value (enum machine_mode mode)
4437 {
4438 if (TARGET_64BIT)
4439 {
4440 switch (mode)
4441 {
4442 case SFmode:
4443 case SCmode:
4444 case DFmode:
4445 case DCmode:
4446 case TFmode:
4447 case SDmode:
4448 case DDmode:
4449 case TDmode:
4450 return gen_rtx_REG (mode, FIRST_SSE_REG);
4451 case XFmode:
4452 case XCmode:
4453 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4454 case TCmode:
4455 return NULL;
4456 default:
4457 return gen_rtx_REG (mode, 0);
4458 }
4459 }
4460 else
4461 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4462 }
4463
4464 /* Given a mode, return the register to use for a return value. */
4465
4466 static int
4467 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4468 {
4469 gcc_assert (!TARGET_64BIT);
4470
4471 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4472 we normally prevent this case when MMX is not available. However,
4473 some ABIs may require the result to be returned like DImode. */
4474 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4475 return TARGET_MMX ? FIRST_MMX_REG : 0;
4476
4477 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4478 we prevent this case when SSE is not available. However, some ABIs
4479 may require the result to be returned like integer TImode. */
4480 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4481 return TARGET_SSE ? FIRST_SSE_REG : 0;
4482
4483 /* Decimal floating point values can go in %eax, unlike other float modes. */
4484 if (DECIMAL_FLOAT_MODE_P (mode))
4485 return 0;
4486
4487 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4488 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4489 return 0;
4490
4491 /* Floating point return values in %st(0), except for local functions when
4492 SSE math is enabled or for functions with sseregparm attribute. */
4493 if ((func || fntype)
4494 && (mode == SFmode || mode == DFmode))
4495 {
4496 int sse_level = ix86_function_sseregparm (fntype, func);
4497 if ((sse_level >= 1 && mode == SFmode)
4498 || (sse_level == 2 && mode == DFmode))
4499 return FIRST_SSE_REG;
4500 }
4501
4502 return FIRST_FLOAT_REG;
4503 }
4504 \f
4505 /* Create the va_list data type. */
4506
4507 static tree
4508 ix86_build_builtin_va_list (void)
4509 {
4510 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4511
4512 /* For i386 we use plain pointer to argument area. */
4513 if (!TARGET_64BIT)
4514 return build_pointer_type (char_type_node);
4515
4516 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4517 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4518
4519 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4520 unsigned_type_node);
4521 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4522 unsigned_type_node);
4523 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4524 ptr_type_node);
4525 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4526 ptr_type_node);
4527
4528 va_list_gpr_counter_field = f_gpr;
4529 va_list_fpr_counter_field = f_fpr;
4530
4531 DECL_FIELD_CONTEXT (f_gpr) = record;
4532 DECL_FIELD_CONTEXT (f_fpr) = record;
4533 DECL_FIELD_CONTEXT (f_ovf) = record;
4534 DECL_FIELD_CONTEXT (f_sav) = record;
4535
4536 TREE_CHAIN (record) = type_decl;
4537 TYPE_NAME (record) = type_decl;
4538 TYPE_FIELDS (record) = f_gpr;
4539 TREE_CHAIN (f_gpr) = f_fpr;
4540 TREE_CHAIN (f_fpr) = f_ovf;
4541 TREE_CHAIN (f_ovf) = f_sav;
4542
4543 layout_type (record);
4544
4545 /* The correct type is an array type of one element. */
4546 return build_array_type (record, build_index_type (size_zero_node));
4547 }
4548
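/* The record built above matches the va_list layout documented in the
   x86-64 psABI; in user-level terms it is roughly

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   with the one-element array type being what is returned here.  */
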
4549 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4550
4551 static void
4552 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4553 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4554 int no_rtl)
4555 {
4556 CUMULATIVE_ARGS next_cum;
4557 rtx save_area = NULL_RTX, mem;
4558 rtx label;
4559 rtx label_ref;
4560 rtx tmp_reg;
4561 rtx nsse_reg;
4562 int set;
4563 tree fntype;
4564 int stdarg_p;
4565 int i;
4566
4567 if (!TARGET_64BIT)
4568 return;
4569
4570 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4571 return;
4572
4573 /* Indicate to allocate space on the stack for varargs save area. */
4574 ix86_save_varrargs_registers = 1;
4575
4576 cfun->stack_alignment_needed = 128;
4577
4578 fntype = TREE_TYPE (current_function_decl);
4579 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4580 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4581 != void_type_node));
4582
4583 /* For varargs, we do not want to skip the dummy va_dcl argument.
4584 For stdargs, we do want to skip the last named argument. */
4585 next_cum = *cum;
4586 if (stdarg_p)
4587 function_arg_advance (&next_cum, mode, type, 1);
4588
4589 if (!no_rtl)
4590 save_area = frame_pointer_rtx;
4591
4592 set = get_varargs_alias_set ();
4593
4594 for (i = next_cum.regno;
4595 i < ix86_regparm
4596 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4597 i++)
4598 {
4599 mem = gen_rtx_MEM (Pmode,
4600 plus_constant (save_area, i * UNITS_PER_WORD));
4601 MEM_NOTRAP_P (mem) = 1;
4602 set_mem_alias_set (mem, set);
4603 emit_move_insn (mem, gen_rtx_REG (Pmode,
4604 x86_64_int_parameter_registers[i]));
4605 }
4606
4607 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4608 {
4609 /* Now emit code to save SSE registers. The AX parameter contains the
4610 number of SSE parameter registers used to call this function. We use
4611 the sse_prologue_save insn template, which produces a computed jump
4612 across the SSE saves. We need some preparation work to get this working. */
4613
4614 label = gen_label_rtx ();
4615 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4616
4617 /* Compute the address to jump to:
4618 label - 5*eax + nnamed_sse_arguments*5 */
4619 tmp_reg = gen_reg_rtx (Pmode);
4620 nsse_reg = gen_reg_rtx (Pmode);
4621 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4622 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4623 gen_rtx_MULT (Pmode, nsse_reg,
4624 GEN_INT (4))));
4625 if (next_cum.sse_regno)
4626 emit_move_insn
4627 (nsse_reg,
4628 gen_rtx_CONST (DImode,
4629 gen_rtx_PLUS (DImode,
4630 label_ref,
4631 GEN_INT (next_cum.sse_regno * 4))));
4632 else
4633 emit_move_insn (nsse_reg, label_ref);
4634 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4635
4636 /* Compute the address of the memory block we save into. We always use a
4637 pointer pointing 127 bytes after the first byte to store - this is needed
4638 to keep the instruction size limited to 4 bytes. */
4639 tmp_reg = gen_reg_rtx (Pmode);
4640 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4641 plus_constant (save_area,
4642 8 * REGPARM_MAX + 127)));
4643 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4644 MEM_NOTRAP_P (mem) = 1;
4645 set_mem_alias_set (mem, set);
4646 set_mem_align (mem, BITS_PER_WORD);
4647
4648 /* And finally do the dirty job! */
4649 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4650 GEN_INT (next_cum.sse_regno), label));
4651 }
4652
4653 }
4654
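/* The save area written above follows the usual x86-64 layout: the six
   integer argument registers occupy offsets 0..47 (8 bytes each) and the
   SSE argument registers are stored starting at offset 8 * REGPARM_MAX
   (16 bytes each).  The computed jump keyed off %al means that, roughly,
   the stores for vector registers the caller did not use are skipped.  */
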
4655 /* Implement va_start. */
4656
4657 void
4658 ix86_va_start (tree valist, rtx nextarg)
4659 {
4660 HOST_WIDE_INT words, n_gpr, n_fpr;
4661 tree f_gpr, f_fpr, f_ovf, f_sav;
4662 tree gpr, fpr, ovf, sav, t;
4663 tree type;
4664
4665 /* Only the 64-bit target needs something special. */
4666 if (!TARGET_64BIT)
4667 {
4668 std_expand_builtin_va_start (valist, nextarg);
4669 return;
4670 }
4671
4672 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4673 f_fpr = TREE_CHAIN (f_gpr);
4674 f_ovf = TREE_CHAIN (f_fpr);
4675 f_sav = TREE_CHAIN (f_ovf);
4676
4677 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4678 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4679 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4680 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4681 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4682
4683 /* Count number of gp and fp argument registers used. */
4684 words = current_function_args_info.words;
4685 n_gpr = current_function_args_info.regno;
4686 n_fpr = current_function_args_info.sse_regno;
4687
4688 if (TARGET_DEBUG_ARG)
4689 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4690 (int) words, (int) n_gpr, (int) n_fpr);
4691
4692 if (cfun->va_list_gpr_size)
4693 {
4694 type = TREE_TYPE (gpr);
4695 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4696 build_int_cst (type, n_gpr * 8));
4697 TREE_SIDE_EFFECTS (t) = 1;
4698 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4699 }
4700
4701 if (cfun->va_list_fpr_size)
4702 {
4703 type = TREE_TYPE (fpr);
4704 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4705 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4706 TREE_SIDE_EFFECTS (t) = 1;
4707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4708 }
4709
4710 /* Find the overflow area. */
4711 type = TREE_TYPE (ovf);
4712 t = make_tree (type, virtual_incoming_args_rtx);
4713 if (words != 0)
4714 t = build2 (PLUS_EXPR, type, t,
4715 build_int_cst (type, words * UNITS_PER_WORD));
4716 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4717 TREE_SIDE_EFFECTS (t) = 1;
4718 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4719
4720 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4721 {
4722 /* Find the register save area.
4723 The function prologue saves it right above the stack frame. */
4724 type = TREE_TYPE (sav);
4725 t = make_tree (type, frame_pointer_rtx);
4726 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4727 TREE_SIDE_EFFECTS (t) = 1;
4728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4729 }
4730 }
4731
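/* A small worked example (hypothetical user code): for

     void log_msg (const char *fmt, double d, ...);

   the two named arguments consume one integer and one SSE register, so
   va_start leaves gp_offset = 8 and fp_offset = 8 * REGPARM_MAX + 16, and
   overflow_arg_area points at the first stack slot for unnamed arguments.  */
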
4732 /* Implement va_arg. */
4733
4734 tree
4735 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4736 {
4737 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4738 tree f_gpr, f_fpr, f_ovf, f_sav;
4739 tree gpr, fpr, ovf, sav, t;
4740 int size, rsize;
4741 tree lab_false, lab_over = NULL_TREE;
4742 tree addr, t2;
4743 rtx container;
4744 int indirect_p = 0;
4745 tree ptrtype;
4746 enum machine_mode nat_mode;
4747
4748 /* Only the 64-bit target needs something special. */
4749 if (!TARGET_64BIT)
4750 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4751
4752 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4753 f_fpr = TREE_CHAIN (f_gpr);
4754 f_ovf = TREE_CHAIN (f_fpr);
4755 f_sav = TREE_CHAIN (f_ovf);
4756
4757 valist = build_va_arg_indirect_ref (valist);
4758 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4759 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4760 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4761 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4762
4763 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4764 if (indirect_p)
4765 type = build_pointer_type (type);
4766 size = int_size_in_bytes (type);
4767 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4768
4769 nat_mode = type_natural_mode (type);
4770 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4771 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4772
4773 /* Pull the value out of the saved registers. */
4774
4775 addr = create_tmp_var (ptr_type_node, "addr");
4776 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4777
4778 if (container)
4779 {
4780 int needed_intregs, needed_sseregs;
4781 bool need_temp;
4782 tree int_addr, sse_addr;
4783
4784 lab_false = create_artificial_label ();
4785 lab_over = create_artificial_label ();
4786
4787 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4788
4789 need_temp = (!REG_P (container)
4790 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4791 || TYPE_ALIGN (type) > 128));
4792
4793 /* In case we are passing a structure, verify that it forms a consecutive
4794 block in the register save area. If not, we need to do moves. */
4795 if (!need_temp && !REG_P (container))
4796 {
4797 /* Verify that all registers are strictly consecutive */
4798 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4799 {
4800 int i;
4801
4802 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4803 {
4804 rtx slot = XVECEXP (container, 0, i);
4805 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4806 || INTVAL (XEXP (slot, 1)) != i * 16)
4807 need_temp = 1;
4808 }
4809 }
4810 else
4811 {
4812 int i;
4813
4814 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4815 {
4816 rtx slot = XVECEXP (container, 0, i);
4817 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4818 || INTVAL (XEXP (slot, 1)) != i * 8)
4819 need_temp = 1;
4820 }
4821 }
4822 }
4823 if (!need_temp)
4824 {
4825 int_addr = addr;
4826 sse_addr = addr;
4827 }
4828 else
4829 {
4830 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4831 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4832 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4833 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4834 }
4835
4836 /* First ensure that we fit completely in registers. */
4837 if (needed_intregs)
4838 {
4839 t = build_int_cst (TREE_TYPE (gpr),
4840 (REGPARM_MAX - needed_intregs + 1) * 8);
4841 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4842 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4843 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4844 gimplify_and_add (t, pre_p);
4845 }
4846 if (needed_sseregs)
4847 {
4848 t = build_int_cst (TREE_TYPE (fpr),
4849 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4850 + REGPARM_MAX * 8);
4851 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4852 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4853 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4854 gimplify_and_add (t, pre_p);
4855 }
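/* For illustration (a rough sketch; the concrete values are examples):
   on x86-64 REGPARM_MAX is 6 and SSE_REGPARM_MAX is 8, and gpr holds the
   byte offset of the next free GP slot in the register save area.  With
   needed_intregs == 2 the threshold above is (6 - 2 + 1) * 8 == 40, so
   gpr values of 40 or 48 leave fewer than two 8-byte slots and jump to
   lab_false (the overflow-area path), while gpr == 32 still fits.  The
   fpr check works the same way on 16-byte SSE slots, offset by the
   REGPARM_MAX * 8 bytes of the GP area.  */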
4856
4857 /* Compute index to start of area used for integer regs. */
4858 if (needed_intregs)
4859 {
4860 /* int_addr = gpr + sav; */
4861 t = fold_convert (ptr_type_node, gpr);
4862 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4863 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4864 gimplify_and_add (t, pre_p);
4865 }
4866 if (needed_sseregs)
4867 {
4868 /* sse_addr = fpr + sav; */
4869 t = fold_convert (ptr_type_node, fpr);
4870 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4871 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4872 gimplify_and_add (t, pre_p);
4873 }
4874 if (need_temp)
4875 {
4876 int i;
4877 tree temp = create_tmp_var (type, "va_arg_tmp");
4878
4879 /* addr = &temp; */
4880 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4881 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4882 gimplify_and_add (t, pre_p);
4883
4884 for (i = 0; i < XVECLEN (container, 0); i++)
4885 {
4886 rtx slot = XVECEXP (container, 0, i);
4887 rtx reg = XEXP (slot, 0);
4888 enum machine_mode mode = GET_MODE (reg);
4889 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4890 tree addr_type = build_pointer_type (piece_type);
4891 tree src_addr, src;
4892 int src_offset;
4893 tree dest_addr, dest;
4894
4895 if (SSE_REGNO_P (REGNO (reg)))
4896 {
4897 src_addr = sse_addr;
4898 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4899 }
4900 else
4901 {
4902 src_addr = int_addr;
4903 src_offset = REGNO (reg) * 8;
4904 }
4905 src_addr = fold_convert (addr_type, src_addr);
4906 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4907 size_int (src_offset));
4908 src = build_va_arg_indirect_ref (src_addr);
4909
4910 dest_addr = fold_convert (addr_type, addr);
4911 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4912 size_int (INTVAL (XEXP (slot, 1))));
4913 dest = build_va_arg_indirect_ref (dest_addr);
4914
4915 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4916 gimplify_and_add (t, pre_p);
4917 }
4918 }
4919
4920 if (needed_intregs)
4921 {
4922 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4923 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4924 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4925 gimplify_and_add (t, pre_p);
4926 }
4927 if (needed_sseregs)
4928 {
4929 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4930 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4931 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4932 gimplify_and_add (t, pre_p);
4933 }
4934
4935 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4936 gimplify_and_add (t, pre_p);
4937
4938 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4939 append_to_statement_list (t, pre_p);
4940 }
4941
4942 /* ... otherwise out of the overflow area. */
4943
4944 /* Care for on-stack alignment if needed. */
4945 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4946 || integer_zerop (TYPE_SIZE (type)))
4947 t = ovf;
4948 else
4949 {
4950 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4951 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4952 build_int_cst (TREE_TYPE (ovf), align - 1));
4953 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4954 build_int_cst (TREE_TYPE (t), -align));
4955 }
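/* For illustration (a rough sketch with made-up numbers): for a type whose
   argument boundary is 128 bits, align == 16 and the expression built above
   is (ovf + 15) & -16, so an overflow pointer of, say, 0x...28 is rounded
   up to 0x...30 before the argument is fetched from the overflow area.  */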
4956 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4957
4958 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4959 gimplify_and_add (t2, pre_p);
4960
4961 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4962 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4963 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4964 gimplify_and_add (t, pre_p);
4965
4966 if (container)
4967 {
4968 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4969 append_to_statement_list (t, pre_p);
4970 }
4971
4972 ptrtype = build_pointer_type (type);
4973 addr = fold_convert (ptrtype, addr);
4974
4975 if (indirect_p)
4976 addr = build_va_arg_indirect_ref (addr);
4977 return build_va_arg_indirect_ref (addr);
4978 }
4979 \f
4980 /* Return nonzero if OPNUM's MEM should be matched
4981 in movabs* patterns. */
4982
4983 int
4984 ix86_check_movabs (rtx insn, int opnum)
4985 {
4986 rtx set, mem;
4987
4988 set = PATTERN (insn);
4989 if (GET_CODE (set) == PARALLEL)
4990 set = XVECEXP (set, 0, 0);
4991 gcc_assert (GET_CODE (set) == SET);
4992 mem = XEXP (set, opnum);
4993 while (GET_CODE (mem) == SUBREG)
4994 mem = SUBREG_REG (mem);
4995 gcc_assert (MEM_P (mem));
4996 return (volatile_ok || !MEM_VOLATILE_P (mem));
4997 }
4998 \f
4999 /* Initialize the table of extra 80387 mathematical constants. */
5000
5001 static void
5002 init_ext_80387_constants (void)
5003 {
5004 static const char * cst[5] =
5005 {
5006 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5007 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5008 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5009 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5010 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5011 };
5012 int i;
5013
5014 for (i = 0; i < 5; i++)
5015 {
5016 real_from_string (&ext_80387_constants_table[i], cst[i]);
5017 /* Ensure each constant is rounded to XFmode precision. */
5018 real_convert (&ext_80387_constants_table[i],
5019 XFmode, &ext_80387_constants_table[i]);
5020 }
5021
5022 ext_80387_constants_init = 1;
5023 }
5024
5025 /* Return a nonzero code identifying the special instruction that can load
5026 the constant X, 0 if there is none, or -1 if X is not a float CONST_DOUBLE. */
5027
5028 int
5029 standard_80387_constant_p (rtx x)
5030 {
5031 REAL_VALUE_TYPE r;
5032
5033 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5034 return -1;
5035
5036 if (x == CONST0_RTX (GET_MODE (x)))
5037 return 1;
5038 if (x == CONST1_RTX (GET_MODE (x)))
5039 return 2;
5040
5041 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5042
5043 /* For XFmode constants, try to find a special 80387 instruction when
5044 optimizing for size or on those CPUs that benefit from them. */
5045 if (GET_MODE (x) == XFmode
5046 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5047 {
5048 int i;
5049
5050 if (! ext_80387_constants_init)
5051 init_ext_80387_constants ();
5052
5053 for (i = 0; i < 5; i++)
5054 if (real_identical (&r, &ext_80387_constants_table[i]))
5055 return i + 3;
5056 }
5057
5058 /* A load of the constant -0.0 or -1.0 will be split into an
5059 fldz;fchs or fld1;fchs sequence. */
5060 if (real_isnegzero (&r))
5061 return 8;
5062 if (real_identical (&r, &dconstm1))
5063 return 9;
5064
5065 return 0;
5066 }
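/* For illustration (a sketch of the encoding above, not new behavior):
   a DFmode or XFmode +0.0 yields 1 (fldz) and +1.0 yields 2 (fld1); with
   -Os or TARGET_EXT_80387_CONSTANTS an XFmode pi constant yields 7 (fldpi);
   -0.0 yields 8 and -1.0 yields 9, which are later split into fldz or fld1
   followed by fchs.  Anything else yields 0 and is loaded from memory.  */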
5067
5068 /* Return the opcode of the special instruction to be used to load
5069 the constant X. */
5070
5071 const char *
5072 standard_80387_constant_opcode (rtx x)
5073 {
5074 switch (standard_80387_constant_p (x))
5075 {
5076 case 1:
5077 return "fldz";
5078 case 2:
5079 return "fld1";
5080 case 3:
5081 return "fldlg2";
5082 case 4:
5083 return "fldln2";
5084 case 5:
5085 return "fldl2e";
5086 case 6:
5087 return "fldl2t";
5088 case 7:
5089 return "fldpi";
5090 case 8:
5091 case 9:
5092 return "#";
5093 default:
5094 gcc_unreachable ();
5095 }
5096 }
5097
5098 /* Return the CONST_DOUBLE representing the 80387 constant that is
5099 loaded by the specified special instruction. The argument IDX
5100 matches the return value from standard_80387_constant_p. */
5101
5102 rtx
5103 standard_80387_constant_rtx (int idx)
5104 {
5105 int i;
5106
5107 if (! ext_80387_constants_init)
5108 init_ext_80387_constants ();
5109
5110 switch (idx)
5111 {
5112 case 3:
5113 case 4:
5114 case 5:
5115 case 6:
5116 case 7:
5117 i = idx - 3;
5118 break;
5119
5120 default:
5121 gcc_unreachable ();
5122 }
5123
5124 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5125 XFmode);
5126 }
5127
5128 /* Return 1 if MODE is a valid mode for SSE. */
5129 static int
5130 standard_sse_mode_p (enum machine_mode mode)
5131 {
5132 switch (mode)
5133 {
5134 case V16QImode:
5135 case V8HImode:
5136 case V4SImode:
5137 case V2DImode:
5138 case V4SFmode:
5139 case V2DFmode:
5140 return 1;
5141
5142 default:
5143 return 0;
5144 }
5145 }
5146
5147 /* Return 1 if X is an FP constant we can load into an SSE register
5148 without using memory. */
5149 int
5150 standard_sse_constant_p (rtx x)
5151 {
5152 enum machine_mode mode = GET_MODE (x);
5153
5154 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5155 return 1;
5156 if (vector_all_ones_operand (x, mode)
5157 && standard_sse_mode_p (mode))
5158 return TARGET_SSE2 ? 2 : -1;
5159
5160 return 0;
5161 }
5162
5163 /* Return the opcode of the special instruction to be used to load
5164 the constant X. */
5165
5166 const char *
5167 standard_sse_constant_opcode (rtx insn, rtx x)
5168 {
5169 switch (standard_sse_constant_p (x))
5170 {
5171 case 1:
5172 if (get_attr_mode (insn) == MODE_V4SF)
5173 return "xorps\t%0, %0";
5174 else if (get_attr_mode (insn) == MODE_V2DF)
5175 return "xorpd\t%0, %0";
5176 else
5177 return "pxor\t%0, %0";
5178 case 2:
5179 return "pcmpeqd\t%0, %0";
5180 }
5181 gcc_unreachable ();
5182 }
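/* For illustration (a sketch; %xmm0 is just an example operand): an
   all-zero vector constant (case 1 above) is cleared with an xor of the
   destination with itself, e.g. "xorps %xmm0, %xmm0" when the insn mode
   is V4SF or "pxor %xmm0, %xmm0" otherwise, while an all-ones vector
   (case 2, SSE2 only) is built with "pcmpeqd %xmm0, %xmm0", which sets
   every bit of the destination.  */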
5183
5184 /* Return 1 if OP contains a symbol reference. */
5185
5186 int
5187 symbolic_reference_mentioned_p (rtx op)
5188 {
5189 const char *fmt;
5190 int i;
5191
5192 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5193 return 1;
5194
5195 fmt = GET_RTX_FORMAT (GET_CODE (op));
5196 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5197 {
5198 if (fmt[i] == 'E')
5199 {
5200 int j;
5201
5202 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5203 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5204 return 1;
5205 }
5206
5207 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5208 return 1;
5209 }
5210
5211 return 0;
5212 }
5213
5214 /* Return 1 if it is appropriate to emit `ret' instructions in the
5215 body of a function. Do this only if the epilogue is simple, needing a
5216 couple of insns. Prior to reloading, we can't tell how many registers
5217 must be saved, so return 0 then. Return 0 if there is no frame
5218 marker to de-allocate. */
5219
5220 int
5221 ix86_can_use_return_insn_p (void)
5222 {
5223 struct ix86_frame frame;
5224
5225 if (! reload_completed || frame_pointer_needed)
5226 return 0;
5227
5228 /* Don't allow popping 32768 or more bytes of arguments, since that
5229 exceeds what we handle with a single return instruction. */
5230 if (current_function_pops_args
5231 && current_function_args_size >= 32768)
5232 return 0;
5233
5234 ix86_compute_frame_layout (&frame);
5235 return frame.to_allocate == 0 && frame.nregs == 0;
5236 }
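/* For illustration (a sketch; the byte count is an example): a function
   that pops its own arguments returns with "ret $N", e.g. "ret $12" for
   12 bytes of stack arguments.  The immediate of that instruction is
   limited in range, which is why the check above refuses the single-insn
   return once current_function_args_size reaches 32768.  */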
5237 \f
5238 /* Value should be nonzero if functions must have frame pointers.
5239 Zero means the frame pointer need not be set up (and parms may
5240 be accessed via the stack pointer) in functions that seem suitable. */
5241
5242 int
5243 ix86_frame_pointer_required (void)
5244 {
5245 /* If we accessed previous frames, then the generated code expects
5246 to be able to access the saved ebp value in our frame. */
5247 if (cfun->machine->accesses_prev_frame)
5248 return 1;
5249
5250 /* Several x86 OSes need a frame pointer for other reasons,
5251 usually pertaining to setjmp. */
5252 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5253 return 1;
5254
5255 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5256 the frame pointer by default. Turn it back on now if we've not
5257 got a leaf function. */
5258 if (TARGET_OMIT_LEAF_FRAME_POINTER
5259 && (!current_function_is_leaf
5260 || ix86_current_function_calls_tls_descriptor))
5261 return 1;
5262
5263 if (current_function_profile)
5264 return 1;
5265
5266 return 0;
5267 }
5268
5269 /* Record that the current function accesses previous call frames. */
5270
5271 void
5272 ix86_setup_frame_addresses (void)
5273 {
5274 cfun->machine->accesses_prev_frame = 1;
5275 }
5276 \f
5277 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5278 # define USE_HIDDEN_LINKONCE 1
5279 #else
5280 # define USE_HIDDEN_LINKONCE 0
5281 #endif
5282
5283 static int pic_labels_used;
5284
5285 /* Fills in the label name that should be used for a pc thunk for
5286 the given register. */
5287
5288 static void
5289 get_pc_thunk_name (char name[32], unsigned int regno)
5290 {
5291 gcc_assert (!TARGET_64BIT);
5292
5293 if (USE_HIDDEN_LINKONCE)
5294 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5295 else
5296 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5297 }
5298
5299
5300 /* File-end hook: for -fpic, emit the pc thunks, each of which loads its
5301 register with the return address of the caller and then returns. */
5302
5303 void
5304 ix86_file_end (void)
5305 {
5306 rtx xops[2];
5307 int regno;
5308
5309 for (regno = 0; regno < 8; ++regno)
5310 {
5311 char name[32];
5312
5313 if (! ((pic_labels_used >> regno) & 1))
5314 continue;
5315
5316 get_pc_thunk_name (name, regno);
5317
5318 #if TARGET_MACHO
5319 if (TARGET_MACHO)
5320 {
5321 switch_to_section (darwin_sections[text_coal_section]);
5322 fputs ("\t.weak_definition\t", asm_out_file);
5323 assemble_name (asm_out_file, name);
5324 fputs ("\n\t.private_extern\t", asm_out_file);
5325 assemble_name (asm_out_file, name);
5326 fputs ("\n", asm_out_file);
5327 ASM_OUTPUT_LABEL (asm_out_file, name);
5328 }
5329 else
5330 #endif
5331 if (USE_HIDDEN_LINKONCE)
5332 {
5333 tree decl;
5334
5335 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5336 error_mark_node);
5337 TREE_PUBLIC (decl) = 1;
5338 TREE_STATIC (decl) = 1;
5339 DECL_ONE_ONLY (decl) = 1;
5340
5341 (*targetm.asm_out.unique_section) (decl, 0);
5342 switch_to_section (get_named_section (decl, NULL, 0));
5343
5344 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5345 fputs ("\t.hidden\t", asm_out_file);
5346 assemble_name (asm_out_file, name);
5347 fputc ('\n', asm_out_file);
5348 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5349 }
5350 else
5351 {
5352 switch_to_section (text_section);
5353 ASM_OUTPUT_LABEL (asm_out_file, name);
5354 }
5355
5356 xops[0] = gen_rtx_REG (SImode, regno);
5357 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5358 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5359 output_asm_insn ("ret", xops);
5360 }
5361
5362 if (NEED_INDICATE_EXEC_STACK)
5363 file_end_indicate_exec_stack ();
5364 }
5365
5366 /* Emit code for the SET_GOT patterns. */
5367
5368 const char *
5369 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5370 {
5371 rtx xops[3];
5372
5373 xops[0] = dest;
5374 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5375
5376 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5377 {
5378 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5379
5380 if (!flag_pic)
5381 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5382 else
5383 output_asm_insn ("call\t%a2", xops);
5384
5385 #if TARGET_MACHO
5386 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5387 is what will be referenced by the Mach-O PIC subsystem. */
5388 if (!label)
5389 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5390 #endif
5391
5392 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5393 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5394
5395 if (flag_pic)
5396 output_asm_insn ("pop{l}\t%0", xops);
5397 }
5398 else
5399 {
5400 char name[32];
5401 get_pc_thunk_name (name, REGNO (dest));
5402 pic_labels_used |= 1 << REGNO (dest);
5403
5404 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5405 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5406 output_asm_insn ("call\t%X2", xops);
5407 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5408 is what will be referenced by the Mach-O PIC subsystem. */
5409 #if TARGET_MACHO
5410 if (!label)
5411 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5412 else
5413 targetm.asm_out.internal_label (asm_out_file, "L",
5414 CODE_LABEL_NUMBER (label));
5415 #endif
5416 }
5417
5418 if (TARGET_MACHO)
5419 return "";
5420
5421 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5422 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5423 else
5424 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5425
5426 return "";
5427 }
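/* For illustration (a rough sketch; %ebx is just the usual PIC register):
   with deep branch prediction and -fpic on 32-bit targets, the code above
   typically emits something along the lines of

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk (emitted by ix86_file_end) loads the return address into
   the destination register.  Without deep branch prediction, a call to a
   local label followed by a pop of the destination register is used
   instead.  */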
5428
5429 /* Generate a "push" pattern for input ARG. */
5430
5431 static rtx
5432 gen_push (rtx arg)
5433 {
5434 return gen_rtx_SET (VOIDmode,
5435 gen_rtx_MEM (Pmode,
5436 gen_rtx_PRE_DEC (Pmode,
5437 stack_pointer_rtx)),
5438 arg);
5439 }
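/* For illustration (a sketch; the register is an example): for ARG being
   the hard frame pointer on a 32-bit target, the RTL built above is roughly

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI bp))

   i.e. the canonical form of "push %ebp"; on 64-bit targets Pmode is
   DImode, giving the 8-byte push.  */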
5440
5441 /* Return the number of an unused call-clobbered register if one is
5442 available for the entire function, otherwise return INVALID_REGNUM. */
5443
5444 static unsigned int
5445 ix86_select_alt_pic_regnum (void)
5446 {
5447 if (current_function_is_leaf && !current_function_profile
5448 && !ix86_current_function_calls_tls_descriptor)
5449 {
5450 int i;
5451 for (i = 2; i >= 0; --i)
5452 if (!regs_ever_live[i])
5453 return i;
5454 }
5455
5456 return INVALID_REGNUM;
5457 }
5458
5459 /* Return 1 if we need to save REGNO. */
5460 static int
5461 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5462 {
5463 if (pic_offset_table_rtx
5464 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5465 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5466 || current_function_profile
5467 || current_function_calls_eh_return
5468 || current_function_uses_const_pool))
5469 {
5470 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5471 return 0;
5472 return 1;
5473 }
5474
5475 if (current_function_calls_eh_return && maybe_eh_return)
5476 {
5477 unsigned i;
5478 for (i = 0; ; i++)
5479 {
5480 unsigned test = EH_RETURN_DATA_REGNO (i);
5481 if (test == INVALID_REGNUM)
5482 break;
5483 if (test == regno)
5484 return 1;
5485 }
5486 }
5487
5488 if (cfun->machine->force_align_arg_pointer
5489 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5490 return 1;
5491
5492 return (regs_ever_live[regno]
5493 && !call_used_regs[regno]
5494 && !fixed_regs[regno]
5495 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5496 }
5497
5498 /* Return number of registers to be saved on the stack. */
5499
5500 static int
5501 ix86_nsaved_regs (void)
5502 {
5503 int nregs = 0;
5504 int regno;
5505
5506 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5507 if (ix86_save_reg (regno, true))
5508 nregs++;
5509 return nregs;
5510 }
5511
5512 /* Return the offset between two registers, one to be eliminated, and the other
5513 its replacement, at the start of a routine. */
5514
5515 HOST_WIDE_INT
5516 ix86_initial_elimination_offset (int from, int to)
5517 {
5518 struct ix86_frame frame;
5519 ix86_compute_frame_layout (&frame);
5520
5521 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5522 return frame.hard_frame_pointer_offset;
5523 else if (from == FRAME_POINTER_REGNUM
5524 && to == HARD_FRAME_POINTER_REGNUM)
5525 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5526 else
5527 {
5528 gcc_assert (to == STACK_POINTER_REGNUM);
5529
5530 if (from == ARG_POINTER_REGNUM)
5531 return frame.stack_pointer_offset;
5532
5533 gcc_assert (from == FRAME_POINTER_REGNUM);
5534 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5535 }
5536 }
5537
5538 /* Fill the ix86_frame structure describing the frame of the currently compiled function. */
5539
5540 static void
5541 ix86_compute_frame_layout (struct ix86_frame *frame)
5542 {
5543 HOST_WIDE_INT total_size;
5544 unsigned int stack_alignment_needed;
5545 HOST_WIDE_INT offset;
5546 unsigned int preferred_alignment;
5547 HOST_WIDE_INT size = get_frame_size ();
5548
5549 frame->nregs = ix86_nsaved_regs ();
5550 total_size = size;
5551
5552 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5553 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5554
5555 /* During reload iterations the number of registers saved can change.
5556 Recompute the value as needed. Do not recompute it when the number of
5557 registers did not change, as reload calls this function multiple times
5558 and does not expect the decision to change within a single iteration. */
5559 if (!optimize_size
5560 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5561 {
5562 int count = frame->nregs;
5563
5564 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5565 /* The fast prologue uses move instead of push to save registers. This
5566 is significantly longer, but also executes faster as modern hardware
5567 can execute the moves in parallel, but can't do that for push/pop.
5568 
5569 Be careful about choosing which prologue to emit: when the function
5570 takes many instructions to execute we may as well use the slow version,
5571 and likewise when the function is known to be outside a hot spot (this
5572 is known only with profile feedback). Weight the size of the function
5573 by the number of registers to save, as it is cheap to use one or two
5574 push instructions but very slow to use many of them. */
5575 if (count)
5576 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5577 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5578 || (flag_branch_probabilities
5579 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5580 cfun->machine->use_fast_prologue_epilogue = false;
5581 else
5582 cfun->machine->use_fast_prologue_epilogue
5583 = !expensive_function_p (count);
5584 }
5585 if (TARGET_PROLOGUE_USING_MOVE
5586 && cfun->machine->use_fast_prologue_epilogue)
5587 frame->save_regs_using_mov = true;
5588 else
5589 frame->save_regs_using_mov = false;
5590
5591
5592 /* Skip return address and saved base pointer. */
5593 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5594
5595 frame->hard_frame_pointer_offset = offset;
5596
5597 /* Do some sanity checking of stack_alignment_needed and
5598 preferred_alignment, since the i386 port is the only one using these
5599 features, and they may break easily. */
5600
5601 gcc_assert (!size || stack_alignment_needed);
5602 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5603 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5604 gcc_assert (stack_alignment_needed
5605 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5606
5607 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5608 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5609
5610 /* Register save area */
5611 offset += frame->nregs * UNITS_PER_WORD;
5612
5613 /* Va-arg area */
5614 if (ix86_save_varrargs_registers)
5615 {
5616 offset += X86_64_VARARGS_SIZE;
5617 frame->va_arg_size = X86_64_VARARGS_SIZE;
5618 }
5619 else
5620 frame->va_arg_size = 0;
5621
5622 /* Align start of frame for local function. */
5623 frame->padding1 = ((offset + stack_alignment_needed - 1)
5624 & -stack_alignment_needed) - offset;
5625
5626 offset += frame->padding1;
5627
5628 /* Frame pointer points here. */
5629 frame->frame_pointer_offset = offset;
5630
5631 offset += size;
5632
5633 /* Add the outgoing arguments area. It can be skipped if we eliminated
5634 all the function calls as dead code.
5635 Skipping is, however, impossible when the function calls alloca: the
5636 alloca expander assumes that the last current_function_outgoing_args_size
5637 bytes of the stack frame are unused. */
5638 if (ACCUMULATE_OUTGOING_ARGS
5639 && (!current_function_is_leaf || current_function_calls_alloca
5640 || ix86_current_function_calls_tls_descriptor))
5641 {
5642 offset += current_function_outgoing_args_size;
5643 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5644 }
5645 else
5646 frame->outgoing_arguments_size = 0;
5647
5648 /* Align stack boundary. Only needed if we're calling another function
5649 or using alloca. */
5650 if (!current_function_is_leaf || current_function_calls_alloca
5651 || ix86_current_function_calls_tls_descriptor)
5652 frame->padding2 = ((offset + preferred_alignment - 1)
5653 & -preferred_alignment) - offset;
5654 else
5655 frame->padding2 = 0;
5656
5657 offset += frame->padding2;
5658
5659 /* We've reached end of stack frame. */
5660 frame->stack_pointer_offset = offset;
5661
5662 /* Size the prologue needs to allocate. */
5663 frame->to_allocate =
5664 (size + frame->padding1 + frame->padding2
5665 + frame->outgoing_arguments_size + frame->va_arg_size);
5666
5667 if ((!frame->to_allocate && frame->nregs <= 1)
5668 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5669 frame->save_regs_using_mov = false;
5670
5671 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5672 && current_function_is_leaf
5673 && !ix86_current_function_calls_tls_descriptor)
5674 {
5675 frame->red_zone_size = frame->to_allocate;
5676 if (frame->save_regs_using_mov)
5677 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5678 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5679 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5680 }
5681 else
5682 frame->red_zone_size = 0;
5683 frame->to_allocate -= frame->red_zone_size;
5684 frame->stack_pointer_offset -= frame->red_zone_size;
5685 #if 0
5686 fprintf (stderr, "\n");
5687 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5688 fprintf (stderr, "size: %ld\n", (long)size);
5689 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5690 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5691 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5692 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5693 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5694 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5695 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5696 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5697 (long)frame->hard_frame_pointer_offset);
5698 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5699 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5700 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5701 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5702 #endif
5703 }
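/* For illustration (a rough sketch of the layout computed above, from
   higher to lower addresses):

	return address (and saved %ebp when a frame pointer is used)
	register save area	(nregs * UNITS_PER_WORD)
	va_arg save area	(64-bit varargs only)
	padding1		(aligns the local frame)
	local variables		(get_frame_size ())
	outgoing argument area
	padding2		(preferred stack boundary)

   to_allocate covers everything below the register save area, i.e. what
   the prologue must subtract from the stack pointer, minus whatever fits
   in the red zone for 64-bit leaf functions.  */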
5704
5705 /* Emit code to save registers in the prologue. */
5706
5707 static void
5708 ix86_emit_save_regs (void)
5709 {
5710 unsigned int regno;
5711 rtx insn;
5712
5713 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5714 if (ix86_save_reg (regno, true))
5715 {
5716 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5717 RTX_FRAME_RELATED_P (insn) = 1;
5718 }
5719 }
5720
5721 /* Emit code to save registers using MOV insns. The first register
5722 is saved at POINTER + OFFSET. */
5723 static void
5724 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5725 {
5726 unsigned int regno;
5727 rtx insn;
5728
5729 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5730 if (ix86_save_reg (regno, true))
5731 {
5732 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5733 Pmode, offset),
5734 gen_rtx_REG (Pmode, regno));
5735 RTX_FRAME_RELATED_P (insn) = 1;
5736 offset += UNITS_PER_WORD;
5737 }
5738 }
5739
5740 /* Expand prologue or epilogue stack adjustment.
5741 The pattern exists to put a dependency on all ebp-based memory accesses.
5742 STYLE should be negative if instructions should be marked as frame related,
5743 zero if the %r11 register is live and cannot be freely used, and positive
5744 otherwise. */
5745
5746 static void
5747 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5748 {
5749 rtx insn;
5750
5751 if (! TARGET_64BIT)
5752 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5753 else if (x86_64_immediate_operand (offset, DImode))
5754 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5755 else
5756 {
5757 rtx r11;
5758 /* r11 is used by the indirect sibcall return as well: it is set before
5759 the epilogue and used after it. At the moment an indirect sibcall
5760 shouldn't be used together with huge frame sizes in one function
5761 because of the frame_size check in sibcall.c. */
5762 gcc_assert (style);
5763 r11 = gen_rtx_REG (DImode, R11_REG);
5764 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5765 if (style < 0)
5766 RTX_FRAME_RELATED_P (insn) = 1;
5767 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5768 offset));
5769 }
5770 if (style < 0)
5771 RTX_FRAME_RELATED_P (insn) = 1;
5772 }
5773
5774 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5775
5776 static rtx
5777 ix86_internal_arg_pointer (void)
5778 {
5779 bool has_force_align_arg_pointer =
5780 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5781 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5782 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5783 && DECL_NAME (current_function_decl)
5784 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5785 && DECL_FILE_SCOPE_P (current_function_decl))
5786 || ix86_force_align_arg_pointer
5787 || has_force_align_arg_pointer)
5788 {
5789 /* Nested functions can't realign the stack due to a register
5790 conflict. */
5791 if (DECL_CONTEXT (current_function_decl)
5792 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5793 {
5794 if (ix86_force_align_arg_pointer)
5795 warning (0, "-mstackrealign ignored for nested functions");
5796 if (has_force_align_arg_pointer)
5797 error ("%s not supported for nested functions",
5798 ix86_force_align_arg_pointer_string);
5799 return virtual_incoming_args_rtx;
5800 }
5801 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5802 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5803 }
5804 else
5805 return virtual_incoming_args_rtx;
5806 }
5807
5808 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5809 This is called from dwarf2out.c to emit call frame instructions
5810 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5811 static void
5812 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5813 {
5814 rtx unspec = SET_SRC (pattern);
5815 gcc_assert (GET_CODE (unspec) == UNSPEC);
5816
5817 switch (index)
5818 {
5819 case UNSPEC_REG_SAVE:
5820 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5821 SET_DEST (pattern));
5822 break;
5823 case UNSPEC_DEF_CFA:
5824 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5825 INTVAL (XVECEXP (unspec, 0, 0)));
5826 break;
5827 default:
5828 gcc_unreachable ();
5829 }
5830 }
5831
5832 /* Expand the prologue into a bunch of separate insns. */
5833
5834 void
5835 ix86_expand_prologue (void)
5836 {
5837 rtx insn;
5838 bool pic_reg_used;
5839 struct ix86_frame frame;
5840 HOST_WIDE_INT allocate;
5841
5842 ix86_compute_frame_layout (&frame);
5843
5844 if (cfun->machine->force_align_arg_pointer)
5845 {
5846 rtx x, y;
5847
5848 /* Grab the argument pointer. */
5849 x = plus_constant (stack_pointer_rtx, 4);
5850 y = cfun->machine->force_align_arg_pointer;
5851 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5852 RTX_FRAME_RELATED_P (insn) = 1;
5853
5854 /* The unwind info consists of two parts: install the fafp as the cfa,
5855 and record the fafp as the "save register" of the stack pointer.
5856 The latter is there so that the unwinder can see where it should
5857 restore the stack pointer across the AND insn. */
5858 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5859 x = gen_rtx_SET (VOIDmode, y, x);
5860 RTX_FRAME_RELATED_P (x) = 1;
5861 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5862 UNSPEC_REG_SAVE);
5863 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5864 RTX_FRAME_RELATED_P (y) = 1;
5865 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5866 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5867 REG_NOTES (insn) = x;
5868
5869 /* Align the stack. */
5870 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5871 GEN_INT (-16)));
5872
5873 /* And here we cheat like madmen with the unwind info. We force the
5874 cfa register back to sp+4, which is exactly what it was at the
5875 start of the function. Re-pushing the return address results in
5876 the return at the same spot relative to the cfa, and thus is
5877 correct wrt the unwind info. */
5878 x = cfun->machine->force_align_arg_pointer;
5879 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5880 insn = emit_insn (gen_push (x));
5881 RTX_FRAME_RELATED_P (insn) = 1;
5882
5883 x = GEN_INT (4);
5884 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5885 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5886 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5887 REG_NOTES (insn) = x;
5888 }
5889
5890 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5891 slower on all targets. Also sdb doesn't like it. */
5892
5893 if (frame_pointer_needed)
5894 {
5895 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5896 RTX_FRAME_RELATED_P (insn) = 1;
5897
5898 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5899 RTX_FRAME_RELATED_P (insn) = 1;
5900 }
5901
5902 allocate = frame.to_allocate;
5903
5904 if (!frame.save_regs_using_mov)
5905 ix86_emit_save_regs ();
5906 else
5907 allocate += frame.nregs * UNITS_PER_WORD;
5908
5909 /* When using the red zone we may start saving registers before allocating
5910 the stack frame, saving one cycle of the prologue. */
5911 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5912 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5913 : stack_pointer_rtx,
5914 -frame.nregs * UNITS_PER_WORD);
5915
5916 if (allocate == 0)
5917 ;
5918 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5919 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5920 GEN_INT (-allocate), -1);
5921 else
5922 {
5923 /* Only valid for Win32. */
5924 rtx eax = gen_rtx_REG (SImode, 0);
5925 bool eax_live = ix86_eax_live_at_start_p ();
5926 rtx t;
5927
5928 gcc_assert (!TARGET_64BIT);
5929
5930 if (eax_live)
5931 {
5932 emit_insn (gen_push (eax));
5933 allocate -= 4;
5934 }
5935
5936 emit_move_insn (eax, GEN_INT (allocate));
5937
5938 insn = emit_insn (gen_allocate_stack_worker (eax));
5939 RTX_FRAME_RELATED_P (insn) = 1;
5940 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5941 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5942 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5943 t, REG_NOTES (insn));
5944
5945 if (eax_live)
5946 {
5947 if (frame_pointer_needed)
5948 t = plus_constant (hard_frame_pointer_rtx,
5949 allocate
5950 - frame.to_allocate
5951 - frame.nregs * UNITS_PER_WORD);
5952 else
5953 t = plus_constant (stack_pointer_rtx, allocate);
5954 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5955 }
5956 }
5957
5958 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5959 {
5960 if (!frame_pointer_needed || !frame.to_allocate)
5961 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5962 else
5963 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5964 -frame.nregs * UNITS_PER_WORD);
5965 }
5966
5967 pic_reg_used = false;
5968 if (pic_offset_table_rtx
5969 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5970 || current_function_profile))
5971 {
5972 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5973
5974 if (alt_pic_reg_used != INVALID_REGNUM)
5975 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5976
5977 pic_reg_used = true;
5978 }
5979
5980 if (pic_reg_used)
5981 {
5982 if (TARGET_64BIT)
5983 {
5984 if (ix86_cmodel == CM_LARGE_PIC)
5985 {
5986 rtx tmp_reg = gen_rtx_REG (DImode,
5987 FIRST_REX_INT_REG + 3 /* R11 */);
5988 rtx label = gen_label_rtx ();
5989 emit_label (label);
5990 LABEL_PRESERVE_P (label) = 1;
5991 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5992 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5993 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5994 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5995 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5996 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5997 pic_offset_table_rtx, tmp_reg));
5998 }
5999 else
6000 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6001 }
6002 else
6003 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6004
6005 /* Even with accurate pre-reload life analysis, we can wind up
6006 deleting all references to the pic register after reload.
6007 Consider the case where cross-jumping unifies two sides of a branch
6008 controlled by a comparison against the only read from a global.
6009 In that case, allow the set_got to be deleted, though we're
6010 too late to do anything about the ebx save in the prologue. */
6011 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6012 }
6013
6014 /* Prevent function calls from being scheduled before the call to mcount.
6015 In the pic_reg_used case, make sure that the got load isn't deleted. */
6016 if (current_function_profile)
6017 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6018 }
6019
6020 /* Emit code to restore saved registers using MOV insns. First register
6021 is restored from POINTER + OFFSET. */
6022 static void
6023 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6024 int maybe_eh_return)
6025 {
6026 int regno;
6027 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6028
6029 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6030 if (ix86_save_reg (regno, maybe_eh_return))
6031 {
6032 /* Ensure that adjust_address won't be forced to produce a pointer
6033 outside the range allowed by the x86-64 instruction set. */
6034 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6035 {
6036 rtx r11;
6037
6038 r11 = gen_rtx_REG (DImode, R11_REG);
6039 emit_move_insn (r11, GEN_INT (offset));
6040 emit_insn (gen_adddi3 (r11, r11, pointer));
6041 base_address = gen_rtx_MEM (Pmode, r11);
6042 offset = 0;
6043 }
6044 emit_move_insn (gen_rtx_REG (Pmode, regno),
6045 adjust_address (base_address, Pmode, offset));
6046 offset += UNITS_PER_WORD;
6047 }
6048 }
6049
6050 /* Restore function stack, frame, and registers. */
6051
6052 void
6053 ix86_expand_epilogue (int style)
6054 {
6055 int regno;
6056 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6057 struct ix86_frame frame;
6058 HOST_WIDE_INT offset;
6059
6060 ix86_compute_frame_layout (&frame);
6061
6062 /* Calculate start of saved registers relative to ebp. Special care
6063 must be taken for the normal return case of a function using
6064 eh_return: the eax and edx registers are marked as saved, but not
6065 restored along this path. */
6066 offset = frame.nregs;
6067 if (current_function_calls_eh_return && style != 2)
6068 offset -= 2;
6069 offset *= -UNITS_PER_WORD;
6070
6071 /* If we're only restoring one register and sp is not valid, then
6072 use a move instruction to restore the register, since it's
6073 less work than reloading sp and popping the register.
6074 
6075 The default code results in a stack adjustment using an add/lea
6076 instruction, while this code results in a LEAVE instruction (or its
6077 discrete equivalent), so it is profitable in some other cases as well,
6078 especially when there are no registers to restore. We also use this code
6079 when TARGET_USE_LEAVE is set and there is exactly one register to pop.
6080 This heuristic may need some tuning in the future. */
6081 if ((!sp_valid && frame.nregs <= 1)
6082 || (TARGET_EPILOGUE_USING_MOVE
6083 && cfun->machine->use_fast_prologue_epilogue
6084 && (frame.nregs > 1 || frame.to_allocate))
6085 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6086 || (frame_pointer_needed && TARGET_USE_LEAVE
6087 && cfun->machine->use_fast_prologue_epilogue
6088 && frame.nregs == 1)
6089 || current_function_calls_eh_return)
6090 {
6091 /* Restore registers. We can use ebp or esp to address the memory
6092 locations. If both are available, default to ebp, since offsets
6093 are known to be small. The only exception is esp pointing directly
6094 to the end of the block of saved registers, where we may simplify
6095 the addressing mode. */
6096
6097 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6098 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6099 frame.to_allocate, style == 2);
6100 else
6101 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6102 offset, style == 2);
6103
6104 /* eh_return epilogues need %ecx added to the stack pointer. */
6105 if (style == 2)
6106 {
6107 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6108
6109 if (frame_pointer_needed)
6110 {
6111 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6112 tmp = plus_constant (tmp, UNITS_PER_WORD);
6113 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6114
6115 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6116 emit_move_insn (hard_frame_pointer_rtx, tmp);
6117
6118 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6119 const0_rtx, style);
6120 }
6121 else
6122 {
6123 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6124 tmp = plus_constant (tmp, (frame.to_allocate
6125 + frame.nregs * UNITS_PER_WORD));
6126 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6127 }
6128 }
6129 else if (!frame_pointer_needed)
6130 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6131 GEN_INT (frame.to_allocate
6132 + frame.nregs * UNITS_PER_WORD),
6133 style);
6134 /* If not an i386, mov & pop is faster than "leave". */
6135 else if (TARGET_USE_LEAVE || optimize_size
6136 || !cfun->machine->use_fast_prologue_epilogue)
6137 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6138 else
6139 {
6140 pro_epilogue_adjust_stack (stack_pointer_rtx,
6141 hard_frame_pointer_rtx,
6142 const0_rtx, style);
6143 if (TARGET_64BIT)
6144 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6145 else
6146 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6147 }
6148 }
6149 else
6150 {
6151 /* First step is to deallocate the stack frame so that we can
6152 pop the registers. */
6153 if (!sp_valid)
6154 {
6155 gcc_assert (frame_pointer_needed);
6156 pro_epilogue_adjust_stack (stack_pointer_rtx,
6157 hard_frame_pointer_rtx,
6158 GEN_INT (offset), style);
6159 }
6160 else if (frame.to_allocate)
6161 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6162 GEN_INT (frame.to_allocate), style);
6163
6164 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6165 if (ix86_save_reg (regno, false))
6166 {
6167 if (TARGET_64BIT)
6168 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6169 else
6170 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6171 }
6172 if (frame_pointer_needed)
6173 {
6174 /* Leave results in shorter dependency chains on CPUs that are
6175 able to grok it fast. */
6176 if (TARGET_USE_LEAVE)
6177 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6178 else if (TARGET_64BIT)
6179 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6180 else
6181 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6182 }
6183 }
6184
6185 if (cfun->machine->force_align_arg_pointer)
6186 {
6187 emit_insn (gen_addsi3 (stack_pointer_rtx,
6188 cfun->machine->force_align_arg_pointer,
6189 GEN_INT (-4)));
6190 }
6191
6192 /* Sibcall epilogues don't want a return instruction. */
6193 if (style == 0)
6194 return;
6195
6196 if (current_function_pops_args && current_function_args_size)
6197 {
6198 rtx popc = GEN_INT (current_function_pops_args);
6199
6200 /* The i386 can only pop 64K bytes with a single return. If asked to pop
6201 more, pop the return address, do an explicit add, and jump indirectly
6202 to the caller. */
6203
6204 if (current_function_pops_args >= 65536)
6205 {
6206 rtx ecx = gen_rtx_REG (SImode, 2);
6207
6208 /* There is no "pascal" calling convention in 64bit ABI. */
6209 gcc_assert (!TARGET_64BIT);
6210
6211 emit_insn (gen_popsi1 (ecx));
6212 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6213 emit_jump_insn (gen_return_indirect_internal (ecx));
6214 }
6215 else
6216 emit_jump_insn (gen_return_pop_internal (popc));
6217 }
6218 else
6219 emit_jump_insn (gen_return_internal ());
6220 }
6221
6222 /* Reset global state that compiling the function may have modified. */
6223
6224 static void
6225 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6226 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6227 {
6228 if (pic_offset_table_rtx)
6229 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6230 #if TARGET_MACHO
6231 /* Mach-O doesn't support labels at the end of objects, so if
6232 it looks like we might want one, insert a NOP. */
6233 {
6234 rtx insn = get_last_insn ();
6235 while (insn
6236 && NOTE_P (insn)
6237 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6238 insn = PREV_INSN (insn);
6239 if (insn
6240 && (LABEL_P (insn)
6241 || (NOTE_P (insn)
6242 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6243 fputs ("\tnop\n", file);
6244 }
6245 #endif
6246
6247 }
6248 \f
6249 /* Extract the parts of an RTL expression that is a valid memory address
6250 for an instruction. Return 0 if the structure of the address is
6251 grossly off. Return -1 if the address contains ASHIFT, so it is not
6252 strictly valid but is still useful for computing lea instruction length. */
6253
6254 int
6255 ix86_decompose_address (rtx addr, struct ix86_address *out)
6256 {
6257 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6258 rtx base_reg, index_reg;
6259 HOST_WIDE_INT scale = 1;
6260 rtx scale_rtx = NULL_RTX;
6261 int retval = 1;
6262 enum ix86_address_seg seg = SEG_DEFAULT;
6263
6264 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6265 base = addr;
6266 else if (GET_CODE (addr) == PLUS)
6267 {
6268 rtx addends[4], op;
6269 int n = 0, i;
6270
6271 op = addr;
6272 do
6273 {
6274 if (n >= 4)
6275 return 0;
6276 addends[n++] = XEXP (op, 1);
6277 op = XEXP (op, 0);
6278 }
6279 while (GET_CODE (op) == PLUS);
6280 if (n >= 4)
6281 return 0;
6282 addends[n] = op;
6283
6284 for (i = n; i >= 0; --i)
6285 {
6286 op = addends[i];
6287 switch (GET_CODE (op))
6288 {
6289 case MULT:
6290 if (index)
6291 return 0;
6292 index = XEXP (op, 0);
6293 scale_rtx = XEXP (op, 1);
6294 break;
6295
6296 case UNSPEC:
6297 if (XINT (op, 1) == UNSPEC_TP
6298 && TARGET_TLS_DIRECT_SEG_REFS
6299 && seg == SEG_DEFAULT)
6300 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6301 else
6302 return 0;
6303 break;
6304
6305 case REG:
6306 case SUBREG:
6307 if (!base)
6308 base = op;
6309 else if (!index)
6310 index = op;
6311 else
6312 return 0;
6313 break;
6314
6315 case CONST:
6316 case CONST_INT:
6317 case SYMBOL_REF:
6318 case LABEL_REF:
6319 if (disp)
6320 return 0;
6321 disp = op;
6322 break;
6323
6324 default:
6325 return 0;
6326 }
6327 }
6328 }
6329 else if (GET_CODE (addr) == MULT)
6330 {
6331 index = XEXP (addr, 0); /* index*scale */
6332 scale_rtx = XEXP (addr, 1);
6333 }
6334 else if (GET_CODE (addr) == ASHIFT)
6335 {
6336 rtx tmp;
6337
6338 /* We're called for lea too, which implements ashift on occasion. */
6339 index = XEXP (addr, 0);
6340 tmp = XEXP (addr, 1);
6341 if (!CONST_INT_P (tmp))
6342 return 0;
6343 scale = INTVAL (tmp);
6344 if ((unsigned HOST_WIDE_INT) scale > 3)
6345 return 0;
6346 scale = 1 << scale;
6347 retval = -1;
6348 }
6349 else
6350 disp = addr; /* displacement */
6351
6352 /* Extract the integral value of scale. */
6353 if (scale_rtx)
6354 {
6355 if (!CONST_INT_P (scale_rtx))
6356 return 0;
6357 scale = INTVAL (scale_rtx);
6358 }
6359
6360 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6361 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6362
6363 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
6364 if (base_reg && index_reg && scale == 1
6365 && (index_reg == arg_pointer_rtx
6366 || index_reg == frame_pointer_rtx
6367 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6368 {
6369 rtx tmp;
6370 tmp = base, base = index, index = tmp;
6371 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6372 }
6373
6374 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6375 if ((base_reg == hard_frame_pointer_rtx
6376 || base_reg == frame_pointer_rtx
6377 || base_reg == arg_pointer_rtx) && !disp)
6378 disp = const0_rtx;
6379
6380 /* Special case: on K6, [%esi] causes the instruction to be vector
6381 decoded. Avoid this by transforming it to [%esi+0]. */
6382 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6383 && base_reg && !index_reg && !disp
6384 && REG_P (base_reg)
6385 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6386 disp = const0_rtx;
6387
6388 /* Special case: encode reg+reg instead of reg*2. */
6389 if (!base && index && scale && scale == 2)
6390 base = index, base_reg = index_reg, scale = 1;
6391
6392 /* Special case: scaling cannot be encoded without base or displacement. */
6393 if (!base && !disp && index && scale != 1)
6394 disp = const0_rtx;
6395
6396 out->base = base;
6397 out->index = index;
6398 out->disp = disp;
6399 out->scale = scale;
6400 out->seg = seg;
6401
6402 return retval;
6403 }
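/* For illustration (a sketch; the pseudo register numbers are made up):
   an address such as

	(plus:SI (plus:SI (mult:SI (reg:SI 58) (const_int 4))
			  (reg:SI 59))
		 (const_int 20))

   decomposes into base = (reg 59), index = (reg 58), scale = 4 and
   disp = (const_int 20), with a return value of 1.  A bare
   (ashift (reg) (const_int 2)) address, as lea sometimes uses, is also
   accepted but returns -1 to mark it as not strictly valid.  */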
6404 \f
6405 /* Return the cost of the memory address X.
6406 For i386, it is better to use a complex address than let gcc copy
6407 the address into a reg and make a new pseudo. But not if the address
6408 requires two regs - that would mean more pseudos with longer
6409 lifetimes. */
6410 static int
6411 ix86_address_cost (rtx x)
6412 {
6413 struct ix86_address parts;
6414 int cost = 1;
6415 int ok = ix86_decompose_address (x, &parts);
6416
6417 gcc_assert (ok);
6418
6419 if (parts.base && GET_CODE (parts.base) == SUBREG)
6420 parts.base = SUBREG_REG (parts.base);
6421 if (parts.index && GET_CODE (parts.index) == SUBREG)
6422 parts.index = SUBREG_REG (parts.index);
6423
6424 /* More complex memory references are better. */
6425 if (parts.disp && parts.disp != const0_rtx)
6426 cost--;
6427 if (parts.seg != SEG_DEFAULT)
6428 cost--;
6429
6430 /* Attempt to minimize number of registers in the address. */
6431 if ((parts.base
6432 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6433 || (parts.index
6434 && (!REG_P (parts.index)
6435 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6436 cost++;
6437
6438 if (parts.base
6439 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6440 && parts.index
6441 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6442 && parts.base != parts.index)
6443 cost++;
6444
6445 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6446 since its predecode logic can't detect the length of such instructions
6447 and decoding degenerates to the vector decoder. Increase the cost of
6448 such addresses here. The penalty is at least 2 cycles. It may be
6449 worthwhile to split such addresses or even reject them entirely.
6450 
6451 The following addressing modes are affected:
6452 [base+scale*index]
6453 [scale*index+disp]
6454 [base+index]
6455 
6456 The first and last case may be avoidable by explicitly coding the zero
6457 in the memory address, but I don't have an AMD K6 machine handy to check
6458 this theory. */
6459
6460 if (TARGET_K6
6461 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6462 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6463 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6464 cost += 10;
6465
6466 return cost;
6467 }
6468 \f
6469 /* If X is a machine specific address (i.e. a symbol or label being
6470 referenced as a displacement from the GOT implemented using an
6471 UNSPEC), then return the base term. Otherwise return X. */
6472
6473 rtx
6474 ix86_find_base_term (rtx x)
6475 {
6476 rtx term;
6477
6478 if (TARGET_64BIT)
6479 {
6480 if (GET_CODE (x) != CONST)
6481 return x;
6482 term = XEXP (x, 0);
6483 if (GET_CODE (term) == PLUS
6484 && (CONST_INT_P (XEXP (term, 1))
6485 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6486 term = XEXP (term, 0);
6487 if (GET_CODE (term) != UNSPEC
6488 || XINT (term, 1) != UNSPEC_GOTPCREL)
6489 return x;
6490
6491 term = XVECEXP (term, 0, 0);
6492
6493 if (GET_CODE (term) != SYMBOL_REF
6494 && GET_CODE (term) != LABEL_REF)
6495 return x;
6496
6497 return term;
6498 }
6499
6500 term = ix86_delegitimize_address (x);
6501
6502 if (GET_CODE (term) != SYMBOL_REF
6503 && GET_CODE (term) != LABEL_REF)
6504 return x;
6505
6506 return term;
6507 }
6508
6509 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6510 this is used to form addresses to local data when -fPIC is in
6511 use. */
6512
6513 static bool
6514 darwin_local_data_pic (rtx disp)
6515 {
6516 if (GET_CODE (disp) == MINUS)
6517 {
6518 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6519 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6520 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6521 {
6522 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6523 if (! strcmp (sym_name, "<pic base>"))
6524 return true;
6525 }
6526 }
6527
6528 return false;
6529 }
6530 \f
6531 /* Determine if a given RTX is a valid constant. We already know this
6532 satisfies CONSTANT_P. */
6533
6534 bool
6535 legitimate_constant_p (rtx x)
6536 {
6537 switch (GET_CODE (x))
6538 {
6539 case CONST:
6540 x = XEXP (x, 0);
6541
6542 if (GET_CODE (x) == PLUS)
6543 {
6544 if (!CONST_INT_P (XEXP (x, 1)))
6545 return false;
6546 x = XEXP (x, 0);
6547 }
6548
6549 if (TARGET_MACHO && darwin_local_data_pic (x))
6550 return true;
6551
6552 /* Only some unspecs are valid as "constants". */
6553 if (GET_CODE (x) == UNSPEC)
6554 switch (XINT (x, 1))
6555 {
6556 case UNSPEC_GOT:
6557 case UNSPEC_GOTOFF:
6558 case UNSPEC_PLTOFF:
6559 return TARGET_64BIT;
6560 case UNSPEC_TPOFF:
6561 case UNSPEC_NTPOFF:
6562 x = XVECEXP (x, 0, 0);
6563 return (GET_CODE (x) == SYMBOL_REF
6564 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6565 case UNSPEC_DTPOFF:
6566 x = XVECEXP (x, 0, 0);
6567 return (GET_CODE (x) == SYMBOL_REF
6568 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6569 default:
6570 return false;
6571 }
6572
6573 /* We must have drilled down to a symbol. */
6574 if (GET_CODE (x) == LABEL_REF)
6575 return true;
6576 if (GET_CODE (x) != SYMBOL_REF)
6577 return false;
6578 /* FALLTHRU */
6579
6580 case SYMBOL_REF:
6581 /* TLS symbols are never valid. */
6582 if (SYMBOL_REF_TLS_MODEL (x))
6583 return false;
6584 break;
6585
6586 case CONST_DOUBLE:
6587 if (GET_MODE (x) == TImode
6588 && x != CONST0_RTX (TImode)
6589 && !TARGET_64BIT)
6590 return false;
6591 break;
6592
6593 case CONST_VECTOR:
6594 if (x == CONST0_RTX (GET_MODE (x)))
6595 return true;
6596 return false;
6597
6598 default:
6599 break;
6600 }
6601
6602 /* Otherwise we handle everything else in the move patterns. */
6603 return true;
6604 }
6605
6606 /* Determine if it's legal to put X into the constant pool. This
6607 is not possible for the address of thread-local symbols, which
6608 is checked above. */
6609
6610 static bool
6611 ix86_cannot_force_const_mem (rtx x)
6612 {
6613 /* We can always put integral constants and vectors in memory. */
6614 switch (GET_CODE (x))
6615 {
6616 case CONST_INT:
6617 case CONST_DOUBLE:
6618 case CONST_VECTOR:
6619 return false;
6620
6621 default:
6622 break;
6623 }
6624 return !legitimate_constant_p (x);
6625 }
6626
6627 /* Determine if a given RTX is a valid constant address. */
6628
6629 bool
6630 constant_address_p (rtx x)
6631 {
6632 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6633 }
6634
6635 /* Nonzero if the constant value X is a legitimate general operand
6636 when generating PIC code. It is given that flag_pic is on and
6637 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6638
6639 bool
6640 legitimate_pic_operand_p (rtx x)
6641 {
6642 rtx inner;
6643
6644 switch (GET_CODE (x))
6645 {
6646 case CONST:
6647 inner = XEXP (x, 0);
6648 if (GET_CODE (inner) == PLUS
6649 && CONST_INT_P (XEXP (inner, 1)))
6650 inner = XEXP (inner, 0);
6651
6652 /* Only some unspecs are valid as "constants". */
6653 if (GET_CODE (inner) == UNSPEC)
6654 switch (XINT (inner, 1))
6655 {
6656 case UNSPEC_GOT:
6657 case UNSPEC_GOTOFF:
6658 case UNSPEC_PLTOFF:
6659 return TARGET_64BIT;
6660 case UNSPEC_TPOFF:
6661 x = XVECEXP (inner, 0, 0);
6662 return (GET_CODE (x) == SYMBOL_REF
6663 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6664 default:
6665 return false;
6666 }
6667 /* FALLTHRU */
6668
6669 case SYMBOL_REF:
6670 case LABEL_REF:
6671 return legitimate_pic_address_disp_p (x);
6672
6673 default:
6674 return true;
6675 }
6676 }
6677
6678 /* Determine if a given CONST RTX is a valid memory displacement
6679 in PIC mode. */
6680
6681 int
6682 legitimate_pic_address_disp_p (rtx disp)
6683 {
6684 bool saw_plus;
6685
6686 /* In 64bit mode we can allow direct addresses of symbols and labels
6687 when they are not dynamic symbols. */
6688 if (TARGET_64BIT)
6689 {
6690 rtx op0 = disp, op1;
6691
6692 switch (GET_CODE (disp))
6693 {
6694 case LABEL_REF:
6695 return true;
6696
6697 case CONST:
6698 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6699 break;
6700 op0 = XEXP (XEXP (disp, 0), 0);
6701 op1 = XEXP (XEXP (disp, 0), 1);
6702 if (!CONST_INT_P (op1)
6703 || INTVAL (op1) >= 16*1024*1024
6704 || INTVAL (op1) < -16*1024*1024)
6705 break;
6706 if (GET_CODE (op0) == LABEL_REF)
6707 return true;
6708 if (GET_CODE (op0) != SYMBOL_REF)
6709 break;
6710 /* FALLTHRU */
6711
6712 case SYMBOL_REF:
6713 /* TLS references should always be enclosed in UNSPEC. */
6714 if (SYMBOL_REF_TLS_MODEL (op0))
6715 return false;
6716 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6717 && ix86_cmodel != CM_LARGE_PIC)
6718 return true;
6719 break;
6720
6721 default:
6722 break;
6723 }
6724 }
6725 if (GET_CODE (disp) != CONST)
6726 return 0;
6727 disp = XEXP (disp, 0);
6728
6729 if (TARGET_64BIT)
6730 {
6731 /* It is not safe to allow PLUS expressions here; this limits the allowed
6732 displacement of GOT references, which we should not need anyway. */
6733 if (GET_CODE (disp) != UNSPEC
6734 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6735 && XINT (disp, 1) != UNSPEC_GOTOFF
6736 && XINT (disp, 1) != UNSPEC_PLTOFF))
6737 return 0;
6738
6739 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6740 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6741 return 0;
6742 return 1;
6743 }
6744
6745 saw_plus = false;
6746 if (GET_CODE (disp) == PLUS)
6747 {
6748 if (!CONST_INT_P (XEXP (disp, 1)))
6749 return 0;
6750 disp = XEXP (disp, 0);
6751 saw_plus = true;
6752 }
6753
6754 if (TARGET_MACHO && darwin_local_data_pic (disp))
6755 return 1;
6756
6757 if (GET_CODE (disp) != UNSPEC)
6758 return 0;
6759
6760 switch (XINT (disp, 1))
6761 {
6762 case UNSPEC_GOT:
6763 if (saw_plus)
6764 return false;
6765 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6766 case UNSPEC_GOTOFF:
6767 /* Refuse GOTOFF in 64bit mode since it is always 64 bits wide when used.
6768 The ABI also specifies a 32bit relocation, but we don't produce it in
6769 the small PIC model at all. */
6770 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6771 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6772 && !TARGET_64BIT)
6773 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6774 return false;
6775 case UNSPEC_GOTTPOFF:
6776 case UNSPEC_GOTNTPOFF:
6777 case UNSPEC_INDNTPOFF:
6778 if (saw_plus)
6779 return false;
6780 disp = XVECEXP (disp, 0, 0);
6781 return (GET_CODE (disp) == SYMBOL_REF
6782 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6783 case UNSPEC_NTPOFF:
6784 disp = XVECEXP (disp, 0, 0);
6785 return (GET_CODE (disp) == SYMBOL_REF
6786 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6787 case UNSPEC_DTPOFF:
6788 disp = XVECEXP (disp, 0, 0);
6789 return (GET_CODE (disp) == SYMBOL_REF
6790 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6791 }
6792
6793 return 0;
6794 }
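
/* Illustrative example (a sketch, not part of the original sources): in
   32-bit PIC code a typical displacement accepted above has the form
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)), optionally wrapped
   in a PLUS with a CONST_INT offset.  TLS forms such as UNSPEC_NTPOFF are
   accepted only when the symbol's TLS model matches.  */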
6795
6796 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6797 memory address for an instruction. The MODE argument is the machine mode
6798 for the MEM expression that wants to use this address.
6799
6800 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6801 convert common non-canonical forms to canonical form so that they will
6802 be recognized. */
6803
6804 int
6805 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6806 {
6807 struct ix86_address parts;
6808 rtx base, index, disp;
6809 HOST_WIDE_INT scale;
6810 const char *reason = NULL;
6811 rtx reason_rtx = NULL_RTX;
6812
6813 if (TARGET_DEBUG_ADDR)
6814 {
6815 fprintf (stderr,
6816 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6817 GET_MODE_NAME (mode), strict);
6818 debug_rtx (addr);
6819 }
6820
6821 if (ix86_decompose_address (addr, &parts) <= 0)
6822 {
6823 reason = "decomposition failed";
6824 goto report_error;
6825 }
6826
6827 base = parts.base;
6828 index = parts.index;
6829 disp = parts.disp;
6830 scale = parts.scale;
6831
6832 /* Validate base register.
6833
6834 Don't allow SUBREGs that span more than a word here. They can lead to spill
6835 failures when the base is one word out of a two-word structure, which is
6836 represented internally as a DImode int. */
6837
6838 if (base)
6839 {
6840 rtx reg;
6841 reason_rtx = base;
6842
6843 if (REG_P (base))
6844 reg = base;
6845 else if (GET_CODE (base) == SUBREG
6846 && REG_P (SUBREG_REG (base))
6847 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6848 <= UNITS_PER_WORD)
6849 reg = SUBREG_REG (base);
6850 else
6851 {
6852 reason = "base is not a register";
6853 goto report_error;
6854 }
6855
6856 if (GET_MODE (base) != Pmode)
6857 {
6858 reason = "base is not in Pmode";
6859 goto report_error;
6860 }
6861
6862 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6863 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6864 {
6865 reason = "base is not valid";
6866 goto report_error;
6867 }
6868 }
6869
6870 /* Validate index register.
6871
6872 Don't allow SUBREGs that span more than a word here -- same as above. */
6873
6874 if (index)
6875 {
6876 rtx reg;
6877 reason_rtx = index;
6878
6879 if (REG_P (index))
6880 reg = index;
6881 else if (GET_CODE (index) == SUBREG
6882 && REG_P (SUBREG_REG (index))
6883 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6884 <= UNITS_PER_WORD)
6885 reg = SUBREG_REG (index);
6886 else
6887 {
6888 reason = "index is not a register";
6889 goto report_error;
6890 }
6891
6892 if (GET_MODE (index) != Pmode)
6893 {
6894 reason = "index is not in Pmode";
6895 goto report_error;
6896 }
6897
6898 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6899 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6900 {
6901 reason = "index is not valid";
6902 goto report_error;
6903 }
6904 }
6905
6906 /* Validate scale factor. */
6907 if (scale != 1)
6908 {
6909 reason_rtx = GEN_INT (scale);
6910 if (!index)
6911 {
6912 reason = "scale without index";
6913 goto report_error;
6914 }
6915
6916 if (scale != 2 && scale != 4 && scale != 8)
6917 {
6918 reason = "scale is not a valid multiplier";
6919 goto report_error;
6920 }
6921 }
6922
6923 /* Validate displacement. */
6924 if (disp)
6925 {
6926 reason_rtx = disp;
6927
6928 if (GET_CODE (disp) == CONST
6929 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6930 switch (XINT (XEXP (disp, 0), 1))
6931 {
6932 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64 bits wide
6933 when used. While the ABI also specifies 32bit relocations, we don't
6934 produce them at all and use IP relative addressing instead. */
6935 case UNSPEC_GOT:
6936 case UNSPEC_GOTOFF:
6937 gcc_assert (flag_pic);
6938 if (!TARGET_64BIT)
6939 goto is_legitimate_pic;
6940 reason = "64bit address unspec";
6941 goto report_error;
6942
6943 case UNSPEC_GOTPCREL:
6944 gcc_assert (flag_pic);
6945 goto is_legitimate_pic;
6946
6947 case UNSPEC_GOTTPOFF:
6948 case UNSPEC_GOTNTPOFF:
6949 case UNSPEC_INDNTPOFF:
6950 case UNSPEC_NTPOFF:
6951 case UNSPEC_DTPOFF:
6952 break;
6953
6954 default:
6955 reason = "invalid address unspec";
6956 goto report_error;
6957 }
6958
6959 else if (SYMBOLIC_CONST (disp)
6960 && (flag_pic
6961 || (TARGET_MACHO
6962 #if TARGET_MACHO
6963 && MACHOPIC_INDIRECT
6964 && !machopic_operand_p (disp)
6965 #endif
6966 )))
6967 {
6968
6969 is_legitimate_pic:
6970 if (TARGET_64BIT && (index || base))
6971 {
6972 /* foo@dtpoff(%rX) is ok. */
6973 if (GET_CODE (disp) != CONST
6974 || GET_CODE (XEXP (disp, 0)) != PLUS
6975 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6976 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6977 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6978 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6979 {
6980 reason = "non-constant pic memory reference";
6981 goto report_error;
6982 }
6983 }
6984 else if (! legitimate_pic_address_disp_p (disp))
6985 {
6986 reason = "displacement is an invalid pic construct";
6987 goto report_error;
6988 }
6989
6990 /* This code used to verify that a symbolic pic displacement
6991 includes the pic_offset_table_rtx register.
6992
6993 While this is a good idea, unfortunately these constructs may
6994 be created by the "adds using lea" optimization for incorrect
6995 code like:
6996
6997 int a;
6998 int foo(int i)
6999 {
7000 return *(&a+i);
7001 }
7002
7003 This code is nonsensical, but it results in addressing the
7004 GOT table with a pic_offset_table_rtx base. We can't
7005 easily refuse it, since it gets matched by the
7006 "addsi3" pattern, which later gets split to lea when the
7007 output register differs from the input. While this
7008 could be handled by a separate addsi pattern for this case
7009 that never results in lea, disabling this test seems to be
7010 the easier and correct fix for the crash. */
7011 }
7012 else if (GET_CODE (disp) != LABEL_REF
7013 && !CONST_INT_P (disp)
7014 && (GET_CODE (disp) != CONST
7015 || !legitimate_constant_p (disp))
7016 && (GET_CODE (disp) != SYMBOL_REF
7017 || !legitimate_constant_p (disp)))
7018 {
7019 reason = "displacement is not constant";
7020 goto report_error;
7021 }
7022 else if (TARGET_64BIT
7023 && !x86_64_immediate_operand (disp, VOIDmode))
7024 {
7025 reason = "displacement is out of range";
7026 goto report_error;
7027 }
7028 }
7029
7030 /* Everything looks valid. */
7031 if (TARGET_DEBUG_ADDR)
7032 fprintf (stderr, "Success.\n");
7033 return TRUE;
7034
7035 report_error:
7036 if (TARGET_DEBUG_ADDR)
7037 {
7038 fprintf (stderr, "Error: %s\n", reason);
7039 debug_rtx (reason_rtx);
7040 }
7041 return FALSE;
7042 }
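
/* Illustrative sketch (not part of the original sources): a minimal use of
   the predicate above.  PTR and IDX are assumed to be Pmode pseudos; the
   address (plus PTR (mult IDX (const_int 4))) decomposes into base = PTR,
   index = IDX, scale = 4, no displacement, and is accepted.  */
#if 0
static int
sketch_check_scaled_address (rtx ptr, rtx idx)
{
  rtx addr = gen_rtx_PLUS (Pmode, ptr,
                           gen_rtx_MULT (Pmode, idx, GEN_INT (4)));
  return legitimate_address_p (SImode, addr, /*strict=*/0);
}
#endif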
7043 \f
7044 /* Return a unique alias set for the GOT. */
7045
7046 static HOST_WIDE_INT
7047 ix86_GOT_alias_set (void)
7048 {
7049 static HOST_WIDE_INT set = -1;
7050 if (set == -1)
7051 set = new_alias_set ();
7052 return set;
7053 }
7054
7055 /* Return a legitimate reference for ORIG (an address) using the
7056 register REG. If REG is 0, a new pseudo is generated.
7057
7058 There are two types of references that must be handled:
7059
7060 1. Global data references must load the address from the GOT, via
7061 the PIC reg. An insn is emitted to do this load, and the reg is
7062 returned.
7063
7064 2. Static data references, constant pool addresses, and code labels
7065 compute the address as an offset from the GOT, whose base is in
7066 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7067 differentiate them from global data objects. The returned
7068 address is the PIC reg + an unspec constant.
7069
7070 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7071 reg also appears in the address. */
7072
7073 static rtx
7074 legitimize_pic_address (rtx orig, rtx reg)
7075 {
7076 rtx addr = orig;
7077 rtx new = orig;
7078 rtx base;
7079
7080 #if TARGET_MACHO
7081 if (TARGET_MACHO && !TARGET_64BIT)
7082 {
7083 if (reg == 0)
7084 reg = gen_reg_rtx (Pmode);
7085 /* Use the generic Mach-O PIC machinery. */
7086 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7087 }
7088 #endif
7089
7090 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7091 new = addr;
7092 else if (TARGET_64BIT
7093 && ix86_cmodel != CM_SMALL_PIC
7094 && local_symbolic_operand (addr, Pmode))
7095 {
7096 rtx tmpreg;
7097 /* This symbol may be referenced via a displacement from the PIC
7098 base address (@GOTOFF). */
7099
7100 if (reload_in_progress)
7101 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7102 if (GET_CODE (addr) == CONST)
7103 addr = XEXP (addr, 0);
7104 if (GET_CODE (addr) == PLUS)
7105 {
7106 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7107 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7108 }
7109 else
7110 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7111 new = gen_rtx_CONST (Pmode, new);
7112 if (!reg)
7113 tmpreg = gen_reg_rtx (Pmode);
7114 else
7115 tmpreg = reg;
7116 emit_move_insn (tmpreg, new);
7117
7118 if (reg != 0)
7119 {
7120 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7121 tmpreg, 1, OPTAB_DIRECT);
7122 new = reg;
7123 }
7124 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7125 }
7126 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7127 {
7128 /* This symbol may be referenced via a displacement from the PIC
7129 base address (@GOTOFF). */
7130
7131 if (reload_in_progress)
7132 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7133 if (GET_CODE (addr) == CONST)
7134 addr = XEXP (addr, 0);
7135 if (GET_CODE (addr) == PLUS)
7136 {
7137 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7138 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7139 }
7140 else
7141 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7142 new = gen_rtx_CONST (Pmode, new);
7143 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7144
7145 if (reg != 0)
7146 {
7147 emit_move_insn (reg, new);
7148 new = reg;
7149 }
7150 }
7151 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7152 {
7153 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7154 {
7155 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7156 new = gen_rtx_CONST (Pmode, new);
7157 new = gen_const_mem (Pmode, new);
7158 set_mem_alias_set (new, ix86_GOT_alias_set ());
7159
7160 if (reg == 0)
7161 reg = gen_reg_rtx (Pmode);
7162 /* Use gen_movsi directly; otherwise the address is loaded
7163 into a register for CSE. We don't want to CSE these addresses;
7164 instead we CSE the addresses loaded from the GOT table, so skip this. */
7165 emit_insn (gen_movsi (reg, new));
7166 new = reg;
7167 }
7168 else
7169 {
7170 /* This symbol must be referenced via a load from the
7171 Global Offset Table (@GOT). */
7172
7173 if (reload_in_progress)
7174 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7175 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7176 new = gen_rtx_CONST (Pmode, new);
7177 if (TARGET_64BIT)
7178 new = force_reg (Pmode, new);
7179 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7180 new = gen_const_mem (Pmode, new);
7181 set_mem_alias_set (new, ix86_GOT_alias_set ());
7182
7183 if (reg == 0)
7184 reg = gen_reg_rtx (Pmode);
7185 emit_move_insn (reg, new);
7186 new = reg;
7187 }
7188 }
7189 else
7190 {
7191 if (CONST_INT_P (addr)
7192 && !x86_64_immediate_operand (addr, VOIDmode))
7193 {
7194 if (reg)
7195 {
7196 emit_move_insn (reg, addr);
7197 new = reg;
7198 }
7199 else
7200 new = force_reg (Pmode, addr);
7201 }
7202 else if (GET_CODE (addr) == CONST)
7203 {
7204 addr = XEXP (addr, 0);
7205
7206 /* We must match what we generated before. Assume the only
7207 unspecs that can get here are ours; not that we could do
7208 anything with them anyway.... */
7209 if (GET_CODE (addr) == UNSPEC
7210 || (GET_CODE (addr) == PLUS
7211 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7212 return orig;
7213 gcc_assert (GET_CODE (addr) == PLUS);
7214 }
7215 if (GET_CODE (addr) == PLUS)
7216 {
7217 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7218
7219 /* Check first to see if this is a constant offset from a @GOTOFF
7220 symbol reference. */
7221 if (local_symbolic_operand (op0, Pmode)
7222 && CONST_INT_P (op1))
7223 {
7224 if (!TARGET_64BIT)
7225 {
7226 if (reload_in_progress)
7227 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7228 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7229 UNSPEC_GOTOFF);
7230 new = gen_rtx_PLUS (Pmode, new, op1);
7231 new = gen_rtx_CONST (Pmode, new);
7232 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7233
7234 if (reg != 0)
7235 {
7236 emit_move_insn (reg, new);
7237 new = reg;
7238 }
7239 }
7240 else
7241 {
7242 if (INTVAL (op1) < -16*1024*1024
7243 || INTVAL (op1) >= 16*1024*1024)
7244 {
7245 if (!x86_64_immediate_operand (op1, Pmode))
7246 op1 = force_reg (Pmode, op1);
7247 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7248 }
7249 }
7250 }
7251 else
7252 {
7253 base = legitimize_pic_address (XEXP (addr, 0), reg);
7254 new = legitimize_pic_address (XEXP (addr, 1),
7255 base == reg ? NULL_RTX : reg);
7256
7257 if (CONST_INT_P (new))
7258 new = plus_constant (base, INTVAL (new));
7259 else
7260 {
7261 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7262 {
7263 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7264 new = XEXP (new, 1);
7265 }
7266 new = gen_rtx_PLUS (Pmode, base, new);
7267 }
7268 }
7269 }
7270 }
7271 return new;
7272 }
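
/* Illustrative sketch (not part of the original sources): the @GOTOFF form
   constructed above for a local symbol ADDR in 32-bit PIC code, i.e.
   (plus pic_offset_table_rtx (const (unspec [ADDR] UNSPEC_GOTOFF))).  */
#if 0
static rtx
sketch_gotoff_form (rtx addr)
{
  rtx off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
  off = gen_rtx_CONST (Pmode, off);
  return gen_rtx_PLUS (Pmode, pic_offset_table_rtx, off);
}
#endif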
7273 \f
7274 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7275
7276 static rtx
7277 get_thread_pointer (int to_reg)
7278 {
7279 rtx tp, reg, insn;
7280
7281 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7282 if (!to_reg)
7283 return tp;
7284
7285 reg = gen_reg_rtx (Pmode);
7286 insn = gen_rtx_SET (VOIDmode, reg, tp);
7287 insn = emit_insn (insn);
7288
7289 return reg;
7290 }
7291
7292 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7293 false if we expect this to be used for a memory address and true if
7294 we expect to load the address into a register. */
7295
7296 static rtx
7297 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7298 {
7299 rtx dest, base, off, pic, tp;
7300 int type;
7301
7302 switch (model)
7303 {
7304 case TLS_MODEL_GLOBAL_DYNAMIC:
7305 dest = gen_reg_rtx (Pmode);
7306 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7307
7308 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7309 {
7310 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7311
7312 start_sequence ();
7313 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7314 insns = get_insns ();
7315 end_sequence ();
7316
7317 emit_libcall_block (insns, dest, rax, x);
7318 }
7319 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7320 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7321 else
7322 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7323
7324 if (TARGET_GNU2_TLS)
7325 {
7326 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7327
7328 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7329 }
7330 break;
7331
7332 case TLS_MODEL_LOCAL_DYNAMIC:
7333 base = gen_reg_rtx (Pmode);
7334 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7335
7336 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7337 {
7338 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7339
7340 start_sequence ();
7341 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7342 insns = get_insns ();
7343 end_sequence ();
7344
7345 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7346 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7347 emit_libcall_block (insns, base, rax, note);
7348 }
7349 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7350 emit_insn (gen_tls_local_dynamic_base_64 (base));
7351 else
7352 emit_insn (gen_tls_local_dynamic_base_32 (base));
7353
7354 if (TARGET_GNU2_TLS)
7355 {
7356 rtx x = ix86_tls_module_base ();
7357
7358 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7359 gen_rtx_MINUS (Pmode, x, tp));
7360 }
7361
7362 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7363 off = gen_rtx_CONST (Pmode, off);
7364
7365 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7366
7367 if (TARGET_GNU2_TLS)
7368 {
7369 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7370
7371 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7372 }
7373
7374 break;
7375
7376 case TLS_MODEL_INITIAL_EXEC:
7377 if (TARGET_64BIT)
7378 {
7379 pic = NULL;
7380 type = UNSPEC_GOTNTPOFF;
7381 }
7382 else if (flag_pic)
7383 {
7384 if (reload_in_progress)
7385 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7386 pic = pic_offset_table_rtx;
7387 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7388 }
7389 else if (!TARGET_ANY_GNU_TLS)
7390 {
7391 pic = gen_reg_rtx (Pmode);
7392 emit_insn (gen_set_got (pic));
7393 type = UNSPEC_GOTTPOFF;
7394 }
7395 else
7396 {
7397 pic = NULL;
7398 type = UNSPEC_INDNTPOFF;
7399 }
7400
7401 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7402 off = gen_rtx_CONST (Pmode, off);
7403 if (pic)
7404 off = gen_rtx_PLUS (Pmode, pic, off);
7405 off = gen_const_mem (Pmode, off);
7406 set_mem_alias_set (off, ix86_GOT_alias_set ());
7407
7408 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7409 {
7410 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7411 off = force_reg (Pmode, off);
7412 return gen_rtx_PLUS (Pmode, base, off);
7413 }
7414 else
7415 {
7416 base = get_thread_pointer (true);
7417 dest = gen_reg_rtx (Pmode);
7418 emit_insn (gen_subsi3 (dest, base, off));
7419 }
7420 break;
7421
7422 case TLS_MODEL_LOCAL_EXEC:
7423 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7424 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7425 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7426 off = gen_rtx_CONST (Pmode, off);
7427
7428 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7429 {
7430 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7431 return gen_rtx_PLUS (Pmode, base, off);
7432 }
7433 else
7434 {
7435 base = get_thread_pointer (true);
7436 dest = gen_reg_rtx (Pmode);
7437 emit_insn (gen_subsi3 (dest, base, off));
7438 }
7439 break;
7440
7441 default:
7442 gcc_unreachable ();
7443 }
7444
7445 return dest;
7446 }
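
/* Illustrative sketch (not part of the original sources): the local-exec
   form built above when TARGET_64BIT or TARGET_ANY_GNU_TLS holds, i.e.
   thread pointer + (const (unspec [X] UNSPEC_NTPOFF))).  */
#if 0
static rtx
sketch_local_exec_form (rtx x)
{
  rtx off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_NTPOFF);
  off = gen_rtx_CONST (Pmode, off);
  return gen_rtx_PLUS (Pmode, get_thread_pointer (true), off);
}
#endif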
7447
7448 /* Try machine-dependent ways of modifying an illegitimate address
7449 to be legitimate. If we find one, return the new, valid address.
7450 This macro is used in only one place: `memory_address' in explow.c.
7451
7452 OLDX is the address as it was before break_out_memory_refs was called.
7453 In some cases it is useful to look at this to decide what needs to be done.
7454
7455 MODE and WIN are passed so that this macro can use
7456 GO_IF_LEGITIMATE_ADDRESS.
7457
7458 It is always safe for this macro to do nothing. It exists to recognize
7459 opportunities to optimize the output.
7460
7461 For the 80386, we handle X+REG by loading X into a register R and
7462 using R+REG. R will go in a general reg and indexing will be used.
7463 However, if REG is a broken-out memory address or multiplication,
7464 nothing needs to be done because REG can certainly go in a general reg.
7465
7466 When -fpic is used, special handling is needed for symbolic references.
7467 See comments by legitimize_pic_address in i386.c for details. */
7468
7469 rtx
7470 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7471 {
7472 int changed = 0;
7473 unsigned log;
7474
7475 if (TARGET_DEBUG_ADDR)
7476 {
7477 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7478 GET_MODE_NAME (mode));
7479 debug_rtx (x);
7480 }
7481
7482 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7483 if (log)
7484 return legitimize_tls_address (x, log, false);
7485 if (GET_CODE (x) == CONST
7486 && GET_CODE (XEXP (x, 0)) == PLUS
7487 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7488 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7489 {
7490 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7491 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7492 }
7493
7494 if (flag_pic && SYMBOLIC_CONST (x))
7495 return legitimize_pic_address (x, 0);
7496
7497 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7498 if (GET_CODE (x) == ASHIFT
7499 && CONST_INT_P (XEXP (x, 1))
7500 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7501 {
7502 changed = 1;
7503 log = INTVAL (XEXP (x, 1));
7504 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7505 GEN_INT (1 << log));
7506 }
7507
7508 if (GET_CODE (x) == PLUS)
7509 {
7510 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7511
7512 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7513 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7514 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7515 {
7516 changed = 1;
7517 log = INTVAL (XEXP (XEXP (x, 0), 1));
7518 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7519 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7520 GEN_INT (1 << log));
7521 }
7522
7523 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7524 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7525 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7526 {
7527 changed = 1;
7528 log = INTVAL (XEXP (XEXP (x, 1), 1));
7529 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7530 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7531 GEN_INT (1 << log));
7532 }
7533
7534 /* Put multiply first if it isn't already. */
7535 if (GET_CODE (XEXP (x, 1)) == MULT)
7536 {
7537 rtx tmp = XEXP (x, 0);
7538 XEXP (x, 0) = XEXP (x, 1);
7539 XEXP (x, 1) = tmp;
7540 changed = 1;
7541 }
7542
7543 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7544 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7545 created by virtual register instantiation, register elimination, and
7546 similar optimizations. */
7547 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7548 {
7549 changed = 1;
7550 x = gen_rtx_PLUS (Pmode,
7551 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7552 XEXP (XEXP (x, 1), 0)),
7553 XEXP (XEXP (x, 1), 1));
7554 }
7555
7556 /* Canonicalize
7557 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7558 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7559 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7560 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7561 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7562 && CONSTANT_P (XEXP (x, 1)))
7563 {
7564 rtx constant;
7565 rtx other = NULL_RTX;
7566
7567 if (CONST_INT_P (XEXP (x, 1)))
7568 {
7569 constant = XEXP (x, 1);
7570 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7571 }
7572 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7573 {
7574 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7575 other = XEXP (x, 1);
7576 }
7577 else
7578 constant = 0;
7579
7580 if (constant)
7581 {
7582 changed = 1;
7583 x = gen_rtx_PLUS (Pmode,
7584 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7585 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7586 plus_constant (other, INTVAL (constant)));
7587 }
7588 }
7589
7590 if (changed && legitimate_address_p (mode, x, FALSE))
7591 return x;
7592
7593 if (GET_CODE (XEXP (x, 0)) == MULT)
7594 {
7595 changed = 1;
7596 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7597 }
7598
7599 if (GET_CODE (XEXP (x, 1)) == MULT)
7600 {
7601 changed = 1;
7602 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7603 }
7604
7605 if (changed
7606 && REG_P (XEXP (x, 1))
7607 && REG_P (XEXP (x, 0)))
7608 return x;
7609
7610 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7611 {
7612 changed = 1;
7613 x = legitimize_pic_address (x, 0);
7614 }
7615
7616 if (changed && legitimate_address_p (mode, x, FALSE))
7617 return x;
7618
7619 if (REG_P (XEXP (x, 0)))
7620 {
7621 rtx temp = gen_reg_rtx (Pmode);
7622 rtx val = force_operand (XEXP (x, 1), temp);
7623 if (val != temp)
7624 emit_move_insn (temp, val);
7625
7626 XEXP (x, 1) = temp;
7627 return x;
7628 }
7629
7630 else if (REG_P (XEXP (x, 1)))
7631 {
7632 rtx temp = gen_reg_rtx (Pmode);
7633 rtx val = force_operand (XEXP (x, 0), temp);
7634 if (val != temp)
7635 emit_move_insn (temp, val);
7636
7637 XEXP (x, 0) = temp;
7638 return x;
7639 }
7640 }
7641
7642 return x;
7643 }
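
/* Illustrative sketch (not part of the original sources): the
   shift-to-multiply canonicalization performed above, turning
   (ashift REG (const_int LOG)) with LOG < 4 into
   (mult REG (const_int 1 << LOG)) so that it matches the scaled-index
   addressing forms.  */
#if 0
static rtx
sketch_canonicalize_shift (rtx reg, int log)
{
  return gen_rtx_MULT (Pmode, force_reg (Pmode, reg), GEN_INT (1 << log));
}
#endif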
7644 \f
7645 /* Print an integer constant expression in assembler syntax. Addition
7646 and subtraction are the only arithmetic that may appear in these
7647 expressions. FILE is the stdio stream to write to, X is the rtx, and
7648 CODE is the operand print code from the output string. */
7649
7650 static void
7651 output_pic_addr_const (FILE *file, rtx x, int code)
7652 {
7653 char buf[256];
7654
7655 switch (GET_CODE (x))
7656 {
7657 case PC:
7658 gcc_assert (flag_pic);
7659 putc ('.', file);
7660 break;
7661
7662 case SYMBOL_REF:
7663 if (! TARGET_MACHO || TARGET_64BIT)
7664 output_addr_const (file, x);
7665 else
7666 {
7667 const char *name = XSTR (x, 0);
7668
7669 /* Mark the decl as referenced so that cgraph will output the function. */
7670 if (SYMBOL_REF_DECL (x))
7671 mark_decl_referenced (SYMBOL_REF_DECL (x));
7672
7673 #if TARGET_MACHO
7674 if (MACHOPIC_INDIRECT
7675 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7676 name = machopic_indirection_name (x, /*stub_p=*/true);
7677 #endif
7678 assemble_name (file, name);
7679 }
7680 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7681 fputs ("@PLT", file);
7682 break;
7683
7684 case LABEL_REF:
7685 x = XEXP (x, 0);
7686 /* FALLTHRU */
7687 case CODE_LABEL:
7688 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7689 assemble_name (asm_out_file, buf);
7690 break;
7691
7692 case CONST_INT:
7693 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7694 break;
7695
7696 case CONST:
7697 /* This used to output parentheses around the expression,
7698 but that does not work on the 386 (either ATT or BSD assembler). */
7699 output_pic_addr_const (file, XEXP (x, 0), code);
7700 break;
7701
7702 case CONST_DOUBLE:
7703 if (GET_MODE (x) == VOIDmode)
7704 {
7705 /* We can use %d if the number is <32 bits and positive. */
7706 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7707 fprintf (file, "0x%lx%08lx",
7708 (unsigned long) CONST_DOUBLE_HIGH (x),
7709 (unsigned long) CONST_DOUBLE_LOW (x));
7710 else
7711 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7712 }
7713 else
7714 /* We can't handle floating point constants;
7715 PRINT_OPERAND must handle them. */
7716 output_operand_lossage ("floating constant misused");
7717 break;
7718
7719 case PLUS:
7720 /* Some assemblers need integer constants to appear first. */
7721 if (CONST_INT_P (XEXP (x, 0)))
7722 {
7723 output_pic_addr_const (file, XEXP (x, 0), code);
7724 putc ('+', file);
7725 output_pic_addr_const (file, XEXP (x, 1), code);
7726 }
7727 else
7728 {
7729 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7730 output_pic_addr_const (file, XEXP (x, 1), code);
7731 putc ('+', file);
7732 output_pic_addr_const (file, XEXP (x, 0), code);
7733 }
7734 break;
7735
7736 case MINUS:
7737 if (!TARGET_MACHO)
7738 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7739 output_pic_addr_const (file, XEXP (x, 0), code);
7740 putc ('-', file);
7741 output_pic_addr_const (file, XEXP (x, 1), code);
7742 if (!TARGET_MACHO)
7743 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7744 break;
7745
7746 case UNSPEC:
7747 gcc_assert (XVECLEN (x, 0) == 1);
7748 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7749 switch (XINT (x, 1))
7750 {
7751 case UNSPEC_GOT:
7752 fputs ("@GOT", file);
7753 break;
7754 case UNSPEC_GOTOFF:
7755 fputs ("@GOTOFF", file);
7756 break;
7757 case UNSPEC_PLTOFF:
7758 fputs ("@PLTOFF", file);
7759 break;
7760 case UNSPEC_GOTPCREL:
7761 fputs ("@GOTPCREL(%rip)", file);
7762 break;
7763 case UNSPEC_GOTTPOFF:
7764 /* FIXME: This might be @TPOFF in Sun ld too. */
7765 fputs ("@GOTTPOFF", file);
7766 break;
7767 case UNSPEC_TPOFF:
7768 fputs ("@TPOFF", file);
7769 break;
7770 case UNSPEC_NTPOFF:
7771 if (TARGET_64BIT)
7772 fputs ("@TPOFF", file);
7773 else
7774 fputs ("@NTPOFF", file);
7775 break;
7776 case UNSPEC_DTPOFF:
7777 fputs ("@DTPOFF", file);
7778 break;
7779 case UNSPEC_GOTNTPOFF:
7780 if (TARGET_64BIT)
7781 fputs ("@GOTTPOFF(%rip)", file);
7782 else
7783 fputs ("@GOTNTPOFF", file);
7784 break;
7785 case UNSPEC_INDNTPOFF:
7786 fputs ("@INDNTPOFF", file);
7787 break;
7788 default:
7789 output_operand_lossage ("invalid UNSPEC as operand");
7790 break;
7791 }
7792 break;
7793
7794 default:
7795 output_operand_lossage ("invalid expression as operand");
7796 }
7797 }
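
/* Illustrative examples (not part of the original sources) of the text
   emitted above: (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF) prints
   "foo@GOTOFF", UNSPEC_GOTPCREL prints "foo@GOTPCREL(%rip)", and a
   non-local SYMBOL_REF printed with code 'P' gets an "@PLT" suffix.  */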
7798
7799 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7800 We need to emit DTP-relative relocations. */
7801
7802 static void
7803 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7804 {
7805 fputs (ASM_LONG, file);
7806 output_addr_const (file, x);
7807 fputs ("@DTPOFF", file);
7808 switch (size)
7809 {
7810 case 4:
7811 break;
7812 case 8:
7813 fputs (", 0", file);
7814 break;
7815 default:
7816 gcc_unreachable ();
7817 }
7818 }
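
/* For example (not part of the original sources), a 4-byte DTP-relative
   reference to "foo" is emitted as the ASM_LONG directive followed by
   "foo@DTPOFF"; the 8-byte case appends ", 0" for the upper half.  */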
7819
7820 /* In the name of slightly smaller debug output, and to cater to
7821 general assembler lossage, recognize PIC+GOTOFF and turn it back
7822 into a direct symbol reference.
7823
7824 On Darwin, this is necessary to avoid a crash, because Darwin
7825 has a different PIC label for each routine but the DWARF debugging
7826 information is not associated with any particular routine, so it's
7827 necessary to remove references to the PIC label from RTL stored by
7828 the DWARF output code. */
7829
7830 static rtx
7831 ix86_delegitimize_address (rtx orig_x)
7832 {
7833 rtx x = orig_x;
7834 /* reg_addend is NULL or a multiple of some register. */
7835 rtx reg_addend = NULL_RTX;
7836 /* const_addend is NULL or a const_int. */
7837 rtx const_addend = NULL_RTX;
7838 /* This is the result, or NULL. */
7839 rtx result = NULL_RTX;
7840
7841 if (MEM_P (x))
7842 x = XEXP (x, 0);
7843
7844 if (TARGET_64BIT)
7845 {
7846 if (GET_CODE (x) != CONST
7847 || GET_CODE (XEXP (x, 0)) != UNSPEC
7848 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7849 || !MEM_P (orig_x))
7850 return orig_x;
7851 return XVECEXP (XEXP (x, 0), 0, 0);
7852 }
7853
7854 if (GET_CODE (x) != PLUS
7855 || GET_CODE (XEXP (x, 1)) != CONST)
7856 return orig_x;
7857
7858 if (REG_P (XEXP (x, 0))
7859 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7860 /* %ebx + GOT/GOTOFF */
7861 ;
7862 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7863 {
7864 /* %ebx + %reg * scale + GOT/GOTOFF */
7865 reg_addend = XEXP (x, 0);
7866 if (REG_P (XEXP (reg_addend, 0))
7867 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7868 reg_addend = XEXP (reg_addend, 1);
7869 else if (REG_P (XEXP (reg_addend, 1))
7870 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7871 reg_addend = XEXP (reg_addend, 0);
7872 else
7873 return orig_x;
7874 if (!REG_P (reg_addend)
7875 && GET_CODE (reg_addend) != MULT
7876 && GET_CODE (reg_addend) != ASHIFT)
7877 return orig_x;
7878 }
7879 else
7880 return orig_x;
7881
7882 x = XEXP (XEXP (x, 1), 0);
7883 if (GET_CODE (x) == PLUS
7884 && CONST_INT_P (XEXP (x, 1)))
7885 {
7886 const_addend = XEXP (x, 1);
7887 x = XEXP (x, 0);
7888 }
7889
7890 if (GET_CODE (x) == UNSPEC
7891 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7892 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7893 result = XVECEXP (x, 0, 0);
7894
7895 if (TARGET_MACHO && darwin_local_data_pic (x)
7896 && !MEM_P (orig_x))
7897 result = XEXP (x, 0);
7898
7899 if (! result)
7900 return orig_x;
7901
7902 if (const_addend)
7903 result = gen_rtx_PLUS (Pmode, result, const_addend);
7904 if (reg_addend)
7905 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7906 return result;
7907 }
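
/* Illustrative example (not part of the original sources): in 32-bit PIC
   code, (mem (plus (reg %ebx) (const (unspec [(symbol_ref "foo")]
   UNSPEC_GOT)))) is delegitimized back to the plain (symbol_ref "foo");
   any register and constant addends found are re-applied around the
   result.  */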
7908 \f
7909 static void
7910 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7911 int fp, FILE *file)
7912 {
7913 const char *suffix;
7914
7915 if (mode == CCFPmode || mode == CCFPUmode)
7916 {
7917 enum rtx_code second_code, bypass_code;
7918 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7919 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7920 code = ix86_fp_compare_code_to_integer (code);
7921 mode = CCmode;
7922 }
7923 if (reverse)
7924 code = reverse_condition (code);
7925
7926 switch (code)
7927 {
7928 case EQ:
7929 suffix = "e";
7930 break;
7931 case NE:
7932 suffix = "ne";
7933 break;
7934 case GT:
7935 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7936 suffix = "g";
7937 break;
7938 case GTU:
7939 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7940 Those same assemblers have the same but opposite lossage on cmov. */
7941 gcc_assert (mode == CCmode);
7942 suffix = fp ? "nbe" : "a";
7943 break;
7944 case LT:
7945 switch (mode)
7946 {
7947 case CCNOmode:
7948 case CCGOCmode:
7949 suffix = "s";
7950 break;
7951
7952 case CCmode:
7953 case CCGCmode:
7954 suffix = "l";
7955 break;
7956
7957 default:
7958 gcc_unreachable ();
7959 }
7960 break;
7961 case LTU:
7962 gcc_assert (mode == CCmode);
7963 suffix = "b";
7964 break;
7965 case GE:
7966 switch (mode)
7967 {
7968 case CCNOmode:
7969 case CCGOCmode:
7970 suffix = "ns";
7971 break;
7972
7973 case CCmode:
7974 case CCGCmode:
7975 suffix = "ge";
7976 break;
7977
7978 default:
7979 gcc_unreachable ();
7980 }
7981 break;
7982 case GEU:
7983 /* ??? As above. */
7984 gcc_assert (mode == CCmode);
7985 suffix = fp ? "nb" : "ae";
7986 break;
7987 case LE:
7988 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7989 suffix = "le";
7990 break;
7991 case LEU:
7992 gcc_assert (mode == CCmode);
7993 suffix = "be";
7994 break;
7995 case UNORDERED:
7996 suffix = fp ? "u" : "p";
7997 break;
7998 case ORDERED:
7999 suffix = fp ? "nu" : "np";
8000 break;
8001 default:
8002 gcc_unreachable ();
8003 }
8004 fputs (suffix, file);
8005 }
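
/* Illustrative examples (not part of the original sources): EQ prints "e"
   and NE prints "ne" (giving e.g. sete/cmovne); with REVERSE set, EQ
   prints "ne" instead.  GTU in CCmode prints "a", or "nbe" when FP is set,
   for the fcmov case mentioned above.  */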
8006
8007 /* Print the name of register X to FILE based on its machine mode and number.
8008 If CODE is 'w', pretend the mode is HImode.
8009 If CODE is 'b', pretend the mode is QImode.
8010 If CODE is 'k', pretend the mode is SImode.
8011 If CODE is 'q', pretend the mode is DImode.
8012 If CODE is 'h', pretend the reg is the 'high' byte register.
8013 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8014
8015 void
8016 print_reg (rtx x, int code, FILE *file)
8017 {
8018 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8019 && REGNO (x) != FRAME_POINTER_REGNUM
8020 && REGNO (x) != FLAGS_REG
8021 && REGNO (x) != FPSR_REG
8022 && REGNO (x) != FPCR_REG);
8023
8024 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8025 putc ('%', file);
8026
8027 if (code == 'w' || MMX_REG_P (x))
8028 code = 2;
8029 else if (code == 'b')
8030 code = 1;
8031 else if (code == 'k')
8032 code = 4;
8033 else if (code == 'q')
8034 code = 8;
8035 else if (code == 'y')
8036 code = 3;
8037 else if (code == 'h')
8038 code = 0;
8039 else
8040 code = GET_MODE_SIZE (GET_MODE (x));
8041
8042 /* Irritatingly, AMD extended registers use a different naming convention
8043 from the normal registers. */
8044 if (REX_INT_REG_P (x))
8045 {
8046 gcc_assert (TARGET_64BIT);
8047 switch (code)
8048 {
8049 case 0:
8050 error ("extended registers have no high halves");
8051 break;
8052 case 1:
8053 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8054 break;
8055 case 2:
8056 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8057 break;
8058 case 4:
8059 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8060 break;
8061 case 8:
8062 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8063 break;
8064 default:
8065 error ("unsupported operand size for extended register");
8066 break;
8067 }
8068 return;
8069 }
8070 switch (code)
8071 {
8072 case 3:
8073 if (STACK_TOP_P (x))
8074 {
8075 fputs ("st(0)", file);
8076 break;
8077 }
8078 /* FALLTHRU */
8079 case 8:
8080 case 4:
8081 case 12:
8082 if (! ANY_FP_REG_P (x))
8083 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8084 /* FALLTHRU */
8085 case 16:
8086 case 2:
8087 normal:
8088 fputs (hi_reg_name[REGNO (x)], file);
8089 break;
8090 case 1:
8091 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8092 goto normal;
8093 fputs (qi_reg_name[REGNO (x)], file);
8094 break;
8095 case 0:
8096 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8097 goto normal;
8098 fputs (qi_high_reg_name[REGNO (x)], file);
8099 break;
8100 default:
8101 gcc_unreachable ();
8102 }
8103 }
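
/* Illustrative examples (not part of the original sources): for the AMD
   extended register r8, code 'b' prints "r8b", 'w' prints "r8w", 'k'
   prints "r8d" and 'q' prints "r8", matching the fprintf formats above;
   the "%" prefix is added for the AT&T dialect or when USER_LABEL_PREFIX
   is empty.  */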
8104
8105 /* Locate some local-dynamic symbol still in use by this function
8106 so that we can print its name in some tls_local_dynamic_base
8107 pattern. */
8108
8109 static const char *
8110 get_some_local_dynamic_name (void)
8111 {
8112 rtx insn;
8113
8114 if (cfun->machine->some_ld_name)
8115 return cfun->machine->some_ld_name;
8116
8117 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8118 if (INSN_P (insn)
8119 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8120 return cfun->machine->some_ld_name;
8121
8122 gcc_unreachable ();
8123 }
8124
8125 static int
8126 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8127 {
8128 rtx x = *px;
8129
8130 if (GET_CODE (x) == SYMBOL_REF
8131 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8132 {
8133 cfun->machine->some_ld_name = XSTR (x, 0);
8134 return 1;
8135 }
8136
8137 return 0;
8138 }
8139
8140 /* Meaning of CODE:
8141 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8142 C -- print opcode suffix for set/cmov insn.
8143 c -- like C, but print reversed condition
8144 F,f -- likewise, but for floating-point.
8145 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8146 otherwise nothing
8147 R -- print the prefix for register names.
8148 z -- print the opcode suffix for the size of the current operand.
8149 * -- print a star (in certain assembler syntax)
8150 A -- print an absolute memory reference.
8151 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8152 s -- print a shift double count, followed by the assembler's argument
8153 delimiter.
8154 b -- print the QImode name of the register for the indicated operand.
8155 %b0 would print %al if operands[0] is reg 0.
8156 w -- likewise, print the HImode name of the register.
8157 k -- likewise, print the SImode name of the register.
8158 q -- likewise, print the DImode name of the register.
8159 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8160 y -- print "st(0)" instead of "st" as a register.
8161 D -- print condition for SSE cmp instruction.
8162 P -- if PIC, print an @PLT suffix.
8163 X -- don't print any sort of PIC '@' suffix for a symbol.
8164 & -- print some in-use local-dynamic symbol name.
8165 H -- print a memory address offset by 8; used for sse high-parts
8166 */
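
/* Illustrative examples (not part of the original sources): if operands[0]
   is reg 0 (the ax register), "%b0" prints %al, "%h0" prints %ah, "%w0"
   prints %ax, "%k0" prints %eax and, in 64-bit code, "%q0" prints %rax;
   "%y0" on the FP stack top prints "st(0)".  */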
8167
8168 void
8169 print_operand (FILE *file, rtx x, int code)
8170 {
8171 if (code)
8172 {
8173 switch (code)
8174 {
8175 case '*':
8176 if (ASSEMBLER_DIALECT == ASM_ATT)
8177 putc ('*', file);
8178 return;
8179
8180 case '&':
8181 assemble_name (file, get_some_local_dynamic_name ());
8182 return;
8183
8184 case 'A':
8185 switch (ASSEMBLER_DIALECT)
8186 {
8187 case ASM_ATT:
8188 putc ('*', file);
8189 break;
8190
8191 case ASM_INTEL:
8192 /* Intel syntax. For absolute addresses, registers should not
8193 be surrounded by brackets. */
8194 if (!REG_P (x))
8195 {
8196 putc ('[', file);
8197 PRINT_OPERAND (file, x, 0);
8198 putc (']', file);
8199 return;
8200 }
8201 break;
8202
8203 default:
8204 gcc_unreachable ();
8205 }
8206
8207 PRINT_OPERAND (file, x, 0);
8208 return;
8209
8210
8211 case 'L':
8212 if (ASSEMBLER_DIALECT == ASM_ATT)
8213 putc ('l', file);
8214 return;
8215
8216 case 'W':
8217 if (ASSEMBLER_DIALECT == ASM_ATT)
8218 putc ('w', file);
8219 return;
8220
8221 case 'B':
8222 if (ASSEMBLER_DIALECT == ASM_ATT)
8223 putc ('b', file);
8224 return;
8225
8226 case 'Q':
8227 if (ASSEMBLER_DIALECT == ASM_ATT)
8228 putc ('l', file);
8229 return;
8230
8231 case 'S':
8232 if (ASSEMBLER_DIALECT == ASM_ATT)
8233 putc ('s', file);
8234 return;
8235
8236 case 'T':
8237 if (ASSEMBLER_DIALECT == ASM_ATT)
8238 putc ('t', file);
8239 return;
8240
8241 case 'z':
8242 /* 387 opcodes don't get size suffixes if the operands are
8243 registers. */
8244 if (STACK_REG_P (x))
8245 return;
8246
8247 /* Likewise if using Intel opcodes. */
8248 if (ASSEMBLER_DIALECT == ASM_INTEL)
8249 return;
8250
8251 /* Derive the opcode suffix from the size of the operand. */
8252 switch (GET_MODE_SIZE (GET_MODE (x)))
8253 {
8254 case 1:
8255 putc ('b', file);
8256 return;
8257
8258 case 2:
8259 #ifdef HAVE_GAS_FILDS_FISTS
8260 putc ('s', file);
8261 #endif
8262 return;
8263
8264 case 4:
8265 if (GET_MODE (x) == SFmode)
8266 {
8267 putc ('s', file);
8268 return;
8269 }
8270 else
8271 putc ('l', file);
8272 return;
8273
8274 case 12:
8275 case 16:
8276 putc ('t', file);
8277 return;
8278
8279 case 8:
8280 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8281 {
8282 #ifdef GAS_MNEMONICS
8283 putc ('q', file);
8284 #else
8285 putc ('l', file);
8286 putc ('l', file);
8287 #endif
8288 }
8289 else
8290 putc ('l', file);
8291 return;
8292
8293 default:
8294 gcc_unreachable ();
8295 }
8296
8297 case 'b':
8298 case 'w':
8299 case 'k':
8300 case 'q':
8301 case 'h':
8302 case 'y':
8303 case 'X':
8304 case 'P':
8305 break;
8306
8307 case 's':
8308 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8309 {
8310 PRINT_OPERAND (file, x, 0);
8311 putc (',', file);
8312 }
8313 return;
8314
8315 case 'D':
8316 /* A little bit of brain damage here: the SSE compare instructions
8317 use completely different names for the comparisons than the
8318 fp conditional moves do. */
8319 switch (GET_CODE (x))
8320 {
8321 case EQ:
8322 case UNEQ:
8323 fputs ("eq", file);
8324 break;
8325 case LT:
8326 case UNLT:
8327 fputs ("lt", file);
8328 break;
8329 case LE:
8330 case UNLE:
8331 fputs ("le", file);
8332 break;
8333 case UNORDERED:
8334 fputs ("unord", file);
8335 break;
8336 case NE:
8337 case LTGT:
8338 fputs ("neq", file);
8339 break;
8340 case UNGE:
8341 case GE:
8342 fputs ("nlt", file);
8343 break;
8344 case UNGT:
8345 case GT:
8346 fputs ("nle", file);
8347 break;
8348 case ORDERED:
8349 fputs ("ord", file);
8350 break;
8351 default:
8352 gcc_unreachable ();
8353 }
8354 return;
8355 case 'O':
8356 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8357 if (ASSEMBLER_DIALECT == ASM_ATT)
8358 {
8359 switch (GET_MODE (x))
8360 {
8361 case HImode: putc ('w', file); break;
8362 case SImode:
8363 case SFmode: putc ('l', file); break;
8364 case DImode:
8365 case DFmode: putc ('q', file); break;
8366 default: gcc_unreachable ();
8367 }
8368 putc ('.', file);
8369 }
8370 #endif
8371 return;
8372 case 'C':
8373 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8374 return;
8375 case 'F':
8376 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8377 if (ASSEMBLER_DIALECT == ASM_ATT)
8378 putc ('.', file);
8379 #endif
8380 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8381 return;
8382
8383 /* Like above, but reverse condition */
8384 case 'c':
8385 /* Check to see if argument to %c is really a constant
8386 and not a condition code which needs to be reversed. */
8387 if (!COMPARISON_P (x))
8388 {
8389 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8390 return;
8391 }
8392 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8393 return;
8394 case 'f':
8395 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8396 if (ASSEMBLER_DIALECT == ASM_ATT)
8397 putc ('.', file);
8398 #endif
8399 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8400 return;
8401
8402 case 'H':
8403 /* It doesn't actually matter what mode we use here, as we're
8404 only going to use this for printing. */
8405 x = adjust_address_nv (x, DImode, 8);
8406 break;
8407
8408 case '+':
8409 {
8410 rtx x;
8411
8412 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8413 return;
8414
8415 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8416 if (x)
8417 {
8418 int pred_val = INTVAL (XEXP (x, 0));
8419
8420 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8421 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8422 {
8423 int taken = pred_val > REG_BR_PROB_BASE / 2;
8424 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8425
8426 /* Emit hints only in the case where the default branch prediction
8427 heuristics would fail. */
8428 if (taken != cputaken)
8429 {
8430 /* We use 3e (DS) prefix for taken branches and
8431 2e (CS) prefix for not taken branches. */
8432 if (taken)
8433 fputs ("ds ; ", file);
8434 else
8435 fputs ("cs ; ", file);
8436 }
8437 }
8438 }
8439 return;
8440 }
8441 default:
8442 output_operand_lossage ("invalid operand code '%c'", code);
8443 }
8444 }
8445
8446 if (REG_P (x))
8447 print_reg (x, code, file);
8448
8449 else if (MEM_P (x))
8450 {
8451 /* No `byte ptr' prefix for call instructions. */
8452 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8453 {
8454 const char * size;
8455 switch (GET_MODE_SIZE (GET_MODE (x)))
8456 {
8457 case 1: size = "BYTE"; break;
8458 case 2: size = "WORD"; break;
8459 case 4: size = "DWORD"; break;
8460 case 8: size = "QWORD"; break;
8461 case 12: size = "XWORD"; break;
8462 case 16: size = "XMMWORD"; break;
8463 default:
8464 gcc_unreachable ();
8465 }
8466
8467 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8468 if (code == 'b')
8469 size = "BYTE";
8470 else if (code == 'w')
8471 size = "WORD";
8472 else if (code == 'k')
8473 size = "DWORD";
8474
8475 fputs (size, file);
8476 fputs (" PTR ", file);
8477 }
8478
8479 x = XEXP (x, 0);
8480 /* Avoid (%rip) for call operands. */
8481 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8482 && !CONST_INT_P (x))
8483 output_addr_const (file, x);
8484 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8485 output_operand_lossage ("invalid constraints for operand");
8486 else
8487 output_address (x);
8488 }
8489
8490 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8491 {
8492 REAL_VALUE_TYPE r;
8493 long l;
8494
8495 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8496 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8497
8498 if (ASSEMBLER_DIALECT == ASM_ATT)
8499 putc ('$', file);
8500 fprintf (file, "0x%08lx", l);
8501 }
8502
8503 /* These float cases don't actually occur as immediate operands. */
8504 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8505 {
8506 char dstr[30];
8507
8508 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8509 fprintf (file, "%s", dstr);
8510 }
8511
8512 else if (GET_CODE (x) == CONST_DOUBLE
8513 && GET_MODE (x) == XFmode)
8514 {
8515 char dstr[30];
8516
8517 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8518 fprintf (file, "%s", dstr);
8519 }
8520
8521 else
8522 {
8523 /* We have patterns that allow zero sets of memory, for instance.
8524 In 64-bit mode, we should probably support all 8-byte vectors,
8525 since we can in fact encode that into an immediate. */
8526 if (GET_CODE (x) == CONST_VECTOR)
8527 {
8528 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8529 x = const0_rtx;
8530 }
8531
8532 if (code != 'P')
8533 {
8534 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8535 {
8536 if (ASSEMBLER_DIALECT == ASM_ATT)
8537 putc ('$', file);
8538 }
8539 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8540 || GET_CODE (x) == LABEL_REF)
8541 {
8542 if (ASSEMBLER_DIALECT == ASM_ATT)
8543 putc ('$', file);
8544 else
8545 fputs ("OFFSET FLAT:", file);
8546 }
8547 }
8548 if (CONST_INT_P (x))
8549 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8550 else if (flag_pic)
8551 output_pic_addr_const (file, x, code);
8552 else
8553 output_addr_const (file, x);
8554 }
8555 }
8556 \f
8557 /* Print a memory operand whose address is ADDR. */
8558
8559 void
8560 print_operand_address (FILE *file, rtx addr)
8561 {
8562 struct ix86_address parts;
8563 rtx base, index, disp;
8564 int scale;
8565 int ok = ix86_decompose_address (addr, &parts);
8566
8567 gcc_assert (ok);
8568
8569 base = parts.base;
8570 index = parts.index;
8571 disp = parts.disp;
8572 scale = parts.scale;
8573
8574 switch (parts.seg)
8575 {
8576 case SEG_DEFAULT:
8577 break;
8578 case SEG_FS:
8579 case SEG_GS:
8580 if (USER_LABEL_PREFIX[0] == 0)
8581 putc ('%', file);
8582 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8583 break;
8584 default:
8585 gcc_unreachable ();
8586 }
8587
8588 if (!base && !index)
8589 {
8590 /* A displacement-only address requires special attention. */
8591
8592 if (CONST_INT_P (disp))
8593 {
8594 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8595 {
8596 if (USER_LABEL_PREFIX[0] == 0)
8597 putc ('%', file);
8598 fputs ("ds:", file);
8599 }
8600 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8601 }
8602 else if (flag_pic)
8603 output_pic_addr_const (file, disp, 0);
8604 else
8605 output_addr_const (file, disp);
8606
8607 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8608 if (TARGET_64BIT)
8609 {
8610 if (GET_CODE (disp) == CONST
8611 && GET_CODE (XEXP (disp, 0)) == PLUS
8612 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8613 disp = XEXP (XEXP (disp, 0), 0);
8614 if (GET_CODE (disp) == LABEL_REF
8615 || (GET_CODE (disp) == SYMBOL_REF
8616 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8617 fputs ("(%rip)", file);
8618 }
8619 }
8620 else
8621 {
8622 if (ASSEMBLER_DIALECT == ASM_ATT)
8623 {
8624 if (disp)
8625 {
8626 if (flag_pic)
8627 output_pic_addr_const (file, disp, 0);
8628 else if (GET_CODE (disp) == LABEL_REF)
8629 output_asm_label (disp);
8630 else
8631 output_addr_const (file, disp);
8632 }
8633
8634 putc ('(', file);
8635 if (base)
8636 print_reg (base, 0, file);
8637 if (index)
8638 {
8639 putc (',', file);
8640 print_reg (index, 0, file);
8641 if (scale != 1)
8642 fprintf (file, ",%d", scale);
8643 }
8644 putc (')', file);
8645 }
8646 else
8647 {
8648 rtx offset = NULL_RTX;
8649
8650 if (disp)
8651 {
8652 /* Pull out the offset of a symbol; print any symbol itself. */
8653 if (GET_CODE (disp) == CONST
8654 && GET_CODE (XEXP (disp, 0)) == PLUS
8655 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8656 {
8657 offset = XEXP (XEXP (disp, 0), 1);
8658 disp = gen_rtx_CONST (VOIDmode,
8659 XEXP (XEXP (disp, 0), 0));
8660 }
8661
8662 if (flag_pic)
8663 output_pic_addr_const (file, disp, 0);
8664 else if (GET_CODE (disp) == LABEL_REF)
8665 output_asm_label (disp);
8666 else if (CONST_INT_P (disp))
8667 offset = disp;
8668 else
8669 output_addr_const (file, disp);
8670 }
8671
8672 putc ('[', file);
8673 if (base)
8674 {
8675 print_reg (base, 0, file);
8676 if (offset)
8677 {
8678 if (INTVAL (offset) >= 0)
8679 putc ('+', file);
8680 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8681 }
8682 }
8683 else if (offset)
8684 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8685 else
8686 putc ('0', file);
8687
8688 if (index)
8689 {
8690 putc ('+', file);
8691 print_reg (index, 0, file);
8692 if (scale != 1)
8693 fprintf (file, "*%d", scale);
8694 }
8695 putc (']', file);
8696 }
8697 }
8698 }
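
/* Illustrative example (not part of the original sources): an address with
   base %eax, index %ebx, scale 2 and displacement 4 is printed as
   "4(%eax,%ebx,2)" in AT&T syntax and as "[eax+4+ebx*2]" in Intel syntax
   by the code above.  */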
8699
8700 bool
8701 output_addr_const_extra (FILE *file, rtx x)
8702 {
8703 rtx op;
8704
8705 if (GET_CODE (x) != UNSPEC)
8706 return false;
8707
8708 op = XVECEXP (x, 0, 0);
8709 switch (XINT (x, 1))
8710 {
8711 case UNSPEC_GOTTPOFF:
8712 output_addr_const (file, op);
8713 /* FIXME: This might be @TPOFF in Sun ld. */
8714 fputs ("@GOTTPOFF", file);
8715 break;
8716 case UNSPEC_TPOFF:
8717 output_addr_const (file, op);
8718 fputs ("@TPOFF", file);
8719 break;
8720 case UNSPEC_NTPOFF:
8721 output_addr_const (file, op);
8722 if (TARGET_64BIT)
8723 fputs ("@TPOFF", file);
8724 else
8725 fputs ("@NTPOFF", file);
8726 break;
8727 case UNSPEC_DTPOFF:
8728 output_addr_const (file, op);
8729 fputs ("@DTPOFF", file);
8730 break;
8731 case UNSPEC_GOTNTPOFF:
8732 output_addr_const (file, op);
8733 if (TARGET_64BIT)
8734 fputs ("@GOTTPOFF(%rip)", file);
8735 else
8736 fputs ("@GOTNTPOFF", file);
8737 break;
8738 case UNSPEC_INDNTPOFF:
8739 output_addr_const (file, op);
8740 fputs ("@INDNTPOFF", file);
8741 break;
8742
8743 default:
8744 return false;
8745 }
8746
8747 return true;
8748 }
8749 \f
8750 /* Split one or more DImode RTL references into pairs of SImode
8751 references. The RTL can be REG, offsettable MEM, integer constant, or
8752 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8753 split and "num" is its length. lo_half and hi_half are output arrays
8754 that parallel "operands". */
8755
8756 void
8757 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8758 {
8759 while (num--)
8760 {
8761 rtx op = operands[num];
8762
8763 /* simplify_subreg refuses to split volatile memory references,
8764 but we still have to handle them. */
8765 if (MEM_P (op))
8766 {
8767 lo_half[num] = adjust_address (op, SImode, 0);
8768 hi_half[num] = adjust_address (op, SImode, 4);
8769 }
8770 else
8771 {
8772 lo_half[num] = simplify_gen_subreg (SImode, op,
8773 GET_MODE (op) == VOIDmode
8774 ? DImode : GET_MODE (op), 0);
8775 hi_half[num] = simplify_gen_subreg (SImode, op,
8776 GET_MODE (op) == VOIDmode
8777 ? DImode : GET_MODE (op), 4);
8778 }
8779 }
8780 }
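
/* Illustrative usage sketch (not part of the original sources): split a
   single DImode operand into its two SImode halves.  */
#if 0
static void
sketch_split_one_di (rtx op)
{
  rtx ops[1], lo[1], hi[1];
  ops[0] = op;
  split_di (ops, 1, lo, hi);
  /* lo[0] now refers to the low 4 bytes of OP and hi[0] to the high 4.  */
}
#endif
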
8781 /* Split one or more TImode RTL references into pairs of DImode
8782 references. The RTL can be REG, offsettable MEM, integer constant, or
8783 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8784 split and "num" is its length. lo_half and hi_half are output arrays
8785 that parallel "operands". */
8786
8787 void
8788 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8789 {
8790 while (num--)
8791 {
8792 rtx op = operands[num];
8793
8794 /* simplify_subreg refuses to split volatile memory references, but we
8795 still have to handle them. */
8796 if (MEM_P (op))
8797 {
8798 lo_half[num] = adjust_address (op, DImode, 0);
8799 hi_half[num] = adjust_address (op, DImode, 8);
8800 }
8801 else
8802 {
8803 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8804 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8805 }
8806 }
8807 }
8808 \f
8809 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8810 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8811 is the expression of the binary operation. The output may either be
8812 emitted here, or returned to the caller, like all output_* functions.
8813
8814 There is no guarantee that the operands are the same mode, as they
8815 might be within FLOAT or FLOAT_EXTEND expressions. */
8816
8817 #ifndef SYSV386_COMPAT
8818 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8819 wants to fix the assemblers because that causes incompatibility
8820 with gcc. No-one wants to fix gcc because that causes
8821 incompatibility with assemblers... You can use the option of
8822 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8823 #define SYSV386_COMPAT 1
8824 #endif
8825
8826 const char *
8827 output_387_binary_op (rtx insn, rtx *operands)
8828 {
8829 static char buf[30];
8830 const char *p;
8831 const char *ssep;
8832 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8833
8834 #ifdef ENABLE_CHECKING
8835   /* Even if we do not want to check the inputs, this documents the input
8836      constraints, which helps in understanding the following code.  */
8837 if (STACK_REG_P (operands[0])
8838 && ((REG_P (operands[1])
8839 && REGNO (operands[0]) == REGNO (operands[1])
8840 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8841 || (REG_P (operands[2])
8842 && REGNO (operands[0]) == REGNO (operands[2])
8843 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8844 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8845 ; /* ok */
8846 else
8847 gcc_assert (is_sse);
8848 #endif
8849
8850 switch (GET_CODE (operands[3]))
8851 {
8852 case PLUS:
8853 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8854 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8855 p = "fiadd";
8856 else
8857 p = "fadd";
8858 ssep = "add";
8859 break;
8860
8861 case MINUS:
8862 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8863 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8864 p = "fisub";
8865 else
8866 p = "fsub";
8867 ssep = "sub";
8868 break;
8869
8870 case MULT:
8871 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8872 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8873 p = "fimul";
8874 else
8875 p = "fmul";
8876 ssep = "mul";
8877 break;
8878
8879 case DIV:
8880 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8881 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8882 p = "fidiv";
8883 else
8884 p = "fdiv";
8885 ssep = "div";
8886 break;
8887
8888 default:
8889 gcc_unreachable ();
8890 }
8891
8892 if (is_sse)
8893 {
8894 strcpy (buf, ssep);
8895 if (GET_MODE (operands[0]) == SFmode)
8896 strcat (buf, "ss\t{%2, %0|%0, %2}");
8897 else
8898 strcat (buf, "sd\t{%2, %0|%0, %2}");
8899 return buf;
8900 }
8901 strcpy (buf, p);
8902
8903 switch (GET_CODE (operands[3]))
8904 {
8905 case MULT:
8906 case PLUS:
8907 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8908 {
8909 rtx temp = operands[2];
8910 operands[2] = operands[1];
8911 operands[1] = temp;
8912 }
8913
8914       /* We now know that operands[0] == operands[1].  */
8915
8916 if (MEM_P (operands[2]))
8917 {
8918 p = "%z2\t%2";
8919 break;
8920 }
8921
8922 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8923 {
8924 if (STACK_TOP_P (operands[0]))
8925 /* How is it that we are storing to a dead operand[2]?
8926 Well, presumably operands[1] is dead too. We can't
8927 store the result to st(0) as st(0) gets popped on this
8928 instruction. Instead store to operands[2] (which I
8929 think has to be st(1)). st(1) will be popped later.
8930 gcc <= 2.8.1 didn't have this check and generated
8931 assembly code that the Unixware assembler rejected. */
8932 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8933 else
8934 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8935 break;
8936 }
8937
8938 if (STACK_TOP_P (operands[0]))
8939 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8940 else
8941 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8942 break;
8943
8944 case MINUS:
8945 case DIV:
8946 if (MEM_P (operands[1]))
8947 {
8948 p = "r%z1\t%1";
8949 break;
8950 }
8951
8952 if (MEM_P (operands[2]))
8953 {
8954 p = "%z2\t%2";
8955 break;
8956 }
8957
8958 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8959 {
8960 #if SYSV386_COMPAT
8961 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8962 derived assemblers, confusingly reverse the direction of
8963 the operation for fsub{r} and fdiv{r} when the
8964 destination register is not st(0). The Intel assembler
8965 doesn't have this brain damage. Read !SYSV386_COMPAT to
8966 figure out what the hardware really does. */
8967 if (STACK_TOP_P (operands[0]))
8968 p = "{p\t%0, %2|rp\t%2, %0}";
8969 else
8970 p = "{rp\t%2, %0|p\t%0, %2}";
8971 #else
8972 if (STACK_TOP_P (operands[0]))
8973 /* As above for fmul/fadd, we can't store to st(0). */
8974 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8975 else
8976 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8977 #endif
8978 break;
8979 }
8980
8981 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8982 {
8983 #if SYSV386_COMPAT
8984 if (STACK_TOP_P (operands[0]))
8985 p = "{rp\t%0, %1|p\t%1, %0}";
8986 else
8987 p = "{p\t%1, %0|rp\t%0, %1}";
8988 #else
8989 if (STACK_TOP_P (operands[0]))
8990 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8991 else
8992 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8993 #endif
8994 break;
8995 }
8996
8997 if (STACK_TOP_P (operands[0]))
8998 {
8999 if (STACK_TOP_P (operands[1]))
9000 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9001 else
9002 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9003 break;
9004 }
9005 else if (STACK_TOP_P (operands[1]))
9006 {
9007 #if SYSV386_COMPAT
9008 p = "{\t%1, %0|r\t%0, %1}";
9009 #else
9010 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9011 #endif
9012 }
9013 else
9014 {
9015 #if SYSV386_COMPAT
9016 p = "{r\t%2, %0|\t%0, %2}";
9017 #else
9018 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9019 #endif
9020 }
9021 break;
9022
9023 default:
9024 gcc_unreachable ();
9025 }
9026
9027 strcat (buf, p);
9028 return buf;
9029 }
9030
9031 /* Return needed mode for entity in optimize_mode_switching pass. */
9032
9033 int
9034 ix86_mode_needed (int entity, rtx insn)
9035 {
9036 enum attr_i387_cw mode;
9037
9038   /* The mode UNINITIALIZED is used to store the control word after a
9039      function call or ASM pattern.  The mode ANY specifies that the function
9040      has no requirements on the control word and makes no changes to the
9041      bits we are interested in.  */
9042
9043 if (CALL_P (insn)
9044 || (NONJUMP_INSN_P (insn)
9045 && (asm_noperands (PATTERN (insn)) >= 0
9046 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9047 return I387_CW_UNINITIALIZED;
9048
9049 if (recog_memoized (insn) < 0)
9050 return I387_CW_ANY;
9051
9052 mode = get_attr_i387_cw (insn);
9053
9054 switch (entity)
9055 {
9056 case I387_TRUNC:
9057 if (mode == I387_CW_TRUNC)
9058 return mode;
9059 break;
9060
9061 case I387_FLOOR:
9062 if (mode == I387_CW_FLOOR)
9063 return mode;
9064 break;
9065
9066 case I387_CEIL:
9067 if (mode == I387_CW_CEIL)
9068 return mode;
9069 break;
9070
9071 case I387_MASK_PM:
9072 if (mode == I387_CW_MASK_PM)
9073 return mode;
9074 break;
9075
9076 default:
9077 gcc_unreachable ();
9078 }
9079
9080 return I387_CW_ANY;
9081 }
9082
9083 /* Output code to initialize the control word copies used by the trunc?f?i
9084    and rounding patterns.  MODE selects which rounding or masking variant
9085    of the control word to store in the corresponding stack slot.  */
9086
9087 void
9088 emit_i387_cw_initialization (int mode)
9089 {
9090 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9091 rtx new_mode;
9092
9093 int slot;
9094
9095 rtx reg = gen_reg_rtx (HImode);
9096
9097 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9098 emit_move_insn (reg, copy_rtx (stored_mode));
9099
9100 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9101 {
9102 switch (mode)
9103 {
9104 case I387_CW_TRUNC:
9105 /* round toward zero (truncate) */
9106 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9107 slot = SLOT_CW_TRUNC;
9108 break;
9109
9110 case I387_CW_FLOOR:
9111 /* round down toward -oo */
9112 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9113 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9114 slot = SLOT_CW_FLOOR;
9115 break;
9116
9117 case I387_CW_CEIL:
9118 /* round up toward +oo */
9119 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9120 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9121 slot = SLOT_CW_CEIL;
9122 break;
9123
9124 case I387_CW_MASK_PM:
9125 /* mask precision exception for nearbyint() */
9126 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9127 slot = SLOT_CW_MASK_PM;
9128 break;
9129
9130 default:
9131 gcc_unreachable ();
9132 }
9133 }
9134 else
9135 {
9136 switch (mode)
9137 {
9138 case I387_CW_TRUNC:
9139 /* round toward zero (truncate) */
9140 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9141 slot = SLOT_CW_TRUNC;
9142 break;
9143
9144 case I387_CW_FLOOR:
9145 /* round down toward -oo */
9146 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9147 slot = SLOT_CW_FLOOR;
9148 break;
9149
9150 case I387_CW_CEIL:
9151 /* round up toward +oo */
9152 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9153 slot = SLOT_CW_CEIL;
9154 break;
9155
9156 case I387_CW_MASK_PM:
9157 /* mask precision exception for nearbyint() */
9158 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9159 slot = SLOT_CW_MASK_PM;
9160 break;
9161
9162 default:
9163 gcc_unreachable ();
9164 }
9165 }
9166
9167 gcc_assert (slot < MAX_386_STACK_LOCALS);
9168
9169 new_mode = assign_386_stack_local (HImode, slot);
9170 emit_move_insn (new_mode, reg);
9171 }
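
/* The rounding control field of the x87 control word occupies bits 10-11
   (00 = nearest, 01 = down, 10 = up, 11 = toward zero) and bit 5 is the
   precision exception mask, which is what the 0x0c00, 0x0400, 0x0800 and
   0x0020 constants above select.  The I387_CW_TRUNC case, for instance,
   emits a sequence along these lines:

       fnstcw  saved_cw          # save the current control word
       mov     saved_cw, %ax
       or      $0x0c00, %ax      # RC = 11b, round toward zero
       mov     %ax, trunc_cw     # copy later activated with fldcw

   with the exact instructions depending on the target tuning.  */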
9172
9173 /* Output code for INSN to convert a float to a signed int. OPERANDS
9174 are the insn operands. The output may be [HSD]Imode and the input
9175 operand may be [SDX]Fmode. */
9176
9177 const char *
9178 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9179 {
9180 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9181 int dimode_p = GET_MODE (operands[0]) == DImode;
9182 int round_mode = get_attr_i387_cw (insn);
9183
9184 /* Jump through a hoop or two for DImode, since the hardware has no
9185 non-popping instruction. We used to do this a different way, but
9186 that was somewhat fragile and broke with post-reload splitters. */
9187 if ((dimode_p || fisttp) && !stack_top_dies)
9188 output_asm_insn ("fld\t%y1", operands);
9189
9190 gcc_assert (STACK_TOP_P (operands[1]));
9191 gcc_assert (MEM_P (operands[0]));
9192
9193 if (fisttp)
9194 output_asm_insn ("fisttp%z0\t%0", operands);
9195 else
9196 {
9197 if (round_mode != I387_CW_ANY)
9198 output_asm_insn ("fldcw\t%3", operands);
9199 if (stack_top_dies || dimode_p)
9200 output_asm_insn ("fistp%z0\t%0", operands);
9201 else
9202 output_asm_insn ("fist%z0\t%0", operands);
9203 if (round_mode != I387_CW_ANY)
9204 output_asm_insn ("fldcw\t%2", operands);
9205 }
9206
9207 return "";
9208 }
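
/* For a DImode result on the x87 the sequence emitted above is roughly
   (gas syntax):

       fld     %st(0)            # duplicate, since the 64-bit store pops
       fldcw   new_cw            # e.g. switch to round-toward-zero
       fistpll mem               # store and pop
       fldcw   saved_cw          # restore the original control word

   whereas the SSE3 fisttp form truncates regardless of the control word,
   so no fldcw juggling is needed.  */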
9209
9210 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9211 have the values zero or one, indicates the ffreep insn's operand
9212 from the OPERANDS array. */
9213
9214 static const char *
9215 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9216 {
9217 if (TARGET_USE_FFREEP)
9218 #if HAVE_AS_IX86_FFREEP
9219 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9220 #else
9221 {
9222 static char retval[] = ".word\t0xc_df";
9223 int regno = REGNO (operands[opno]);
9224
9225 gcc_assert (FP_REGNO_P (regno));
9226
9227 retval[9] = '0' + (regno - FIRST_STACK_REG);
9228 return retval;
9229 }
9230 #endif
9231
9232 return opno ? "fstp\t%y1" : "fstp\t%y0";
9233 }
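
/* The hand-encoded fallback above works because ffreep %st(N) is the
   two-byte opcode df c0+N; on a little-endian target ".word 0xc0df"
   (with the placeholder digit replaced by N) assembles to exactly those
   two bytes, so the insn can be emitted even when the assembler does not
   know the ffreep mnemonic.  */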
9234
9235
9236 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9237 should be used. UNORDERED_P is true when fucom should be used. */
9238
9239 const char *
9240 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9241 {
9242 int stack_top_dies;
9243 rtx cmp_op0, cmp_op1;
9244 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9245
9246 if (eflags_p)
9247 {
9248 cmp_op0 = operands[0];
9249 cmp_op1 = operands[1];
9250 }
9251 else
9252 {
9253 cmp_op0 = operands[1];
9254 cmp_op1 = operands[2];
9255 }
9256
9257 if (is_sse)
9258 {
9259 if (GET_MODE (operands[0]) == SFmode)
9260 if (unordered_p)
9261 return "ucomiss\t{%1, %0|%0, %1}";
9262 else
9263 return "comiss\t{%1, %0|%0, %1}";
9264 else
9265 if (unordered_p)
9266 return "ucomisd\t{%1, %0|%0, %1}";
9267 else
9268 return "comisd\t{%1, %0|%0, %1}";
9269 }
9270
9271 gcc_assert (STACK_TOP_P (cmp_op0));
9272
9273 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9274
9275 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9276 {
9277 if (stack_top_dies)
9278 {
9279 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9280 return output_387_ffreep (operands, 1);
9281 }
9282 else
9283 return "ftst\n\tfnstsw\t%0";
9284 }
9285
9286 if (STACK_REG_P (cmp_op1)
9287 && stack_top_dies
9288 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9289 && REGNO (cmp_op1) != FIRST_STACK_REG)
9290 {
9291       /* If both the top of the 387 stack and the other operand, which is
9292 	 also a stack register, die, then this must be an
9293 	 `fcompp' float compare.  */
9294
9295 if (eflags_p)
9296 {
9297 /* There is no double popping fcomi variant. Fortunately,
9298 eflags is immune from the fstp's cc clobbering. */
9299 if (unordered_p)
9300 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9301 else
9302 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9303 return output_387_ffreep (operands, 0);
9304 }
9305 else
9306 {
9307 if (unordered_p)
9308 return "fucompp\n\tfnstsw\t%0";
9309 else
9310 return "fcompp\n\tfnstsw\t%0";
9311 }
9312 }
9313 else
9314 {
9315 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9316
9317 static const char * const alt[16] =
9318 {
9319 "fcom%z2\t%y2\n\tfnstsw\t%0",
9320 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9321 "fucom%z2\t%y2\n\tfnstsw\t%0",
9322 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9323
9324 "ficom%z2\t%y2\n\tfnstsw\t%0",
9325 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9326 NULL,
9327 NULL,
9328
9329 "fcomi\t{%y1, %0|%0, %y1}",
9330 "fcomip\t{%y1, %0|%0, %y1}",
9331 "fucomi\t{%y1, %0|%0, %y1}",
9332 "fucomip\t{%y1, %0|%0, %y1}",
9333
9334 NULL,
9335 NULL,
9336 NULL,
9337 NULL
9338 };
9339
9340 int mask;
9341 const char *ret;
9342
9343 mask = eflags_p << 3;
9344 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9345 mask |= unordered_p << 1;
9346 mask |= stack_top_dies;
9347
9348 gcc_assert (mask < 16);
9349 ret = alt[mask];
9350 gcc_assert (ret);
9351
9352 return ret;
9353 }
9354 }
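
/* Example of the table lookup above: a popping fcomi-style compare of two
   FP registers has eflags_p = 1, integer mode = 0, unordered_p = 0 and
   stack_top_dies = 1, giving mask = 9 and therefore
   "fcomip\t{%y1, %0|%0, %y1}".  */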
9355
9356 void
9357 ix86_output_addr_vec_elt (FILE *file, int value)
9358 {
9359 const char *directive = ASM_LONG;
9360
9361 #ifdef ASM_QUAD
9362 if (TARGET_64BIT)
9363 directive = ASM_QUAD;
9364 #else
9365 gcc_assert (!TARGET_64BIT);
9366 #endif
9367
9368 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9369 }
9370
9371 void
9372 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9373 {
9374 const char *directive = ASM_LONG;
9375
9376 #ifdef ASM_QUAD
9377 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9378 directive = ASM_QUAD;
9379 #else
9380 gcc_assert (!TARGET_64BIT);
9381 #endif
9382 if (TARGET_64BIT)
9383 fprintf (file, "%s%s%d-%s%d\n",
9384 directive, LPREFIX, value, LPREFIX, rel);
9385 else if (HAVE_AS_GOTOFF_IN_DATA)
9386 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9387 #if TARGET_MACHO
9388 else if (TARGET_MACHO)
9389 {
9390 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9391 machopic_output_function_base_name (file);
9392 fprintf(file, "\n");
9393 }
9394 #endif
9395 else
9396 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9397 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9398 }
9399 \f
9400 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9401 for the target. */
9402
9403 void
9404 ix86_expand_clear (rtx dest)
9405 {
9406 rtx tmp;
9407
9408 /* We play register width games, which are only valid after reload. */
9409 gcc_assert (reload_completed);
9410
9411 /* Avoid HImode and its attendant prefix byte. */
9412 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9413 dest = gen_rtx_REG (SImode, REGNO (dest));
9414
9415 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9416
9417 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9418 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9419 {
9420 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9421 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9422 }
9423
9424 emit_insn (tmp);
9425 }
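
/* For example, clearing %eax expands to "xor %eax, %eax" (with the flags
   clobber recorded in the parallel) whenever the xor form is preferred,
   and to "mov $0, %eax" on targets where TARGET_USE_MOV0 makes the
   explicit move cheaper.  */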
9426
9427 /* X is an unchanging MEM. If it is a constant pool reference, return
9428 the constant pool rtx, else NULL. */
9429
9430 rtx
9431 maybe_get_pool_constant (rtx x)
9432 {
9433 x = ix86_delegitimize_address (XEXP (x, 0));
9434
9435 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9436 return get_pool_constant (x);
9437
9438 return NULL_RTX;
9439 }
9440
9441 void
9442 ix86_expand_move (enum machine_mode mode, rtx operands[])
9443 {
9444 int strict = (reload_in_progress || reload_completed);
9445 rtx op0, op1;
9446 enum tls_model model;
9447
9448 op0 = operands[0];
9449 op1 = operands[1];
9450
9451 if (GET_CODE (op1) == SYMBOL_REF)
9452 {
9453 model = SYMBOL_REF_TLS_MODEL (op1);
9454 if (model)
9455 {
9456 op1 = legitimize_tls_address (op1, model, true);
9457 op1 = force_operand (op1, op0);
9458 if (op1 == op0)
9459 return;
9460 }
9461 }
9462 else if (GET_CODE (op1) == CONST
9463 && GET_CODE (XEXP (op1, 0)) == PLUS
9464 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9465 {
9466 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9467 if (model)
9468 {
9469 rtx addend = XEXP (XEXP (op1, 0), 1);
9470 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9471 op1 = force_operand (op1, NULL);
9472 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9473 op0, 1, OPTAB_DIRECT);
9474 if (op1 == op0)
9475 return;
9476 }
9477 }
9478
9479 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9480 {
9481 if (TARGET_MACHO && !TARGET_64BIT)
9482 {
9483 #if TARGET_MACHO
9484 if (MACHOPIC_PURE)
9485 {
9486 rtx temp = ((reload_in_progress
9487 || ((op0 && REG_P (op0))
9488 && mode == Pmode))
9489 ? op0 : gen_reg_rtx (Pmode));
9490 op1 = machopic_indirect_data_reference (op1, temp);
9491 op1 = machopic_legitimize_pic_address (op1, mode,
9492 temp == op1 ? 0 : temp);
9493 }
9494 else if (MACHOPIC_INDIRECT)
9495 op1 = machopic_indirect_data_reference (op1, 0);
9496 if (op0 == op1)
9497 return;
9498 #endif
9499 }
9500 else
9501 {
9502 if (MEM_P (op0))
9503 op1 = force_reg (Pmode, op1);
9504 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9505 op1 = legitimize_pic_address (op1, op0);
9506 }
9507 }
9508 else
9509 {
9510 if (MEM_P (op0)
9511 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9512 || !push_operand (op0, mode))
9513 && MEM_P (op1))
9514 op1 = force_reg (mode, op1);
9515
9516 if (push_operand (op0, mode)
9517 && ! general_no_elim_operand (op1, mode))
9518 op1 = copy_to_mode_reg (mode, op1);
9519
9520 /* Force large constants in 64bit compilation into register
9521 to get them CSEed. */
9522 if (TARGET_64BIT && mode == DImode
9523 && immediate_operand (op1, mode)
9524 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9525 && !register_operand (op0, mode)
9526 && optimize && !reload_completed && !reload_in_progress)
9527 op1 = copy_to_mode_reg (mode, op1);
9528
9529 if (FLOAT_MODE_P (mode))
9530 {
9531 /* If we are loading a floating point constant to a register,
9532 force the value to memory now, since we'll get better code
9533 out the back end. */
9534
9535 if (strict)
9536 ;
9537 else if (GET_CODE (op1) == CONST_DOUBLE)
9538 {
9539 op1 = validize_mem (force_const_mem (mode, op1));
9540 if (!register_operand (op0, mode))
9541 {
9542 rtx temp = gen_reg_rtx (mode);
9543 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9544 emit_move_insn (op0, temp);
9545 return;
9546 }
9547 }
9548 }
9549 }
9550
9551 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9552 }
9553
9554 void
9555 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9556 {
9557 rtx op0 = operands[0], op1 = operands[1];
9558
9559   /* Force constants other than zero into memory.  We do not know how
9560      the instructions used to build constants modify the upper 64 bits
9561      of the register; once we have that information we may be able
9562      to handle some of them more efficiently.  */
9563 if ((reload_in_progress | reload_completed) == 0
9564 && register_operand (op0, mode)
9565 && CONSTANT_P (op1)
9566 && standard_sse_constant_p (op1) <= 0)
9567 op1 = validize_mem (force_const_mem (mode, op1));
9568
9569 /* Make operand1 a register if it isn't already. */
9570 if (!no_new_pseudos
9571 && !register_operand (op0, mode)
9572 && !register_operand (op1, mode))
9573 {
9574 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9575 return;
9576 }
9577
9578 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9579 }
9580
9581 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9582 straight to ix86_expand_vector_move. */
9583 /* Code generation for scalar reg-reg moves of single and double precision data:
9584 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9585 movaps reg, reg
9586 else
9587 movss reg, reg
9588 if (x86_sse_partial_reg_dependency == true)
9589 movapd reg, reg
9590 else
9591 movsd reg, reg
9592
9593 Code generation for scalar loads of double precision data:
9594 if (x86_sse_split_regs == true)
9595 movlpd mem, reg (gas syntax)
9596 else
9597 movsd mem, reg
9598
9599 Code generation for unaligned packed loads of single precision data
9600 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9601 if (x86_sse_unaligned_move_optimal)
9602 movups mem, reg
9603
9604 if (x86_sse_partial_reg_dependency == true)
9605 {
9606 xorps reg, reg
9607 movlps mem, reg
9608 movhps mem+8, reg
9609 }
9610 else
9611 {
9612 movlps mem, reg
9613 movhps mem+8, reg
9614 }
9615
9616 Code generation for unaligned packed loads of double precision data
9617 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9618 if (x86_sse_unaligned_move_optimal)
9619 movupd mem, reg
9620
9621 if (x86_sse_split_regs == true)
9622 {
9623 movlpd mem, reg
9624 movhpd mem+8, reg
9625 }
9626 else
9627 {
9628 movsd mem, reg
9629 movhpd mem+8, reg
9630 }
9631 */
9632
9633 void
9634 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9635 {
9636 rtx op0, op1, m;
9637
9638 op0 = operands[0];
9639 op1 = operands[1];
9640
9641 if (MEM_P (op1))
9642 {
9643 /* If we're optimizing for size, movups is the smallest. */
9644 if (optimize_size)
9645 {
9646 op0 = gen_lowpart (V4SFmode, op0);
9647 op1 = gen_lowpart (V4SFmode, op1);
9648 emit_insn (gen_sse_movups (op0, op1));
9649 return;
9650 }
9651
9652 /* ??? If we have typed data, then it would appear that using
9653 movdqu is the only way to get unaligned data loaded with
9654 integer type. */
9655 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9656 {
9657 op0 = gen_lowpart (V16QImode, op0);
9658 op1 = gen_lowpart (V16QImode, op1);
9659 emit_insn (gen_sse2_movdqu (op0, op1));
9660 return;
9661 }
9662
9663 if (TARGET_SSE2 && mode == V2DFmode)
9664 {
9665 rtx zero;
9666
9667 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9668 {
9669 op0 = gen_lowpart (V2DFmode, op0);
9670 op1 = gen_lowpart (V2DFmode, op1);
9671 emit_insn (gen_sse2_movupd (op0, op1));
9672 return;
9673 }
9674
9675 /* When SSE registers are split into halves, we can avoid
9676 writing to the top half twice. */
9677 if (TARGET_SSE_SPLIT_REGS)
9678 {
9679 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9680 zero = op0;
9681 }
9682 else
9683 {
9684 /* ??? Not sure about the best option for the Intel chips.
9685 The following would seem to satisfy; the register is
9686 entirely cleared, breaking the dependency chain. We
9687 then store to the upper half, with a dependency depth
9688 of one. A rumor has it that Intel recommends two movsd
9689 followed by an unpacklpd, but this is unconfirmed. And
9690 given that the dependency depth of the unpacklpd would
9691 still be one, I'm not sure why this would be better. */
9692 zero = CONST0_RTX (V2DFmode);
9693 }
9694
9695 m = adjust_address (op1, DFmode, 0);
9696 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9697 m = adjust_address (op1, DFmode, 8);
9698 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9699 }
9700 else
9701 {
9702 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9703 {
9704 op0 = gen_lowpart (V4SFmode, op0);
9705 op1 = gen_lowpart (V4SFmode, op1);
9706 emit_insn (gen_sse_movups (op0, op1));
9707 return;
9708 }
9709
9710 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9711 emit_move_insn (op0, CONST0_RTX (mode));
9712 else
9713 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9714
9715 if (mode != V4SFmode)
9716 op0 = gen_lowpart (V4SFmode, op0);
9717 m = adjust_address (op1, V2SFmode, 0);
9718 emit_insn (gen_sse_loadlps (op0, op0, m));
9719 m = adjust_address (op1, V2SFmode, 8);
9720 emit_insn (gen_sse_loadhps (op0, op0, m));
9721 }
9722 }
9723 else if (MEM_P (op0))
9724 {
9725 /* If we're optimizing for size, movups is the smallest. */
9726 if (optimize_size)
9727 {
9728 op0 = gen_lowpart (V4SFmode, op0);
9729 op1 = gen_lowpart (V4SFmode, op1);
9730 emit_insn (gen_sse_movups (op0, op1));
9731 return;
9732 }
9733
9734 /* ??? Similar to above, only less clear because of quote
9735 typeless stores unquote. */
9736 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9737 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9738 {
9739 op0 = gen_lowpart (V16QImode, op0);
9740 op1 = gen_lowpart (V16QImode, op1);
9741 emit_insn (gen_sse2_movdqu (op0, op1));
9742 return;
9743 }
9744
9745 if (TARGET_SSE2 && mode == V2DFmode)
9746 {
9747 m = adjust_address (op0, DFmode, 0);
9748 emit_insn (gen_sse2_storelpd (m, op1));
9749 m = adjust_address (op0, DFmode, 8);
9750 emit_insn (gen_sse2_storehpd (m, op1));
9751 }
9752 else
9753 {
9754 if (mode != V4SFmode)
9755 op1 = gen_lowpart (V4SFmode, op1);
9756 m = adjust_address (op0, V2SFmode, 0);
9757 emit_insn (gen_sse_storelps (m, op1));
9758 m = adjust_address (op0, V2SFmode, 8);
9759 emit_insn (gen_sse_storehps (m, op1));
9760 }
9761 }
9762 else
9763 gcc_unreachable ();
9764 }
9765
9766 /* Expand a push in MODE. This is some mode for which we do not support
9767 proper push instructions, at least from the registers that we expect
9768 the value to live in. */
9769
9770 void
9771 ix86_expand_push (enum machine_mode mode, rtx x)
9772 {
9773 rtx tmp;
9774
9775 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9776 GEN_INT (-GET_MODE_SIZE (mode)),
9777 stack_pointer_rtx, 1, OPTAB_DIRECT);
9778 if (tmp != stack_pointer_rtx)
9779 emit_move_insn (stack_pointer_rtx, tmp);
9780
9781 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9782 emit_move_insn (tmp, x);
9783 }
9784
9785 /* Helper function of ix86_fixup_binary_operands to canonicalize
9786 operand order. Returns true if the operands should be swapped. */
9787
9788 static bool
9789 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9790 rtx operands[])
9791 {
9792 rtx dst = operands[0];
9793 rtx src1 = operands[1];
9794 rtx src2 = operands[2];
9795
9796 /* If the operation is not commutative, we can't do anything. */
9797 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9798 return false;
9799
9800 /* Highest priority is that src1 should match dst. */
9801 if (rtx_equal_p (dst, src1))
9802 return false;
9803 if (rtx_equal_p (dst, src2))
9804 return true;
9805
9806 /* Next highest priority is that immediate constants come second. */
9807 if (immediate_operand (src2, mode))
9808 return false;
9809 if (immediate_operand (src1, mode))
9810 return true;
9811
9812 /* Lowest priority is that memory references should come second. */
9813 if (MEM_P (src2))
9814 return false;
9815 if (MEM_P (src1))
9816 return true;
9817
9818 return false;
9819 }
9820
9821
9822 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9823 destination to use for the operation. If different from the true
9824 destination in operands[0], a copy operation will be required. */
9825
9826 rtx
9827 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9828 rtx operands[])
9829 {
9830 rtx dst = operands[0];
9831 rtx src1 = operands[1];
9832 rtx src2 = operands[2];
9833
9834 /* Canonicalize operand order. */
9835 if (ix86_swap_binary_operands_p (code, mode, operands))
9836 {
9837 rtx temp = src1;
9838 src1 = src2;
9839 src2 = temp;
9840 }
9841
9842 /* Both source operands cannot be in memory. */
9843 if (MEM_P (src1) && MEM_P (src2))
9844 {
9845 /* Optimization: Only read from memory once. */
9846 if (rtx_equal_p (src1, src2))
9847 {
9848 src2 = force_reg (mode, src2);
9849 src1 = src2;
9850 }
9851 else
9852 src2 = force_reg (mode, src2);
9853 }
9854
9855 /* If the destination is memory, and we do not have matching source
9856 operands, do things in registers. */
9857 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9858 dst = gen_reg_rtx (mode);
9859
9860 /* Source 1 cannot be a constant. */
9861 if (CONSTANT_P (src1))
9862 src1 = force_reg (mode, src1);
9863
9864 /* Source 1 cannot be a non-matching memory. */
9865 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9866 src1 = force_reg (mode, src1);
9867
9868 operands[1] = src1;
9869 operands[2] = src2;
9870 return dst;
9871 }
9872
9873 /* Similarly, but assume that the destination has already been
9874 set up properly. */
9875
9876 void
9877 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9878 enum machine_mode mode, rtx operands[])
9879 {
9880 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9881 gcc_assert (dst == operands[0]);
9882 }
9883
9884 /* Attempt to expand a binary operator.  Make the expansion closer to the
9885    actual machine than just general_operand, which would allow 3 separate
9886    memory references (one output, two input) in a single insn.  */
9887
9888 void
9889 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9890 rtx operands[])
9891 {
9892 rtx src1, src2, dst, op, clob;
9893
9894 dst = ix86_fixup_binary_operands (code, mode, operands);
9895 src1 = operands[1];
9896 src2 = operands[2];
9897
9898 /* Emit the instruction. */
9899
9900 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9901 if (reload_in_progress)
9902 {
9903 /* Reload doesn't know about the flags register, and doesn't know that
9904 it doesn't want to clobber it. We can only do this with PLUS. */
9905 gcc_assert (code == PLUS);
9906 emit_insn (op);
9907 }
9908 else
9909 {
9910 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9911 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9912 }
9913
9914 /* Fix up the destination if needed. */
9915 if (dst != operands[0])
9916 emit_move_insn (operands[0], dst);
9917 }
9918
9919 /* Return TRUE or FALSE depending on whether the binary operator meets the
9920 appropriate constraints. */
9921
9922 int
9923 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9924 rtx operands[3])
9925 {
9926 rtx dst = operands[0];
9927 rtx src1 = operands[1];
9928 rtx src2 = operands[2];
9929
9930 /* Both source operands cannot be in memory. */
9931 if (MEM_P (src1) && MEM_P (src2))
9932 return 0;
9933
9934 /* Canonicalize operand order for commutative operators. */
9935 if (ix86_swap_binary_operands_p (code, mode, operands))
9936 {
9937 rtx temp = src1;
9938 src1 = src2;
9939 src2 = temp;
9940 }
9941
9942 /* If the destination is memory, we must have a matching source operand. */
9943 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9944 return 0;
9945
9946 /* Source 1 cannot be a constant. */
9947 if (CONSTANT_P (src1))
9948 return 0;
9949
9950 /* Source 1 cannot be a non-matching memory. */
9951 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9952 return 0;
9953
9954 return 1;
9955 }
9956
9957 /* Attempt to expand a unary operator.  Make the expansion closer to the
9958    actual machine than just general_operand, which would allow 2 separate
9959    memory references (one output, one input) in a single insn.  */
9960
9961 void
9962 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9963 rtx operands[])
9964 {
9965 int matching_memory;
9966 rtx src, dst, op, clob;
9967
9968 dst = operands[0];
9969 src = operands[1];
9970
9971 /* If the destination is memory, and we do not have matching source
9972 operands, do things in registers. */
9973 matching_memory = 0;
9974 if (MEM_P (dst))
9975 {
9976 if (rtx_equal_p (dst, src))
9977 matching_memory = 1;
9978 else
9979 dst = gen_reg_rtx (mode);
9980 }
9981
9982 /* When source operand is memory, destination must match. */
9983 if (MEM_P (src) && !matching_memory)
9984 src = force_reg (mode, src);
9985
9986 /* Emit the instruction. */
9987
9988 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9989 if (reload_in_progress || code == NOT)
9990 {
9991 /* Reload doesn't know about the flags register, and doesn't know that
9992 it doesn't want to clobber it. */
9993 gcc_assert (code == NOT);
9994 emit_insn (op);
9995 }
9996 else
9997 {
9998 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9999 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10000 }
10001
10002 /* Fix up the destination if needed. */
10003 if (dst != operands[0])
10004 emit_move_insn (operands[0], dst);
10005 }
10006
10007 /* Return TRUE or FALSE depending on whether the unary operator meets the
10008 appropriate constraints. */
10009
10010 int
10011 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10012 enum machine_mode mode ATTRIBUTE_UNUSED,
10013 rtx operands[2] ATTRIBUTE_UNUSED)
10014 {
10015 /* If one of operands is memory, source and destination must match. */
10016 if ((MEM_P (operands[0])
10017 || MEM_P (operands[1]))
10018 && ! rtx_equal_p (operands[0], operands[1]))
10019 return FALSE;
10020 return TRUE;
10021 }
10022
10023 /* Post-reload splitter for converting an SF or DFmode value in an
10024 SSE register into an unsigned SImode. */
10025
10026 void
10027 ix86_split_convert_uns_si_sse (rtx operands[])
10028 {
10029 enum machine_mode vecmode;
10030 rtx value, large, zero_or_two31, input, two31, x;
10031
10032 large = operands[1];
10033 zero_or_two31 = operands[2];
10034 input = operands[3];
10035 two31 = operands[4];
10036 vecmode = GET_MODE (large);
10037 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10038
10039 /* Load up the value into the low element. We must ensure that the other
10040 elements are valid floats -- zero is the easiest such value. */
10041 if (MEM_P (input))
10042 {
10043 if (vecmode == V4SFmode)
10044 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10045 else
10046 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10047 }
10048 else
10049 {
10050 input = gen_rtx_REG (vecmode, REGNO (input));
10051 emit_move_insn (value, CONST0_RTX (vecmode));
10052 if (vecmode == V4SFmode)
10053 emit_insn (gen_sse_movss (value, value, input));
10054 else
10055 emit_insn (gen_sse2_movsd (value, value, input));
10056 }
10057
10058 emit_move_insn (large, two31);
10059 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10060
10061 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10062 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10063
10064 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10065 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10066
10067 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10068 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10069
10070 large = gen_rtx_REG (V4SImode, REGNO (large));
10071 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10072
10073 x = gen_rtx_REG (V4SImode, REGNO (value));
10074 if (vecmode == V4SFmode)
10075 emit_insn (gen_sse2_cvttps2dq (x, value));
10076 else
10077 emit_insn (gen_sse2_cvttpd2dq (x, value));
10078 value = x;
10079
10080 emit_insn (gen_xorv4si3 (value, value, large));
10081 }
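
/* Worked example for the split above: converting 2147483648.0 (exactly
   2**31), which does not fit in a signed SImode conversion.  The compare
   sets the "large" mask, 2**31 is subtracted leaving 0.0, the cvtt
   instruction produces the integer 0, and the final xor with the shifted
   mask (0x80000000) restores the high bit, yielding the unsigned result
   2147483648.  */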
10082
10083 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10084 Expects the 64-bit DImode to be supplied in a pair of integral
10085 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10086 -mfpmath=sse, !optimize_size only. */
10087
10088 void
10089 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10090 {
10091 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10092 rtx int_xmm, fp_xmm;
10093 rtx biases, exponents;
10094 rtx x;
10095
10096 int_xmm = gen_reg_rtx (V4SImode);
10097 if (TARGET_INTER_UNIT_MOVES)
10098 emit_insn (gen_movdi_to_sse (int_xmm, input));
10099 else if (TARGET_SSE_SPLIT_REGS)
10100 {
10101 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10102 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10103 }
10104 else
10105 {
10106 x = gen_reg_rtx (V2DImode);
10107 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10108 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10109 }
10110
10111 x = gen_rtx_CONST_VECTOR (V4SImode,
10112 gen_rtvec (4, GEN_INT (0x43300000UL),
10113 GEN_INT (0x45300000UL),
10114 const0_rtx, const0_rtx));
10115 exponents = validize_mem (force_const_mem (V4SImode, x));
10116
10117 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10118 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10119
10120 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10121 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10122 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10123 (0x1.0p84 + double(fp_value_hi_xmm)).
10124 Note these exponents differ by 32. */
10125
10126 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10127
10128 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10129 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10130 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10131 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10132 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10133 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10134 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10135 biases = validize_mem (force_const_mem (V2DFmode, biases));
10136 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10137
10138 /* Add the upper and lower DFmode values together. */
10139 if (TARGET_SSE3)
10140 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10141 else
10142 {
10143 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10144 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10145 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10146 }
10147
10148 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10149 }
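
/* The identity used above, sketched in host arithmetic (this assumes
   IEEE-754 doubles and a little-endian layout; it is an illustration, not
   code gcc emits):

       uint64_t u = ...;
       uint64_t lo_bits = ((uint64_t) 0x43300000 << 32) | (u & 0xffffffff);
       uint64_t hi_bits = ((uint64_t) 0x45300000 << 32) | (u >> 32);
       double lo, hi;
       memcpy (&lo, &lo_bits, 8);   now lo == 0x1.0p52 + (double) (u & 0xffffffff)
       memcpy (&hi, &hi_bits, 8);   now hi == 0x1.0p84 + (double) (u >> 32) * 0x1.0p32
       double result = (hi - 0x1.0p84) + (lo - 0x1.0p52);

   which is what the punpckldq / subpd / haddpd sequence computes.  */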
10150
10151 /* Convert an unsigned SImode value into a DFmode. Only currently used
10152 for SSE, but applicable anywhere. */
10153
10154 void
10155 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10156 {
10157 REAL_VALUE_TYPE TWO31r;
10158 rtx x, fp;
10159
10160 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10161 NULL, 1, OPTAB_DIRECT);
10162
10163 fp = gen_reg_rtx (DFmode);
10164 emit_insn (gen_floatsidf2 (fp, x));
10165
10166 real_ldexp (&TWO31r, &dconst1, 31);
10167 x = const_double_from_real_value (TWO31r, DFmode);
10168
10169 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10170 if (x != target)
10171 emit_move_insn (target, x);
10172 }
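
/* Worked example: input 0xffffffff (4294967295).  Adding -2147483648 in
   SImode wraps to the signed value 2147483647, floatsidf converts that
   exactly, and adding the DFmode constant 2**31 back yields 4294967295.0,
   the unsigned interpretation of the original bits.  */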
10173
10174 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10175 32-bit mode; otherwise we have a direct convert instruction. */
10176
10177 void
10178 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10179 {
10180 REAL_VALUE_TYPE TWO32r;
10181 rtx fp_lo, fp_hi, x;
10182
10183 fp_lo = gen_reg_rtx (DFmode);
10184 fp_hi = gen_reg_rtx (DFmode);
10185
10186 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10187
10188 real_ldexp (&TWO32r, &dconst1, 32);
10189 x = const_double_from_real_value (TWO32r, DFmode);
10190 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10191
10192 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10193
10194 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10195 0, OPTAB_DIRECT);
10196 if (x != target)
10197 emit_move_insn (target, x);
10198 }
10199
10200 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10201 For x86_32, -mfpmath=sse, !optimize_size only. */
10202 void
10203 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10204 {
10205 REAL_VALUE_TYPE ONE16r;
10206 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10207
10208 real_ldexp (&ONE16r, &dconst1, 16);
10209 x = const_double_from_real_value (ONE16r, SFmode);
10210 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10211 NULL, 0, OPTAB_DIRECT);
10212 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10213 NULL, 0, OPTAB_DIRECT);
10214 fp_hi = gen_reg_rtx (SFmode);
10215 fp_lo = gen_reg_rtx (SFmode);
10216 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10217 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10218 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10219 0, OPTAB_DIRECT);
10220 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10221 0, OPTAB_DIRECT);
10222 if (!rtx_equal_p (target, fp_hi))
10223 emit_move_insn (target, fp_hi);
10224 }
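
/* Worked example: input 0xffffffff.  Both halves, 0xffff each, convert to
   SFmode exactly; 65535.0 * 65536.0 = 4294901760.0 is still exact, and the
   final addition of 65535.0 rounds once, to 4294967296.0, which is also the
   correctly rounded SFmode value of 4294967295.  */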
10225
10226 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10227 then replicate the value for all elements of the vector
10228 register. */
10229
10230 rtx
10231 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10232 {
10233 rtvec v;
10234 switch (mode)
10235 {
10236 case SFmode:
10237 if (vect)
10238 v = gen_rtvec (4, value, value, value, value);
10239 else
10240 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10241 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10242 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10243
10244 case DFmode:
10245 if (vect)
10246 v = gen_rtvec (2, value, value);
10247 else
10248 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10249 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10250
10251 default:
10252 gcc_unreachable ();
10253 }
10254 }
10255
10256 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10257 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10258 true, then replicate the mask for all elements of the vector register.
10259 If INVERT is true, then create a mask excluding the sign bit. */
10260
10261 rtx
10262 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10263 {
10264 enum machine_mode vec_mode;
10265 HOST_WIDE_INT hi, lo;
10266 int shift = 63;
10267 rtx v;
10268 rtx mask;
10269
10270 /* Find the sign bit, sign extended to 2*HWI. */
10271 if (mode == SFmode)
10272 lo = 0x80000000, hi = lo < 0;
10273 else if (HOST_BITS_PER_WIDE_INT >= 64)
10274 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10275 else
10276 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10277
10278 if (invert)
10279 lo = ~lo, hi = ~hi;
10280
10281 /* Force this value into the low part of a fp vector constant. */
10282 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10283 mask = gen_lowpart (mode, mask);
10284
10285 v = ix86_build_const_vector (mode, vect, mask);
10286 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10287 return force_reg (vec_mode, v);
10288 }
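
/* For DFmode this yields the vector constant { 0x8000000000000000, 0 }
   (with the mask replicated to both elements when VECT is true), and with
   INVERT the complement 0x7fffffffffffffff; for SFmode the per-element
   masks are 0x80000000 and 0x7fffffff.  These are the constants the
   and/xor patterns below use for absolute value and negation.  */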
10289
10290 /* Generate code for floating point ABS or NEG. */
10291
10292 void
10293 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10294 rtx operands[])
10295 {
10296 rtx mask, set, use, clob, dst, src;
10297 bool matching_memory;
10298 bool use_sse = false;
10299 bool vector_mode = VECTOR_MODE_P (mode);
10300 enum machine_mode elt_mode = mode;
10301
10302 if (vector_mode)
10303 {
10304 elt_mode = GET_MODE_INNER (mode);
10305 use_sse = true;
10306 }
10307 else if (TARGET_SSE_MATH)
10308 use_sse = SSE_FLOAT_MODE_P (mode);
10309
10310 /* NEG and ABS performed with SSE use bitwise mask operations.
10311 Create the appropriate mask now. */
10312 if (use_sse)
10313 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10314 else
10315 mask = NULL_RTX;
10316
10317 dst = operands[0];
10318 src = operands[1];
10319
10320 /* If the destination is memory, and we don't have matching source
10321 operands or we're using the x87, do things in registers. */
10322 matching_memory = false;
10323 if (MEM_P (dst))
10324 {
10325 if (use_sse && rtx_equal_p (dst, src))
10326 matching_memory = true;
10327 else
10328 dst = gen_reg_rtx (mode);
10329 }
10330 if (MEM_P (src) && !matching_memory)
10331 src = force_reg (mode, src);
10332
10333 if (vector_mode)
10334 {
10335 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10336 set = gen_rtx_SET (VOIDmode, dst, set);
10337 emit_insn (set);
10338 }
10339 else
10340 {
10341 set = gen_rtx_fmt_e (code, mode, src);
10342 set = gen_rtx_SET (VOIDmode, dst, set);
10343 if (mask)
10344 {
10345 use = gen_rtx_USE (VOIDmode, mask);
10346 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10347 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10348 gen_rtvec (3, set, use, clob)));
10349 }
10350 else
10351 emit_insn (set);
10352 }
10353
10354 if (dst != operands[0])
10355 emit_move_insn (operands[0], dst);
10356 }
10357
10358 /* Expand a copysign operation. Special case operand 0 being a constant. */
10359
10360 void
10361 ix86_expand_copysign (rtx operands[])
10362 {
10363 enum machine_mode mode, vmode;
10364 rtx dest, op0, op1, mask, nmask;
10365
10366 dest = operands[0];
10367 op0 = operands[1];
10368 op1 = operands[2];
10369
10370 mode = GET_MODE (dest);
10371 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10372
10373 if (GET_CODE (op0) == CONST_DOUBLE)
10374 {
10375 rtvec v;
10376
10377 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10378 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10379
10380 if (op0 == CONST0_RTX (mode))
10381 op0 = CONST0_RTX (vmode);
10382 else
10383 {
10384 if (mode == SFmode)
10385 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10386 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10387 else
10388 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10389 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10390 }
10391
10392 mask = ix86_build_signbit_mask (mode, 0, 0);
10393
10394 if (mode == SFmode)
10395 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10396 else
10397 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10398 }
10399 else
10400 {
10401 nmask = ix86_build_signbit_mask (mode, 0, 1);
10402 mask = ix86_build_signbit_mask (mode, 0, 0);
10403
10404 if (mode == SFmode)
10405 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10406 else
10407 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10408 }
10409 }
10410
10411 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10412 be a constant, and so has already been expanded into a vector constant. */
10413
10414 void
10415 ix86_split_copysign_const (rtx operands[])
10416 {
10417 enum machine_mode mode, vmode;
10418 rtx dest, op0, op1, mask, x;
10419
10420 dest = operands[0];
10421 op0 = operands[1];
10422 op1 = operands[2];
10423 mask = operands[3];
10424
10425 mode = GET_MODE (dest);
10426 vmode = GET_MODE (mask);
10427
10428 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10429 x = gen_rtx_AND (vmode, dest, mask);
10430 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10431
10432 if (op0 != CONST0_RTX (vmode))
10433 {
10434 x = gen_rtx_IOR (vmode, dest, op0);
10435 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10436 }
10437 }
10438
10439 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10440 so we have to do two masks. */
10441
10442 void
10443 ix86_split_copysign_var (rtx operands[])
10444 {
10445 enum machine_mode mode, vmode;
10446 rtx dest, scratch, op0, op1, mask, nmask, x;
10447
10448 dest = operands[0];
10449 scratch = operands[1];
10450 op0 = operands[2];
10451 op1 = operands[3];
10452 nmask = operands[4];
10453 mask = operands[5];
10454
10455 mode = GET_MODE (dest);
10456 vmode = GET_MODE (mask);
10457
10458 if (rtx_equal_p (op0, op1))
10459 {
10460 /* Shouldn't happen often (it's useless, obviously), but when it does
10461 we'd generate incorrect code if we continue below. */
10462 emit_move_insn (dest, op0);
10463 return;
10464 }
10465
10466 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10467 {
10468 gcc_assert (REGNO (op1) == REGNO (scratch));
10469
10470 x = gen_rtx_AND (vmode, scratch, mask);
10471 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10472
10473 dest = mask;
10474 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10475 x = gen_rtx_NOT (vmode, dest);
10476 x = gen_rtx_AND (vmode, x, op0);
10477 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10478 }
10479 else
10480 {
10481 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10482 {
10483 x = gen_rtx_AND (vmode, scratch, mask);
10484 }
10485 else /* alternative 2,4 */
10486 {
10487 gcc_assert (REGNO (mask) == REGNO (scratch));
10488 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10489 x = gen_rtx_AND (vmode, scratch, op1);
10490 }
10491 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10492
10493 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10494 {
10495 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10496 x = gen_rtx_AND (vmode, dest, nmask);
10497 }
10498 else /* alternative 3,4 */
10499 {
10500 gcc_assert (REGNO (nmask) == REGNO (dest));
10501 dest = nmask;
10502 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10503 x = gen_rtx_AND (vmode, dest, op0);
10504 }
10505 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10506 }
10507
10508 x = gen_rtx_IOR (vmode, dest, scratch);
10509 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10510 }
10511
10512 /* Return TRUE or FALSE depending on whether the first SET in INSN
10513 has source and destination with matching CC modes, and that the
10514 CC mode is at least as constrained as REQ_MODE. */
10515
10516 int
10517 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10518 {
10519 rtx set;
10520 enum machine_mode set_mode;
10521
10522 set = PATTERN (insn);
10523 if (GET_CODE (set) == PARALLEL)
10524 set = XVECEXP (set, 0, 0);
10525 gcc_assert (GET_CODE (set) == SET);
10526 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10527
10528 set_mode = GET_MODE (SET_DEST (set));
10529 switch (set_mode)
10530 {
10531 case CCNOmode:
10532 if (req_mode != CCNOmode
10533 && (req_mode != CCmode
10534 || XEXP (SET_SRC (set), 1) != const0_rtx))
10535 return 0;
10536 break;
10537 case CCmode:
10538 if (req_mode == CCGCmode)
10539 return 0;
10540 /* FALLTHRU */
10541 case CCGCmode:
10542 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10543 return 0;
10544 /* FALLTHRU */
10545 case CCGOCmode:
10546 if (req_mode == CCZmode)
10547 return 0;
10548 /* FALLTHRU */
10549 case CCZmode:
10550 break;
10551
10552 default:
10553 gcc_unreachable ();
10554 }
10555
10556 return (GET_MODE (SET_SRC (set)) == set_mode);
10557 }
10558
10559 /* Generate insn patterns to do an integer compare of OPERANDS. */
10560
10561 static rtx
10562 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10563 {
10564 enum machine_mode cmpmode;
10565 rtx tmp, flags;
10566
10567 cmpmode = SELECT_CC_MODE (code, op0, op1);
10568 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10569
10570 /* This is very simple, but making the interface the same as in the
10571 FP case makes the rest of the code easier. */
10572 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10573 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10574
10575 /* Return the test that should be put into the flags user, i.e.
10576 the bcc, scc, or cmov instruction. */
10577 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10578 }
10579
10580 /* Figure out whether to use ordered or unordered fp comparisons.
10581 Return the appropriate mode to use. */
10582
10583 enum machine_mode
10584 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10585 {
10586   /* ??? In order to make all comparisons reversible, we do all comparisons
10587      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10588      between all forms of trapping and nontrapping comparisons, we can make
10589      inequality comparisons trapping again, since that results in better code
10590      when using FCOM based compares.  */
10591 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10592 }
10593
10594 enum machine_mode
10595 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10596 {
10597 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10598 return ix86_fp_compare_mode (code);
10599 switch (code)
10600 {
10601 /* Only zero flag is needed. */
10602 case EQ: /* ZF=0 */
10603 case NE: /* ZF!=0 */
10604 return CCZmode;
10605 /* Codes needing carry flag. */
10606 case GEU: /* CF=0 */
10607 case GTU: /* CF=0 & ZF=0 */
10608 case LTU: /* CF=1 */
10609 case LEU: /* CF=1 | ZF=1 */
10610 return CCmode;
10611 /* Codes possibly doable only with sign flag when
10612 comparing against zero. */
10613 case GE: /* SF=OF or SF=0 */
10614 case LT: /* SF<>OF or SF=1 */
10615 if (op1 == const0_rtx)
10616 return CCGOCmode;
10617 else
10618 	/* For other cases the Carry flag is not required.  */
10619 return CCGCmode;
10620       /* Codes doable only with the sign flag when comparing
10621          against zero, but for which we lack a jump instruction,
10622 	 so we need to use relational tests against overflow,
10623 	 which thus needs to be zero.  */
10624 case GT: /* ZF=0 & SF=OF */
10625 case LE: /* ZF=1 | SF<>OF */
10626 if (op1 == const0_rtx)
10627 return CCNOmode;
10628 else
10629 return CCGCmode;
10630       /* The strcmp pattern does (use flags), and combine may ask us for the
10631          proper mode.  */
10632 case USE:
10633 return CCmode;
10634 default:
10635 gcc_unreachable ();
10636 }
10637 }
10638
10639 /* Return the fixed registers used for condition codes. */
10640
10641 static bool
10642 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10643 {
10644 *p1 = FLAGS_REG;
10645 *p2 = FPSR_REG;
10646 return true;
10647 }
10648
10649 /* If two condition code modes are compatible, return a condition code
10650 mode which is compatible with both. Otherwise, return
10651 VOIDmode. */
10652
10653 static enum machine_mode
10654 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10655 {
10656 if (m1 == m2)
10657 return m1;
10658
10659 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10660 return VOIDmode;
10661
10662 if ((m1 == CCGCmode && m2 == CCGOCmode)
10663 || (m1 == CCGOCmode && m2 == CCGCmode))
10664 return CCGCmode;
10665
10666 switch (m1)
10667 {
10668 default:
10669 gcc_unreachable ();
10670
10671 case CCmode:
10672 case CCGCmode:
10673 case CCGOCmode:
10674 case CCNOmode:
10675 case CCZmode:
10676 switch (m2)
10677 {
10678 default:
10679 return VOIDmode;
10680
10681 case CCmode:
10682 case CCGCmode:
10683 case CCGOCmode:
10684 case CCNOmode:
10685 case CCZmode:
10686 return CCmode;
10687 }
10688
10689 case CCFPmode:
10690 case CCFPUmode:
10691 /* These are only compatible with themselves, which we already
10692 checked above. */
10693 return VOIDmode;
10694 }
10695 }
10696
10697 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10698
10699 int
10700 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10701 {
10702 enum rtx_code swapped_code = swap_condition (code);
10703 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10704 || (ix86_fp_comparison_cost (swapped_code)
10705 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10706 }
10707
10708 /* Swap, force into registers, or otherwise massage the two operands
10709 to a fp comparison. The operands are updated in place; the new
10710 comparison code is returned. */
10711
10712 static enum rtx_code
10713 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10714 {
10715 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10716 rtx op0 = *pop0, op1 = *pop1;
10717 enum machine_mode op_mode = GET_MODE (op0);
10718 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10719
10720 /* All of the unordered compare instructions only work on registers.
10721 The same is true of the fcomi compare instructions. The XFmode
10722 compare instructions require registers except when comparing
10723 against zero or when converting operand 1 from fixed point to
10724 floating point. */
10725
10726 if (!is_sse
10727 && (fpcmp_mode == CCFPUmode
10728 || (op_mode == XFmode
10729 && ! (standard_80387_constant_p (op0) == 1
10730 || standard_80387_constant_p (op1) == 1)
10731 && GET_CODE (op1) != FLOAT)
10732 || ix86_use_fcomi_compare (code)))
10733 {
10734 op0 = force_reg (op_mode, op0);
10735 op1 = force_reg (op_mode, op1);
10736 }
10737 else
10738 {
10739 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10740 things around if they appear profitable, otherwise force op0
10741 into a register. */
10742
10743 if (standard_80387_constant_p (op0) == 0
10744 || (MEM_P (op0)
10745 && ! (standard_80387_constant_p (op1) == 0
10746 || MEM_P (op1))))
10747 {
10748 rtx tmp;
10749 tmp = op0, op0 = op1, op1 = tmp;
10750 code = swap_condition (code);
10751 }
10752
10753 if (!REG_P (op0))
10754 op0 = force_reg (op_mode, op0);
10755
10756 if (CONSTANT_P (op1))
10757 {
10758 int tmp = standard_80387_constant_p (op1);
10759 if (tmp == 0)
10760 op1 = validize_mem (force_const_mem (op_mode, op1));
10761 else if (tmp == 1)
10762 {
10763 if (TARGET_CMOVE)
10764 op1 = force_reg (op_mode, op1);
10765 }
10766 else
10767 op1 = force_reg (op_mode, op1);
10768 }
10769 }
10770
10771 /* Try to rearrange the comparison to make it cheaper. */
10772 if (ix86_fp_comparison_cost (code)
10773 > ix86_fp_comparison_cost (swap_condition (code))
10774 && (REG_P (op1) || !no_new_pseudos))
10775 {
10776 rtx tmp;
10777 tmp = op0, op0 = op1, op1 = tmp;
10778 code = swap_condition (code);
10779 if (!REG_P (op0))
10780 op0 = force_reg (op_mode, op0);
10781 }
10782
10783 *pop0 = op0;
10784 *pop1 = op1;
10785 return code;
10786 }
10787
10788 /* Convert the comparison codes we use to represent an FP comparison into the
10789 integer code that will result in a proper branch. Return UNKNOWN if no such
10790 code is available. */
10791
10792 enum rtx_code
10793 ix86_fp_compare_code_to_integer (enum rtx_code code)
10794 {
10795 switch (code)
10796 {
10797 case GT:
10798 return GTU;
10799 case GE:
10800 return GEU;
10801 case ORDERED:
10802 case UNORDERED:
10803 return code;
10804 break;
10805 case UNEQ:
10806 return EQ;
10807 break;
10808 case UNLT:
10809 return LTU;
10810 break;
10811 case UNLE:
10812 return LEU;
10813 break;
10814 case LTGT:
10815 return NE;
10816 break;
10817 default:
10818 return UNKNOWN;
10819 }
10820 }
10821
10822 /* Split comparison code CODE into comparisons we can do using branch
10823 instructions. BYPASS_CODE is the comparison code for the branch that will
10824 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10825 is not required, its value is set to UNKNOWN.
10826 We never require more than two branches. */
10827
10828 void
10829 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10830 enum rtx_code *first_code,
10831 enum rtx_code *second_code)
10832 {
10833 *first_code = code;
10834 *bypass_code = UNKNOWN;
10835 *second_code = UNKNOWN;
10836
10837 /* The fcomi comparison sets flags as follows:
10838
10839 cmp ZF PF CF
10840 > 0 0 0
10841 < 0 0 1
10842 = 1 0 0
10843 un 1 1 1 */
10844
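/* Editor's note: in the "un" row above all three flags are set, so the
   plain LT, LE and EQ conditions below would also fire for NaN operands;
   that is why those cases get an UNORDERED bypass branch, while NE, UNGE
   and UNGT, which must be true for NaNs but would not fire, get an
   UNORDERED second branch instead (only under TARGET_IEEE_FP).  */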
10845 switch (code)
10846 {
10847 case GT: /* GTU - CF=0 & ZF=0 */
10848 case GE: /* GEU - CF=0 */
10849 case ORDERED: /* PF=0 */
10850 case UNORDERED: /* PF=1 */
10851 case UNEQ: /* EQ - ZF=1 */
10852 case UNLT: /* LTU - CF=1 */
10853 case UNLE: /* LEU - CF=1 | ZF=1 */
10854 case LTGT: /* EQ - ZF=0 */
10855 break;
10856 case LT: /* LTU - CF=1 - fails on unordered */
10857 *first_code = UNLT;
10858 *bypass_code = UNORDERED;
10859 break;
10860 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10861 *first_code = UNLE;
10862 *bypass_code = UNORDERED;
10863 break;
10864 case EQ: /* EQ - ZF=1 - fails on unordered */
10865 *first_code = UNEQ;
10866 *bypass_code = UNORDERED;
10867 break;
10868 case NE: /* NE - ZF=0 - fails on unordered */
10869 *first_code = LTGT;
10870 *second_code = UNORDERED;
10871 break;
10872 case UNGE: /* GEU - CF=0 - fails on unordered */
10873 *first_code = GE;
10874 *second_code = UNORDERED;
10875 break;
10876 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10877 *first_code = GT;
10878 *second_code = UNORDERED;
10879 break;
10880 default:
10881 gcc_unreachable ();
10882 }
10883 if (!TARGET_IEEE_FP)
10884 {
10885 *second_code = UNKNOWN;
10886 *bypass_code = UNKNOWN;
10887 }
10888 }
10889
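/* Editor's sketch (assumption: a plain-C model of the splitting above for
   the IEEE EQ case; the helper and struct names are hypothetical and the
   code is not part of the compiler).  */
#if 0
struct fcomi_flags { int zf, pf, cf; };	/* per the flag table above */

static int
ieee_fp_eq_model (struct fcomi_flags f)
{
  /* bypass_code == UNORDERED: PF=1 jumps around the real test, so a
     NaN operand never satisfies EQ.  */
  if (f.pf)
    return 0;
  /* first_code == UNEQ tests ZF=1; the unordered case (which also
     sets ZF) was already filtered out by the bypass branch.  */
  return f.zf;
}
#endif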
10890 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10891 All of the following functions use the number of instructions as the cost metric.
10892 In the future this should be tweaked to compute bytes for optimize_size and to
10893 take into account the performance of various instructions on various CPUs. */
10894 static int
10895 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10896 {
10897 if (!TARGET_IEEE_FP)
10898 return 4;
10899 /* The cost of code output by ix86_expand_fp_compare. */
10900 switch (code)
10901 {
10902 case UNLE:
10903 case UNLT:
10904 case LTGT:
10905 case GT:
10906 case GE:
10907 case UNORDERED:
10908 case ORDERED:
10909 case UNEQ:
10910 return 4;
10911 break;
10912 case LT:
10913 case NE:
10914 case EQ:
10915 case UNGE:
10916 return 5;
10917 break;
10918 case LE:
10919 case UNGT:
10920 return 6;
10921 break;
10922 default:
10923 gcc_unreachable ();
10924 }
10925 }
10926
10927 /* Return cost of comparison done using fcomi operation.
10928 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10929 static int
10930 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10931 {
10932 enum rtx_code bypass_code, first_code, second_code;
10933 /* Return an arbitrarily high cost when the instruction is not supported - this
10934 prevents gcc from using it. */
10935 if (!TARGET_CMOVE)
10936 return 1024;
10937 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10938 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10939 }
10940
10941 /* Return cost of comparison done using sahf operation.
10942 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10943 static int
10944 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10945 {
10946 enum rtx_code bypass_code, first_code, second_code;
10947 /* Return an arbitrarily high cost when the instruction is not preferred - this
10948 prevents gcc from using it. */
10949 if (!TARGET_USE_SAHF && !optimize_size)
10950 return 1024;
10951 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10952 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10953 }
10954
10955 /* Compute cost of the comparison done using any method.
10956 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10957 static int
10958 ix86_fp_comparison_cost (enum rtx_code code)
10959 {
10960 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10961 int min;
10962
10963 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10964 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10965
10966 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10967 if (min > sahf_cost)
10968 min = sahf_cost;
10969 if (min > fcomi_cost)
10970 min = fcomi_cost;
10971 return min;
10972 }
10973
10974 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10975
10976 static rtx
10977 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10978 rtx *second_test, rtx *bypass_test)
10979 {
10980 enum machine_mode fpcmp_mode, intcmp_mode;
10981 rtx tmp, tmp2;
10982 int cost = ix86_fp_comparison_cost (code);
10983 enum rtx_code bypass_code, first_code, second_code;
10984
10985 fpcmp_mode = ix86_fp_compare_mode (code);
10986 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10987
10988 if (second_test)
10989 *second_test = NULL_RTX;
10990 if (bypass_test)
10991 *bypass_test = NULL_RTX;
10992
10993 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10994
10995 /* Do fcomi/sahf based test when profitable. */
10996 if ((bypass_code == UNKNOWN || bypass_test)
10997 && (second_code == UNKNOWN || second_test)
10998 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10999 {
11000 if (TARGET_CMOVE)
11001 {
11002 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11003 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11004 tmp);
11005 emit_insn (tmp);
11006 }
11007 else
11008 {
11009 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11010 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11011 if (!scratch)
11012 scratch = gen_reg_rtx (HImode);
11013 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11014 emit_insn (gen_x86_sahf_1 (scratch));
11015 }
11016
11017 /* The FP codes work out to act like unsigned. */
11018 intcmp_mode = fpcmp_mode;
11019 code = first_code;
11020 if (bypass_code != UNKNOWN)
11021 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11022 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11023 const0_rtx);
11024 if (second_code != UNKNOWN)
11025 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11026 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11027 const0_rtx);
11028 }
11029 else
11030 {
11031 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11032 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11033 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11034 if (!scratch)
11035 scratch = gen_reg_rtx (HImode);
11036 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11037
11038 /* In the unordered case, we have to check C2 for NaNs, which
11039 doesn't happen to work out to anything nice combination-wise.
11040 So do some bit twiddling on the value we've got in AH to come
11041 up with an appropriate set of condition codes. */
11042
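/* Editor's note (based on the layout of the 387 status word after
   FNSTSW %ax: in AH, C0 is 0x01, C2 is 0x04 and C3 is 0x40, and SAHF
   copies them to CF, PF and ZF respectively).  The masks used below are
   combinations of those bits, e.g. 0x45 = C3|C2|C0 and 0x44 = C3|C2,
   which is how the fcom result is turned back into ordinary condition
   codes.  */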
11043 intcmp_mode = CCNOmode;
11044 switch (code)
11045 {
11046 case GT:
11047 case UNGT:
11048 if (code == GT || !TARGET_IEEE_FP)
11049 {
11050 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11051 code = EQ;
11052 }
11053 else
11054 {
11055 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11056 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11057 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11058 intcmp_mode = CCmode;
11059 code = GEU;
11060 }
11061 break;
11062 case LT:
11063 case UNLT:
11064 if (code == LT && TARGET_IEEE_FP)
11065 {
11066 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11067 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11068 intcmp_mode = CCmode;
11069 code = EQ;
11070 }
11071 else
11072 {
11073 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11074 code = NE;
11075 }
11076 break;
11077 case GE:
11078 case UNGE:
11079 if (code == GE || !TARGET_IEEE_FP)
11080 {
11081 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11082 code = EQ;
11083 }
11084 else
11085 {
11086 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11087 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11088 GEN_INT (0x01)));
11089 code = NE;
11090 }
11091 break;
11092 case LE:
11093 case UNLE:
11094 if (code == LE && TARGET_IEEE_FP)
11095 {
11096 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11097 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11098 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11099 intcmp_mode = CCmode;
11100 code = LTU;
11101 }
11102 else
11103 {
11104 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11105 code = NE;
11106 }
11107 break;
11108 case EQ:
11109 case UNEQ:
11110 if (code == EQ && TARGET_IEEE_FP)
11111 {
11112 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11113 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11114 intcmp_mode = CCmode;
11115 code = EQ;
11116 }
11117 else
11118 {
11119 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11120 code = NE;
11121 break;
11122 }
11123 break;
11124 case NE:
11125 case LTGT:
11126 if (code == NE && TARGET_IEEE_FP)
11127 {
11128 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11129 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11130 GEN_INT (0x40)));
11131 code = NE;
11132 }
11133 else
11134 {
11135 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11136 code = EQ;
11137 }
11138 break;
11139
11140 case UNORDERED:
11141 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11142 code = NE;
11143 break;
11144 case ORDERED:
11145 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11146 code = EQ;
11147 break;
11148
11149 default:
11150 gcc_unreachable ();
11151 }
11152 }
11153
11154 /* Return the test that should be put into the flags user, i.e.
11155 the bcc, scc, or cmov instruction. */
11156 return gen_rtx_fmt_ee (code, VOIDmode,
11157 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11158 const0_rtx);
11159 }
11160
11161 rtx
11162 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11163 {
11164 rtx op0, op1, ret;
11165 op0 = ix86_compare_op0;
11166 op1 = ix86_compare_op1;
11167
11168 if (second_test)
11169 *second_test = NULL_RTX;
11170 if (bypass_test)
11171 *bypass_test = NULL_RTX;
11172
11173 if (ix86_compare_emitted)
11174 {
11175 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11176 ix86_compare_emitted = NULL_RTX;
11177 }
11178 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11179 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11180 second_test, bypass_test);
11181 else
11182 ret = ix86_expand_int_compare (code, op0, op1);
11183
11184 return ret;
11185 }
11186
11187 /* Return true if CODE will result in a nontrivial jump sequence. */
11188 bool
11189 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11190 {
11191 enum rtx_code bypass_code, first_code, second_code;
11192 if (!TARGET_CMOVE)
11193 return true;
11194 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11195 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11196 }
11197
11198 void
11199 ix86_expand_branch (enum rtx_code code, rtx label)
11200 {
11201 rtx tmp;
11202
11203 /* If we have emitted a compare insn, go straight to simple.
11204 ix86_expand_compare won't emit anything if ix86_compare_emitted
11205 is non-NULL. */
11206 if (ix86_compare_emitted)
11207 goto simple;
11208
11209 switch (GET_MODE (ix86_compare_op0))
11210 {
11211 case QImode:
11212 case HImode:
11213 case SImode:
11214 simple:
11215 tmp = ix86_expand_compare (code, NULL, NULL);
11216 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11217 gen_rtx_LABEL_REF (VOIDmode, label),
11218 pc_rtx);
11219 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11220 return;
11221
11222 case SFmode:
11223 case DFmode:
11224 case XFmode:
11225 {
11226 rtvec vec;
11227 int use_fcomi;
11228 enum rtx_code bypass_code, first_code, second_code;
11229
11230 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11231 &ix86_compare_op1);
11232
11233 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11234
11235 /* Check whether we will use the natural sequence with one jump. If
11236 so, we can expand the jump early. Otherwise delay expansion by
11237 creating a compound insn so as not to confuse the optimizers. */
11238 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11239 && TARGET_CMOVE)
11240 {
11241 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11242 gen_rtx_LABEL_REF (VOIDmode, label),
11243 pc_rtx, NULL_RTX, NULL_RTX);
11244 }
11245 else
11246 {
11247 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11248 ix86_compare_op0, ix86_compare_op1);
11249 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11250 gen_rtx_LABEL_REF (VOIDmode, label),
11251 pc_rtx);
11252 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11253
11254 use_fcomi = ix86_use_fcomi_compare (code);
11255 vec = rtvec_alloc (3 + !use_fcomi);
11256 RTVEC_ELT (vec, 0) = tmp;
11257 RTVEC_ELT (vec, 1)
11258 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11259 RTVEC_ELT (vec, 2)
11260 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11261 if (! use_fcomi)
11262 RTVEC_ELT (vec, 3)
11263 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11264
11265 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11266 }
11267 return;
11268 }
11269
11270 case DImode:
11271 if (TARGET_64BIT)
11272 goto simple;
11273 case TImode:
11274 /* Expand DImode branch into multiple compare+branch. */
11275 {
11276 rtx lo[2], hi[2], label2;
11277 enum rtx_code code1, code2, code3;
11278 enum machine_mode submode;
11279
11280 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11281 {
11282 tmp = ix86_compare_op0;
11283 ix86_compare_op0 = ix86_compare_op1;
11284 ix86_compare_op1 = tmp;
11285 code = swap_condition (code);
11286 }
11287 if (GET_MODE (ix86_compare_op0) == DImode)
11288 {
11289 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11290 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11291 submode = SImode;
11292 }
11293 else
11294 {
11295 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11296 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11297 submode = DImode;
11298 }
11299
11300 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11301 avoid two branches. This costs one extra insn, so disable when
11302 optimizing for size. */
11303
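/* Editor's note (illustration): the rewrite below relies on the identity

     (hi0 == hi1 && lo0 == lo1)  <=>  ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0

   so the double-word equality collapses into a single compare of the
   OR-ed XORs against zero.  */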
11304 if ((code == EQ || code == NE)
11305 && (!optimize_size
11306 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11307 {
11308 rtx xor0, xor1;
11309
11310 xor1 = hi[0];
11311 if (hi[1] != const0_rtx)
11312 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11313 NULL_RTX, 0, OPTAB_WIDEN);
11314
11315 xor0 = lo[0];
11316 if (lo[1] != const0_rtx)
11317 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11318 NULL_RTX, 0, OPTAB_WIDEN);
11319
11320 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11321 NULL_RTX, 0, OPTAB_WIDEN);
11322
11323 ix86_compare_op0 = tmp;
11324 ix86_compare_op1 = const0_rtx;
11325 ix86_expand_branch (code, label);
11326 return;
11327 }
11328
11329 /* Otherwise, if we are doing a less-than or greater-than-or-equal comparison,
11330 op1 is a constant and the low word is zero, then we can just
11331 examine the high word. */
11332
11333 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11334 switch (code)
11335 {
11336 case LT: case LTU: case GE: case GEU:
11337 ix86_compare_op0 = hi[0];
11338 ix86_compare_op1 = hi[1];
11339 ix86_expand_branch (code, label);
11340 return;
11341 default:
11342 break;
11343 }
11344
11345 /* Otherwise, we need two or three jumps. */
11346
11347 label2 = gen_label_rtx ();
11348
11349 code1 = code;
11350 code2 = swap_condition (code);
11351 code3 = unsigned_condition (code);
11352
11353 switch (code)
11354 {
11355 case LT: case GT: case LTU: case GTU:
11356 break;
11357
11358 case LE: code1 = LT; code2 = GT; break;
11359 case GE: code1 = GT; code2 = LT; break;
11360 case LEU: code1 = LTU; code2 = GTU; break;
11361 case GEU: code1 = GTU; code2 = LTU; break;
11362
11363 case EQ: code1 = UNKNOWN; code2 = NE; break;
11364 case NE: code2 = UNKNOWN; break;
11365
11366 default:
11367 gcc_unreachable ();
11368 }
11369
11370 /*
11371 * a < b =>
11372 * if (hi(a) < hi(b)) goto true;
11373 * if (hi(a) > hi(b)) goto false;
11374 * if (lo(a) < lo(b)) goto true;
11375 * false:
11376 */
11377
11378 ix86_compare_op0 = hi[0];
11379 ix86_compare_op1 = hi[1];
11380
11381 if (code1 != UNKNOWN)
11382 ix86_expand_branch (code1, label);
11383 if (code2 != UNKNOWN)
11384 ix86_expand_branch (code2, label2);
11385
11386 ix86_compare_op0 = lo[0];
11387 ix86_compare_op1 = lo[1];
11388 ix86_expand_branch (code3, label);
11389
11390 if (code2 != UNKNOWN)
11391 emit_label (label2);
11392 return;
11393 }
11394
11395 default:
11396 gcc_unreachable ();
11397 }
11398 }
11399
11400 /* Split branch based on floating point condition. */
11401 void
11402 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11403 rtx target1, rtx target2, rtx tmp, rtx pushed)
11404 {
11405 rtx second, bypass;
11406 rtx label = NULL_RTX;
11407 rtx condition;
11408 int bypass_probability = -1, second_probability = -1, probability = -1;
11409 rtx i;
11410
11411 if (target2 != pc_rtx)
11412 {
11413 rtx tmp = target2;
11414 code = reverse_condition_maybe_unordered (code);
11415 target2 = target1;
11416 target1 = tmp;
11417 }
11418
11419 condition = ix86_expand_fp_compare (code, op1, op2,
11420 tmp, &second, &bypass);
11421
11422 /* Remove pushed operand from stack. */
11423 if (pushed)
11424 ix86_free_from_memory (GET_MODE (pushed));
11425
11426 if (split_branch_probability >= 0)
11427 {
11428 /* Distribute the probabilities across the jumps.
11429 Assume that BYPASS and SECOND always test
11430 for UNORDERED. */
11431 probability = split_branch_probability;
11432
11433 /* A value of 1 is low enough that the probability need not be
11434 updated. Later we may run some experiments and see whether
11435 unordered values are more frequent in practice. */
11436 if (bypass)
11437 bypass_probability = 1;
11438 if (second)
11439 second_probability = 1;
11440 }
11441 if (bypass != NULL_RTX)
11442 {
11443 label = gen_label_rtx ();
11444 i = emit_jump_insn (gen_rtx_SET
11445 (VOIDmode, pc_rtx,
11446 gen_rtx_IF_THEN_ELSE (VOIDmode,
11447 bypass,
11448 gen_rtx_LABEL_REF (VOIDmode,
11449 label),
11450 pc_rtx)));
11451 if (bypass_probability >= 0)
11452 REG_NOTES (i)
11453 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11454 GEN_INT (bypass_probability),
11455 REG_NOTES (i));
11456 }
11457 i = emit_jump_insn (gen_rtx_SET
11458 (VOIDmode, pc_rtx,
11459 gen_rtx_IF_THEN_ELSE (VOIDmode,
11460 condition, target1, target2)));
11461 if (probability >= 0)
11462 REG_NOTES (i)
11463 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11464 GEN_INT (probability),
11465 REG_NOTES (i));
11466 if (second != NULL_RTX)
11467 {
11468 i = emit_jump_insn (gen_rtx_SET
11469 (VOIDmode, pc_rtx,
11470 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11471 target2)));
11472 if (second_probability >= 0)
11473 REG_NOTES (i)
11474 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11475 GEN_INT (second_probability),
11476 REG_NOTES (i));
11477 }
11478 if (label != NULL_RTX)
11479 emit_label (label);
11480 }
11481
11482 int
11483 ix86_expand_setcc (enum rtx_code code, rtx dest)
11484 {
11485 rtx ret, tmp, tmpreg, equiv;
11486 rtx second_test, bypass_test;
11487
11488 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11489 return 0; /* FAIL */
11490
11491 gcc_assert (GET_MODE (dest) == QImode);
11492
11493 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11494 PUT_MODE (ret, QImode);
11495
11496 tmp = dest;
11497 tmpreg = dest;
11498
11499 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11500 if (bypass_test || second_test)
11501 {
11502 rtx test = second_test;
11503 int bypass = 0;
11504 rtx tmp2 = gen_reg_rtx (QImode);
11505 if (bypass_test)
11506 {
11507 gcc_assert (!second_test);
11508 test = bypass_test;
11509 bypass = 1;
11510 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11511 }
11512 PUT_MODE (test, QImode);
11513 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11514
11515 if (bypass)
11516 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11517 else
11518 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11519 }
11520
11521 /* Attach a REG_EQUAL note describing the comparison result. */
11522 if (ix86_compare_op0 && ix86_compare_op1)
11523 {
11524 equiv = simplify_gen_relational (code, QImode,
11525 GET_MODE (ix86_compare_op0),
11526 ix86_compare_op0, ix86_compare_op1);
11527 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11528 }
11529
11530 return 1; /* DONE */
11531 }
11532
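/* Editor's sketch (hypothetical assembly, assuming TARGET_CMOVE so the
   fcomi path is taken; register choices are illustrative only): with
   TARGET_IEEE_FP, "x = (a != b);" expands to two setcc's combined by the
   second_test handling below, roughly

	fcomi/fucomi		; compare a, b -> flags
	setne	%al		; LTGT part (ZF = 0)
	setp	%cl		; UNORDERED part (PF = 1), the second test
	orb	%cl, %al	; a != b  ==  LTGT || UNORDERED

   whereas a bypass_test is reversed and combined with AND instead.  */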
11533 /* Expand comparison setting or clearing carry flag. Return true when
11534 successful and set pop for the operation. */
11535 static bool
11536 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11537 {
11538 enum machine_mode mode =
11539 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11540
11541 /* Do not handle DImode compares that go through a special path. Also we can't
11542 deal with FP compares yet. This is possible to add. */
11543 if (mode == (TARGET_64BIT ? TImode : DImode))
11544 return false;
11545 if (FLOAT_MODE_P (mode))
11546 {
11547 rtx second_test = NULL, bypass_test = NULL;
11548 rtx compare_op, compare_seq;
11549
11550 /* Shortcut: the following common codes never translate into carry-flag compares. */
11551 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11552 || code == ORDERED || code == UNORDERED)
11553 return false;
11554
11555 /* These comparisons require the zero flag; swap the operands so they no longer do. */
11556 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11557 && !TARGET_IEEE_FP)
11558 {
11559 rtx tmp = op0;
11560 op0 = op1;
11561 op1 = tmp;
11562 code = swap_condition (code);
11563 }
11564
11565 /* Try to expand the comparison and verify that we end up with a carry-flag
11566 based comparison. This fails to be true only when we decide to expand the
11567 comparison using arithmetic, which is not a common scenario. */
11568 start_sequence ();
11569 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11570 &second_test, &bypass_test);
11571 compare_seq = get_insns ();
11572 end_sequence ();
11573
11574 if (second_test || bypass_test)
11575 return false;
11576 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11577 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11578 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11579 else
11580 code = GET_CODE (compare_op);
11581 if (code != LTU && code != GEU)
11582 return false;
11583 emit_insn (compare_seq);
11584 *pop = compare_op;
11585 return true;
11586 }
11587 if (!INTEGRAL_MODE_P (mode))
11588 return false;
11589 switch (code)
11590 {
11591 case LTU:
11592 case GEU:
11593 break;
11594
11595 /* Convert a==0 into (unsigned)a<1. */
11596 case EQ:
11597 case NE:
11598 if (op1 != const0_rtx)
11599 return false;
11600 op1 = const1_rtx;
11601 code = (code == EQ ? LTU : GEU);
11602 break;
11603
11604 /* Convert a>b into b<a or a>=b+1. */
11605 case GTU:
11606 case LEU:
11607 if (CONST_INT_P (op1))
11608 {
11609 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11610 /* Bail out on overflow. We could still swap the operands, but that
11611 would force loading of the constant into a register. */
11612 if (op1 == const0_rtx
11613 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11614 return false;
11615 code = (code == GTU ? GEU : LTU);
11616 }
11617 else
11618 {
11619 rtx tmp = op1;
11620 op1 = op0;
11621 op0 = tmp;
11622 code = (code == GTU ? LTU : GEU);
11623 }
11624 break;
11625
11626 /* Convert a>=0 into (unsigned)a<0x80000000. */
11627 case LT:
11628 case GE:
11629 if (mode == DImode || op1 != const0_rtx)
11630 return false;
11631 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11632 code = (code == LT ? GEU : LTU);
11633 break;
11634 case LE:
11635 case GT:
11636 if (mode == DImode || op1 != constm1_rtx)
11637 return false;
11638 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11639 code = (code == LE ? GEU : LTU);
11640 break;
11641
11642 default:
11643 return false;
11644 }
11645 /* Swapping operands may cause a constant to appear as the first operand. */
11646 if (!nonimmediate_operand (op0, VOIDmode))
11647 {
11648 if (no_new_pseudos)
11649 return false;
11650 op0 = force_reg (mode, op0);
11651 }
11652 ix86_compare_op0 = op0;
11653 ix86_compare_op1 = op1;
11654 *pop = ix86_expand_compare (code, NULL, NULL);
11655 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11656 return true;
11657 }
11658
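/* Editor's note (illustrative): the conversions above reduce everything to
   LTU/GEU so that a single CMP leaves the answer directly in the carry
   flag, e.g. for an unsigned int a:

	(a <= 41)  ->  (a < 42)		LEU -> LTU, constant bumped by one
	(a == 0)   ->  ((unsigned) a < 1)

   after which "sbb %reg, %reg" materializes 0 or -1 from CF for the
   sbb/adc based sequences in the callers below.  */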
11659 int
11660 ix86_expand_int_movcc (rtx operands[])
11661 {
11662 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11663 rtx compare_seq, compare_op;
11664 rtx second_test, bypass_test;
11665 enum machine_mode mode = GET_MODE (operands[0]);
11666 bool sign_bit_compare_p = false;
11667
11668 start_sequence ();
11669 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11670 compare_seq = get_insns ();
11671 end_sequence ();
11672
11673 compare_code = GET_CODE (compare_op);
11674
11675 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11676 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11677 sign_bit_compare_p = true;
11678
11679 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11680 HImode insns, we'd be swallowed in word prefix ops. */
11681
11682 if ((mode != HImode || TARGET_FAST_PREFIX)
11683 && (mode != (TARGET_64BIT ? TImode : DImode))
11684 && CONST_INT_P (operands[2])
11685 && CONST_INT_P (operands[3]))
11686 {
11687 rtx out = operands[0];
11688 HOST_WIDE_INT ct = INTVAL (operands[2]);
11689 HOST_WIDE_INT cf = INTVAL (operands[3]);
11690 HOST_WIDE_INT diff;
11691
11692 diff = ct - cf;
11693 /* Sign-bit compares are better done using shifts than by using
11694 sbb. */
11695 if (sign_bit_compare_p
11696 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11697 ix86_compare_op1, &compare_op))
11698 {
11699 /* Detect overlap between destination and compare sources. */
11700 rtx tmp = out;
11701
11702 if (!sign_bit_compare_p)
11703 {
11704 bool fpcmp = false;
11705
11706 compare_code = GET_CODE (compare_op);
11707
11708 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11709 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11710 {
11711 fpcmp = true;
11712 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11713 }
11714
11715 /* To simplify rest of code, restrict to the GEU case. */
11716 if (compare_code == LTU)
11717 {
11718 HOST_WIDE_INT tmp = ct;
11719 ct = cf;
11720 cf = tmp;
11721 compare_code = reverse_condition (compare_code);
11722 code = reverse_condition (code);
11723 }
11724 else
11725 {
11726 if (fpcmp)
11727 PUT_CODE (compare_op,
11728 reverse_condition_maybe_unordered
11729 (GET_CODE (compare_op)));
11730 else
11731 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11732 }
11733 diff = ct - cf;
11734
11735 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11736 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11737 tmp = gen_reg_rtx (mode);
11738
11739 if (mode == DImode)
11740 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11741 else
11742 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11743 }
11744 else
11745 {
11746 if (code == GT || code == GE)
11747 code = reverse_condition (code);
11748 else
11749 {
11750 HOST_WIDE_INT tmp = ct;
11751 ct = cf;
11752 cf = tmp;
11753 diff = ct - cf;
11754 }
11755 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11756 ix86_compare_op1, VOIDmode, 0, -1);
11757 }
11758
11759 if (diff == 1)
11760 {
11761 /*
11762 * cmpl op0,op1
11763 * sbbl dest,dest
11764 * [addl dest, ct]
11765 *
11766 * Size 5 - 8.
11767 */
11768 if (ct)
11769 tmp = expand_simple_binop (mode, PLUS,
11770 tmp, GEN_INT (ct),
11771 copy_rtx (tmp), 1, OPTAB_DIRECT);
11772 }
11773 else if (cf == -1)
11774 {
11775 /*
11776 * cmpl op0,op1
11777 * sbbl dest,dest
11778 * orl $ct, dest
11779 *
11780 * Size 8.
11781 */
11782 tmp = expand_simple_binop (mode, IOR,
11783 tmp, GEN_INT (ct),
11784 copy_rtx (tmp), 1, OPTAB_DIRECT);
11785 }
11786 else if (diff == -1 && ct)
11787 {
11788 /*
11789 * cmpl op0,op1
11790 * sbbl dest,dest
11791 * notl dest
11792 * [addl dest, cf]
11793 *
11794 * Size 8 - 11.
11795 */
11796 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11797 if (cf)
11798 tmp = expand_simple_binop (mode, PLUS,
11799 copy_rtx (tmp), GEN_INT (cf),
11800 copy_rtx (tmp), 1, OPTAB_DIRECT);
11801 }
11802 else
11803 {
11804 /*
11805 * cmpl op0,op1
11806 * sbbl dest,dest
11807 * [notl dest]
11808 * andl cf - ct, dest
11809 * [addl dest, ct]
11810 *
11811 * Size 8 - 11.
11812 */
11813
11814 if (cf == 0)
11815 {
11816 cf = ct;
11817 ct = 0;
11818 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11819 }
11820
11821 tmp = expand_simple_binop (mode, AND,
11822 copy_rtx (tmp),
11823 gen_int_mode (cf - ct, mode),
11824 copy_rtx (tmp), 1, OPTAB_DIRECT);
11825 if (ct)
11826 tmp = expand_simple_binop (mode, PLUS,
11827 copy_rtx (tmp), GEN_INT (ct),
11828 copy_rtx (tmp), 1, OPTAB_DIRECT);
11829 }
11830
11831 if (!rtx_equal_p (tmp, out))
11832 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11833
11834 return 1; /* DONE */
11835 }
11836
11837 if (diff < 0)
11838 {
11839 HOST_WIDE_INT tmp;
11840 tmp = ct, ct = cf, cf = tmp;
11841 diff = -diff;
11842 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11843 {
11844 /* We may be reversing an unordered compare to a normal compare, which
11845 is not valid in general (we may convert a non-trapping condition
11846 into a trapping one); however, on i386 we currently emit all
11847 comparisons unordered. */
11848 compare_code = reverse_condition_maybe_unordered (compare_code);
11849 code = reverse_condition_maybe_unordered (code);
11850 }
11851 else
11852 {
11853 compare_code = reverse_condition (compare_code);
11854 code = reverse_condition (code);
11855 }
11856 }
11857
11858 compare_code = UNKNOWN;
11859 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11860 && CONST_INT_P (ix86_compare_op1))
11861 {
11862 if (ix86_compare_op1 == const0_rtx
11863 && (code == LT || code == GE))
11864 compare_code = code;
11865 else if (ix86_compare_op1 == constm1_rtx)
11866 {
11867 if (code == LE)
11868 compare_code = LT;
11869 else if (code == GT)
11870 compare_code = GE;
11871 }
11872 }
11873
11874 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11875 if (compare_code != UNKNOWN
11876 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11877 && (cf == -1 || ct == -1))
11878 {
11879 /* If the lea code below could be used, only optimize
11880 if it results in a 2-insn sequence. */
11881
11882 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11883 || diff == 3 || diff == 5 || diff == 9)
11884 || (compare_code == LT && ct == -1)
11885 || (compare_code == GE && cf == -1))
11886 {
11887 /*
11888 * notl op1 (if necessary)
11889 * sarl $31, op1
11890 * orl cf, op1
11891 */
11892 if (ct != -1)
11893 {
11894 cf = ct;
11895 ct = -1;
11896 code = reverse_condition (code);
11897 }
11898
11899 out = emit_store_flag (out, code, ix86_compare_op0,
11900 ix86_compare_op1, VOIDmode, 0, -1);
11901
11902 out = expand_simple_binop (mode, IOR,
11903 out, GEN_INT (cf),
11904 out, 1, OPTAB_DIRECT);
11905 if (out != operands[0])
11906 emit_move_insn (operands[0], out);
11907
11908 return 1; /* DONE */
11909 }
11910 }
11911
11912
11913 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11914 || diff == 3 || diff == 5 || diff == 9)
11915 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11916 && (mode != DImode
11917 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11918 {
11919 /*
11920 * xorl dest,dest
11921 * cmpl op1,op2
11922 * setcc dest
11923 * lea cf(dest*(ct-cf)),dest
11924 *
11925 * Size 14.
11926 *
11927 * This also catches the degenerate setcc-only case.
11928 */
11929
11930 rtx tmp;
11931 int nops;
11932
11933 out = emit_store_flag (out, code, ix86_compare_op0,
11934 ix86_compare_op1, VOIDmode, 0, 1);
11935
11936 nops = 0;
11937 /* On x86_64 the lea instruction operates on Pmode, so we need
11938 to get the arithmetic done in the proper mode to match. */
11939 if (diff == 1)
11940 tmp = copy_rtx (out);
11941 else
11942 {
11943 rtx out1;
11944 out1 = copy_rtx (out);
11945 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11946 nops++;
11947 if (diff & 1)
11948 {
11949 tmp = gen_rtx_PLUS (mode, tmp, out1);
11950 nops++;
11951 }
11952 }
11953 if (cf != 0)
11954 {
11955 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11956 nops++;
11957 }
11958 if (!rtx_equal_p (tmp, out))
11959 {
11960 if (nops == 1)
11961 out = force_operand (tmp, copy_rtx (out));
11962 else
11963 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11964 }
11965 if (!rtx_equal_p (out, operands[0]))
11966 emit_move_insn (operands[0], copy_rtx (out));
11967
11968 return 1; /* DONE */
11969 }
11970
11971 /*
11972 * General case: Jumpful:
11973 * xorl dest,dest cmpl op1, op2
11974 * cmpl op1, op2 movl ct, dest
11975 * setcc dest jcc 1f
11976 * decl dest movl cf, dest
11977 * andl (cf-ct),dest 1:
11978 * addl ct,dest
11979 *
11980 * Size 20. Size 14.
11981 *
11982 * This is reasonably steep, but branch mispredict costs are
11983 * high on modern cpus, so consider failing only if optimizing
11984 * for space.
11985 */
11986
11987 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11988 && BRANCH_COST >= 2)
11989 {
11990 if (cf == 0)
11991 {
11992 cf = ct;
11993 ct = 0;
11994 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11995 /* We may be reversing an unordered compare to a normal compare,
11996 which is not valid in general (we may convert a non-trapping
11997 condition into a trapping one); however, on i386 we currently
11998 emit all comparisons unordered. */
11999 code = reverse_condition_maybe_unordered (code);
12000 else
12001 {
12002 code = reverse_condition (code);
12003 if (compare_code != UNKNOWN)
12004 compare_code = reverse_condition (compare_code);
12005 }
12006 }
12007
12008 if (compare_code != UNKNOWN)
12009 {
12010 /* notl op1 (if needed)
12011 sarl $31, op1
12012 andl (cf-ct), op1
12013 addl ct, op1
12014
12015 For x < 0 (resp. x <= -1) there will be no notl,
12016 so if possible swap the constants to get rid of the
12017 complement.
12018 True/false will be -1/0 while code below (store flag
12019 followed by decrement) is 0/-1, so the constants need
12020 to be exchanged once more. */
12021
12022 if (compare_code == GE || !cf)
12023 {
12024 code = reverse_condition (code);
12025 compare_code = LT;
12026 }
12027 else
12028 {
12029 HOST_WIDE_INT tmp = cf;
12030 cf = ct;
12031 ct = tmp;
12032 }
12033
12034 out = emit_store_flag (out, code, ix86_compare_op0,
12035 ix86_compare_op1, VOIDmode, 0, -1);
12036 }
12037 else
12038 {
12039 out = emit_store_flag (out, code, ix86_compare_op0,
12040 ix86_compare_op1, VOIDmode, 0, 1);
12041
12042 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12043 copy_rtx (out), 1, OPTAB_DIRECT);
12044 }
12045
12046 out = expand_simple_binop (mode, AND, copy_rtx (out),
12047 gen_int_mode (cf - ct, mode),
12048 copy_rtx (out), 1, OPTAB_DIRECT);
12049 if (ct)
12050 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12051 copy_rtx (out), 1, OPTAB_DIRECT);
12052 if (!rtx_equal_p (out, operands[0]))
12053 emit_move_insn (operands[0], copy_rtx (out));
12054
12055 return 1; /* DONE */
12056 }
12057 }
12058
12059 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12060 {
12061 /* Try a few things more with specific constants and a variable. */
12062
12063 optab op;
12064 rtx var, orig_out, out, tmp;
12065
12066 if (BRANCH_COST <= 2)
12067 return 0; /* FAIL */
12068
12069 /* If one of the two operands is an interesting constant, load that
12070 constant using the code above, then mask in the variable with a logical operation. */
12071
12072 if (CONST_INT_P (operands[2]))
12073 {
12074 var = operands[3];
12075 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12076 operands[3] = constm1_rtx, op = and_optab;
12077 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12078 operands[3] = const0_rtx, op = ior_optab;
12079 else
12080 return 0; /* FAIL */
12081 }
12082 else if (CONST_INT_P (operands[3]))
12083 {
12084 var = operands[2];
12085 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12086 operands[2] = constm1_rtx, op = and_optab;
12087 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12088 operands[2] = const0_rtx, op = ior_optab;
12089 else
12090 return 0; /* FAIL */
12091 }
12092 else
12093 return 0; /* FAIL */
12094
12095 orig_out = operands[0];
12096 tmp = gen_reg_rtx (mode);
12097 operands[0] = tmp;
12098
12099 /* Recurse to get the constant loaded. */
12100 if (ix86_expand_int_movcc (operands) == 0)
12101 return 0; /* FAIL */
12102
12103 /* Mask in the interesting variable. */
12104 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12105 OPTAB_WIDEN);
12106 if (!rtx_equal_p (out, orig_out))
12107 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12108
12109 return 1; /* DONE */
12110 }
12111
12112 /*
12113 * For comparison with above,
12114 *
12115 * movl cf,dest
12116 * movl ct,tmp
12117 * cmpl op1,op2
12118 * cmovcc tmp,dest
12119 *
12120 * Size 15.
12121 */
12122
12123 if (! nonimmediate_operand (operands[2], mode))
12124 operands[2] = force_reg (mode, operands[2]);
12125 if (! nonimmediate_operand (operands[3], mode))
12126 operands[3] = force_reg (mode, operands[3]);
12127
12128 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12129 {
12130 rtx tmp = gen_reg_rtx (mode);
12131 emit_move_insn (tmp, operands[3]);
12132 operands[3] = tmp;
12133 }
12134 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12135 {
12136 rtx tmp = gen_reg_rtx (mode);
12137 emit_move_insn (tmp, operands[2]);
12138 operands[2] = tmp;
12139 }
12140
12141 if (! register_operand (operands[2], VOIDmode)
12142 && (mode == QImode
12143 || ! register_operand (operands[3], VOIDmode)))
12144 operands[2] = force_reg (mode, operands[2]);
12145
12146 if (mode == QImode
12147 && ! register_operand (operands[3], VOIDmode))
12148 operands[3] = force_reg (mode, operands[3]);
12149
12150 emit_insn (compare_seq);
12151 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12152 gen_rtx_IF_THEN_ELSE (mode,
12153 compare_op, operands[2],
12154 operands[3])));
12155 if (bypass_test)
12156 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12157 gen_rtx_IF_THEN_ELSE (mode,
12158 bypass_test,
12159 copy_rtx (operands[3]),
12160 copy_rtx (operands[0]))));
12161 if (second_test)
12162 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12163 gen_rtx_IF_THEN_ELSE (mode,
12164 second_test,
12165 copy_rtx (operands[2]),
12166 copy_rtx (operands[0]))));
12167
12168 return 1; /* DONE */
12169 }
12170
12171 /* Swap, force into registers, or otherwise massage the two operands
12172 to an sse comparison with a mask result. Thus we differ a bit from
12173 ix86_prepare_fp_compare_args which expects to produce a flags result.
12174
12175 The DEST operand exists to help determine whether to commute commutative
12176 operators. The POP0/POP1 operands are updated in place. The new
12177 comparison code is returned, or UNKNOWN if not implementable. */
12178
12179 static enum rtx_code
12180 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12181 rtx *pop0, rtx *pop1)
12182 {
12183 rtx tmp;
12184
12185 switch (code)
12186 {
12187 case LTGT:
12188 case UNEQ:
12189 /* We have no LTGT as an operator. We could implement it with
12190 NE & ORDERED, but this requires an extra temporary. It's
12191 not clear that it's worth it. */
12192 return UNKNOWN;
12193
12194 case LT:
12195 case LE:
12196 case UNGT:
12197 case UNGE:
12198 /* These are supported directly. */
12199 break;
12200
12201 case EQ:
12202 case NE:
12203 case UNORDERED:
12204 case ORDERED:
12205 /* For commutative operators, try to canonicalize the destination
12206 operand to be first in the comparison - this helps reload to
12207 avoid extra moves. */
12208 if (!dest || !rtx_equal_p (dest, *pop1))
12209 break;
12210 /* FALLTHRU */
12211
12212 case GE:
12213 case GT:
12214 case UNLE:
12215 case UNLT:
12216 /* These are not supported directly. Swap the comparison operands
12217 to transform into something that is supported. */
12218 tmp = *pop0;
12219 *pop0 = *pop1;
12220 *pop1 = tmp;
12221 code = swap_condition (code);
12222 break;
12223
12224 default:
12225 gcc_unreachable ();
12226 }
12227
12228 return code;
12229 }
12230
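/* Editor's note (illustrative): the SSE compare patterns only exist for
   the EQ/NE/ORDERED/UNORDERED and LT/LE/UNGT/UNGE shapes (cmpeqps,
   cmpltps, cmpleps, cmpunordps and their negated forms), so a request
   such as mask(a > b) is handled above by rewriting it as mask(b < a)
   with the operands exchanged.  */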
12231 /* Detect conditional moves that exactly match min/max operational
12232 semantics. Note that this is IEEE safe, as long as we don't
12233 interchange the operands.
12234
12235 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12236 and TRUE if the operation is successful and instructions are emitted. */
12237
12238 static bool
12239 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12240 rtx cmp_op1, rtx if_true, rtx if_false)
12241 {
12242 enum machine_mode mode;
12243 bool is_min;
12244 rtx tmp;
12245
12246 if (code == LT)
12247 ;
12248 else if (code == UNGE)
12249 {
12250 tmp = if_true;
12251 if_true = if_false;
12252 if_false = tmp;
12253 }
12254 else
12255 return false;
12256
12257 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12258 is_min = true;
12259 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12260 is_min = false;
12261 else
12262 return false;
12263
12264 mode = GET_MODE (dest);
12265
12266 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12267 but MODE may be a vector mode and thus not appropriate. */
12268 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12269 {
12270 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12271 rtvec v;
12272
12273 if_true = force_reg (mode, if_true);
12274 v = gen_rtvec (2, if_true, if_false);
12275 tmp = gen_rtx_UNSPEC (mode, v, u);
12276 }
12277 else
12278 {
12279 code = is_min ? SMIN : SMAX;
12280 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12281 }
12282
12283 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12284 return true;
12285 }
12286
12287 /* Expand an sse vector comparison. Return the register with the result. */
12288
12289 static rtx
12290 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12291 rtx op_true, rtx op_false)
12292 {
12293 enum machine_mode mode = GET_MODE (dest);
12294 rtx x;
12295
12296 cmp_op0 = force_reg (mode, cmp_op0);
12297 if (!nonimmediate_operand (cmp_op1, mode))
12298 cmp_op1 = force_reg (mode, cmp_op1);
12299
12300 if (optimize
12301 || reg_overlap_mentioned_p (dest, op_true)
12302 || reg_overlap_mentioned_p (dest, op_false))
12303 dest = gen_reg_rtx (mode);
12304
12305 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12306 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12307
12308 return dest;
12309 }
12310
12311 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12312 operations. This is used for both scalar and vector conditional moves. */
12313
12314 static void
12315 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12316 {
12317 enum machine_mode mode = GET_MODE (dest);
12318 rtx t2, t3, x;
12319
12320 if (op_false == CONST0_RTX (mode))
12321 {
12322 op_true = force_reg (mode, op_true);
12323 x = gen_rtx_AND (mode, cmp, op_true);
12324 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12325 }
12326 else if (op_true == CONST0_RTX (mode))
12327 {
12328 op_false = force_reg (mode, op_false);
12329 x = gen_rtx_NOT (mode, cmp);
12330 x = gen_rtx_AND (mode, x, op_false);
12331 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12332 }
12333 else
12334 {
12335 op_true = force_reg (mode, op_true);
12336 op_false = force_reg (mode, op_false);
12337
12338 t2 = gen_reg_rtx (mode);
12339 if (optimize)
12340 t3 = gen_reg_rtx (mode);
12341 else
12342 t3 = dest;
12343
12344 x = gen_rtx_AND (mode, op_true, cmp);
12345 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12346
12347 x = gen_rtx_NOT (mode, cmp);
12348 x = gen_rtx_AND (mode, x, op_false);
12349 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12350
12351 x = gen_rtx_IOR (mode, t3, t2);
12352 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12353 }
12354 }
12355
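/* Editor's sketch (hypothetical scalar model of the AND/ANDN/IOR blend
   emitted above; in the real code the mask is a per-element all-ones or
   all-zeros SSE comparison result, not a scalar).  */
#if 0
static unsigned int
blend_by_mask (unsigned int mask, unsigned int if_true, unsigned int if_false)
{
  /* dest = (mask & if_true) | (~mask & if_false)  */
  return (mask & if_true) | (~mask & if_false);
}
#endif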
12356 /* Expand a floating-point conditional move. Return true if successful. */
12357
12358 int
12359 ix86_expand_fp_movcc (rtx operands[])
12360 {
12361 enum machine_mode mode = GET_MODE (operands[0]);
12362 enum rtx_code code = GET_CODE (operands[1]);
12363 rtx tmp, compare_op, second_test, bypass_test;
12364
12365 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12366 {
12367 enum machine_mode cmode;
12368
12369 /* Since we have no cmove for SSE registers, don't force bad register
12370 allocation just to gain access to it. Deny movcc when the
12371 comparison mode doesn't match the move mode. */
12372 cmode = GET_MODE (ix86_compare_op0);
12373 if (cmode == VOIDmode)
12374 cmode = GET_MODE (ix86_compare_op1);
12375 if (cmode != mode)
12376 return 0;
12377
12378 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12379 &ix86_compare_op0,
12380 &ix86_compare_op1);
12381 if (code == UNKNOWN)
12382 return 0;
12383
12384 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12385 ix86_compare_op1, operands[2],
12386 operands[3]))
12387 return 1;
12388
12389 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12390 ix86_compare_op1, operands[2], operands[3]);
12391 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12392 return 1;
12393 }
12394
12395 /* The floating point conditional move instructions don't directly
12396 support conditions resulting from a signed integer comparison. */
12397
12398 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12399
12403 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12404 {
12405 gcc_assert (!second_test && !bypass_test);
12406 tmp = gen_reg_rtx (QImode);
12407 ix86_expand_setcc (code, tmp);
12408 code = NE;
12409 ix86_compare_op0 = tmp;
12410 ix86_compare_op1 = const0_rtx;
12411 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12412 }
12413 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12414 {
12415 tmp = gen_reg_rtx (mode);
12416 emit_move_insn (tmp, operands[3]);
12417 operands[3] = tmp;
12418 }
12419 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12420 {
12421 tmp = gen_reg_rtx (mode);
12422 emit_move_insn (tmp, operands[2]);
12423 operands[2] = tmp;
12424 }
12425
12426 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12427 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12428 operands[2], operands[3])));
12429 if (bypass_test)
12430 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12431 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12432 operands[3], operands[0])));
12433 if (second_test)
12434 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12435 gen_rtx_IF_THEN_ELSE (mode, second_test,
12436 operands[2], operands[0])));
12437
12438 return 1;
12439 }
12440
12441 /* Expand a floating-point vector conditional move; a vcond operation
12442 rather than a movcc operation. */
12443
12444 bool
12445 ix86_expand_fp_vcond (rtx operands[])
12446 {
12447 enum rtx_code code = GET_CODE (operands[3]);
12448 rtx cmp;
12449
12450 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12451 &operands[4], &operands[5]);
12452 if (code == UNKNOWN)
12453 return false;
12454
12455 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12456 operands[5], operands[1], operands[2]))
12457 return true;
12458
12459 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12460 operands[1], operands[2]);
12461 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12462 return true;
12463 }
12464
12465 /* Expand a signed integral vector conditional move. */
12466
12467 bool
12468 ix86_expand_int_vcond (rtx operands[])
12469 {
12470 enum machine_mode mode = GET_MODE (operands[0]);
12471 enum rtx_code code = GET_CODE (operands[3]);
12472 bool negate = false;
12473 rtx x, cop0, cop1;
12474
12475 cop0 = operands[4];
12476 cop1 = operands[5];
12477
12478 /* Canonicalize the comparison to EQ, GT, GTU. */
12479 switch (code)
12480 {
12481 case EQ:
12482 case GT:
12483 case GTU:
12484 break;
12485
12486 case NE:
12487 case LE:
12488 case LEU:
12489 code = reverse_condition (code);
12490 negate = true;
12491 break;
12492
12493 case GE:
12494 case GEU:
12495 code = reverse_condition (code);
12496 negate = true;
12497 /* FALLTHRU */
12498
12499 case LT:
12500 case LTU:
12501 code = swap_condition (code);
12502 x = cop0, cop0 = cop1, cop1 = x;
12503 break;
12504
12505 default:
12506 gcc_unreachable ();
12507 }
12508
12509 /* Unsigned parallel compare is not supported by the hardware. Play some
12510 tricks to turn this into a signed comparison against 0. */
12511 if (code == GTU)
12512 {
12513 cop0 = force_reg (mode, cop0);
12514
12515 switch (mode)
12516 {
12517 case V4SImode:
12518 {
12519 rtx t1, t2, mask;
12520
12521 /* Perform a parallel modulo subtraction. */
12522 t1 = gen_reg_rtx (mode);
12523 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12524
12525 /* Extract the original sign bit of op0. */
12526 mask = GEN_INT (-0x80000000);
12527 mask = gen_rtx_CONST_VECTOR (mode,
12528 gen_rtvec (4, mask, mask, mask, mask));
12529 mask = force_reg (mode, mask);
12530 t2 = gen_reg_rtx (mode);
12531 emit_insn (gen_andv4si3 (t2, cop0, mask));
12532
12533 /* XOR it back into the result of the subtraction. This results
12534 in the sign bit set iff we saw unsigned underflow. */
12535 x = gen_reg_rtx (mode);
12536 emit_insn (gen_xorv4si3 (x, t1, t2));
12537
12538 code = GT;
12539 }
12540 break;
12541
12542 case V16QImode:
12543 case V8HImode:
12544 /* Perform a parallel unsigned saturating subtraction. */
12545 x = gen_reg_rtx (mode);
12546 emit_insn (gen_rtx_SET (VOIDmode, x,
12547 gen_rtx_US_MINUS (mode, cop0, cop1)));
12548
12549 code = EQ;
12550 negate = !negate;
12551 break;
12552
12553 default:
12554 gcc_unreachable ();
12555 }
12556
12557 cop0 = x;
12558 cop1 = CONST0_RTX (mode);
12559 }
12560
12561 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12562 operands[1+negate], operands[2-negate]);
12563
12564 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12565 operands[2-negate]);
12566 return true;
12567 }
12568
12569 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12570 true if we should do zero extension, else sign extension. HIGH_P is
12571 true if we want the N/2 high elements, else the low elements. */
12572
12573 void
12574 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12575 {
12576 enum machine_mode imode = GET_MODE (operands[1]);
12577 rtx (*unpack)(rtx, rtx, rtx);
12578 rtx se, dest;
12579
12580 switch (imode)
12581 {
12582 case V16QImode:
12583 if (high_p)
12584 unpack = gen_vec_interleave_highv16qi;
12585 else
12586 unpack = gen_vec_interleave_lowv16qi;
12587 break;
12588 case V8HImode:
12589 if (high_p)
12590 unpack = gen_vec_interleave_highv8hi;
12591 else
12592 unpack = gen_vec_interleave_lowv8hi;
12593 break;
12594 case V4SImode:
12595 if (high_p)
12596 unpack = gen_vec_interleave_highv4si;
12597 else
12598 unpack = gen_vec_interleave_lowv4si;
12599 break;
12600 default:
12601 gcc_unreachable ();
12602 }
12603
12604 dest = gen_lowpart (imode, operands[0]);
12605
12606 if (unsigned_p)
12607 se = force_reg (imode, CONST0_RTX (imode));
12608 else
12609 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12610 operands[1], pc_rtx, pc_rtx);
12611
12612 emit_insn (unpack (dest, operands[1], se));
12613 }
12614
12615 /* Expand conditional increment or decrement using adc/sbb instructions.
12616 The default case using setcc followed by the conditional move can be
12617 done by generic code. */
12618 int
12619 ix86_expand_int_addcc (rtx operands[])
12620 {
12621 enum rtx_code code = GET_CODE (operands[1]);
12622 rtx compare_op;
12623 rtx val = const0_rtx;
12624 bool fpcmp = false;
12625 enum machine_mode mode = GET_MODE (operands[0]);
12626
12627 if (operands[3] != const1_rtx
12628 && operands[3] != constm1_rtx)
12629 return 0;
12630 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12631 ix86_compare_op1, &compare_op))
12632 return 0;
12633 code = GET_CODE (compare_op);
12634
12635 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12636 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12637 {
12638 fpcmp = true;
12639 code = ix86_fp_compare_code_to_integer (code);
12640 }
12641
12642 if (code != LTU)
12643 {
12644 val = constm1_rtx;
12645 if (fpcmp)
12646 PUT_CODE (compare_op,
12647 reverse_condition_maybe_unordered
12648 (GET_CODE (compare_op)));
12649 else
12650 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12651 }
12652 PUT_MODE (compare_op, mode);
12653
12654 /* Construct either adc or sbb insn. */
12655 if ((code == LTU) == (operands[3] == constm1_rtx))
12656 {
12657 switch (GET_MODE (operands[0]))
12658 {
12659 case QImode:
12660 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12661 break;
12662 case HImode:
12663 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12664 break;
12665 case SImode:
12666 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12667 break;
12668 case DImode:
12669 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12670 break;
12671 default:
12672 gcc_unreachable ();
12673 }
12674 }
12675 else
12676 {
12677 switch (GET_MODE (operands[0]))
12678 {
12679 case QImode:
12680 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12681 break;
12682 case HImode:
12683 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12684 break;
12685 case SImode:
12686 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12687 break;
12688 case DImode:
12689 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12690 break;
12691 default:
12692 gcc_unreachable ();
12693 }
12694 }
12695 return 1; /* DONE */
12696 }
12697
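/* Editor's sketch (hypothetical assembly, register names illustrative):
   for "x = (a < b) ? x + 1 : x;" with an unsigned comparison the
   expansion above produces roughly

	cmpl	%ebx, %eax	; CF = (a < b)
	adcl	$0, %ecx	; x += CF

   and the sbb form covers the decrement / reversed-condition cases.  */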
12698
12699 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12700 works for floating point parameters and non-offsettable memories.
12701 For pushes, it returns just stack offsets; the values will be saved
12702 in the right order. At most three parts are generated. */
12703
12704 static int
12705 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12706 {
12707 int size;
12708
12709 if (!TARGET_64BIT)
12710 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12711 else
12712 size = (GET_MODE_SIZE (mode) + 4) / 8;
12713
12714 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12715 gcc_assert (size >= 2 && size <= 3);
12716
12717 /* Optimize constant pool references to immediates. This is used by fp
12718 moves that force all constants to memory to allow combining. */
12719 if (MEM_P (operand) && MEM_READONLY_P (operand))
12720 {
12721 rtx tmp = maybe_get_pool_constant (operand);
12722 if (tmp)
12723 operand = tmp;
12724 }
12725
12726 if (MEM_P (operand) && !offsettable_memref_p (operand))
12727 {
12728 /* The only non-offsettable memories we handle are pushes.  */
12729 int ok = push_operand (operand, VOIDmode);
12730
12731 gcc_assert (ok);
12732
12733 operand = copy_rtx (operand);
12734 PUT_MODE (operand, Pmode);
12735 parts[0] = parts[1] = parts[2] = operand;
12736 return size;
12737 }
12738
12739 if (GET_CODE (operand) == CONST_VECTOR)
12740 {
12741 enum machine_mode imode = int_mode_for_mode (mode);
12742 /* Caution: if we looked through a constant pool memory above,
12743 the operand may actually have a different mode now. That's
12744 ok, since we want to pun this all the way back to an integer. */
12745 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12746 gcc_assert (operand != NULL);
12747 mode = imode;
12748 }
12749
12750 if (!TARGET_64BIT)
12751 {
12752 if (mode == DImode)
12753 split_di (&operand, 1, &parts[0], &parts[1]);
12754 else
12755 {
12756 if (REG_P (operand))
12757 {
12758 gcc_assert (reload_completed);
12759 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12760 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12761 if (size == 3)
12762 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12763 }
12764 else if (offsettable_memref_p (operand))
12765 {
12766 operand = adjust_address (operand, SImode, 0);
12767 parts[0] = operand;
12768 parts[1] = adjust_address (operand, SImode, 4);
12769 if (size == 3)
12770 parts[2] = adjust_address (operand, SImode, 8);
12771 }
12772 else if (GET_CODE (operand) == CONST_DOUBLE)
12773 {
12774 REAL_VALUE_TYPE r;
12775 long l[4];
12776
12777 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12778 switch (mode)
12779 {
12780 case XFmode:
12781 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12782 parts[2] = gen_int_mode (l[2], SImode);
12783 break;
12784 case DFmode:
12785 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12786 break;
12787 default:
12788 gcc_unreachable ();
12789 }
12790 parts[1] = gen_int_mode (l[1], SImode);
12791 parts[0] = gen_int_mode (l[0], SImode);
12792 }
12793 else
12794 gcc_unreachable ();
12795 }
12796 }
12797 else
12798 {
12799 if (mode == TImode)
12800 split_ti (&operand, 1, &parts[0], &parts[1]);
12801 if (mode == XFmode || mode == TFmode)
12802 {
12803 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12804 if (REG_P (operand))
12805 {
12806 gcc_assert (reload_completed);
12807 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12808 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12809 }
12810 else if (offsettable_memref_p (operand))
12811 {
12812 operand = adjust_address (operand, DImode, 0);
12813 parts[0] = operand;
12814 parts[1] = adjust_address (operand, upper_mode, 8);
12815 }
12816 else if (GET_CODE (operand) == CONST_DOUBLE)
12817 {
12818 REAL_VALUE_TYPE r;
12819 long l[4];
12820
12821 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12822 real_to_target (l, &r, mode);
12823
12824 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12825 if (HOST_BITS_PER_WIDE_INT >= 64)
12826 parts[0]
12827 = gen_int_mode
12828 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12829 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12830 DImode);
12831 else
12832 parts[0] = immed_double_const (l[0], l[1], DImode);
12833
12834 if (upper_mode == SImode)
12835 parts[1] = gen_int_mode (l[2], SImode);
12836 else if (HOST_BITS_PER_WIDE_INT >= 64)
12837 parts[1]
12838 = gen_int_mode
12839 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12840 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12841 DImode);
12842 else
12843 parts[1] = immed_double_const (l[2], l[3], DImode);
12844 }
12845 else
12846 gcc_unreachable ();
12847 }
12848 }
12849
12850 return size;
12851 }
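
/* Worked example (illustrative, not part of the original sources): on a
   32-bit little-endian target the DFmode constant 1.0 (IEEE bits
   0x3FF0000000000000) splits into two SImode immediates, parts[0] =
   0x00000000 (low word) and parts[1] = 0x3FF00000 (high word); an XFmode
   constant yields three parts.  */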
12852
12853 /* Emit insns to perform a move or push of DI, DF, and XF values.
12854 All the required insns are emitted here.  Operands 2-4 contain the
12855 input values in the correct order; operands 5-7 contain the output
12856 values.  */
12857
12858 void
12859 ix86_split_long_move (rtx operands[])
12860 {
12861 rtx part[2][3];
12862 int nparts;
12863 int push = 0;
12864 int collisions = 0;
12865 enum machine_mode mode = GET_MODE (operands[0]);
12866
12867 /* The DFmode expanders may ask us to move a double.
12868 For a 64-bit target this is a single move.  By hiding that fact
12869 here we simplify the i386.md splitters.  */
12870 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12871 {
12872 /* Optimize constant pool references to immediates.  This is used by
12873 fp moves, which force all constants to memory to allow combining.  */
12874
12875 if (MEM_P (operands[1])
12876 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12877 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12878 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12879 if (push_operand (operands[0], VOIDmode))
12880 {
12881 operands[0] = copy_rtx (operands[0]);
12882 PUT_MODE (operands[0], Pmode);
12883 }
12884 else
12885 operands[0] = gen_lowpart (DImode, operands[0]);
12886 operands[1] = gen_lowpart (DImode, operands[1]);
12887 emit_move_insn (operands[0], operands[1]);
12888 return;
12889 }
12890
12891 /* The only non-offsettable memory we handle is a push.  */
12892 if (push_operand (operands[0], VOIDmode))
12893 push = 1;
12894 else
12895 gcc_assert (!MEM_P (operands[0])
12896 || offsettable_memref_p (operands[0]));
12897
12898 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12899 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12900
12901 /* When emitting a push, take care with source operands located on the stack.  */
12902 if (push && MEM_P (operands[1])
12903 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12904 {
12905 if (nparts == 3)
12906 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12907 XEXP (part[1][2], 0));
12908 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12909 XEXP (part[1][1], 0));
12910 }
12911
12912 /* We need to do the copy in the right order in case an address register
12913 of the source overlaps the destination.  */
12914 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12915 {
12916 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12917 collisions++;
12918 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12919 collisions++;
12920 if (nparts == 3
12921 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12922 collisions++;
12923
12924 /* Collision in the middle part can be handled by reordering. */
12925 if (collisions == 1 && nparts == 3
12926 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12927 {
12928 rtx tmp;
12929 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12930 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12931 }
12932
12933 /* If there are more collisions, we can't handle them by reordering.
12934 Do an lea into the last part and use only one colliding move.  */
12935 else if (collisions > 1)
12936 {
12937 rtx base;
12938
12939 collisions = 1;
12940
12941 base = part[0][nparts - 1];
12942
12943 /* Handle the case when the last part isn't valid for lea.
12944 Happens in 64-bit mode storing the 12-byte XFmode. */
12945 if (GET_MODE (base) != Pmode)
12946 base = gen_rtx_REG (Pmode, REGNO (base));
12947
12948 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12949 part[1][0] = replace_equiv_address (part[1][0], base);
12950 part[1][1] = replace_equiv_address (part[1][1],
12951 plus_constant (base, UNITS_PER_WORD));
12952 if (nparts == 3)
12953 part[1][2] = replace_equiv_address (part[1][2],
12954 plus_constant (base, 8));
12955 }
12956 }
12957
12958 if (push)
12959 {
12960 if (!TARGET_64BIT)
12961 {
12962 if (nparts == 3)
12963 {
12964 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12965 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12966 emit_move_insn (part[0][2], part[1][2]);
12967 }
12968 }
12969 else
12970 {
12971 /* In 64-bit mode we don't have a 32-bit push available.  If this is a
12972 register, that is OK - we will just use the larger counterpart.  We also
12973 retype memories - these come from an attempt to avoid a REX prefix when
12974 moving the second half of a TFmode value.  */
12975 if (GET_MODE (part[1][1]) == SImode)
12976 {
12977 switch (GET_CODE (part[1][1]))
12978 {
12979 case MEM:
12980 part[1][1] = adjust_address (part[1][1], DImode, 0);
12981 break;
12982
12983 case REG:
12984 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12985 break;
12986
12987 default:
12988 gcc_unreachable ();
12989 }
12990
12991 if (GET_MODE (part[1][0]) == SImode)
12992 part[1][0] = part[1][1];
12993 }
12994 }
12995 emit_move_insn (part[0][1], part[1][1]);
12996 emit_move_insn (part[0][0], part[1][0]);
12997 return;
12998 }
12999
13000 /* Choose the correct order so as not to overwrite the source before it is copied.  */
13001 if ((REG_P (part[0][0])
13002 && REG_P (part[1][1])
13003 && (REGNO (part[0][0]) == REGNO (part[1][1])
13004 || (nparts == 3
13005 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13006 || (collisions > 0
13007 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13008 {
13009 if (nparts == 3)
13010 {
13011 operands[2] = part[0][2];
13012 operands[3] = part[0][1];
13013 operands[4] = part[0][0];
13014 operands[5] = part[1][2];
13015 operands[6] = part[1][1];
13016 operands[7] = part[1][0];
13017 }
13018 else
13019 {
13020 operands[2] = part[0][1];
13021 operands[3] = part[0][0];
13022 operands[5] = part[1][1];
13023 operands[6] = part[1][0];
13024 }
13025 }
13026 else
13027 {
13028 if (nparts == 3)
13029 {
13030 operands[2] = part[0][0];
13031 operands[3] = part[0][1];
13032 operands[4] = part[0][2];
13033 operands[5] = part[1][0];
13034 operands[6] = part[1][1];
13035 operands[7] = part[1][2];
13036 }
13037 else
13038 {
13039 operands[2] = part[0][0];
13040 operands[3] = part[0][1];
13041 operands[5] = part[1][0];
13042 operands[6] = part[1][1];
13043 }
13044 }
13045
13046 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13047 if (optimize_size)
13048 {
13049 if (CONST_INT_P (operands[5])
13050 && operands[5] != const0_rtx
13051 && REG_P (operands[2]))
13052 {
13053 if (CONST_INT_P (operands[6])
13054 && INTVAL (operands[6]) == INTVAL (operands[5]))
13055 operands[6] = operands[2];
13056
13057 if (nparts == 3
13058 && CONST_INT_P (operands[7])
13059 && INTVAL (operands[7]) == INTVAL (operands[5]))
13060 operands[7] = operands[2];
13061 }
13062
13063 if (nparts == 3
13064 && CONST_INT_P (operands[6])
13065 && operands[6] != const0_rtx
13066 && REG_P (operands[3])
13067 && CONST_INT_P (operands[7])
13068 && INTVAL (operands[7]) == INTVAL (operands[6]))
13069 operands[7] = operands[3];
13070 }
13071
13072 emit_move_insn (operands[2], operands[5]);
13073 emit_move_insn (operands[3], operands[6]);
13074 if (nparts == 3)
13075 emit_move_insn (operands[4], operands[7]);
13076
13077 return;
13078 }
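
/* Illustrative note on the ordering above (added for clarity): if a DImode
   value is loaded from memory addressed by a register that is also one of
   the destination registers - say the source is 4(%eax) and the destination
   pair is %eax:%edx (hypothetical choice) - the move into %eax is emitted
   last, so the address is not clobbered before the other word is read.  */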
13079
13080 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13081 left shift by a constant, either using a single shift or
13082 a sequence of add instructions. */
13083
13084 static void
13085 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13086 {
13087 if (count == 1)
13088 {
13089 emit_insn ((mode == DImode
13090 ? gen_addsi3
13091 : gen_adddi3) (operand, operand, operand));
13092 }
13093 else if (!optimize_size
13094 && count * ix86_cost->add <= ix86_cost->shift_const)
13095 {
13096 int i;
13097 for (i=0; i<count; i++)
13098 {
13099 emit_insn ((mode == DImode
13100 ? gen_addsi3
13101 : gen_adddi3) (operand, operand, operand));
13102 }
13103 }
13104 else
13105 emit_insn ((mode == DImode
13106 ? gen_ashlsi3
13107 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13108 }
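
/* Illustrative example (not from the original sources): with hypothetical
   costs where an add is cheaper than a constant shift, a left shift of a
   32-bit half by 2 may be emitted as

       addl  %eax, %eax
       addl  %eax, %eax

   instead of "shll $2, %eax"; otherwise the single shift insn is used.  */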
13109
13110 void
13111 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13112 {
13113 rtx low[2], high[2];
13114 int count;
13115 const int single_width = mode == DImode ? 32 : 64;
13116
13117 if (CONST_INT_P (operands[2]))
13118 {
13119 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13120 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13121
13122 if (count >= single_width)
13123 {
13124 emit_move_insn (high[0], low[1]);
13125 emit_move_insn (low[0], const0_rtx);
13126
13127 if (count > single_width)
13128 ix86_expand_ashl_const (high[0], count - single_width, mode);
13129 }
13130 else
13131 {
13132 if (!rtx_equal_p (operands[0], operands[1]))
13133 emit_move_insn (operands[0], operands[1]);
13134 emit_insn ((mode == DImode
13135 ? gen_x86_shld_1
13136 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13137 ix86_expand_ashl_const (low[0], count, mode);
13138 }
13139 return;
13140 }
13141
13142 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13143
13144 if (operands[1] == const1_rtx)
13145 {
13146 /* Assuming we've chosen QImode-capable registers, 1 << N
13147 can be done with two 32/64-bit shifts, no branches, no cmoves.  */
13148 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13149 {
13150 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13151
13152 ix86_expand_clear (low[0]);
13153 ix86_expand_clear (high[0]);
13154 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13155
13156 d = gen_lowpart (QImode, low[0]);
13157 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13158 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13159 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13160
13161 d = gen_lowpart (QImode, high[0]);
13162 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13163 s = gen_rtx_NE (QImode, flags, const0_rtx);
13164 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13165 }
13166
13167 /* Otherwise, we can get the same results by manually performing
13168 a bit extract operation on bit 5/6, and then performing the two
13169 shifts. The two methods of getting 0/1 into low/high are exactly
13170 the same size. Avoiding the shift in the bit extract case helps
13171 pentium4 a bit; no one else seems to care much either way. */
13172 else
13173 {
13174 rtx x;
13175
13176 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13177 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13178 else
13179 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13180 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13181
13182 emit_insn ((mode == DImode
13183 ? gen_lshrsi3
13184 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13185 emit_insn ((mode == DImode
13186 ? gen_andsi3
13187 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13188 emit_move_insn (low[0], high[0]);
13189 emit_insn ((mode == DImode
13190 ? gen_xorsi3
13191 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13192 }
13193
13194 emit_insn ((mode == DImode
13195 ? gen_ashlsi3
13196 : gen_ashldi3) (low[0], low[0], operands[2]));
13197 emit_insn ((mode == DImode
13198 ? gen_ashlsi3
13199 : gen_ashldi3) (high[0], high[0], operands[2]));
13200 return;
13201 }
13202
13203 if (operands[1] == constm1_rtx)
13204 {
13205 /* For -1 << N, we can avoid the shld instruction, because we
13206 know that we're shifting 0...31/63 ones into a -1. */
13207 emit_move_insn (low[0], constm1_rtx);
13208 if (optimize_size)
13209 emit_move_insn (high[0], low[0]);
13210 else
13211 emit_move_insn (high[0], constm1_rtx);
13212 }
13213 else
13214 {
13215 if (!rtx_equal_p (operands[0], operands[1]))
13216 emit_move_insn (operands[0], operands[1]);
13217
13218 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13219 emit_insn ((mode == DImode
13220 ? gen_x86_shld_1
13221 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13222 }
13223
13224 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13225
13226 if (TARGET_CMOVE && scratch)
13227 {
13228 ix86_expand_clear (scratch);
13229 emit_insn ((mode == DImode
13230 ? gen_x86_shift_adj_1
13231 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13232 }
13233 else
13234 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13235 }
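
/* Worked example for the constant-count path above (illustrative): a DImode
   shift x << 40 on a 32-bit target has count >= 32, so the low source word
   becomes the high result word, the low result word is cleared, and the high
   word is then shifted left by the remaining 40 - 32 = 8 bits.  */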
13236
13237 void
13238 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13239 {
13240 rtx low[2], high[2];
13241 int count;
13242 const int single_width = mode == DImode ? 32 : 64;
13243
13244 if (CONST_INT_P (operands[2]))
13245 {
13246 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13247 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13248
13249 if (count == single_width * 2 - 1)
13250 {
13251 emit_move_insn (high[0], high[1]);
13252 emit_insn ((mode == DImode
13253 ? gen_ashrsi3
13254 : gen_ashrdi3) (high[0], high[0],
13255 GEN_INT (single_width - 1)));
13256 emit_move_insn (low[0], high[0]);
13257
13258 }
13259 else if (count >= single_width)
13260 {
13261 emit_move_insn (low[0], high[1]);
13262 emit_move_insn (high[0], low[0]);
13263 emit_insn ((mode == DImode
13264 ? gen_ashrsi3
13265 : gen_ashrdi3) (high[0], high[0],
13266 GEN_INT (single_width - 1)));
13267 if (count > single_width)
13268 emit_insn ((mode == DImode
13269 ? gen_ashrsi3
13270 : gen_ashrdi3) (low[0], low[0],
13271 GEN_INT (count - single_width)));
13272 }
13273 else
13274 {
13275 if (!rtx_equal_p (operands[0], operands[1]))
13276 emit_move_insn (operands[0], operands[1]);
13277 emit_insn ((mode == DImode
13278 ? gen_x86_shrd_1
13279 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13280 emit_insn ((mode == DImode
13281 ? gen_ashrsi3
13282 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13283 }
13284 }
13285 else
13286 {
13287 if (!rtx_equal_p (operands[0], operands[1]))
13288 emit_move_insn (operands[0], operands[1]);
13289
13290 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13291
13292 emit_insn ((mode == DImode
13293 ? gen_x86_shrd_1
13294 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13295 emit_insn ((mode == DImode
13296 ? gen_ashrsi3
13297 : gen_ashrdi3) (high[0], high[0], operands[2]));
13298
13299 if (TARGET_CMOVE && scratch)
13300 {
13301 emit_move_insn (scratch, high[0]);
13302 emit_insn ((mode == DImode
13303 ? gen_ashrsi3
13304 : gen_ashrdi3) (scratch, scratch,
13305 GEN_INT (single_width - 1)));
13306 emit_insn ((mode == DImode
13307 ? gen_x86_shift_adj_1
13308 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13309 scratch));
13310 }
13311 else
13312 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13313 }
13314 }
13315
13316 void
13317 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13318 {
13319 rtx low[2], high[2];
13320 int count;
13321 const int single_width = mode == DImode ? 32 : 64;
13322
13323 if (CONST_INT_P (operands[2]))
13324 {
13325 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13326 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13327
13328 if (count >= single_width)
13329 {
13330 emit_move_insn (low[0], high[1]);
13331 ix86_expand_clear (high[0]);
13332
13333 if (count > single_width)
13334 emit_insn ((mode == DImode
13335 ? gen_lshrsi3
13336 : gen_lshrdi3) (low[0], low[0],
13337 GEN_INT (count - single_width)));
13338 }
13339 else
13340 {
13341 if (!rtx_equal_p (operands[0], operands[1]))
13342 emit_move_insn (operands[0], operands[1]);
13343 emit_insn ((mode == DImode
13344 ? gen_x86_shrd_1
13345 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13346 emit_insn ((mode == DImode
13347 ? gen_lshrsi3
13348 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13349 }
13350 }
13351 else
13352 {
13353 if (!rtx_equal_p (operands[0], operands[1]))
13354 emit_move_insn (operands[0], operands[1]);
13355
13356 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13357
13358 emit_insn ((mode == DImode
13359 ? gen_x86_shrd_1
13360 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13361 emit_insn ((mode == DImode
13362 ? gen_lshrsi3
13363 : gen_lshrdi3) (high[0], high[0], operands[2]));
13364
13365 /* Heh. By reversing the arguments, we can reuse this pattern. */
13366 if (TARGET_CMOVE && scratch)
13367 {
13368 ix86_expand_clear (scratch);
13369 emit_insn ((mode == DImode
13370 ? gen_x86_shift_adj_1
13371 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13372 scratch));
13373 }
13374 else
13375 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13376 }
13377 }
13378
13379 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
13380 static void
13381 predict_jump (int prob)
13382 {
13383 rtx insn = get_last_insn ();
13384 gcc_assert (JUMP_P (insn));
13385 REG_NOTES (insn)
13386 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13387 GEN_INT (prob),
13388 REG_NOTES (insn));
13389 }
13390
13391 /* Helper function for the string operations below.  Test whether VARIABLE
13392 is aligned to VALUE bytes.  If it is, jump to the returned label.  */
13393 static rtx
13394 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13395 {
13396 rtx label = gen_label_rtx ();
13397 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13398 if (GET_MODE (variable) == DImode)
13399 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13400 else
13401 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13402 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13403 1, label);
13404 if (epilogue)
13405 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13406 else
13407 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13408 return label;
13409 }
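
/* Illustrative sketch (register choice hypothetical): a call such as
   ix86_expand_aligntest (destptr, 2, false) emits code conceptually like

       testl $2, %edi          # is this chunk needed / already aligned?
       je    .Lskip            # if the bit is clear, skip the chunk

   where .Lskip is the returned label, emitted by the caller after the
   chunk-handling code.  */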
13410
13411 /* Decrease COUNTREG by VALUE.  */
13412 static void
13413 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13414 {
13415 if (GET_MODE (countreg) == DImode)
13416 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13417 else
13418 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13419 }
13420
13421 /* Zero-extend the possibly SImode EXP to a Pmode register.  */
13422 rtx
13423 ix86_zero_extend_to_Pmode (rtx exp)
13424 {
13425 rtx r;
13426 if (GET_MODE (exp) == VOIDmode)
13427 return force_reg (Pmode, exp);
13428 if (GET_MODE (exp) == Pmode)
13429 return copy_to_mode_reg (Pmode, exp);
13430 r = gen_reg_rtx (Pmode);
13431 emit_insn (gen_zero_extendsidi2 (r, exp));
13432 return r;
13433 }
13434
13435 /* Divide COUNTREG by SCALE. */
13436 static rtx
13437 scale_counter (rtx countreg, int scale)
13438 {
13439 rtx sc;
13440 rtx piece_size_mask;
13441
13442 if (scale == 1)
13443 return countreg;
13444 if (CONST_INT_P (countreg))
13445 return GEN_INT (INTVAL (countreg) / scale);
13446 gcc_assert (REG_P (countreg));
13447
13448 piece_size_mask = GEN_INT (scale - 1);
13449 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13450 GEN_INT (exact_log2 (scale)),
13451 NULL, 1, OPTAB_DIRECT);
13452 return sc;
13453 }
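
/* Worked example (illustrative): with SCALE == 4, a compile-time count of 37
   is scaled to GEN_INT (9) - nine SImode chunks - and a runtime count is
   shifted right by 2; the remaining count & 3 bytes are left for the
   epilogue code emitted elsewhere.  */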
13454
13455 /* Return the mode for the memcpy/memset loop counter.  Prefer SImode over
13456 DImode for constant loop counts.  */
13457
13458 static enum machine_mode
13459 counter_mode (rtx count_exp)
13460 {
13461 if (GET_MODE (count_exp) != VOIDmode)
13462 return GET_MODE (count_exp);
13463 if (GET_CODE (count_exp) != CONST_INT)
13464 return Pmode;
13465 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13466 return DImode;
13467 return SImode;
13468 }
13469
13470 /* When SRCPTR is non-NULL, output a simple loop to move the memory
13471 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13472 the overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output
13473 the equivalent loop to set memory to VALUE (assumed to be in MODE).
13474
13475 The size is rounded down to a whole number of chunks moved at once.
13476 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
13477
13478
13479 static void
13480 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13481 rtx destptr, rtx srcptr, rtx value,
13482 rtx count, enum machine_mode mode, int unroll,
13483 int expected_size)
13484 {
13485 rtx out_label, top_label, iter, tmp;
13486 enum machine_mode iter_mode = counter_mode (count);
13487 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13488 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13489 rtx size;
13490 rtx x_addr;
13491 rtx y_addr;
13492 int i;
13493
13494 top_label = gen_label_rtx ();
13495 out_label = gen_label_rtx ();
13496 iter = gen_reg_rtx (iter_mode);
13497
13498 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13499 NULL, 1, OPTAB_DIRECT);
13500 /* Those two should combine. */
13501 if (piece_size == const1_rtx)
13502 {
13503 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13504 true, out_label);
13505 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13506 }
13507 emit_move_insn (iter, const0_rtx);
13508
13509 emit_label (top_label);
13510
13511 tmp = convert_modes (Pmode, iter_mode, iter, true);
13512 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13513 destmem = change_address (destmem, mode, x_addr);
13514
13515 if (srcmem)
13516 {
13517 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13518 srcmem = change_address (srcmem, mode, y_addr);
13519
13520 /* When unrolling for chips that reorder memory reads and writes,
13521 we can save registers by using a single temporary.
13522 Also, using 4 temporaries is overkill in 32-bit mode.  */
13523 if (!TARGET_64BIT && 0)
13524 {
13525 for (i = 0; i < unroll; i++)
13526 {
13527 if (i)
13528 {
13529 destmem =
13530 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13531 srcmem =
13532 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13533 }
13534 emit_move_insn (destmem, srcmem);
13535 }
13536 }
13537 else
13538 {
13539 rtx tmpreg[4];
13540 gcc_assert (unroll <= 4);
13541 for (i = 0; i < unroll; i++)
13542 {
13543 tmpreg[i] = gen_reg_rtx (mode);
13544 if (i)
13545 {
13546 srcmem =
13547 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13548 }
13549 emit_move_insn (tmpreg[i], srcmem);
13550 }
13551 for (i = 0; i < unroll; i++)
13552 {
13553 if (i)
13554 {
13555 destmem =
13556 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13557 }
13558 emit_move_insn (destmem, tmpreg[i]);
13559 }
13560 }
13561 }
13562 else
13563 for (i = 0; i < unroll; i++)
13564 {
13565 if (i)
13566 destmem =
13567 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13568 emit_move_insn (destmem, value);
13569 }
13570
13571 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13572 true, OPTAB_LIB_WIDEN);
13573 if (tmp != iter)
13574 emit_move_insn (iter, tmp);
13575
13576 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13577 true, top_label);
13578 if (expected_size != -1)
13579 {
13580 expected_size /= GET_MODE_SIZE (mode) * unroll;
13581 if (expected_size == 0)
13582 predict_jump (0);
13583 else if (expected_size > REG_BR_PROB_BASE)
13584 predict_jump (REG_BR_PROB_BASE - 1);
13585 else
13586 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13587 }
13588 else
13589 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13590 iter = ix86_zero_extend_to_Pmode (iter);
13591 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13592 true, OPTAB_LIB_WIDEN);
13593 if (tmp != destptr)
13594 emit_move_insn (destptr, tmp);
13595 if (srcptr)
13596 {
13597 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13598 true, OPTAB_LIB_WIDEN);
13599 if (tmp != srcptr)
13600 emit_move_insn (srcptr, tmp);
13601 }
13602 emit_label (out_label);
13603 }
13604
13605 /* Output "rep; mov" instruction.
13606 Arguments have same meaning as for previous function */
13607 static void
13608 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13609 rtx destptr, rtx srcptr,
13610 rtx count,
13611 enum machine_mode mode)
13612 {
13613 rtx destexp;
13614 rtx srcexp;
13615 rtx countreg;
13616
13617 /* If the size is known, it is shorter to use rep movs. */
13618 if (mode == QImode && CONST_INT_P (count)
13619 && !(INTVAL (count) & 3))
13620 mode = SImode;
13621
13622 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13623 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13624 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13625 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13626 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13627 if (mode != QImode)
13628 {
13629 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13630 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13631 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13632 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13633 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13634 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13635 }
13636 else
13637 {
13638 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13639 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13640 }
13641 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13642 destexp, srcexp));
13643 }
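
/* Illustrative sketch (added for clarity): for MODE == SImode this boils down
   to loading the scaled count (bytes >> 2) into the count register and issuing
   a "rep movsl"; DESTEXP and SRCEXP describe the final pointer values,
   destptr + (count << 2) and srcptr + (count << 2), for the RTL pattern.  */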
13644
13645 /* Output "rep; stos" instruction.
13646 Arguments have same meaning as for previous function */
13647 static void
13648 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13649 rtx count,
13650 enum machine_mode mode)
13651 {
13652 rtx destexp;
13653 rtx countreg;
13654
13655 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13656 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13657 value = force_reg (mode, gen_lowpart (mode, value));
13658 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13659 if (mode != QImode)
13660 {
13661 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13662 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13663 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13664 }
13665 else
13666 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13667 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13668 }
13669
13670 static void
13671 emit_strmov (rtx destmem, rtx srcmem,
13672 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13673 {
13674 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13675 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13676 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13677 }
13678
13679 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13680 static void
13681 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13682 rtx destptr, rtx srcptr, rtx count, int max_size)
13683 {
13684 rtx src, dest;
13685 if (CONST_INT_P (count))
13686 {
13687 HOST_WIDE_INT countval = INTVAL (count);
13688 int offset = 0;
13689
13690 if ((countval & 0x10) && max_size > 16)
13691 {
13692 if (TARGET_64BIT)
13693 {
13694 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13695 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13696 }
13697 else
13698 gcc_unreachable ();
13699 offset += 16;
13700 }
13701 if ((countval & 0x08) && max_size > 8)
13702 {
13703 if (TARGET_64BIT)
13704 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13705 else
13706 {
13707 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13708 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13709 }
13710 offset += 8;
13711 }
13712 if ((countval & 0x04) && max_size > 4)
13713 {
13714 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13715 offset += 4;
13716 }
13717 if ((countval & 0x02) && max_size > 2)
13718 {
13719 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13720 offset += 2;
13721 }
13722 if ((countval & 0x01) && max_size > 1)
13723 {
13724 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13725 offset += 1;
13726 }
13727 return;
13728 }
13729 if (max_size > 8)
13730 {
13731 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13732 count, 1, OPTAB_DIRECT);
13733 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13734 count, QImode, 1, 4);
13735 return;
13736 }
13737
13738 /* When single-instruction stringops are available, we can cheaply advance
13739 the dest and src pointers.  Otherwise we save code size by maintaining an
13740 offset (zero is readily available from the preceding rep operation) and
13741 using x86 addressing modes.  */
13742 if (TARGET_SINGLE_STRINGOP)
13743 {
13744 if (max_size > 4)
13745 {
13746 rtx label = ix86_expand_aligntest (count, 4, true);
13747 src = change_address (srcmem, SImode, srcptr);
13748 dest = change_address (destmem, SImode, destptr);
13749 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13750 emit_label (label);
13751 LABEL_NUSES (label) = 1;
13752 }
13753 if (max_size > 2)
13754 {
13755 rtx label = ix86_expand_aligntest (count, 2, true);
13756 src = change_address (srcmem, HImode, srcptr);
13757 dest = change_address (destmem, HImode, destptr);
13758 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13759 emit_label (label);
13760 LABEL_NUSES (label) = 1;
13761 }
13762 if (max_size > 1)
13763 {
13764 rtx label = ix86_expand_aligntest (count, 1, true);
13765 src = change_address (srcmem, QImode, srcptr);
13766 dest = change_address (destmem, QImode, destptr);
13767 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13768 emit_label (label);
13769 LABEL_NUSES (label) = 1;
13770 }
13771 }
13772 else
13773 {
13774 rtx offset = force_reg (Pmode, const0_rtx);
13775 rtx tmp;
13776
13777 if (max_size > 4)
13778 {
13779 rtx label = ix86_expand_aligntest (count, 4, true);
13780 src = change_address (srcmem, SImode, srcptr);
13781 dest = change_address (destmem, SImode, destptr);
13782 emit_move_insn (dest, src);
13783 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13784 true, OPTAB_LIB_WIDEN);
13785 if (tmp != offset)
13786 emit_move_insn (offset, tmp);
13787 emit_label (label);
13788 LABEL_NUSES (label) = 1;
13789 }
13790 if (max_size > 2)
13791 {
13792 rtx label = ix86_expand_aligntest (count, 2, true);
13793 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13794 src = change_address (srcmem, HImode, tmp);
13795 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13796 dest = change_address (destmem, HImode, tmp);
13797 emit_move_insn (dest, src);
13798 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13799 true, OPTAB_LIB_WIDEN);
13800 if (tmp != offset)
13801 emit_move_insn (offset, tmp);
13802 emit_label (label);
13803 LABEL_NUSES (label) = 1;
13804 }
13805 if (max_size > 1)
13806 {
13807 rtx label = ix86_expand_aligntest (count, 1, true);
13808 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13809 src = change_address (srcmem, QImode, tmp);
13810 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13811 dest = change_address (destmem, QImode, tmp);
13812 emit_move_insn (dest, src);
13813 emit_label (label);
13814 LABEL_NUSES (label) = 1;
13815 }
13816 }
13817 }
13818
13819 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
13820 static void
13821 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13822 rtx count, int max_size)
13823 {
13824 count =
13825 expand_simple_binop (counter_mode (count), AND, count,
13826 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13827 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13828 gen_lowpart (QImode, value), count, QImode,
13829 1, max_size / 2);
13830 }
13831
13832 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
13833 static void
13834 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13835 {
13836 rtx dest;
13837
13838 if (CONST_INT_P (count))
13839 {
13840 HOST_WIDE_INT countval = INTVAL (count);
13841 int offset = 0;
13842
13843 if ((countval & 0x10) && max_size > 16)
13844 {
13845 if (TARGET_64BIT)
13846 {
13847 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13848 emit_insn (gen_strset (destptr, dest, value));
13849 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13850 emit_insn (gen_strset (destptr, dest, value));
13851 }
13852 else
13853 gcc_unreachable ();
13854 offset += 16;
13855 }
13856 if ((countval & 0x08) && max_size > 8)
13857 {
13858 if (TARGET_64BIT)
13859 {
13860 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13861 emit_insn (gen_strset (destptr, dest, value));
13862 }
13863 else
13864 {
13865 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13866 emit_insn (gen_strset (destptr, dest, value));
13867 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13868 emit_insn (gen_strset (destptr, dest, value));
13869 }
13870 offset += 8;
13871 }
13872 if ((countval & 0x04) && max_size > 4)
13873 {
13874 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13875 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13876 offset += 4;
13877 }
13878 if ((countval & 0x02) && max_size > 2)
13879 {
13880 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13881 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13882 offset += 2;
13883 }
13884 if ((countval & 0x01) && max_size > 1)
13885 {
13886 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13887 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13888 offset += 1;
13889 }
13890 return;
13891 }
13892 if (max_size > 32)
13893 {
13894 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13895 return;
13896 }
13897 if (max_size > 16)
13898 {
13899 rtx label = ix86_expand_aligntest (count, 16, true);
13900 if (TARGET_64BIT)
13901 {
13902 dest = change_address (destmem, DImode, destptr);
13903 emit_insn (gen_strset (destptr, dest, value));
13904 emit_insn (gen_strset (destptr, dest, value));
13905 }
13906 else
13907 {
13908 dest = change_address (destmem, SImode, destptr);
13909 emit_insn (gen_strset (destptr, dest, value));
13910 emit_insn (gen_strset (destptr, dest, value));
13911 emit_insn (gen_strset (destptr, dest, value));
13912 emit_insn (gen_strset (destptr, dest, value));
13913 }
13914 emit_label (label);
13915 LABEL_NUSES (label) = 1;
13916 }
13917 if (max_size > 8)
13918 {
13919 rtx label = ix86_expand_aligntest (count, 8, true);
13920 if (TARGET_64BIT)
13921 {
13922 dest = change_address (destmem, DImode, destptr);
13923 emit_insn (gen_strset (destptr, dest, value));
13924 }
13925 else
13926 {
13927 dest = change_address (destmem, SImode, destptr);
13928 emit_insn (gen_strset (destptr, dest, value));
13929 emit_insn (gen_strset (destptr, dest, value));
13930 }
13931 emit_label (label);
13932 LABEL_NUSES (label) = 1;
13933 }
13934 if (max_size > 4)
13935 {
13936 rtx label = ix86_expand_aligntest (count, 4, true);
13937 dest = change_address (destmem, SImode, destptr);
13938 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13939 emit_label (label);
13940 LABEL_NUSES (label) = 1;
13941 }
13942 if (max_size > 2)
13943 {
13944 rtx label = ix86_expand_aligntest (count, 2, true);
13945 dest = change_address (destmem, HImode, destptr);
13946 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13947 emit_label (label);
13948 LABEL_NUSES (label) = 1;
13949 }
13950 if (max_size > 1)
13951 {
13952 rtx label = ix86_expand_aligntest (count, 1, true);
13953 dest = change_address (destmem, QImode, destptr);
13954 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13955 emit_label (label);
13956 LABEL_NUSES (label) = 1;
13957 }
13958 }
13959
13960 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
13961 to ALIGN, up to DESIRED_ALIGNMENT.  */
13962 static void
13963 expand_movmem_prologue (rtx destmem, rtx srcmem,
13964 rtx destptr, rtx srcptr, rtx count,
13965 int align, int desired_alignment)
13966 {
13967 if (align <= 1 && desired_alignment > 1)
13968 {
13969 rtx label = ix86_expand_aligntest (destptr, 1, false);
13970 srcmem = change_address (srcmem, QImode, srcptr);
13971 destmem = change_address (destmem, QImode, destptr);
13972 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13973 ix86_adjust_counter (count, 1);
13974 emit_label (label);
13975 LABEL_NUSES (label) = 1;
13976 }
13977 if (align <= 2 && desired_alignment > 2)
13978 {
13979 rtx label = ix86_expand_aligntest (destptr, 2, false);
13980 srcmem = change_address (srcmem, HImode, srcptr);
13981 destmem = change_address (destmem, HImode, destptr);
13982 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13983 ix86_adjust_counter (count, 2);
13984 emit_label (label);
13985 LABEL_NUSES (label) = 1;
13986 }
13987 if (align <= 4 && desired_alignment > 4)
13988 {
13989 rtx label = ix86_expand_aligntest (destptr, 4, false);
13990 srcmem = change_address (srcmem, SImode, srcptr);
13991 destmem = change_address (destmem, SImode, destptr);
13992 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13993 ix86_adjust_counter (count, 4);
13994 emit_label (label);
13995 LABEL_NUSES (label) = 1;
13996 }
13997 gcc_assert (desired_alignment <= 8);
13998 }
13999
14000 /* Set enough bytes at DEST to align DEST, known to be aligned
14001 to ALIGN, up to DESIRED_ALIGNMENT.  */
14002 static void
14003 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14004 int align, int desired_alignment)
14005 {
14006 if (align <= 1 && desired_alignment > 1)
14007 {
14008 rtx label = ix86_expand_aligntest (destptr, 1, false);
14009 destmem = change_address (destmem, QImode, destptr);
14010 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14011 ix86_adjust_counter (count, 1);
14012 emit_label (label);
14013 LABEL_NUSES (label) = 1;
14014 }
14015 if (align <= 2 && desired_alignment > 2)
14016 {
14017 rtx label = ix86_expand_aligntest (destptr, 2, false);
14018 destmem = change_address (destmem, HImode, destptr);
14019 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14020 ix86_adjust_counter (count, 2);
14021 emit_label (label);
14022 LABEL_NUSES (label) = 1;
14023 }
14024 if (align <= 4 && desired_alignment > 4)
14025 {
14026 rtx label = ix86_expand_aligntest (destptr, 4, false);
14027 destmem = change_address (destmem, SImode, destptr);
14028 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14029 ix86_adjust_counter (count, 4);
14030 emit_label (label);
14031 LABEL_NUSES (label) = 1;
14032 }
14033 gcc_assert (desired_alignment <= 8);
14034 }
14035
14036 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14037 static enum stringop_alg
14038 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14039 int *dynamic_check)
14040 {
14041 const struct stringop_algs * algs;
14042
14043 *dynamic_check = -1;
14044 if (memset)
14045 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14046 else
14047 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14048 if (stringop_alg != no_stringop)
14049 return stringop_alg;
14050 /* rep; movq or rep; movl is the smallest variant. */
14051 else if (optimize_size)
14052 {
14053 if (!count || (count & 3))
14054 return rep_prefix_1_byte;
14055 else
14056 return rep_prefix_4_byte;
14057 }
14058 /* Very tiny blocks are best handled via the loop; REP is expensive to
14059 set up.  */
14060 else if (expected_size != -1 && expected_size < 4)
14061 return loop_1_byte;
14062 else if (expected_size != -1)
14063 {
14064 unsigned int i;
14065 enum stringop_alg alg = libcall;
14066 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14067 {
14068 gcc_assert (algs->size[i].max);
14069 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14070 {
14071 if (algs->size[i].alg != libcall)
14072 alg = algs->size[i].alg;
14073 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14074 last non-libcall inline algorithm. */
14075 if (TARGET_INLINE_ALL_STRINGOPS)
14076 {
14077 /* When the current size is best copied by a libcall,
14078 but we are still forced to inline, run the heuristic below
14079 that picks code for medium-sized blocks.  */
14080 if (alg != libcall)
14081 return alg;
14082 break;
14083 }
14084 else
14085 return algs->size[i].alg;
14086 }
14087 }
14088 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14089 }
14090 /* When asked to inline the call anyway, try to pick a meaningful choice.
14091 We look for the maximal size of block that is faster to copy by hand and
14092 take blocks of at most that size, guessing that the average size will
14093 be roughly half of the block.
14094
14095 If this turns out to be bad, we might simply specify the preferred
14096 choice in ix86_costs.  */
14097 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14098 && algs->unknown_size == libcall)
14099 {
14100 int max = -1;
14101 enum stringop_alg alg;
14102 int i;
14103
14104 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14105 if (algs->size[i].alg != libcall && algs->size[i].alg)
14106 max = algs->size[i].max;
14107 if (max == -1)
14108 max = 4096;
14109 alg = decide_alg (count, max / 2, memset, dynamic_check);
14110 gcc_assert (*dynamic_check == -1);
14111 gcc_assert (alg != libcall);
14112 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14113 *dynamic_check = max;
14114 return alg;
14115 }
14116 return algs->unknown_size;
14117 }
14118
14119 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14120 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14121 static int
14122 decide_alignment (int align,
14123 enum stringop_alg alg,
14124 int expected_size)
14125 {
14126 int desired_align = 0;
14127 switch (alg)
14128 {
14129 case no_stringop:
14130 gcc_unreachable ();
14131 case loop:
14132 case unrolled_loop:
14133 desired_align = GET_MODE_SIZE (Pmode);
14134 break;
14135 case rep_prefix_8_byte:
14136 desired_align = 8;
14137 break;
14138 case rep_prefix_4_byte:
14139 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14140 copying a whole cacheline at once.  */
14141 if (TARGET_PENTIUMPRO)
14142 desired_align = 8;
14143 else
14144 desired_align = 4;
14145 break;
14146 case rep_prefix_1_byte:
14147 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14148 copying a whole cacheline at once.  */
14149 if (TARGET_PENTIUMPRO)
14150 desired_align = 8;
14151 else
14152 desired_align = 1;
14153 break;
14154 case loop_1_byte:
14155 desired_align = 1;
14156 break;
14157 case libcall:
14158 return 0;
14159 }
14160
14161 if (optimize_size)
14162 desired_align = 1;
14163 if (desired_align < align)
14164 desired_align = align;
14165 if (expected_size != -1 && expected_size < 4)
14166 desired_align = align;
14167 return desired_align;
14168 }
14169
14170 /* Return the smallest power of 2 greater than VAL. */
14171 static int
14172 smallest_pow2_greater_than (int val)
14173 {
14174 int ret = 1;
14175 while (ret <= val)
14176 ret <<= 1;
14177 return ret;
14178 }
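
/* For example (illustrative), smallest_pow2_greater_than (5) is 8 and
   smallest_pow2_greater_than (16) is 32 - the result is strictly greater
   than VAL even when VAL is already a power of two.  */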
14179
14180 /* Expand string move (memcpy) operation.  Use i386 string operations when
14181 profitable.  ix86_expand_setmem contains similar code.  The code depends upon
14182 architecture, block size and alignment, but always has the same
14183 overall structure:
14184
14185 1) Prologue guard: Conditional that jumps ahead to the epilogue for small
14186 blocks that can be handled by the epilogue alone.  This is faster, but
14187 also needed for correctness, since the prologue assumes the block is larger
14188 than the desired alignment.
14189
14190 An optional dynamic check for size and a libcall for large
14191 blocks is emitted here too, with -minline-stringops-dynamically.
14192
14193 2) Prologue: copy the first few bytes in order to get the destination
14194 aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
14195 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14196 We emit either a jump tree on power of two sized blocks, or a byte loop.
14197
14198 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14199 with the specified algorithm.
14200
14201 4) Epilogue: code copying the tail of the block that is too small to be
14202 handled by the main body (or up to the size guarded by the prologue guard).  */
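
/* Illustrative walk-through (one possible shape, for clarity only): for a
   memcpy of 1000 bytes where decide_alg picks rep_prefix_4_byte and
   decide_alignment picks 4, the guard branches to the epilogue for very small
   counts, the prologue copies up to 3 bytes until the destination is 4-byte
   aligned, the main body issues a "rep movsl", and the epilogue copies the
   final count & 3 bytes.  The actual layout depends on the target costs.  */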
14203
14204 int
14205 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14206 rtx expected_align_exp, rtx expected_size_exp)
14207 {
14208 rtx destreg;
14209 rtx srcreg;
14210 rtx label = NULL;
14211 rtx tmp;
14212 rtx jump_around_label = NULL;
14213 HOST_WIDE_INT align = 1;
14214 unsigned HOST_WIDE_INT count = 0;
14215 HOST_WIDE_INT expected_size = -1;
14216 int size_needed = 0, epilogue_size_needed;
14217 int desired_align = 0;
14218 enum stringop_alg alg;
14219 int dynamic_check;
14220
14221 if (CONST_INT_P (align_exp))
14222 align = INTVAL (align_exp);
14223 /* i386 can do misaligned accesses at a reasonably increased cost.  */
14224 if (CONST_INT_P (expected_align_exp)
14225 && INTVAL (expected_align_exp) > align)
14226 align = INTVAL (expected_align_exp);
14227 if (CONST_INT_P (count_exp))
14228 count = expected_size = INTVAL (count_exp);
14229 if (CONST_INT_P (expected_size_exp) && count == 0)
14230 expected_size = INTVAL (expected_size_exp);
14231
14232 /* Step 0: Decide on preferred algorithm, desired alignment and
14233 size of chunks to be copied by main loop. */
14234
14235 alg = decide_alg (count, expected_size, false, &dynamic_check);
14236 desired_align = decide_alignment (align, alg, expected_size);
14237
14238 if (!TARGET_ALIGN_STRINGOPS)
14239 align = desired_align;
14240
14241 if (alg == libcall)
14242 return 0;
14243 gcc_assert (alg != no_stringop);
14244 if (!count)
14245 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14246 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14247 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14248 switch (alg)
14249 {
14250 case libcall:
14251 case no_stringop:
14252 gcc_unreachable ();
14253 case loop:
14254 size_needed = GET_MODE_SIZE (Pmode);
14255 break;
14256 case unrolled_loop:
14257 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14258 break;
14259 case rep_prefix_8_byte:
14260 size_needed = 8;
14261 break;
14262 case rep_prefix_4_byte:
14263 size_needed = 4;
14264 break;
14265 case rep_prefix_1_byte:
14266 case loop_1_byte:
14267 size_needed = 1;
14268 break;
14269 }
14270
14271 epilogue_size_needed = size_needed;
14272
14273 /* Step 1: Prologue guard. */
14274
14275 /* Alignment code needs count to be in register. */
14276 if (CONST_INT_P (count_exp) && desired_align > align)
14277 {
14278 enum machine_mode mode = SImode;
14279 if (TARGET_64BIT && (count & ~0xffffffff))
14280 mode = DImode;
14281 count_exp = force_reg (mode, count_exp);
14282 }
14283 gcc_assert (desired_align >= 1 && align >= 1);
14284
14285 /* Ensure that alignment prologue won't copy past end of block. */
14286 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14287 {
14288 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14289 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14290 Make sure it is power of 2. */
14291 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14292
14293 label = gen_label_rtx ();
14294 emit_cmp_and_jump_insns (count_exp,
14295 GEN_INT (epilogue_size_needed),
14296 LTU, 0, counter_mode (count_exp), 1, label);
14297 if (GET_CODE (count_exp) == CONST_INT)
14298 ;
14299 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14300 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14301 else
14302 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14303 }
14304 /* Emit code to decide at runtime whether a library call or inline code
14305 should be used.  */
14306 if (dynamic_check != -1)
14307 {
14308 rtx hot_label = gen_label_rtx ();
14309 jump_around_label = gen_label_rtx ();
14310 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14311 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14312 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14313 emit_block_move_via_libcall (dst, src, count_exp, false);
14314 emit_jump (jump_around_label);
14315 emit_label (hot_label);
14316 }
14317
14318 /* Step 2: Alignment prologue. */
14319
14320 if (desired_align > align)
14321 {
14322 /* Except for the first move in the epilogue, we no longer know
14323 the constant offset in the aliasing info.  It doesn't seem worth
14324 the pain to maintain it for the first move, so throw away
14325 the info early.  */
14326 src = change_address (src, BLKmode, srcreg);
14327 dst = change_address (dst, BLKmode, destreg);
14328 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14329 desired_align);
14330 }
14331 if (label && size_needed == 1)
14332 {
14333 emit_label (label);
14334 LABEL_NUSES (label) = 1;
14335 label = NULL;
14336 }
14337
14338 /* Step 3: Main loop. */
14339
14340 switch (alg)
14341 {
14342 case libcall:
14343 case no_stringop:
14344 gcc_unreachable ();
14345 case loop_1_byte:
14346 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14347 count_exp, QImode, 1, expected_size);
14348 break;
14349 case loop:
14350 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14351 count_exp, Pmode, 1, expected_size);
14352 break;
14353 case unrolled_loop:
14354 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14355 registers for 4 temporaries anyway.  */
14356 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14357 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14358 expected_size);
14359 break;
14360 case rep_prefix_8_byte:
14361 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14362 DImode);
14363 break;
14364 case rep_prefix_4_byte:
14365 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14366 SImode);
14367 break;
14368 case rep_prefix_1_byte:
14369 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14370 QImode);
14371 break;
14372 }
14373 /* Adjust properly the offset of src and dest memory for aliasing. */
14374 if (CONST_INT_P (count_exp))
14375 {
14376 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14377 (count / size_needed) * size_needed);
14378 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14379 (count / size_needed) * size_needed);
14380 }
14381 else
14382 {
14383 src = change_address (src, BLKmode, srcreg);
14384 dst = change_address (dst, BLKmode, destreg);
14385 }
14386
14387 /* Step 4: Epilogue to copy the remaining bytes. */
14388
14389 if (label)
14390 {
14391 /* When the main loop is done, COUNT_EXP might hold the original count,
14392 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14393 The epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14394 bytes.  Compensate if needed.  */
14395
14396 if (size_needed < epilogue_size_needed)
14397 {
14398 tmp =
14399 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14400 GEN_INT (size_needed - 1), count_exp, 1,
14401 OPTAB_DIRECT);
14402 if (tmp != count_exp)
14403 emit_move_insn (count_exp, tmp);
14404 }
14405 emit_label (label);
14406 LABEL_NUSES (label) = 1;
14407 }
14408
14409 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14410 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14411 epilogue_size_needed);
14412 if (jump_around_label)
14413 emit_label (jump_around_label);
14414 return 1;
14415 }
14416
14417 /* Helper function for memset.  For the QImode value 0xXY produce
14418 0xXYXYXYXY of the width specified by MODE.  This is essentially
14419 a * 0x01010101, but we can do slightly better than
14420 synth_mult by unwinding the sequence by hand on CPUs with
14421 a slow multiply.  */
14422 static rtx
14423 promote_duplicated_reg (enum machine_mode mode, rtx val)
14424 {
14425 enum machine_mode valmode = GET_MODE (val);
14426 rtx tmp;
14427 int nops = mode == DImode ? 3 : 2;
14428
14429 gcc_assert (mode == SImode || mode == DImode);
14430 if (val == const0_rtx)
14431 return copy_to_mode_reg (mode, const0_rtx);
14432 if (CONST_INT_P (val))
14433 {
14434 HOST_WIDE_INT v = INTVAL (val) & 255;
14435
14436 v |= v << 8;
14437 v |= v << 16;
14438 if (mode == DImode)
14439 v |= (v << 16) << 16;
14440 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14441 }
14442
14443 if (valmode == VOIDmode)
14444 valmode = QImode;
14445 if (valmode != QImode)
14446 val = gen_lowpart (QImode, val);
14447 if (mode == QImode)
14448 return val;
14449 if (!TARGET_PARTIAL_REG_STALL)
14450 nops--;
14451 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14452 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14453 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14454 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14455 {
14456 rtx reg = convert_modes (mode, QImode, val, true);
14457 tmp = promote_duplicated_reg (mode, const1_rtx);
14458 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14459 OPTAB_DIRECT);
14460 }
14461 else
14462 {
14463 rtx reg = convert_modes (mode, QImode, val, true);
14464
14465 if (!TARGET_PARTIAL_REG_STALL)
14466 if (mode == SImode)
14467 emit_insn (gen_movsi_insv_1 (reg, reg));
14468 else
14469 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14470 else
14471 {
14472 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14473 NULL, 1, OPTAB_DIRECT);
14474 reg =
14475 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14476 }
14477 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14478 NULL, 1, OPTAB_DIRECT);
14479 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14480 if (mode == SImode)
14481 return reg;
14482 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14483 NULL, 1, OPTAB_DIRECT);
14484 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14485 return reg;
14486 }
14487 }
14488
14489 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
14490 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue getting
14491 the alignment from ALIGN to DESIRED_ALIGN. */
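/* For illustration (an added note, not original text): a 64-bit memset whose
   main loop stores 8 bytes at a time (SIZE_NEEDED == 8) gets its fill value
   widened to DImode below, whereas SIZE_NEEDED == 2 only calls for an HImode
   duplicate, and SIZE_NEEDED == 1 leaves VAL as is.  */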
14492 static rtx
14493 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14494 {
14495 rtx promoted_val;
14496
14497 if (TARGET_64BIT
14498 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14499 promoted_val = promote_duplicated_reg (DImode, val);
14500 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14501 promoted_val = promote_duplicated_reg (SImode, val);
14502 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14503 promoted_val = promote_duplicated_reg (HImode, val);
14504 else
14505 promoted_val = val;
14506
14507 return promoted_val;
14508 }
14509
14510 /* Expand string clear operation (bzero). Use i386 string operations when
14511 profitable. See expand_movmem comment for explanation of individual
14512 steps performed. */
14513 int
14514 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14515 rtx expected_align_exp, rtx expected_size_exp)
14516 {
14517 rtx destreg;
14518 rtx label = NULL;
14519 rtx tmp;
14520 rtx jump_around_label = NULL;
14521 HOST_WIDE_INT align = 1;
14522 unsigned HOST_WIDE_INT count = 0;
14523 HOST_WIDE_INT expected_size = -1;
14524 int size_needed = 0, epilogue_size_needed;
14525 int desired_align = 0;
14526 enum stringop_alg alg;
14527 rtx promoted_val = NULL;
14528 bool force_loopy_epilogue = false;
14529 int dynamic_check;
14530
14531 if (CONST_INT_P (align_exp))
14532 align = INTVAL (align_exp);
14533 /* i386 can do misaligned access at a reasonably increased cost. */
14534 if (CONST_INT_P (expected_align_exp)
14535 && INTVAL (expected_align_exp) > align)
14536 align = INTVAL (expected_align_exp);
14537 if (CONST_INT_P (count_exp))
14538 count = expected_size = INTVAL (count_exp);
14539 if (CONST_INT_P (expected_size_exp) && count == 0)
14540 expected_size = INTVAL (expected_size_exp);
14541
14542 /* Step 0: Decide on preferred algorithm, desired alignment and
14543 size of chunks to be copied by main loop. */
14544
14545 alg = decide_alg (count, expected_size, true, &dynamic_check);
14546 desired_align = decide_alignment (align, alg, expected_size);
14547
14548 if (!TARGET_ALIGN_STRINGOPS)
14549 align = desired_align;
14550
14551 if (alg == libcall)
14552 return 0;
14553 gcc_assert (alg != no_stringop);
14554 if (!count)
14555 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14556 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14557 switch (alg)
14558 {
14559 case libcall:
14560 case no_stringop:
14561 gcc_unreachable ();
14562 case loop:
14563 size_needed = GET_MODE_SIZE (Pmode);
14564 break;
14565 case unrolled_loop:
14566 size_needed = GET_MODE_SIZE (Pmode) * 4;
14567 break;
14568 case rep_prefix_8_byte:
14569 size_needed = 8;
14570 break;
14571 case rep_prefix_4_byte:
14572 size_needed = 4;
14573 break;
14574 case rep_prefix_1_byte:
14575 case loop_1_byte:
14576 size_needed = 1;
14577 break;
14578 }
14579 epilogue_size_needed = size_needed;
14580
14581 /* Step 1: Prologue guard. */
14582
14583 /* Alignment code needs count to be in register. */
14584 if (CONST_INT_P (count_exp) && desired_align > align)
14585 {
14586 enum machine_mode mode = SImode;
14587 if (TARGET_64BIT && (count & ~0xffffffff))
14588 mode = DImode;
14589 count_exp = force_reg (mode, count_exp);
14590 }
14591 /* Do the cheap promotion to allow better CSE across the
14592 main loop and epilogue (i.e. one load of the big constant at the
14593 front of all the code). */
14594 if (CONST_INT_P (val_exp))
14595 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14596 desired_align, align);
14597 /* Ensure that alignment prologue won't copy past end of block. */
14598 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14599 {
14600 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14601 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14602 Make sure it is power of 2. */
14603 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14604
14605 /* To improve performance of small blocks, we jump around the VAL
14606 promoting code.  This means that if the promoted VAL is not constant,
14607 we might not use it in the epilogue and have to use the byte
14608 loop variant. */
14609 if (epilogue_size_needed > 2 && !promoted_val)
14610 force_loopy_epilogue = true;
14611 label = gen_label_rtx ();
14612 emit_cmp_and_jump_insns (count_exp,
14613 GEN_INT (epilogue_size_needed),
14614 LTU, 0, counter_mode (count_exp), 1, label);
14615 if (CONST_INT_P (count_exp))
14616 ;
14617 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14618 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14619 else
14620 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14621 }
14622 if (dynamic_check != -1)
14623 {
14624 rtx hot_label = gen_label_rtx ();
14625 jump_around_label = gen_label_rtx ();
14626 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14627 LEU, 0, counter_mode (count_exp), 1, hot_label);
14628 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14629 set_storage_via_libcall (dst, count_exp, val_exp, false);
14630 emit_jump (jump_around_label);
14631 emit_label (hot_label);
14632 }
14633
14634 /* Step 2: Alignment prologue. */
14635
14636 /* Do the expensive promotion once we branched off the small blocks. */
14637 if (!promoted_val)
14638 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14639 desired_align, align);
14640 gcc_assert (desired_align >= 1 && align >= 1);
14641
14642 if (desired_align > align)
14643 {
14644 /* Except for the first move in the epilogue, we no longer know
14645 the constant offset in the aliasing info.  It doesn't seem to be worth
14646 the pain to maintain it for the first move, so throw away
14647 the info early. */
14648 dst = change_address (dst, BLKmode, destreg);
14649 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14650 desired_align);
14651 }
14652 if (label && size_needed == 1)
14653 {
14654 emit_label (label);
14655 LABEL_NUSES (label) = 1;
14656 label = NULL;
14657 }
14658
14659 /* Step 3: Main loop. */
14660
14661 switch (alg)
14662 {
14663 case libcall:
14664 case no_stringop:
14665 gcc_unreachable ();
14666 case loop_1_byte:
14667 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14668 count_exp, QImode, 1, expected_size);
14669 break;
14670 case loop:
14671 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14672 count_exp, Pmode, 1, expected_size);
14673 break;
14674 case unrolled_loop:
14675 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14676 count_exp, Pmode, 4, expected_size);
14677 break;
14678 case rep_prefix_8_byte:
14679 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14680 DImode);
14681 break;
14682 case rep_prefix_4_byte:
14683 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14684 SImode);
14685 break;
14686 case rep_prefix_1_byte:
14687 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14688 QImode);
14689 break;
14690 }
14691 /* Properly adjust the offset of the dest memory for aliasing. */
14692 if (CONST_INT_P (count_exp))
14693 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14694 (count / size_needed) * size_needed);
14695 else
14696 dst = change_address (dst, BLKmode, destreg);
14697
14698 /* Step 4: Epilogue to copy the remaining bytes. */
14699
14700 if (label)
14701 {
14702 /* When the main loop is done, COUNT_EXP might hold original count,
14703 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14704 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14705 bytes. Compensate if needed. */
14706
14707 if (size_needed < desired_align - align)
14708 {
14709 tmp =
14710 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14711 GEN_INT (size_needed - 1), count_exp, 1,
14712 OPTAB_DIRECT);
14713 size_needed = desired_align - align + 1;
14714 if (tmp != count_exp)
14715 emit_move_insn (count_exp, tmp);
14716 }
14717 emit_label (label);
14718 LABEL_NUSES (label) = 1;
14719 }
14720 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14721 {
14722 if (force_loopy_epilogue)
14723 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14724 size_needed);
14725 else
14726 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14727 size_needed);
14728 }
14729 if (jump_around_label)
14730 emit_label (jump_around_label);
14731 return 1;
14732 }
14733
14734 /* Expand strlen. */
14735 int
14736 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14737 {
14738 rtx addr, scratch1, scratch2, scratch3, scratch4;
14739
14740 /* The generic case of the strlen expander is long.  Avoid expanding
14741 it unless TARGET_INLINE_ALL_STRINGOPS. */
14742
14743 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14744 && !TARGET_INLINE_ALL_STRINGOPS
14745 && !optimize_size
14746 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14747 return 0;
14748
14749 addr = force_reg (Pmode, XEXP (src, 0));
14750 scratch1 = gen_reg_rtx (Pmode);
14751
14752 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14753 && !optimize_size)
14754 {
14755 /* Well, it seems that some optimizer does not combine a call like
14756 foo(strlen(bar), strlen(bar));
14757 when the move and the subtraction are done here.  It does calculate
14758 the length just once when these instructions are done inside of
14759 output_strlen_unroll().  But I think that since &bar[strlen(bar)] is
14760 often used, and I use one fewer register for the lifetime of
14761 output_strlen_unroll(), this is better. */
14762
14763 emit_move_insn (out, addr);
14764
14765 ix86_expand_strlensi_unroll_1 (out, src, align);
14766
14767 /* strlensi_unroll_1 returns the address of the zero at the end of
14768 the string, like memchr(), so compute the length by subtracting
14769 the start address. */
14770 if (TARGET_64BIT)
14771 emit_insn (gen_subdi3 (out, out, addr));
14772 else
14773 emit_insn (gen_subsi3 (out, out, addr));
14774 }
14775 else
14776 {
14777 rtx unspec;
14778 scratch2 = gen_reg_rtx (Pmode);
14779 scratch3 = gen_reg_rtx (Pmode);
14780 scratch4 = force_reg (Pmode, constm1_rtx);
14781
14782 emit_move_insn (scratch3, addr);
14783 eoschar = force_reg (QImode, eoschar);
14784
14785 src = replace_equiv_address_nv (src, scratch3);
14786
14787 /* If .md starts supporting :P, this can be done in .md. */
14788 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14789 scratch4), UNSPEC_SCAS);
14790 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14791 if (TARGET_64BIT)
14792 {
14793 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14794 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14795 }
14796 else
14797 {
14798 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14799 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14800 }
14801 }
14802 return 1;
14803 }
14804
14805 /* Expand the appropriate insns for doing strlen if not just doing
14806 repnz; scasb
14807
14808 out = result, initialized with the start address
14809 align_rtx = alignment of the address.
14810 scratch = scratch register, initialized with the start address when
14811 not aligned, otherwise undefined
14812
14813 This is just the body. It needs the initializations mentioned above and
14814 some address computing at the end. These things are done in i386.md. */
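/* Informal summary (added for clarity, not original text): the code below
   first advances OUT a byte at a time until the address is 4-byte aligned,
   jumping out early if a zero byte is found, then scans a word at a time
   using the "does this word contain a zero byte" trick, and finally adjusts
   OUT so that it points at the terminating zero itself.  */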
14815
14816 static void
14817 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14818 {
14819 int align;
14820 rtx tmp;
14821 rtx align_2_label = NULL_RTX;
14822 rtx align_3_label = NULL_RTX;
14823 rtx align_4_label = gen_label_rtx ();
14824 rtx end_0_label = gen_label_rtx ();
14825 rtx mem;
14826 rtx tmpreg = gen_reg_rtx (SImode);
14827 rtx scratch = gen_reg_rtx (SImode);
14828 rtx cmp;
14829
14830 align = 0;
14831 if (CONST_INT_P (align_rtx))
14832 align = INTVAL (align_rtx);
14833
14834 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14835
14836 /* Is there a known alignment and is it less than 4? */
14837 if (align < 4)
14838 {
14839 rtx scratch1 = gen_reg_rtx (Pmode);
14840 emit_move_insn (scratch1, out);
14841 /* Is there a known alignment and is it not 2? */
14842 if (align != 2)
14843 {
14844 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14845 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14846
14847 /* Leave just the 3 lower bits. */
14848 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14849 NULL_RTX, 0, OPTAB_WIDEN);
14850
14851 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14852 Pmode, 1, align_4_label);
14853 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14854 Pmode, 1, align_2_label);
14855 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14856 Pmode, 1, align_3_label);
14857 }
14858 else
14859 {
14860 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14861 check whether it is aligned to a 4-byte boundary. */
14862
14863 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14864 NULL_RTX, 0, OPTAB_WIDEN);
14865
14866 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14867 Pmode, 1, align_4_label);
14868 }
14869
14870 mem = change_address (src, QImode, out);
14871
14872 /* Now compare the bytes. */
14873
14874 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14875 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14876 QImode, 1, end_0_label);
14877
14878 /* Increment the address. */
14879 if (TARGET_64BIT)
14880 emit_insn (gen_adddi3 (out, out, const1_rtx));
14881 else
14882 emit_insn (gen_addsi3 (out, out, const1_rtx));
14883
14884 /* Not needed with an alignment of 2 */
14885 if (align != 2)
14886 {
14887 emit_label (align_2_label);
14888
14889 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14890 end_0_label);
14891
14892 if (TARGET_64BIT)
14893 emit_insn (gen_adddi3 (out, out, const1_rtx));
14894 else
14895 emit_insn (gen_addsi3 (out, out, const1_rtx));
14896
14897 emit_label (align_3_label);
14898 }
14899
14900 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14901 end_0_label);
14902
14903 if (TARGET_64BIT)
14904 emit_insn (gen_adddi3 (out, out, const1_rtx));
14905 else
14906 emit_insn (gen_addsi3 (out, out, const1_rtx));
14907 }
14908
14909 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
14910 align this loop; it only makes programs bigger and does not help
14911 speed them up. */
14912 emit_label (align_4_label);
14913
14914 mem = change_address (src, SImode, out);
14915 emit_move_insn (scratch, mem);
14916 if (TARGET_64BIT)
14917 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14918 else
14919 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14920
14921 /* This formula yields a nonzero result iff one of the bytes is zero.
14922 This saves three branches inside the loop and many cycles. */
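/* A sketch of why this works (illustrative, with a made-up value): with
   scratch == 0x00434241 (the bytes 'A' 'B' 'C' '\0'),
       scratch - 0x01010101  == 0xff424140
       ~scratch              == 0xffbcbdbe
       AND of the two        == 0xff000100
       ... & 0x80808080      == 0x80000000
   which is nonzero precisely because one byte of scratch was zero; a word
   with no zero byte always produces 0 here, since without a zero byte no
   borrow can propagate and every surviving high bit is masked off.  */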
14923
14924 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14925 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14926 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14927 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14928 gen_int_mode (0x80808080, SImode)));
14929 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14930 align_4_label);
14931
14932 if (TARGET_CMOVE)
14933 {
14934 rtx reg = gen_reg_rtx (SImode);
14935 rtx reg2 = gen_reg_rtx (Pmode);
14936 emit_move_insn (reg, tmpreg);
14937 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14938
14939 /* If zero is not in the first two bytes, move two bytes forward. */
14940 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14941 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14942 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14943 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14944 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14945 reg,
14946 tmpreg)));
14947 /* Emit lea manually to avoid clobbering of flags. */
14948 emit_insn (gen_rtx_SET (SImode, reg2,
14949 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14950
14951 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14952 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14953 emit_insn (gen_rtx_SET (VOIDmode, out,
14954 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14955 reg2,
14956 out)));
14957
14958 }
14959 else
14960 {
14961 rtx end_2_label = gen_label_rtx ();
14962 /* Is zero in the first two bytes? */
14963
14964 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14965 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14966 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14967 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14968 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14969 pc_rtx);
14970 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14971 JUMP_LABEL (tmp) = end_2_label;
14972
14973 /* Not in the first two. Move two bytes forward. */
14974 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14975 if (TARGET_64BIT)
14976 emit_insn (gen_adddi3 (out, out, const2_rtx));
14977 else
14978 emit_insn (gen_addsi3 (out, out, const2_rtx));
14979
14980 emit_label (end_2_label);
14981
14982 }
14983
14984 /* Avoid a branch in the final byte fixup. */
14985 tmpreg = gen_lowpart (QImode, tmpreg);
14986 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14987 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14988 if (TARGET_64BIT)
14989 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14990 else
14991 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14992
14993 emit_label (end_0_label);
14994 }
14995
14996 /* For a given symbol (function), construct code to compute the address of its
14997 PLT entry in the large x86-64 PIC model. */
14998 rtx
14999 construct_plt_address (rtx symbol)
15000 {
15001 rtx tmp = gen_reg_rtx (Pmode);
15002 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15003
15004 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15005 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15006
15007 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15008 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15009 return tmp;
15010 }
15011
15012 void
15013 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15014 rtx callarg2 ATTRIBUTE_UNUSED,
15015 rtx pop, int sibcall)
15016 {
15017 rtx use = NULL, call;
15018
15019 if (pop == const0_rtx)
15020 pop = NULL;
15021 gcc_assert (!TARGET_64BIT || !pop);
15022
15023 if (TARGET_MACHO && !TARGET_64BIT)
15024 {
15025 #if TARGET_MACHO
15026 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15027 fnaddr = machopic_indirect_call_target (fnaddr);
15028 #endif
15029 }
15030 else
15031 {
15032 /* Static functions and indirect calls don't need the pic register. */
15033 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15034 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15035 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15036 use_reg (&use, pic_offset_table_rtx);
15037 }
15038
15039 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15040 {
15041 rtx al = gen_rtx_REG (QImode, 0);
15042 emit_move_insn (al, callarg2);
15043 use_reg (&use, al);
15044 }
15045
15046 if (ix86_cmodel == CM_LARGE_PIC
15047 && GET_CODE (fnaddr) == MEM
15048 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15049 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15050 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15051 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15052 {
15053 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15054 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15055 }
15056 if (sibcall && TARGET_64BIT
15057 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15058 {
15059 rtx addr;
15060 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15061 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15062 emit_move_insn (fnaddr, addr);
15063 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15064 }
15065
15066 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15067 if (retval)
15068 call = gen_rtx_SET (VOIDmode, retval, call);
15069 if (pop)
15070 {
15071 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15072 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15073 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15074 }
15075
15076 call = emit_call_insn (call);
15077 if (use)
15078 CALL_INSN_FUNCTION_USAGE (call) = use;
15079 }
15080
15081 \f
15082 /* Clear stack slot assignments remembered from previous functions.
15083 This is called from INIT_EXPANDERS once before RTL is emitted for each
15084 function. */
15085
15086 static struct machine_function *
15087 ix86_init_machine_status (void)
15088 {
15089 struct machine_function *f;
15090
15091 f = ggc_alloc_cleared (sizeof (struct machine_function));
15092 f->use_fast_prologue_epilogue_nregs = -1;
15093 f->tls_descriptor_call_expanded_p = 0;
15094
15095 return f;
15096 }
15097
15098 /* Return a MEM corresponding to a stack slot with mode MODE.
15099 Allocate a new slot if necessary.
15100
15101 The RTL for a function can have several slots available: N is
15102 which slot to use. */
15103
15104 rtx
15105 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15106 {
15107 struct stack_local_entry *s;
15108
15109 gcc_assert (n < MAX_386_STACK_LOCALS);
15110
15111 for (s = ix86_stack_locals; s; s = s->next)
15112 if (s->mode == mode && s->n == n)
15113 return copy_rtx (s->rtl);
15114
15115 s = (struct stack_local_entry *)
15116 ggc_alloc (sizeof (struct stack_local_entry));
15117 s->n = n;
15118 s->mode = mode;
15119 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15120
15121 s->next = ix86_stack_locals;
15122 ix86_stack_locals = s;
15123 return s->rtl;
15124 }
15125
15126 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15127
15128 static GTY(()) rtx ix86_tls_symbol;
15129 rtx
15130 ix86_tls_get_addr (void)
15131 {
15132
15133 if (!ix86_tls_symbol)
15134 {
15135 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15136 (TARGET_ANY_GNU_TLS
15137 && !TARGET_64BIT)
15138 ? "___tls_get_addr"
15139 : "__tls_get_addr");
15140 }
15141
15142 return ix86_tls_symbol;
15143 }
15144
15145 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15146
15147 static GTY(()) rtx ix86_tls_module_base_symbol;
15148 rtx
15149 ix86_tls_module_base (void)
15150 {
15151
15152 if (!ix86_tls_module_base_symbol)
15153 {
15154 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15155 "_TLS_MODULE_BASE_");
15156 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15157 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15158 }
15159
15160 return ix86_tls_module_base_symbol;
15161 }
15162 \f
15163 /* Calculate the length of the memory address in the instruction
15164 encoding. Does not include the one-byte modrm, opcode, or prefix. */
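/* Illustrative examples (not part of the original comment): with the rules
   below, (%ecx) costs 0 extra bytes, (%esp) and 8(%ebp) each cost 1 (a SIB
   byte resp. a disp8), a bare 32-bit address costs 4, and foo(,%eax,4)
   costs 5 (a disp32 plus the SIB byte).  */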
15165
15166 int
15167 memory_address_length (rtx addr)
15168 {
15169 struct ix86_address parts;
15170 rtx base, index, disp;
15171 int len;
15172 int ok;
15173
15174 if (GET_CODE (addr) == PRE_DEC
15175 || GET_CODE (addr) == POST_INC
15176 || GET_CODE (addr) == PRE_MODIFY
15177 || GET_CODE (addr) == POST_MODIFY)
15178 return 0;
15179
15180 ok = ix86_decompose_address (addr, &parts);
15181 gcc_assert (ok);
15182
15183 if (parts.base && GET_CODE (parts.base) == SUBREG)
15184 parts.base = SUBREG_REG (parts.base);
15185 if (parts.index && GET_CODE (parts.index) == SUBREG)
15186 parts.index = SUBREG_REG (parts.index);
15187
15188 base = parts.base;
15189 index = parts.index;
15190 disp = parts.disp;
15191 len = 0;
15192
15193 /* Rule of thumb:
15194 - esp as the base always wants an index,
15195 - ebp as the base always wants a displacement. */
15196
15197 /* Register Indirect. */
15198 if (base && !index && !disp)
15199 {
15200 /* esp (for its index) and ebp (for its displacement) need
15201 the two-byte modrm form. */
15202 if (addr == stack_pointer_rtx
15203 || addr == arg_pointer_rtx
15204 || addr == frame_pointer_rtx
15205 || addr == hard_frame_pointer_rtx)
15206 len = 1;
15207 }
15208
15209 /* Direct Addressing. */
15210 else if (disp && !base && !index)
15211 len = 4;
15212
15213 else
15214 {
15215 /* Find the length of the displacement constant. */
15216 if (disp)
15217 {
15218 if (base && satisfies_constraint_K (disp))
15219 len = 1;
15220 else
15221 len = 4;
15222 }
15223 /* ebp always wants a displacement. */
15224 else if (base == hard_frame_pointer_rtx)
15225 len = 1;
15226
15227 /* An index requires the two-byte modrm form.... */
15228 if (index
15229 /* ...like esp, which always wants an index. */
15230 || base == stack_pointer_rtx
15231 || base == arg_pointer_rtx
15232 || base == frame_pointer_rtx)
15233 len += 1;
15234 }
15235
15236 return len;
15237 }
15238
15239 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
15240 is set, expect that the insn has an 8-bit immediate alternative. */
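/* For example (an illustration added here, not from the original): an
   "andl $12345678, %eax" carries a 4-byte immediate, while with SHORTFORM
   set an "andl $4, %eax" can use the sign-extended 8-bit form and
   contributes just 1 byte.  */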
15241 int
15242 ix86_attr_length_immediate_default (rtx insn, int shortform)
15243 {
15244 int len = 0;
15245 int i;
15246 extract_insn_cached (insn);
15247 for (i = recog_data.n_operands - 1; i >= 0; --i)
15248 if (CONSTANT_P (recog_data.operand[i]))
15249 {
15250 gcc_assert (!len);
15251 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15252 len = 1;
15253 else
15254 {
15255 switch (get_attr_mode (insn))
15256 {
15257 case MODE_QI:
15258 len+=1;
15259 break;
15260 case MODE_HI:
15261 len+=2;
15262 break;
15263 case MODE_SI:
15264 len+=4;
15265 break;
15266 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15267 case MODE_DI:
15268 len+=4;
15269 break;
15270 default:
15271 fatal_insn ("unknown insn mode", insn);
15272 }
15273 }
15274 }
15275 return len;
15276 }
15277 /* Compute default value for "length_address" attribute. */
15278 int
15279 ix86_attr_length_address_default (rtx insn)
15280 {
15281 int i;
15282
15283 if (get_attr_type (insn) == TYPE_LEA)
15284 {
15285 rtx set = PATTERN (insn);
15286
15287 if (GET_CODE (set) == PARALLEL)
15288 set = XVECEXP (set, 0, 0);
15289
15290 gcc_assert (GET_CODE (set) == SET);
15291
15292 return memory_address_length (SET_SRC (set));
15293 }
15294
15295 extract_insn_cached (insn);
15296 for (i = recog_data.n_operands - 1; i >= 0; --i)
15297 if (MEM_P (recog_data.operand[i]))
15298 {
15299 return memory_address_length (XEXP (recog_data.operand[i], 0));
15300 break;
15301 }
15302 return 0;
15303 }
15304 \f
15305 /* Return the maximum number of instructions a cpu can issue. */
15306
15307 static int
15308 ix86_issue_rate (void)
15309 {
15310 switch (ix86_tune)
15311 {
15312 case PROCESSOR_PENTIUM:
15313 case PROCESSOR_K6:
15314 return 2;
15315
15316 case PROCESSOR_PENTIUMPRO:
15317 case PROCESSOR_PENTIUM4:
15318 case PROCESSOR_ATHLON:
15319 case PROCESSOR_K8:
15320 case PROCESSOR_AMDFAM10:
15321 case PROCESSOR_NOCONA:
15322 case PROCESSOR_GENERIC32:
15323 case PROCESSOR_GENERIC64:
15324 return 3;
15325
15326 case PROCESSOR_CORE2:
15327 return 4;
15328
15329 default:
15330 return 1;
15331 }
15332 }
15333
15334 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
15335 by DEP_INSN and nothing else that DEP_INSN sets. */
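/* For instance (illustrative only): a "cmpl %eax, %ebx" followed by
   "jne .L1" communicates only through the flags, so the Pentium case of
   ix86_adjust_cost below drops the dependence cost to 0 and lets the
   compare pair with the branch.  */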
15336
15337 static int
15338 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15339 {
15340 rtx set, set2;
15341
15342 /* Simplify the test for uninteresting insns. */
15343 if (insn_type != TYPE_SETCC
15344 && insn_type != TYPE_ICMOV
15345 && insn_type != TYPE_FCMOV
15346 && insn_type != TYPE_IBR)
15347 return 0;
15348
15349 if ((set = single_set (dep_insn)) != 0)
15350 {
15351 set = SET_DEST (set);
15352 set2 = NULL_RTX;
15353 }
15354 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15355 && XVECLEN (PATTERN (dep_insn), 0) == 2
15356 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15357 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15358 {
15359 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15360 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15361 }
15362 else
15363 return 0;
15364
15365 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15366 return 0;
15367
15368 /* This test is true if the dependent insn reads the flags but
15369 not any other potentially set register. */
15370 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15371 return 0;
15372
15373 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15374 return 0;
15375
15376 return 1;
15377 }
15378
15379 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15380 address with operands set by DEP_INSN. */
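/* Illustrative example (not from the original source): on the original
   Pentium a sequence such as
       addl $4, %ebx
       movl (%ebx), %eax
   hits an address-generation interlock, which the Pentium case of
   ix86_adjust_cost below models by adding one cycle to the cost.  */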
15381
15382 static int
15383 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15384 {
15385 rtx addr;
15386
15387 if (insn_type == TYPE_LEA
15388 && TARGET_PENTIUM)
15389 {
15390 addr = PATTERN (insn);
15391
15392 if (GET_CODE (addr) == PARALLEL)
15393 addr = XVECEXP (addr, 0, 0);
15394
15395 gcc_assert (GET_CODE (addr) == SET);
15396
15397 addr = SET_SRC (addr);
15398 }
15399 else
15400 {
15401 int i;
15402 extract_insn_cached (insn);
15403 for (i = recog_data.n_operands - 1; i >= 0; --i)
15404 if (MEM_P (recog_data.operand[i]))
15405 {
15406 addr = XEXP (recog_data.operand[i], 0);
15407 goto found;
15408 }
15409 return 0;
15410 found:;
15411 }
15412
15413 return modified_in_p (addr, dep_insn);
15414 }
15415
15416 static int
15417 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15418 {
15419 enum attr_type insn_type, dep_insn_type;
15420 enum attr_memory memory;
15421 rtx set, set2;
15422 int dep_insn_code_number;
15423
15424 /* Anti and output dependencies have zero cost on all CPUs. */
15425 if (REG_NOTE_KIND (link) != 0)
15426 return 0;
15427
15428 dep_insn_code_number = recog_memoized (dep_insn);
15429
15430 /* If we can't recognize the insns, we can't really do anything. */
15431 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15432 return cost;
15433
15434 insn_type = get_attr_type (insn);
15435 dep_insn_type = get_attr_type (dep_insn);
15436
15437 switch (ix86_tune)
15438 {
15439 case PROCESSOR_PENTIUM:
15440 /* Address Generation Interlock adds a cycle of latency. */
15441 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15442 cost += 1;
15443
15444 /* ??? Compares pair with jump/setcc. */
15445 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15446 cost = 0;
15447
15448 /* Floating point stores require the value to be ready one cycle earlier. */
15449 if (insn_type == TYPE_FMOV
15450 && get_attr_memory (insn) == MEMORY_STORE
15451 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15452 cost += 1;
15453 break;
15454
15455 case PROCESSOR_PENTIUMPRO:
15456 memory = get_attr_memory (insn);
15457
15458 /* INT->FP conversion is expensive. */
15459 if (get_attr_fp_int_src (dep_insn))
15460 cost += 5;
15461
15462 /* There is one cycle extra latency between an FP op and a store. */
15463 if (insn_type == TYPE_FMOV
15464 && (set = single_set (dep_insn)) != NULL_RTX
15465 && (set2 = single_set (insn)) != NULL_RTX
15466 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15467 && MEM_P (SET_DEST (set2)))
15468 cost += 1;
15469
15470 /* Model the ability of the reorder buffer to hide the latency of a load by
15471 executing it in parallel with the previous instruction when the
15472 previous instruction is not needed to compute the address. */
15473 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15474 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15475 {
15476 /* Claim that moves take one cycle, as the core can issue one load
15477 at a time and the next load can start a cycle later. */
15478 if (dep_insn_type == TYPE_IMOV
15479 || dep_insn_type == TYPE_FMOV)
15480 cost = 1;
15481 else if (cost > 1)
15482 cost--;
15483 }
15484 break;
15485
15486 case PROCESSOR_K6:
15487 memory = get_attr_memory (insn);
15488
15489 /* The esp dependency is resolved before the instruction is really
15490 finished. */
15491 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15492 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15493 return 1;
15494
15495 /* INT->FP conversion is expensive. */
15496 if (get_attr_fp_int_src (dep_insn))
15497 cost += 5;
15498
15499 /* Model the ability of the reorder buffer to hide the latency of a load by
15500 executing it in parallel with the previous instruction when the
15501 previous instruction is not needed to compute the address. */
15502 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15503 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15504 {
15505 /* Claim that moves take one cycle, as the core can issue one load
15506 at a time and the next load can start a cycle later. */
15507 if (dep_insn_type == TYPE_IMOV
15508 || dep_insn_type == TYPE_FMOV)
15509 cost = 1;
15510 else if (cost > 2)
15511 cost -= 2;
15512 else
15513 cost = 1;
15514 }
15515 break;
15516
15517 case PROCESSOR_ATHLON:
15518 case PROCESSOR_K8:
15519 case PROCESSOR_AMDFAM10:
15520 case PROCESSOR_GENERIC32:
15521 case PROCESSOR_GENERIC64:
15522 memory = get_attr_memory (insn);
15523
15524 /* Model the ability of the reorder buffer to hide the latency of a load by
15525 executing it in parallel with the previous instruction when the
15526 previous instruction is not needed to compute the address. */
15527 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15528 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15529 {
15530 enum attr_unit unit = get_attr_unit (insn);
15531 int loadcost = 3;
15532
15533 /* Because of the difference between the length of integer and
15534 floating unit pipeline preparation stages, the memory operands
15535 for floating point are cheaper.
15536
15537 ??? For Athlon the difference is most probably 2. */
15538 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15539 loadcost = 3;
15540 else
15541 loadcost = TARGET_ATHLON ? 2 : 0;
15542
15543 if (cost >= loadcost)
15544 cost -= loadcost;
15545 else
15546 cost = 0;
15547 }
15548
15549 default:
15550 break;
15551 }
15552
15553 return cost;
15554 }
15555
15556 /* How many alternative schedules to try. This should be as wide as the
15557 scheduling freedom in the DFA, but no wider. Making this value too
15558 large results in extra work for the scheduler. */
15559
15560 static int
15561 ia32_multipass_dfa_lookahead (void)
15562 {
15563 if (ix86_tune == PROCESSOR_PENTIUM)
15564 return 2;
15565
15566 if (ix86_tune == PROCESSOR_PENTIUMPRO
15567 || ix86_tune == PROCESSOR_K6)
15568 return 1;
15569
15570 else
15571 return 0;
15572 }
15573
15574 \f
15575 /* Compute the alignment given to a constant that is being placed in memory.
15576 EXP is the constant and ALIGN is the alignment that the object would
15577 ordinarily have.
15578 The value of this function is used instead of that alignment to align
15579 the object. */
15580
15581 int
15582 ix86_constant_alignment (tree exp, int align)
15583 {
15584 if (TREE_CODE (exp) == REAL_CST)
15585 {
15586 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15587 return 64;
15588 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15589 return 128;
15590 }
15591 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15592 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15593 return BITS_PER_WORD;
15594
15595 return align;
15596 }
15597
15598 /* Compute the alignment for a static variable.
15599 TYPE is the data type, and ALIGN is the alignment that
15600 the object would ordinarily have. The value of this function is used
15601 instead of that alignment to align the object. */
15602
15603 int
15604 ix86_data_alignment (tree type, int align)
15605 {
15606 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15607
15608 if (AGGREGATE_TYPE_P (type)
15609 && TYPE_SIZE (type)
15610 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15611 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15612 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15613 && align < max_align)
15614 align = max_align;
15615
15616 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15617 to a 16-byte boundary. */
15618 if (TARGET_64BIT)
15619 {
15620 if (AGGREGATE_TYPE_P (type)
15621 && TYPE_SIZE (type)
15622 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15623 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15624 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15625 return 128;
15626 }
15627
15628 if (TREE_CODE (type) == ARRAY_TYPE)
15629 {
15630 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15631 return 64;
15632 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15633 return 128;
15634 }
15635 else if (TREE_CODE (type) == COMPLEX_TYPE)
15636 {
15637
15638 if (TYPE_MODE (type) == DCmode && align < 64)
15639 return 64;
15640 if (TYPE_MODE (type) == XCmode && align < 128)
15641 return 128;
15642 }
15643 else if ((TREE_CODE (type) == RECORD_TYPE
15644 || TREE_CODE (type) == UNION_TYPE
15645 || TREE_CODE (type) == QUAL_UNION_TYPE)
15646 && TYPE_FIELDS (type))
15647 {
15648 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15649 return 64;
15650 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15651 return 128;
15652 }
15653 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15654 || TREE_CODE (type) == INTEGER_TYPE)
15655 {
15656 if (TYPE_MODE (type) == DFmode && align < 64)
15657 return 64;
15658 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15659 return 128;
15660 }
15661
15662 return align;
15663 }
15664
15665 /* Compute the alignment for a local variable.
15666 TYPE is the data type, and ALIGN is the alignment that
15667 the object would ordinarily have. The value of this macro is used
15668 instead of that alignment to align the object. */
15669
15670 int
15671 ix86_local_alignment (tree type, int align)
15672 {
15673 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15674 to a 16-byte boundary. */
15675 if (TARGET_64BIT)
15676 {
15677 if (AGGREGATE_TYPE_P (type)
15678 && TYPE_SIZE (type)
15679 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15680 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15681 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15682 return 128;
15683 }
15684 if (TREE_CODE (type) == ARRAY_TYPE)
15685 {
15686 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15687 return 64;
15688 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15689 return 128;
15690 }
15691 else if (TREE_CODE (type) == COMPLEX_TYPE)
15692 {
15693 if (TYPE_MODE (type) == DCmode && align < 64)
15694 return 64;
15695 if (TYPE_MODE (type) == XCmode && align < 128)
15696 return 128;
15697 }
15698 else if ((TREE_CODE (type) == RECORD_TYPE
15699 || TREE_CODE (type) == UNION_TYPE
15700 || TREE_CODE (type) == QUAL_UNION_TYPE)
15701 && TYPE_FIELDS (type))
15702 {
15703 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15704 return 64;
15705 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15706 return 128;
15707 }
15708 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15709 || TREE_CODE (type) == INTEGER_TYPE)
15710 {
15711
15712 if (TYPE_MODE (type) == DFmode && align < 64)
15713 return 64;
15714 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15715 return 128;
15716 }
15717 return align;
15718 }
15719 \f
15720 /* Emit RTL insns to initialize the variable parts of a trampoline.
15721 FNADDR is an RTX for the address of the function's pure code.
15722 CXT is an RTX for the static chain value for the function. */
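/* Roughly, the bytes emitted below are (a sketch, not authoritative):
   32-bit:   b9 <cxt32>          movl   $CXT, %ecx
             e9 <rel32>          jmp    FNADDR
   64-bit:   49 bb <fnaddr64>    movabs $FNADDR, %r11  (or 41 bb + imm32)
             49 ba <cxt64>       movabs $CXT, %r10
             49 ff e3            jmp    *%r11  */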
15723 void
15724 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15725 {
15726 if (!TARGET_64BIT)
15727 {
15728 /* Compute offset from the end of the jmp to the target function. */
15729 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15730 plus_constant (tramp, 10),
15731 NULL_RTX, 1, OPTAB_DIRECT);
15732 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15733 gen_int_mode (0xb9, QImode));
15734 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15735 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15736 gen_int_mode (0xe9, QImode));
15737 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15738 }
15739 else
15740 {
15741 int offset = 0;
15742 /* Try to load the address using the shorter movl instead of movabs.
15743 We may want to support movq for kernel mode, but the kernel does not use
15744 trampolines at the moment. */
15745 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15746 {
15747 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15748 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15749 gen_int_mode (0xbb41, HImode));
15750 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15751 gen_lowpart (SImode, fnaddr));
15752 offset += 6;
15753 }
15754 else
15755 {
15756 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15757 gen_int_mode (0xbb49, HImode));
15758 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15759 fnaddr);
15760 offset += 10;
15761 }
15762 /* Load static chain using movabs to r10. */
15763 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15764 gen_int_mode (0xba49, HImode));
15765 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15766 cxt);
15767 offset += 10;
15768 /* Jump to r11. */
15769 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15770 gen_int_mode (0xff49, HImode));
15771 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15772 gen_int_mode (0xe3, QImode));
15773 offset += 3;
15774 gcc_assert (offset <= TRAMPOLINE_SIZE);
15775 }
15776
15777 #ifdef ENABLE_EXECUTE_STACK
15778 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15779 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15780 #endif
15781 }
15782 \f
15783 /* Codes for all the SSE/MMX builtins. */
15784 enum ix86_builtins
15785 {
15786 IX86_BUILTIN_ADDPS,
15787 IX86_BUILTIN_ADDSS,
15788 IX86_BUILTIN_DIVPS,
15789 IX86_BUILTIN_DIVSS,
15790 IX86_BUILTIN_MULPS,
15791 IX86_BUILTIN_MULSS,
15792 IX86_BUILTIN_SUBPS,
15793 IX86_BUILTIN_SUBSS,
15794
15795 IX86_BUILTIN_CMPEQPS,
15796 IX86_BUILTIN_CMPLTPS,
15797 IX86_BUILTIN_CMPLEPS,
15798 IX86_BUILTIN_CMPGTPS,
15799 IX86_BUILTIN_CMPGEPS,
15800 IX86_BUILTIN_CMPNEQPS,
15801 IX86_BUILTIN_CMPNLTPS,
15802 IX86_BUILTIN_CMPNLEPS,
15803 IX86_BUILTIN_CMPNGTPS,
15804 IX86_BUILTIN_CMPNGEPS,
15805 IX86_BUILTIN_CMPORDPS,
15806 IX86_BUILTIN_CMPUNORDPS,
15807 IX86_BUILTIN_CMPEQSS,
15808 IX86_BUILTIN_CMPLTSS,
15809 IX86_BUILTIN_CMPLESS,
15810 IX86_BUILTIN_CMPNEQSS,
15811 IX86_BUILTIN_CMPNLTSS,
15812 IX86_BUILTIN_CMPNLESS,
15813 IX86_BUILTIN_CMPNGTSS,
15814 IX86_BUILTIN_CMPNGESS,
15815 IX86_BUILTIN_CMPORDSS,
15816 IX86_BUILTIN_CMPUNORDSS,
15817
15818 IX86_BUILTIN_COMIEQSS,
15819 IX86_BUILTIN_COMILTSS,
15820 IX86_BUILTIN_COMILESS,
15821 IX86_BUILTIN_COMIGTSS,
15822 IX86_BUILTIN_COMIGESS,
15823 IX86_BUILTIN_COMINEQSS,
15824 IX86_BUILTIN_UCOMIEQSS,
15825 IX86_BUILTIN_UCOMILTSS,
15826 IX86_BUILTIN_UCOMILESS,
15827 IX86_BUILTIN_UCOMIGTSS,
15828 IX86_BUILTIN_UCOMIGESS,
15829 IX86_BUILTIN_UCOMINEQSS,
15830
15831 IX86_BUILTIN_CVTPI2PS,
15832 IX86_BUILTIN_CVTPS2PI,
15833 IX86_BUILTIN_CVTSI2SS,
15834 IX86_BUILTIN_CVTSI642SS,
15835 IX86_BUILTIN_CVTSS2SI,
15836 IX86_BUILTIN_CVTSS2SI64,
15837 IX86_BUILTIN_CVTTPS2PI,
15838 IX86_BUILTIN_CVTTSS2SI,
15839 IX86_BUILTIN_CVTTSS2SI64,
15840
15841 IX86_BUILTIN_MAXPS,
15842 IX86_BUILTIN_MAXSS,
15843 IX86_BUILTIN_MINPS,
15844 IX86_BUILTIN_MINSS,
15845
15846 IX86_BUILTIN_LOADUPS,
15847 IX86_BUILTIN_STOREUPS,
15848 IX86_BUILTIN_MOVSS,
15849
15850 IX86_BUILTIN_MOVHLPS,
15851 IX86_BUILTIN_MOVLHPS,
15852 IX86_BUILTIN_LOADHPS,
15853 IX86_BUILTIN_LOADLPS,
15854 IX86_BUILTIN_STOREHPS,
15855 IX86_BUILTIN_STORELPS,
15856
15857 IX86_BUILTIN_MASKMOVQ,
15858 IX86_BUILTIN_MOVMSKPS,
15859 IX86_BUILTIN_PMOVMSKB,
15860
15861 IX86_BUILTIN_MOVNTPS,
15862 IX86_BUILTIN_MOVNTQ,
15863
15864 IX86_BUILTIN_LOADDQU,
15865 IX86_BUILTIN_STOREDQU,
15866
15867 IX86_BUILTIN_PACKSSWB,
15868 IX86_BUILTIN_PACKSSDW,
15869 IX86_BUILTIN_PACKUSWB,
15870
15871 IX86_BUILTIN_PADDB,
15872 IX86_BUILTIN_PADDW,
15873 IX86_BUILTIN_PADDD,
15874 IX86_BUILTIN_PADDQ,
15875 IX86_BUILTIN_PADDSB,
15876 IX86_BUILTIN_PADDSW,
15877 IX86_BUILTIN_PADDUSB,
15878 IX86_BUILTIN_PADDUSW,
15879 IX86_BUILTIN_PSUBB,
15880 IX86_BUILTIN_PSUBW,
15881 IX86_BUILTIN_PSUBD,
15882 IX86_BUILTIN_PSUBQ,
15883 IX86_BUILTIN_PSUBSB,
15884 IX86_BUILTIN_PSUBSW,
15885 IX86_BUILTIN_PSUBUSB,
15886 IX86_BUILTIN_PSUBUSW,
15887
15888 IX86_BUILTIN_PAND,
15889 IX86_BUILTIN_PANDN,
15890 IX86_BUILTIN_POR,
15891 IX86_BUILTIN_PXOR,
15892
15893 IX86_BUILTIN_PAVGB,
15894 IX86_BUILTIN_PAVGW,
15895
15896 IX86_BUILTIN_PCMPEQB,
15897 IX86_BUILTIN_PCMPEQW,
15898 IX86_BUILTIN_PCMPEQD,
15899 IX86_BUILTIN_PCMPGTB,
15900 IX86_BUILTIN_PCMPGTW,
15901 IX86_BUILTIN_PCMPGTD,
15902
15903 IX86_BUILTIN_PMADDWD,
15904
15905 IX86_BUILTIN_PMAXSW,
15906 IX86_BUILTIN_PMAXUB,
15907 IX86_BUILTIN_PMINSW,
15908 IX86_BUILTIN_PMINUB,
15909
15910 IX86_BUILTIN_PMULHUW,
15911 IX86_BUILTIN_PMULHW,
15912 IX86_BUILTIN_PMULLW,
15913
15914 IX86_BUILTIN_PSADBW,
15915 IX86_BUILTIN_PSHUFW,
15916
15917 IX86_BUILTIN_PSLLW,
15918 IX86_BUILTIN_PSLLD,
15919 IX86_BUILTIN_PSLLQ,
15920 IX86_BUILTIN_PSRAW,
15921 IX86_BUILTIN_PSRAD,
15922 IX86_BUILTIN_PSRLW,
15923 IX86_BUILTIN_PSRLD,
15924 IX86_BUILTIN_PSRLQ,
15925 IX86_BUILTIN_PSLLWI,
15926 IX86_BUILTIN_PSLLDI,
15927 IX86_BUILTIN_PSLLQI,
15928 IX86_BUILTIN_PSRAWI,
15929 IX86_BUILTIN_PSRADI,
15930 IX86_BUILTIN_PSRLWI,
15931 IX86_BUILTIN_PSRLDI,
15932 IX86_BUILTIN_PSRLQI,
15933
15934 IX86_BUILTIN_PUNPCKHBW,
15935 IX86_BUILTIN_PUNPCKHWD,
15936 IX86_BUILTIN_PUNPCKHDQ,
15937 IX86_BUILTIN_PUNPCKLBW,
15938 IX86_BUILTIN_PUNPCKLWD,
15939 IX86_BUILTIN_PUNPCKLDQ,
15940
15941 IX86_BUILTIN_SHUFPS,
15942
15943 IX86_BUILTIN_RCPPS,
15944 IX86_BUILTIN_RCPSS,
15945 IX86_BUILTIN_RSQRTPS,
15946 IX86_BUILTIN_RSQRTSS,
15947 IX86_BUILTIN_SQRTPS,
15948 IX86_BUILTIN_SQRTSS,
15949
15950 IX86_BUILTIN_UNPCKHPS,
15951 IX86_BUILTIN_UNPCKLPS,
15952
15953 IX86_BUILTIN_ANDPS,
15954 IX86_BUILTIN_ANDNPS,
15955 IX86_BUILTIN_ORPS,
15956 IX86_BUILTIN_XORPS,
15957
15958 IX86_BUILTIN_EMMS,
15959 IX86_BUILTIN_LDMXCSR,
15960 IX86_BUILTIN_STMXCSR,
15961 IX86_BUILTIN_SFENCE,
15962
15963 /* 3DNow! Original */
15964 IX86_BUILTIN_FEMMS,
15965 IX86_BUILTIN_PAVGUSB,
15966 IX86_BUILTIN_PF2ID,
15967 IX86_BUILTIN_PFACC,
15968 IX86_BUILTIN_PFADD,
15969 IX86_BUILTIN_PFCMPEQ,
15970 IX86_BUILTIN_PFCMPGE,
15971 IX86_BUILTIN_PFCMPGT,
15972 IX86_BUILTIN_PFMAX,
15973 IX86_BUILTIN_PFMIN,
15974 IX86_BUILTIN_PFMUL,
15975 IX86_BUILTIN_PFRCP,
15976 IX86_BUILTIN_PFRCPIT1,
15977 IX86_BUILTIN_PFRCPIT2,
15978 IX86_BUILTIN_PFRSQIT1,
15979 IX86_BUILTIN_PFRSQRT,
15980 IX86_BUILTIN_PFSUB,
15981 IX86_BUILTIN_PFSUBR,
15982 IX86_BUILTIN_PI2FD,
15983 IX86_BUILTIN_PMULHRW,
15984
15985 /* 3DNow! Athlon Extensions */
15986 IX86_BUILTIN_PF2IW,
15987 IX86_BUILTIN_PFNACC,
15988 IX86_BUILTIN_PFPNACC,
15989 IX86_BUILTIN_PI2FW,
15990 IX86_BUILTIN_PSWAPDSI,
15991 IX86_BUILTIN_PSWAPDSF,
15992
15993 /* SSE2 */
15994 IX86_BUILTIN_ADDPD,
15995 IX86_BUILTIN_ADDSD,
15996 IX86_BUILTIN_DIVPD,
15997 IX86_BUILTIN_DIVSD,
15998 IX86_BUILTIN_MULPD,
15999 IX86_BUILTIN_MULSD,
16000 IX86_BUILTIN_SUBPD,
16001 IX86_BUILTIN_SUBSD,
16002
16003 IX86_BUILTIN_CMPEQPD,
16004 IX86_BUILTIN_CMPLTPD,
16005 IX86_BUILTIN_CMPLEPD,
16006 IX86_BUILTIN_CMPGTPD,
16007 IX86_BUILTIN_CMPGEPD,
16008 IX86_BUILTIN_CMPNEQPD,
16009 IX86_BUILTIN_CMPNLTPD,
16010 IX86_BUILTIN_CMPNLEPD,
16011 IX86_BUILTIN_CMPNGTPD,
16012 IX86_BUILTIN_CMPNGEPD,
16013 IX86_BUILTIN_CMPORDPD,
16014 IX86_BUILTIN_CMPUNORDPD,
16015 IX86_BUILTIN_CMPNEPD,
16016 IX86_BUILTIN_CMPEQSD,
16017 IX86_BUILTIN_CMPLTSD,
16018 IX86_BUILTIN_CMPLESD,
16019 IX86_BUILTIN_CMPNEQSD,
16020 IX86_BUILTIN_CMPNLTSD,
16021 IX86_BUILTIN_CMPNLESD,
16022 IX86_BUILTIN_CMPORDSD,
16023 IX86_BUILTIN_CMPUNORDSD,
16024 IX86_BUILTIN_CMPNESD,
16025
16026 IX86_BUILTIN_COMIEQSD,
16027 IX86_BUILTIN_COMILTSD,
16028 IX86_BUILTIN_COMILESD,
16029 IX86_BUILTIN_COMIGTSD,
16030 IX86_BUILTIN_COMIGESD,
16031 IX86_BUILTIN_COMINEQSD,
16032 IX86_BUILTIN_UCOMIEQSD,
16033 IX86_BUILTIN_UCOMILTSD,
16034 IX86_BUILTIN_UCOMILESD,
16035 IX86_BUILTIN_UCOMIGTSD,
16036 IX86_BUILTIN_UCOMIGESD,
16037 IX86_BUILTIN_UCOMINEQSD,
16038
16039 IX86_BUILTIN_MAXPD,
16040 IX86_BUILTIN_MAXSD,
16041 IX86_BUILTIN_MINPD,
16042 IX86_BUILTIN_MINSD,
16043
16044 IX86_BUILTIN_ANDPD,
16045 IX86_BUILTIN_ANDNPD,
16046 IX86_BUILTIN_ORPD,
16047 IX86_BUILTIN_XORPD,
16048
16049 IX86_BUILTIN_SQRTPD,
16050 IX86_BUILTIN_SQRTSD,
16051
16052 IX86_BUILTIN_UNPCKHPD,
16053 IX86_BUILTIN_UNPCKLPD,
16054
16055 IX86_BUILTIN_SHUFPD,
16056
16057 IX86_BUILTIN_LOADUPD,
16058 IX86_BUILTIN_STOREUPD,
16059 IX86_BUILTIN_MOVSD,
16060
16061 IX86_BUILTIN_LOADHPD,
16062 IX86_BUILTIN_LOADLPD,
16063
16064 IX86_BUILTIN_CVTDQ2PD,
16065 IX86_BUILTIN_CVTDQ2PS,
16066
16067 IX86_BUILTIN_CVTPD2DQ,
16068 IX86_BUILTIN_CVTPD2PI,
16069 IX86_BUILTIN_CVTPD2PS,
16070 IX86_BUILTIN_CVTTPD2DQ,
16071 IX86_BUILTIN_CVTTPD2PI,
16072
16073 IX86_BUILTIN_CVTPI2PD,
16074 IX86_BUILTIN_CVTSI2SD,
16075 IX86_BUILTIN_CVTSI642SD,
16076
16077 IX86_BUILTIN_CVTSD2SI,
16078 IX86_BUILTIN_CVTSD2SI64,
16079 IX86_BUILTIN_CVTSD2SS,
16080 IX86_BUILTIN_CVTSS2SD,
16081 IX86_BUILTIN_CVTTSD2SI,
16082 IX86_BUILTIN_CVTTSD2SI64,
16083
16084 IX86_BUILTIN_CVTPS2DQ,
16085 IX86_BUILTIN_CVTPS2PD,
16086 IX86_BUILTIN_CVTTPS2DQ,
16087
16088 IX86_BUILTIN_MOVNTI,
16089 IX86_BUILTIN_MOVNTPD,
16090 IX86_BUILTIN_MOVNTDQ,
16091
16092 /* SSE2 MMX */
16093 IX86_BUILTIN_MASKMOVDQU,
16094 IX86_BUILTIN_MOVMSKPD,
16095 IX86_BUILTIN_PMOVMSKB128,
16096
16097 IX86_BUILTIN_PACKSSWB128,
16098 IX86_BUILTIN_PACKSSDW128,
16099 IX86_BUILTIN_PACKUSWB128,
16100
16101 IX86_BUILTIN_PADDB128,
16102 IX86_BUILTIN_PADDW128,
16103 IX86_BUILTIN_PADDD128,
16104 IX86_BUILTIN_PADDQ128,
16105 IX86_BUILTIN_PADDSB128,
16106 IX86_BUILTIN_PADDSW128,
16107 IX86_BUILTIN_PADDUSB128,
16108 IX86_BUILTIN_PADDUSW128,
16109 IX86_BUILTIN_PSUBB128,
16110 IX86_BUILTIN_PSUBW128,
16111 IX86_BUILTIN_PSUBD128,
16112 IX86_BUILTIN_PSUBQ128,
16113 IX86_BUILTIN_PSUBSB128,
16114 IX86_BUILTIN_PSUBSW128,
16115 IX86_BUILTIN_PSUBUSB128,
16116 IX86_BUILTIN_PSUBUSW128,
16117
16118 IX86_BUILTIN_PAND128,
16119 IX86_BUILTIN_PANDN128,
16120 IX86_BUILTIN_POR128,
16121 IX86_BUILTIN_PXOR128,
16122
16123 IX86_BUILTIN_PAVGB128,
16124 IX86_BUILTIN_PAVGW128,
16125
16126 IX86_BUILTIN_PCMPEQB128,
16127 IX86_BUILTIN_PCMPEQW128,
16128 IX86_BUILTIN_PCMPEQD128,
16129 IX86_BUILTIN_PCMPGTB128,
16130 IX86_BUILTIN_PCMPGTW128,
16131 IX86_BUILTIN_PCMPGTD128,
16132
16133 IX86_BUILTIN_PMADDWD128,
16134
16135 IX86_BUILTIN_PMAXSW128,
16136 IX86_BUILTIN_PMAXUB128,
16137 IX86_BUILTIN_PMINSW128,
16138 IX86_BUILTIN_PMINUB128,
16139
16140 IX86_BUILTIN_PMULUDQ,
16141 IX86_BUILTIN_PMULUDQ128,
16142 IX86_BUILTIN_PMULHUW128,
16143 IX86_BUILTIN_PMULHW128,
16144 IX86_BUILTIN_PMULLW128,
16145
16146 IX86_BUILTIN_PSADBW128,
16147 IX86_BUILTIN_PSHUFHW,
16148 IX86_BUILTIN_PSHUFLW,
16149 IX86_BUILTIN_PSHUFD,
16150
16151 IX86_BUILTIN_PSLLW128,
16152 IX86_BUILTIN_PSLLD128,
16153 IX86_BUILTIN_PSLLQ128,
16154 IX86_BUILTIN_PSRAW128,
16155 IX86_BUILTIN_PSRAD128,
16156 IX86_BUILTIN_PSRLW128,
16157 IX86_BUILTIN_PSRLD128,
16158 IX86_BUILTIN_PSRLQ128,
16159 IX86_BUILTIN_PSLLDQI128,
16160 IX86_BUILTIN_PSLLWI128,
16161 IX86_BUILTIN_PSLLDI128,
16162 IX86_BUILTIN_PSLLQI128,
16163 IX86_BUILTIN_PSRAWI128,
16164 IX86_BUILTIN_PSRADI128,
16165 IX86_BUILTIN_PSRLDQI128,
16166 IX86_BUILTIN_PSRLWI128,
16167 IX86_BUILTIN_PSRLDI128,
16168 IX86_BUILTIN_PSRLQI128,
16169
16170 IX86_BUILTIN_PUNPCKHBW128,
16171 IX86_BUILTIN_PUNPCKHWD128,
16172 IX86_BUILTIN_PUNPCKHDQ128,
16173 IX86_BUILTIN_PUNPCKHQDQ128,
16174 IX86_BUILTIN_PUNPCKLBW128,
16175 IX86_BUILTIN_PUNPCKLWD128,
16176 IX86_BUILTIN_PUNPCKLDQ128,
16177 IX86_BUILTIN_PUNPCKLQDQ128,
16178
16179 IX86_BUILTIN_CLFLUSH,
16180 IX86_BUILTIN_MFENCE,
16181 IX86_BUILTIN_LFENCE,
16182
16183 /* Prescott New Instructions. */
16184 IX86_BUILTIN_ADDSUBPS,
16185 IX86_BUILTIN_HADDPS,
16186 IX86_BUILTIN_HSUBPS,
16187 IX86_BUILTIN_MOVSHDUP,
16188 IX86_BUILTIN_MOVSLDUP,
16189 IX86_BUILTIN_ADDSUBPD,
16190 IX86_BUILTIN_HADDPD,
16191 IX86_BUILTIN_HSUBPD,
16192 IX86_BUILTIN_LDDQU,
16193
16194 IX86_BUILTIN_MONITOR,
16195 IX86_BUILTIN_MWAIT,
16196
16197 /* SSSE3. */
16198 IX86_BUILTIN_PHADDW,
16199 IX86_BUILTIN_PHADDD,
16200 IX86_BUILTIN_PHADDSW,
16201 IX86_BUILTIN_PHSUBW,
16202 IX86_BUILTIN_PHSUBD,
16203 IX86_BUILTIN_PHSUBSW,
16204 IX86_BUILTIN_PMADDUBSW,
16205 IX86_BUILTIN_PMULHRSW,
16206 IX86_BUILTIN_PSHUFB,
16207 IX86_BUILTIN_PSIGNB,
16208 IX86_BUILTIN_PSIGNW,
16209 IX86_BUILTIN_PSIGND,
16210 IX86_BUILTIN_PALIGNR,
16211 IX86_BUILTIN_PABSB,
16212 IX86_BUILTIN_PABSW,
16213 IX86_BUILTIN_PABSD,
16214
16215 IX86_BUILTIN_PHADDW128,
16216 IX86_BUILTIN_PHADDD128,
16217 IX86_BUILTIN_PHADDSW128,
16218 IX86_BUILTIN_PHSUBW128,
16219 IX86_BUILTIN_PHSUBD128,
16220 IX86_BUILTIN_PHSUBSW128,
16221 IX86_BUILTIN_PMADDUBSW128,
16222 IX86_BUILTIN_PMULHRSW128,
16223 IX86_BUILTIN_PSHUFB128,
16224 IX86_BUILTIN_PSIGNB128,
16225 IX86_BUILTIN_PSIGNW128,
16226 IX86_BUILTIN_PSIGND128,
16227 IX86_BUILTIN_PALIGNR128,
16228 IX86_BUILTIN_PABSB128,
16229 IX86_BUILTIN_PABSW128,
16230 IX86_BUILTIN_PABSD128,
16231
16232 /* AMDFAM10 - SSE4A New Instructions. */
16233 IX86_BUILTIN_MOVNTSD,
16234 IX86_BUILTIN_MOVNTSS,
16235 IX86_BUILTIN_EXTRQI,
16236 IX86_BUILTIN_EXTRQ,
16237 IX86_BUILTIN_INSERTQI,
16238 IX86_BUILTIN_INSERTQ,
16239
16240 IX86_BUILTIN_VEC_INIT_V2SI,
16241 IX86_BUILTIN_VEC_INIT_V4HI,
16242 IX86_BUILTIN_VEC_INIT_V8QI,
16243 IX86_BUILTIN_VEC_EXT_V2DF,
16244 IX86_BUILTIN_VEC_EXT_V2DI,
16245 IX86_BUILTIN_VEC_EXT_V4SF,
16246 IX86_BUILTIN_VEC_EXT_V4SI,
16247 IX86_BUILTIN_VEC_EXT_V8HI,
16248 IX86_BUILTIN_VEC_EXT_V2SI,
16249 IX86_BUILTIN_VEC_EXT_V4HI,
16250 IX86_BUILTIN_VEC_SET_V8HI,
16251 IX86_BUILTIN_VEC_SET_V4HI,
16252
16253 IX86_BUILTIN_MAX
16254 };
16255
16256 /* Table for the ix86 builtin decls. */
16257 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16258
16259 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
16260 * only if the target_flags include one of MASK.  Stores the function decl
16261 * in the ix86_builtins array.
16262 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16263
16264 static inline tree
16265 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16266 {
16267 tree decl = NULL_TREE;
16268
16269 if (mask & target_flags
16270 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16271 {
16272 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16273 NULL, NULL_TREE);
16274 ix86_builtins[(int) code] = decl;
16275 }
16276
16277 return decl;
16278 }
16279
16280 /* Like def_builtin, but also marks the function decl "const". */
16281
16282 static inline tree
16283 def_builtin_const (int mask, const char *name, tree type,
16284 enum ix86_builtins code)
16285 {
16286 tree decl = def_builtin (mask, name, type, code);
16287 if (decl)
16288 TREE_READONLY (decl) = 1;
16289 return decl;
16290 }
16291
16292 /* Bits for builtin_description.flag. */
16293
16294 /* Set when we don't support the comparison natively, and should
16295 swap_comparison in order to support it. */
16296 #define BUILTIN_DESC_SWAP_OPERANDS 1
16297
16298 struct builtin_description
16299 {
16300 const unsigned int mask;
16301 const enum insn_code icode;
16302 const char *const name;
16303 const enum ix86_builtins code;
16304 const enum rtx_code comparison;
16305 const unsigned int flag;
16306 };
16307
16308 static const struct builtin_description bdesc_comi[] =
16309 {
16310 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16311 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16312 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16313 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16314 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16315 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16316 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16317 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16318 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16319 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16320 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16321 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16322 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16323 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16324 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16325 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16326 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16327 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16328 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16329 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16330 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16331 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16332 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16333 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16334 };
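/* The comparison codes above (UNEQ, UNLT, UNLE, LTGT, ...) describe the
   result actually delivered by comiss/ucomiss and comisd/ucomisd: an
   unordered (NaN) operand sets ZF, PF and CF, so the "equal"-style tests
   also report true in that case, while the LTGT-based "not equal" test
   reports false.  */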
16335
16336 static const struct builtin_description bdesc_2arg[] =
16337 {
16338 /* SSE */
16339 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16340 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16341 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16342 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16343 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16344 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16345 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16346 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16347
16348 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16349 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16350 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16351 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16352 BUILTIN_DESC_SWAP_OPERANDS },
16353 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16354 BUILTIN_DESC_SWAP_OPERANDS },
16355 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16356 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16357 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16358 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16359 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16360 BUILTIN_DESC_SWAP_OPERANDS },
16361 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16362 BUILTIN_DESC_SWAP_OPERANDS },
16363 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16364 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16365 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16366 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16367 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16368 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16369 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16370 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16371 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16372 BUILTIN_DESC_SWAP_OPERANDS },
16373 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16374 BUILTIN_DESC_SWAP_OPERANDS },
16375 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16376
16377 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16378 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16379 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16380 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16381
16382 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16383 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16384 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16385 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16386
16387 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16388 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16389 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16390 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16391 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16392
16393 /* MMX */
16394 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16395 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16396 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16397 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16398 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16399 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16400 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16401 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16402
16403 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16404 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16405 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16406 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16407 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16408 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16409 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16410 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16411
16412 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16413 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16414 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16415
16416 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16417 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16418 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16419 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16420
16421 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16422 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16423
16424 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16425 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16426 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16427 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16428 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16429 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16430
16431 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16432 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16433 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16434 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16435
16436 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16437 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16438 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16439 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16440 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16441 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16442
16443 /* Special. */
16444 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16445 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16446 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16447
16448 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16449 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16450 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16451
16452 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16453 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16454 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16455 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16456 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16457 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16458
16459 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16460 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16461 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16462 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16463 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16464 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16465
16466 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16467 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16468 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16469 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16470
16471 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16472 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16473
16474 /* SSE2 */
16475 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16476 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16477 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16478 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16479 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16480 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16481 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16482 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16483
16484 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16485 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16486 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16487 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16488 BUILTIN_DESC_SWAP_OPERANDS },
16489 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16490 BUILTIN_DESC_SWAP_OPERANDS },
16491 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16492 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16493 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16494 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16495 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16496 BUILTIN_DESC_SWAP_OPERANDS },
16497 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16498 BUILTIN_DESC_SWAP_OPERANDS },
16499 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16500 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16501 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16502 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16503 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16504 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16505 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16506 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16507 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16508
16509 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16510 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16511 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16512 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16513
16514 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16515 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16516 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16517 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16518
16519 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16520 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16521 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16522
16523 /* SSE2 MMX */
16524 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16525 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16526 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16527 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16528 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16529 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16530 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16531 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16532
16533 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16534 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16535 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16536 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16537 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16538 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16539 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16540 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16541
16542 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16543 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16544
16545 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16546 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16547 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16548 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16549
16550 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16551 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16552
16553 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16554 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16555 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16556 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16557 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16558 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16559
16560 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16561 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16562 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16563 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16564
16565 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16566 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16567 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16568 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16569 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16570 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16571 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16572 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16573
16574 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16577
16578 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16579 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16580
16581 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16582 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16583
16584 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16585 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16586 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16587
16588 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16589 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16590 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16591
16592 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16594
16595 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16596
16597 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16598 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16600 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16601
16602 /* SSE3 MMX */
16603 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16604 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16605 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16606 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16607 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16608 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16609
16610 /* SSSE3 */
16611 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16612 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16613 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16614 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16615 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16616 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16617 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16618 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16619 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16620 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16621 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16622 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16623 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16624 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16625 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16626 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16627 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16628 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16629 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16630 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16631 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16632 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16633 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16634 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16635 };
16636
16637 static const struct builtin_description bdesc_1arg[] =
16638 {
16639 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16640 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16641
16642 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16643 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16644 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16645
16646 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16647 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16648 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16649 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16650 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16651 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16652
16653 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16654 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16655
16656 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16657
16658 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16659 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16660
16661 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16662 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16663 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16664 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16665 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16666
16667 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16668
16669 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16670 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16671 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16672 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16673
16674 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16675 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16676 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16677
16678 /* SSE3 */
16679 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16680 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16681
16682 /* SSSE3 */
16683 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16684 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16685 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16686 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16687 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16688 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16689 };
16690
16691 static void
16692 ix86_init_builtins (void)
16693 {
16694 if (TARGET_MMX)
16695 ix86_init_mmx_sse_builtins ();
16696 }
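/* This is the function the i386 port installs as its TARGET_INIT_BUILTINS
   hook (elsewhere in this file), so it is invoked once per compilation,
   before any user code is expanded.  */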
16697
16698 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16699 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
16700 are defined. */
16701 static void
16702 ix86_init_mmx_sse_builtins (void)
16703 {
16704 const struct builtin_description * d;
16705 size_t i;
16706
16707 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16708 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16709 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16710 tree V2DI_type_node
16711 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16712 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16713 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16714 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16715 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16716 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16717 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16718
16719 tree pchar_type_node = build_pointer_type (char_type_node);
16720 tree pcchar_type_node = build_pointer_type (
16721 build_type_variant (char_type_node, 1, 0));
16722 tree pfloat_type_node = build_pointer_type (float_type_node);
16723 tree pcfloat_type_node = build_pointer_type (
16724 build_type_variant (float_type_node, 1, 0));
16725 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16726 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16727 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16728
16729 /* Comparisons. */
16730 tree int_ftype_v4sf_v4sf
16731 = build_function_type_list (integer_type_node,
16732 V4SF_type_node, V4SF_type_node, NULL_TREE);
16733 tree v4si_ftype_v4sf_v4sf
16734 = build_function_type_list (V4SI_type_node,
16735 V4SF_type_node, V4SF_type_node, NULL_TREE);
16736 /* MMX/SSE/integer conversions. */
16737 tree int_ftype_v4sf
16738 = build_function_type_list (integer_type_node,
16739 V4SF_type_node, NULL_TREE);
16740 tree int64_ftype_v4sf
16741 = build_function_type_list (long_long_integer_type_node,
16742 V4SF_type_node, NULL_TREE);
16743 tree int_ftype_v8qi
16744 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16745 tree v4sf_ftype_v4sf_int
16746 = build_function_type_list (V4SF_type_node,
16747 V4SF_type_node, integer_type_node, NULL_TREE);
16748 tree v4sf_ftype_v4sf_int64
16749 = build_function_type_list (V4SF_type_node,
16750 V4SF_type_node, long_long_integer_type_node,
16751 NULL_TREE);
16752 tree v4sf_ftype_v4sf_v2si
16753 = build_function_type_list (V4SF_type_node,
16754 V4SF_type_node, V2SI_type_node, NULL_TREE);
16755
16756 /* Miscellaneous. */
16757 tree v8qi_ftype_v4hi_v4hi
16758 = build_function_type_list (V8QI_type_node,
16759 V4HI_type_node, V4HI_type_node, NULL_TREE);
16760 tree v4hi_ftype_v2si_v2si
16761 = build_function_type_list (V4HI_type_node,
16762 V2SI_type_node, V2SI_type_node, NULL_TREE);
16763 tree v4sf_ftype_v4sf_v4sf_int
16764 = build_function_type_list (V4SF_type_node,
16765 V4SF_type_node, V4SF_type_node,
16766 integer_type_node, NULL_TREE);
16767 tree v2si_ftype_v4hi_v4hi
16768 = build_function_type_list (V2SI_type_node,
16769 V4HI_type_node, V4HI_type_node, NULL_TREE);
16770 tree v4hi_ftype_v4hi_int
16771 = build_function_type_list (V4HI_type_node,
16772 V4HI_type_node, integer_type_node, NULL_TREE);
16773 tree v4hi_ftype_v4hi_di
16774 = build_function_type_list (V4HI_type_node,
16775 V4HI_type_node, long_long_unsigned_type_node,
16776 NULL_TREE);
16777 tree v2si_ftype_v2si_di
16778 = build_function_type_list (V2SI_type_node,
16779 V2SI_type_node, long_long_unsigned_type_node,
16780 NULL_TREE);
16781 tree void_ftype_void
16782 = build_function_type (void_type_node, void_list_node);
16783 tree void_ftype_unsigned
16784 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16785 tree void_ftype_unsigned_unsigned
16786 = build_function_type_list (void_type_node, unsigned_type_node,
16787 unsigned_type_node, NULL_TREE);
16788 tree void_ftype_pcvoid_unsigned_unsigned
16789 = build_function_type_list (void_type_node, const_ptr_type_node,
16790 unsigned_type_node, unsigned_type_node,
16791 NULL_TREE);
16792 tree unsigned_ftype_void
16793 = build_function_type (unsigned_type_node, void_list_node);
16794 tree v2si_ftype_v4sf
16795 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16796 /* Loads/stores. */
16797 tree void_ftype_v8qi_v8qi_pchar
16798 = build_function_type_list (void_type_node,
16799 V8QI_type_node, V8QI_type_node,
16800 pchar_type_node, NULL_TREE);
16801 tree v4sf_ftype_pcfloat
16802 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16803 /* @@@ the type is bogus */
16804 tree v4sf_ftype_v4sf_pv2si
16805 = build_function_type_list (V4SF_type_node,
16806 V4SF_type_node, pv2si_type_node, NULL_TREE);
16807 tree void_ftype_pv2si_v4sf
16808 = build_function_type_list (void_type_node,
16809 pv2si_type_node, V4SF_type_node, NULL_TREE);
16810 tree void_ftype_pfloat_v4sf
16811 = build_function_type_list (void_type_node,
16812 pfloat_type_node, V4SF_type_node, NULL_TREE);
16813 tree void_ftype_pdi_di
16814 = build_function_type_list (void_type_node,
16815 pdi_type_node, long_long_unsigned_type_node,
16816 NULL_TREE);
16817 tree void_ftype_pv2di_v2di
16818 = build_function_type_list (void_type_node,
16819 pv2di_type_node, V2DI_type_node, NULL_TREE);
16820 /* Normal vector unops. */
16821 tree v4sf_ftype_v4sf
16822 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16823 tree v16qi_ftype_v16qi
16824 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16825 tree v8hi_ftype_v8hi
16826 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16827 tree v4si_ftype_v4si
16828 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16829 tree v8qi_ftype_v8qi
16830 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16831 tree v4hi_ftype_v4hi
16832 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16833
16834 /* Normal vector binops. */
16835 tree v4sf_ftype_v4sf_v4sf
16836 = build_function_type_list (V4SF_type_node,
16837 V4SF_type_node, V4SF_type_node, NULL_TREE);
16838 tree v8qi_ftype_v8qi_v8qi
16839 = build_function_type_list (V8QI_type_node,
16840 V8QI_type_node, V8QI_type_node, NULL_TREE);
16841 tree v4hi_ftype_v4hi_v4hi
16842 = build_function_type_list (V4HI_type_node,
16843 V4HI_type_node, V4HI_type_node, NULL_TREE);
16844 tree v2si_ftype_v2si_v2si
16845 = build_function_type_list (V2SI_type_node,
16846 V2SI_type_node, V2SI_type_node, NULL_TREE);
16847 tree di_ftype_di_di
16848 = build_function_type_list (long_long_unsigned_type_node,
16849 long_long_unsigned_type_node,
16850 long_long_unsigned_type_node, NULL_TREE);
16851
16852 tree di_ftype_di_di_int
16853 = build_function_type_list (long_long_unsigned_type_node,
16854 long_long_unsigned_type_node,
16855 long_long_unsigned_type_node,
16856 integer_type_node, NULL_TREE);
16857
16858 tree v2si_ftype_v2sf
16859 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16860 tree v2sf_ftype_v2si
16861 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16862 tree v2si_ftype_v2si
16863 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16864 tree v2sf_ftype_v2sf
16865 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16866 tree v2sf_ftype_v2sf_v2sf
16867 = build_function_type_list (V2SF_type_node,
16868 V2SF_type_node, V2SF_type_node, NULL_TREE);
16869 tree v2si_ftype_v2sf_v2sf
16870 = build_function_type_list (V2SI_type_node,
16871 V2SF_type_node, V2SF_type_node, NULL_TREE);
16872 tree pint_type_node = build_pointer_type (integer_type_node);
16873 tree pdouble_type_node = build_pointer_type (double_type_node);
16874 tree pcdouble_type_node = build_pointer_type (
16875 build_type_variant (double_type_node, 1, 0));
16876 tree int_ftype_v2df_v2df
16877 = build_function_type_list (integer_type_node,
16878 V2DF_type_node, V2DF_type_node, NULL_TREE);
16879
16880 tree void_ftype_pcvoid
16881 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16882 tree v4sf_ftype_v4si
16883 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16884 tree v4si_ftype_v4sf
16885 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16886 tree v2df_ftype_v4si
16887 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16888 tree v4si_ftype_v2df
16889 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16890 tree v2si_ftype_v2df
16891 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16892 tree v4sf_ftype_v2df
16893 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16894 tree v2df_ftype_v2si
16895 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16896 tree v2df_ftype_v4sf
16897 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16898 tree int_ftype_v2df
16899 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16900 tree int64_ftype_v2df
16901 = build_function_type_list (long_long_integer_type_node,
16902 V2DF_type_node, NULL_TREE);
16903 tree v2df_ftype_v2df_int
16904 = build_function_type_list (V2DF_type_node,
16905 V2DF_type_node, integer_type_node, NULL_TREE);
16906 tree v2df_ftype_v2df_int64
16907 = build_function_type_list (V2DF_type_node,
16908 V2DF_type_node, long_long_integer_type_node,
16909 NULL_TREE);
16910 tree v4sf_ftype_v4sf_v2df
16911 = build_function_type_list (V4SF_type_node,
16912 V4SF_type_node, V2DF_type_node, NULL_TREE);
16913 tree v2df_ftype_v2df_v4sf
16914 = build_function_type_list (V2DF_type_node,
16915 V2DF_type_node, V4SF_type_node, NULL_TREE);
16916 tree v2df_ftype_v2df_v2df_int
16917 = build_function_type_list (V2DF_type_node,
16918 V2DF_type_node, V2DF_type_node,
16919 integer_type_node,
16920 NULL_TREE);
16921 tree v2df_ftype_v2df_pcdouble
16922 = build_function_type_list (V2DF_type_node,
16923 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16924 tree void_ftype_pdouble_v2df
16925 = build_function_type_list (void_type_node,
16926 pdouble_type_node, V2DF_type_node, NULL_TREE);
16927 tree void_ftype_pint_int
16928 = build_function_type_list (void_type_node,
16929 pint_type_node, integer_type_node, NULL_TREE);
16930 tree void_ftype_v16qi_v16qi_pchar
16931 = build_function_type_list (void_type_node,
16932 V16QI_type_node, V16QI_type_node,
16933 pchar_type_node, NULL_TREE);
16934 tree v2df_ftype_pcdouble
16935 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16936 tree v2df_ftype_v2df_v2df
16937 = build_function_type_list (V2DF_type_node,
16938 V2DF_type_node, V2DF_type_node, NULL_TREE);
16939 tree v16qi_ftype_v16qi_v16qi
16940 = build_function_type_list (V16QI_type_node,
16941 V16QI_type_node, V16QI_type_node, NULL_TREE);
16942 tree v8hi_ftype_v8hi_v8hi
16943 = build_function_type_list (V8HI_type_node,
16944 V8HI_type_node, V8HI_type_node, NULL_TREE);
16945 tree v4si_ftype_v4si_v4si
16946 = build_function_type_list (V4SI_type_node,
16947 V4SI_type_node, V4SI_type_node, NULL_TREE);
16948 tree v2di_ftype_v2di_v2di
16949 = build_function_type_list (V2DI_type_node,
16950 V2DI_type_node, V2DI_type_node, NULL_TREE);
16951 tree v2di_ftype_v2df_v2df
16952 = build_function_type_list (V2DI_type_node,
16953 V2DF_type_node, V2DF_type_node, NULL_TREE);
16954 tree v2df_ftype_v2df
16955 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16956 tree v2di_ftype_v2di_int
16957 = build_function_type_list (V2DI_type_node,
16958 V2DI_type_node, integer_type_node, NULL_TREE);
16959 tree v2di_ftype_v2di_v2di_int
16960 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16961 V2DI_type_node, integer_type_node, NULL_TREE);
16962 tree v4si_ftype_v4si_int
16963 = build_function_type_list (V4SI_type_node,
16964 V4SI_type_node, integer_type_node, NULL_TREE);
16965 tree v8hi_ftype_v8hi_int
16966 = build_function_type_list (V8HI_type_node,
16967 V8HI_type_node, integer_type_node, NULL_TREE);
16968 tree v8hi_ftype_v8hi_v2di
16969 = build_function_type_list (V8HI_type_node,
16970 V8HI_type_node, V2DI_type_node, NULL_TREE);
16971 tree v4si_ftype_v4si_v2di
16972 = build_function_type_list (V4SI_type_node,
16973 V4SI_type_node, V2DI_type_node, NULL_TREE);
16974 tree v4si_ftype_v8hi_v8hi
16975 = build_function_type_list (V4SI_type_node,
16976 V8HI_type_node, V8HI_type_node, NULL_TREE);
16977 tree di_ftype_v8qi_v8qi
16978 = build_function_type_list (long_long_unsigned_type_node,
16979 V8QI_type_node, V8QI_type_node, NULL_TREE);
16980 tree di_ftype_v2si_v2si
16981 = build_function_type_list (long_long_unsigned_type_node,
16982 V2SI_type_node, V2SI_type_node, NULL_TREE);
16983 tree v2di_ftype_v16qi_v16qi
16984 = build_function_type_list (V2DI_type_node,
16985 V16QI_type_node, V16QI_type_node, NULL_TREE);
16986 tree v2di_ftype_v4si_v4si
16987 = build_function_type_list (V2DI_type_node,
16988 V4SI_type_node, V4SI_type_node, NULL_TREE);
16989 tree int_ftype_v16qi
16990 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16991 tree v16qi_ftype_pcchar
16992 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16993 tree void_ftype_pchar_v16qi
16994 = build_function_type_list (void_type_node,
16995 pchar_type_node, V16QI_type_node, NULL_TREE);
16996
16997 tree v2di_ftype_v2di_unsigned_unsigned
16998 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16999 unsigned_type_node, unsigned_type_node,
17000 NULL_TREE);
17001 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17002 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17003 unsigned_type_node, unsigned_type_node,
17004 NULL_TREE);
17005 tree v2di_ftype_v2di_v16qi
17006 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17007 NULL_TREE);
17008
17009 tree float80_type;
17010 tree float128_type;
17011 tree ftype;
17012
17013 /* The __float80 type. */
17014 if (TYPE_MODE (long_double_type_node) == XFmode)
17015 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17016 "__float80");
17017 else
17018 {
17019 /* The __float80 type. */
17020 float80_type = make_node (REAL_TYPE);
17021 TYPE_PRECISION (float80_type) = 80;
17022 layout_type (float80_type);
17023 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17024 }
17025
17026 if (TARGET_64BIT)
17027 {
17028 float128_type = make_node (REAL_TYPE);
17029 TYPE_PRECISION (float128_type) = 128;
17030 layout_type (float128_type);
17031 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17032 }
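/* From here on, __float80 (and __float128 in 64-bit mode) are available as
   type names in user code; a minimal illustration (not taken from this file):
     __float80 x = 1.0;
   Note that when long double already has XFmode, __float80 is simply another
   name for the existing long double node rather than a distinct type.  */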
17033
17034 /* Add all builtins that are more or less simple operations on two
17035 operands. */
17036 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17037 {
17038 /* Use one of the operands; the target can have a different mode for
17039 mask-generating compares. */
17040 enum machine_mode mode;
17041 tree type;
17042
17043 if (d->name == 0)
17044 continue;
17045 mode = insn_data[d->icode].operand[1].mode;
17046
17047 switch (mode)
17048 {
17049 case V16QImode:
17050 type = v16qi_ftype_v16qi_v16qi;
17051 break;
17052 case V8HImode:
17053 type = v8hi_ftype_v8hi_v8hi;
17054 break;
17055 case V4SImode:
17056 type = v4si_ftype_v4si_v4si;
17057 break;
17058 case V2DImode:
17059 type = v2di_ftype_v2di_v2di;
17060 break;
17061 case V2DFmode:
17062 type = v2df_ftype_v2df_v2df;
17063 break;
17064 case V4SFmode:
17065 type = v4sf_ftype_v4sf_v4sf;
17066 break;
17067 case V8QImode:
17068 type = v8qi_ftype_v8qi_v8qi;
17069 break;
17070 case V4HImode:
17071 type = v4hi_ftype_v4hi_v4hi;
17072 break;
17073 case V2SImode:
17074 type = v2si_ftype_v2si_v2si;
17075 break;
17076 case DImode:
17077 type = di_ftype_di_di;
17078 break;
17079
17080 default:
17081 gcc_unreachable ();
17082 }
17083
17084 /* Override for comparisons. */
17085 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17086 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17087 type = v4si_ftype_v4sf_v4sf;
17088
17089 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17090 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17091 type = v2di_ftype_v2df_v2df;
17092
17093 def_builtin (d->mask, d->name, type, d->code);
17094 }
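/* A concrete instance of the dispatch above: CODE_FOR_addv4sf3 has V4SFmode
   operands, so "__builtin_ia32_addps" gets the type v4sf_ftype_v4sf_v4sf,
   while the sse_maskcmpv4sf3 comparison entries are overridden to return a
   V4SI bit mask even though they compare V4SF inputs.  */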
17095
17096 /* Add all builtins that are more or less simple operations on one operand. */
17097 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17098 {
17099 enum machine_mode mode;
17100 tree type;
17101
17102 if (d->name == 0)
17103 continue;
17104 mode = insn_data[d->icode].operand[1].mode;
17105
17106 switch (mode)
17107 {
17108 case V16QImode:
17109 type = v16qi_ftype_v16qi;
17110 break;
17111 case V8HImode:
17112 type = v8hi_ftype_v8hi;
17113 break;
17114 case V4SImode:
17115 type = v4si_ftype_v4si;
17116 break;
17117 case V2DFmode:
17118 type = v2df_ftype_v2df;
17119 break;
17120 case V4SFmode:
17121 type = v4sf_ftype_v4sf;
17122 break;
17123 case V8QImode:
17124 type = v8qi_ftype_v8qi;
17125 break;
17126 case V4HImode:
17127 type = v4hi_ftype_v4hi;
17128 break;
17129 case V2SImode:
17130 type = v2si_ftype_v2si;
17131 break;
17132
17133 default:
17134 gcc_unreachable ();
17135 }
17136
17137 def_builtin (d->mask, d->name, type, d->code);
17138 }
17139
17140 /* Add the remaining MMX insns with somewhat more complicated types. */
17141 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17142 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17143 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17144 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17145
17146 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17147 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17148 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17149
17150 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17151 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17152
17153 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17154 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17155
17156 /* comi/ucomi insns. */
17157 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17158 if (d->mask == MASK_SSE2)
17159 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17160 else
17161 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17162
17163 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17164 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17165 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17166
17167 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17168 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17169 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17170 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17171 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17172 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17173 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17174 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17175 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17176 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17177 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17178
17179 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17180
17181 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17182 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17183
17184 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17185 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17186 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17187 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17188
17189 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17190 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17191 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17192 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17193
17194 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17195
17196 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17197
17198 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17199 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17200 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17201 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17202 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17203 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17204
17205 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17206
17207 /* Original 3DNow! */
17208 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17209 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17210 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17211 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17212 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17213 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17214 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17215 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17216 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17217 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17218 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17219 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17220 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17221 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17222 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17223 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17224 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17225 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17226 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17227 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17228
17229 /* 3DNow! extension as used in the Athlon CPU. */
17230 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17231 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17232 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17233 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17234 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17235 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17236
17237 /* SSE2 */
17238 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17239
17240 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17241 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17242
17243 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17244 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17245
17246 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17247 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17248 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17249 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17250 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17251
17252 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17253 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17254 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17255 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17256
17257 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17258 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17259
17260 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17261
17262 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17263 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17264
17265 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17266 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17267 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17268 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17269 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17270
17271 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17272
17273 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17274 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17275 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17276 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17277
17278 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17279 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17280 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17281
17282 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17283 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17284 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17285 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17286
17287 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17288 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17289 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17290
17291 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17292 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17293
17294 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17295 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17296
17297 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17298 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17299 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17300
17301 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17302 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17303 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17304
17305 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17306 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17307
17308 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17309 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17310 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17311 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17312
17313 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17314 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17315 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17316 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17317
17318 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17319 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17320
17321 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17322
17323 /* Prescott New Instructions. */
17324 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17325 void_ftype_pcvoid_unsigned_unsigned,
17326 IX86_BUILTIN_MONITOR);
17327 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17328 void_ftype_unsigned_unsigned,
17329 IX86_BUILTIN_MWAIT);
17330 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17331 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17332
17333 /* SSSE3. */
17334 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17335 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17336 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17337 IX86_BUILTIN_PALIGNR);
17338
17339 /* AMDFAM10 SSE4A new built-ins. */
17340 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17341 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17342 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17343 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17344 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17345 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17346 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17347 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17348 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17349 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17350 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17351 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17352
17353 /* Access to the vec_init patterns. */
17354 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17355 integer_type_node, NULL_TREE);
17356 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17357 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17358
17359 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17360 short_integer_type_node,
17361 short_integer_type_node,
17362 short_integer_type_node, NULL_TREE);
17363 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17364 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17365
17366 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17367 char_type_node, char_type_node,
17368 char_type_node, char_type_node,
17369 char_type_node, char_type_node,
17370 char_type_node, NULL_TREE);
17371 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17372 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17373
17374 /* Access to the vec_extract patterns. */
17375 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17376 integer_type_node, NULL_TREE);
17377 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17378 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17379
17380 ftype = build_function_type_list (long_long_integer_type_node,
17381 V2DI_type_node, integer_type_node,
17382 NULL_TREE);
17383 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17384 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17385
17386 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17387 integer_type_node, NULL_TREE);
17388 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17389 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17390
17391 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17392 integer_type_node, NULL_TREE);
17393 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17394 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17395
17396 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17397 integer_type_node, NULL_TREE);
17398 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17399 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17400
17401 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17402 integer_type_node, NULL_TREE);
17403 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17404 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17405
17406 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17407 integer_type_node, NULL_TREE);
17408 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17409 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17410
17411 /* Access to the vec_set patterns. */
17412 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17413 intHI_type_node,
17414 integer_type_node, NULL_TREE);
17415 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17416 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17417
17418 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17419 intHI_type_node,
17420 integer_type_node, NULL_TREE);
17421 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17422 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17423 }
17424
17425 /* Errors in the source file can cause expand_expr to return const0_rtx
17426 where we expect a vector. To avoid crashing, use one of the vector
17427 clear instructions. */
17428 static rtx
17429 safe_vector_operand (rtx x, enum machine_mode mode)
17430 {
17431 if (x == const0_rtx)
17432 x = CONST0_RTX (mode);
17433 return x;
17434 }
17435
17436 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17437
17438 static rtx
17439 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17440 {
17441 rtx pat, xops[3];
17442 tree arg0 = CALL_EXPR_ARG (exp, 0);
17443 tree arg1 = CALL_EXPR_ARG (exp, 1);
17444 rtx op0 = expand_normal (arg0);
17445 rtx op1 = expand_normal (arg1);
17446 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17447 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17448 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17449
17450 if (VECTOR_MODE_P (mode0))
17451 op0 = safe_vector_operand (op0, mode0);
17452 if (VECTOR_MODE_P (mode1))
17453 op1 = safe_vector_operand (op1, mode1);
17454
17455 if (optimize || !target
17456 || GET_MODE (target) != tmode
17457 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17458 target = gen_reg_rtx (tmode);
17459
17460 if (GET_MODE (op1) == SImode && mode1 == TImode)
17461 {
17462 rtx x = gen_reg_rtx (V4SImode);
17463 emit_insn (gen_sse2_loadd (x, op1));
17464 op1 = gen_lowpart (TImode, x);
17465 }
17466
17467 /* The insn must want input operands in the same modes as the
17468 result. */
17469 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17470 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17471
17472 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17473 op0 = copy_to_mode_reg (mode0, op0);
17474 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17475 op1 = copy_to_mode_reg (mode1, op1);
17476
17477 /* ??? Using ix86_fixup_binary_operands is problematic when
17478 we've got mismatched modes. Fake it. */
17479
17480 xops[0] = target;
17481 xops[1] = op0;
17482 xops[2] = op1;
17483
17484 if (tmode == mode0 && tmode == mode1)
17485 {
17486 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17487 op0 = xops[1];
17488 op1 = xops[2];
17489 }
17490 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17491 {
17492 op0 = force_reg (mode0, op0);
17493 op1 = force_reg (mode1, op1);
17494 target = gen_reg_rtx (tmode);
17495 }
17496
17497 pat = GEN_FCN (icode) (target, op0, op1);
17498 if (! pat)
17499 return 0;
17500 emit_insn (pat);
17501 return target;
17502 }
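
/* Illustrative note (a sketch, not part of the expander itself): most
   two-operand builtins are not handled in ix86_expand_builtin's switch at
   all; they are looked up in the bdesc_2arg table and funneled through this
   helper.  For example, __builtin_ia32_paddw128 ends up here and, after the
   operand legitimization above, the expansion is roughly

     pat = GEN_FCN (icode) (target, op0, op1);   with icode taken from the
     emit_insn (pat);                            table entry for the builtin

   so only builtins with special operand handling need explicit cases.  */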
17503
17504 /* Subroutine of ix86_expand_builtin to take care of stores. */
17505
17506 static rtx
17507 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17508 {
17509 rtx pat;
17510 tree arg0 = CALL_EXPR_ARG (exp, 0);
17511 tree arg1 = CALL_EXPR_ARG (exp, 1);
17512 rtx op0 = expand_normal (arg0);
17513 rtx op1 = expand_normal (arg1);
17514 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17515 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17516
17517 if (VECTOR_MODE_P (mode1))
17518 op1 = safe_vector_operand (op1, mode1);
17519
17520 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17521 op1 = copy_to_mode_reg (mode1, op1);
17522
17523 pat = GEN_FCN (icode) (op0, op1);
17524 if (pat)
17525 emit_insn (pat);
17526 return 0;
17527 }
17528
17529 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17530
17531 static rtx
17532 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17533 rtx target, int do_load)
17534 {
17535 rtx pat;
17536 tree arg0 = CALL_EXPR_ARG (exp, 0);
17537 rtx op0 = expand_normal (arg0);
17538 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17539 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17540
17541 if (optimize || !target
17542 || GET_MODE (target) != tmode
17543 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17544 target = gen_reg_rtx (tmode);
17545 if (do_load)
17546 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17547 else
17548 {
17549 if (VECTOR_MODE_P (mode0))
17550 op0 = safe_vector_operand (op0, mode0);
17551
17552 if ((optimize && !register_operand (op0, mode0))
17553 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17554 op0 = copy_to_mode_reg (mode0, op0);
17555 }
17556
17557 pat = GEN_FCN (icode) (target, op0);
17558 if (! pat)
17559 return 0;
17560 emit_insn (pat);
17561 return target;
17562 }
17563
17564 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17565 sqrtss, rsqrtss, rcpss. */
17566
17567 static rtx
17568 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17569 {
17570 rtx pat;
17571 tree arg0 = CALL_EXPR_ARG (exp, 0);
17572 rtx op1, op0 = expand_normal (arg0);
17573 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17574 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17575
17576 if (optimize || !target
17577 || GET_MODE (target) != tmode
17578 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17579 target = gen_reg_rtx (tmode);
17580
17581 if (VECTOR_MODE_P (mode0))
17582 op0 = safe_vector_operand (op0, mode0);
17583
17584 if ((optimize && !register_operand (op0, mode0))
17585 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17586 op0 = copy_to_mode_reg (mode0, op0);
17587
17588 op1 = op0;
17589 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17590 op1 = copy_to_mode_reg (mode0, op1);
17591
17592 pat = GEN_FCN (icode) (target, op0, op1);
17593 if (! pat)
17594 return 0;
17595 emit_insn (pat);
17596 return target;
17597 }
17598
17599 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17600
17601 static rtx
17602 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17603 rtx target)
17604 {
17605 rtx pat;
17606 tree arg0 = CALL_EXPR_ARG (exp, 0);
17607 tree arg1 = CALL_EXPR_ARG (exp, 1);
17608 rtx op0 = expand_normal (arg0);
17609 rtx op1 = expand_normal (arg1);
17610 rtx op2;
17611 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17612 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17613 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17614 enum rtx_code comparison = d->comparison;
17615
17616 if (VECTOR_MODE_P (mode0))
17617 op0 = safe_vector_operand (op0, mode0);
17618 if (VECTOR_MODE_P (mode1))
17619 op1 = safe_vector_operand (op1, mode1);
17620
17621 /* Swap operands if we have a comparison that isn't available in
17622 hardware. */
17623 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17624 {
17625 rtx tmp = gen_reg_rtx (mode1);
17626 emit_move_insn (tmp, op1);
17627 op1 = op0;
17628 op0 = tmp;
17629 }
17630
17631 if (optimize || !target
17632 || GET_MODE (target) != tmode
17633 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17634 target = gen_reg_rtx (tmode);
17635
17636 if ((optimize && !register_operand (op0, mode0))
17637 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17638 op0 = copy_to_mode_reg (mode0, op0);
17639 if ((optimize && !register_operand (op1, mode1))
17640 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17641 op1 = copy_to_mode_reg (mode1, op1);
17642
17643 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17644 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17645 if (! pat)
17646 return 0;
17647 emit_insn (pat);
17648 return target;
17649 }
17650
17651 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17652
17653 static rtx
17654 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17655 rtx target)
17656 {
17657 rtx pat;
17658 tree arg0 = CALL_EXPR_ARG (exp, 0);
17659 tree arg1 = CALL_EXPR_ARG (exp, 1);
17660 rtx op0 = expand_normal (arg0);
17661 rtx op1 = expand_normal (arg1);
17662 rtx op2;
17663 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17664 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17665 enum rtx_code comparison = d->comparison;
17666
17667 if (VECTOR_MODE_P (mode0))
17668 op0 = safe_vector_operand (op0, mode0);
17669 if (VECTOR_MODE_P (mode1))
17670 op1 = safe_vector_operand (op1, mode1);
17671
17672 /* Swap operands if we have a comparison that isn't available in
17673 hardware. */
17674 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17675 {
17676 rtx tmp = op1;
17677 op1 = op0;
17678 op0 = tmp;
17679 }
17680
17681 target = gen_reg_rtx (SImode);
17682 emit_move_insn (target, const0_rtx);
17683 target = gen_rtx_SUBREG (QImode, target, 0);
17684
17685 if ((optimize && !register_operand (op0, mode0))
17686 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17687 op0 = copy_to_mode_reg (mode0, op0);
17688 if ((optimize && !register_operand (op1, mode1))
17689 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17690 op1 = copy_to_mode_reg (mode1, op1);
17691
17692 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17693 pat = GEN_FCN (d->icode) (op0, op1);
17694 if (! pat)
17695 return 0;
17696 emit_insn (pat);
17697 emit_insn (gen_rtx_SET (VOIDmode,
17698 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17699 gen_rtx_fmt_ee (comparison, QImode,
17700 SET_DEST (pat),
17701 const0_rtx)));
17702
17703 return SUBREG_REG (target);
17704 }
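
/* Illustrative sketch (assumption: the intrinsic wrapper named below is the
   one provided by xmmintrin.h): a comi-style builtin such as
   __builtin_ia32_comieq, used by _mm_comieq_ss, reaches this function via
   the bdesc_comi table.  The RTL emitted above is roughly

     (set (reg flags) (compare ...))                      the comiss itself
     (set (strict_low_part (subreg:QI (reg:SI tmp) 0))
          (eq:QI (reg flags) (const_int 0)))              a setcc

   and the pre-zeroed SImode pseudo is returned as the 0/1 result.  */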
17705
17706 /* Return the integer constant in ARG. Constrain it to be in the range
17707 of the subparts of VEC_TYPE; issue an error if not. */
17708
17709 static int
17710 get_element_number (tree vec_type, tree arg)
17711 {
17712 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17713
17714 if (!host_integerp (arg, 1)
17715 || (elt = tree_low_cst (arg, 1), elt > max))
17716 {
17717 error ("selector must be an integer constant in the range 0..%wi", max);
17718 return 0;
17719 }
17720
17721 return elt;
17722 }
17723
17724 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17725 ix86_expand_vector_init. We DO have language-level syntax for this, in
17726 the form of (type){ init-list }. Except that since we can't place emms
17727 instructions from inside the compiler, we can't allow the use of MMX
17728 registers unless the user explicitly asks for it. So we do *not* define
17729 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17730 we have builtins invoked by mmintrin.h that give us license to emit
17731 these sorts of instructions. */
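
/* For example (paraphrased from mmintrin.h, shown here only to illustrate
   the wrapping described above):

     static __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   so user code never touches MMX registers directly, and MMX instructions
   are only generated when such a builtin is actually called.  */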
17732
17733 static rtx
17734 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17735 {
17736 enum machine_mode tmode = TYPE_MODE (type);
17737 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17738 int i, n_elt = GET_MODE_NUNITS (tmode);
17739 rtvec v = rtvec_alloc (n_elt);
17740
17741 gcc_assert (VECTOR_MODE_P (tmode));
17742 gcc_assert (call_expr_nargs (exp) == n_elt);
17743
17744 for (i = 0; i < n_elt; ++i)
17745 {
17746 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17747 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17748 }
17749
17750 if (!target || !register_operand (target, tmode))
17751 target = gen_reg_rtx (tmode);
17752
17753 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17754 return target;
17755 }
17756
17757 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17758 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17759 had a language-level syntax for referencing vector elements. */
17760
17761 static rtx
17762 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17763 {
17764 enum machine_mode tmode, mode0;
17765 tree arg0, arg1;
17766 int elt;
17767 rtx op0;
17768
17769 arg0 = CALL_EXPR_ARG (exp, 0);
17770 arg1 = CALL_EXPR_ARG (exp, 1);
17771
17772 op0 = expand_normal (arg0);
17773 elt = get_element_number (TREE_TYPE (arg0), arg1);
17774
17775 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17776 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17777 gcc_assert (VECTOR_MODE_P (mode0));
17778
17779 op0 = force_reg (mode0, op0);
17780
17781 if (optimize || !target || !register_operand (target, tmode))
17782 target = gen_reg_rtx (tmode);
17783
17784 ix86_expand_vector_extract (true, target, op0, elt);
17785
17786 return target;
17787 }
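
/* Usage sketch (illustrative only): the element selector must fold to an
   integer constant in the range checked by get_element_number above, so for
   a V4SF argument

     float x = __builtin_ia32_vec_ext_v4sf (v, 2);    ok, element 2
     float y = __builtin_ia32_vec_ext_v4sf (v, i);    error unless i is a
                                                      constant in 0..3

   The intrinsic headers wrap these builtins, e.g. _mm_extract_pi16 is a
   thin wrapper around __builtin_ia32_vec_ext_v4hi.  */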
17788
17789 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17790 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17791 a language-level syntax for referencing vector elements. */
17792
17793 static rtx
17794 ix86_expand_vec_set_builtin (tree exp)
17795 {
17796 enum machine_mode tmode, mode1;
17797 tree arg0, arg1, arg2;
17798 int elt;
17799 rtx op0, op1;
17800
17801 arg0 = CALL_EXPR_ARG (exp, 0);
17802 arg1 = CALL_EXPR_ARG (exp, 1);
17803 arg2 = CALL_EXPR_ARG (exp, 2);
17804
17805 tmode = TYPE_MODE (TREE_TYPE (arg0));
17806 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17807 gcc_assert (VECTOR_MODE_P (tmode));
17808
17809 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17810 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17811 elt = get_element_number (TREE_TYPE (arg0), arg2);
17812
17813 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17814 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17815
17816 op0 = force_reg (tmode, op0);
17817 op1 = force_reg (mode1, op1);
17818
17819 ix86_expand_vector_set (true, op0, op1, elt);
17820
17821 return op0;
17822 }
17823
17824 /* Expand an expression EXP that calls a built-in function,
17825 with result going to TARGET if that's convenient
17826 (and in mode MODE if that's convenient).
17827 SUBTARGET may be used as the target for computing one of EXP's operands.
17828 IGNORE is nonzero if the value is to be ignored. */
17829
17830 static rtx
17831 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17832 enum machine_mode mode ATTRIBUTE_UNUSED,
17833 int ignore ATTRIBUTE_UNUSED)
17834 {
17835 const struct builtin_description *d;
17836 size_t i;
17837 enum insn_code icode;
17838 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17839 tree arg0, arg1, arg2, arg3;
17840 rtx op0, op1, op2, op3, pat;
17841 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17842 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17843
17844 switch (fcode)
17845 {
17846 case IX86_BUILTIN_EMMS:
17847 emit_insn (gen_mmx_emms ());
17848 return 0;
17849
17850 case IX86_BUILTIN_SFENCE:
17851 emit_insn (gen_sse_sfence ());
17852 return 0;
17853
17854 case IX86_BUILTIN_MASKMOVQ:
17855 case IX86_BUILTIN_MASKMOVDQU:
17856 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17857 ? CODE_FOR_mmx_maskmovq
17858 : CODE_FOR_sse2_maskmovdqu);
17859 /* Note the arg order is different from the operand order. */
17860 arg1 = CALL_EXPR_ARG (exp, 0);
17861 arg2 = CALL_EXPR_ARG (exp, 1);
17862 arg0 = CALL_EXPR_ARG (exp, 2);
17863 op0 = expand_normal (arg0);
17864 op1 = expand_normal (arg1);
17865 op2 = expand_normal (arg2);
17866 mode0 = insn_data[icode].operand[0].mode;
17867 mode1 = insn_data[icode].operand[1].mode;
17868 mode2 = insn_data[icode].operand[2].mode;
17869
17870 op0 = force_reg (Pmode, op0);
17871 op0 = gen_rtx_MEM (mode1, op0);
17872
17873 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17874 op0 = copy_to_mode_reg (mode0, op0);
17875 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17876 op1 = copy_to_mode_reg (mode1, op1);
17877 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17878 op2 = copy_to_mode_reg (mode2, op2);
17879 pat = GEN_FCN (icode) (op0, op1, op2);
17880 if (! pat)
17881 return 0;
17882 emit_insn (pat);
17883 return 0;
17884
17885 case IX86_BUILTIN_SQRTSS:
17886 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17887 case IX86_BUILTIN_RSQRTSS:
17888 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17889 case IX86_BUILTIN_RCPSS:
17890 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17891
17892 case IX86_BUILTIN_LOADUPS:
17893 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17894
17895 case IX86_BUILTIN_STOREUPS:
17896 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17897
17898 case IX86_BUILTIN_LOADHPS:
17899 case IX86_BUILTIN_LOADLPS:
17900 case IX86_BUILTIN_LOADHPD:
17901 case IX86_BUILTIN_LOADLPD:
17902 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17903 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17904 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17905 : CODE_FOR_sse2_loadlpd);
17906 arg0 = CALL_EXPR_ARG (exp, 0);
17907 arg1 = CALL_EXPR_ARG (exp, 1);
17908 op0 = expand_normal (arg0);
17909 op1 = expand_normal (arg1);
17910 tmode = insn_data[icode].operand[0].mode;
17911 mode0 = insn_data[icode].operand[1].mode;
17912 mode1 = insn_data[icode].operand[2].mode;
17913
17914 op0 = force_reg (mode0, op0);
17915 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17916 if (optimize || target == 0
17917 || GET_MODE (target) != tmode
17918 || !register_operand (target, tmode))
17919 target = gen_reg_rtx (tmode);
17920 pat = GEN_FCN (icode) (target, op0, op1);
17921 if (! pat)
17922 return 0;
17923 emit_insn (pat);
17924 return target;
17925
17926 case IX86_BUILTIN_STOREHPS:
17927 case IX86_BUILTIN_STORELPS:
17928 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17929 : CODE_FOR_sse_storelps);
17930 arg0 = CALL_EXPR_ARG (exp, 0);
17931 arg1 = CALL_EXPR_ARG (exp, 1);
17932 op0 = expand_normal (arg0);
17933 op1 = expand_normal (arg1);
17934 mode0 = insn_data[icode].operand[0].mode;
17935 mode1 = insn_data[icode].operand[1].mode;
17936
17937 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17938 op1 = force_reg (mode1, op1);
17939
17940 pat = GEN_FCN (icode) (op0, op1);
17941 if (! pat)
17942 return 0;
17943 emit_insn (pat);
17944 return const0_rtx;
17945
17946 case IX86_BUILTIN_MOVNTPS:
17947 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17948 case IX86_BUILTIN_MOVNTQ:
17949 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17950
17951 case IX86_BUILTIN_LDMXCSR:
17952 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17953 target = assign_386_stack_local (SImode, SLOT_TEMP);
17954 emit_move_insn (target, op0);
17955 emit_insn (gen_sse_ldmxcsr (target));
17956 return 0;
17957
17958 case IX86_BUILTIN_STMXCSR:
17959 target = assign_386_stack_local (SImode, SLOT_TEMP);
17960 emit_insn (gen_sse_stmxcsr (target));
17961 return copy_to_mode_reg (SImode, target);
17962
17963 case IX86_BUILTIN_SHUFPS:
17964 case IX86_BUILTIN_SHUFPD:
17965 icode = (fcode == IX86_BUILTIN_SHUFPS
17966 ? CODE_FOR_sse_shufps
17967 : CODE_FOR_sse2_shufpd);
17968 arg0 = CALL_EXPR_ARG (exp, 0);
17969 arg1 = CALL_EXPR_ARG (exp, 1);
17970 arg2 = CALL_EXPR_ARG (exp, 2);
17971 op0 = expand_normal (arg0);
17972 op1 = expand_normal (arg1);
17973 op2 = expand_normal (arg2);
17974 tmode = insn_data[icode].operand[0].mode;
17975 mode0 = insn_data[icode].operand[1].mode;
17976 mode1 = insn_data[icode].operand[2].mode;
17977 mode2 = insn_data[icode].operand[3].mode;
17978
17979 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17980 op0 = copy_to_mode_reg (mode0, op0);
17981 if ((optimize && !register_operand (op1, mode1))
17982 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17983 op1 = copy_to_mode_reg (mode1, op1);
17984 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17985 {
17986 /* @@@ better error message */
17987 error ("mask must be an immediate");
17988 return gen_reg_rtx (tmode);
17989 }
17990 if (optimize || target == 0
17991 || GET_MODE (target) != tmode
17992 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17993 target = gen_reg_rtx (tmode);
17994 pat = GEN_FCN (icode) (target, op0, op1, op2);
17995 if (! pat)
17996 return 0;
17997 emit_insn (pat);
17998 return target;
17999
18000 case IX86_BUILTIN_PSHUFW:
18001 case IX86_BUILTIN_PSHUFD:
18002 case IX86_BUILTIN_PSHUFHW:
18003 case IX86_BUILTIN_PSHUFLW:
18004 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18005 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18006 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18007 : CODE_FOR_mmx_pshufw);
18008 arg0 = CALL_EXPR_ARG (exp, 0);
18009 arg1 = CALL_EXPR_ARG (exp, 1);
18010 op0 = expand_normal (arg0);
18011 op1 = expand_normal (arg1);
18012 tmode = insn_data[icode].operand[0].mode;
18013 mode1 = insn_data[icode].operand[1].mode;
18014 mode2 = insn_data[icode].operand[2].mode;
18015
18016 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18017 op0 = copy_to_mode_reg (mode1, op0);
18018 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18019 {
18020 /* @@@ better error message */
18021 error ("mask must be an immediate");
18022 return const0_rtx;
18023 }
18024 if (target == 0
18025 || GET_MODE (target) != tmode
18026 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18027 target = gen_reg_rtx (tmode);
18028 pat = GEN_FCN (icode) (target, op0, op1);
18029 if (! pat)
18030 return 0;
18031 emit_insn (pat);
18032 return target;
18033
18034 case IX86_BUILTIN_PSLLDQI128:
18035 case IX86_BUILTIN_PSRLDQI128:
18036 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18037 : CODE_FOR_sse2_lshrti3);
18038 arg0 = CALL_EXPR_ARG (exp, 0);
18039 arg1 = CALL_EXPR_ARG (exp, 1);
18040 op0 = expand_normal (arg0);
18041 op1 = expand_normal (arg1);
18042 tmode = insn_data[icode].operand[0].mode;
18043 mode1 = insn_data[icode].operand[1].mode;
18044 mode2 = insn_data[icode].operand[2].mode;
18045
18046 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18047 {
18048 op0 = copy_to_reg (op0);
18049 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18050 }
18051 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18052 {
18053 error ("shift must be an immediate");
18054 return const0_rtx;
18055 }
18056 target = gen_reg_rtx (V2DImode);
18057 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18058 if (! pat)
18059 return 0;
18060 emit_insn (pat);
18061 return target;
18062
18063 case IX86_BUILTIN_FEMMS:
18064 emit_insn (gen_mmx_femms ());
18065 return NULL_RTX;
18066
18067 case IX86_BUILTIN_PAVGUSB:
18068 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18069
18070 case IX86_BUILTIN_PF2ID:
18071 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18072
18073 case IX86_BUILTIN_PFACC:
18074 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18075
18076 case IX86_BUILTIN_PFADD:
18077 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18078
18079 case IX86_BUILTIN_PFCMPEQ:
18080 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18081
18082 case IX86_BUILTIN_PFCMPGE:
18083 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18084
18085 case IX86_BUILTIN_PFCMPGT:
18086 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18087
18088 case IX86_BUILTIN_PFMAX:
18089 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18090
18091 case IX86_BUILTIN_PFMIN:
18092 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18093
18094 case IX86_BUILTIN_PFMUL:
18095 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18096
18097 case IX86_BUILTIN_PFRCP:
18098 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18099
18100 case IX86_BUILTIN_PFRCPIT1:
18101 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18102
18103 case IX86_BUILTIN_PFRCPIT2:
18104 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18105
18106 case IX86_BUILTIN_PFRSQIT1:
18107 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18108
18109 case IX86_BUILTIN_PFRSQRT:
18110 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18111
18112 case IX86_BUILTIN_PFSUB:
18113 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18114
18115 case IX86_BUILTIN_PFSUBR:
18116 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18117
18118 case IX86_BUILTIN_PI2FD:
18119 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18120
18121 case IX86_BUILTIN_PMULHRW:
18122 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18123
18124 case IX86_BUILTIN_PF2IW:
18125 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18126
18127 case IX86_BUILTIN_PFNACC:
18128 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18129
18130 case IX86_BUILTIN_PFPNACC:
18131 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18132
18133 case IX86_BUILTIN_PI2FW:
18134 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18135
18136 case IX86_BUILTIN_PSWAPDSI:
18137 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18138
18139 case IX86_BUILTIN_PSWAPDSF:
18140 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18141
18142 case IX86_BUILTIN_SQRTSD:
18143 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18144 case IX86_BUILTIN_LOADUPD:
18145 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18146 case IX86_BUILTIN_STOREUPD:
18147 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18148
18149 case IX86_BUILTIN_MFENCE:
18150 emit_insn (gen_sse2_mfence ());
18151 return 0;
18152 case IX86_BUILTIN_LFENCE:
18153 emit_insn (gen_sse2_lfence ());
18154 return 0;
18155
18156 case IX86_BUILTIN_CLFLUSH:
18157 arg0 = CALL_EXPR_ARG (exp, 0);
18158 op0 = expand_normal (arg0);
18159 icode = CODE_FOR_sse2_clflush;
18160 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18161 op0 = copy_to_mode_reg (Pmode, op0);
18162
18163 emit_insn (gen_sse2_clflush (op0));
18164 return 0;
18165
18166 case IX86_BUILTIN_MOVNTPD:
18167 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18168 case IX86_BUILTIN_MOVNTDQ:
18169 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18170 case IX86_BUILTIN_MOVNTI:
18171 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18172
18173 case IX86_BUILTIN_LOADDQU:
18174 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18175 case IX86_BUILTIN_STOREDQU:
18176 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18177
18178 case IX86_BUILTIN_MONITOR:
18179 arg0 = CALL_EXPR_ARG (exp, 0);
18180 arg1 = CALL_EXPR_ARG (exp, 1);
18181 arg2 = CALL_EXPR_ARG (exp, 2);
18182 op0 = expand_normal (arg0);
18183 op1 = expand_normal (arg1);
18184 op2 = expand_normal (arg2);
18185 if (!REG_P (op0))
18186 op0 = copy_to_mode_reg (Pmode, op0);
18187 if (!REG_P (op1))
18188 op1 = copy_to_mode_reg (SImode, op1);
18189 if (!REG_P (op2))
18190 op2 = copy_to_mode_reg (SImode, op2);
18191 if (!TARGET_64BIT)
18192 emit_insn (gen_sse3_monitor (op0, op1, op2));
18193 else
18194 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18195 return 0;
18196
18197 case IX86_BUILTIN_MWAIT:
18198 arg0 = CALL_EXPR_ARG (exp, 0);
18199 arg1 = CALL_EXPR_ARG (exp, 1);
18200 op0 = expand_normal (arg0);
18201 op1 = expand_normal (arg1);
18202 if (!REG_P (op0))
18203 op0 = copy_to_mode_reg (SImode, op0);
18204 if (!REG_P (op1))
18205 op1 = copy_to_mode_reg (SImode, op1);
18206 emit_insn (gen_sse3_mwait (op0, op1));
18207 return 0;
18208
18209 case IX86_BUILTIN_LDDQU:
18210 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18211 target, 1);
18212
18213 case IX86_BUILTIN_PALIGNR:
18214 case IX86_BUILTIN_PALIGNR128:
18215 if (fcode == IX86_BUILTIN_PALIGNR)
18216 {
18217 icode = CODE_FOR_ssse3_palignrdi;
18218 mode = DImode;
18219 }
18220 else
18221 {
18222 icode = CODE_FOR_ssse3_palignrti;
18223 mode = V2DImode;
18224 }
18225 arg0 = CALL_EXPR_ARG (exp, 0);
18226 arg1 = CALL_EXPR_ARG (exp, 1);
18227 arg2 = CALL_EXPR_ARG (exp, 2);
18228 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18229 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18230 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18231 tmode = insn_data[icode].operand[0].mode;
18232 mode1 = insn_data[icode].operand[1].mode;
18233 mode2 = insn_data[icode].operand[2].mode;
18234 mode3 = insn_data[icode].operand[3].mode;
18235
18236 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18237 {
18238 op0 = copy_to_reg (op0);
18239 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18240 }
18241 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18242 {
18243 op1 = copy_to_reg (op1);
18244 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18245 }
18246 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18247 {
18248 error ("shift must be an immediate");
18249 return const0_rtx;
18250 }
18251 target = gen_reg_rtx (mode);
18252 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18253 op0, op1, op2);
18254 if (! pat)
18255 return 0;
18256 emit_insn (pat);
18257 return target;
18258
18259 case IX86_BUILTIN_MOVNTSD:
18260 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18261
18262 case IX86_BUILTIN_MOVNTSS:
18263 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18264
18265 case IX86_BUILTIN_INSERTQ:
18266 case IX86_BUILTIN_EXTRQ:
18267 icode = (fcode == IX86_BUILTIN_EXTRQ
18268 ? CODE_FOR_sse4a_extrq
18269 : CODE_FOR_sse4a_insertq);
18270 arg0 = CALL_EXPR_ARG (exp, 0);
18271 arg1 = CALL_EXPR_ARG (exp, 1);
18272 op0 = expand_normal (arg0);
18273 op1 = expand_normal (arg1);
18274 tmode = insn_data[icode].operand[0].mode;
18275 mode1 = insn_data[icode].operand[1].mode;
18276 mode2 = insn_data[icode].operand[2].mode;
18277 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18278 op0 = copy_to_mode_reg (mode1, op0);
18279 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18280 op1 = copy_to_mode_reg (mode2, op1);
18281 if (optimize || target == 0
18282 || GET_MODE (target) != tmode
18283 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18284 target = gen_reg_rtx (tmode);
18285 pat = GEN_FCN (icode) (target, op0, op1);
18286 if (! pat)
18287 return NULL_RTX;
18288 emit_insn (pat);
18289 return target;
18290
18291 case IX86_BUILTIN_EXTRQI:
18292 icode = CODE_FOR_sse4a_extrqi;
18293 arg0 = CALL_EXPR_ARG (exp, 0);
18294 arg1 = CALL_EXPR_ARG (exp, 1);
18295 arg2 = CALL_EXPR_ARG (exp, 2);
18296 op0 = expand_normal (arg0);
18297 op1 = expand_normal (arg1);
18298 op2 = expand_normal (arg2);
18299 tmode = insn_data[icode].operand[0].mode;
18300 mode1 = insn_data[icode].operand[1].mode;
18301 mode2 = insn_data[icode].operand[2].mode;
18302 mode3 = insn_data[icode].operand[3].mode;
18303 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18304 op0 = copy_to_mode_reg (mode1, op0);
18305 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18306 {
18307 error ("index mask must be an immediate");
18308 return gen_reg_rtx (tmode);
18309 }
18310 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18311 {
18312 error ("length mask must be an immediate");
18313 return gen_reg_rtx (tmode);
18314 }
18315 if (optimize || target == 0
18316 || GET_MODE (target) != tmode
18317 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18318 target = gen_reg_rtx (tmode);
18319 pat = GEN_FCN (icode) (target, op0, op1, op2);
18320 if (! pat)
18321 return NULL_RTX;
18322 emit_insn (pat);
18323 return target;
18324
18325 case IX86_BUILTIN_INSERTQI:
18326 icode = CODE_FOR_sse4a_insertqi;
18327 arg0 = CALL_EXPR_ARG (exp, 0);
18328 arg1 = CALL_EXPR_ARG (exp, 1);
18329 arg2 = CALL_EXPR_ARG (exp, 2);
18330 arg3 = CALL_EXPR_ARG (exp, 3);
18331 op0 = expand_normal (arg0);
18332 op1 = expand_normal (arg1);
18333 op2 = expand_normal (arg2);
18334 op3 = expand_normal (arg3);
18335 tmode = insn_data[icode].operand[0].mode;
18336 mode1 = insn_data[icode].operand[1].mode;
18337 mode2 = insn_data[icode].operand[2].mode;
18338 mode3 = insn_data[icode].operand[3].mode;
18339 mode4 = insn_data[icode].operand[4].mode;
18340
18341 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18342 op0 = copy_to_mode_reg (mode1, op0);
18343
18344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18345 op1 = copy_to_mode_reg (mode2, op1);
18346
18347 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18348 {
18349 error ("index mask must be an immediate");
18350 return gen_reg_rtx (tmode);
18351 }
18352 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18353 {
18354 error ("length mask must be an immediate");
18355 return gen_reg_rtx (tmode);
18356 }
18357 if (optimize || target == 0
18358 || GET_MODE (target) != tmode
18359 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18360 target = gen_reg_rtx (tmode);
18361 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18362 if (! pat)
18363 return NULL_RTX;
18364 emit_insn (pat);
18365 return target;
18366
18367 case IX86_BUILTIN_VEC_INIT_V2SI:
18368 case IX86_BUILTIN_VEC_INIT_V4HI:
18369 case IX86_BUILTIN_VEC_INIT_V8QI:
18370 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18371
18372 case IX86_BUILTIN_VEC_EXT_V2DF:
18373 case IX86_BUILTIN_VEC_EXT_V2DI:
18374 case IX86_BUILTIN_VEC_EXT_V4SF:
18375 case IX86_BUILTIN_VEC_EXT_V4SI:
18376 case IX86_BUILTIN_VEC_EXT_V8HI:
18377 case IX86_BUILTIN_VEC_EXT_V2SI:
18378 case IX86_BUILTIN_VEC_EXT_V4HI:
18379 return ix86_expand_vec_ext_builtin (exp, target);
18380
18381 case IX86_BUILTIN_VEC_SET_V8HI:
18382 case IX86_BUILTIN_VEC_SET_V4HI:
18383 return ix86_expand_vec_set_builtin (exp);
18384
18385 default:
18386 break;
18387 }
18388
18389 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18390 if (d->code == fcode)
18391 {
18392 /* Compares are treated specially. */
18393 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18394 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18395 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18396 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18397 return ix86_expand_sse_compare (d, exp, target);
18398
18399 return ix86_expand_binop_builtin (d->icode, exp, target);
18400 }
18401
18402 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18403 if (d->code == fcode)
18404 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18405
18406 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18407 if (d->code == fcode)
18408 return ix86_expand_sse_comi (d, exp, target);
18409
18410 gcc_unreachable ();
18411 }
18412
18413 /* Returns a function decl for a vectorized version of the builtin function
18414 with builtin function code FN, result vector type TYPE_OUT and argument
18415 vector type TYPE_IN, or NULL_TREE if no such vectorized builtin is available. */
18416
18417 static tree
18418 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18419 tree type_in)
18420 {
18421 enum machine_mode in_mode, out_mode;
18422 int in_n, out_n;
18423
18424 if (TREE_CODE (type_out) != VECTOR_TYPE
18425 || TREE_CODE (type_in) != VECTOR_TYPE)
18426 return NULL_TREE;
18427
18428 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18429 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18430 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18431 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18432
18433 switch (fn)
18434 {
18435 case BUILT_IN_SQRT:
18436 if (out_mode == DFmode && out_n == 2
18437 && in_mode == DFmode && in_n == 2)
18438 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18439 return NULL_TREE;
18440
18441 case BUILT_IN_SQRTF:
18442 if (out_mode == SFmode && out_n == 4
18443 && in_mode == SFmode && in_n == 4)
18444 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18445 return NULL_TREE;
18446
18447 case BUILT_IN_LRINTF:
18448 if (out_mode == SImode && out_n == 4
18449 && in_mode == SFmode && in_n == 4)
18450 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18451 return NULL_TREE;
18452
18453 default:
18454 ;
18455 }
18456
18457 return NULL_TREE;
18458 }
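
/* Illustrative example (user-level sketch; the exact option set required is
   an assumption, roughly -O2 -msse2 -ftree-vectorize with errno handling
   for sqrt disabled): given

     void f (double *a, const double *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sqrt (b[i]);
     }

   the vectorizer queries this hook for BUILT_IN_SQRT with V2DF in and out,
   and the IX86_BUILTIN_SQRTPD decl returned above lets it emit one sqrtpd
   per two iterations.  */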
18459
18460 /* Returns a decl of a function that implements the vector conversion CODE
18461 on an input vector of type TYPE, or NULL_TREE if it is not available. */
18462
18463 static tree
18464 ix86_builtin_conversion (enum tree_code code, tree type)
18465 {
18466 if (TREE_CODE (type) != VECTOR_TYPE)
18467 return NULL_TREE;
18468
18469 switch (code)
18470 {
18471 case FLOAT_EXPR:
18472 switch (TYPE_MODE (type))
18473 {
18474 case V4SImode:
18475 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18476 default:
18477 return NULL_TREE;
18478 }
18479
18480 case FIX_TRUNC_EXPR:
18481 switch (TYPE_MODE (type))
18482 {
18483 case V4SFmode:
18484 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18485 default:
18486 return NULL_TREE;
18487 }
18488 default:
18489 return NULL_TREE;
18490
18491 }
18492 }
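
/* Illustrative example (user-level sketch): the vectorizer uses this hook
   when it needs to convert a whole vector at once, e.g.

     void f (float *a, const int *b, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = (float) b[i];       FLOAT_EXPR on a V4SI vector -> cvtdq2ps

     }

   while the reverse float-to-int truncation goes through the FIX_TRUNC_EXPR
   case above and maps to cvttps2dq.  */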
18493
18494 /* Store OPERAND to memory after reload is completed. This means
18495 that we can't easily use assign_stack_local. */
18496 rtx
18497 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18498 {
18499 rtx result;
18500
18501 gcc_assert (reload_completed);
18502 if (TARGET_RED_ZONE)
18503 {
18504 result = gen_rtx_MEM (mode,
18505 gen_rtx_PLUS (Pmode,
18506 stack_pointer_rtx,
18507 GEN_INT (-RED_ZONE_SIZE)));
18508 emit_move_insn (result, operand);
18509 }
18510 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18511 {
18512 switch (mode)
18513 {
18514 case HImode:
18515 case SImode:
18516 operand = gen_lowpart (DImode, operand);
18517 /* FALLTHRU */
18518 case DImode:
18519 emit_insn (
18520 gen_rtx_SET (VOIDmode,
18521 gen_rtx_MEM (DImode,
18522 gen_rtx_PRE_DEC (DImode,
18523 stack_pointer_rtx)),
18524 operand));
18525 break;
18526 default:
18527 gcc_unreachable ();
18528 }
18529 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18530 }
18531 else
18532 {
18533 switch (mode)
18534 {
18535 case DImode:
18536 {
18537 rtx operands[2];
18538 split_di (&operand, 1, operands, operands + 1);
18539 emit_insn (
18540 gen_rtx_SET (VOIDmode,
18541 gen_rtx_MEM (SImode,
18542 gen_rtx_PRE_DEC (Pmode,
18543 stack_pointer_rtx)),
18544 operands[1]));
18545 emit_insn (
18546 gen_rtx_SET (VOIDmode,
18547 gen_rtx_MEM (SImode,
18548 gen_rtx_PRE_DEC (Pmode,
18549 stack_pointer_rtx)),
18550 operands[0]));
18551 }
18552 break;
18553 case HImode:
18554 /* Store HImodes as SImodes. */
18555 operand = gen_lowpart (SImode, operand);
18556 /* FALLTHRU */
18557 case SImode:
18558 emit_insn (
18559 gen_rtx_SET (VOIDmode,
18560 gen_rtx_MEM (GET_MODE (operand),
18561 gen_rtx_PRE_DEC (SImode,
18562 stack_pointer_rtx)),
18563 operand));
18564 break;
18565 default:
18566 gcc_unreachable ();
18567 }
18568 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18569 }
18570 return result;
18571 }
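
/* Rough sketch of the two strategies above (illustrative only): on x86_64
   with the red zone available the operand is simply stored below the stack
   pointer, e.g.

     movq %rax, -128(%rsp)        result is the MEM at sp - RED_ZONE_SIZE

   while without a red zone it is pushed with a pre-decrement of the stack
   pointer, and ix86_free_from_memory below releases the slot again by
   adjusting the stack pointer once the consumer has read the value.  */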
18572
18573 /* Free the operand previously stored to memory by ix86_force_to_memory. */
18574 void
18575 ix86_free_from_memory (enum machine_mode mode)
18576 {
18577 if (!TARGET_RED_ZONE)
18578 {
18579 int size;
18580
18581 if (mode == DImode || TARGET_64BIT)
18582 size = 8;
18583 else
18584 size = 4;
18585 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18586 to a pop or add instruction if registers are available. */
18587 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18588 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18589 GEN_INT (size))));
18590 }
18591 }
18592
18593 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18594 QImode must go into class Q_REGS.
18595 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and movdf
18596 to do mem-to-mem moves through integer regs. */
18597 enum reg_class
18598 ix86_preferred_reload_class (rtx x, enum reg_class class)
18599 {
18600 enum machine_mode mode = GET_MODE (x);
18601
18602 /* We're only allowed to return a subclass of CLASS. Many of the
18603 following checks fail for NO_REGS, so eliminate that early. */
18604 if (class == NO_REGS)
18605 return NO_REGS;
18606
18607 /* All classes can load zeros. */
18608 if (x == CONST0_RTX (mode))
18609 return class;
18610
18611 /* Force constants into memory if we are loading a (nonzero) constant into
18612 an MMX or SSE register. This is because there are no MMX/SSE instructions
18613 to load from a constant. */
18614 if (CONSTANT_P (x)
18615 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18616 return NO_REGS;
18617
18618 /* Prefer SSE regs only, if we can use them for math. */
18619 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18620 return SSE_CLASS_P (class) ? class : NO_REGS;
18621
18622 /* Floating-point constants need more complex checks. */
18623 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18624 {
18625 /* General regs can load everything. */
18626 if (reg_class_subset_p (class, GENERAL_REGS))
18627 return class;
18628
18629 /* The 80387 can load 0.0, 1.0 and a few other constants directly (the
18630 zero case was already handled above). We only want to wind up
18631 preferring 80387 registers if we plan on doing computation with them. */
18632 if (TARGET_80387
18633 && standard_80387_constant_p (x))
18634 {
18635 /* Limit class to non-sse. */
18636 if (class == FLOAT_SSE_REGS)
18637 return FLOAT_REGS;
18638 if (class == FP_TOP_SSE_REGS)
18639 return FP_TOP_REG;
18640 if (class == FP_SECOND_SSE_REGS)
18641 return FP_SECOND_REG;
18642 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18643 return class;
18644 }
18645
18646 return NO_REGS;
18647 }
18648
18649 /* Generally when we see PLUS here, it's the function invariant
18650 (plus soft-fp const_int). Which can only be computed into general
18651 regs. */
18652 if (GET_CODE (x) == PLUS)
18653 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18654
18655 /* QImode constants are easy to load, but non-constant QImode data
18656 must go into Q_REGS. */
18657 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18658 {
18659 if (reg_class_subset_p (class, Q_REGS))
18660 return class;
18661 if (reg_class_subset_p (Q_REGS, class))
18662 return Q_REGS;
18663 return NO_REGS;
18664 }
18665
18666 return class;
18667 }
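
/* Illustrative example: for a load such as

     (set (reg:DF x) (const_double:DF 1.25))

   this function returns NO_REGS when CLASS is an SSE class, since there is
   no SSE instruction that loads an arbitrary immediate; reload then places
   1.25 in the constant pool and loads it from memory.  Loading 0.0, by
   contrast, keeps its class and can be emitted as an xorps/xorpd zeroing
   idiom.  */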
18668
18669 /* Discourage putting floating-point values in SSE registers unless
18670 SSE math is being used, and likewise for the 387 registers. */
18671 enum reg_class
18672 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18673 {
18674 enum machine_mode mode = GET_MODE (x);
18675
18676 /* Restrict the output reload class to the register bank that we are doing
18677 math on. If we would rather not return a subset of CLASS at all, reject
18678 this alternative by returning NO_REGS; if reload cannot do that, it will still use its own choice. */
18679 mode = GET_MODE (x);
18680 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18681 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18682
18683 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18684 {
18685 if (class == FP_TOP_SSE_REGS)
18686 return FP_TOP_REG;
18687 else if (class == FP_SECOND_SSE_REGS)
18688 return FP_SECOND_REG;
18689 else
18690 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18691 }
18692
18693 return class;
18694 }
18695
18696 /* If we are copying between general and FP registers, we need a memory
18697 location. The same is true for SSE and MMX registers.
18698
18699 The macro can't work reliably when one of the CLASSES is a class containing
18700 registers from multiple units (SSE, MMX, integer). We avoid this by never
18701 combining those units in a single alternative in the machine description.
18702 Ensure that this constraint holds to avoid unexpected surprises.
18703
18704 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18705 enforce these sanity checks. */
18706
18707 int
18708 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18709 enum machine_mode mode, int strict)
18710 {
18711 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18712 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18713 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18714 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18715 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18716 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18717 {
18718 gcc_assert (!strict);
18719 return true;
18720 }
18721
18722 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18723 return true;
18724
18725 /* ??? This is a lie. We do have moves between mmx/general, and for
18726 mmx/sse2. But by saying we need secondary memory we discourage the
18727 register allocator from using the mmx registers unless needed. */
18728 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18729 return true;
18730
18731 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18732 {
18733 /* SSE1 doesn't have any direct moves from other classes. */
18734 if (!TARGET_SSE2)
18735 return true;
18736
18737 /* If the target says that inter-unit moves are more expensive
18738 than moving through memory, then don't generate them. */
18739 if (!TARGET_INTER_UNIT_MOVES)
18740 return true;
18741
18742 /* Between SSE and general registers, direct moves handle at most a word. */
18743 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18744 return true;
18745 }
18746
18747 return false;
18748 }
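
/* Illustrative example: in 32-bit code a DImode copy between GENERAL_REGS
   and SSE_REGS answers true here, because GET_MODE_SIZE (DImode) is 8 while
   UNITS_PER_WORD is 4; the copy is therefore done by storing the two 32-bit
   halves to a stack slot and loading that slot with movq, rather than with
   direct inter-unit moves.  */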
18749
18750 /* Return true if the registers in CLASS cannot represent the change from
18751 modes FROM to TO. */
18752
18753 bool
18754 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18755 enum reg_class class)
18756 {
18757 if (from == to)
18758 return false;
18759
18760 /* x87 registers can't do subreg at all, as all values are reformatted
18761 to extended precision. */
18762 if (MAYBE_FLOAT_CLASS_P (class))
18763 return true;
18764
18765 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18766 {
18767 /* Vector registers do not support QI or HImode loads. If we don't
18768 disallow a change to these modes, reload will assume it's ok to
18769 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18770 the vec_dupv4hi pattern. */
18771 if (GET_MODE_SIZE (from) < 4)
18772 return true;
18773
18774 /* Vector registers do not support subreg with nonzero offsets, which
18775 are otherwise valid for integer registers. Since we can't see
18776 whether we have a nonzero offset from here, prohibit all
18777 nonparadoxical subregs changing size. */
18778 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18779 return true;
18780 }
18781
18782 return false;
18783 }
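
/* Illustrative example: for an SSE class this returns true for a narrowing
   change such as V4SImode -> HImode, so reload will not rewrite

     (subreg:HI (reg:V4SI x) 0)

   into a direct HImode access to an xmm register; such a use keeps the
   value out of the SSE register class instead.  */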
18784
18785 /* Return the cost of moving data from a register in class CLASS1 to
18786 one in class CLASS2.
18787
18788 It is not required that the cost always equal 2 when FROM is the same as TO;
18789 on some machines it is expensive to move between registers if they are not
18790 general registers. */
18791
18792 int
18793 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18794 enum reg_class class2)
18795 {
18796 /* In case we require secondary memory, compute cost of the store followed
18797 by load. In order to avoid bad register allocation choices, we need
18798 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18799
18800 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18801 {
18802 int cost = 1;
18803
18804 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18805 MEMORY_MOVE_COST (mode, class1, 1));
18806 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18807 MEMORY_MOVE_COST (mode, class2, 1));
18808
18809 /* In case of copying from a general purpose register we may emit multiple
18810 stores followed by a single load, causing a memory size mismatch stall.
18811 Count this as an arbitrarily high cost of 20. */
18812 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18813 cost += 20;
18814
18815 /* In the case of FP/MMX moves, the registers actually overlap, and we
18816 have to switch modes in order to treat them differently. */
18817 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18818 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18819 cost += 20;
18820
18821 return cost;
18822 }
18823
18824 /* Moves between SSE/MMX and integer unit are expensive. */
18825 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18826 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18827 return ix86_cost->mmxsse_to_integer;
18828 if (MAYBE_FLOAT_CLASS_P (class1))
18829 return ix86_cost->fp_move;
18830 if (MAYBE_SSE_CLASS_P (class1))
18831 return ix86_cost->sse_move;
18832 if (MAYBE_MMX_CLASS_P (class1))
18833 return ix86_cost->mmx_move;
18834 return 2;
18835 }
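
/* Worked example (the numbers are purely illustrative and not taken from
   any of the processor cost tables): if MEMORY_MOVE_COST were 4 for stores
   and 6 for loads in both classes, a copy that needs secondary memory would
   cost

     1 + max (4, 6) + max (4, 6) = 13

   plus 20 if the source class needs more hard registers than the
   destination (the store/load size-mismatch penalty above), and another 20
   for FP<->MMX copies, which overlap physically and require a mode
   switch.  */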
18836
18837 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18838
18839 bool
18840 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18841 {
18842 /* Flags and only flags can only hold CCmode values. */
18843 if (CC_REGNO_P (regno))
18844 return GET_MODE_CLASS (mode) == MODE_CC;
18845 if (GET_MODE_CLASS (mode) == MODE_CC
18846 || GET_MODE_CLASS (mode) == MODE_RANDOM
18847 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18848 return 0;
18849 if (FP_REGNO_P (regno))
18850 return VALID_FP_MODE_P (mode);
18851 if (SSE_REGNO_P (regno))
18852 {
18853 /* We implement the move patterns for all vector modes into and
18854 out of SSE registers, even when no operation instructions
18855 are available. */
18856 return (VALID_SSE_REG_MODE (mode)
18857 || VALID_SSE2_REG_MODE (mode)
18858 || VALID_MMX_REG_MODE (mode)
18859 || VALID_MMX_REG_MODE_3DNOW (mode));
18860 }
18861 if (MMX_REGNO_P (regno))
18862 {
18863 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18864 so if the register is available at all, then we can move data of
18865 the given mode into or out of it. */
18866 return (VALID_MMX_REG_MODE (mode)
18867 || VALID_MMX_REG_MODE_3DNOW (mode));
18868 }
18869
18870 if (mode == QImode)
18871 {
18872 /* Take care with QImode values - they can live in non-QI regs,
18873 but then they cause partial register stalls. */
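  /* For example (an illustrative note, based on the regno < 4 test below):
     in 32-bit mode only the first four integer registers (regnos 0-3,
     those with low byte parts %al, %dl, %cl, %bl) have addressable QImode
     subregisters, so QImode lands elsewhere only when partial register
     stalls are not a concern or reload is already running.  */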
18874 if (regno < 4 || TARGET_64BIT)
18875 return 1;
18876 if (!TARGET_PARTIAL_REG_STALL)
18877 return 1;
18878 return reload_in_progress || reload_completed;
18879 }
18880 /* We handle both integers and floats in the general purpose registers. */
18881 else if (VALID_INT_MODE_P (mode))
18882 return 1;
18883 else if (VALID_FP_MODE_P (mode))
18884 return 1;
18885 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18886 on to use that value in smaller contexts, this can easily force a
18887 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18888 supporting DImode, allow it. */
18889 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18890 return 1;
18891
18892 return 0;
18893 }
18894
18895 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18896 tieable integer mode. */
18897
18898 static bool
18899 ix86_tieable_integer_mode_p (enum machine_mode mode)
18900 {
18901 switch (mode)
18902 {
18903 case HImode:
18904 case SImode:
18905 return true;
18906
18907 case QImode:
18908 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18909
18910 case DImode:
18911 return TARGET_64BIT;
18912
18913 default:
18914 return false;
18915 }
18916 }
18917
18918 /* Return true if MODE1 is accessible in a register that can hold MODE2
18919 without copying. That is, all register classes that can hold MODE2
18920 can also hold MODE1. */
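/* Illustrative examples implied by the checks below: HImode and SImode
   always tie with each other; DImode ties with them only on 64-bit
   targets; SFmode and DFmode tie with XFmode; and a 16-byte SSE mode
   ties only with another 16-byte SSE mode.  */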
18921
18922 bool
18923 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18924 {
18925 if (mode1 == mode2)
18926 return true;
18927
18928 if (ix86_tieable_integer_mode_p (mode1)
18929 && ix86_tieable_integer_mode_p (mode2))
18930 return true;
18931
18932 /* MODE2 being XFmode implies fp stack or general regs, which means we
18933 can tie any smaller floating point modes to it. Note that we do not
18934 tie this with TFmode. */
18935 if (mode2 == XFmode)
18936 return mode1 == SFmode || mode1 == DFmode;
18937
18938 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18939 that we can tie it with SFmode. */
18940 if (mode2 == DFmode)
18941 return mode1 == SFmode;
18942
18943 /* If MODE2 is only appropriate for an SSE register, then tie with
18944 any other mode acceptable to SSE registers. */
18945 if (GET_MODE_SIZE (mode2) == 16
18946 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18947 return (GET_MODE_SIZE (mode1) == 16
18948 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18949
18950 /* If MODE2 is appropriate for an MMX register, then tie
18951 with any other mode acceptable to MMX registers. */
18952 if (GET_MODE_SIZE (mode2) == 8
18953 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18954 return (GET_MODE_SIZE (mode1) == 8
18955 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18956
18957 return false;
18958 }
18959
18960 /* Return the cost of moving data of mode M between a
18961 register and memory. A value of 2 is the default; this cost is
18962 relative to those in `REGISTER_MOVE_COST'.
18963
18964 If moving between registers and memory is more expensive than
18965 between two registers, you should define this macro to express the
18966 relative cost.
18967
18968 Also model the increased cost of moving QImode registers in non
18969 Q_REGS classes.
18970 */
18971 int
18972 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18973 {
18974 if (FLOAT_CLASS_P (class))
18975 {
18976 int index;
18977 switch (mode)
18978 {
18979 case SFmode:
18980 index = 0;
18981 break;
18982 case DFmode:
18983 index = 1;
18984 break;
18985 case XFmode:
18986 index = 2;
18987 break;
18988 default:
18989 return 100;
18990 }
18991 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18992 }
18993 if (SSE_CLASS_P (class))
18994 {
18995 int index;
18996 switch (GET_MODE_SIZE (mode))
18997 {
18998 case 4:
18999 index = 0;
19000 break;
19001 case 8:
19002 index = 1;
19003 break;
19004 case 16:
19005 index = 2;
19006 break;
19007 default:
19008 return 100;
19009 }
19010 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19011 }
19012 if (MMX_CLASS_P (class))
19013 {
19014 int index;
19015 switch (GET_MODE_SIZE (mode))
19016 {
19017 case 4:
19018 index = 0;
19019 break;
19020 case 8:
19021 index = 1;
19022 break;
19023 default:
19024 return 100;
19025 }
19026 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19027 }
19028 switch (GET_MODE_SIZE (mode))
19029 {
19030 case 1:
19031 if (in)
19032 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19033 : ix86_cost->movzbl_load);
19034 else
19035 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19036 : ix86_cost->int_store[0] + 4);
19037 break;
19038 case 2:
19039 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19040 default:
19041 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19042 if (mode == TFmode)
19043 mode = XFmode;
19044 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19045 * (((int) GET_MODE_SIZE (mode)
19046 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19047 }
19048 }
19049
19050 /* Compute a (partial) cost for rtx X. Return true if the complete
19051 cost has been computed, and false if subexpressions should be
19052 scanned. In either case, *TOTAL contains the cost result. */
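/* A small illustrative reading of the cases below: a constant shift by 1
   is costed like an add, a shift by 2 or 3 may be costed as an lea when
   that is cheaper than a constant shift, and a widening multiply is
   costed using the narrower operand mode.  */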
19053
19054 static bool
19055 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19056 {
19057 enum machine_mode mode = GET_MODE (x);
19058
19059 switch (code)
19060 {
19061 case CONST_INT:
19062 case CONST:
19063 case LABEL_REF:
19064 case SYMBOL_REF:
19065 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19066 *total = 3;
19067 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19068 *total = 2;
19069 else if (flag_pic && SYMBOLIC_CONST (x)
19070 && (!TARGET_64BIT
19071 || (GET_CODE (x) != LABEL_REF
19072 && (GET_CODE (x) != SYMBOL_REF
19073 || !SYMBOL_REF_LOCAL_P (x)))))
19074 *total = 1;
19075 else
19076 *total = 0;
19077 return true;
19078
19079 case CONST_DOUBLE:
19080 if (mode == VOIDmode)
19081 *total = 0;
19082 else
19083 switch (standard_80387_constant_p (x))
19084 {
19085 case 1: /* 0.0 */
19086 *total = 1;
19087 break;
19088 default: /* Other constants */
19089 *total = 2;
19090 break;
19091 case 0:
19092 case -1:
19093 /* Start with (MEM (SYMBOL_REF)), since that's where
19094 it'll probably end up. Add a penalty for size. */
19095 *total = (COSTS_N_INSNS (1)
19096 + (flag_pic != 0 && !TARGET_64BIT)
19097 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19098 break;
19099 }
19100 return true;
19101
19102 case ZERO_EXTEND:
19103 /* The zero extension is often completely free on x86_64, so make
19104 it as cheap as possible. */
19105 if (TARGET_64BIT && mode == DImode
19106 && GET_MODE (XEXP (x, 0)) == SImode)
19107 *total = 1;
19108 else if (TARGET_ZERO_EXTEND_WITH_AND)
19109 *total = ix86_cost->add;
19110 else
19111 *total = ix86_cost->movzx;
19112 return false;
19113
19114 case SIGN_EXTEND:
19115 *total = ix86_cost->movsx;
19116 return false;
19117
19118 case ASHIFT:
19119 if (CONST_INT_P (XEXP (x, 1))
19120 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19121 {
19122 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19123 if (value == 1)
19124 {
19125 *total = ix86_cost->add;
19126 return false;
19127 }
19128 if ((value == 2 || value == 3)
19129 && ix86_cost->lea <= ix86_cost->shift_const)
19130 {
19131 *total = ix86_cost->lea;
19132 return false;
19133 }
19134 }
19135 /* FALLTHRU */
19136
19137 case ROTATE:
19138 case ASHIFTRT:
19139 case LSHIFTRT:
19140 case ROTATERT:
19141 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19142 {
19143 if (CONST_INT_P (XEXP (x, 1)))
19144 {
19145 if (INTVAL (XEXP (x, 1)) > 32)
19146 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19147 else
19148 *total = ix86_cost->shift_const * 2;
19149 }
19150 else
19151 {
19152 if (GET_CODE (XEXP (x, 1)) == AND)
19153 *total = ix86_cost->shift_var * 2;
19154 else
19155 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19156 }
19157 }
19158 else
19159 {
19160 if (CONST_INT_P (XEXP (x, 1)))
19161 *total = ix86_cost->shift_const;
19162 else
19163 *total = ix86_cost->shift_var;
19164 }
19165 return false;
19166
19167 case MULT:
19168 if (FLOAT_MODE_P (mode))
19169 {
19170 *total = ix86_cost->fmul;
19171 return false;
19172 }
19173 else
19174 {
19175 rtx op0 = XEXP (x, 0);
19176 rtx op1 = XEXP (x, 1);
19177 int nbits;
19178 if (CONST_INT_P (XEXP (x, 1)))
19179 {
19180 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19181 for (nbits = 0; value != 0; value &= value - 1)
19182 nbits++;
19183 }
19184 else
19185 /* This is arbitrary. */
19186 nbits = 7;
19187
19188 /* Compute costs correctly for widening multiplication. */
19189 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19190 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19191 == GET_MODE_SIZE (mode))
19192 {
19193 int is_mulwiden = 0;
19194 enum machine_mode inner_mode = GET_MODE (op0);
19195
19196 if (GET_CODE (op0) == GET_CODE (op1))
19197 is_mulwiden = 1, op1 = XEXP (op1, 0);
19198 else if (CONST_INT_P (op1))
19199 {
19200 if (GET_CODE (op0) == SIGN_EXTEND)
19201 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19202 == INTVAL (op1);
19203 else
19204 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19205 }
19206
19207 if (is_mulwiden)
19208 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19209 }
19210
19211 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19212 + nbits * ix86_cost->mult_bit
19213 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19214
19215 return true;
19216 }
19217
19218 case DIV:
19219 case UDIV:
19220 case MOD:
19221 case UMOD:
19222 if (FLOAT_MODE_P (mode))
19223 *total = ix86_cost->fdiv;
19224 else
19225 *total = ix86_cost->divide[MODE_INDEX (mode)];
19226 return false;
19227
19228 case PLUS:
19229 if (FLOAT_MODE_P (mode))
19230 *total = ix86_cost->fadd;
19231 else if (GET_MODE_CLASS (mode) == MODE_INT
19232 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19233 {
19234 if (GET_CODE (XEXP (x, 0)) == PLUS
19235 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19236 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19237 && CONSTANT_P (XEXP (x, 1)))
19238 {
19239 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19240 if (val == 2 || val == 4 || val == 8)
19241 {
19242 *total = ix86_cost->lea;
19243 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19244 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19245 outer_code);
19246 *total += rtx_cost (XEXP (x, 1), outer_code);
19247 return true;
19248 }
19249 }
19250 else if (GET_CODE (XEXP (x, 0)) == MULT
19251 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19252 {
19253 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19254 if (val == 2 || val == 4 || val == 8)
19255 {
19256 *total = ix86_cost->lea;
19257 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19258 *total += rtx_cost (XEXP (x, 1), outer_code);
19259 return true;
19260 }
19261 }
19262 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19263 {
19264 *total = ix86_cost->lea;
19265 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19266 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19267 *total += rtx_cost (XEXP (x, 1), outer_code);
19268 return true;
19269 }
19270 }
19271 /* FALLTHRU */
19272
19273 case MINUS:
19274 if (FLOAT_MODE_P (mode))
19275 {
19276 *total = ix86_cost->fadd;
19277 return false;
19278 }
19279 /* FALLTHRU */
19280
19281 case AND:
19282 case IOR:
19283 case XOR:
19284 if (!TARGET_64BIT && mode == DImode)
19285 {
19286 *total = (ix86_cost->add * 2
19287 + (rtx_cost (XEXP (x, 0), outer_code)
19288 << (GET_MODE (XEXP (x, 0)) != DImode))
19289 + (rtx_cost (XEXP (x, 1), outer_code)
19290 << (GET_MODE (XEXP (x, 1)) != DImode)));
19291 return true;
19292 }
19293 /* FALLTHRU */
19294
19295 case NEG:
19296 if (FLOAT_MODE_P (mode))
19297 {
19298 *total = ix86_cost->fchs;
19299 return false;
19300 }
19301 /* FALLTHRU */
19302
19303 case NOT:
19304 if (!TARGET_64BIT && mode == DImode)
19305 *total = ix86_cost->add * 2;
19306 else
19307 *total = ix86_cost->add;
19308 return false;
19309
19310 case COMPARE:
19311 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19312 && XEXP (XEXP (x, 0), 1) == const1_rtx
19313 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19314 && XEXP (x, 1) == const0_rtx)
19315 {
19316 /* This kind of construct is implemented using test[bwl].
19317 Treat it as if we had an AND. */
19318 *total = (ix86_cost->add
19319 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19320 + rtx_cost (const1_rtx, outer_code));
19321 return true;
19322 }
19323 return false;
19324
19325 case FLOAT_EXTEND:
19326 if (!TARGET_SSE_MATH
19327 || mode == XFmode
19328 || (mode == DFmode && !TARGET_SSE2))
19329 *total = 0;
19330 return false;
19331
19332 case ABS:
19333 if (FLOAT_MODE_P (mode))
19334 *total = ix86_cost->fabs;
19335 return false;
19336
19337 case SQRT:
19338 if (FLOAT_MODE_P (mode))
19339 *total = ix86_cost->fsqrt;
19340 return false;
19341
19342 case UNSPEC:
19343 if (XINT (x, 1) == UNSPEC_TP)
19344 *total = 0;
19345 return false;
19346
19347 default:
19348 return false;
19349 }
19350 }
19351
19352 #if TARGET_MACHO
19353
19354 static int current_machopic_label_num;
19355
19356 /* Given a symbol name and its associated stub, write out the
19357 definition of the stub. */
19358
19359 void
19360 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19361 {
19362 unsigned int length;
19363 char *binder_name, *symbol_name, lazy_ptr_name[32];
19364 int label = ++current_machopic_label_num;
19365
19366 /* For 64-bit we shouldn't get here. */
19367 gcc_assert (!TARGET_64BIT);
19368
19369 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19370 symb = (*targetm.strip_name_encoding) (symb);
19371
19372 length = strlen (stub);
19373 binder_name = alloca (length + 32);
19374 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19375
19376 length = strlen (symb);
19377 symbol_name = alloca (length + 32);
19378 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19379
19380 sprintf (lazy_ptr_name, "L%d$lz", label);
19381
19382 if (MACHOPIC_PURE)
19383 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19384 else
19385 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19386
19387 fprintf (file, "%s:\n", stub);
19388 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19389
19390 if (MACHOPIC_PURE)
19391 {
19392 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19393 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19394 fprintf (file, "\tjmp\t*%%edx\n");
19395 }
19396 else
19397 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19398
19399 fprintf (file, "%s:\n", binder_name);
19400
19401 if (MACHOPIC_PURE)
19402 {
19403 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19404 fprintf (file, "\tpushl\t%%eax\n");
19405 }
19406 else
19407 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19408
19409 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19410
19411 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19412 fprintf (file, "%s:\n", lazy_ptr_name);
19413 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19414 fprintf (file, "\t.long %s\n", binder_name);
19415 }
19416
19417 void
19418 darwin_x86_file_end (void)
19419 {
19420 darwin_file_end ();
19421 ix86_file_end ();
19422 }
19423 #endif /* TARGET_MACHO */
19424
19425 /* Order the registers for register allocator. */
19426
19427 void
19428 x86_order_regs_for_local_alloc (void)
19429 {
19430 int pos = 0;
19431 int i;
19432
19433 /* First allocate the local general purpose registers. */
19434 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19435 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19436 reg_alloc_order [pos++] = i;
19437
19438 /* Global general purpose registers. */
19439 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19440 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19441 reg_alloc_order [pos++] = i;
19442
19443 /* x87 registers come first in case we are doing FP math
19444 using them. */
19445 if (!TARGET_SSE_MATH)
19446 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19447 reg_alloc_order [pos++] = i;
19448
19449 /* SSE registers. */
19450 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19451 reg_alloc_order [pos++] = i;
19452 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19453 reg_alloc_order [pos++] = i;
19454
19455 /* x87 registers. */
19456 if (TARGET_SSE_MATH)
19457 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19458 reg_alloc_order [pos++] = i;
19459
19460 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19461 reg_alloc_order [pos++] = i;
19462
19463 /* Initialize the rest of the array, as we do not allocate some registers
19464 at all. */
19465 while (pos < FIRST_PSEUDO_REGISTER)
19466 reg_alloc_order [pos++] = 0;
19467 }
19468
19469 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19470 struct attribute_spec.handler. */
19471 static tree
19472 ix86_handle_struct_attribute (tree *node, tree name,
19473 tree args ATTRIBUTE_UNUSED,
19474 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19475 {
19476 tree *type = NULL;
19477 if (DECL_P (*node))
19478 {
19479 if (TREE_CODE (*node) == TYPE_DECL)
19480 type = &TREE_TYPE (*node);
19481 }
19482 else
19483 type = node;
19484
19485 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19486 || TREE_CODE (*type) == UNION_TYPE)))
19487 {
19488 warning (OPT_Wattributes, "%qs attribute ignored",
19489 IDENTIFIER_POINTER (name));
19490 *no_add_attrs = true;
19491 }
19492
19493 else if ((is_attribute_p ("ms_struct", name)
19494 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19495 || ((is_attribute_p ("gcc_struct", name)
19496 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19497 {
19498 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19499 IDENTIFIER_POINTER (name));
19500 *no_add_attrs = true;
19501 }
19502
19503 return NULL_TREE;
19504 }
19505
19506 static bool
19507 ix86_ms_bitfield_layout_p (tree record_type)
19508 {
19509 return (TARGET_MS_BITFIELD_LAYOUT &&
19510 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19511 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19512 }
19513
19514 /* Returns an expression indicating where the this parameter is
19515 located on entry to the FUNCTION. */
19516
19517 static rtx
19518 x86_this_parameter (tree function)
19519 {
19520 tree type = TREE_TYPE (function);
19521
19522 if (TARGET_64BIT)
19523 {
19524 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19525 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19526 }
19527
19528 if (ix86_function_regparm (type, function) > 0)
19529 {
19530 tree parm;
19531
19532 parm = TYPE_ARG_TYPES (type);
19533 /* Figure out whether or not the function has a variable number of
19534 arguments. */
19535 for (; parm; parm = TREE_CHAIN (parm))
19536 if (TREE_VALUE (parm) == void_type_node)
19537 break;
19538 /* If not, the this parameter is in the first argument. */
19539 if (parm)
19540 {
19541 int regno = 0;
19542 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19543 regno = 2;
19544 return gen_rtx_REG (SImode, regno);
19545 }
19546 }
19547
19548 if (aggregate_value_p (TREE_TYPE (type), type))
19549 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19550 else
19551 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19552 }
19553
19554 /* Determine whether x86_output_mi_thunk can succeed. */
19555
19556 static bool
19557 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19558 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19559 HOST_WIDE_INT vcall_offset, tree function)
19560 {
19561 /* 64-bit can handle anything. */
19562 if (TARGET_64BIT)
19563 return true;
19564
19565 /* For 32-bit, everything's fine if we have one free register. */
19566 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19567 return true;
19568
19569 /* Need a free register for vcall_offset. */
19570 if (vcall_offset)
19571 return false;
19572
19573 /* Need a free register for GOT references. */
19574 if (flag_pic && !(*targetm.binds_local_p) (function))
19575 return false;
19576
19577 /* Otherwise ok. */
19578 return true;
19579 }
19580
19581 /* Output the assembler code for a thunk function. THUNK_DECL is the
19582 declaration for the thunk function itself, FUNCTION is the decl for
19583 the target function. DELTA is an immediate constant offset to be
19584 added to THIS. If VCALL_OFFSET is nonzero, the word at
19585 *(*this + vcall_offset) should be added to THIS. */
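/* For a rough idea of the output (an illustrative sketch, not the exact
   text emitted in every configuration): a simple 32-bit non-PIC thunk
   with only a DELTA adjustment comes out as something like

	addl	$DELTA, 4(%esp)
	jmp	function

   while a vcall adjustment additionally loads the vtable slot through a
   scratch register (%ecx, or %eax for fastcall), as in the code below.  */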
19586
19587 static void
19588 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19589 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19590 HOST_WIDE_INT vcall_offset, tree function)
19591 {
19592 rtx xops[3];
19593 rtx this = x86_this_parameter (function);
19594 rtx this_reg, tmp;
19595
19596 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19597 pull it in now and let DELTA benefit. */
19598 if (REG_P (this))
19599 this_reg = this;
19600 else if (vcall_offset)
19601 {
19602 /* Put the this parameter into %eax. */
19603 xops[0] = this;
19604 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19605 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19606 }
19607 else
19608 this_reg = NULL_RTX;
19609
19610 /* Adjust the this parameter by a fixed constant. */
19611 if (delta)
19612 {
19613 xops[0] = GEN_INT (delta);
19614 xops[1] = this_reg ? this_reg : this;
19615 if (TARGET_64BIT)
19616 {
19617 if (!x86_64_general_operand (xops[0], DImode))
19618 {
19619 tmp = gen_rtx_REG (DImode, R10_REG);
19620 xops[1] = tmp;
19621 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19622 xops[0] = tmp;
19623 xops[1] = this;
19624 }
19625 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19626 }
19627 else
19628 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19629 }
19630
19631 /* Adjust the this parameter by a value stored in the vtable. */
19632 if (vcall_offset)
19633 {
19634 if (TARGET_64BIT)
19635 tmp = gen_rtx_REG (DImode, R10_REG);
19636 else
19637 {
19638 int tmp_regno = 2 /* ECX */;
19639 if (lookup_attribute ("fastcall",
19640 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19641 tmp_regno = 0 /* EAX */;
19642 tmp = gen_rtx_REG (SImode, tmp_regno);
19643 }
19644
19645 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19646 xops[1] = tmp;
19647 if (TARGET_64BIT)
19648 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19649 else
19650 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19651
19652 /* Adjust the this parameter. */
19653 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19654 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19655 {
19656 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19657 xops[0] = GEN_INT (vcall_offset);
19658 xops[1] = tmp2;
19659 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19660 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19661 }
19662 xops[1] = this_reg;
19663 if (TARGET_64BIT)
19664 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19665 else
19666 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19667 }
19668
19669 /* If necessary, drop THIS back to its stack slot. */
19670 if (this_reg && this_reg != this)
19671 {
19672 xops[0] = this_reg;
19673 xops[1] = this;
19674 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19675 }
19676
19677 xops[0] = XEXP (DECL_RTL (function), 0);
19678 if (TARGET_64BIT)
19679 {
19680 if (!flag_pic || (*targetm.binds_local_p) (function))
19681 output_asm_insn ("jmp\t%P0", xops);
19682 else
19683 {
19684 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19685 tmp = gen_rtx_CONST (Pmode, tmp);
19686 tmp = gen_rtx_MEM (QImode, tmp);
19687 xops[0] = tmp;
19688 output_asm_insn ("jmp\t%A0", xops);
19689 }
19690 }
19691 else
19692 {
19693 if (!flag_pic || (*targetm.binds_local_p) (function))
19694 output_asm_insn ("jmp\t%P0", xops);
19695 else
19696 #if TARGET_MACHO
19697 if (TARGET_MACHO)
19698 {
19699 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19700 tmp = (gen_rtx_SYMBOL_REF
19701 (Pmode,
19702 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19703 tmp = gen_rtx_MEM (QImode, tmp);
19704 xops[0] = tmp;
19705 output_asm_insn ("jmp\t%0", xops);
19706 }
19707 else
19708 #endif /* TARGET_MACHO */
19709 {
19710 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19711 output_set_got (tmp, NULL_RTX);
19712
19713 xops[1] = tmp;
19714 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19715 output_asm_insn ("jmp\t{*}%1", xops);
19716 }
19717 }
19718 }
19719
19720 static void
19721 x86_file_start (void)
19722 {
19723 default_file_start ();
19724 #if TARGET_MACHO
19725 darwin_file_start ();
19726 #endif
19727 if (X86_FILE_START_VERSION_DIRECTIVE)
19728 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19729 if (X86_FILE_START_FLTUSED)
19730 fputs ("\t.global\t__fltused\n", asm_out_file);
19731 if (ix86_asm_dialect == ASM_INTEL)
19732 fputs ("\t.intel_syntax\n", asm_out_file);
19733 }
19734
19735 int
19736 x86_field_alignment (tree field, int computed)
19737 {
19738 enum machine_mode mode;
19739 tree type = TREE_TYPE (field);
19740
19741 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19742 return computed;
19743 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19744 ? get_inner_array_type (type) : type);
19745 if (mode == DFmode || mode == DCmode
19746 || GET_MODE_CLASS (mode) == MODE_INT
19747 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19748 return MIN (32, computed);
19749 return computed;
19750 }
19751
19752 /* Output assembler code to FILE to increment profiler label # LABELNO
19753 for profiling a function entry. */
19754 void
19755 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19756 {
19757 if (TARGET_64BIT)
19758 if (flag_pic)
19759 {
19760 #ifndef NO_PROFILE_COUNTERS
19761 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19762 #endif
19763 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19764 }
19765 else
19766 {
19767 #ifndef NO_PROFILE_COUNTERS
19768 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19769 #endif
19770 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19771 }
19772 else if (flag_pic)
19773 {
19774 #ifndef NO_PROFILE_COUNTERS
19775 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19776 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19777 #endif
19778 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19779 }
19780 else
19781 {
19782 #ifndef NO_PROFILE_COUNTERS
19783 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19784 PROFILE_COUNT_REGISTER);
19785 #endif
19786 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19787 }
19788 }
19789
19790 /* We don't have exact information about the insn sizes, but we may assume
19791 quite safely that we are informed about all 1 byte insns and memory
19792 address sizes. This is enough to eliminate unnecessary padding in
19793 99% of cases. */
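/* Illustrative summary of the estimates below: alignment insns and
   dispatch tables count as 0 bytes, direct symbolic calls as 5 bytes,
   known 1-byte insns as 1; otherwise a non-jump insn counts as 1 byte
   plus its address size (at least 4 when a symbol is mentioned), and
   anything else as 2 bytes.  */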
19794
19795 static int
19796 min_insn_size (rtx insn)
19797 {
19798 int l = 0;
19799
19800 if (!INSN_P (insn) || !active_insn_p (insn))
19801 return 0;
19802
19803 /* Discard alignments we've emitted, and jump instructions. */
19804 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19805 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19806 return 0;
19807 if (JUMP_P (insn)
19808 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19809 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19810 return 0;
19811
19812 /* Important case - calls are always 5 bytes.
19813 It is common to have many calls in a row. */
19814 if (CALL_P (insn)
19815 && symbolic_reference_mentioned_p (PATTERN (insn))
19816 && !SIBLING_CALL_P (insn))
19817 return 5;
19818 if (get_attr_length (insn) <= 1)
19819 return 1;
19820
19821 /* For normal instructions we may rely on the sizes of addresses
19822 and the presence of a symbol to require 4 bytes of encoding.
19823 This is not the case for jumps, where references are PC relative. */
19824 if (!JUMP_P (insn))
19825 {
19826 l = get_attr_length_address (insn);
19827 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19828 l = 4;
19829 }
19830 if (l)
19831 return 1+l;
19832 else
19833 return 2;
19834 }
19835
19836 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
19837 window. */
19838
19839 static void
19840 ix86_avoid_jump_misspredicts (void)
19841 {
19842 rtx insn, start = get_insns ();
19843 int nbytes = 0, njumps = 0;
19844 int isjump = 0;
19845
19846 /* Look for all minimal intervals of instructions containing 4 jumps.
19847 The intervals are bounded by START and INSN. NBYTES is the total
19848 size of instructions in the interval including INSN and not including
19849 START. When NBYTES is smaller than 16 bytes, it is possible
19850 that the ends of START and INSN land in the same 16-byte page.
19851
19852 The smallest offset in the page at which INSN can start occurs when START
19853 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19854 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
19855 */
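  /* A worked example (illustrative): if the interval holds NBYTES = 12
     bytes and INSN itself is estimated at 2 bytes, the code below emits
     an align insn before INSN with padsize 15 - 12 + 2 = 5, enough to
     push INSN out of the 16-byte window that already contains the other
     three jumps.  */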
19856 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19857 {
19858
19859 nbytes += min_insn_size (insn);
19860 if (dump_file)
19861 fprintf (dump_file, "Insn %i estimated to %i bytes\n",
19862 INSN_UID (insn), min_insn_size (insn));
19863 if ((JUMP_P (insn)
19864 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19865 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19866 || CALL_P (insn))
19867 njumps++;
19868 else
19869 continue;
19870
19871 while (njumps > 3)
19872 {
19873 start = NEXT_INSN (start);
19874 if ((JUMP_P (start)
19875 && GET_CODE (PATTERN (start)) != ADDR_VEC
19876 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19877 || CALL_P (start))
19878 njumps--, isjump = 1;
19879 else
19880 isjump = 0;
19881 nbytes -= min_insn_size (start);
19882 }
19883 gcc_assert (njumps >= 0);
19884 if (dump_file)
19885 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19886 INSN_UID (start), INSN_UID (insn), nbytes);
19887
19888 if (njumps == 3 && isjump && nbytes < 16)
19889 {
19890 int padsize = 15 - nbytes + min_insn_size (insn);
19891
19892 if (dump_file)
19893 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19894 INSN_UID (insn), padsize);
19895 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19896 }
19897 }
19898 }
19899
19900 /* AMD Athlon works faster
19901 when RET is not the destination of a conditional jump or directly preceded
19902 by another jump instruction. We avoid the penalty by inserting a NOP just
19903 before the RET instruction in such cases. */
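/* Illustrative note: the fix below swaps the plain return for
   gen_return_internal_long, a longer-encoded return (effectively a
   prefixed, "rep; ret" style sequence on these CPUs), so the predictor
   no longer sees a bare 1-byte RET immediately after another branch.  */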
19904 static void
19905 ix86_pad_returns (void)
19906 {
19907 edge e;
19908 edge_iterator ei;
19909
19910 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19911 {
19912 basic_block bb = e->src;
19913 rtx ret = BB_END (bb);
19914 rtx prev;
19915 bool replace = false;
19916
19917 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19918 || !maybe_hot_bb_p (bb))
19919 continue;
19920 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19921 if (active_insn_p (prev) || LABEL_P (prev))
19922 break;
19923 if (prev && LABEL_P (prev))
19924 {
19925 edge e;
19926 edge_iterator ei;
19927
19928 FOR_EACH_EDGE (e, ei, bb->preds)
19929 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19930 && !(e->flags & EDGE_FALLTHRU))
19931 replace = true;
19932 }
19933 if (!replace)
19934 {
19935 prev = prev_active_insn (ret);
19936 if (prev
19937 && ((JUMP_P (prev) && any_condjump_p (prev))
19938 || CALL_P (prev)))
19939 replace = true;
19940 /* Empty functions get a branch mispredict even when the jump destination
19941 is not visible to us. */
19942 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19943 replace = true;
19944 }
19945 if (replace)
19946 {
19947 emit_insn_before (gen_return_internal_long (), ret);
19948 delete_insn (ret);
19949 }
19950 }
19951 }
19952
19953 /* Implement machine specific optimizations. We implement padding of returns
19954 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19955 static void
19956 ix86_reorg (void)
19957 {
19958 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19959 ix86_pad_returns ();
19960 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19961 ix86_avoid_jump_misspredicts ();
19962 }
19963
19964 /* Return nonzero when a QImode register that must be represented via a REX
19965 prefix is used. */
19966 bool
19967 x86_extended_QIreg_mentioned_p (rtx insn)
19968 {
19969 int i;
19970 extract_insn_cached (insn);
19971 for (i = 0; i < recog_data.n_operands; i++)
19972 if (REG_P (recog_data.operand[i])
19973 && REGNO (recog_data.operand[i]) >= 4)
19974 return true;
19975 return false;
19976 }
19977
19978 /* Return nonzero when P points to a register encoded via a REX prefix.
19979 Called via for_each_rtx. */
19980 static int
19981 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19982 {
19983 unsigned int regno;
19984 if (!REG_P (*p))
19985 return 0;
19986 regno = REGNO (*p);
19987 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19988 }
19989
19990 /* Return true when INSN mentions a register that must be encoded using a
19991 REX prefix. */
19992 bool
19993 x86_extended_reg_mentioned_p (rtx insn)
19994 {
19995 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19996 }
19997
19998 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19999 optabs would emit if we didn't have TFmode patterns. */
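/* Sketch of the expansion below: a nonnegative input is converted
   directly; a negative (i.e. large unsigned) input is halved while the
   low bit is kept as a rounding bit (i0 = (in >> 1) | (in & 1)), that
   value is converted, and the result is doubled with a floating add.  */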
20000
20001 void
20002 x86_emit_floatuns (rtx operands[2])
20003 {
20004 rtx neglab, donelab, i0, i1, f0, in, out;
20005 enum machine_mode mode, inmode;
20006
20007 inmode = GET_MODE (operands[1]);
20008 gcc_assert (inmode == SImode || inmode == DImode);
20009
20010 out = operands[0];
20011 in = force_reg (inmode, operands[1]);
20012 mode = GET_MODE (out);
20013 neglab = gen_label_rtx ();
20014 donelab = gen_label_rtx ();
20015 f0 = gen_reg_rtx (mode);
20016
20017 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20018
20019 expand_float (out, in, 0);
20020
20021 emit_jump_insn (gen_jump (donelab));
20022 emit_barrier ();
20023
20024 emit_label (neglab);
20025
20026 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20027 1, OPTAB_DIRECT);
20028 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20029 1, OPTAB_DIRECT);
20030 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20031
20032 expand_float (f0, i0, 0);
20033
20034 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20035
20036 emit_label (donelab);
20037 }
20038 \f
20039 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20040 with all elements equal to VAR. Return true if successful. */
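/* Illustrative note on the "widen" strategy shared by several cases
   below: replicate VAL into the next wider scalar mode
   (val | (val << bits)), then recurse on the corresponding wider vector
   mode and take the lowpart of the result.  */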
20041
20042 static bool
20043 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20044 rtx target, rtx val)
20045 {
20046 enum machine_mode smode, wsmode, wvmode;
20047 rtx x;
20048
20049 switch (mode)
20050 {
20051 case V2SImode:
20052 case V2SFmode:
20053 if (!mmx_ok)
20054 return false;
20055 /* FALLTHRU */
20056
20057 case V2DFmode:
20058 case V2DImode:
20059 case V4SFmode:
20060 case V4SImode:
20061 val = force_reg (GET_MODE_INNER (mode), val);
20062 x = gen_rtx_VEC_DUPLICATE (mode, val);
20063 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20064 return true;
20065
20066 case V4HImode:
20067 if (!mmx_ok)
20068 return false;
20069 if (TARGET_SSE || TARGET_3DNOW_A)
20070 {
20071 val = gen_lowpart (SImode, val);
20072 x = gen_rtx_TRUNCATE (HImode, val);
20073 x = gen_rtx_VEC_DUPLICATE (mode, x);
20074 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20075 return true;
20076 }
20077 else
20078 {
20079 smode = HImode;
20080 wsmode = SImode;
20081 wvmode = V2SImode;
20082 goto widen;
20083 }
20084
20085 case V8QImode:
20086 if (!mmx_ok)
20087 return false;
20088 smode = QImode;
20089 wsmode = HImode;
20090 wvmode = V4HImode;
20091 goto widen;
20092 case V8HImode:
20093 if (TARGET_SSE2)
20094 {
20095 rtx tmp1, tmp2;
20096 /* Extend HImode to SImode using a paradoxical SUBREG. */
20097 tmp1 = gen_reg_rtx (SImode);
20098 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20099 /* Insert the SImode value as low element of V4SImode vector. */
20100 tmp2 = gen_reg_rtx (V4SImode);
20101 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20102 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20103 CONST0_RTX (V4SImode),
20104 const1_rtx);
20105 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20106 /* Cast the V4SImode vector back to a V8HImode vector. */
20107 tmp1 = gen_reg_rtx (V8HImode);
20108 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20109 /* Duplicate the low short through the whole low SImode word. */
20110 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20111 /* Cast the V8HImode vector back to a V4SImode vector. */
20112 tmp2 = gen_reg_rtx (V4SImode);
20113 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20114 /* Replicate the low element of the V4SImode vector. */
20115 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20116 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20117 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20118 return true;
20119 }
20120 smode = HImode;
20121 wsmode = SImode;
20122 wvmode = V4SImode;
20123 goto widen;
20124 case V16QImode:
20125 if (TARGET_SSE2)
20126 {
20127 rtx tmp1, tmp2;
20128 /* Extend QImode to SImode using a paradoxical SUBREG. */
20129 tmp1 = gen_reg_rtx (SImode);
20130 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20131 /* Insert the SImode value as low element of V4SImode vector. */
20132 tmp2 = gen_reg_rtx (V4SImode);
20133 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20134 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20135 CONST0_RTX (V4SImode),
20136 const1_rtx);
20137 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20138 /* Cast the V4SImode vector back to a V16QImode vector. */
20139 tmp1 = gen_reg_rtx (V16QImode);
20140 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20141 /* Duplicate the low byte through the whole low SImode word. */
20142 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20143 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20144 /* Cast the V16QImode vector back to a V4SImode vector. */
20145 tmp2 = gen_reg_rtx (V4SImode);
20146 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20147 /* Replicate the low element of the V4SImode vector. */
20148 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20149 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20150 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20151 return true;
20152 }
20153 smode = QImode;
20154 wsmode = HImode;
20155 wvmode = V8HImode;
20156 goto widen;
20157 widen:
20158 /* Replicate the value once into the next wider mode and recurse. */
20159 val = convert_modes (wsmode, smode, val, true);
20160 x = expand_simple_binop (wsmode, ASHIFT, val,
20161 GEN_INT (GET_MODE_BITSIZE (smode)),
20162 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20163 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20164
20165 x = gen_reg_rtx (wvmode);
20166 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20167 gcc_unreachable ();
20168 emit_move_insn (target, gen_lowpart (mode, x));
20169 return true;
20170
20171 default:
20172 return false;
20173 }
20174 }
20175
20176 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20177 whose ONE_VAR element is VAR, and other elements are zero. Return true
20178 if successful. */
20179
20180 static bool
20181 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20182 rtx target, rtx var, int one_var)
20183 {
20184 enum machine_mode vsimode;
20185 rtx new_target;
20186 rtx x, tmp;
20187
20188 switch (mode)
20189 {
20190 case V2SFmode:
20191 case V2SImode:
20192 if (!mmx_ok)
20193 return false;
20194 /* FALLTHRU */
20195
20196 case V2DFmode:
20197 case V2DImode:
20198 if (one_var != 0)
20199 return false;
20200 var = force_reg (GET_MODE_INNER (mode), var);
20201 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20202 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20203 return true;
20204
20205 case V4SFmode:
20206 case V4SImode:
20207 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20208 new_target = gen_reg_rtx (mode);
20209 else
20210 new_target = target;
20211 var = force_reg (GET_MODE_INNER (mode), var);
20212 x = gen_rtx_VEC_DUPLICATE (mode, var);
20213 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20214 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20215 if (one_var != 0)
20216 {
20217 /* We need to shuffle the value to the correct position, so
20218 create a new pseudo to store the intermediate result. */
20219
20220 /* With SSE2, we can use the integer shuffle insns. */
20221 if (mode != V4SFmode && TARGET_SSE2)
20222 {
20223 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20224 GEN_INT (1),
20225 GEN_INT (one_var == 1 ? 0 : 1),
20226 GEN_INT (one_var == 2 ? 0 : 1),
20227 GEN_INT (one_var == 3 ? 0 : 1)));
20228 if (target != new_target)
20229 emit_move_insn (target, new_target);
20230 return true;
20231 }
20232
20233 /* Otherwise convert the intermediate result to V4SFmode and
20234 use the SSE1 shuffle instructions. */
20235 if (mode != V4SFmode)
20236 {
20237 tmp = gen_reg_rtx (V4SFmode);
20238 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20239 }
20240 else
20241 tmp = new_target;
20242
20243 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20244 GEN_INT (1),
20245 GEN_INT (one_var == 1 ? 0 : 1),
20246 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20247 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20248
20249 if (mode != V4SFmode)
20250 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20251 else if (tmp != target)
20252 emit_move_insn (target, tmp);
20253 }
20254 else if (target != new_target)
20255 emit_move_insn (target, new_target);
20256 return true;
20257
20258 case V8HImode:
20259 case V16QImode:
20260 vsimode = V4SImode;
20261 goto widen;
20262 case V4HImode:
20263 case V8QImode:
20264 if (!mmx_ok)
20265 return false;
20266 vsimode = V2SImode;
20267 goto widen;
20268 widen:
20269 if (one_var != 0)
20270 return false;
20271
20272 /* Zero extend the variable element to SImode and recurse. */
20273 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20274
20275 x = gen_reg_rtx (vsimode);
20276 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20277 var, one_var))
20278 gcc_unreachable ();
20279
20280 emit_move_insn (target, gen_lowpart (mode, x));
20281 return true;
20282
20283 default:
20284 return false;
20285 }
20286 }
20287
20288 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20289 consisting of the values in VALS. It is known that all elements
20290 except ONE_VAR are constants. Return true if successful. */
20291
20292 static bool
20293 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20294 rtx target, rtx vals, int one_var)
20295 {
20296 rtx var = XVECEXP (vals, 0, one_var);
20297 enum machine_mode wmode;
20298 rtx const_vec, x;
20299
20300 const_vec = copy_rtx (vals);
20301 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20302 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20303
20304 switch (mode)
20305 {
20306 case V2DFmode:
20307 case V2DImode:
20308 case V2SFmode:
20309 case V2SImode:
20310 /* For the two element vectors, it's just as easy to use
20311 the general case. */
20312 return false;
20313
20314 case V4SFmode:
20315 case V4SImode:
20316 case V8HImode:
20317 case V4HImode:
20318 break;
20319
20320 case V16QImode:
20321 wmode = V8HImode;
20322 goto widen;
20323 case V8QImode:
20324 wmode = V4HImode;
20325 goto widen;
20326 widen:
20327 /* There's no way to set one QImode entry easily. Combine
20328 the variable value with its adjacent constant value, and
20329 promote to an HImode set. */
20330 x = XVECEXP (vals, 0, one_var ^ 1);
20331 if (one_var & 1)
20332 {
20333 var = convert_modes (HImode, QImode, var, true);
20334 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20335 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20336 x = GEN_INT (INTVAL (x) & 0xff);
20337 }
20338 else
20339 {
20340 var = convert_modes (HImode, QImode, var, true);
20341 x = gen_int_mode (INTVAL (x) << 8, HImode);
20342 }
20343 if (x != const0_rtx)
20344 var = expand_simple_binop (HImode, IOR, var, x, var,
20345 1, OPTAB_LIB_WIDEN);
20346
20347 x = gen_reg_rtx (wmode);
20348 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20349 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20350
20351 emit_move_insn (target, gen_lowpart (mode, x));
20352 return true;
20353
20354 default:
20355 return false;
20356 }
20357
20358 emit_move_insn (target, const_vec);
20359 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20360 return true;
20361 }
20362
20363 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20364 all values variable, and none identical. */
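/* Two strategies are used below: two-element vectors (and V4SF/V4SI via
   recursion on halves) are built with VEC_CONCAT, while the
   narrower-element modes are assembled in integer registers one word at
   a time with shifts and IORs and then moved into the vector register.  */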
20365
20366 static void
20367 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20368 rtx target, rtx vals)
20369 {
20370 enum machine_mode half_mode = GET_MODE_INNER (mode);
20371 rtx op0 = NULL, op1 = NULL;
20372 bool use_vec_concat = false;
20373
20374 switch (mode)
20375 {
20376 case V2SFmode:
20377 case V2SImode:
20378 if (!mmx_ok && !TARGET_SSE)
20379 break;
20380 /* FALLTHRU */
20381
20382 case V2DFmode:
20383 case V2DImode:
20384 /* For the two element vectors, we always implement VEC_CONCAT. */
20385 op0 = XVECEXP (vals, 0, 0);
20386 op1 = XVECEXP (vals, 0, 1);
20387 use_vec_concat = true;
20388 break;
20389
20390 case V4SFmode:
20391 half_mode = V2SFmode;
20392 goto half;
20393 case V4SImode:
20394 half_mode = V2SImode;
20395 goto half;
20396 half:
20397 {
20398 rtvec v;
20399
20400 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20401 Recurse to load the two halves. */
20402
20403 op0 = gen_reg_rtx (half_mode);
20404 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20405 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20406
20407 op1 = gen_reg_rtx (half_mode);
20408 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20409 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20410
20411 use_vec_concat = true;
20412 }
20413 break;
20414
20415 case V8HImode:
20416 case V16QImode:
20417 case V4HImode:
20418 case V8QImode:
20419 break;
20420
20421 default:
20422 gcc_unreachable ();
20423 }
20424
20425 if (use_vec_concat)
20426 {
20427 if (!register_operand (op0, half_mode))
20428 op0 = force_reg (half_mode, op0);
20429 if (!register_operand (op1, half_mode))
20430 op1 = force_reg (half_mode, op1);
20431
20432 emit_insn (gen_rtx_SET (VOIDmode, target,
20433 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20434 }
20435 else
20436 {
20437 int i, j, n_elts, n_words, n_elt_per_word;
20438 enum machine_mode inner_mode;
20439 rtx words[4], shift;
20440
20441 inner_mode = GET_MODE_INNER (mode);
20442 n_elts = GET_MODE_NUNITS (mode);
20443 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20444 n_elt_per_word = n_elts / n_words;
20445 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20446
20447 for (i = 0; i < n_words; ++i)
20448 {
20449 rtx word = NULL_RTX;
20450
20451 for (j = 0; j < n_elt_per_word; ++j)
20452 {
20453 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20454 elt = convert_modes (word_mode, inner_mode, elt, true);
20455
20456 if (j == 0)
20457 word = elt;
20458 else
20459 {
20460 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20461 word, 1, OPTAB_LIB_WIDEN);
20462 word = expand_simple_binop (word_mode, IOR, word, elt,
20463 word, 1, OPTAB_LIB_WIDEN);
20464 }
20465 }
20466
20467 words[i] = word;
20468 }
20469
20470 if (n_words == 1)
20471 emit_move_insn (target, gen_lowpart (mode, words[0]));
20472 else if (n_words == 2)
20473 {
20474 rtx tmp = gen_reg_rtx (mode);
20475 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20476 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20477 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20478 emit_move_insn (target, tmp);
20479 }
20480 else if (n_words == 4)
20481 {
20482 rtx tmp = gen_reg_rtx (V4SImode);
20483 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20484 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20485 emit_move_insn (target, gen_lowpart (mode, tmp));
20486 }
20487 else
20488 gcc_unreachable ();
20489 }
20490 }
20491
20492 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20493 instructions unless MMX_OK is true. */
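/* Order of the strategies tried below: all-constant vectors are loaded
   from the constant pool, identical elements use a broadcast, a single
   variable element is patched into an otherwise constant vector, and
   anything else falls back to the general expander.  */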
20494
20495 void
20496 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20497 {
20498 enum machine_mode mode = GET_MODE (target);
20499 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20500 int n_elts = GET_MODE_NUNITS (mode);
20501 int n_var = 0, one_var = -1;
20502 bool all_same = true, all_const_zero = true;
20503 int i;
20504 rtx x;
20505
20506 for (i = 0; i < n_elts; ++i)
20507 {
20508 x = XVECEXP (vals, 0, i);
20509 if (!CONSTANT_P (x))
20510 n_var++, one_var = i;
20511 else if (x != CONST0_RTX (inner_mode))
20512 all_const_zero = false;
20513 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20514 all_same = false;
20515 }
20516
20517 /* Constants are best loaded from the constant pool. */
20518 if (n_var == 0)
20519 {
20520 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20521 return;
20522 }
20523
20524 /* If all values are identical, broadcast the value. */
20525 if (all_same
20526 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20527 XVECEXP (vals, 0, 0)))
20528 return;
20529
20530 /* Values where only one field is non-constant are best loaded from
20531 the pool and overwritten via move later. */
20532 if (n_var == 1)
20533 {
20534 if (all_const_zero
20535 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20536 XVECEXP (vals, 0, one_var),
20537 one_var))
20538 return;
20539
20540 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20541 return;
20542 }
20543
20544 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20545 }
20546
20547 void
20548 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20549 {
20550 enum machine_mode mode = GET_MODE (target);
20551 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20552 bool use_vec_merge = false;
20553 rtx tmp;
20554
20555 switch (mode)
20556 {
20557 case V2SFmode:
20558 case V2SImode:
20559 if (mmx_ok)
20560 {
20561 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20562 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20563 if (elt == 0)
20564 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20565 else
20566 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20567 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20568 return;
20569 }
20570 break;
20571
20572 case V2DFmode:
20573 case V2DImode:
20574 {
20575 rtx op0, op1;
20576
20577 /* For the two element vectors, we implement a VEC_CONCAT with
20578 the extraction of the other element. */
20579
20580 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20581 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20582
20583 if (elt == 0)
20584 op0 = val, op1 = tmp;
20585 else
20586 op0 = tmp, op1 = val;
20587
20588 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20589 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20590 }
20591 return;
20592
20593 case V4SFmode:
20594 switch (elt)
20595 {
20596 case 0:
20597 use_vec_merge = true;
20598 break;
20599
20600 case 1:
20601 /* tmp = target = A B C D */
20602 tmp = copy_to_reg (target);
20603 /* target = A A B B */
20604 emit_insn (gen_sse_unpcklps (target, target, target));
20605 /* target = X A B B */
20606 ix86_expand_vector_set (false, target, val, 0);
20607 /* target = A X C D */
20608 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20609 GEN_INT (1), GEN_INT (0),
20610 GEN_INT (2+4), GEN_INT (3+4)));
20611 return;
20612
20613 case 2:
20614 /* tmp = target = A B C D */
20615 tmp = copy_to_reg (target);
20616 /* tmp = X B C D */
20617 ix86_expand_vector_set (false, tmp, val, 0);
20618 /* target = A B X D */
20619 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20620 GEN_INT (0), GEN_INT (1),
20621 GEN_INT (0+4), GEN_INT (3+4)));
20622 return;
20623
20624 case 3:
20625 /* tmp = target = A B C D */
20626 tmp = copy_to_reg (target);
20627 /* tmp = X B C D */
20628 ix86_expand_vector_set (false, tmp, val, 0);
20629 /* target = A B C X */
20630 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20631 GEN_INT (0), GEN_INT (1),
20632 GEN_INT (2+4), GEN_INT (0+4)));
20633 return;
20634
20635 default:
20636 gcc_unreachable ();
20637 }
20638 break;
20639
20640 case V4SImode:
20641 /* Element 0 handled by vec_merge below. */
20642 if (elt == 0)
20643 {
20644 use_vec_merge = true;
20645 break;
20646 }
20647
20648 if (TARGET_SSE2)
20649 {
20650 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20651 store into element 0, then shuffle them back. */
20652
20653 rtx order[4];
20654
20655 order[0] = GEN_INT (elt);
20656 order[1] = const1_rtx;
20657 order[2] = const2_rtx;
20658 order[3] = GEN_INT (3);
20659 order[elt] = const0_rtx;
20660
20661 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20662 order[1], order[2], order[3]));
20663
20664 ix86_expand_vector_set (false, target, val, 0);
20665
20666 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20667 order[1], order[2], order[3]));
20668 }
20669 else
20670 {
20671 /* For SSE1, we have to reuse the V4SF code. */
20672 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20673 gen_lowpart (SFmode, val), elt);
20674 }
20675 return;
20676
20677 case V8HImode:
20678 use_vec_merge = TARGET_SSE2;
20679 break;
20680 case V4HImode:
20681 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20682 break;
20683
20684 case V16QImode:
20685 case V8QImode:
20686 default:
20687 break;
20688 }
20689
20690 if (use_vec_merge)
20691 {
20692 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20693 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20694 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20695 }
20696 else
20697 {
20698 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20699
20700 emit_move_insn (mem, target);
20701
20702 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20703 emit_move_insn (tmp, val);
20704
20705 emit_move_insn (target, mem);
20706 }
20707 }
20708
20709 void
20710 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20711 {
20712 enum machine_mode mode = GET_MODE (vec);
20713 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20714 bool use_vec_extr = false;
20715 rtx tmp;
20716
20717 switch (mode)
20718 {
20719 case V2SImode:
20720 case V2SFmode:
20721 if (!mmx_ok)
20722 break;
20723 /* FALLTHRU */
20724
20725 case V2DFmode:
20726 case V2DImode:
20727 use_vec_extr = true;
20728 break;
20729
20730 case V4SFmode:
20731 switch (elt)
20732 {
20733 case 0:
20734 tmp = vec;
20735 break;
20736
20737 case 1:
20738 case 3:
20739 tmp = gen_reg_rtx (mode);
20740 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20741 GEN_INT (elt), GEN_INT (elt),
20742 GEN_INT (elt+4), GEN_INT (elt+4)));
20743 break;
20744
20745 case 2:
20746 tmp = gen_reg_rtx (mode);
20747 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20748 break;
20749
20750 default:
20751 gcc_unreachable ();
20752 }
20753 vec = tmp;
20754 use_vec_extr = true;
20755 elt = 0;
20756 break;
20757
20758 case V4SImode:
20759 if (TARGET_SSE2)
20760 {
20761 switch (elt)
20762 {
20763 case 0:
20764 tmp = vec;
20765 break;
20766
20767 case 1:
20768 case 3:
20769 tmp = gen_reg_rtx (mode);
20770 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20771 GEN_INT (elt), GEN_INT (elt),
20772 GEN_INT (elt), GEN_INT (elt)));
20773 break;
20774
20775 case 2:
20776 tmp = gen_reg_rtx (mode);
20777 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20778 break;
20779
20780 default:
20781 gcc_unreachable ();
20782 }
20783 vec = tmp;
20784 use_vec_extr = true;
20785 elt = 0;
20786 }
20787 else
20788 {
20789 /* For SSE1, we have to reuse the V4SF code. */
20790 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20791 gen_lowpart (V4SFmode, vec), elt);
20792 return;
20793 }
20794 break;
20795
20796 case V8HImode:
20797 use_vec_extr = TARGET_SSE2;
20798 break;
20799 case V4HImode:
20800 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20801 break;
20802
20803 case V16QImode:
20804 case V8QImode:
20805 /* ??? Could extract the appropriate HImode element and shift. */
20806 default:
20807 break;
20808 }
20809
20810 if (use_vec_extr)
20811 {
20812 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20813 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20814
20815 /* Let the rtl optimizers know about the zero extension performed. */
20816 if (inner_mode == HImode)
20817 {
20818 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20819 target = gen_lowpart (SImode, target);
20820 }
20821
20822 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20823 }
20824 else
20825 {
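/* No direct extraction pattern available: spill the vector to a stack
temporary and load the selected element back from memory. */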
20826 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20827
20828 emit_move_insn (mem, vec);
20829
20830 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20831 emit_move_insn (target, tmp);
20832 }
20833 }
20834
20835 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20836 pattern to reduce; DEST is the destination; IN is the input vector. */
20837
20838 void
20839 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20840 {
20841 rtx tmp1, tmp2, tmp3;
20842
20843 tmp1 = gen_reg_rtx (V4SFmode);
20844 tmp2 = gen_reg_rtx (V4SFmode);
20845 tmp3 = gen_reg_rtx (V4SFmode);
20846
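/* movhlps brings the high pair of IN into the low half of tmp1; FN then
combines the two halves pairwise into tmp2; the shufps broadcasts
element 1 of tmp2 into tmp3; the final FN leaves the complete
reduction in element 0 of DEST. */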
20847 emit_insn (gen_sse_movhlps (tmp1, in, in));
20848 emit_insn (fn (tmp2, tmp1, in));
20849
20850 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20851 GEN_INT (1), GEN_INT (1),
20852 GEN_INT (1+4), GEN_INT (1+4)));
20853 emit_insn (fn (dest, tmp2, tmp3));
20854 }
20855 \f
20856 /* Target hook for scalar_mode_supported_p. */
20857 static bool
20858 ix86_scalar_mode_supported_p (enum machine_mode mode)
20859 {
20860 if (DECIMAL_FLOAT_MODE_P (mode))
20861 return true;
20862 else
20863 return default_scalar_mode_supported_p (mode);
20864 }
20865
20866 /* Implements target hook vector_mode_supported_p. */
20867 static bool
20868 ix86_vector_mode_supported_p (enum machine_mode mode)
20869 {
20870 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20871 return true;
20872 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20873 return true;
20874 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20875 return true;
20876 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20877 return true;
20878 return false;
20879 }
20880
20881 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20882
20883 We add "flags" and "fpsr" to every asm's clobber list to maintain
20884 source compatibility with the old cc0-based compiler. */
20885
20886 static tree
20887 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20888 tree inputs ATTRIBUTE_UNUSED,
20889 tree clobbers)
20890 {
20891 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20892 clobbers);
20893 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20894 clobbers);
20895 return clobbers;
20896 }
20897
20898 /* Return true if this goes in large data/bss. */
20899
20900 static bool
20901 ix86_in_large_data_p (tree exp)
20902 {
20903 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20904 return false;
20905
20906 /* Functions are never large data. */
20907 if (TREE_CODE (exp) == FUNCTION_DECL)
20908 return false;
20909
20910 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20911 {
20912 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20913 if (strcmp (section, ".ldata") == 0
20914 || strcmp (section, ".lbss") == 0)
20915 return true;
20916 return false;
20917 }
20918 else
20919 {
20920 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20921
20922 /* If this is an incomplete type with size 0, then we can't put it
20923 in data because it might be too big when completed. */
20924 if (!size || size > ix86_section_threshold)
20925 return true;
20926 }
20927
20928 return false;
20929 }
20930 static void
20931 ix86_encode_section_info (tree decl, rtx rtl, int first)
20932 {
20933 default_encode_section_info (decl, rtl, first);
20934
20935 if (TREE_CODE (decl) == VAR_DECL
20936 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20937 && ix86_in_large_data_p (decl))
20938 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20939 }
20940
20941 /* Worker function for REVERSE_CONDITION. */
20942
20943 enum rtx_code
20944 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20945 {
20946 return (mode != CCFPmode && mode != CCFPUmode
20947 ? reverse_condition (code)
20948 : reverse_condition_maybe_unordered (code));
20949 }
20950
20951 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20952 to OPERANDS[0]. */
20953
20954 const char *
20955 output_387_reg_move (rtx insn, rtx *operands)
20956 {
20957 if (REG_P (operands[1])
20958 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20959 {
20960 if (REGNO (operands[0]) == FIRST_STACK_REG)
20961 return output_387_ffreep (operands, 0);
20962 return "fstp\t%y0";
20963 }
20964 if (STACK_TOP_P (operands[0]))
20965 return "fld%z1\t%y1";
20966 return "fst\t%y0";
20967 }
20968
20969 /* Output code to perform a conditional jump to LABEL if the C2 flag in
20970 the FP status register is set. */
20971
20972 void
20973 ix86_emit_fp_unordered_jump (rtx label)
20974 {
20975 rtx reg = gen_reg_rtx (HImode);
20976 rtx temp;
20977
20978 emit_insn (gen_x86_fnstsw_1 (reg));
20979
20980 if (TARGET_USE_SAHF)
20981 {
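/* sahf copies C0/C2/C3 from the FP status word into CF/PF/ZF; an
unordered result sets C2, which the UNORDERED test on the flags
register picks up below. */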
20982 emit_insn (gen_x86_sahf_1 (reg));
20983
20984 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20985 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20986 }
20987 else
20988 {
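/* Without sahf, test the C2 bit directly: C2 is bit 10 of the FP
status word, i.e. bit 2 (0x04) of the high byte tested here. */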
20989 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20990
20991 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20992 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20993 }
20994
20995 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20996 gen_rtx_LABEL_REF (VOIDmode, label),
20997 pc_rtx);
20998 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20999 emit_jump_insn (temp);
21000 }
21001
21002 /* Output code to perform a log1p XFmode calculation. */
21003
21004 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
21005 {
21006 rtx label1 = gen_label_rtx ();
21007 rtx label2 = gen_label_rtx ();
21008
21009 rtx tmp = gen_reg_rtx (XFmode);
21010 rtx tmp2 = gen_reg_rtx (XFmode);
21011
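/* fyl2xp1 is only specified for arguments with |x| < 1 - sqrt(2)/2
(about 0.2929), so compare |op1| against that constant and branch to
the fyl2x fallback when it is not smaller. */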
21012 emit_insn (gen_absxf2 (tmp, op1));
21013 emit_insn (gen_cmpxf (tmp,
21014 CONST_DOUBLE_FROM_REAL_VALUE (
21015 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21016 XFmode)));
21017 emit_jump_insn (gen_bge (label1));
21018
21019 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21020 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21021 emit_jump (label2);
21022
21023 emit_label (label1);
21024 emit_move_insn (tmp, CONST1_RTX (XFmode));
21025 emit_insn (gen_addxf3 (tmp, op1, tmp));
21026 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21027 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21028
21029 emit_label (label2);
21030 }
21031
21032 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21033
21034 static void
21035 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21036 tree decl)
21037 {
21038 /* With Binutils 2.15, the "@unwind" marker must be specified on
21039 every occurrence of the ".eh_frame" section, not just the first
21040 one. */
21041 if (TARGET_64BIT
21042 && strcmp (name, ".eh_frame") == 0)
21043 {
21044 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21045 flags & SECTION_WRITE ? "aw" : "a");
21046 return;
21047 }
21048 default_elf_asm_named_section (name, flags, decl);
21049 }
21050
21051 /* Return the mangling of TYPE if it is an extended fundamental type. */
21052
21053 static const char *
21054 ix86_mangle_fundamental_type (tree type)
21055 {
21056 switch (TYPE_MODE (type))
21057 {
21058 case TFmode:
21059 /* __float128 is "g". */
21060 return "g";
21061 case XFmode:
21062 /* "long double" or __float80 is "e". */
21063 return "e";
21064 default:
21065 return NULL;
21066 }
21067 }
21068
21069 /* For 32-bit code we can save the PIC register setup by using the
21070 hidden __stack_chk_fail_local function instead of calling
21071 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21072 register, so it is better to call __stack_chk_fail directly. */
21073
21074 static tree
21075 ix86_stack_protect_fail (void)
21076 {
21077 return TARGET_64BIT
21078 ? default_external_stack_protect_fail ()
21079 : default_hidden_stack_protect_fail ();
21080 }
21081
21082 /* Select a format to encode pointers in exception handling data. CODE
21083 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21084 true if the symbol may be affected by dynamic relocations.
21085
21086 ??? All x86 object file formats are capable of representing this.
21087 After all, the relocation needed is the same as for the call insn.
21088 Whether or not a particular assembler allows us to enter such, I
21089 guess we'll have to see. */
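/* Roughly: PIC code always gets a pc-relative encoding, made indirect
for global symbols, and only the 64-bit medium and large PIC models
ever need the 8-byte form; non-PIC code gets udata4 when the code
model guarantees 32-bit reach and an absolute pointer otherwise. */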
21090 int
21091 asm_preferred_eh_data_format (int code, int global)
21092 {
21093 if (flag_pic)
21094 {
21095 int type = DW_EH_PE_sdata8;
21096 if (!TARGET_64BIT
21097 || ix86_cmodel == CM_SMALL_PIC
21098 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21099 type = DW_EH_PE_sdata4;
21100 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21101 }
21102 if (ix86_cmodel == CM_SMALL
21103 || (ix86_cmodel == CM_MEDIUM && code))
21104 return DW_EH_PE_udata4;
21105 return DW_EH_PE_absptr;
21106 }
21107 \f
21108 /* Expand copysign: apply the sign of SIGN to the positive value ABS_VALUE,
21109 storing the result in RESULT. If MASK is non-null, it is the mask that
21110 clears the sign bit; its complement is used here to isolate that bit. */
21111 static void
21112 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21113 {
21114 enum machine_mode mode = GET_MODE (sign);
21115 rtx sgn = gen_reg_rtx (mode);
21116 if (mask == NULL_RTX)
21117 {
21118 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21119 if (!VECTOR_MODE_P (mode))
21120 {
21121 /* We need to generate a scalar mode mask in this case. */
21122 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21123 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21124 mask = gen_reg_rtx (mode);
21125 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21126 }
21127 }
21128 else
21129 mask = gen_rtx_NOT (mode, mask);
21130 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21131 gen_rtx_AND (mode, mask, sign)));
21132 emit_insn (gen_rtx_SET (VOIDmode, result,
21133 gen_rtx_IOR (mode, abs_value, sgn)));
21134 }
21135
21136 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21137 mask for masking out the sign-bit is stored in *SMASK, if that is
21138 non-null. */
21139 static rtx
21140 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21141 {
21142 enum machine_mode mode = GET_MODE (op0);
21143 rtx xa, mask;
21144
21145 xa = gen_reg_rtx (mode);
21146 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21147 if (!VECTOR_MODE_P (mode))
21148 {
21149 /* We need to generate a scalar mode mask in this case. */
21150 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21151 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21152 mask = gen_reg_rtx (mode);
21153 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21154 }
21155 emit_insn (gen_rtx_SET (VOIDmode, xa,
21156 gen_rtx_AND (mode, op0, mask)));
21157
21158 if (smask)
21159 *smask = mask;
21160
21161 return xa;
21162 }
21163
21164 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21165 swapping the operands if SWAP_OPERANDS is true. The expanded
21166 code is a forward jump to a newly created label in case the
21167 comparison is true. The generated label rtx is returned. */
21168 static rtx
21169 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21170 bool swap_operands)
21171 {
21172 rtx label, tmp;
21173
21174 if (swap_operands)
21175 {
21176 tmp = op0;
21177 op0 = op1;
21178 op1 = tmp;
21179 }
21180
21181 label = gen_label_rtx ();
21182 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21183 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21184 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21185 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21186 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21187 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21188 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21189 JUMP_LABEL (tmp) = label;
21190
21191 return label;
21192 }
21193
21194 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21195 using comparison code CODE. Operands are swapped for the comparison if
21196 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21197 static rtx
21198 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21199 bool swap_operands)
21200 {
21201 enum machine_mode mode = GET_MODE (op0);
21202 rtx mask = gen_reg_rtx (mode);
21203
21204 if (swap_operands)
21205 {
21206 rtx tmp = op0;
21207 op0 = op1;
21208 op1 = tmp;
21209 }
21210
21211 if (mode == DFmode)
21212 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21213 gen_rtx_fmt_ee (code, mode, op0, op1)));
21214 else
21215 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21216 gen_rtx_fmt_ee (code, mode, op0, op1)));
21217
21218 return mask;
21219 }
21220
21221 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21222 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
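/* Adding this value to a nonnegative operand smaller than 2**n and then
subtracting it again leaves the operand rounded to an integer in the
current rounding mode; the expanders below rely on this. */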
21223 static rtx
21224 ix86_gen_TWO52 (enum machine_mode mode)
21225 {
21226 REAL_VALUE_TYPE TWO52r;
21227 rtx TWO52;
21228
21229 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21230 TWO52 = const_double_from_real_value (TWO52r, mode);
21231 TWO52 = force_reg (mode, TWO52);
21232
21233 return TWO52;
21234 }
21235
21236 /* Expand SSE sequence for computing lround from OP1 storing
21237 into OP0. */
21238 void
21239 ix86_expand_lround (rtx op0, rtx op1)
21240 {
21241 /* C code for the stuff we're doing below:
21242 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21243 return (long)tmp;
21244 */
21245 enum machine_mode mode = GET_MODE (op1);
21246 const struct real_format *fmt;
21247 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21248 rtx adj;
21249
21250 /* load nextafter (0.5, 0.0) */
21251 fmt = REAL_MODE_FORMAT (mode);
21252 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21253 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21254
21255 /* adj = copysign (0.5, op1) */
21256 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21257 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21258
21259 /* adj = op1 + adj */
21260 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21261
21262 /* op0 = (imode)adj */
21263 expand_fix (op0, adj, 0);
21264 }
21265
21266 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
21267 into OP0. */
21268 void
21269 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21270 {
21271 /* C code for the stuff we're doing below (for do_floor):
21272 xi = (long)op1;
21273 xi -= (double)xi > op1 ? 1 : 0;
21274 return xi;
21275 */
21276 enum machine_mode fmode = GET_MODE (op1);
21277 enum machine_mode imode = GET_MODE (op0);
21278 rtx ireg, freg, label, tmp;
21279
21280 /* reg = (long)op1 */
21281 ireg = gen_reg_rtx (imode);
21282 expand_fix (ireg, op1, 0);
21283
21284 /* freg = (double)reg */
21285 freg = gen_reg_rtx (fmode);
21286 expand_float (freg, ireg, 0);
21287
21288 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21289 label = ix86_expand_sse_compare_and_jump (UNLE,
21290 freg, op1, !do_floor);
21291 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21292 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21293 emit_move_insn (ireg, tmp);
21294
21295 emit_label (label);
21296 LABEL_NUSES (label) = 1;
21297
21298 emit_move_insn (op0, ireg);
21299 }
21300
21301 /* Expand rint (round to integral in the current rounding mode) of OPERAND1,
21302 storing the result in OPERAND0. */
21303 void
21304 ix86_expand_rint (rtx operand0, rtx operand1)
21305 {
21306 /* C code for the stuff we're doing below:
21307 xa = fabs (operand1);
21308 if (!isless (xa, 2**52))
21309 return operand1;
21310 xa = xa + 2**52 - 2**52;
21311 return copysign (xa, operand1);
21312 */
21313 enum machine_mode mode = GET_MODE (operand0);
21314 rtx res, xa, label, TWO52, mask;
21315
21316 res = gen_reg_rtx (mode);
21317 emit_move_insn (res, operand1);
21318
21319 /* xa = abs (operand1) */
21320 xa = ix86_expand_sse_fabs (res, &mask);
21321
21322 /* if (!isless (xa, TWO52)) goto label; */
21323 TWO52 = ix86_gen_TWO52 (mode);
21324 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21325
21326 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21327 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21328
21329 ix86_sse_copysign_to_positive (res, xa, res, mask);
21330
21331 emit_label (label);
21332 LABEL_NUSES (label) = 1;
21333
21334 emit_move_insn (operand0, res);
21335 }
21336
21337 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21338 into OPERAND0. */
21339 void
21340 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21341 {
21342 /* C code for the stuff we expand below.
21343 double xa = fabs (x), x2;
21344 if (!isless (xa, TWO52))
21345 return x;
21346 xa = xa + TWO52 - TWO52;
21347 x2 = copysign (xa, x);
21348 Compensate. Floor:
21349 if (x2 > x)
21350 x2 -= 1;
21351 Compensate. Ceil:
21352 if (x2 < x)
21353 x2 -= -1;
21354 return x2;
21355 */
21356 enum machine_mode mode = GET_MODE (operand0);
21357 rtx xa, TWO52, tmp, label, one, res, mask;
21358
21359 TWO52 = ix86_gen_TWO52 (mode);
21360
21361 /* Temporary for holding the result, initialized to the input
21362 operand to ease control flow. */
21363 res = gen_reg_rtx (mode);
21364 emit_move_insn (res, operand1);
21365
21366 /* xa = abs (operand1) */
21367 xa = ix86_expand_sse_fabs (res, &mask);
21368
21369 /* if (!isless (xa, TWO52)) goto label; */
21370 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21371
21372 /* xa = xa + TWO52 - TWO52; */
21373 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21374 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21375
21376 /* xa = copysign (xa, operand1) */
21377 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21378
21379 /* generate 1.0 or -1.0 */
21380 one = force_reg (mode,
21381 const_double_from_real_value (do_floor
21382 ? dconst1 : dconstm1, mode));
21383
21384 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor; for ceil the operands are swapped and ONE is -1, so xa -= (xa < operand1 ? -1 : 0). */
21385 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21386 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21387 gen_rtx_AND (mode, one, tmp)));
21388 /* We always need to subtract here to preserve signed zero. */
21389 tmp = expand_simple_binop (mode, MINUS,
21390 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21391 emit_move_insn (res, tmp);
21392
21393 emit_label (label);
21394 LABEL_NUSES (label) = 1;
21395
21396 emit_move_insn (operand0, res);
21397 }
21398
21399 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21400 into OPERAND0. */
21401 void
21402 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21403 {
21404 /* C code for the stuff we expand below.
21405 double xa = fabs (x), x2;
21406 if (!isless (xa, TWO52))
21407 return x;
21408 x2 = (double)(long)x;
21409 Compensate. Floor:
21410 if (x2 > x)
21411 x2 -= 1;
21412 Compensate. Ceil:
21413 if (x2 < x)
21414 x2 += 1;
21415 if (HONOR_SIGNED_ZEROS (mode))
21416 return copysign (x2, x);
21417 return x2;
21418 */
21419 enum machine_mode mode = GET_MODE (operand0);
21420 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21421
21422 TWO52 = ix86_gen_TWO52 (mode);
21423
21424 /* Temporary for holding the result, initialized to the input
21425 operand to ease control flow. */
21426 res = gen_reg_rtx (mode);
21427 emit_move_insn (res, operand1);
21428
21429 /* xa = abs (operand1) */
21430 xa = ix86_expand_sse_fabs (res, &mask);
21431
21432 /* if (!isless (xa, TWO52)) goto label; */
21433 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21434
21435 /* xa = (double)(long)x */
21436 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21437 expand_fix (xi, res, 0);
21438 expand_float (xa, xi, 0);
21439
21440 /* generate 1.0 */
21441 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21442
21443 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor, xa += (xa < operand1 ? 1 : 0) for ceil. */
21444 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21445 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21446 gen_rtx_AND (mode, one, tmp)));
21447 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21448 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21449 emit_move_insn (res, tmp);
21450
21451 if (HONOR_SIGNED_ZEROS (mode))
21452 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21453
21454 emit_label (label);
21455 LABEL_NUSES (label) = 1;
21456
21457 emit_move_insn (operand0, res);
21458 }
21459
21460 /* Expand SSE sequence for computing round from OPERAND1 storing
21461 into OPERAND0. This sequence works without relying on DImode truncation
21462 via cvttsd2siq, which is only available on 64-bit targets. */
21463 void
21464 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21465 {
21466 /* C code for the stuff we expand below.
21467 double xa = fabs (x), xa2, x2;
21468 if (!isless (xa, TWO52))
21469 return x;
21470 Using the absolute value and copying back sign makes
21471 -0.0 -> -0.0 correct.
21472 xa2 = xa + TWO52 - TWO52;
21473 Compensate.
21474 dxa = xa2 - xa;
21475 if (dxa <= -0.5)
21476 xa2 += 1;
21477 else if (dxa > 0.5)
21478 xa2 -= 1;
21479 x2 = copysign (xa2, x);
21480 return x2;
21481 */
21482 enum machine_mode mode = GET_MODE (operand0);
21483 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21484
21485 TWO52 = ix86_gen_TWO52 (mode);
21486
21487 /* Temporary for holding the result, initialized to the input
21488 operand to ease control flow. */
21489 res = gen_reg_rtx (mode);
21490 emit_move_insn (res, operand1);
21491
21492 /* xa = abs (operand1) */
21493 xa = ix86_expand_sse_fabs (res, &mask);
21494
21495 /* if (!isless (xa, TWO52)) goto label; */
21496 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21497
21498 /* xa2 = xa + TWO52 - TWO52; */
21499 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21500 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21501
21502 /* dxa = xa2 - xa; */
21503 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21504
21505 /* generate 0.5, 1.0 and -0.5 */
21506 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21507 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21508 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21509 0, OPTAB_DIRECT);
21510
21511 /* Compensate. */
21513 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21514 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21515 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21516 gen_rtx_AND (mode, one, tmp)));
21517 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21518 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21519 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21520 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21521 gen_rtx_AND (mode, one, tmp)));
21522 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21523
21524 /* res = copysign (xa2, operand1) */
21525 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21526
21527 emit_label (label);
21528 LABEL_NUSES (label) = 1;
21529
21530 emit_move_insn (operand0, res);
21531 }
21532
21533 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21534 into OPERAND0. */
21535 void
21536 ix86_expand_trunc (rtx operand0, rtx operand1)
21537 {
21538 /* C code for SSE variant we expand below.
21539 double xa = fabs (x), x2;
21540 if (!isless (xa, TWO52))
21541 return x;
21542 x2 = (double)(long)x;
21543 if (HONOR_SIGNED_ZEROS (mode))
21544 return copysign (x2, x);
21545 return x2;
21546 */
21547 enum machine_mode mode = GET_MODE (operand0);
21548 rtx xa, xi, TWO52, label, res, mask;
21549
21550 TWO52 = ix86_gen_TWO52 (mode);
21551
21552 /* Temporary for holding the result, initialized to the input
21553 operand to ease control flow. */
21554 res = gen_reg_rtx (mode);
21555 emit_move_insn (res, operand1);
21556
21557 /* xa = abs (operand1) */
21558 xa = ix86_expand_sse_fabs (res, &mask);
21559
21560 /* if (!isless (xa, TWO52)) goto label; */
21561 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21562
21563 /* x = (double)(long)x */
21564 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21565 expand_fix (xi, res, 0);
21566 expand_float (res, xi, 0);
21567
21568 if (HONOR_SIGNED_ZEROS (mode))
21569 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21570
21571 emit_label (label);
21572 LABEL_NUSES (label) = 1;
21573
21574 emit_move_insn (operand0, res);
21575 }
21576
21577 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
21578 OPERAND0 without using DImode truncation (works on 32-bit targets). */
21579 void
21580 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21581 {
21582 enum machine_mode mode = GET_MODE (operand0);
21583 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21584
21585 /* C code for SSE variant we expand below.
21586 double xa = fabs (x), x2;
21587 if (!isless (xa, TWO52))
21588 return x;
21589 xa2 = xa + TWO52 - TWO52;
21590 Compensate:
21591 if (xa2 > xa)
21592 xa2 -= 1.0;
21593 x2 = copysign (xa2, x);
21594 return x2;
21595 */
21596
21597 TWO52 = ix86_gen_TWO52 (mode);
21598
21599 /* Temporary for holding the result, initialized to the input
21600 operand to ease control flow. */
21601 res = gen_reg_rtx (mode);
21602 emit_move_insn (res, operand1);
21603
21604 /* xa = abs (operand1) */
21605 xa = ix86_expand_sse_fabs (res, &smask);
21606
21607 /* if (!isless (xa, TWO52)) goto label; */
21608 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21609
21610 /* res = xa + TWO52 - TWO52; */
21611 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21612 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21613 emit_move_insn (res, tmp);
21614
21615 /* generate 1.0 */
21616 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21617
21618 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21619 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21620 emit_insn (gen_rtx_SET (VOIDmode, mask,
21621 gen_rtx_AND (mode, mask, one)));
21622 tmp = expand_simple_binop (mode, MINUS,
21623 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21624 emit_move_insn (res, tmp);
21625
21626 /* res = copysign (res, operand1) */
21627 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21628
21629 emit_label (label);
21630 LABEL_NUSES (label) = 1;
21631
21632 emit_move_insn (operand0, res);
21633 }
21634
21635 /* Expand SSE sequence for computing round from OPERAND1 storing
21636 into OPERAND0. */
21637 void
21638 ix86_expand_round (rtx operand0, rtx operand1)
21639 {
21640 /* C code for the stuff we're doing below:
21641 double xa = fabs (x);
21642 if (!isless (xa, TWO52))
21643 return x;
21644 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21645 return copysign (xa, x);
21646 */
21647 enum machine_mode mode = GET_MODE (operand0);
21648 rtx res, TWO52, xa, label, xi, half, mask;
21649 const struct real_format *fmt;
21650 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21651
21652 /* Temporary for holding the result, initialized to the input
21653 operand to ease control flow. */
21654 res = gen_reg_rtx (mode);
21655 emit_move_insn (res, operand1);
21656
21657 TWO52 = ix86_gen_TWO52 (mode);
21658 xa = ix86_expand_sse_fabs (res, &mask);
21659 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21660
21661 /* load nextafter (0.5, 0.0) */
21662 fmt = REAL_MODE_FORMAT (mode);
21663 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21664 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21665
21666 /* xa = xa + 0.5 */
21667 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21668 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21669
21670 /* xa = (double)(int64_t)xa */
21671 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21672 expand_fix (xi, xa, 0);
21673 expand_float (xa, xi, 0);
21674
21675 /* res = copysign (xa, operand1) */
21676 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21677
21678 emit_label (label);
21679 LABEL_NUSES (label) = 1;
21680
21681 emit_move_insn (operand0, res);
21682 }
21683
21684 #include "gt-i386.h"