1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
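/* For illustration, MODE_INDEX maps the integer modes onto rows of the
   per-mode cost tables below:

     MODE_INDEX (QImode) == 0   selects the QI row
     MODE_INDEX (SImode) == 2   selects the SI row

   and any other mode falls into the trailing "other" entry (index 4).  The
   multiply-start and divide cost arrays in each table below are laid out in
   exactly this QI/HI/SI/DI/other order.  */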
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
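/* Sketch of how the stringop descriptors at the end of each cost table read
   (the struct itself is declared in i386.h): the first field names the
   algorithm to use when the block size is unknown at compile time, and each
   {max, alg} pair that follows selects ALG for known sizes up to MAX bytes,
   -1 meaning "no upper bound".  DUMMY_STRINGOP_ALGS therefore just says
   "always use a library call":

     {libcall, {{-1, libcall}}}

   while an entry such as {{256, rep_prefix_4_byte}, {-1, libcall}} means
   "rep movsl up to 256 bytes, a libcall beyond that".  */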
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
 354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
 355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
 356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
 357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
 584 /* New AMD processors never drop prefetches; if they cannot be performed
 585 immediately, they are queued. We set the number of simultaneous prefetches
 586 to a large constant to reflect this (it probably is not a good idea not
 587 to limit the number of prefetches at all, as their execution also takes some
 588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
 598 blocks it is better to use a loop. For large blocks, a libcall can do
 599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
 657 /* New AMD processors never drop prefetches; if they cannot be performed
 658 immediately, they are queued. We set the number of simultaneous prefetches
 659 to a large constant to reflect this (it probably is not a good idea not
 660 to limit the number of prefetches at all, as their execution also takes some
 661 time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
 671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 672 very small blocks it is better to use a loop. For large blocks, a libcall can
 673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
 828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
 862 /* On all chips taken into consideration, lea takes 2 cycles or more. With
 863 this cost, however, our current implementation of synth_mult results in the
 864 use of unnecessary temporary registers, causing regressions on several
 865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
 907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
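/* ix86_cost is repointed in override_options: -Os uses size_cost above
   (byte counts rather than cycle estimates), otherwise the table matching
   the -mtune processor is chosen.  RTL costing then reads the individual
   fields, roughly (field names as declared in i386.h):

     cost = ix86_cost->add;                            an addition
     cost = ix86_cost->mult_init[MODE_INDEX (mode)];   multiply start-up  */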
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
 1003 /* Generic instruction choice should be a common subset of supported CPUs
 1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
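/* Each m_* macro above is a one-bit mask identifying a processor (or, for
   the unions, several processors), so every entry in the tuning tables below
   is the set of CPUs for which that feature is enabled; ~m_386 style entries
   mean "everything except the 386".  Testing a feature for the selected
   -mtune target then boils down to something like

     (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)) != 0

   with the actual wrapper macros living in i386.h.  */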
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
 1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
 1010 negatively, so enabling it for Generic64 seems like a good code size
 1011 tradeoff. We can't enable it for 32bit generic because it does not
 1012 work well with PPro based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1031
 1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
 1033 on simulation results. But after the P4 was made, no performance benefit
 1034 was observed with branch hints; they also increase code size.
 1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
 1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
 1051 register stalls on the Generic32 compilation setting as well. However,
 1052 in the current implementation the partial register stalls are not eliminated
 1053 very well - they can be introduced via subregs synthesized by combine
 1054 and can happen in caller/callee saving sequences. Because this option
 1055 pays back little on PPro based chips and conflicts with the partial reg
 1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
 1057 for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
 1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
 1135 conflict here between PPro/Pentium4 based chips that treat 128bit
 1136 SSE registers as single units and K8 based chips that divide SSE
 1137 registers into two 64bit halves. This knob promotes all store destinations
 1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
 1139 results in one extra micro-op on 64bit SSE units. Experimental results
 1140 show that disabling this option on P4 brings over a 20% SPECfp regression,
 1141 while enabling it on K8 brings roughly a 2.4% regression that can be partly
 1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
 1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
 1149 are resolved on SSE register parts instead of whole registers, so we may
 1150 maintain just the lower part of scalar values in the proper format, leaving
 1151 the upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1199
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1202
1203 /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
1204 path on AMD machines) */
1205 m_K8 | m_GENERIC64 | m_AMDFAM10,
1206
1207 /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
1208 machines) */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210
1211 /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR than
1212 a MOV) */
1213 m_PENT,
1214
1215 /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
1216 one byte longer). */
1217 m_PENT,
1218
1219 /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
1220 operand that cannot be represented using a modRM byte. The XOR
1221 replacement is long decoded, so this split helps here as well). */
1222 m_K6,
1223 };
1224
1225 /* Feature tests against the various architecture variations. */
1226 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1227 /* X86_ARCH_CMOVE */
1228 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1229
1230 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1231 ~m_386,
1232
1233 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1234 ~(m_386 | m_486),
1235
1236 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1237 ~m_386,
1238
1239 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1240 ~m_386,
1241 };
1242
1243 static const unsigned int x86_accumulate_outgoing_args
1244 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1245
1246 static const unsigned int x86_arch_always_fancy_math_387
1247 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1248 | m_NOCONA | m_CORE2 | m_GENERIC;
1249
1250 static enum stringop_alg stringop_alg = no_stringop;
1251
 1252 /* In case the average insn count for a single function invocation is
 1253 lower than this constant, emit fast (but longer) prologue and
 1254 epilogue code. */
1255 #define FAST_PROLOGUE_INSN_COUNT 20
1256
1257 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1258 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1259 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1260 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1261
1262 /* Array of the smallest class containing reg number REGNO, indexed by
1263 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1264
1265 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1266 {
1267 /* ax, dx, cx, bx */
1268 AREG, DREG, CREG, BREG,
1269 /* si, di, bp, sp */
1270 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1271 /* FP registers */
1272 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1273 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1274 /* arg pointer */
1275 NON_Q_REGS,
1276 /* flags, fpsr, fpcr, frame */
1277 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1278 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1279 SSE_REGS, SSE_REGS,
1280 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1281 MMX_REGS, MMX_REGS,
1282 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1283 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1284 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1285 SSE_REGS, SSE_REGS,
1286 };
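/* REGNO_REG_CLASS in i386.h simply indexes this array, so for example
   REGNO_REG_CLASS (0) is AREG (%eax) and REGNO_REG_CLASS (7) is NON_Q_REGS,
   since %esp has no low QImode part in 32bit code.  */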
1287
1288 /* The "default" register map used in 32bit mode. */
1289
1290 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1291 {
1292 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1293 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1294 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1295 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1296 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1297 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1298 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1299 };
1300
1301 static int const x86_64_int_parameter_registers[6] =
1302 {
1303 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1304 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1305 };
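/* This is the x86-64 psABI ordering: the first six integer or pointer
   arguments of a call such as

     long f (long a, long b, long c, long d, long e, long g);

   arrive in %rdi, %rsi, %rdx, %rcx, %r8 and %r9 respectively; any further
   integer arguments are passed on the stack.  */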
1306
1307 static int const x86_64_int_return_registers[4] =
1308 {
 1309 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1310 };
1311
1312 /* The "default" register map used in 64bit mode. */
1313 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1314 {
1315 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1316 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1317 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1319 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1320 8,9,10,11,12,13,14,15, /* extended integer registers */
1321 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1322 };
1323
1324 /* Define the register numbers to be used in Dwarf debugging information.
1325 The SVR4 reference port C compiler uses the following register numbers
1326 in its Dwarf output code:
1327 0 for %eax (gcc regno = 0)
1328 1 for %ecx (gcc regno = 2)
1329 2 for %edx (gcc regno = 1)
1330 3 for %ebx (gcc regno = 3)
1331 4 for %esp (gcc regno = 7)
1332 5 for %ebp (gcc regno = 6)
1333 6 for %esi (gcc regno = 4)
1334 7 for %edi (gcc regno = 5)
1335 The following three DWARF register numbers are never generated by
1336 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1337 believes these numbers have these meanings.
1338 8 for %eip (no gcc equivalent)
1339 9 for %eflags (gcc regno = 17)
1340 10 for %trapno (no gcc equivalent)
1341 It is not at all clear how we should number the FP stack registers
1342 for the x86 architecture. If the version of SDB on x86/svr4 were
1343 a bit less brain dead with respect to floating-point then we would
1344 have a precedent to follow with respect to DWARF register numbers
1345 for x86 FP registers, but the SDB on x86/svr4 is so completely
1346 broken with respect to FP registers that it is hardly worth thinking
1347 of it as something to strive for compatibility with.
1348 The version of x86/svr4 SDB I have at the moment does (partially)
1349 seem to believe that DWARF register number 11 is associated with
1350 the x86 register %st(0), but that's about all. Higher DWARF
1351 register numbers don't seem to be associated with anything in
1352 particular, and even for DWARF regno 11, SDB only seems to under-
1353 stand that it should say that a variable lives in %st(0) (when
1354 asked via an `=' command) if we said it was in DWARF regno 11,
1355 but SDB still prints garbage when asked for the value of the
1356 variable in question (via a `/' command).
1357 (Also note that the labels SDB prints for various FP stack regs
1358 when doing an `x' command are all wrong.)
1359 Note that these problems generally don't affect the native SVR4
1360 C compiler because it doesn't allow the use of -O with -g and
1361 because when it is *not* optimizing, it allocates a memory
1362 location for each floating-point variable, and the memory
1363 location is what gets described in the DWARF AT_location
1364 attribute for the variable in question.
1365 Regardless of the severe mental illness of the x86/svr4 SDB, we
1366 do something sensible here and we use the following DWARF
1367 register numbers. Note that these are all stack-top-relative
1368 numbers.
1369 11 for %st(0) (gcc regno = 8)
1370 12 for %st(1) (gcc regno = 9)
1371 13 for %st(2) (gcc regno = 10)
1372 14 for %st(3) (gcc regno = 11)
1373 15 for %st(4) (gcc regno = 12)
1374 16 for %st(5) (gcc regno = 13)
1375 17 for %st(6) (gcc regno = 14)
1376 18 for %st(7) (gcc regno = 15)
1377 */
1378 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1379 {
1380 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1381 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1382 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1383 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1384 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1385 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1386 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1387 };
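/* Read with the numbering documented above: a GCC register number indexes
   the array and yields the DWARF register number to emit, e.g.

     svr4_dbx_register_map[6] == 5    %ebp   -> DWARF reg 5
     svr4_dbx_register_map[8] == 11   %st(0) -> DWARF reg 11

   The dbx_register_map and dbx64_register_map tables earlier serve the same
   purpose for the other debug-format and 64-bit configurations.  */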
1388
1389 /* Test and compare insns in i386.md store the information needed to
1390 generate branch and scc insns here. */
1391
1392 rtx ix86_compare_op0 = NULL_RTX;
1393 rtx ix86_compare_op1 = NULL_RTX;
1394 rtx ix86_compare_emitted = NULL_RTX;
1395
1396 /* Size of the register save area. */
1397 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
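/* Worked example, assuming the usual 64-bit values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: the save area is
   6 * 8 + 8 * 16 == 176 bytes (48 for the integer registers followed by
   128 for the SSE registers).  */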
1398
1399 /* Define the structure for the machine field in struct function. */
1400
1401 struct stack_local_entry GTY(())
1402 {
1403 unsigned short mode;
1404 unsigned short n;
1405 rtx rtl;
1406 struct stack_local_entry *next;
1407 };
1408
1409 /* Structure describing stack frame layout.
1410 Stack grows downward:
1411
1412 [arguments]
1413 <- ARG_POINTER
1414 saved pc
1415
1416 saved frame pointer if frame_pointer_needed
1417 <- HARD_FRAME_POINTER
1418 [saved regs]
1419
1420 [padding1] \
1421 )
1422 [va_arg registers] (
1423 > to_allocate <- FRAME_POINTER
1424 [frame] (
1425 )
1426 [padding2] /
1427 */
1428 struct ix86_frame
1429 {
1430 int nregs;
1431 int padding1;
1432 int va_arg_size;
1433 HOST_WIDE_INT frame;
1434 int padding2;
1435 int outgoing_arguments_size;
1436 int red_zone_size;
1437
1438 HOST_WIDE_INT to_allocate;
1439 /* The offsets relative to ARG_POINTER. */
1440 HOST_WIDE_INT frame_pointer_offset;
1441 HOST_WIDE_INT hard_frame_pointer_offset;
1442 HOST_WIDE_INT stack_pointer_offset;
1443
1444 /* When save_regs_using_mov is set, emit prologue using
1445 move instead of push instructions. */
1446 bool save_regs_using_mov;
1447 };
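/* Rough reading of the fields above (ix86_compute_frame_layout is the
   authoritative computation): the three *_offset members are all
   distances from ARG_POINTER in the diagram, e.g. with a frame pointer
   on a 32-bit target hard_frame_pointer_offset is typically
   2 * UNITS_PER_WORD (saved pc plus saved %ebp), and to_allocate is the
   amount the prologue actually subtracts from the stack pointer.  */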
1448
1449 /* Code model option. */
1450 enum cmodel ix86_cmodel;
1451 /* Asm dialect. */
1452 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1453 /* TLS dialects. */
1454 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1455
1456 /* Which unit we are generating floating point math for. */
1457 enum fpmath_unit ix86_fpmath;
1458
1459 /* Which cpu are we scheduling for. */
1460 enum processor_type ix86_tune;
1461
1462 /* Which instruction set architecture to use. */
1463 enum processor_type ix86_arch;
1464
1465 /* True if the SSE prefetch instruction is not a NOP. */
1466 int x86_prefetch_sse;
1467
1468 /* ix86_regparm_string as a number */
1469 static int ix86_regparm;
1470
1471 /* -mstackrealign option */
1472 extern int ix86_force_align_arg_pointer;
1473 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1474
1475 /* Preferred alignment for stack boundary in bits. */
1476 unsigned int ix86_preferred_stack_boundary;
1477
1478 /* Values 1-5: see jump.c */
1479 int ix86_branch_cost;
1480
1481 /* Variables this size or smaller are put in the data/bss sections;
1482 larger ones go in the ldata/lbss sections. */
1483
1484 int ix86_section_threshold = 65536;
1485
1486 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1487 char internal_label_prefix[16];
1488 int internal_label_prefix_len;
1489 \f
1490 static bool ix86_handle_option (size_t, const char *, int);
1491 static void output_pic_addr_const (FILE *, rtx, int);
1492 static void put_condition_code (enum rtx_code, enum machine_mode,
1493 int, int, FILE *);
1494 static const char *get_some_local_dynamic_name (void);
1495 static int get_some_local_dynamic_name_1 (rtx *, void *);
1496 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1497 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1498 rtx *);
1499 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1500 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1501 enum machine_mode);
1502 static rtx get_thread_pointer (int);
1503 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1504 static void get_pc_thunk_name (char [32], unsigned int);
1505 static rtx gen_push (rtx);
1506 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1507 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1508 static struct machine_function * ix86_init_machine_status (void);
1509 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1510 static int ix86_nsaved_regs (void);
1511 static void ix86_emit_save_regs (void);
1512 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1513 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1514 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1515 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1516 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1517 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1518 static int ix86_issue_rate (void);
1519 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1520 static int ia32_multipass_dfa_lookahead (void);
1521 static void ix86_init_mmx_sse_builtins (void);
1522 static rtx x86_this_parameter (tree);
1523 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1524 HOST_WIDE_INT, tree);
1525 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1526 static void x86_file_start (void);
1527 static void ix86_reorg (void);
1528 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1529 static tree ix86_build_builtin_va_list (void);
1530 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1531 tree, int *, int);
1532 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1533 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1534 static bool ix86_vector_mode_supported_p (enum machine_mode);
1535
1536 static int ix86_address_cost (rtx);
1537 static bool ix86_cannot_force_const_mem (rtx);
1538 static rtx ix86_delegitimize_address (rtx);
1539
1540 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1541
1542 struct builtin_description;
1543 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1544 tree, rtx);
1545 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1546 tree, rtx);
1547 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1548 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1549 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1550 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1551 static rtx safe_vector_operand (rtx, enum machine_mode);
1552 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1553 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1554 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1555 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1556 static int ix86_fp_comparison_cost (enum rtx_code code);
1557 static unsigned int ix86_select_alt_pic_regnum (void);
1558 static int ix86_save_reg (unsigned int, int);
1559 static void ix86_compute_frame_layout (struct ix86_frame *);
1560 static int ix86_comp_type_attributes (tree, tree);
1561 static int ix86_function_regparm (tree, tree);
1562 const struct attribute_spec ix86_attribute_table[];
1563 static bool ix86_function_ok_for_sibcall (tree, tree);
1564 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1565 static bool contains_128bit_aligned_vector_p (tree);
1566 static rtx ix86_struct_value_rtx (tree, int);
1567 static bool ix86_ms_bitfield_layout_p (tree);
1568 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1569 static int extended_reg_mentioned_1 (rtx *, void *);
1570 static bool ix86_rtx_costs (rtx, int, int, int *);
1571 static int min_insn_size (rtx);
1572 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1573 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1574 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1575 tree, bool);
1576 static void ix86_init_builtins (void);
1577 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1578 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1579 static tree ix86_builtin_conversion (enum tree_code, tree);
1580 static const char *ix86_mangle_fundamental_type (tree);
1581 static tree ix86_stack_protect_fail (void);
1582 static rtx ix86_internal_arg_pointer (void);
1583 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1584 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1585 rtx, rtx, int);
1586 static rtx ix86_function_value (tree, tree, bool);
1587
1588 /* This function is only used on Solaris. */
1589 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1590 ATTRIBUTE_UNUSED;
1591
1592 /* Register class used for passing a given 64-bit part of an argument.
1593 These represent the classes documented by the psABI, with the exception
1594 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1595 just uses an SFmode or DFmode move instead of a DImode move to avoid
1596 reformatting penalties.
1597 Similarly, we play games with INTEGERSI_CLASS to use cheaper SImode moves
1598 whenever possible (the upper half of the eightbyte is just padding).
1599 */
1600 enum x86_64_reg_class
1601 {
1602 X86_64_NO_CLASS,
1603 X86_64_INTEGER_CLASS,
1604 X86_64_INTEGERSI_CLASS,
1605 X86_64_SSE_CLASS,
1606 X86_64_SSESF_CLASS,
1607 X86_64_SSEDF_CLASS,
1608 X86_64_SSEUP_CLASS,
1609 X86_64_X87_CLASS,
1610 X86_64_X87UP_CLASS,
1611 X86_64_COMPLEX_X87_CLASS,
1612 X86_64_MEMORY_CLASS
1613 };
1614 static const char * const x86_64_reg_class_name[] = {
1615 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1616 "sseup", "x87", "x87up", "cplx87", "no"
1617 };
1618
1619 #define MAX_CLASSES 4
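/* Illustrative classification example (classify_argument later in this
   file implements the real algorithm): on x86-64 an argument such as

     struct { double d; long l; }

   spans two eightbytes; the first is classified X86_64_SSEDF_CLASS and
   the second X86_64_INTEGER_CLASS, so the struct travels in one SSE
   register and one general-purpose register.  */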
1620
1621 /* Table of constants used by fldpi, fldln2, etc.... */
1622 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1623 static bool ext_80387_constants_init = 0;
1624 static void init_ext_80387_constants (void);
1625 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1626 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1627 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1628 static section *x86_64_elf_select_section (tree decl, int reloc,
1629 unsigned HOST_WIDE_INT align)
1630 ATTRIBUTE_UNUSED;
1631 \f
1632 /* Initialize the GCC target structure. */
1633 #undef TARGET_ATTRIBUTE_TABLE
1634 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1635 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1636 # undef TARGET_MERGE_DECL_ATTRIBUTES
1637 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1638 #endif
1639
1640 #undef TARGET_COMP_TYPE_ATTRIBUTES
1641 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1642
1643 #undef TARGET_INIT_BUILTINS
1644 #define TARGET_INIT_BUILTINS ix86_init_builtins
1645 #undef TARGET_EXPAND_BUILTIN
1646 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1647
1648 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1649 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1650 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1651 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1652
1653 #undef TARGET_ASM_FUNCTION_EPILOGUE
1654 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1655
1656 #undef TARGET_ENCODE_SECTION_INFO
1657 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1658 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1659 #else
1660 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1661 #endif
1662
1663 #undef TARGET_ASM_OPEN_PAREN
1664 #define TARGET_ASM_OPEN_PAREN ""
1665 #undef TARGET_ASM_CLOSE_PAREN
1666 #define TARGET_ASM_CLOSE_PAREN ""
1667
1668 #undef TARGET_ASM_ALIGNED_HI_OP
1669 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1670 #undef TARGET_ASM_ALIGNED_SI_OP
1671 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1672 #ifdef ASM_QUAD
1673 #undef TARGET_ASM_ALIGNED_DI_OP
1674 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1675 #endif
1676
1677 #undef TARGET_ASM_UNALIGNED_HI_OP
1678 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1679 #undef TARGET_ASM_UNALIGNED_SI_OP
1680 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1681 #undef TARGET_ASM_UNALIGNED_DI_OP
1682 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1683
1684 #undef TARGET_SCHED_ADJUST_COST
1685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1686 #undef TARGET_SCHED_ISSUE_RATE
1687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1690 ia32_multipass_dfa_lookahead
1691
1692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1694
1695 #ifdef HAVE_AS_TLS
1696 #undef TARGET_HAVE_TLS
1697 #define TARGET_HAVE_TLS true
1698 #endif
1699 #undef TARGET_CANNOT_FORCE_CONST_MEM
1700 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1701 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1702 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1703
1704 #undef TARGET_DELEGITIMIZE_ADDRESS
1705 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1706
1707 #undef TARGET_MS_BITFIELD_LAYOUT_P
1708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1709
1710 #if TARGET_MACHO
1711 #undef TARGET_BINDS_LOCAL_P
1712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1713 #endif
1714
1715 #undef TARGET_ASM_OUTPUT_MI_THUNK
1716 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1717 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1718 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1719
1720 #undef TARGET_ASM_FILE_START
1721 #define TARGET_ASM_FILE_START x86_file_start
1722
1723 #undef TARGET_DEFAULT_TARGET_FLAGS
1724 #define TARGET_DEFAULT_TARGET_FLAGS \
1725 (TARGET_DEFAULT \
1726 | TARGET_64BIT_DEFAULT \
1727 | TARGET_SUBTARGET_DEFAULT \
1728 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1729
1730 #undef TARGET_HANDLE_OPTION
1731 #define TARGET_HANDLE_OPTION ix86_handle_option
1732
1733 #undef TARGET_RTX_COSTS
1734 #define TARGET_RTX_COSTS ix86_rtx_costs
1735 #undef TARGET_ADDRESS_COST
1736 #define TARGET_ADDRESS_COST ix86_address_cost
1737
1738 #undef TARGET_FIXED_CONDITION_CODE_REGS
1739 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1740 #undef TARGET_CC_MODES_COMPATIBLE
1741 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1742
1743 #undef TARGET_MACHINE_DEPENDENT_REORG
1744 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1745
1746 #undef TARGET_BUILD_BUILTIN_VA_LIST
1747 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1748
1749 #undef TARGET_MD_ASM_CLOBBERS
1750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1751
1752 #undef TARGET_PROMOTE_PROTOTYPES
1753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1754 #undef TARGET_STRUCT_VALUE_RTX
1755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1758 #undef TARGET_MUST_PASS_IN_STACK
1759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1760 #undef TARGET_PASS_BY_REFERENCE
1761 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1762 #undef TARGET_INTERNAL_ARG_POINTER
1763 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1764 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1765 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1766
1767 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1768 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1769
1770 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1771 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1772
1773 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1774 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1775
1776 #ifdef HAVE_AS_TLS
1777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1779 #endif
1780
1781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1782 #undef TARGET_INSERT_ATTRIBUTES
1783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1784 #endif
1785
1786 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1787 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1788
1789 #undef TARGET_STACK_PROTECT_FAIL
1790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1791
1792 #undef TARGET_FUNCTION_VALUE
1793 #define TARGET_FUNCTION_VALUE ix86_function_value
1794
1795 struct gcc_target targetm = TARGET_INITIALIZER;
1796
1797 \f
1798 /* The svr4 ABI for the i386 says that records and unions are returned
1799 in memory. */
1800 #ifndef DEFAULT_PCC_STRUCT_RETURN
1801 #define DEFAULT_PCC_STRUCT_RETURN 1
1802 #endif
1803
1804 /* Implement TARGET_HANDLE_OPTION. */
1805
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1808 {
1809 switch (code)
1810 {
1811 case OPT_m3dnow:
1812 if (!value)
1813 {
1814 target_flags &= ~MASK_3DNOW_A;
1815 target_flags_explicit |= MASK_3DNOW_A;
1816 }
1817 return true;
1818
1819 case OPT_mmmx:
1820 if (!value)
1821 {
1822 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1823 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1824 }
1825 return true;
1826
1827 case OPT_msse:
1828 if (!value)
1829 {
1830 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1831 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1832 }
1833 return true;
1834
1835 case OPT_msse2:
1836 if (!value)
1837 {
1838 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1839 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1840 }
1841 return true;
1842
1843 case OPT_msse3:
1844 if (!value)
1845 {
1846 target_flags &= ~MASK_SSE4A;
1847 target_flags_explicit |= MASK_SSE4A;
1848 }
1849 return true;
1850
1851 default:
1852 return true;
1853 }
1854 }
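/* For example, "-mno-sse" arrives here as OPT_msse with VALUE == 0: the
   generic option machinery clears MASK_SSE itself, and the code above
   additionally clears MASK_SSE2, MASK_SSE3 and MASK_SSE4A (recording
   them as explicit) so that no higher SSE level survives without its
   prerequisite.  */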
1855
1856 /* Sometimes certain combinations of command options do not make
1857 sense on a particular target machine. You can define a macro
1858 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1859 defined, is executed once just after all the command options have
1860 been parsed.
1861
1862 Don't use this macro to turn on various extra optimizations for
1863 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1864
1865 void
1866 override_options (void)
1867 {
1868 int i;
1869 int ix86_tune_defaulted = 0;
1870 unsigned int ix86_arch_mask, ix86_tune_mask;
1871
1872 /* Comes from final.c -- no real reason to change it. */
1873 #define MAX_CODE_ALIGN 16
1874
1875 static struct ptt
1876 {
1877 const struct processor_costs *cost; /* Processor costs */
1878 const int target_enable; /* Target flags to enable. */
1879 const int target_disable; /* Target flags to disable. */
1880 const int align_loop; /* Default alignments. */
1881 const int align_loop_max_skip;
1882 const int align_jump;
1883 const int align_jump_max_skip;
1884 const int align_func;
1885 }
1886 const processor_target_table[PROCESSOR_max] =
1887 {
1888 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1889 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1890 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1891 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1892 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1893 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1894 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1895 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1896 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1897 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1898 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1899 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1900 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1901 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1902 };
1903
1904 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1905 static struct pta
1906 {
1907 const char *const name; /* processor name or nickname. */
1908 const enum processor_type processor;
1909 const enum pta_flags
1910 {
1911 PTA_SSE = 1 << 0,
1912 PTA_SSE2 = 1 << 1,
1913 PTA_SSE3 = 1 << 2,
1914 PTA_MMX = 1 << 3,
1915 PTA_PREFETCH_SSE = 1 << 4,
1916 PTA_3DNOW = 1 << 5,
1917 PTA_3DNOW_A = 1 << 6,
1918 PTA_64BIT = 1 << 7,
1919 PTA_SSSE3 = 1 << 8,
1920 PTA_CX16 = 1 << 9,
1921 PTA_POPCNT = 1 << 10,
1922 PTA_ABM = 1 << 11,
1923 PTA_SSE4A = 1 << 12,
1924 PTA_NO_SAHF = 1 << 13
1925 } flags;
1926 }
1927 const processor_alias_table[] =
1928 {
1929 {"i386", PROCESSOR_I386, 0},
1930 {"i486", PROCESSOR_I486, 0},
1931 {"i586", PROCESSOR_PENTIUM, 0},
1932 {"pentium", PROCESSOR_PENTIUM, 0},
1933 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1934 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1935 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1936 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1937 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1938 {"i686", PROCESSOR_PENTIUMPRO, 0},
1939 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1940 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1941 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1942 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1943 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1944 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1945 | PTA_MMX | PTA_PREFETCH_SSE},
1946 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1947 | PTA_MMX | PTA_PREFETCH_SSE},
1948 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1949 | PTA_MMX | PTA_PREFETCH_SSE},
1950 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1951 | PTA_MMX | PTA_PREFETCH_SSE
1952 | PTA_CX16 | PTA_NO_SAHF},
1953 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1954 | PTA_64BIT | PTA_MMX
1955 | PTA_PREFETCH_SSE | PTA_CX16},
1956 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1957 | PTA_3DNOW_A},
1958 {"k6", PROCESSOR_K6, PTA_MMX},
1959 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1960 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1961 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1962 | PTA_3DNOW_A},
1963 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1964 | PTA_3DNOW | PTA_3DNOW_A},
1965 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1966 | PTA_3DNOW_A | PTA_SSE},
1967 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1968 | PTA_3DNOW_A | PTA_SSE},
1969 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1970 | PTA_3DNOW_A | PTA_SSE},
1971 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1972 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1973 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1974 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1975 | PTA_NO_SAHF},
1976 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1977 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1978 | PTA_SSE2 | PTA_NO_SAHF},
1979 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1980 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1981 | PTA_SSE2 | PTA_NO_SAHF},
1982 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1983 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1984 | PTA_SSE2 | PTA_NO_SAHF},
1985 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1986 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1987 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1988 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1989 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1990 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1991 };
1992
1993 int const pta_size = ARRAY_SIZE (processor_alias_table);
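/* For example, "-march=core2" selects the PROCESSOR_CORE2 row above; the
   loop over processor_alias_table further down then turns its PTA_MMX,
   PTA_SSE ... PTA_SSSE3 bits into the corresponding MASK_* target flags
   (unless the user set those explicitly), enables x86_prefetch_sse and
   x86_cmpxchg16b, and passes the 64-bit check because the row carries
   PTA_64BIT.  */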
1994
1995 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1996 SUBTARGET_OVERRIDE_OPTIONS;
1997 #endif
1998
1999 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2000 SUBSUBTARGET_OVERRIDE_OPTIONS;
2001 #endif
2002
2003 /* -fPIC is the default for x86_64. */
2004 if (TARGET_MACHO && TARGET_64BIT)
2005 flag_pic = 2;
2006
2007 /* Set the default values for switches whose default depends on TARGET_64BIT
2008 in case they weren't overridden by command line options. */
2009 if (TARGET_64BIT)
2010 {
2011 /* Mach-O doesn't support omitting the frame pointer for now. */
2012 if (flag_omit_frame_pointer == 2)
2013 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2014 if (flag_asynchronous_unwind_tables == 2)
2015 flag_asynchronous_unwind_tables = 1;
2016 if (flag_pcc_struct_return == 2)
2017 flag_pcc_struct_return = 0;
2018 }
2019 else
2020 {
2021 if (flag_omit_frame_pointer == 2)
2022 flag_omit_frame_pointer = 0;
2023 if (flag_asynchronous_unwind_tables == 2)
2024 flag_asynchronous_unwind_tables = 0;
2025 if (flag_pcc_struct_return == 2)
2026 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2027 }
2028
2029 /* Need to check -mtune=generic first. */
2030 if (ix86_tune_string)
2031 {
2032 if (!strcmp (ix86_tune_string, "generic")
2033 || !strcmp (ix86_tune_string, "i686")
2034 /* As special support for cross compilers we read -mtune=native
2035 as -mtune=generic. With native compilers we won't see the
2036 -mtune=native, as it was changed by the driver. */
2037 || !strcmp (ix86_tune_string, "native"))
2038 {
2039 if (TARGET_64BIT)
2040 ix86_tune_string = "generic64";
2041 else
2042 ix86_tune_string = "generic32";
2043 }
2044 else if (!strncmp (ix86_tune_string, "generic", 7))
2045 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2046 }
2047 else
2048 {
2049 if (ix86_arch_string)
2050 ix86_tune_string = ix86_arch_string;
2051 if (!ix86_tune_string)
2052 {
2053 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2054 ix86_tune_defaulted = 1;
2055 }
2056
2057 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2058 need to use a sensible tune option. */
2059 if (!strcmp (ix86_tune_string, "generic")
2060 || !strcmp (ix86_tune_string, "x86-64")
2061 || !strcmp (ix86_tune_string, "i686"))
2062 {
2063 if (TARGET_64BIT)
2064 ix86_tune_string = "generic64";
2065 else
2066 ix86_tune_string = "generic32";
2067 }
2068 }
2069 if (ix86_stringop_string)
2070 {
2071 if (!strcmp (ix86_stringop_string, "rep_byte"))
2072 stringop_alg = rep_prefix_1_byte;
2073 else if (!strcmp (ix86_stringop_string, "libcall"))
2074 stringop_alg = libcall;
2075 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2076 stringop_alg = rep_prefix_4_byte;
2077 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2078 stringop_alg = rep_prefix_8_byte;
2079 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2080 stringop_alg = loop_1_byte;
2081 else if (!strcmp (ix86_stringop_string, "loop"))
2082 stringop_alg = loop;
2083 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2084 stringop_alg = unrolled_loop;
2085 else
2086 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2087 }
2088 if (!strcmp (ix86_tune_string, "x86-64"))
2089 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2090 "-mtune=generic instead as appropriate.");
2091
2092 if (!ix86_arch_string)
2093 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2094 if (!strcmp (ix86_arch_string, "generic"))
2095 error ("generic CPU can be used only for -mtune= switch");
2096 if (!strncmp (ix86_arch_string, "generic", 7))
2097 error ("bad value (%s) for -march= switch", ix86_arch_string);
2098
2099 if (ix86_cmodel_string != 0)
2100 {
2101 if (!strcmp (ix86_cmodel_string, "small"))
2102 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2103 else if (!strcmp (ix86_cmodel_string, "medium"))
2104 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2105 else if (!strcmp (ix86_cmodel_string, "large"))
2106 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2107 else if (flag_pic)
2108 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2109 else if (!strcmp (ix86_cmodel_string, "32"))
2110 ix86_cmodel = CM_32;
2111 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2112 ix86_cmodel = CM_KERNEL;
2113 else
2114 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2115 }
2116 else
2117 {
2118 ix86_cmodel = CM_32;
2119 if (TARGET_64BIT)
2120 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2121 }
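/* E.g. "-m64 -fpic" with no explicit -mcmodel= takes the else branch above
   and yields CM_SMALL_PIC, while a plain 32-bit compilation gets CM_32.  */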
2122 if (ix86_asm_string != 0)
2123 {
2124 if (! TARGET_MACHO
2125 && !strcmp (ix86_asm_string, "intel"))
2126 ix86_asm_dialect = ASM_INTEL;
2127 else if (!strcmp (ix86_asm_string, "att"))
2128 ix86_asm_dialect = ASM_ATT;
2129 else
2130 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2131 }
2132 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2133 error ("code model %qs not supported in the %s bit mode",
2134 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2135 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2136 sorry ("%i-bit mode not compiled in",
2137 (target_flags & MASK_64BIT) ? 64 : 32);
2138
2139 for (i = 0; i < pta_size; i++)
2140 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2141 {
2142 ix86_arch = processor_alias_table[i].processor;
2143 /* Default cpu tuning to the architecture. */
2144 ix86_tune = ix86_arch;
2145 if (processor_alias_table[i].flags & PTA_MMX
2146 && !(target_flags_explicit & MASK_MMX))
2147 target_flags |= MASK_MMX;
2148 if (processor_alias_table[i].flags & PTA_3DNOW
2149 && !(target_flags_explicit & MASK_3DNOW))
2150 target_flags |= MASK_3DNOW;
2151 if (processor_alias_table[i].flags & PTA_3DNOW_A
2152 && !(target_flags_explicit & MASK_3DNOW_A))
2153 target_flags |= MASK_3DNOW_A;
2154 if (processor_alias_table[i].flags & PTA_SSE
2155 && !(target_flags_explicit & MASK_SSE))
2156 target_flags |= MASK_SSE;
2157 if (processor_alias_table[i].flags & PTA_SSE2
2158 && !(target_flags_explicit & MASK_SSE2))
2159 target_flags |= MASK_SSE2;
2160 if (processor_alias_table[i].flags & PTA_SSE3
2161 && !(target_flags_explicit & MASK_SSE3))
2162 target_flags |= MASK_SSE3;
2163 if (processor_alias_table[i].flags & PTA_SSSE3
2164 && !(target_flags_explicit & MASK_SSSE3))
2165 target_flags |= MASK_SSSE3;
2166 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2167 x86_prefetch_sse = true;
2168 if (processor_alias_table[i].flags & PTA_CX16)
2169 x86_cmpxchg16b = true;
2170 if (processor_alias_table[i].flags & PTA_POPCNT
2171 && !(target_flags_explicit & MASK_POPCNT))
2172 target_flags |= MASK_POPCNT;
2173 if (processor_alias_table[i].flags & PTA_ABM
2174 && !(target_flags_explicit & MASK_ABM))
2175 target_flags |= MASK_ABM;
2176 if (processor_alias_table[i].flags & PTA_SSE4A
2177 && !(target_flags_explicit & MASK_SSE4A))
2178 target_flags |= MASK_SSE4A;
2179 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2180 x86_sahf = true;
2181 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2182 error ("CPU you selected does not support x86-64 "
2183 "instruction set");
2184 break;
2185 }
2186
2187 if (i == pta_size)
2188 error ("bad value (%s) for -march= switch", ix86_arch_string);
2189
2190 ix86_arch_mask = 1u << ix86_arch;
2191 for (i = 0; i < X86_ARCH_LAST; ++i)
2192 ix86_arch_features[i] &= ix86_arch_mask;
2193
2194 for (i = 0; i < pta_size; i++)
2195 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2196 {
2197 ix86_tune = processor_alias_table[i].processor;
2198 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2199 {
2200 if (ix86_tune_defaulted)
2201 {
2202 ix86_tune_string = "x86-64";
2203 for (i = 0; i < pta_size; i++)
2204 if (! strcmp (ix86_tune_string,
2205 processor_alias_table[i].name))
2206 break;
2207 ix86_tune = processor_alias_table[i].processor;
2208 }
2209 else
2210 error ("CPU you selected does not support x86-64 "
2211 "instruction set");
2212 }
2213 /* Intel CPUs have always interpreted SSE prefetch instructions as
2214 NOPs; so, we can enable SSE prefetch instructions even when
2215 -mtune (rather than -march) points us to a processor that has them.
2216 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2217 higher processors. */
2218 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2219 x86_prefetch_sse = true;
2220 break;
2221 }
2222 if (i == pta_size)
2223 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2224
2225 ix86_tune_mask = 1u << ix86_tune;
2226 for (i = 0; i < X86_TUNE_LAST; ++i)
2227 ix86_tune_features[i] &= ix86_tune_mask;
2228
2229 if (optimize_size)
2230 ix86_cost = &size_cost;
2231 else
2232 ix86_cost = processor_target_table[ix86_tune].cost;
2233 target_flags |= processor_target_table[ix86_tune].target_enable;
2234 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2235
2236 /* Arrange to set up i386_stack_locals for all functions. */
2237 init_machine_status = ix86_init_machine_status;
2238
2239 /* Validate -mregparm= value. */
2240 if (ix86_regparm_string)
2241 {
2242 i = atoi (ix86_regparm_string);
2243 if (i < 0 || i > REGPARM_MAX)
2244 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2245 else
2246 ix86_regparm = i;
2247 }
2248 else
2249 if (TARGET_64BIT)
2250 ix86_regparm = REGPARM_MAX;
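/* For example, "-mregparm=3" sets ix86_regparm to 3, letting up to three
   integer arguments be passed in %eax, %edx and %ecx on ia32; without the
   option, 64-bit targets simply default to the full REGPARM_MAX set.  */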
2251
2252 /* If the user has provided any of the -malign-* options,
2253 warn and use that value only if -falign-* is not set.
2254 Remove this code in GCC 3.2 or later. */
2255 if (ix86_align_loops_string)
2256 {
2257 warning (0, "-malign-loops is obsolete, use -falign-loops");
2258 if (align_loops == 0)
2259 {
2260 i = atoi (ix86_align_loops_string);
2261 if (i < 0 || i > MAX_CODE_ALIGN)
2262 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2263 else
2264 align_loops = 1 << i;
2265 }
2266 }
2267
2268 if (ix86_align_jumps_string)
2269 {
2270 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2271 if (align_jumps == 0)
2272 {
2273 i = atoi (ix86_align_jumps_string);
2274 if (i < 0 || i > MAX_CODE_ALIGN)
2275 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2276 else
2277 align_jumps = 1 << i;
2278 }
2279 }
2280
2281 if (ix86_align_funcs_string)
2282 {
2283 warning (0, "-malign-functions is obsolete, use -falign-functions");
2284 if (align_functions == 0)
2285 {
2286 i = atoi (ix86_align_funcs_string);
2287 if (i < 0 || i > MAX_CODE_ALIGN)
2288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2289 else
2290 align_functions = 1 << i;
2291 }
2292 }
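/* Worked example: "-malign-functions=5" with no -falign-functions emits
   the obsolescence warning above and sets align_functions to
   1 << 5 == 32 bytes.  */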
2293
2294 /* Default align_* from the processor table. */
2295 if (align_loops == 0)
2296 {
2297 align_loops = processor_target_table[ix86_tune].align_loop;
2298 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2299 }
2300 if (align_jumps == 0)
2301 {
2302 align_jumps = processor_target_table[ix86_tune].align_jump;
2303 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2304 }
2305 if (align_functions == 0)
2306 {
2307 align_functions = processor_target_table[ix86_tune].align_func;
2308 }
2309
2310 /* Validate -mbranch-cost= value, or provide default. */
2311 ix86_branch_cost = ix86_cost->branch_cost;
2312 if (ix86_branch_cost_string)
2313 {
2314 i = atoi (ix86_branch_cost_string);
2315 if (i < 0 || i > 5)
2316 error ("-mbranch-cost=%d is not between 0 and 5", i);
2317 else
2318 ix86_branch_cost = i;
2319 }
2320 if (ix86_section_threshold_string)
2321 {
2322 i = atoi (ix86_section_threshold_string);
2323 if (i < 0)
2324 error ("-mlarge-data-threshold=%d is negative", i);
2325 else
2326 ix86_section_threshold = i;
2327 }
2328
2329 if (ix86_tls_dialect_string)
2330 {
2331 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2332 ix86_tls_dialect = TLS_DIALECT_GNU;
2333 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2334 ix86_tls_dialect = TLS_DIALECT_GNU2;
2335 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2336 ix86_tls_dialect = TLS_DIALECT_SUN;
2337 else
2338 error ("bad value (%s) for -mtls-dialect= switch",
2339 ix86_tls_dialect_string);
2340 }
2341
2342 /* Keep nonleaf frame pointers. */
2343 if (flag_omit_frame_pointer)
2344 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2345 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2346 flag_omit_frame_pointer = 1;
2347
2348 /* If we're doing fast math, we don't care about comparison order
2349 wrt NaNs. This lets us use a shorter comparison sequence. */
2350 if (flag_finite_math_only)
2351 target_flags &= ~MASK_IEEE_FP;
2352
2353 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2354 since the insns won't need emulation. */
2355 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2356 target_flags &= ~MASK_NO_FANCY_MATH_387;
2357
2358 /* Likewise, if the target doesn't have a 387, or we've specified
2359 software floating point, don't use 387 inline intrinsics. */
2360 if (!TARGET_80387)
2361 target_flags |= MASK_NO_FANCY_MATH_387;
2362
2363 /* Turn on SSE3 builtins for -mssse3. */
2364 if (TARGET_SSSE3)
2365 target_flags |= MASK_SSE3;
2366
2367 /* Turn on SSE3 builtins for -msse4a. */
2368 if (TARGET_SSE4A)
2369 target_flags |= MASK_SSE3;
2370
2371 /* Turn on SSE2 builtins for -msse3. */
2372 if (TARGET_SSE3)
2373 target_flags |= MASK_SSE2;
2374
2375 /* Turn on SSE builtins for -msse2. */
2376 if (TARGET_SSE2)
2377 target_flags |= MASK_SSE;
2378
2379 /* Turn on MMX builtins for -msse. */
2380 if (TARGET_SSE)
2381 {
2382 target_flags |= MASK_MMX & ~target_flags_explicit;
2383 x86_prefetch_sse = true;
2384 }
2385
2386 /* Turn on MMX builtins for 3Dnow. */
2387 if (TARGET_3DNOW)
2388 target_flags |= MASK_MMX;
2389
2390 /* Turn on POPCNT builtins for -mabm. */
2391 if (TARGET_ABM)
2392 target_flags |= MASK_POPCNT;
2393
2394 if (TARGET_64BIT)
2395 {
2396 if (TARGET_ALIGN_DOUBLE)
2397 error ("-malign-double makes no sense in the 64bit mode");
2398 if (TARGET_RTD)
2399 error ("-mrtd calling convention not supported in the 64bit mode");
2400
2401 /* Enable by default the SSE and MMX builtins. Do allow the user to
2402 explicitly disable any of these. In particular, disabling SSE and
2403 MMX for kernel code is extremely useful. */
2404 target_flags
2405 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2406 & ~target_flags_explicit);
2407 }
2408 else
2409 {
2410 /* The i386 ABI does not specify a red zone. It still makes sense to use
2411 one when the programmer takes care to keep the stack from being destroyed. */
2412 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2413 target_flags |= MASK_NO_RED_ZONE;
2414 }
2415
2416 /* Validate -mpreferred-stack-boundary= value, or provide default.
2417 The default of 128 bits is for Pentium III's SSE __m128. We can't
2418 lower it just because of optimize_size; otherwise, we couldn't mix
2419 object files compiled with -Os and -On. */
2420 ix86_preferred_stack_boundary = 128;
2421 if (ix86_preferred_stack_boundary_string)
2422 {
2423 i = atoi (ix86_preferred_stack_boundary_string);
2424 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2425 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2426 TARGET_64BIT ? 4 : 2);
2427 else
2428 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2429 }
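/* Worked example: "-mpreferred-stack-boundary=4" yields
   (1 << 4) * BITS_PER_UNIT == 16 * 8 == 128 bits, i.e. the same 16-byte
   stack alignment as the built-in default.  */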
2430
2431 /* Accept -msseregparm only if at least SSE support is enabled. */
2432 if (TARGET_SSEREGPARM
2433 && ! TARGET_SSE)
2434 error ("-msseregparm used without SSE enabled");
2435
2436 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2437 if (ix86_fpmath_string != 0)
2438 {
2439 if (! strcmp (ix86_fpmath_string, "387"))
2440 ix86_fpmath = FPMATH_387;
2441 else if (! strcmp (ix86_fpmath_string, "sse"))
2442 {
2443 if (!TARGET_SSE)
2444 {
2445 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2446 ix86_fpmath = FPMATH_387;
2447 }
2448 else
2449 ix86_fpmath = FPMATH_SSE;
2450 }
2451 else if (! strcmp (ix86_fpmath_string, "387,sse")
2452 || ! strcmp (ix86_fpmath_string, "sse,387"))
2453 {
2454 if (!TARGET_SSE)
2455 {
2456 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2457 ix86_fpmath = FPMATH_387;
2458 }
2459 else if (!TARGET_80387)
2460 {
2461 warning (0, "387 instruction set disabled, using SSE arithmetics");
2462 ix86_fpmath = FPMATH_SSE;
2463 }
2464 else
2465 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2466 }
2467 else
2468 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2469 }
2470
2471 /* If the i387 is disabled, then do not return values in it. */
2472 if (!TARGET_80387)
2473 target_flags &= ~MASK_FLOAT_RETURNS;
2474
2475 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2476 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2477 && !optimize_size)
2478 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2479
2480 /* ??? Unwind info is not correct around the CFG unless either a frame
2481 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2482 unwind info generation to be aware of the CFG and propagating states
2483 around edges. */
2484 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2485 || flag_exceptions || flag_non_call_exceptions)
2486 && flag_omit_frame_pointer
2487 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2488 {
2489 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2490 warning (0, "unwind tables currently require either a frame pointer "
2491 "or -maccumulate-outgoing-args for correctness");
2492 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2493 }
2494
2495 /* For sane SSE instruction set generation we need the fcomi instruction.
2496 It is safe to enable all CMOVE instructions. */
2497 if (TARGET_SSE)
2498 TARGET_CMOVE = 1;
2499
2500 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2501 {
2502 char *p;
2503 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2504 p = strchr (internal_label_prefix, 'X');
2505 internal_label_prefix_len = p - internal_label_prefix;
2506 *p = '\0';
2507 }
2508
2509 /* When a scheduling description is not available, disable the scheduler
2510 passes so they won't slow down the compilation and make x87 code slower. */
2511 if (!TARGET_SCHEDULE)
2512 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2513
2514 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2515 set_param_value ("simultaneous-prefetches",
2516 ix86_cost->simultaneous_prefetches);
2517 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2518 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2519 }
2520 \f
2521 /* Switch to the appropriate section for output of DECL.
2522 DECL is either a `VAR_DECL' node or a constant of some sort.
2523 RELOC indicates whether forming the initial value of DECL requires
2524 link-time relocations. */
2525
2526 static section *
2527 x86_64_elf_select_section (tree decl, int reloc,
2528 unsigned HOST_WIDE_INT align)
2529 {
2530 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2531 && ix86_in_large_data_p (decl))
2532 {
2533 const char *sname = NULL;
2534 unsigned int flags = SECTION_WRITE;
2535 switch (categorize_decl_for_section (decl, reloc))
2536 {
2537 case SECCAT_DATA:
2538 sname = ".ldata";
2539 break;
2540 case SECCAT_DATA_REL:
2541 sname = ".ldata.rel";
2542 break;
2543 case SECCAT_DATA_REL_LOCAL:
2544 sname = ".ldata.rel.local";
2545 break;
2546 case SECCAT_DATA_REL_RO:
2547 sname = ".ldata.rel.ro";
2548 break;
2549 case SECCAT_DATA_REL_RO_LOCAL:
2550 sname = ".ldata.rel.ro.local";
2551 break;
2552 case SECCAT_BSS:
2553 sname = ".lbss";
2554 flags |= SECTION_BSS;
2555 break;
2556 case SECCAT_RODATA:
2557 case SECCAT_RODATA_MERGE_STR:
2558 case SECCAT_RODATA_MERGE_STR_INIT:
2559 case SECCAT_RODATA_MERGE_CONST:
2560 sname = ".lrodata";
2561 flags = 0;
2562 break;
2563 case SECCAT_SRODATA:
2564 case SECCAT_SDATA:
2565 case SECCAT_SBSS:
2566 gcc_unreachable ();
2567 case SECCAT_TEXT:
2568 case SECCAT_TDATA:
2569 case SECCAT_TBSS:
2570 /* We don't split these for the medium model. Place them into
2571 default sections and hope for the best. */
2572 break;
2573 }
2574 if (sname)
2575 {
2576 /* We might get called with string constants, but get_named_section
2577 doesn't like them as they are not DECLs. Also, we need to set
2578 flags in that case. */
2579 if (!DECL_P (decl))
2580 return get_section (sname, flags, NULL);
2581 return get_named_section (decl, sname, reloc);
2582 }
2583 }
2584 return default_elf_select_section (decl, reloc, align);
2585 }
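/* For example, under "-mcmodel=medium" a writable initialized global that
   ix86_in_large_data_p considers large (see -mlarge-data-threshold) is
   categorized SECCAT_DATA and therefore emitted into ".ldata" instead of
   the default ".data" section.  */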
2586
2587 /* Build up a unique section name, expressed as a
2588 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2589 RELOC indicates whether the initial value of EXP requires
2590 link-time relocations. */
2591
2592 static void
2593 x86_64_elf_unique_section (tree decl, int reloc)
2594 {
2595 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2596 && ix86_in_large_data_p (decl))
2597 {
2598 const char *prefix = NULL;
2599 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2600 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2601
2602 switch (categorize_decl_for_section (decl, reloc))
2603 {
2604 case SECCAT_DATA:
2605 case SECCAT_DATA_REL:
2606 case SECCAT_DATA_REL_LOCAL:
2607 case SECCAT_DATA_REL_RO:
2608 case SECCAT_DATA_REL_RO_LOCAL:
2609 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2610 break;
2611 case SECCAT_BSS:
2612 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2613 break;
2614 case SECCAT_RODATA:
2615 case SECCAT_RODATA_MERGE_STR:
2616 case SECCAT_RODATA_MERGE_STR_INIT:
2617 case SECCAT_RODATA_MERGE_CONST:
2618 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2619 break;
2620 case SECCAT_SRODATA:
2621 case SECCAT_SDATA:
2622 case SECCAT_SBSS:
2623 gcc_unreachable ();
2624 case SECCAT_TEXT:
2625 case SECCAT_TDATA:
2626 case SECCAT_TBSS:
2627 /* We don't split these for the medium model. Place them into
2628 default sections and hope for the best. */
2629 break;
2630 }
2631 if (prefix)
2632 {
2633 const char *name;
2634 size_t nlen, plen;
2635 char *string;
2636 plen = strlen (prefix);
2637
2638 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2639 name = targetm.strip_name_encoding (name);
2640 nlen = strlen (name);
2641
2642 string = alloca (nlen + plen + 1);
2643 memcpy (string, prefix, plen);
2644 memcpy (string + plen, name, nlen + 1);
2645
2646 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2647 return;
2648 }
2649 }
2650 default_unique_section (decl, reloc);
2651 }
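/* For example, a large uninitialized global (say, "foo") under
   "-mcmodel=medium" gets DECL_SECTION_NAME ".lbss.foo" here, or
   ".gnu.linkonce.lb.foo" when it must be one-only on a target without
   COMDAT group support.  */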
2652
2653 #ifdef COMMON_ASM_OP
2654 /* This says how to output assembler code to declare an
2655 uninitialized external linkage data object.
2656
2657 For medium-model x86-64 we need to use the .largecomm directive for
2658 large objects. */
2659 void
2660 x86_elf_aligned_common (FILE *file,
2661 const char *name, unsigned HOST_WIDE_INT size,
2662 int align)
2663 {
2664 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2665 && size > (unsigned int)ix86_section_threshold)
2666 fprintf (file, ".largecomm\t");
2667 else
2668 fprintf (file, "%s", COMMON_ASM_OP);
2669 assemble_name (file, name);
2670 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2671 size, align / BITS_PER_UNIT);
2672 }
2673 #endif
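/* Typical output for a 1 MB, 32-byte-aligned common symbol under
   "-mcmodel=medium" would be

     .largecomm	buf,1048576,32

   (the ALIGN argument is in bits and is printed in bytes), while small
   objects keep the usual COMMON_ASM_OP form.  */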
2674 /* Utility function for targets to use in implementing
2675 ASM_OUTPUT_ALIGNED_BSS. */
2676
2677 void
2678 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2679 const char *name, unsigned HOST_WIDE_INT size,
2680 int align)
2681 {
2682 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2683 && size > (unsigned int)ix86_section_threshold)
2684 switch_to_section (get_named_section (decl, ".lbss", 0));
2685 else
2686 switch_to_section (bss_section);
2687 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2688 #ifdef ASM_DECLARE_OBJECT_NAME
2689 last_assemble_variable_decl = decl;
2690 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2691 #else
2692 /* The standard thing is to just output a label for the object. */
2693 ASM_OUTPUT_LABEL (file, name);
2694 #endif /* ASM_DECLARE_OBJECT_NAME */
2695 ASM_OUTPUT_SKIP (file, size ? size : 1);
2696 }
2697 \f
2698 void
2699 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2700 {
2701 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2702 make the shortage of registers even worse. */
2703 #ifdef INSN_SCHEDULING
2704 if (level > 1)
2705 flag_schedule_insns = 0;
2706 #endif
2707
2708 if (TARGET_MACHO)
2709 /* The Darwin libraries never set errno, so we might as well
2710 avoid calling them when that's the only reason we would. */
2711 flag_errno_math = 0;
2712
2713 /* The default values of these switches depend on TARGET_64BIT,
2714 which is not known at this moment. Mark these values with 2 and
2715 let the user override them. If there is no command line option
2716 specifying them, we will set the defaults in override_options. */
2717 if (optimize >= 1)
2718 flag_omit_frame_pointer = 2;
2719 flag_pcc_struct_return = 2;
2720 flag_asynchronous_unwind_tables = 2;
2721 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2722 SUBTARGET_OPTIMIZATION_OPTIONS;
2723 #endif
2724 }
2725 \f
2726 /* Table of valid machine attributes. */
2727 const struct attribute_spec ix86_attribute_table[] =
2728 {
2729 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2730 /* Stdcall attribute says callee is responsible for popping arguments
2731 if they are not variable. */
2732 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2733 /* Fastcall attribute says callee is responsible for popping arguments
2734 if they are not variable. */
2735 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2736 /* Cdecl attribute says the callee is a normal C declaration */
2737 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2738 /* Regparm attribute specifies how many integer arguments are to be
2739 passed in registers. */
2740 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2741 /* Sseregparm attribute says we are using x86_64 calling conventions
2742 for FP arguments. */
2743 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2744 /* force_align_arg_pointer says this function realigns the stack at entry. */
2745 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2746 false, true, true, ix86_handle_cconv_attribute },
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2749 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2750 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2751 #endif
2752 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2753 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2754 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2755 SUBTARGET_ATTRIBUTE_TABLE,
2756 #endif
2757 { NULL, 0, 0, false, false, false, NULL }
2758 };
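/* Source-level examples of the attributes handled above (illustrative):

     int  __attribute__((regparm(3))) f (int a, int b, int c);
     int  __attribute__((fastcall))   g (int a, int b);
     struct __attribute__((ms_struct)) s { char c; int i; };

   The calling-convention attributes apply to function types and are
   vetted by ix86_handle_cconv_attribute; ms_struct/gcc_struct select the
   structure layout rules via ix86_handle_struct_attribute.  */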
2759
2760 /* Decide whether we can make a sibling call to a function. DECL is the
2761 declaration of the function being targeted by the call and EXP is the
2762 CALL_EXPR representing the call. */
2763
2764 static bool
2765 ix86_function_ok_for_sibcall (tree decl, tree exp)
2766 {
2767 tree func;
2768 rtx a, b;
2769
2770 /* If we are generating position-independent code, we cannot sibcall
2771 optimize any indirect call, or a direct call to a global function,
2772 as the PLT requires %ebx be live. */
2773 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2774 return false;
2775
2776 if (decl)
2777 func = decl;
2778 else
2779 {
2780 func = TREE_TYPE (CALL_EXPR_FN (exp));
2781 if (POINTER_TYPE_P (func))
2782 func = TREE_TYPE (func);
2783 }
2784
2785 /* Check that the return value locations are the same. For example,
2786 if we are returning floats on the 80387 register stack, we cannot
2787 make a sibcall from a function that doesn't return a float to a
2788 function that does or, conversely, from a function that does return
2789 a float to a function that doesn't; the necessary stack adjustment
2790 would not be executed. This is also the place we notice
2791 differences in the return value ABI. Note that it is ok for one
2792 of the functions to have void return type as long as the return
2793 value of the other is passed in a register. */
2794 a = ix86_function_value (TREE_TYPE (exp), func, false);
2795 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2796 cfun->decl, false);
2797 if (STACK_REG_P (a) || STACK_REG_P (b))
2798 {
2799 if (!rtx_equal_p (a, b))
2800 return false;
2801 }
2802 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2803 ;
2804 else if (!rtx_equal_p (a, b))
2805 return false;
2806
2807 /* If this call is indirect, we'll need to be able to use a call-clobbered
2808 register for the address of the target function. Make sure that all
2809 such registers are not used for passing parameters. */
2810 if (!decl && !TARGET_64BIT)
2811 {
2812 tree type;
2813
2814 /* We're looking at the CALL_EXPR, we need the type of the function. */
2815 type = CALL_EXPR_FN (exp); /* pointer expression */
2816 type = TREE_TYPE (type); /* pointer type */
2817 type = TREE_TYPE (type); /* function type */
2818
2819 if (ix86_function_regparm (type, NULL) >= 3)
2820 {
2821 /* ??? Need to count the actual number of registers to be used,
2822 not the possible number of registers. Fix later. */
2823 return false;
2824 }
2825 }
2826
2827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2828 /* Dllimport'd functions are also called indirectly. */
2829 if (decl && DECL_DLLIMPORT_P (decl)
2830 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2831 return false;
2832 #endif
2833
2834 /* If we force-aligned the stack, then sibcalling would unalign the
2835 stack, which may break the called function. */
2836 if (cfun->machine->force_align_arg_pointer)
2837 return false;
2838
2839 /* Otherwise okay. That also includes certain types of indirect calls. */
2840 return true;
2841 }
2842
2843 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2844 calling convention attributes;
2845 arguments as in struct attribute_spec.handler. */
2846
2847 static tree
2848 ix86_handle_cconv_attribute (tree *node, tree name,
2849 tree args,
2850 int flags ATTRIBUTE_UNUSED,
2851 bool *no_add_attrs)
2852 {
2853 if (TREE_CODE (*node) != FUNCTION_TYPE
2854 && TREE_CODE (*node) != METHOD_TYPE
2855 && TREE_CODE (*node) != FIELD_DECL
2856 && TREE_CODE (*node) != TYPE_DECL)
2857 {
2858 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2859 IDENTIFIER_POINTER (name));
2860 *no_add_attrs = true;
2861 return NULL_TREE;
2862 }
2863
2864 /* Can combine regparm with all attributes but fastcall. */
2865 if (is_attribute_p ("regparm", name))
2866 {
2867 tree cst;
2868
2869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2870 {
2871 error ("fastcall and regparm attributes are not compatible");
2872 }
2873
2874 cst = TREE_VALUE (args);
2875 if (TREE_CODE (cst) != INTEGER_CST)
2876 {
2877 warning (OPT_Wattributes,
2878 "%qs attribute requires an integer constant argument",
2879 IDENTIFIER_POINTER (name));
2880 *no_add_attrs = true;
2881 }
2882 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2883 {
2884 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2885 IDENTIFIER_POINTER (name), REGPARM_MAX);
2886 *no_add_attrs = true;
2887 }
2888
2889 if (!TARGET_64BIT
2890 && lookup_attribute (ix86_force_align_arg_pointer_string,
2891 TYPE_ATTRIBUTES (*node))
2892 && compare_tree_int (cst, REGPARM_MAX-1))
2893 {
2894 error ("%s functions limited to %d register parameters",
2895 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2896 }
2897
2898 return NULL_TREE;
2899 }
2900
2901 if (TARGET_64BIT)
2902 {
2903 warning (OPT_Wattributes, "%qs attribute ignored",
2904 IDENTIFIER_POINTER (name));
2905 *no_add_attrs = true;
2906 return NULL_TREE;
2907 }
2908
2909 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2910 if (is_attribute_p ("fastcall", name))
2911 {
2912 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2913 {
2914 error ("fastcall and cdecl attributes are not compatible");
2915 }
2916 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2917 {
2918 error ("fastcall and stdcall attributes are not compatible");
2919 }
2920 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2921 {
2922 error ("fastcall and regparm attributes are not compatible");
2923 }
2924 }
2925
2926 /* Can combine stdcall with fastcall (redundant), regparm and
2927 sseregparm. */
2928 else if (is_attribute_p ("stdcall", name))
2929 {
2930 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2931 {
2932 error ("stdcall and cdecl attributes are not compatible");
2933 }
2934 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2935 {
2936 error ("stdcall and fastcall attributes are not compatible");
2937 }
2938 }
2939
2940 /* Can combine cdecl with regparm and sseregparm. */
2941 else if (is_attribute_p ("cdecl", name))
2942 {
2943 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2944 {
2945 error ("stdcall and cdecl attributes are not compatible");
2946 }
2947 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2948 {
2949 error ("fastcall and cdecl attributes are not compatible");
2950 }
2951 }
2952
2953 /* Can combine sseregparm with all attributes. */
2954
2955 return NULL_TREE;
2956 }
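/* For example,

     void __attribute__((fastcall, regparm(2))) f (int, int);

   is rejected above with "fastcall and regparm attributes are not
   compatible", whereas combining stdcall with regparm is accepted.  */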
2957
2958 /* Return 0 if the attributes for two types are incompatible, 1 if they
2959 are compatible, and 2 if they are nearly compatible (which causes a
2960 warning to be generated). */
2961
2962 static int
2963 ix86_comp_type_attributes (tree type1, tree type2)
2964 {
2965 /* Check for mismatch of non-default calling convention. */
2966 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2967
2968 if (TREE_CODE (type1) != FUNCTION_TYPE)
2969 return 1;
2970
2971 /* Check for mismatched fastcall/regparm types. */
2972 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2973 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2974 || (ix86_function_regparm (type1, NULL)
2975 != ix86_function_regparm (type2, NULL)))
2976 return 0;
2977
2978 /* Check for mismatched sseregparm types. */
2979 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2980 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2981 return 0;
2982
2983 /* Check for mismatched return types (cdecl vs stdcall). */
2984 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2985 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2986 return 0;
2987
2988 return 1;
2989 }
2990 \f
2991 /* Return the regparm value for a function with the indicated TYPE and DECL.
2992 DECL may be NULL when calling function indirectly
2993 or considering a libcall. */
2994
2995 static int
2996 ix86_function_regparm (tree type, tree decl)
2997 {
2998 tree attr;
2999 int regparm = ix86_regparm;
3000
3001 if (TARGET_64BIT)
3002 return regparm;
3003
3004 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3005 if (attr)
3006 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3007
3008 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3009 return 2;
3010
3011 /* Use register calling convention for local functions when possible. */
3012 if (decl && flag_unit_at_a_time && !profile_flag)
3013 {
3014 struct cgraph_local_info *i = cgraph_local_info (decl);
3015 if (i && i->local)
3016 {
3017 int local_regparm, globals = 0, regno;
3018 struct function *f;
3019
3020 /* Make sure no regparm register is taken by a
3021 global register variable. */
3022 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3023 if (global_regs[local_regparm])
3024 break;
3025
3026 /* We can't use regparm(3) for nested functions as these use
3027 the static chain pointer in the third argument register. */
3028 if (local_regparm == 3
3029 && decl_function_context (decl)
3030 && !DECL_NO_STATIC_CHAIN (decl))
3031 local_regparm = 2;
3032
3033 /* If the function realigns its stack pointer, the prologue will
3034 clobber %ecx. If we've already generated code for the callee,
3035 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3036 scanning the attributes for the self-realigning property. */
3037 f = DECL_STRUCT_FUNCTION (decl);
3038 if (local_regparm == 3
3039 && (f ? !!f->machine->force_align_arg_pointer
3040 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3041 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3042 local_regparm = 2;
3043
3044 /* Each global register variable increases register pressure,
3045 so the more global register variables there are, the less useful
3046 the regparm optimization is, unless the user requests it explicitly. */
3047 for (regno = 0; regno < 6; regno++)
3048 if (global_regs[regno])
3049 globals++;
3050 local_regparm
3051 = globals < local_regparm ? local_regparm - globals : 0;
3052
3053 if (local_regparm > regparm)
3054 regparm = local_regparm;
3055 }
3056 }
3057
3058 return regparm;
3059 }
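/* In effect, with unit-at-a-time and no profiling, a local function (static,
   address never taken) that uses no global register variables, is not nested
   and does not realign its stack is promoted to regparm (3): its first three
   integer arguments arrive in %eax, %edx and %ecx.  */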
3060
3061 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3062 DFmode (2) arguments in SSE registers for a function with the
3063 indicated TYPE and DECL. DECL may be NULL when calling function
3064 indirectly or considering a libcall. Otherwise return 0. */
3065
3066 static int
3067 ix86_function_sseregparm (tree type, tree decl)
3068 {
3069 gcc_assert (!TARGET_64BIT);
3070
3071 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3072 by the sseregparm attribute. */
3073 if (TARGET_SSEREGPARM
3074 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3075 {
3076 if (!TARGET_SSE)
3077 {
3078 if (decl)
3079 error ("Calling %qD with attribute sseregparm without "
3080 "SSE/SSE2 enabled", decl);
3081 else
3082 error ("Calling %qT with attribute sseregparm without "
3083 "SSE/SSE2 enabled", type);
3084 return 0;
3085 }
3086
3087 return 2;
3088 }
3089
3090 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3091 (and DFmode for SSE2) arguments in SSE registers. */
3092 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3093 {
3094 struct cgraph_local_info *i = cgraph_local_info (decl);
3095 if (i && i->local)
3096 return TARGET_SSE2 ? 2 : 1;
3097 }
3098
3099 return 0;
3100 }
3101
3102 /* Return true if EAX is live at the start of the function. Used by
3103 ix86_expand_prologue to determine if we need special help before
3104 calling allocate_stack_worker. */
3105
3106 static bool
3107 ix86_eax_live_at_start_p (void)
3108 {
3109 /* Cheat. Don't bother working forward from ix86_function_regparm
3110 to the function type to whether an actual argument is located in
3111 eax. Instead just look at cfg info, which is still close enough
3112 to correct at this point. This gives false positives for broken
3113 functions that might use uninitialized data that happens to be
3114 allocated in eax, but who cares? */
3115 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3116 }
3117
3118 /* Return true if TYPE has a variable argument list. */
3119
3120 static bool
3121 type_has_variadic_args_p (tree type)
3122 {
3123 tree t;
3124
3125 for (t = TYPE_ARG_TYPES (type); t; t = TREE_CHAIN (t))
3126 if (t == void_list_node)
3127 return false;
3128 return true;
3129 }
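/* For example, this is true for "int f (int, ...)" and for the unprototyped
   "int g ()", but false for the prototyped "int h (int)", whose
   TYPE_ARG_TYPES list ends in void_list_node.  */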
3130
3131 /* Value is the number of bytes of arguments automatically
3132 popped when returning from a subroutine call.
3133 FUNDECL is the declaration node of the function (as a tree),
3134 FUNTYPE is the data type of the function (as a tree),
3135 or for a library call it is an identifier node for the subroutine name.
3136 SIZE is the number of bytes of arguments passed on the stack.
3137
3138 On the 80386, the RTD insn may be used to pop them if the number
3139 of args is fixed, but if the number is variable then the caller
3140 must pop them all. RTD can't be used for library calls now
3141 because the library is compiled with the Unix compiler.
3142 Use of RTD is a selectable option, since it is incompatible with
3143 standard Unix calling sequences. If the option is not selected,
3144 the caller must always pop the args.
3145
3146 The attribute stdcall is equivalent to RTD on a per module basis. */
3147
3148 int
3149 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3150 {
3151 int rtd;
3152
3153 /* None of the 64-bit ABIs pop arguments. */
3154 if (TARGET_64BIT)
3155 return 0;
3156
3157 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3158
3159 /* Cdecl functions override -mrtd, and never pop the stack. */
3160 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3161 {
3162 /* Stdcall and fastcall functions will pop the stack if they do not
3163 take variable arguments. */
3164 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3165 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3166 rtd = 1;
3167
3168 if (rtd && ! type_has_variadic_args_p (funtype))
3169 return size;
3170 }
3171
3172 /* Lose any fake structure return argument if it is passed on the stack. */
3173 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3174 && !KEEP_AGGREGATE_RETURN_POINTER)
3175 {
3176 int nregs = ix86_function_regparm (funtype, fundecl);
3177 if (nregs == 0)
3178 return GET_MODE_SIZE (Pmode);
3179 }
3180
3181 return 0;
3182 }
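/* For example, on 32-bit targets "__attribute__ ((stdcall)) void f (int, int)"
   pops 8 bytes ("ret $8"), while a cdecl or variadic function returns 0 here
   and leaves the cleanup to the caller.  */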
3183 \f
3184 /* Argument support functions. */
3185
3186 /* Return true when register may be used to pass function parameters. */
3187 bool
3188 ix86_function_arg_regno_p (int regno)
3189 {
3190 int i;
3191
3192 if (!TARGET_64BIT)
3193 {
3194 if (TARGET_MACHO)
3195 return (regno < REGPARM_MAX
3196 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3197 else
3198 return (regno < REGPARM_MAX
3199 || (TARGET_MMX && MMX_REGNO_P (regno)
3200 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3201 || (TARGET_SSE && SSE_REGNO_P (regno)
3202 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3203 }
3204
3205 if (TARGET_MACHO)
3206 {
3207 if (SSE_REGNO_P (regno) && TARGET_SSE)
3208 return true;
3209 }
3210 else
3211 {
3212 if (TARGET_SSE && SSE_REGNO_P (regno)
3213 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3214 return true;
3215 }
3216
3217 /* RAX is used as a hidden argument to varargs functions. */
3218 if (regno == 0)
3219 return true;
3220
3221 for (i = 0; i < REGPARM_MAX; i++)
3222 if (regno == x86_64_int_parameter_registers[i])
3223 return true;
3224 return false;
3225 }
3226
3227 /* Return true if we do not know how to pass TYPE solely in registers. */
3228
3229 static bool
3230 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3231 {
3232 if (must_pass_in_stack_var_size_or_pad (mode, type))
3233 return true;
3234
3235 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3236 The layout_type routine is crafty and tries to trick us into passing
3237 currently unsupported vector types on the stack by using TImode. */
3238 return (!TARGET_64BIT && mode == TImode
3239 && type && TREE_CODE (type) != VECTOR_TYPE);
3240 }
3241
3242 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3243 for a call to a function whose data type is FNTYPE.
3244 For a library call, FNTYPE is 0. */
3245
3246 void
3247 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3248 tree fntype, /* tree ptr for function decl */
3249 rtx libname, /* SYMBOL_REF of library name or 0 */
3250 tree fndecl)
3251 {
3252 memset (cum, 0, sizeof (*cum));
3253
3254 /* Set up the number of registers to use for passing arguments. */
3255 cum->nregs = ix86_regparm;
3256 if (TARGET_SSE)
3257 cum->sse_nregs = SSE_REGPARM_MAX;
3258 if (TARGET_MMX)
3259 cum->mmx_nregs = MMX_REGPARM_MAX;
3260 cum->warn_sse = true;
3261 cum->warn_mmx = true;
3262 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3263
3264 if (!TARGET_64BIT)
3265 {
3266 /* If there are variable arguments, then we won't pass anything
3267 in registers in 32-bit mode. */
3268 if (cum->maybe_vaarg)
3269 {
3270 cum->nregs = 0;
3271 cum->sse_nregs = 0;
3272 cum->mmx_nregs = 0;
3273 cum->warn_sse = 0;
3274 cum->warn_mmx = 0;
3275 return;
3276 }
3277
3278 /* Use ecx and edx registers if function has fastcall attribute,
3279 else look for regparm information. */
3280 if (fntype)
3281 {
3282 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3283 {
3284 cum->nregs = 2;
3285 cum->fastcall = 1;
3286 }
3287 else
3288 cum->nregs = ix86_function_regparm (fntype, fndecl);
3289 }
3290
3291 /* Set up the number of SSE registers used for passing SFmode
3292 and DFmode arguments. Warn for mismatching ABI. */
3293 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3294 }
3295 }
3296
3297 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3298 But in the case of vector types, it is some vector mode.
3299
3300 When we have only some of our vector ISA extensions enabled, then there
3301 are some modes for which vector_mode_supported_p is false. For these
3302 modes, the generic vector support in gcc will choose some non-vector mode
3303 in order to implement the type. By computing the natural mode, we'll
3304 select the proper ABI location for the operand and not depend on whatever
3305 the middle-end decides to do with these vector types. */
3306
3307 static enum machine_mode
3308 type_natural_mode (tree type)
3309 {
3310 enum machine_mode mode = TYPE_MODE (type);
3311
3312 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3313 {
3314 HOST_WIDE_INT size = int_size_in_bytes (type);
3315 if ((size == 8 || size == 16)
3316 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3317 && TYPE_VECTOR_SUBPARTS (type) > 1)
3318 {
3319 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3320
3321 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3322 mode = MIN_MODE_VECTOR_FLOAT;
3323 else
3324 mode = MIN_MODE_VECTOR_INT;
3325
3326 /* Get the mode which has this inner mode and number of units. */
3327 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3328 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3329 && GET_MODE_INNER (mode) == innermode)
3330 return mode;
3331
3332 gcc_unreachable ();
3333 }
3334 }
3335
3336 return mode;
3337 }
3338
3339 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3340 this may not agree with the mode that the type system has chosen for the
3341 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3342 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3343
3344 static rtx
3345 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3346 unsigned int regno)
3347 {
3348 rtx tmp;
3349
3350 if (orig_mode != BLKmode)
3351 tmp = gen_rtx_REG (orig_mode, regno);
3352 else
3353 {
3354 tmp = gen_rtx_REG (mode, regno);
3355 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3356 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3357 }
3358
3359 return tmp;
3360 }
3361
3362 /* x86-64 register passing implementation. See the x86-64 psABI for details.
3363 The goal of this code is to classify each 8-byte chunk of the incoming
3364 argument by register class and assign registers accordingly. */
3365
3366 /* Return the union class of CLASS1 and CLASS2.
3367 See the x86-64 PS ABI for details. */
3368
3369 static enum x86_64_reg_class
3370 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3371 {
3372 /* Rule #1: If both classes are equal, this is the resulting class. */
3373 if (class1 == class2)
3374 return class1;
3375
3376 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3377 the other class. */
3378 if (class1 == X86_64_NO_CLASS)
3379 return class2;
3380 if (class2 == X86_64_NO_CLASS)
3381 return class1;
3382
3383 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3384 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3385 return X86_64_MEMORY_CLASS;
3386
3387 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3388 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3389 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3390 return X86_64_INTEGERSI_CLASS;
3391 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3392 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3393 return X86_64_INTEGER_CLASS;
3394
3395 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3396 MEMORY is used. */
3397 if (class1 == X86_64_X87_CLASS
3398 || class1 == X86_64_X87UP_CLASS
3399 || class1 == X86_64_COMPLEX_X87_CLASS
3400 || class2 == X86_64_X87_CLASS
3401 || class2 == X86_64_X87UP_CLASS
3402 || class2 == X86_64_COMPLEX_X87_CLASS)
3403 return X86_64_MEMORY_CLASS;
3404
3405 /* Rule #6: Otherwise class SSE is used. */
3406 return X86_64_SSE_CLASS;
3407 }
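/* For example, "struct { int i; float f; }" occupies a single eightbyte: the
   int half classifies as INTEGERSI and the float half (at bit offset 32) as
   SSE, and rule #4 merges them to INTEGER, so the whole struct is passed in
   one general-purpose register.  */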
3408
3409 /* Classify the argument of type TYPE and mode MODE.
3410 CLASSES will be filled by the register class used to pass each word
3411 of the operand. The number of words is returned. In case the parameter
3412 should be passed in memory, 0 is returned. As a special case for zero
3413 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3414
3415 BIT_OFFSET is used internally for handling records; it specifies the
3416 offset in bits modulo 256 to avoid overflow cases.
3417
3418 See the x86-64 PS ABI for details.
3419 */
3420
3421 static int
3422 classify_argument (enum machine_mode mode, tree type,
3423 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3424 {
3425 HOST_WIDE_INT bytes =
3426 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3427 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3428
3429 /* Variable sized entities are always passed/returned in memory. */
3430 if (bytes < 0)
3431 return 0;
3432
3433 if (mode != VOIDmode
3434 && targetm.calls.must_pass_in_stack (mode, type))
3435 return 0;
3436
3437 if (type && AGGREGATE_TYPE_P (type))
3438 {
3439 int i;
3440 tree field;
3441 enum x86_64_reg_class subclasses[MAX_CLASSES];
3442
3443 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3444 if (bytes > 16)
3445 return 0;
3446
3447 for (i = 0; i < words; i++)
3448 classes[i] = X86_64_NO_CLASS;
3449
3450 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3451 signal memory class, so handle this as a special case. */
3452 if (!words)
3453 {
3454 classes[0] = X86_64_NO_CLASS;
3455 return 1;
3456 }
3457
3458 /* Classify each field of record and merge classes. */
3459 switch (TREE_CODE (type))
3460 {
3461 case RECORD_TYPE:
3462 /* And now merge the fields of structure. */
3463 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3464 {
3465 if (TREE_CODE (field) == FIELD_DECL)
3466 {
3467 int num;
3468
3469 if (TREE_TYPE (field) == error_mark_node)
3470 continue;
3471
3472 /* Bitfields are always classified as integer. Handle them
3473 early, since later code would consider them to be
3474 misaligned integers. */
3475 if (DECL_BIT_FIELD (field))
3476 {
3477 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3478 i < ((int_bit_position (field) + (bit_offset % 64))
3479 + tree_low_cst (DECL_SIZE (field), 0)
3480 + 63) / 8 / 8; i++)
3481 classes[i] =
3482 merge_classes (X86_64_INTEGER_CLASS,
3483 classes[i]);
3484 }
3485 else
3486 {
3487 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3488 TREE_TYPE (field), subclasses,
3489 (int_bit_position (field)
3490 + bit_offset) % 256);
3491 if (!num)
3492 return 0;
3493 for (i = 0; i < num; i++)
3494 {
3495 int pos =
3496 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3497 classes[i + pos] =
3498 merge_classes (subclasses[i], classes[i + pos]);
3499 }
3500 }
3501 }
3502 }
3503 break;
3504
3505 case ARRAY_TYPE:
3506 /* Arrays are handled as small records. */
3507 {
3508 int num;
3509 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3510 TREE_TYPE (type), subclasses, bit_offset);
3511 if (!num)
3512 return 0;
3513
3514 /* The partial classes are now full classes. */
3515 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3516 subclasses[0] = X86_64_SSE_CLASS;
3517 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3518 subclasses[0] = X86_64_INTEGER_CLASS;
3519
3520 for (i = 0; i < words; i++)
3521 classes[i] = subclasses[i % num];
3522
3523 break;
3524 }
3525 case UNION_TYPE:
3526 case QUAL_UNION_TYPE:
3527 /* Unions are similar to RECORD_TYPE but offset is always 0.
3528 */
3529 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3530 {
3531 if (TREE_CODE (field) == FIELD_DECL)
3532 {
3533 int num;
3534
3535 if (TREE_TYPE (field) == error_mark_node)
3536 continue;
3537
3538 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3539 TREE_TYPE (field), subclasses,
3540 bit_offset);
3541 if (!num)
3542 return 0;
3543 for (i = 0; i < num; i++)
3544 classes[i] = merge_classes (subclasses[i], classes[i]);
3545 }
3546 }
3547 break;
3548
3549 default:
3550 gcc_unreachable ();
3551 }
3552
3553 /* Final merger cleanup. */
3554 for (i = 0; i < words; i++)
3555 {
3556 /* If one class is MEMORY, everything should be passed in
3557 memory. */
3558 if (classes[i] == X86_64_MEMORY_CLASS)
3559 return 0;
3560
3561 /* The X86_64_SSEUP_CLASS should be always preceded by
3562 X86_64_SSE_CLASS. */
3563 if (classes[i] == X86_64_SSEUP_CLASS
3564 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3565 classes[i] = X86_64_SSE_CLASS;
3566
3567 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3568 if (classes[i] == X86_64_X87UP_CLASS
3569 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3570 classes[i] = X86_64_SSE_CLASS;
3571 }
3572 return words;
3573 }
3574
3575 /* Compute alignment needed. We align all types to natural boundaries with
3576 the exception of XFmode, which is aligned to 64 bits. */
3577 if (mode != VOIDmode && mode != BLKmode)
3578 {
3579 int mode_alignment = GET_MODE_BITSIZE (mode);
3580
3581 if (mode == XFmode)
3582 mode_alignment = 128;
3583 else if (mode == XCmode)
3584 mode_alignment = 256;
3585 if (COMPLEX_MODE_P (mode))
3586 mode_alignment /= 2;
3587 /* Misaligned fields are always returned in memory. */
3588 if (bit_offset % mode_alignment)
3589 return 0;
3590 }
3591
3592 /* For V1xx modes, just use the base mode. */
3593 if (VECTOR_MODE_P (mode)
3594 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3595 mode = GET_MODE_INNER (mode);
3596
3597 /* Classification of atomic types. */
3598 switch (mode)
3599 {
3600 case SDmode:
3601 case DDmode:
3602 classes[0] = X86_64_SSE_CLASS;
3603 return 1;
3604 case TDmode:
3605 classes[0] = X86_64_SSE_CLASS;
3606 classes[1] = X86_64_SSEUP_CLASS;
3607 return 2;
3608 case DImode:
3609 case SImode:
3610 case HImode:
3611 case QImode:
3612 case CSImode:
3613 case CHImode:
3614 case CQImode:
3615 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3616 classes[0] = X86_64_INTEGERSI_CLASS;
3617 else
3618 classes[0] = X86_64_INTEGER_CLASS;
3619 return 1;
3620 case CDImode:
3621 case TImode:
3622 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3623 return 2;
3624 case CTImode:
3625 return 0;
3626 case SFmode:
3627 if (!(bit_offset % 64))
3628 classes[0] = X86_64_SSESF_CLASS;
3629 else
3630 classes[0] = X86_64_SSE_CLASS;
3631 return 1;
3632 case DFmode:
3633 classes[0] = X86_64_SSEDF_CLASS;
3634 return 1;
3635 case XFmode:
3636 classes[0] = X86_64_X87_CLASS;
3637 classes[1] = X86_64_X87UP_CLASS;
3638 return 2;
3639 case TFmode:
3640 classes[0] = X86_64_SSE_CLASS;
3641 classes[1] = X86_64_SSEUP_CLASS;
3642 return 2;
3643 case SCmode:
3644 classes[0] = X86_64_SSE_CLASS;
3645 return 1;
3646 case DCmode:
3647 classes[0] = X86_64_SSEDF_CLASS;
3648 classes[1] = X86_64_SSEDF_CLASS;
3649 return 2;
3650 case XCmode:
3651 classes[0] = X86_64_COMPLEX_X87_CLASS;
3652 return 1;
3653 case TCmode:
3654 /* This mode is larger than 16 bytes. */
3655 return 0;
3656 case V4SFmode:
3657 case V4SImode:
3658 case V16QImode:
3659 case V8HImode:
3660 case V2DFmode:
3661 case V2DImode:
3662 classes[0] = X86_64_SSE_CLASS;
3663 classes[1] = X86_64_SSEUP_CLASS;
3664 return 2;
3665 case V2SFmode:
3666 case V2SImode:
3667 case V4HImode:
3668 case V8QImode:
3669 classes[0] = X86_64_SSE_CLASS;
3670 return 1;
3671 case BLKmode:
3672 case VOIDmode:
3673 return 0;
3674 default:
3675 gcc_assert (VECTOR_MODE_P (mode));
3676
3677 if (bytes > 16)
3678 return 0;
3679
3680 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3681
3682 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3683 classes[0] = X86_64_INTEGERSI_CLASS;
3684 else
3685 classes[0] = X86_64_INTEGER_CLASS;
3686 classes[1] = X86_64_INTEGER_CLASS;
3687 return 1 + (bytes > 8);
3688 }
3689 }
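/* As an example, "struct { double d; int i; }" classifies into two eightbytes,
   SSEDF followed by INTEGER, so examine_argument reports one SSE and one
   integer register and construct_container builds a two-element PARALLEL
   (an SSE register at offset 0 and a general register at offset 8).  */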
3690
3691 /* Examine the argument and set the number of registers required in each
3692 class. Return 0 iff the parameter should be passed in memory. */
3693 static int
3694 examine_argument (enum machine_mode mode, tree type, int in_return,
3695 int *int_nregs, int *sse_nregs)
3696 {
3697 enum x86_64_reg_class class[MAX_CLASSES];
3698 int n = classify_argument (mode, type, class, 0);
3699
3700 *int_nregs = 0;
3701 *sse_nregs = 0;
3702 if (!n)
3703 return 0;
3704 for (n--; n >= 0; n--)
3705 switch (class[n])
3706 {
3707 case X86_64_INTEGER_CLASS:
3708 case X86_64_INTEGERSI_CLASS:
3709 (*int_nregs)++;
3710 break;
3711 case X86_64_SSE_CLASS:
3712 case X86_64_SSESF_CLASS:
3713 case X86_64_SSEDF_CLASS:
3714 (*sse_nregs)++;
3715 break;
3716 case X86_64_NO_CLASS:
3717 case X86_64_SSEUP_CLASS:
3718 break;
3719 case X86_64_X87_CLASS:
3720 case X86_64_X87UP_CLASS:
3721 if (!in_return)
3722 return 0;
3723 break;
3724 case X86_64_COMPLEX_X87_CLASS:
3725 return in_return ? 2 : 0;
3726 case X86_64_MEMORY_CLASS:
3727 gcc_unreachable ();
3728 }
3729 return 1;
3730 }
3731
3732 /* Construct container for the argument used by GCC interface. See
3733 FUNCTION_ARG for the detailed description. */
3734
3735 static rtx
3736 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3737 tree type, int in_return, int nintregs, int nsseregs,
3738 const int *intreg, int sse_regno)
3739 {
3740 /* The following variables hold the static issued_error state. */
3741 static bool issued_sse_arg_error;
3742 static bool issued_sse_ret_error;
3743 static bool issued_x87_ret_error;
3744
3745 enum machine_mode tmpmode;
3746 int bytes =
3747 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3748 enum x86_64_reg_class class[MAX_CLASSES];
3749 int n;
3750 int i;
3751 int nexps = 0;
3752 int needed_sseregs, needed_intregs;
3753 rtx exp[MAX_CLASSES];
3754 rtx ret;
3755
3756 n = classify_argument (mode, type, class, 0);
3757 if (!n)
3758 return NULL;
3759 if (!examine_argument (mode, type, in_return, &needed_intregs,
3760 &needed_sseregs))
3761 return NULL;
3762 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3763 return NULL;
3764
3765 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3766 some less clueful developer tries to use floating-point anyway. */
3767 if (needed_sseregs && !TARGET_SSE)
3768 {
3769 if (in_return)
3770 {
3771 if (!issued_sse_ret_error)
3772 {
3773 error ("SSE register return with SSE disabled");
3774 issued_sse_ret_error = true;
3775 }
3776 }
3777 else if (!issued_sse_arg_error)
3778 {
3779 error ("SSE register argument with SSE disabled");
3780 issued_sse_arg_error = true;
3781 }
3782 return NULL;
3783 }
3784
3785 /* Likewise, error if the ABI requires us to return values in the
3786 x87 registers and the user specified -mno-80387. */
3787 if (!TARGET_80387 && in_return)
3788 for (i = 0; i < n; i++)
3789 if (class[i] == X86_64_X87_CLASS
3790 || class[i] == X86_64_X87UP_CLASS
3791 || class[i] == X86_64_COMPLEX_X87_CLASS)
3792 {
3793 if (!issued_x87_ret_error)
3794 {
3795 error ("x87 register return with x87 disabled");
3796 issued_x87_ret_error = true;
3797 }
3798 return NULL;
3799 }
3800
3801 /* First construct simple cases. Avoid SCmode, since we want to use
3802 a single register to pass this type. */
3803 if (n == 1 && mode != SCmode)
3804 switch (class[0])
3805 {
3806 case X86_64_INTEGER_CLASS:
3807 case X86_64_INTEGERSI_CLASS:
3808 return gen_rtx_REG (mode, intreg[0]);
3809 case X86_64_SSE_CLASS:
3810 case X86_64_SSESF_CLASS:
3811 case X86_64_SSEDF_CLASS:
3812 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3813 case X86_64_X87_CLASS:
3814 case X86_64_COMPLEX_X87_CLASS:
3815 return gen_rtx_REG (mode, FIRST_STACK_REG);
3816 case X86_64_NO_CLASS:
3817 /* Zero sized array, struct or class. */
3818 return NULL;
3819 default:
3820 gcc_unreachable ();
3821 }
3822 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3823 && mode != BLKmode)
3824 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3825
3826 if (n == 2
3827 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3828 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3829 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3830 && class[1] == X86_64_INTEGER_CLASS
3831 && (mode == CDImode || mode == TImode || mode == TFmode)
3832 && intreg[0] + 1 == intreg[1])
3833 return gen_rtx_REG (mode, intreg[0]);
3834
3835 /* Otherwise figure out the entries of the PARALLEL. */
3836 for (i = 0; i < n; i++)
3837 {
3838 switch (class[i])
3839 {
3840 case X86_64_NO_CLASS:
3841 break;
3842 case X86_64_INTEGER_CLASS:
3843 case X86_64_INTEGERSI_CLASS:
3844 /* Merge TImodes on aligned occasions here too. */
3845 if (i * 8 + 8 > bytes)
3846 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3847 else if (class[i] == X86_64_INTEGERSI_CLASS)
3848 tmpmode = SImode;
3849 else
3850 tmpmode = DImode;
3851 /* We've requested 24 bytes which we don't have a mode for. Use DImode. */
3852 if (tmpmode == BLKmode)
3853 tmpmode = DImode;
3854 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3855 gen_rtx_REG (tmpmode, *intreg),
3856 GEN_INT (i*8));
3857 intreg++;
3858 break;
3859 case X86_64_SSESF_CLASS:
3860 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3861 gen_rtx_REG (SFmode,
3862 SSE_REGNO (sse_regno)),
3863 GEN_INT (i*8));
3864 sse_regno++;
3865 break;
3866 case X86_64_SSEDF_CLASS:
3867 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3868 gen_rtx_REG (DFmode,
3869 SSE_REGNO (sse_regno)),
3870 GEN_INT (i*8));
3871 sse_regno++;
3872 break;
3873 case X86_64_SSE_CLASS:
3874 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3875 tmpmode = TImode;
3876 else
3877 tmpmode = DImode;
3878 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3879 gen_rtx_REG (tmpmode,
3880 SSE_REGNO (sse_regno)),
3881 GEN_INT (i*8));
3882 if (tmpmode == TImode)
3883 i++;
3884 sse_regno++;
3885 break;
3886 default:
3887 gcc_unreachable ();
3888 }
3889 }
3890
3891 /* Empty aligned struct, union or class. */
3892 if (nexps == 0)
3893 return NULL;
3894
3895 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3896 for (i = 0; i < nexps; i++)
3897 XVECEXP (ret, 0, i) = exp [i];
3898 return ret;
3899 }
3900
3901 /* Update the data in CUM to advance over an argument of mode MODE
3902 and data type TYPE. (TYPE is null for libcalls where that information
3903 may not be available.) */
3904
3905 static void
3906 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3907 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3908 {
3909 switch (mode)
3910 {
3911 default:
3912 break;
3913
3914 case BLKmode:
3915 if (bytes < 0)
3916 break;
3917 /* FALLTHRU */
3918
3919 case DImode:
3920 case SImode:
3921 case HImode:
3922 case QImode:
3923 cum->words += words;
3924 cum->nregs -= words;
3925 cum->regno += words;
3926
3927 if (cum->nregs <= 0)
3928 {
3929 cum->nregs = 0;
3930 cum->regno = 0;
3931 }
3932 break;
3933
3934 case DFmode:
3935 if (cum->float_in_sse < 2)
3936 break;
3937 case SFmode:
3938 if (cum->float_in_sse < 1)
3939 break;
3940 /* FALLTHRU */
3941
3942 case TImode:
3943 case V16QImode:
3944 case V8HImode:
3945 case V4SImode:
3946 case V2DImode:
3947 case V4SFmode:
3948 case V2DFmode:
3949 if (!type || !AGGREGATE_TYPE_P (type))
3950 {
3951 cum->sse_words += words;
3952 cum->sse_nregs -= 1;
3953 cum->sse_regno += 1;
3954 if (cum->sse_nregs <= 0)
3955 {
3956 cum->sse_nregs = 0;
3957 cum->sse_regno = 0;
3958 }
3959 }
3960 break;
3961
3962 case V8QImode:
3963 case V4HImode:
3964 case V2SImode:
3965 case V2SFmode:
3966 if (!type || !AGGREGATE_TYPE_P (type))
3967 {
3968 cum->mmx_words += words;
3969 cum->mmx_nregs -= 1;
3970 cum->mmx_regno += 1;
3971 if (cum->mmx_nregs <= 0)
3972 {
3973 cum->mmx_nregs = 0;
3974 cum->mmx_regno = 0;
3975 }
3976 }
3977 break;
3978 }
3979 }
3980
3981 static void
3982 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3983 tree type, HOST_WIDE_INT words)
3984 {
3985 int int_nregs, sse_nregs;
3986
3987 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3988 cum->words += words;
3989 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3990 {
3991 cum->nregs -= int_nregs;
3992 cum->sse_nregs -= sse_nregs;
3993 cum->regno += int_nregs;
3994 cum->sse_regno += sse_nregs;
3995 }
3996 else
3997 cum->words += words;
3998 }
3999
4000 void
4001 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4002 tree type, int named ATTRIBUTE_UNUSED)
4003 {
4004 HOST_WIDE_INT bytes, words;
4005
4006 if (mode == BLKmode)
4007 bytes = int_size_in_bytes (type);
4008 else
4009 bytes = GET_MODE_SIZE (mode);
4010 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4011
4012 if (type)
4013 mode = type_natural_mode (type);
4014
4015 if (TARGET_64BIT)
4016 function_arg_advance_64 (cum, mode, type, words);
4017 else
4018 function_arg_advance_32 (cum, mode, type, bytes, words);
4019 }
4020
4021 /* Define where to put the arguments to a function.
4022 Value is zero to push the argument on the stack,
4023 or a hard register in which to store the argument.
4024
4025 MODE is the argument's machine mode.
4026 TYPE is the data type of the argument (as a tree).
4027 This is null for libcalls where that information may
4028 not be available.
4029 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4030 the preceding args and about the function being called.
4031 NAMED is nonzero if this argument is a named parameter
4032 (otherwise it is an extra parameter matching an ellipsis). */
4033
4034 static rtx
4035 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4036 enum machine_mode orig_mode, tree type,
4037 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4038 {
4039 static bool warnedsse, warnedmmx;
4040
4041 /* Avoid the AL settings for the Unix64 ABI. */
4042 if (mode == VOIDmode)
4043 return constm1_rtx;
4044
4045 switch (mode)
4046 {
4047 default:
4048 break;
4049
4050 case BLKmode:
4051 if (bytes < 0)
4052 break;
4053 /* FALLTHRU */
4054 case DImode:
4055 case SImode:
4056 case HImode:
4057 case QImode:
4058 if (words <= cum->nregs)
4059 {
4060 int regno = cum->regno;
4061
4062 /* Fastcall allocates the first two DWORD (SImode) or
4063 smaller arguments to ECX and EDX. */
4064 if (cum->fastcall)
4065 {
4066 if (mode == BLKmode || mode == DImode)
4067 break;
4068
4069 /* ECX, not EAX, is the first allocated register. */
4070 if (regno == 0)
4071 regno = 2;
4072 }
4073 return gen_rtx_REG (mode, regno);
4074 }
4075 break;
4076
4077 case DFmode:
4078 if (cum->float_in_sse < 2)
4079 break;
4080 case SFmode:
4081 if (cum->float_in_sse < 1)
4082 break;
4083 /* FALLTHRU */
4084 case TImode:
4085 case V16QImode:
4086 case V8HImode:
4087 case V4SImode:
4088 case V2DImode:
4089 case V4SFmode:
4090 case V2DFmode:
4091 if (!type || !AGGREGATE_TYPE_P (type))
4092 {
4093 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4094 {
4095 warnedsse = true;
4096 warning (0, "SSE vector argument without SSE enabled "
4097 "changes the ABI");
4098 }
4099 if (cum->sse_nregs)
4100 return gen_reg_or_parallel (mode, orig_mode,
4101 cum->sse_regno + FIRST_SSE_REG);
4102 }
4103 break;
4104
4105 case V8QImode:
4106 case V4HImode:
4107 case V2SImode:
4108 case V2SFmode:
4109 if (!type || !AGGREGATE_TYPE_P (type))
4110 {
4111 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4112 {
4113 warnedmmx = true;
4114 warning (0, "MMX vector argument without MMX enabled "
4115 "changes the ABI");
4116 }
4117 if (cum->mmx_nregs)
4118 return gen_reg_or_parallel (mode, orig_mode,
4119 cum->mmx_regno + FIRST_MMX_REG);
4120 }
4121 break;
4122 }
4123
4124 return NULL_RTX;
4125 }
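/* For example, with "__attribute__ ((fastcall)) int f (int a, int b, int c)"
   the code above puts a in %ecx (regno 2) and b in %edx, while c, and any
   DImode or aggregate argument, falls through to the stack.  */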
4126
4127 static rtx
4128 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4129 enum machine_mode orig_mode, tree type)
4130 {
4131 /* Handle a hidden AL argument containing number of registers
4132 for varargs x86-64 functions. */
4133 if (mode == VOIDmode)
4134 return GEN_INT (cum->maybe_vaarg
4135 ? (cum->sse_nregs < 0
4136 ? SSE_REGPARM_MAX
4137 : cum->sse_regno)
4138 : -1);
4139
4140 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4141 cum->sse_nregs,
4142 &x86_64_int_parameter_registers [cum->regno],
4143 cum->sse_regno);
4144 }
4145
4146 rtx
4147 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4148 tree type, int named ATTRIBUTE_UNUSED)
4149 {
4150 enum machine_mode mode = omode;
4151 HOST_WIDE_INT bytes, words;
4152
4153 if (mode == BLKmode)
4154 bytes = int_size_in_bytes (type);
4155 else
4156 bytes = GET_MODE_SIZE (mode);
4157 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4158
4159 /* To simplify the code below, represent vector types with a vector mode
4160 even if MMX/SSE are not active. */
4161 if (type && TREE_CODE (type) == VECTOR_TYPE)
4162 mode = type_natural_mode (type);
4163
4164 if (TARGET_64BIT)
4165 return function_arg_64 (cum, mode, omode, type);
4166 else
4167 return function_arg_32 (cum, mode, omode, type, bytes, words);
4168 }
4169
4170 /* A C expression that indicates when an argument must be passed by
4171 reference. If nonzero for an argument, a copy of that argument is
4172 made in memory and a pointer to the argument is passed instead of
4173 the argument itself. The pointer is passed in whatever way is
4174 appropriate for passing a pointer to that type. */
4175
4176 static bool
4177 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4178 enum machine_mode mode ATTRIBUTE_UNUSED,
4179 tree type, bool named ATTRIBUTE_UNUSED)
4180 {
4181 if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4182 return 1;
4183
4184 return 0;
4185 }
4186
4187 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4188 passing ABI. Only called if TARGET_SSE. */
4189 static bool
4190 contains_128bit_aligned_vector_p (tree type)
4191 {
4192 enum machine_mode mode = TYPE_MODE (type);
4193 if (SSE_REG_MODE_P (mode)
4194 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4195 return true;
4196 if (TYPE_ALIGN (type) < 128)
4197 return false;
4198
4199 if (AGGREGATE_TYPE_P (type))
4200 {
4201 /* Walk the aggregates recursively. */
4202 switch (TREE_CODE (type))
4203 {
4204 case RECORD_TYPE:
4205 case UNION_TYPE:
4206 case QUAL_UNION_TYPE:
4207 {
4208 tree field;
4209
4210 /* Walk all the structure fields. */
4211 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4212 {
4213 if (TREE_CODE (field) == FIELD_DECL
4214 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4215 return true;
4216 }
4217 break;
4218 }
4219
4220 case ARRAY_TYPE:
4221 /* Just for use if some languages pass arrays by value. */
4222 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4223 return true;
4224 break;
4225
4226 default:
4227 gcc_unreachable ();
4228 }
4229 }
4230 return false;
4231 }
4232
4233 /* Gives the alignment boundary, in bits, of an argument with the
4234 specified mode and type. */
4235
4236 int
4237 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4238 {
4239 int align;
4240 if (type)
4241 align = TYPE_ALIGN (type);
4242 else
4243 align = GET_MODE_ALIGNMENT (mode);
4244 if (align < PARM_BOUNDARY)
4245 align = PARM_BOUNDARY;
4246 if (!TARGET_64BIT)
4247 {
4248 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4249 make an exception for SSE modes since these require 128bit
4250 alignment.
4251
4252 The handling here differs from field_alignment. ICC aligns MMX
4253 arguments to 4 byte boundaries, while structure fields are aligned
4254 to 8 byte boundaries. */
4255 if (!TARGET_SSE)
4256 align = PARM_BOUNDARY;
4257 else if (!type)
4258 {
4259 if (!SSE_REG_MODE_P (mode))
4260 align = PARM_BOUNDARY;
4261 }
4262 else
4263 {
4264 if (!contains_128bit_aligned_vector_p (type))
4265 align = PARM_BOUNDARY;
4266 }
4267 }
4268 if (align > 128)
4269 align = 128;
4270 return align;
4271 }
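/* For example, with SSE enabled a __m128 argument keeps its 128-bit alignment
   even on the 32-bit stack, whereas a double or long long argument is only
   aligned to PARM_BOUNDARY (32 bits) there.  */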
4272
4273 /* Return true if REGNO is a possible register number of a function value. */
4274
4275 bool
4276 ix86_function_value_regno_p (int regno)
4277 {
4278 switch (regno)
4279 {
4280 case 0:
4281 return true;
4282
4283 case FIRST_FLOAT_REG:
4284 return TARGET_FLOAT_RETURNS_IN_80387;
4285
4286 case FIRST_SSE_REG:
4287 return TARGET_SSE;
4288
4289 case FIRST_MMX_REG:
4290 if (TARGET_MACHO || TARGET_64BIT)
4291 return false;
4292 return TARGET_MMX;
4293 }
4294
4295 return false;
4296 }
4297
4298 /* Define how to find the value returned by a function.
4299 VALTYPE is the data type of the value (as a tree).
4300 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4301 otherwise, FUNC is 0. */
4302
4303 static rtx
4304 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4305 tree fntype, tree fn)
4306 {
4307 unsigned int regno;
4308
4309 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4310 we normally prevent this case when mmx is not available. However
4311 some ABIs may require the result to be returned like DImode. */
4312 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4313 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4314
4315 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4316 we prevent this case when sse is not available. However some ABIs
4317 may require the result to be returned like integer TImode. */
4318 else if (mode == TImode
4319 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4320 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4321
4322 /* Decimal floating point values can go in %eax, unlike other float modes. */
4323 else if (DECIMAL_FLOAT_MODE_P (mode))
4324 regno = 0;
4325
4326 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4327 else if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4328 regno = 0;
4329
4330 /* Floating point return values in %st(0), except for local functions when
4331 SSE math is enabled or for functions with sseregparm attribute. */
4332 else
4333 {
4334 regno = FIRST_FLOAT_REG;
4335
4336 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4337 {
4338 int sse_level = ix86_function_sseregparm (fntype, fn);
4339 if ((sse_level >= 1 && mode == SFmode)
4340 || (sse_level == 2 && mode == DFmode))
4341 regno = FIRST_SSE_REG;
4342 }
4343 }
4344
4345 return gen_rtx_REG (orig_mode, regno);
4346 }
4347
4348 static rtx
4349 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4350 tree valtype)
4351 {
4352 rtx ret;
4353
4354 /* Handle libcalls, which don't provide a type node. */
4355 if (valtype == NULL)
4356 {
4357 switch (mode)
4358 {
4359 case SFmode:
4360 case SCmode:
4361 case DFmode:
4362 case DCmode:
4363 case TFmode:
4364 case SDmode:
4365 case DDmode:
4366 case TDmode:
4367 return gen_rtx_REG (mode, FIRST_SSE_REG);
4368 case XFmode:
4369 case XCmode:
4370 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4371 case TCmode:
4372 return NULL;
4373 default:
4374 return gen_rtx_REG (mode, 0);
4375 }
4376 }
4377
4378 ret = construct_container (mode, orig_mode, valtype, 1,
4379 REGPARM_MAX, SSE_REGPARM_MAX,
4380 x86_64_int_return_registers, 0);
4381
4382 /* For zero sized structures, construct_container returns NULL, but we
4383 need to keep the rest of the compiler happy by returning a meaningful value. */
4384 if (!ret)
4385 ret = gen_rtx_REG (orig_mode, 0);
4386
4387 return ret;
4388 }
4389
4390 static rtx
4391 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4392 enum machine_mode orig_mode, enum machine_mode mode)
4393 {
4394 tree fn, fntype;
4395
4396 fn = NULL_TREE;
4397 if (fntype_or_decl && DECL_P (fntype_or_decl))
4398 fn = fntype_or_decl;
4399 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4400
4401 if (TARGET_64BIT)
4402 return function_value_64 (orig_mode, mode, valtype);
4403 else
4404 return function_value_32 (orig_mode, mode, fntype, fn);
4405 }
4406
4407 static rtx
4408 ix86_function_value (tree valtype, tree fntype_or_decl,
4409 bool outgoing ATTRIBUTE_UNUSED)
4410 {
4411 enum machine_mode mode, orig_mode;
4412
4413 orig_mode = TYPE_MODE (valtype);
4414 mode = type_natural_mode (valtype);
4415 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4416 }
4417
4418 rtx
4419 ix86_libcall_value (enum machine_mode mode)
4420 {
4421 return ix86_function_value_1 (NULL, NULL, mode, mode);
4422 }
4423
4424 /* Return true iff type is returned in memory. */
4425
4426 static int
4427 return_in_memory_32 (tree type, enum machine_mode mode)
4428 {
4429 HOST_WIDE_INT size;
4430
4431 if (mode == BLKmode)
4432 return 1;
4433
4434 size = int_size_in_bytes (type);
4435
4436 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4437 return 0;
4438
4439 if (VECTOR_MODE_P (mode) || mode == TImode)
4440 {
4441 /* User-created vectors small enough to fit in EAX. */
4442 if (size < 8)
4443 return 0;
4444
4445 /* MMX/3dNow values are returned in MM0,
4446 except when it doesn't exist. */
4447 if (size == 8)
4448 return (TARGET_MMX ? 0 : 1);
4449
4450 /* SSE values are returned in XMM0, except when it doesn't exist. */
4451 if (size == 16)
4452 return (TARGET_SSE ? 0 : 1);
4453 }
4454
4455 if (mode == XFmode)
4456 return 0;
4457
4458 if (mode == TDmode)
4459 return 1;
4460
4461 if (size > 12)
4462 return 1;
4463 return 0;
4464 }
4465
4466 static int
4467 return_in_memory_64 (tree type, enum machine_mode mode)
4468 {
4469 int needed_intregs, needed_sseregs;
4470 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4471 }
4472
4473 int
4474 ix86_return_in_memory (tree type)
4475 {
4476 enum machine_mode mode = type_natural_mode (type);
4477
4478 if (TARGET_64BIT)
4479 return return_in_memory_64 (type, mode);
4480 else
4481 return return_in_memory_32 (type, mode);
4482 }
4483
4484 /* When returning SSE vector types, we have a choice of either
4485 (1) being ABI incompatible with a -march switch, or
4486 (2) generating an error.
4487 Given no good solution, I think the safest thing is one warning.
4488 The user won't be able to use -Werror, but....
4489
4490 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4491 called in response to actually generating a caller or callee that
4492 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4493 via aggregate_value_p for general type probing from tree-ssa. */
4494
4495 static rtx
4496 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4497 {
4498 static bool warnedsse, warnedmmx;
4499
4500 if (!TARGET_64BIT && type)
4501 {
4502 /* Look at the return type of the function, not the function type. */
4503 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4504
4505 if (!TARGET_SSE && !warnedsse)
4506 {
4507 if (mode == TImode
4508 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4509 {
4510 warnedsse = true;
4511 warning (0, "SSE vector return without SSE enabled "
4512 "changes the ABI");
4513 }
4514 }
4515
4516 if (!TARGET_MMX && !warnedmmx)
4517 {
4518 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4519 {
4520 warnedmmx = true;
4521 warning (0, "MMX vector return without MMX enabled "
4522 "changes the ABI");
4523 }
4524 }
4525 }
4526
4527 return NULL;
4528 }
4529
4530 \f
4531 /* Create the va_list data type. */
4532
4533 static tree
4534 ix86_build_builtin_va_list (void)
4535 {
4536 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4537
4538 /* For i386 we use a plain pointer to the argument area. */
4539 if (!TARGET_64BIT)
4540 return build_pointer_type (char_type_node);
4541
4542 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4543 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4544
4545 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4546 unsigned_type_node);
4547 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4548 unsigned_type_node);
4549 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4550 ptr_type_node);
4551 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4552 ptr_type_node);
4553
4554 va_list_gpr_counter_field = f_gpr;
4555 va_list_fpr_counter_field = f_fpr;
4556
4557 DECL_FIELD_CONTEXT (f_gpr) = record;
4558 DECL_FIELD_CONTEXT (f_fpr) = record;
4559 DECL_FIELD_CONTEXT (f_ovf) = record;
4560 DECL_FIELD_CONTEXT (f_sav) = record;
4561
4562 TREE_CHAIN (record) = type_decl;
4563 TYPE_NAME (record) = type_decl;
4564 TYPE_FIELDS (record) = f_gpr;
4565 TREE_CHAIN (f_gpr) = f_fpr;
4566 TREE_CHAIN (f_fpr) = f_ovf;
4567 TREE_CHAIN (f_ovf) = f_sav;
4568
4569 layout_type (record);
4570
4571 /* The correct type is an array type of one element. */
4572 return build_array_type (record, build_index_type (size_zero_node));
4573 }
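/* The 64-bit record built above matches the psABI va_list layout:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag[1];  */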
4574
4575 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4576
4577 static void
4578 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4579 {
4580 rtx save_area, mem;
4581 rtx label;
4582 rtx label_ref;
4583 rtx tmp_reg;
4584 rtx nsse_reg;
4585 int set;
4586 int i;
4587
4588 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4589 return;
4590
4591 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4592 ix86_save_varrargs_registers = 1;
4593 cfun->stack_alignment_needed = 128;
4594
4595 save_area = frame_pointer_rtx;
4596 set = get_varargs_alias_set ();
4597
4598 for (i = cum->regno;
4599 i < ix86_regparm
4600 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4601 i++)
4602 {
4603 mem = gen_rtx_MEM (Pmode,
4604 plus_constant (save_area, i * UNITS_PER_WORD));
4605 MEM_NOTRAP_P (mem) = 1;
4606 set_mem_alias_set (mem, set);
4607 emit_move_insn (mem, gen_rtx_REG (Pmode,
4608 x86_64_int_parameter_registers[i]));
4609 }
4610
4611 if (cum->sse_nregs && cfun->va_list_fpr_size)
4612 {
4613 /* Now emit code to save SSE registers. The AX parameter contains the
4614 number of SSE parameter registers used to call this function. We use
4615 the sse_prologue_save insn template, which produces a computed jump
4616 across the SSE saves. We need some preparation work to get this working. */
4617
4618 label = gen_label_rtx ();
4619 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4620
4621 /* Compute the address to jump to:
4622 label - eax*4 + nnamed_sse_arguments*4 */
4623 tmp_reg = gen_reg_rtx (Pmode);
4624 nsse_reg = gen_reg_rtx (Pmode);
4625 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4626 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4627 gen_rtx_MULT (Pmode, nsse_reg,
4628 GEN_INT (4))));
4629 if (cum->sse_regno)
4630 emit_move_insn
4631 (nsse_reg,
4632 gen_rtx_CONST (DImode,
4633 gen_rtx_PLUS (DImode,
4634 label_ref,
4635 GEN_INT (cum->sse_regno * 4))));
4636 else
4637 emit_move_insn (nsse_reg, label_ref);
4638 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4639
4640 /* Compute the address of the memory block we save into. We always use a
4641 pointer pointing 127 bytes after the first byte to store - this is needed
4642 to keep the instruction size limited to 4 bytes. */
4643 tmp_reg = gen_reg_rtx (Pmode);
4644 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4645 plus_constant (save_area,
4646 8 * REGPARM_MAX + 127)));
4647 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4648 MEM_NOTRAP_P (mem) = 1;
4649 set_mem_alias_set (mem, set);
4650 set_mem_align (mem, BITS_PER_WORD);
4651
4652 /* And finally do the dirty job! */
4653 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4654 GEN_INT (cum->sse_regno), label));
4655 }
4656 }
4657
4658 static void
4659 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4660 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4661 int no_rtl)
4662 {
4663 CUMULATIVE_ARGS next_cum;
4664 tree fntype;
4665 int stdarg_p;
4666
4667 /* This argument doesn't appear to be used anymore. Which is good,
4668 because the old code here didn't suppress rtl generation. */
4669 gcc_assert (!no_rtl);
4670
4671 if (!TARGET_64BIT)
4672 return;
4673
4674 fntype = TREE_TYPE (current_function_decl);
4675 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4676 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4677 != void_type_node));
4678
4679 /* For varargs, we do not want to skip the dummy va_dcl argument.
4680 For stdargs, we do want to skip the last named argument. */
4681 next_cum = *cum;
4682 if (stdarg_p)
4683 function_arg_advance (&next_cum, mode, type, 1);
4684
4685 setup_incoming_varargs_64 (&next_cum);
4686 }
4687
4688 /* Implement va_start. */
4689
4690 void
4691 ix86_va_start (tree valist, rtx nextarg)
4692 {
4693 HOST_WIDE_INT words, n_gpr, n_fpr;
4694 tree f_gpr, f_fpr, f_ovf, f_sav;
4695 tree gpr, fpr, ovf, sav, t;
4696 tree type;
4697
4698 /* Only the 64-bit target needs something special. */
4699 if (!TARGET_64BIT)
4700 {
4701 std_expand_builtin_va_start (valist, nextarg);
4702 return;
4703 }
4704
4705 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4706 f_fpr = TREE_CHAIN (f_gpr);
4707 f_ovf = TREE_CHAIN (f_fpr);
4708 f_sav = TREE_CHAIN (f_ovf);
4709
4710 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4711 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4712 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4713 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4714 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4715
4716 /* Count number of gp and fp argument registers used. */
4717 words = current_function_args_info.words;
4718 n_gpr = current_function_args_info.regno;
4719 n_fpr = current_function_args_info.sse_regno;
4720
4721 if (cfun->va_list_gpr_size)
4722 {
4723 type = TREE_TYPE (gpr);
4724 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4725 build_int_cst (type, n_gpr * 8));
4726 TREE_SIDE_EFFECTS (t) = 1;
4727 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4728 }
4729
4730 if (cfun->va_list_fpr_size)
4731 {
4732 type = TREE_TYPE (fpr);
4733 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4734 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4735 TREE_SIDE_EFFECTS (t) = 1;
4736 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4737 }
4738
4739 /* Find the overflow area. */
4740 type = TREE_TYPE (ovf);
4741 t = make_tree (type, virtual_incoming_args_rtx);
4742 if (words != 0)
4743 t = build2 (PLUS_EXPR, type, t,
4744 build_int_cst (type, words * UNITS_PER_WORD));
4745 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4746 TREE_SIDE_EFFECTS (t) = 1;
4747 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4748
4749 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4750 {
4751 /* Find the register save area.
4752 The function prologue saves it right above the stack frame. */
4753 type = TREE_TYPE (sav);
4754 t = make_tree (type, frame_pointer_rtx);
4755 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4756 TREE_SIDE_EFFECTS (t) = 1;
4757 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4758 }
4759 }
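/* For instance, for "void f (int a, double b, ...)" the named arguments use
   one integer and one SSE register, so va_start sets gp_offset to 8 and
   fp_offset to 8*REGPARM_MAX + 16 = 64, and overflow_arg_area points just
   past any named arguments that were passed on the stack.  */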
4760
4761 /* Implement va_arg. */
4762
4763 tree
4764 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4765 {
4766 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4767 tree f_gpr, f_fpr, f_ovf, f_sav;
4768 tree gpr, fpr, ovf, sav, t;
4769 int size, rsize;
4770 tree lab_false, lab_over = NULL_TREE;
4771 tree addr, t2;
4772 rtx container;
4773 int indirect_p = 0;
4774 tree ptrtype;
4775 enum machine_mode nat_mode;
4776
4777 /* Only the 64-bit target needs something special. */
4778 if (!TARGET_64BIT)
4779 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4780
4781 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4782 f_fpr = TREE_CHAIN (f_gpr);
4783 f_ovf = TREE_CHAIN (f_fpr);
4784 f_sav = TREE_CHAIN (f_ovf);
4785
4786 valist = build_va_arg_indirect_ref (valist);
4787 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4788 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4789 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4790 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4791
4792 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4793 if (indirect_p)
4794 type = build_pointer_type (type);
4795 size = int_size_in_bytes (type);
4796 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4797
4798 nat_mode = type_natural_mode (type);
4799 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4800 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4801
4802 /* Pull the value out of the saved registers. */
4803
4804 addr = create_tmp_var (ptr_type_node, "addr");
4805 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4806
4807 if (container)
4808 {
4809 int needed_intregs, needed_sseregs;
4810 bool need_temp;
4811 tree int_addr, sse_addr;
4812
4813 lab_false = create_artificial_label ();
4814 lab_over = create_artificial_label ();
4815
4816 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4817
4818 need_temp = (!REG_P (container)
4819 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4820 || TYPE_ALIGN (type) > 128));
4821
4822 /* In case we are passing a structure, verify that it is a consecutive
4823 block in the register save area. If not, we need to do moves. */
4824 if (!need_temp && !REG_P (container))
4825 {
4826 /* Verify that all registers are strictly consecutive */
4827 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4828 {
4829 int i;
4830
4831 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4832 {
4833 rtx slot = XVECEXP (container, 0, i);
4834 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4835 || INTVAL (XEXP (slot, 1)) != i * 16)
4836 need_temp = 1;
4837 }
4838 }
4839 else
4840 {
4841 int i;
4842
4843 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4844 {
4845 rtx slot = XVECEXP (container, 0, i);
4846 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4847 || INTVAL (XEXP (slot, 1)) != i * 8)
4848 need_temp = 1;
4849 }
4850 }
4851 }
4852 if (!need_temp)
4853 {
4854 int_addr = addr;
4855 sse_addr = addr;
4856 }
4857 else
4858 {
4859 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4860 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4861 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4862 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4863 }
4864
4865 /* First ensure that we fit completely in registers. */
4866 if (needed_intregs)
4867 {
4868 t = build_int_cst (TREE_TYPE (gpr),
4869 (REGPARM_MAX - needed_intregs + 1) * 8);
4870 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4871 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4872 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4873 gimplify_and_add (t, pre_p);
4874 }
4875 if (needed_sseregs)
4876 {
4877 t = build_int_cst (TREE_TYPE (fpr),
4878 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4879 + REGPARM_MAX * 8);
4880 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4881 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4882 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4883 gimplify_and_add (t, pre_p);
4884 }
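/* For example, with REGPARM_MAX == 6 and needed_intregs == 2 the threshold
   above is (6 - 2 + 1) * 8 == 40: once gpr (the byte offset of the next
   free GPR slot) reaches 40, fewer than two 8-byte slots remain, so we
   jump to lab_false and fetch the argument from the overflow area
   instead.  The SSE check is analogous, using 16-byte slots that start
   after the REGPARM_MAX * 8 bytes reserved for the integer registers.  */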
4885
4886 /* Compute index to start of area used for integer regs. */
4887 if (needed_intregs)
4888 {
4889 /* int_addr = gpr + sav; */
4890 t = fold_convert (ptr_type_node, gpr);
4891 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4892 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4893 gimplify_and_add (t, pre_p);
4894 }
4895 if (needed_sseregs)
4896 {
4897 /* sse_addr = fpr + sav; */
4898 t = fold_convert (ptr_type_node, fpr);
4899 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4900 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4901 gimplify_and_add (t, pre_p);
4902 }
4903 if (need_temp)
4904 {
4905 int i;
4906 tree temp = create_tmp_var (type, "va_arg_tmp");
4907
4908 /* addr = &temp; */
4909 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4910 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4911 gimplify_and_add (t, pre_p);
4912
4913 for (i = 0; i < XVECLEN (container, 0); i++)
4914 {
4915 rtx slot = XVECEXP (container, 0, i);
4916 rtx reg = XEXP (slot, 0);
4917 enum machine_mode mode = GET_MODE (reg);
4918 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4919 tree addr_type = build_pointer_type (piece_type);
4920 tree src_addr, src;
4921 int src_offset;
4922 tree dest_addr, dest;
4923
4924 if (SSE_REGNO_P (REGNO (reg)))
4925 {
4926 src_addr = sse_addr;
4927 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4928 }
4929 else
4930 {
4931 src_addr = int_addr;
4932 src_offset = REGNO (reg) * 8;
4933 }
4934 src_addr = fold_convert (addr_type, src_addr);
4935 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4936 size_int (src_offset));
4937 src = build_va_arg_indirect_ref (src_addr);
4938
4939 dest_addr = fold_convert (addr_type, addr);
4940 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4941 size_int (INTVAL (XEXP (slot, 1))));
4942 dest = build_va_arg_indirect_ref (dest_addr);
4943
4944 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4945 gimplify_and_add (t, pre_p);
4946 }
4947 }
4948
4949 if (needed_intregs)
4950 {
4951 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4952 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4953 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4954 gimplify_and_add (t, pre_p);
4955 }
4956 if (needed_sseregs)
4957 {
4958 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4959 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4960 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4961 gimplify_and_add (t, pre_p);
4962 }
4963
4964 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4965 gimplify_and_add (t, pre_p);
4966
4967 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4968 append_to_statement_list (t, pre_p);
4969 }
4970
4971 /* ... otherwise out of the overflow area. */
4972
4973 /* Care for on-stack alignment if needed. */
4974 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4975 || integer_zerop (TYPE_SIZE (type)))
4976 t = ovf;
4977 else
4978 {
4979 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4980 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4981 build_int_cst (TREE_TYPE (ovf), align - 1));
4982 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4983 build_int_cst (TREE_TYPE (t), -align));
4984 }
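/* For instance, a type with a 128-bit (16-byte) boundary gives align == 16,
   and the expression above computes t = (ovf + 15) & -16, rounding the
   overflow-area pointer up to the next 16-byte boundary.  */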
4985 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4986
4987 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4988 gimplify_and_add (t2, pre_p);
4989
4990 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4991 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4992 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4993 gimplify_and_add (t, pre_p);
4994
4995 if (container)
4996 {
4997 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4998 append_to_statement_list (t, pre_p);
4999 }
5000
5001 ptrtype = build_pointer_type (type);
5002 addr = fold_convert (ptrtype, addr);
5003
5004 if (indirect_p)
5005 addr = build_va_arg_indirect_ref (addr);
5006 return build_va_arg_indirect_ref (addr);
5007 }
5008 \f
5009 /* Return nonzero if OPNUM's MEM should be matched
5010 in movabs* patterns. */
5011
5012 int
5013 ix86_check_movabs (rtx insn, int opnum)
5014 {
5015 rtx set, mem;
5016
5017 set = PATTERN (insn);
5018 if (GET_CODE (set) == PARALLEL)
5019 set = XVECEXP (set, 0, 0);
5020 gcc_assert (GET_CODE (set) == SET);
5021 mem = XEXP (set, opnum);
5022 while (GET_CODE (mem) == SUBREG)
5023 mem = SUBREG_REG (mem);
5024 gcc_assert (MEM_P (mem));
5025 return (volatile_ok || !MEM_VOLATILE_P (mem));
5026 }
5027 \f
5028 /* Initialize the table of extra 80387 mathematical constants. */
5029
5030 static void
5031 init_ext_80387_constants (void)
5032 {
5033 static const char * cst[5] =
5034 {
5035 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5036 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5037 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5038 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5039 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5040 };
5041 int i;
5042
5043 for (i = 0; i < 5; i++)
5044 {
5045 real_from_string (&ext_80387_constants_table[i], cst[i]);
5046 /* Ensure each constant is rounded to XFmode precision. */
5047 real_convert (&ext_80387_constants_table[i],
5048 XFmode, &ext_80387_constants_table[i]);
5049 }
5050
5051 ext_80387_constants_init = 1;
5052 }
5053
5054 /* Return true if the constant is something that can be loaded with
5055 a special instruction. */
5056
5057 int
5058 standard_80387_constant_p (rtx x)
5059 {
5060 REAL_VALUE_TYPE r;
5061
5062 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5063 return -1;
5064
5065 if (x == CONST0_RTX (GET_MODE (x)))
5066 return 1;
5067 if (x == CONST1_RTX (GET_MODE (x)))
5068 return 2;
5069
5070 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5071
5072 /* For XFmode constants, try to find a special 80387 instruction when
5073 optimizing for size or on those CPUs that benefit from them. */
5074 if (GET_MODE (x) == XFmode
5075 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5076 {
5077 int i;
5078
5079 if (! ext_80387_constants_init)
5080 init_ext_80387_constants ();
5081
5082 for (i = 0; i < 5; i++)
5083 if (real_identical (&r, &ext_80387_constants_table[i]))
5084 return i + 3;
5085 }
5086
5087 /* Load of the constant -0.0 or -1.0 will be split as
5088 fldz;fchs or fld1;fchs sequence. */
5089 if (real_isnegzero (&r))
5090 return 8;
5091 if (real_identical (&r, &dconstm1))
5092 return 9;
5093
5094 return 0;
5095 }
5096
5097 /* Return the opcode of the special instruction to be used to load
5098 the constant X. */
5099
5100 const char *
5101 standard_80387_constant_opcode (rtx x)
5102 {
5103 switch (standard_80387_constant_p (x))
5104 {
5105 case 1:
5106 return "fldz";
5107 case 2:
5108 return "fld1";
5109 case 3:
5110 return "fldlg2";
5111 case 4:
5112 return "fldln2";
5113 case 5:
5114 return "fldl2e";
5115 case 6:
5116 return "fldl2t";
5117 case 7:
5118 return "fldpi";
5119 case 8:
5120 case 9:
5121 return "#";
5122 default:
5123 gcc_unreachable ();
5124 }
5125 }
5126
5127 /* Return the CONST_DOUBLE representing the 80387 constant that is
5128 loaded by the specified special instruction. The argument IDX
5129 matches the return value from standard_80387_constant_p. */
5130
5131 rtx
5132 standard_80387_constant_rtx (int idx)
5133 {
5134 int i;
5135
5136 if (! ext_80387_constants_init)
5137 init_ext_80387_constants ();
5138
5139 switch (idx)
5140 {
5141 case 3:
5142 case 4:
5143 case 5:
5144 case 6:
5145 case 7:
5146 i = idx - 3;
5147 break;
5148
5149 default:
5150 gcc_unreachable ();
5151 }
5152
5153 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5154 XFmode);
5155 }
5156
5157 /* Return 1 if MODE is a valid mode for SSE.  */
5158 static int
5159 standard_sse_mode_p (enum machine_mode mode)
5160 {
5161 switch (mode)
5162 {
5163 case V16QImode:
5164 case V8HImode:
5165 case V4SImode:
5166 case V2DImode:
5167 case V4SFmode:
5168 case V2DFmode:
5169 return 1;
5170
5171 default:
5172 return 0;
5173 }
5174 }
5175
5176 /* Return 1 if X is an FP constant we can load into an SSE register
5177 without using memory.  */
5178 int
5179 standard_sse_constant_p (rtx x)
5180 {
5181 enum machine_mode mode = GET_MODE (x);
5182
5183 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5184 return 1;
5185 if (vector_all_ones_operand (x, mode)
5186 && standard_sse_mode_p (mode))
5187 return TARGET_SSE2 ? 2 : -1;
5188
5189 return 0;
5190 }
5191
5192 /* Return the opcode of the special instruction to be used to load
5193 the constant X. */
5194
5195 const char *
5196 standard_sse_constant_opcode (rtx insn, rtx x)
5197 {
5198 switch (standard_sse_constant_p (x))
5199 {
5200 case 1:
5201 if (get_attr_mode (insn) == MODE_V4SF)
5202 return "xorps\t%0, %0";
5203 else if (get_attr_mode (insn) == MODE_V2DF)
5204 return "xorpd\t%0, %0";
5205 else
5206 return "pxor\t%0, %0";
5207 case 2:
5208 return "pcmpeqd\t%0, %0";
5209 }
5210 gcc_unreachable ();
5211 }
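/* Note on the two cases above: xorps/xorpd/pxor of a register with itself
   produces the all-zero vector, while pcmpeqd of a register with itself
   compares equal in every element and so produces all ones; either way
   the constant is materialized without touching memory.  */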
5212
5213 /* Returns 1 if OP contains a symbol reference */
5214
5215 int
5216 symbolic_reference_mentioned_p (rtx op)
5217 {
5218 const char *fmt;
5219 int i;
5220
5221 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5222 return 1;
5223
5224 fmt = GET_RTX_FORMAT (GET_CODE (op));
5225 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5226 {
5227 if (fmt[i] == 'E')
5228 {
5229 int j;
5230
5231 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5232 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5233 return 1;
5234 }
5235
5236 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5237 return 1;
5238 }
5239
5240 return 0;
5241 }
5242
5243 /* Return 1 if it is appropriate to emit `ret' instructions in the
5244 body of a function. Do this only if the epilogue is simple, needing a
5245 couple of insns. Prior to reloading, we can't tell how many registers
5246 must be saved, so return 0 then. Return 0 if there is no frame
5247 marker to de-allocate. */
5248
5249 int
5250 ix86_can_use_return_insn_p (void)
5251 {
5252 struct ix86_frame frame;
5253
5254 if (! reload_completed || frame_pointer_needed)
5255 return 0;
5256
5257 /* Don't allow more than 32768 bytes of arguments to be popped, since
5258 that's all we handle with a single return instruction. */
5259 if (current_function_pops_args
5260 && current_function_args_size >= 32768)
5261 return 0;
5262
5263 ix86_compute_frame_layout (&frame);
5264 return frame.to_allocate == 0 && frame.nregs == 0;
5265 }
5266 \f
5267 /* Value should be nonzero if functions must have frame pointers.
5268 Zero means the frame pointer need not be set up (and parms may
5269 be accessed via the stack pointer) in functions that seem suitable. */
5270
5271 int
5272 ix86_frame_pointer_required (void)
5273 {
5274 /* If we accessed previous frames, then the generated code expects
5275 to be able to access the saved ebp value in our frame. */
5276 if (cfun->machine->accesses_prev_frame)
5277 return 1;
5278
5279 /* Several x86 OSes need a frame pointer for other reasons,
5280 usually pertaining to setjmp. */
5281 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5282 return 1;
5283
5284 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5285 the frame pointer by default. Turn it back on now if we've not
5286 got a leaf function. */
5287 if (TARGET_OMIT_LEAF_FRAME_POINTER
5288 && (!current_function_is_leaf
5289 || ix86_current_function_calls_tls_descriptor))
5290 return 1;
5291
5292 if (current_function_profile)
5293 return 1;
5294
5295 return 0;
5296 }
5297
5298 /* Record that the current function accesses previous call frames. */
5299
5300 void
5301 ix86_setup_frame_addresses (void)
5302 {
5303 cfun->machine->accesses_prev_frame = 1;
5304 }
5305 \f
5306 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5307 # define USE_HIDDEN_LINKONCE 1
5308 #else
5309 # define USE_HIDDEN_LINKONCE 0
5310 #endif
5311
5312 static int pic_labels_used;
5313
5314 /* Fills in the label name that should be used for a pc thunk for
5315 the given register. */
5316
5317 static void
5318 get_pc_thunk_name (char name[32], unsigned int regno)
5319 {
5320 gcc_assert (!TARGET_64BIT);
5321
5322 if (USE_HIDDEN_LINKONCE)
5323 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5324 else
5325 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5326 }
5327
5328
5329 /* Called at the end of the file.  It emits the pc thunks used by -fpic code;
5330 each thunk loads its register with the caller's return address and then returns. */
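/* For instance, the thunk emitted for %ebx is roughly

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   so a call to it leaves the address of the instruction following the
   call in %ebx.  */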
5331
5332 void
5333 ix86_file_end (void)
5334 {
5335 rtx xops[2];
5336 int regno;
5337
5338 for (regno = 0; regno < 8; ++regno)
5339 {
5340 char name[32];
5341
5342 if (! ((pic_labels_used >> regno) & 1))
5343 continue;
5344
5345 get_pc_thunk_name (name, regno);
5346
5347 #if TARGET_MACHO
5348 if (TARGET_MACHO)
5349 {
5350 switch_to_section (darwin_sections[text_coal_section]);
5351 fputs ("\t.weak_definition\t", asm_out_file);
5352 assemble_name (asm_out_file, name);
5353 fputs ("\n\t.private_extern\t", asm_out_file);
5354 assemble_name (asm_out_file, name);
5355 fputs ("\n", asm_out_file);
5356 ASM_OUTPUT_LABEL (asm_out_file, name);
5357 }
5358 else
5359 #endif
5360 if (USE_HIDDEN_LINKONCE)
5361 {
5362 tree decl;
5363
5364 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5365 error_mark_node);
5366 TREE_PUBLIC (decl) = 1;
5367 TREE_STATIC (decl) = 1;
5368 DECL_ONE_ONLY (decl) = 1;
5369
5370 (*targetm.asm_out.unique_section) (decl, 0);
5371 switch_to_section (get_named_section (decl, NULL, 0));
5372
5373 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5374 fputs ("\t.hidden\t", asm_out_file);
5375 assemble_name (asm_out_file, name);
5376 fputc ('\n', asm_out_file);
5377 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5378 }
5379 else
5380 {
5381 switch_to_section (text_section);
5382 ASM_OUTPUT_LABEL (asm_out_file, name);
5383 }
5384
5385 xops[0] = gen_rtx_REG (SImode, regno);
5386 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5387 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5388 output_asm_insn ("ret", xops);
5389 }
5390
5391 if (NEED_INDICATE_EXEC_STACK)
5392 file_end_indicate_exec_stack ();
5393 }
5394
5395 /* Emit code for the SET_GOT patterns. */
5396
5397 const char *
5398 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5399 {
5400 rtx xops[3];
5401
5402 xops[0] = dest;
5403
5404 if (TARGET_VXWORKS_RTP && flag_pic)
5405 {
5406 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5407 xops[2] = gen_rtx_MEM (Pmode,
5408 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5409 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5410
5411 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5412 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5413 an unadorned address. */
5414 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5415 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5416 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5417 return "";
5418 }
5419
5420 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5421
5422 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5423 {
5424 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5425
5426 if (!flag_pic)
5427 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5428 else
5429 output_asm_insn ("call\t%a2", xops);
5430
5431 #if TARGET_MACHO
5432 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5433 is what will be referenced by the Mach-O PIC subsystem. */
5434 if (!label)
5435 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5436 #endif
5437
5438 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5439 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5440
5441 if (flag_pic)
5442 output_asm_insn ("pop{l}\t%0", xops);
5443 }
5444 else
5445 {
5446 char name[32];
5447 get_pc_thunk_name (name, REGNO (dest));
5448 pic_labels_used |= 1 << REGNO (dest);
5449
5450 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5451 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5452 output_asm_insn ("call\t%X2", xops);
5453 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5454 is what will be referenced by the Mach-O PIC subsystem. */
5455 #if TARGET_MACHO
5456 if (!label)
5457 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5458 else
5459 targetm.asm_out.internal_label (asm_out_file, "L",
5460 CODE_LABEL_NUMBER (label));
5461 #endif
5462 }
5463
5464 if (TARGET_MACHO)
5465 return "";
5466
5467 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5468 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5469 else
5470 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5471
5472 return "";
5473 }
5474
5475 /* Generate a "push" pattern for input ARG. */
5476
5477 static rtx
5478 gen_push (rtx arg)
5479 {
5480 return gen_rtx_SET (VOIDmode,
5481 gen_rtx_MEM (Pmode,
5482 gen_rtx_PRE_DEC (Pmode,
5483 stack_pointer_rtx)),
5484 arg);
5485 }
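/* The RTL produced above is simply

       (set (mem (pre_dec (reg sp))) arg)

   which the machine description matches as a single push instruction.  */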
5486
5487 /* Return the number of an unused call-clobbered register that is available
5488 for the entire function, or INVALID_REGNUM if there is none. */
5489
5490 static unsigned int
5491 ix86_select_alt_pic_regnum (void)
5492 {
5493 if (current_function_is_leaf && !current_function_profile
5494 && !ix86_current_function_calls_tls_descriptor)
5495 {
5496 int i;
5497 for (i = 2; i >= 0; --i)
5498 if (!regs_ever_live[i])
5499 return i;
5500 }
5501
5502 return INVALID_REGNUM;
5503 }
5504
5505 /* Return 1 if we need to save REGNO. */
5506 static int
5507 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5508 {
5509 if (pic_offset_table_rtx
5510 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5511 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5512 || current_function_profile
5513 || current_function_calls_eh_return
5514 || current_function_uses_const_pool))
5515 {
5516 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5517 return 0;
5518 return 1;
5519 }
5520
5521 if (current_function_calls_eh_return && maybe_eh_return)
5522 {
5523 unsigned i;
5524 for (i = 0; ; i++)
5525 {
5526 unsigned test = EH_RETURN_DATA_REGNO (i);
5527 if (test == INVALID_REGNUM)
5528 break;
5529 if (test == regno)
5530 return 1;
5531 }
5532 }
5533
5534 if (cfun->machine->force_align_arg_pointer
5535 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5536 return 1;
5537
5538 return (regs_ever_live[regno]
5539 && !call_used_regs[regno]
5540 && !fixed_regs[regno]
5541 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5542 }
5543
5544 /* Return number of registers to be saved on the stack. */
5545
5546 static int
5547 ix86_nsaved_regs (void)
5548 {
5549 int nregs = 0;
5550 int regno;
5551
5552 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5553 if (ix86_save_reg (regno, true))
5554 nregs++;
5555 return nregs;
5556 }
5557
5558 /* Return the offset between two registers, one to be eliminated, and the other
5559 its replacement, at the start of a routine. */
5560
5561 HOST_WIDE_INT
5562 ix86_initial_elimination_offset (int from, int to)
5563 {
5564 struct ix86_frame frame;
5565 ix86_compute_frame_layout (&frame);
5566
5567 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5568 return frame.hard_frame_pointer_offset;
5569 else if (from == FRAME_POINTER_REGNUM
5570 && to == HARD_FRAME_POINTER_REGNUM)
5571 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5572 else
5573 {
5574 gcc_assert (to == STACK_POINTER_REGNUM);
5575
5576 if (from == ARG_POINTER_REGNUM)
5577 return frame.stack_pointer_offset;
5578
5579 gcc_assert (from == FRAME_POINTER_REGNUM);
5580 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5581 }
5582 }
5583
5584 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
5585
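/* A sketch of the layout computed below, from higher to lower addresses
   (the stack grows downwards):

       return address
       saved frame pointer (if frame_pointer_needed)    <- hard_frame_pointer_offset
       register save area (nregs words)
       va_arg register save area (64-bit varargs only)
       padding1 (to stack_alignment_needed)             <- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing argument area (if ACCUMULATE_OUTGOING_ARGS)
       padding2 (to preferred_alignment)                <- stack_pointer_offset

   When the red zone is usable, to_allocate and stack_pointer_offset are
   reduced by the red zone size at the end.  */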
5586 static void
5587 ix86_compute_frame_layout (struct ix86_frame *frame)
5588 {
5589 HOST_WIDE_INT total_size;
5590 unsigned int stack_alignment_needed;
5591 HOST_WIDE_INT offset;
5592 unsigned int preferred_alignment;
5593 HOST_WIDE_INT size = get_frame_size ();
5594
5595 frame->nregs = ix86_nsaved_regs ();
5596 total_size = size;
5597
5598 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5599 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5600
5601 /* During reload iterations the number of registers saved can change.
5602 Recompute the value as needed.  Do not recompute when the number of registers
5603 didn't change, as reload makes multiple calls to this function and does not
5604 expect the decision to change within a single iteration. */
5605 if (!optimize_size
5606 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5607 {
5608 int count = frame->nregs;
5609
5610 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5611 /* The fast prologue uses move instead of push to save registers.  This
5612 is significantly longer, but also executes faster as modern hardware
5613 can execute the moves in parallel, but can't do that for push/pop.
5614
5615 Be careful about choosing which prologue to emit: when the function takes
5616 many instructions to execute we may as well use the slow version, and
5617 likewise when the function is known to be outside a hot spot (this is
5618 known with profile feedback only).  Weight the size of the function by the
5619 number of registers to save, as it is cheap to use one or two push
5620 instructions but very slow to use many of them. */
5621 if (count)
5622 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5623 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5624 || (flag_branch_probabilities
5625 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5626 cfun->machine->use_fast_prologue_epilogue = false;
5627 else
5628 cfun->machine->use_fast_prologue_epilogue
5629 = !expensive_function_p (count);
5630 }
5631 if (TARGET_PROLOGUE_USING_MOVE
5632 && cfun->machine->use_fast_prologue_epilogue)
5633 frame->save_regs_using_mov = true;
5634 else
5635 frame->save_regs_using_mov = false;
5636
5637
5638 /* Skip return address and saved base pointer. */
5639 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5640
5641 frame->hard_frame_pointer_offset = offset;
5642
5643 /* Do some sanity checking of stack_alignment_needed and
5644 preferred_alignment, since the i386 port is the only one using these
5645 features, which may break easily. */
5646
5647 gcc_assert (!size || stack_alignment_needed);
5648 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5649 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5650 gcc_assert (stack_alignment_needed
5651 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5652
5653 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5654 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5655
5656 /* Register save area */
5657 offset += frame->nregs * UNITS_PER_WORD;
5658
5659 /* Va-arg area */
5660 if (ix86_save_varrargs_registers)
5661 {
5662 offset += X86_64_VARARGS_SIZE;
5663 frame->va_arg_size = X86_64_VARARGS_SIZE;
5664 }
5665 else
5666 frame->va_arg_size = 0;
5667
5668 /* Align start of frame for local function. */
5669 frame->padding1 = ((offset + stack_alignment_needed - 1)
5670 & -stack_alignment_needed) - offset;
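/* E.g. offset == 20 with stack_alignment_needed == 16 yields
   padding1 == ((20 + 15) & -16) - 20 == 32 - 20 == 12 bytes.  */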
5671
5672 offset += frame->padding1;
5673
5674 /* Frame pointer points here. */
5675 frame->frame_pointer_offset = offset;
5676
5677 offset += size;
5678
5679 /* Add the outgoing arguments area.  It can be skipped if we eliminated
5680 all the function calls as dead code.
5681 Skipping is however impossible when the function calls alloca, since the
5682 alloca expander assumes that the last current_function_outgoing_args_size
5683 bytes of the stack frame are unused. */
5684 if (ACCUMULATE_OUTGOING_ARGS
5685 && (!current_function_is_leaf || current_function_calls_alloca
5686 || ix86_current_function_calls_tls_descriptor))
5687 {
5688 offset += current_function_outgoing_args_size;
5689 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5690 }
5691 else
5692 frame->outgoing_arguments_size = 0;
5693
5694 /* Align stack boundary. Only needed if we're calling another function
5695 or using alloca. */
5696 if (!current_function_is_leaf || current_function_calls_alloca
5697 || ix86_current_function_calls_tls_descriptor)
5698 frame->padding2 = ((offset + preferred_alignment - 1)
5699 & -preferred_alignment) - offset;
5700 else
5701 frame->padding2 = 0;
5702
5703 offset += frame->padding2;
5704
5705 /* We've reached end of stack frame. */
5706 frame->stack_pointer_offset = offset;
5707
5708 /* Size prologue needs to allocate. */
5709 frame->to_allocate =
5710 (size + frame->padding1 + frame->padding2
5711 + frame->outgoing_arguments_size + frame->va_arg_size);
5712
5713 if ((!frame->to_allocate && frame->nregs <= 1)
5714 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5715 frame->save_regs_using_mov = false;
5716
5717 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5718 && current_function_is_leaf
5719 && !ix86_current_function_calls_tls_descriptor)
5720 {
5721 frame->red_zone_size = frame->to_allocate;
5722 if (frame->save_regs_using_mov)
5723 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5724 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5725 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5726 }
5727 else
5728 frame->red_zone_size = 0;
5729 frame->to_allocate -= frame->red_zone_size;
5730 frame->stack_pointer_offset -= frame->red_zone_size;
5731 #if 0
5732 fprintf (stderr, "\n");
5733 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5734 fprintf (stderr, "size: %ld\n", (long)size);
5735 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5736 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5737 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5738 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5739 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5740 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5741 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5742 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5743 (long)frame->hard_frame_pointer_offset);
5744 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5745 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5746 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5747 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5748 #endif
5749 }
5750
5751 /* Emit code to save registers in the prologue. */
5752
5753 static void
5754 ix86_emit_save_regs (void)
5755 {
5756 unsigned int regno;
5757 rtx insn;
5758
5759 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5760 if (ix86_save_reg (regno, true))
5761 {
5762 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5763 RTX_FRAME_RELATED_P (insn) = 1;
5764 }
5765 }
5766
5767 /* Emit code to save registers using MOV insns.  The first register
5768 is saved at POINTER + OFFSET. */
5769 static void
5770 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5771 {
5772 unsigned int regno;
5773 rtx insn;
5774
5775 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5776 if (ix86_save_reg (regno, true))
5777 {
5778 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5779 Pmode, offset),
5780 gen_rtx_REG (Pmode, regno));
5781 RTX_FRAME_RELATED_P (insn) = 1;
5782 offset += UNITS_PER_WORD;
5783 }
5784 }
5785
5786 /* Expand prologue or epilogue stack adjustment.
5787 The pattern exists to put a dependency on all ebp-based memory accesses.
5788 STYLE should be negative if instructions should be marked as frame related,
5789 zero if %r11 register is live and cannot be freely used and positive
5790 otherwise. */
5791
5792 static void
5793 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5794 {
5795 rtx insn;
5796
5797 if (! TARGET_64BIT)
5798 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5799 else if (x86_64_immediate_operand (offset, DImode))
5800 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5801 else
5802 {
5803 rtx r11;
5804 /* r11 is used by indirect sibcall return as well, set before the
5805 epilogue and used after the epilogue. ATM indirect sibcall
5806 shouldn't be used together with huge frame sizes in one
5807 function because of the frame_size check in sibcall.c. */
5808 gcc_assert (style);
5809 r11 = gen_rtx_REG (DImode, R11_REG);
5810 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5811 if (style < 0)
5812 RTX_FRAME_RELATED_P (insn) = 1;
5813 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5814 offset));
5815 }
5816 if (style < 0)
5817 RTX_FRAME_RELATED_P (insn) = 1;
5818 }
5819
5820 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5821
5822 static rtx
5823 ix86_internal_arg_pointer (void)
5824 {
5825 bool has_force_align_arg_pointer =
5826 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5827 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5828 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5829 && DECL_NAME (current_function_decl)
5830 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5831 && DECL_FILE_SCOPE_P (current_function_decl))
5832 || ix86_force_align_arg_pointer
5833 || has_force_align_arg_pointer)
5834 {
5835 /* Nested functions can't realign the stack due to a register
5836 conflict. */
5837 if (DECL_CONTEXT (current_function_decl)
5838 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5839 {
5840 if (ix86_force_align_arg_pointer)
5841 warning (0, "-mstackrealign ignored for nested functions");
5842 if (has_force_align_arg_pointer)
5843 error ("%s not supported for nested functions",
5844 ix86_force_align_arg_pointer_string);
5845 return virtual_incoming_args_rtx;
5846 }
5847 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5848 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5849 }
5850 else
5851 return virtual_incoming_args_rtx;
5852 }
5853
5854 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5855 This is called from dwarf2out.c to emit call frame instructions
5856 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5857 static void
5858 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5859 {
5860 rtx unspec = SET_SRC (pattern);
5861 gcc_assert (GET_CODE (unspec) == UNSPEC);
5862
5863 switch (index)
5864 {
5865 case UNSPEC_REG_SAVE:
5866 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5867 SET_DEST (pattern));
5868 break;
5869 case UNSPEC_DEF_CFA:
5870 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5871 INTVAL (XVECEXP (unspec, 0, 0)));
5872 break;
5873 default:
5874 gcc_unreachable ();
5875 }
5876 }
5877
5878 /* Expand the prologue into a bunch of separate insns. */
5879
5880 void
5881 ix86_expand_prologue (void)
5882 {
5883 rtx insn;
5884 bool pic_reg_used;
5885 struct ix86_frame frame;
5886 HOST_WIDE_INT allocate;
5887
5888 ix86_compute_frame_layout (&frame);
5889
5890 if (cfun->machine->force_align_arg_pointer)
5891 {
5892 rtx x, y;
5893
5894 /* Grab the argument pointer. */
5895 x = plus_constant (stack_pointer_rtx, 4);
5896 y = cfun->machine->force_align_arg_pointer;
5897 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5898 RTX_FRAME_RELATED_P (insn) = 1;
5899
5900 /* The unwind info consists of two parts: install the fafp as the cfa,
5901 and record the fafp as the "save register" of the stack pointer.
5902 The latter is there so that the unwinder can see where it
5903 should restore the stack pointer across the and insn. */
5904 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5905 x = gen_rtx_SET (VOIDmode, y, x);
5906 RTX_FRAME_RELATED_P (x) = 1;
5907 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5908 UNSPEC_REG_SAVE);
5909 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5910 RTX_FRAME_RELATED_P (y) = 1;
5911 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5912 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5913 REG_NOTES (insn) = x;
5914
5915 /* Align the stack. */
5916 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5917 GEN_INT (-16)));
5918
5919 /* And here we cheat like madmen with the unwind info. We force the
5920 cfa register back to sp+4, which is exactly what it was at the
5921 start of the function. Re-pushing the return address results in
5922 the return at the same spot relative to the cfa, and thus is
5923 correct wrt the unwind info. */
5924 x = cfun->machine->force_align_arg_pointer;
5925 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5926 insn = emit_insn (gen_push (x));
5927 RTX_FRAME_RELATED_P (insn) = 1;
5928
5929 x = GEN_INT (4);
5930 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5931 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5932 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5933 REG_NOTES (insn) = x;
5934 }
5935
5936 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5937 slower on all targets. Also sdb doesn't like it. */
5938
5939 if (frame_pointer_needed)
5940 {
5941 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5942 RTX_FRAME_RELATED_P (insn) = 1;
5943
5944 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5945 RTX_FRAME_RELATED_P (insn) = 1;
5946 }
5947
5948 allocate = frame.to_allocate;
5949
5950 if (!frame.save_regs_using_mov)
5951 ix86_emit_save_regs ();
5952 else
5953 allocate += frame.nregs * UNITS_PER_WORD;
5954
5955 /* When using the red zone we may start register saving before allocating
5956 the stack frame, saving one cycle of the prologue. */
5957 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5958 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5959 : stack_pointer_rtx,
5960 -frame.nregs * UNITS_PER_WORD);
5961
5962 if (allocate == 0)
5963 ;
5964 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5965 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5966 GEN_INT (-allocate), -1);
5967 else
5968 {
5969 /* Only valid for Win32. */
5970 rtx eax = gen_rtx_REG (SImode, 0);
5971 bool eax_live = ix86_eax_live_at_start_p ();
5972 rtx t;
5973
5974 gcc_assert (!TARGET_64BIT);
5975
5976 if (eax_live)
5977 {
5978 emit_insn (gen_push (eax));
5979 allocate -= 4;
5980 }
5981
5982 emit_move_insn (eax, GEN_INT (allocate));
5983
5984 insn = emit_insn (gen_allocate_stack_worker (eax));
5985 RTX_FRAME_RELATED_P (insn) = 1;
5986 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5987 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5988 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5989 t, REG_NOTES (insn));
5990
5991 if (eax_live)
5992 {
5993 if (frame_pointer_needed)
5994 t = plus_constant (hard_frame_pointer_rtx,
5995 allocate
5996 - frame.to_allocate
5997 - frame.nregs * UNITS_PER_WORD);
5998 else
5999 t = plus_constant (stack_pointer_rtx, allocate);
6000 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
6001 }
6002 }
6003
6004 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6005 {
6006 if (!frame_pointer_needed || !frame.to_allocate)
6007 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6008 else
6009 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6010 -frame.nregs * UNITS_PER_WORD);
6011 }
6012
6013 pic_reg_used = false;
6014 if (pic_offset_table_rtx
6015 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6016 || current_function_profile))
6017 {
6018 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6019
6020 if (alt_pic_reg_used != INVALID_REGNUM)
6021 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6022
6023 pic_reg_used = true;
6024 }
6025
6026 if (pic_reg_used)
6027 {
6028 if (TARGET_64BIT)
6029 {
6030 if (ix86_cmodel == CM_LARGE_PIC)
6031 {
6032 rtx tmp_reg = gen_rtx_REG (DImode,
6033 FIRST_REX_INT_REG + 3 /* R11 */);
6034 rtx label = gen_label_rtx ();
6035 emit_label (label);
6036 LABEL_PRESERVE_P (label) = 1;
6037 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6038 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6039 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6040 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6041 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6042 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6043 pic_offset_table_rtx, tmp_reg));
6044 }
6045 else
6046 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6047 }
6048 else
6049 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6050
6051 /* Even with accurate pre-reload life analysis, we can wind up
6052 deleting all references to the pic register after reload.
6053 Consider if cross-jumping unifies two sides of a branch
6054 controlled by a comparison vs the only read from a global.
6055 In which case, allow the set_got to be deleted, though we're
6056 too late to do anything about the ebx save in the prologue. */
6057 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6058 }
6059
6060 /* Prevent function calls from being scheduled before the call to mcount.
6061 In the pic_reg_used case, make sure that the got load isn't deleted. */
6062 if (current_function_profile)
6063 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6064 }
6065
6066 /* Emit code to restore saved registers using MOV insns. First register
6067 is restored from POINTER + OFFSET. */
6068 static void
6069 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6070 int maybe_eh_return)
6071 {
6072 int regno;
6073 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6074
6075 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6076 if (ix86_save_reg (regno, maybe_eh_return))
6077 {
6078 /* Ensure that adjust_address won't be forced to produce a pointer
6079 outside the range allowed by the x86-64 instruction set. */
6080 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6081 {
6082 rtx r11;
6083
6084 r11 = gen_rtx_REG (DImode, R11_REG);
6085 emit_move_insn (r11, GEN_INT (offset));
6086 emit_insn (gen_adddi3 (r11, r11, pointer));
6087 base_address = gen_rtx_MEM (Pmode, r11);
6088 offset = 0;
6089 }
6090 emit_move_insn (gen_rtx_REG (Pmode, regno),
6091 adjust_address (base_address, Pmode, offset));
6092 offset += UNITS_PER_WORD;
6093 }
6094 }
6095
6096 /* Restore function stack, frame, and registers. */
6097
6098 void
6099 ix86_expand_epilogue (int style)
6100 {
6101 int regno;
6102 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6103 struct ix86_frame frame;
6104 HOST_WIDE_INT offset;
6105
6106 ix86_compute_frame_layout (&frame);
6107
6108 /* Calculate start of saved registers relative to ebp. Special care
6109 must be taken for the normal return case of a function using
6110 eh_return: the eax and edx registers are marked as saved, but not
6111 restored along this path. */
6112 offset = frame.nregs;
6113 if (current_function_calls_eh_return && style != 2)
6114 offset -= 2;
6115 offset *= -UNITS_PER_WORD;
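/* For example, three saved registers on a 32-bit target (and no eh_return)
   give offset == -12, i.e. the register save area occupies the 12 bytes
   immediately below the saved frame pointer.  */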
6116
6117 /* If we're only restoring one register and sp is not valid then we use
6118 a move instruction to restore the register, since that is less work
6119 than reloading sp and popping the register.
6120
6121 The default code results in a stack adjustment using an add/lea instruction,
6122 while this code results in a LEAVE instruction (or discrete equivalent),
6123 so it is profitable in some other cases as well, especially when there
6124 are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6125 is set and there is exactly one register to pop.  This heuristic may need
6126 some tuning in the future. */
6127 if ((!sp_valid && frame.nregs <= 1)
6128 || (TARGET_EPILOGUE_USING_MOVE
6129 && cfun->machine->use_fast_prologue_epilogue
6130 && (frame.nregs > 1 || frame.to_allocate))
6131 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6132 || (frame_pointer_needed && TARGET_USE_LEAVE
6133 && cfun->machine->use_fast_prologue_epilogue
6134 && frame.nregs == 1)
6135 || current_function_calls_eh_return)
6136 {
6137 /* Restore registers.  We can use ebp or esp to address the memory
6138 locations.  If both are available, default to ebp, since offsets
6139 are known to be small.  The only exception is esp pointing directly to
6140 the end of the block of saved registers, where we may simplify the
6141 addressing mode. */
6142
6143 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6144 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6145 frame.to_allocate, style == 2);
6146 else
6147 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6148 offset, style == 2);
6149
6150 /* eh_return epilogues need %ecx added to the stack pointer. */
6151 if (style == 2)
6152 {
6153 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6154
6155 if (frame_pointer_needed)
6156 {
6157 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6158 tmp = plus_constant (tmp, UNITS_PER_WORD);
6159 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6160
6161 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6162 emit_move_insn (hard_frame_pointer_rtx, tmp);
6163
6164 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6165 const0_rtx, style);
6166 }
6167 else
6168 {
6169 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6170 tmp = plus_constant (tmp, (frame.to_allocate
6171 + frame.nregs * UNITS_PER_WORD));
6172 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6173 }
6174 }
6175 else if (!frame_pointer_needed)
6176 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6177 GEN_INT (frame.to_allocate
6178 + frame.nregs * UNITS_PER_WORD),
6179 style);
6180 /* If not an i386, mov & pop is faster than "leave". */
6181 else if (TARGET_USE_LEAVE || optimize_size
6182 || !cfun->machine->use_fast_prologue_epilogue)
6183 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6184 else
6185 {
6186 pro_epilogue_adjust_stack (stack_pointer_rtx,
6187 hard_frame_pointer_rtx,
6188 const0_rtx, style);
6189 if (TARGET_64BIT)
6190 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6191 else
6192 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6193 }
6194 }
6195 else
6196 {
6197 /* First step is to deallocate the stack frame so that we can
6198 pop the registers. */
6199 if (!sp_valid)
6200 {
6201 gcc_assert (frame_pointer_needed);
6202 pro_epilogue_adjust_stack (stack_pointer_rtx,
6203 hard_frame_pointer_rtx,
6204 GEN_INT (offset), style);
6205 }
6206 else if (frame.to_allocate)
6207 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6208 GEN_INT (frame.to_allocate), style);
6209
6210 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6211 if (ix86_save_reg (regno, false))
6212 {
6213 if (TARGET_64BIT)
6214 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6215 else
6216 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6217 }
6218 if (frame_pointer_needed)
6219 {
6220 /* Leave results in shorter dependency chains on CPUs that are
6221 able to grok it fast. */
6222 if (TARGET_USE_LEAVE)
6223 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6224 else if (TARGET_64BIT)
6225 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6226 else
6227 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6228 }
6229 }
6230
6231 if (cfun->machine->force_align_arg_pointer)
6232 {
6233 emit_insn (gen_addsi3 (stack_pointer_rtx,
6234 cfun->machine->force_align_arg_pointer,
6235 GEN_INT (-4)));
6236 }
6237
6238 /* Sibcall epilogues don't want a return instruction. */
6239 if (style == 0)
6240 return;
6241
6242 if (current_function_pops_args && current_function_args_size)
6243 {
6244 rtx popc = GEN_INT (current_function_pops_args);
6245
6246 /* i386 can only pop 64K bytes. If asked to pop more, pop
6247 return address, do explicit add, and jump indirectly to the
6248 caller. */
6249
6250 if (current_function_pops_args >= 65536)
6251 {
6252 rtx ecx = gen_rtx_REG (SImode, 2);
6253
6254 /* There is no "pascal" calling convention in 64bit ABI. */
6255 gcc_assert (!TARGET_64BIT);
6256
6257 emit_insn (gen_popsi1 (ecx));
6258 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6259 emit_jump_insn (gen_return_indirect_internal (ecx));
6260 }
6261 else
6262 emit_jump_insn (gen_return_pop_internal (popc));
6263 }
6264 else
6265 emit_jump_insn (gen_return_internal ());
6266 }
6267
6268 /* Reset from the function's potential modifications. */
6269
6270 static void
6271 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6272 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6273 {
6274 if (pic_offset_table_rtx)
6275 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6276 #if TARGET_MACHO
6277 /* Mach-O doesn't support labels at the end of objects, so if
6278 it looks like we might want one, insert a NOP. */
6279 {
6280 rtx insn = get_last_insn ();
6281 while (insn
6282 && NOTE_P (insn)
6283 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6284 insn = PREV_INSN (insn);
6285 if (insn
6286 && (LABEL_P (insn)
6287 || (NOTE_P (insn)
6288 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6289 fputs ("\tnop\n", file);
6290 }
6291 #endif
6292
6293 }
6294 \f
6295 /* Extract the parts of an RTL expression that is a valid memory address
6296 for an instruction. Return 0 if the structure of the address is
6297 grossly off. Return -1 if the address contains ASHIFT, so it is not
6298 strictly valid, but still used for computing length of lea instruction. */
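/* For example, the address %ebx + %eax*4 + 12 - a PLUS tree combining
   (mult (reg eax) (const_int 4)), (reg ebx) and (const_int 12) - is
   decomposed into base == %ebx, index == %eax, scale == 4 and disp == 12.  */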
6299
6300 int
6301 ix86_decompose_address (rtx addr, struct ix86_address *out)
6302 {
6303 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6304 rtx base_reg, index_reg;
6305 HOST_WIDE_INT scale = 1;
6306 rtx scale_rtx = NULL_RTX;
6307 int retval = 1;
6308 enum ix86_address_seg seg = SEG_DEFAULT;
6309
6310 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6311 base = addr;
6312 else if (GET_CODE (addr) == PLUS)
6313 {
6314 rtx addends[4], op;
6315 int n = 0, i;
6316
6317 op = addr;
6318 do
6319 {
6320 if (n >= 4)
6321 return 0;
6322 addends[n++] = XEXP (op, 1);
6323 op = XEXP (op, 0);
6324 }
6325 while (GET_CODE (op) == PLUS);
6326 if (n >= 4)
6327 return 0;
6328 addends[n] = op;
6329
6330 for (i = n; i >= 0; --i)
6331 {
6332 op = addends[i];
6333 switch (GET_CODE (op))
6334 {
6335 case MULT:
6336 if (index)
6337 return 0;
6338 index = XEXP (op, 0);
6339 scale_rtx = XEXP (op, 1);
6340 break;
6341
6342 case UNSPEC:
6343 if (XINT (op, 1) == UNSPEC_TP
6344 && TARGET_TLS_DIRECT_SEG_REFS
6345 && seg == SEG_DEFAULT)
6346 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6347 else
6348 return 0;
6349 break;
6350
6351 case REG:
6352 case SUBREG:
6353 if (!base)
6354 base = op;
6355 else if (!index)
6356 index = op;
6357 else
6358 return 0;
6359 break;
6360
6361 case CONST:
6362 case CONST_INT:
6363 case SYMBOL_REF:
6364 case LABEL_REF:
6365 if (disp)
6366 return 0;
6367 disp = op;
6368 break;
6369
6370 default:
6371 return 0;
6372 }
6373 }
6374 }
6375 else if (GET_CODE (addr) == MULT)
6376 {
6377 index = XEXP (addr, 0); /* index*scale */
6378 scale_rtx = XEXP (addr, 1);
6379 }
6380 else if (GET_CODE (addr) == ASHIFT)
6381 {
6382 rtx tmp;
6383
6384 /* We're called for lea too, which implements ashift on occasion. */
6385 index = XEXP (addr, 0);
6386 tmp = XEXP (addr, 1);
6387 if (!CONST_INT_P (tmp))
6388 return 0;
6389 scale = INTVAL (tmp);
6390 if ((unsigned HOST_WIDE_INT) scale > 3)
6391 return 0;
6392 scale = 1 << scale;
6393 retval = -1;
6394 }
6395 else
6396 disp = addr; /* displacement */
6397
6398 /* Extract the integral value of scale. */
6399 if (scale_rtx)
6400 {
6401 if (!CONST_INT_P (scale_rtx))
6402 return 0;
6403 scale = INTVAL (scale_rtx);
6404 }
6405
6406 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6407 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6408
6409 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6410 if (base_reg && index_reg && scale == 1
6411 && (index_reg == arg_pointer_rtx
6412 || index_reg == frame_pointer_rtx
6413 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6414 {
6415 rtx tmp;
6416 tmp = base, base = index, index = tmp;
6417 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6418 }
6419
6420 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6421 if ((base_reg == hard_frame_pointer_rtx
6422 || base_reg == frame_pointer_rtx
6423 || base_reg == arg_pointer_rtx) && !disp)
6424 disp = const0_rtx;
6425
6426 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6427 Avoid this by transforming to [%esi+0]. */
6428 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6429 && base_reg && !index_reg && !disp
6430 && REG_P (base_reg)
6431 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6432 disp = const0_rtx;
6433
6434 /* Special case: encode reg+reg instead of reg*2. */
6435 if (!base && index && scale && scale == 2)
6436 base = index, base_reg = index_reg, scale = 1;
6437
6438 /* Special case: scaling cannot be encoded without base or displacement. */
6439 if (!base && !disp && index && scale != 1)
6440 disp = const0_rtx;
6441
6442 out->base = base;
6443 out->index = index;
6444 out->disp = disp;
6445 out->scale = scale;
6446 out->seg = seg;
6447
6448 return retval;
6449 }
6450 \f
6451 /* Return the cost of the memory address X.
6452 For i386, it is better to use a complex address than to let gcc copy
6453 the address into a register and make a new pseudo, but not if the address
6454 requires two registers - that would mean more pseudos with longer
6455 lifetimes. */
6456 static int
6457 ix86_address_cost (rtx x)
6458 {
6459 struct ix86_address parts;
6460 int cost = 1;
6461 int ok = ix86_decompose_address (x, &parts);
6462
6463 gcc_assert (ok);
6464
6465 if (parts.base && GET_CODE (parts.base) == SUBREG)
6466 parts.base = SUBREG_REG (parts.base);
6467 if (parts.index && GET_CODE (parts.index) == SUBREG)
6468 parts.index = SUBREG_REG (parts.index);
6469
6470 /* More complex memory references are better. */
6471 if (parts.disp && parts.disp != const0_rtx)
6472 cost--;
6473 if (parts.seg != SEG_DEFAULT)
6474 cost--;
6475
6476 /* Attempt to minimize number of registers in the address. */
6477 if ((parts.base
6478 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6479 || (parts.index
6480 && (!REG_P (parts.index)
6481 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6482 cost++;
6483
6484 if (parts.base
6485 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6486 && parts.index
6487 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6488 && parts.base != parts.index)
6489 cost++;
6490
6491 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6492 since its predecode logic can't detect the length of such instructions
6493 and decoding degenerates to vector decoded.  Increase the cost of such
6494 addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
6495 to split such addresses or even refuse them altogether.
6496
6497 The following addressing modes are affected:
6498 [base+scale*index]
6499 [scale*index+disp]
6500 [base+index]
6501
6502 The first and last case may be avoidable by explicitly coding the zero
6503 into the memory address, but I don't have an AMD-K6 machine handy to
6504 check this theory. */
6505
6506 if (TARGET_K6
6507 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6508 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6509 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6510 cost += 10;
6511
6512 return cost;
6513 }
6514 \f
6515 /* If X is a machine specific address (i.e. a symbol or label being
6516 referenced as a displacement from the GOT implemented using an
6517 UNSPEC), then return the base term. Otherwise return X. */
6518
6519 rtx
6520 ix86_find_base_term (rtx x)
6521 {
6522 rtx term;
6523
6524 if (TARGET_64BIT)
6525 {
6526 if (GET_CODE (x) != CONST)
6527 return x;
6528 term = XEXP (x, 0);
6529 if (GET_CODE (term) == PLUS
6530 && (CONST_INT_P (XEXP (term, 1))
6531 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6532 term = XEXP (term, 0);
6533 if (GET_CODE (term) != UNSPEC
6534 || XINT (term, 1) != UNSPEC_GOTPCREL)
6535 return x;
6536
6537 term = XVECEXP (term, 0, 0);
6538
6539 if (GET_CODE (term) != SYMBOL_REF
6540 && GET_CODE (term) != LABEL_REF)
6541 return x;
6542
6543 return term;
6544 }
6545
6546 term = ix86_delegitimize_address (x);
6547
6548 if (GET_CODE (term) != SYMBOL_REF
6549 && GET_CODE (term) != LABEL_REF)
6550 return x;
6551
6552 return term;
6553 }
6554
6555 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6556 this is used to form addresses of local data when -fPIC is in
6557 use. */
6558
6559 static bool
6560 darwin_local_data_pic (rtx disp)
6561 {
6562 if (GET_CODE (disp) == MINUS)
6563 {
6564 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6565 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6566 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6567 {
6568 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6569 if (! strcmp (sym_name, "<pic base>"))
6570 return true;
6571 }
6572 }
6573
6574 return false;
6575 }
6576 \f
6577 /* Determine if a given RTX is a valid constant. We already know this
6578 satisfies CONSTANT_P. */
6579
6580 bool
6581 legitimate_constant_p (rtx x)
6582 {
6583 switch (GET_CODE (x))
6584 {
6585 case CONST:
6586 x = XEXP (x, 0);
6587
6588 if (GET_CODE (x) == PLUS)
6589 {
6590 if (!CONST_INT_P (XEXP (x, 1)))
6591 return false;
6592 x = XEXP (x, 0);
6593 }
6594
6595 if (TARGET_MACHO && darwin_local_data_pic (x))
6596 return true;
6597
6598 /* Only some unspecs are valid as "constants". */
6599 if (GET_CODE (x) == UNSPEC)
6600 switch (XINT (x, 1))
6601 {
6602 case UNSPEC_GOT:
6603 case UNSPEC_GOTOFF:
6604 case UNSPEC_PLTOFF:
6605 return TARGET_64BIT;
6606 case UNSPEC_TPOFF:
6607 case UNSPEC_NTPOFF:
6608 x = XVECEXP (x, 0, 0);
6609 return (GET_CODE (x) == SYMBOL_REF
6610 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6611 case UNSPEC_DTPOFF:
6612 x = XVECEXP (x, 0, 0);
6613 return (GET_CODE (x) == SYMBOL_REF
6614 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6615 default:
6616 return false;
6617 }
6618
6619 /* We must have drilled down to a symbol. */
6620 if (GET_CODE (x) == LABEL_REF)
6621 return true;
6622 if (GET_CODE (x) != SYMBOL_REF)
6623 return false;
6624 /* FALLTHRU */
6625
6626 case SYMBOL_REF:
6627 /* TLS symbols are never valid. */
6628 if (SYMBOL_REF_TLS_MODEL (x))
6629 return false;
6630 break;
6631
6632 case CONST_DOUBLE:
6633 if (GET_MODE (x) == TImode
6634 && x != CONST0_RTX (TImode)
6635 && !TARGET_64BIT)
6636 return false;
6637 break;
6638
6639 case CONST_VECTOR:
6640 if (x == CONST0_RTX (GET_MODE (x)))
6641 return true;
6642 return false;
6643
6644 default:
6645 break;
6646 }
6647
6648 /* Otherwise we handle everything else in the move patterns. */
6649 return true;
6650 }
6651
6652 /* Determine if it's legal to put X into the constant pool. This
6653 is not possible for the address of thread-local symbols, which
6654 is checked above. */
6655
6656 static bool
6657 ix86_cannot_force_const_mem (rtx x)
6658 {
6659 /* We can always put integral constants and vectors in memory. */
6660 switch (GET_CODE (x))
6661 {
6662 case CONST_INT:
6663 case CONST_DOUBLE:
6664 case CONST_VECTOR:
6665 return false;
6666
6667 default:
6668 break;
6669 }
6670 return !legitimate_constant_p (x);
6671 }
6672
6673 /* Determine if a given RTX is a valid constant address. */
6674
6675 bool
6676 constant_address_p (rtx x)
6677 {
6678 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6679 }
6680
6681 /* Nonzero if the constant value X is a legitimate general operand
6682 when generating PIC code. It is given that flag_pic is on and
6683 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6684
6685 bool
6686 legitimate_pic_operand_p (rtx x)
6687 {
6688 rtx inner;
6689
6690 switch (GET_CODE (x))
6691 {
6692 case CONST:
6693 inner = XEXP (x, 0);
6694 if (GET_CODE (inner) == PLUS
6695 && CONST_INT_P (XEXP (inner, 1)))
6696 inner = XEXP (inner, 0);
6697
6698 /* Only some unspecs are valid as "constants". */
6699 if (GET_CODE (inner) == UNSPEC)
6700 switch (XINT (inner, 1))
6701 {
6702 case UNSPEC_GOT:
6703 case UNSPEC_GOTOFF:
6704 case UNSPEC_PLTOFF:
6705 return TARGET_64BIT;
6706 case UNSPEC_TPOFF:
6707 x = XVECEXP (inner, 0, 0);
6708 return (GET_CODE (x) == SYMBOL_REF
6709 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6710 default:
6711 return false;
6712 }
6713 /* FALLTHRU */
6714
6715 case SYMBOL_REF:
6716 case LABEL_REF:
6717 return legitimate_pic_address_disp_p (x);
6718
6719 default:
6720 return true;
6721 }
6722 }
6723
6724 /* Determine if a given CONST RTX is a valid memory displacement
6725 in PIC mode. */
6726
6727 int
6728 legitimate_pic_address_disp_p (rtx disp)
6729 {
6730 bool saw_plus;
6731
6732 /* In 64bit mode we can allow direct addresses of symbols and labels
6733 when they are not dynamic symbols. */
6734 if (TARGET_64BIT)
6735 {
6736 rtx op0 = disp, op1;
6737
6738 switch (GET_CODE (disp))
6739 {
6740 case LABEL_REF:
6741 return true;
6742
6743 case CONST:
6744 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6745 break;
6746 op0 = XEXP (XEXP (disp, 0), 0);
6747 op1 = XEXP (XEXP (disp, 0), 1);
6748 if (!CONST_INT_P (op1)
6749 || INTVAL (op1) >= 16*1024*1024
6750 || INTVAL (op1) < -16*1024*1024)
6751 break;
6752 if (GET_CODE (op0) == LABEL_REF)
6753 return true;
6754 if (GET_CODE (op0) != SYMBOL_REF)
6755 break;
6756 /* FALLTHRU */
6757
6758 case SYMBOL_REF:
6759 /* TLS references should always be enclosed in UNSPEC. */
6760 if (SYMBOL_REF_TLS_MODEL (op0))
6761 return false;
6762 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6763 && ix86_cmodel != CM_LARGE_PIC)
6764 return true;
6765 break;
6766
6767 default:
6768 break;
6769 }
6770 }
6771 if (GET_CODE (disp) != CONST)
6772 return 0;
6773 disp = XEXP (disp, 0);
6774
6775 if (TARGET_64BIT)
6776 {
6777 /* Allowing PLUS expressions here would be unsafe; they could exceed the
6778 limited span of the GOT table. We should not need them anyway. */
6779 if (GET_CODE (disp) != UNSPEC
6780 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6781 && XINT (disp, 1) != UNSPEC_GOTOFF
6782 && XINT (disp, 1) != UNSPEC_PLTOFF))
6783 return 0;
6784
6785 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6786 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6787 return 0;
6788 return 1;
6789 }
6790
6791 saw_plus = false;
6792 if (GET_CODE (disp) == PLUS)
6793 {
6794 if (!CONST_INT_P (XEXP (disp, 1)))
6795 return 0;
6796 disp = XEXP (disp, 0);
6797 saw_plus = true;
6798 }
6799
6800 if (TARGET_MACHO && darwin_local_data_pic (disp))
6801 return 1;
6802
6803 if (GET_CODE (disp) != UNSPEC)
6804 return 0;
6805
6806 switch (XINT (disp, 1))
6807 {
6808 case UNSPEC_GOT:
6809 if (saw_plus)
6810 return false;
6811 /* We need to check for both symbols and labels because VxWorks loads
6812 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6813 details. */
6814 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6815 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6816 case UNSPEC_GOTOFF:
6817 /* Refuse GOTOFF in 64bit mode, since the relocation is always 64-bit when
6818 used. The ABI also specifies a 32-bit relocation, but we don't produce
6819 it in the small PIC model at all. */
6820 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6821 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6822 && !TARGET_64BIT)
6823 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6824 return false;
6825 case UNSPEC_GOTTPOFF:
6826 case UNSPEC_GOTNTPOFF:
6827 case UNSPEC_INDNTPOFF:
6828 if (saw_plus)
6829 return false;
6830 disp = XVECEXP (disp, 0, 0);
6831 return (GET_CODE (disp) == SYMBOL_REF
6832 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6833 case UNSPEC_NTPOFF:
6834 disp = XVECEXP (disp, 0, 0);
6835 return (GET_CODE (disp) == SYMBOL_REF
6836 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6837 case UNSPEC_DTPOFF:
6838 disp = XVECEXP (disp, 0, 0);
6839 return (GET_CODE (disp) == SYMBOL_REF
6840 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6841 }
6842
6843 return 0;
6844 }
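/* Typical displacements accepted above (with "x" as an example symbol):
   in 32-bit PIC, (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)),
   optionally inside a PLUS with a CONST_INT offset; in 64-bit PIC,
   (const (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL)). */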
6845
6846 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6847 memory address for an instruction. The MODE argument is the machine mode
6848 for the MEM expression that wants to use this address.
6849
6850 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6851 convert common non-canonical forms to canonical form so that they will
6852 be recognized. */
6853
6854 int
6855 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6856 rtx addr, int strict)
6857 {
6858 struct ix86_address parts;
6859 rtx base, index, disp;
6860 HOST_WIDE_INT scale;
6861 const char *reason = NULL;
6862 rtx reason_rtx = NULL_RTX;
6863
6864 if (ix86_decompose_address (addr, &parts) <= 0)
6865 {
6866 reason = "decomposition failed";
6867 goto report_error;
6868 }
6869
6870 base = parts.base;
6871 index = parts.index;
6872 disp = parts.disp;
6873 scale = parts.scale;
6874
6875 /* Validate base register.
6876
6877 Don't allow SUBREG's that span more than a word here. It can lead to spill
6878 failures when the base is one word out of a two word structure, which is
6879 represented internally as a DImode int. */
6880
6881 if (base)
6882 {
6883 rtx reg;
6884 reason_rtx = base;
6885
6886 if (REG_P (base))
6887 reg = base;
6888 else if (GET_CODE (base) == SUBREG
6889 && REG_P (SUBREG_REG (base))
6890 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6891 <= UNITS_PER_WORD)
6892 reg = SUBREG_REG (base);
6893 else
6894 {
6895 reason = "base is not a register";
6896 goto report_error;
6897 }
6898
6899 if (GET_MODE (base) != Pmode)
6900 {
6901 reason = "base is not in Pmode";
6902 goto report_error;
6903 }
6904
6905 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6906 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6907 {
6908 reason = "base is not valid";
6909 goto report_error;
6910 }
6911 }
6912
6913 /* Validate index register.
6914
6915 Don't allow SUBREG's that span more than a word here -- same as above. */
6916
6917 if (index)
6918 {
6919 rtx reg;
6920 reason_rtx = index;
6921
6922 if (REG_P (index))
6923 reg = index;
6924 else if (GET_CODE (index) == SUBREG
6925 && REG_P (SUBREG_REG (index))
6926 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6927 <= UNITS_PER_WORD)
6928 reg = SUBREG_REG (index);
6929 else
6930 {
6931 reason = "index is not a register";
6932 goto report_error;
6933 }
6934
6935 if (GET_MODE (index) != Pmode)
6936 {
6937 reason = "index is not in Pmode";
6938 goto report_error;
6939 }
6940
6941 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6942 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6943 {
6944 reason = "index is not valid";
6945 goto report_error;
6946 }
6947 }
6948
6949 /* Validate scale factor. */
6950 if (scale != 1)
6951 {
6952 reason_rtx = GEN_INT (scale);
6953 if (!index)
6954 {
6955 reason = "scale without index";
6956 goto report_error;
6957 }
6958
6959 if (scale != 2 && scale != 4 && scale != 8)
6960 {
6961 reason = "scale is not a valid multiplier";
6962 goto report_error;
6963 }
6964 }
6965
6966 /* Validate displacement. */
6967 if (disp)
6968 {
6969 reason_rtx = disp;
6970
6971 if (GET_CODE (disp) == CONST
6972 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6973 switch (XINT (XEXP (disp, 0), 1))
6974 {
6975 /* Refuse GOTOFF and GOT in 64bit mode, since the relocation is always
6976 64-bit when used. The ABI also specifies 32-bit relocations, but we don't
6977 produce them at all and use IP-relative addressing instead. */
6978 case UNSPEC_GOT:
6979 case UNSPEC_GOTOFF:
6980 gcc_assert (flag_pic);
6981 if (!TARGET_64BIT)
6982 goto is_legitimate_pic;
6983 reason = "64bit address unspec";
6984 goto report_error;
6985
6986 case UNSPEC_GOTPCREL:
6987 gcc_assert (flag_pic);
6988 goto is_legitimate_pic;
6989
6990 case UNSPEC_GOTTPOFF:
6991 case UNSPEC_GOTNTPOFF:
6992 case UNSPEC_INDNTPOFF:
6993 case UNSPEC_NTPOFF:
6994 case UNSPEC_DTPOFF:
6995 break;
6996
6997 default:
6998 reason = "invalid address unspec";
6999 goto report_error;
7000 }
7001
7002 else if (SYMBOLIC_CONST (disp)
7003 && (flag_pic
7004 || (TARGET_MACHO
7005 #if TARGET_MACHO
7006 && MACHOPIC_INDIRECT
7007 && !machopic_operand_p (disp)
7008 #endif
7009 )))
7010 {
7011
7012 is_legitimate_pic:
7013 if (TARGET_64BIT && (index || base))
7014 {
7015 /* foo@dtpoff(%rX) is ok. */
7016 if (GET_CODE (disp) != CONST
7017 || GET_CODE (XEXP (disp, 0)) != PLUS
7018 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7019 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7020 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7021 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7022 {
7023 reason = "non-constant pic memory reference";
7024 goto report_error;
7025 }
7026 }
7027 else if (! legitimate_pic_address_disp_p (disp))
7028 {
7029 reason = "displacement is an invalid pic construct";
7030 goto report_error;
7031 }
7032
7033 /* This code used to verify that a symbolic pic displacement
7034 includes the pic_offset_table_rtx register.
7035
7036 While this is a good idea, these constructs may unfortunately be
7037 created by the "adds using lea" optimization for incorrect
7038 code like:
7039
7040 int a;
7041 int foo(int i)
7042 {
7043 return *(&a+i);
7044 }
7045
7046 This code is nonsensical, but it results in addressing the
7047 GOT table with pic_offset_table_rtx as the base. We can't
7048 just refuse it easily, since it gets matched by the
7049 "addsi3" pattern, which later gets split into an lea when
7050 the output register differs from the input. While this
7051 could be handled by a separate addsi pattern for this case
7052 that never results in an lea, simply disabling this test
7053 seems to be the easier and still correct fix for the crash. */
7054 }
7055 else if (GET_CODE (disp) != LABEL_REF
7056 && !CONST_INT_P (disp)
7057 && (GET_CODE (disp) != CONST
7058 || !legitimate_constant_p (disp))
7059 && (GET_CODE (disp) != SYMBOL_REF
7060 || !legitimate_constant_p (disp)))
7061 {
7062 reason = "displacement is not constant";
7063 goto report_error;
7064 }
7065 else if (TARGET_64BIT
7066 && !x86_64_immediate_operand (disp, VOIDmode))
7067 {
7068 reason = "displacement is out of range";
7069 goto report_error;
7070 }
7071 }
7072
7073 /* Everything looks valid. */
7074 return TRUE;
7075
7076 report_error:
7077 return FALSE;
7078 }
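/* As a worked example (%ebx and %eax chosen purely for illustration),
   the address
   (plus (reg %ebx) (plus (mult (reg %eax) (const_int 4)) (const_int 12)))
   decomposes into base %ebx, index %eax, scale 4 and displacement 12,
   and passes all of the checks above in 32-bit mode. */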
7079 \f
7080 /* Return a unique alias set for the GOT. */
7081
7082 static HOST_WIDE_INT
7083 ix86_GOT_alias_set (void)
7084 {
7085 static HOST_WIDE_INT set = -1;
7086 if (set == -1)
7087 set = new_alias_set ();
7088 return set;
7089 }
7090
7091 /* Return a legitimate reference for ORIG (an address) using the
7092 register REG. If REG is 0, a new pseudo is generated.
7093
7094 There are two types of references that must be handled:
7095
7096 1. Global data references must load the address from the GOT, via
7097 the PIC reg. An insn is emitted to do this load, and the reg is
7098 returned.
7099
7100 2. Static data references, constant pool addresses, and code labels
7101 compute the address as an offset from the GOT, whose base is in
7102 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7103 differentiate them from global data objects. The returned
7104 address is the PIC reg + an unspec constant.
7105
7106 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7107 reg also appears in the address. */
7108
7109 static rtx
7110 legitimize_pic_address (rtx orig, rtx reg)
7111 {
7112 rtx addr = orig;
7113 rtx new = orig;
7114 rtx base;
7115
7116 #if TARGET_MACHO
7117 if (TARGET_MACHO && !TARGET_64BIT)
7118 {
7119 if (reg == 0)
7120 reg = gen_reg_rtx (Pmode);
7121 /* Use the generic Mach-O PIC machinery. */
7122 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7123 }
7124 #endif
7125
7126 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7127 new = addr;
7128 else if (TARGET_64BIT
7129 && ix86_cmodel != CM_SMALL_PIC
7130 && gotoff_operand (addr, Pmode))
7131 {
7132 rtx tmpreg;
7133 /* This symbol may be referenced via a displacement from the PIC
7134 base address (@GOTOFF). */
7135
7136 if (reload_in_progress)
7137 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7138 if (GET_CODE (addr) == CONST)
7139 addr = XEXP (addr, 0);
7140 if (GET_CODE (addr) == PLUS)
7141 {
7142 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7143 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7144 }
7145 else
7146 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7147 new = gen_rtx_CONST (Pmode, new);
7148 if (!reg)
7149 tmpreg = gen_reg_rtx (Pmode);
7150 else
7151 tmpreg = reg;
7152 emit_move_insn (tmpreg, new);
7153
7154 if (reg != 0)
7155 {
7156 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7157 tmpreg, 1, OPTAB_DIRECT);
7158 new = reg;
7159 }
7160 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7161 }
7162 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7163 {
7164 /* This symbol may be referenced via a displacement from the PIC
7165 base address (@GOTOFF). */
7166
7167 if (reload_in_progress)
7168 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7169 if (GET_CODE (addr) == CONST)
7170 addr = XEXP (addr, 0);
7171 if (GET_CODE (addr) == PLUS)
7172 {
7173 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7174 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7175 }
7176 else
7177 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7178 new = gen_rtx_CONST (Pmode, new);
7179 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7180
7181 if (reg != 0)
7182 {
7183 emit_move_insn (reg, new);
7184 new = reg;
7185 }
7186 }
7187 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7188 /* We can't use @GOTOFF for text labels on VxWorks;
7189 see gotoff_operand. */
7190 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7191 {
7192 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7193 {
7194 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7195 new = gen_rtx_CONST (Pmode, new);
7196 new = gen_const_mem (Pmode, new);
7197 set_mem_alias_set (new, ix86_GOT_alias_set ());
7198
7199 if (reg == 0)
7200 reg = gen_reg_rtx (Pmode);
7201 /* Use gen_movsi directly; otherwise the address is loaded
7202 into a register for CSE. We don't want to CSE these addresses;
7203 instead we CSE the addresses loaded from the GOT table, so skip this. */
7204 emit_insn (gen_movsi (reg, new));
7205 new = reg;
7206 }
7207 else
7208 {
7209 /* This symbol must be referenced via a load from the
7210 Global Offset Table (@GOT). */
7211
7212 if (reload_in_progress)
7213 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7214 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7215 new = gen_rtx_CONST (Pmode, new);
7216 if (TARGET_64BIT)
7217 new = force_reg (Pmode, new);
7218 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7219 new = gen_const_mem (Pmode, new);
7220 set_mem_alias_set (new, ix86_GOT_alias_set ());
7221
7222 if (reg == 0)
7223 reg = gen_reg_rtx (Pmode);
7224 emit_move_insn (reg, new);
7225 new = reg;
7226 }
7227 }
7228 else
7229 {
7230 if (CONST_INT_P (addr)
7231 && !x86_64_immediate_operand (addr, VOIDmode))
7232 {
7233 if (reg)
7234 {
7235 emit_move_insn (reg, addr);
7236 new = reg;
7237 }
7238 else
7239 new = force_reg (Pmode, addr);
7240 }
7241 else if (GET_CODE (addr) == CONST)
7242 {
7243 addr = XEXP (addr, 0);
7244
7245 /* We must match what we generated earlier. Assume the only
7246 unspecs that can get here are ours; not that we could do
7247 anything with them anyway.... */
7248 if (GET_CODE (addr) == UNSPEC
7249 || (GET_CODE (addr) == PLUS
7250 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7251 return orig;
7252 gcc_assert (GET_CODE (addr) == PLUS);
7253 }
7254 if (GET_CODE (addr) == PLUS)
7255 {
7256 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7257
7258 /* Check first to see if this is a constant offset from a @GOTOFF
7259 symbol reference. */
7260 if (gotoff_operand (op0, Pmode)
7261 && CONST_INT_P (op1))
7262 {
7263 if (!TARGET_64BIT)
7264 {
7265 if (reload_in_progress)
7266 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7267 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7268 UNSPEC_GOTOFF);
7269 new = gen_rtx_PLUS (Pmode, new, op1);
7270 new = gen_rtx_CONST (Pmode, new);
7271 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7272
7273 if (reg != 0)
7274 {
7275 emit_move_insn (reg, new);
7276 new = reg;
7277 }
7278 }
7279 else
7280 {
7281 if (INTVAL (op1) < -16*1024*1024
7282 || INTVAL (op1) >= 16*1024*1024)
7283 {
7284 if (!x86_64_immediate_operand (op1, Pmode))
7285 op1 = force_reg (Pmode, op1);
7286 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7287 }
7288 }
7289 }
7290 else
7291 {
7292 base = legitimize_pic_address (XEXP (addr, 0), reg);
7293 new = legitimize_pic_address (XEXP (addr, 1),
7294 base == reg ? NULL_RTX : reg);
7295
7296 if (CONST_INT_P (new))
7297 new = plus_constant (base, INTVAL (new));
7298 else
7299 {
7300 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7301 {
7302 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7303 new = XEXP (new, 1);
7304 }
7305 new = gen_rtx_PLUS (Pmode, base, new);
7306 }
7307 }
7308 }
7309 }
7310 return new;
7311 }
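/* For instance, in 32-bit PIC a global symbol "x" (an example name)
   comes back from this function roughly as
   (mem (plus pic_offset_table_rtx
              (const (unspec [(symbol_ref "x")] UNSPEC_GOT)))),
   while a local symbol is addressed as the PIC register plus an
   UNSPEC_GOTOFF constant, with no memory load. */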
7312 \f
7313 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7314
7315 static rtx
7316 get_thread_pointer (int to_reg)
7317 {
7318 rtx tp, reg, insn;
7319
7320 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7321 if (!to_reg)
7322 return tp;
7323
7324 reg = gen_reg_rtx (Pmode);
7325 insn = gen_rtx_SET (VOIDmode, reg, tp);
7326 insn = emit_insn (insn);
7327
7328 return reg;
7329 }
7330
7331 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7332 false if we expect this to be used for a memory address and true if
7333 we expect to load the address into a register. */
7334
7335 static rtx
7336 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7337 {
7338 rtx dest, base, off, pic, tp;
7339 int type;
7340
7341 switch (model)
7342 {
7343 case TLS_MODEL_GLOBAL_DYNAMIC:
7344 dest = gen_reg_rtx (Pmode);
7345 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7346
7347 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7348 {
7349 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7350
7351 start_sequence ();
7352 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7353 insns = get_insns ();
7354 end_sequence ();
7355
7356 emit_libcall_block (insns, dest, rax, x);
7357 }
7358 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7359 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7360 else
7361 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7362
7363 if (TARGET_GNU2_TLS)
7364 {
7365 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7366
7367 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7368 }
7369 break;
7370
7371 case TLS_MODEL_LOCAL_DYNAMIC:
7372 base = gen_reg_rtx (Pmode);
7373 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7374
7375 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7376 {
7377 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7378
7379 start_sequence ();
7380 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7381 insns = get_insns ();
7382 end_sequence ();
7383
7384 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7385 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7386 emit_libcall_block (insns, base, rax, note);
7387 }
7388 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7389 emit_insn (gen_tls_local_dynamic_base_64 (base));
7390 else
7391 emit_insn (gen_tls_local_dynamic_base_32 (base));
7392
7393 if (TARGET_GNU2_TLS)
7394 {
7395 rtx x = ix86_tls_module_base ();
7396
7397 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7398 gen_rtx_MINUS (Pmode, x, tp));
7399 }
7400
7401 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7402 off = gen_rtx_CONST (Pmode, off);
7403
7404 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7405
7406 if (TARGET_GNU2_TLS)
7407 {
7408 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7409
7410 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7411 }
7412
7413 break;
7414
7415 case TLS_MODEL_INITIAL_EXEC:
7416 if (TARGET_64BIT)
7417 {
7418 pic = NULL;
7419 type = UNSPEC_GOTNTPOFF;
7420 }
7421 else if (flag_pic)
7422 {
7423 if (reload_in_progress)
7424 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7425 pic = pic_offset_table_rtx;
7426 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7427 }
7428 else if (!TARGET_ANY_GNU_TLS)
7429 {
7430 pic = gen_reg_rtx (Pmode);
7431 emit_insn (gen_set_got (pic));
7432 type = UNSPEC_GOTTPOFF;
7433 }
7434 else
7435 {
7436 pic = NULL;
7437 type = UNSPEC_INDNTPOFF;
7438 }
7439
7440 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7441 off = gen_rtx_CONST (Pmode, off);
7442 if (pic)
7443 off = gen_rtx_PLUS (Pmode, pic, off);
7444 off = gen_const_mem (Pmode, off);
7445 set_mem_alias_set (off, ix86_GOT_alias_set ());
7446
7447 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7448 {
7449 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7450 off = force_reg (Pmode, off);
7451 return gen_rtx_PLUS (Pmode, base, off);
7452 }
7453 else
7454 {
7455 base = get_thread_pointer (true);
7456 dest = gen_reg_rtx (Pmode);
7457 emit_insn (gen_subsi3 (dest, base, off));
7458 }
7459 break;
7460
7461 case TLS_MODEL_LOCAL_EXEC:
7462 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7463 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7464 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7465 off = gen_rtx_CONST (Pmode, off);
7466
7467 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7468 {
7469 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7470 return gen_rtx_PLUS (Pmode, base, off);
7471 }
7472 else
7473 {
7474 base = get_thread_pointer (true);
7475 dest = gen_reg_rtx (Pmode);
7476 emit_insn (gen_subsi3 (dest, base, off));
7477 }
7478 break;
7479
7480 default:
7481 gcc_unreachable ();
7482 }
7483
7484 return dest;
7485 }
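/* As a rough sketch of the local-exec case with GNU TLS ("x" is an
   example symbol): the result is
   (plus <thread pointer> (const (unspec [(symbol_ref "x")] UNSPEC_NTPOFF))),
   which later prints with an @NTPOFF suffix (@TPOFF on 64-bit) relative
   to the thread pointer. */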
7486
7487 /* Try machine-dependent ways of modifying an illegitimate address
7488 to be legitimate. If we find one, return the new, valid address.
7489 This macro is used in only one place: `memory_address' in explow.c.
7490
7491 OLDX is the address as it was before break_out_memory_refs was called.
7492 In some cases it is useful to look at this to decide what needs to be done.
7493
7494 MODE and WIN are passed so that this macro can use
7495 GO_IF_LEGITIMATE_ADDRESS.
7496
7497 It is always safe for this macro to do nothing. It exists to recognize
7498 opportunities to optimize the output.
7499
7500 For the 80386, we handle X+REG by loading X into a register R and
7501 using R+REG. R will go in a general reg and indexing will be used.
7502 However, if REG is a broken-out memory address or multiplication,
7503 nothing needs to be done because REG can certainly go in a general reg.
7504
7505 When -fpic is used, special handling is needed for symbolic references.
7506 See comments by legitimize_pic_address in i386.c for details. */
7507
7508 rtx
7509 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7510 {
7511 int changed = 0;
7512 unsigned log;
7513
7514 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7515 if (log)
7516 return legitimize_tls_address (x, log, false);
7517 if (GET_CODE (x) == CONST
7518 && GET_CODE (XEXP (x, 0)) == PLUS
7519 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7520 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7521 {
7522 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7523 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7524 }
7525
7526 if (flag_pic && SYMBOLIC_CONST (x))
7527 return legitimize_pic_address (x, 0);
7528
7529 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7530 if (GET_CODE (x) == ASHIFT
7531 && CONST_INT_P (XEXP (x, 1))
7532 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7533 {
7534 changed = 1;
7535 log = INTVAL (XEXP (x, 1));
7536 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7537 GEN_INT (1 << log));
7538 }
7539
7540 if (GET_CODE (x) == PLUS)
7541 {
7542 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7543
7544 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7545 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7546 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7547 {
7548 changed = 1;
7549 log = INTVAL (XEXP (XEXP (x, 0), 1));
7550 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7551 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7552 GEN_INT (1 << log));
7553 }
7554
7555 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7556 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7557 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7558 {
7559 changed = 1;
7560 log = INTVAL (XEXP (XEXP (x, 1), 1));
7561 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7562 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7563 GEN_INT (1 << log));
7564 }
7565
7566 /* Put multiply first if it isn't already. */
7567 if (GET_CODE (XEXP (x, 1)) == MULT)
7568 {
7569 rtx tmp = XEXP (x, 0);
7570 XEXP (x, 0) = XEXP (x, 1);
7571 XEXP (x, 1) = tmp;
7572 changed = 1;
7573 }
7574
7575 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7576 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7577 created by virtual register instantiation, register elimination, and
7578 similar optimizations. */
7579 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7580 {
7581 changed = 1;
7582 x = gen_rtx_PLUS (Pmode,
7583 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7584 XEXP (XEXP (x, 1), 0)),
7585 XEXP (XEXP (x, 1), 1));
7586 }
7587
7588 /* Canonicalize
7589 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7590 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7591 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7592 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7593 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7594 && CONSTANT_P (XEXP (x, 1)))
7595 {
7596 rtx constant;
7597 rtx other = NULL_RTX;
7598
7599 if (CONST_INT_P (XEXP (x, 1)))
7600 {
7601 constant = XEXP (x, 1);
7602 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7603 }
7604 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7605 {
7606 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7607 other = XEXP (x, 1);
7608 }
7609 else
7610 constant = 0;
7611
7612 if (constant)
7613 {
7614 changed = 1;
7615 x = gen_rtx_PLUS (Pmode,
7616 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7617 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7618 plus_constant (other, INTVAL (constant)));
7619 }
7620 }
7621
7622 if (changed && legitimate_address_p (mode, x, FALSE))
7623 return x;
7624
7625 if (GET_CODE (XEXP (x, 0)) == MULT)
7626 {
7627 changed = 1;
7628 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7629 }
7630
7631 if (GET_CODE (XEXP (x, 1)) == MULT)
7632 {
7633 changed = 1;
7634 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7635 }
7636
7637 if (changed
7638 && REG_P (XEXP (x, 1))
7639 && REG_P (XEXP (x, 0)))
7640 return x;
7641
7642 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7643 {
7644 changed = 1;
7645 x = legitimize_pic_address (x, 0);
7646 }
7647
7648 if (changed && legitimate_address_p (mode, x, FALSE))
7649 return x;
7650
7651 if (REG_P (XEXP (x, 0)))
7652 {
7653 rtx temp = gen_reg_rtx (Pmode);
7654 rtx val = force_operand (XEXP (x, 1), temp);
7655 if (val != temp)
7656 emit_move_insn (temp, val);
7657
7658 XEXP (x, 1) = temp;
7659 return x;
7660 }
7661
7662 else if (REG_P (XEXP (x, 1)))
7663 {
7664 rtx temp = gen_reg_rtx (Pmode);
7665 rtx val = force_operand (XEXP (x, 0), temp);
7666 if (val != temp)
7667 emit_move_insn (temp, val);
7668
7669 XEXP (x, 0) = temp;
7670 return x;
7671 }
7672 }
7673
7674 return x;
7675 }
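/* A small example of the canonicalizations above: with A and B as
   arbitrary registers, (plus (reg A) (ashift (reg B) (const_int 2)))
   is rewritten as (plus (mult (reg B) (const_int 4)) (reg A)),
   i.e. the shift becomes a multiply and the multiply is put first. */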
7676 \f
7677 /* Print an integer constant expression in assembler syntax. Addition
7678 and subtraction are the only arithmetic that may appear in these
7679 expressions. FILE is the stdio stream to write to, X is the rtx, and
7680 CODE is the operand print code from the output string. */
7681
7682 static void
7683 output_pic_addr_const (FILE *file, rtx x, int code)
7684 {
7685 char buf[256];
7686
7687 switch (GET_CODE (x))
7688 {
7689 case PC:
7690 gcc_assert (flag_pic);
7691 putc ('.', file);
7692 break;
7693
7694 case SYMBOL_REF:
7695 if (! TARGET_MACHO || TARGET_64BIT)
7696 output_addr_const (file, x);
7697 else
7698 {
7699 const char *name = XSTR (x, 0);
7700
7701 /* Mark the decl as referenced so that cgraph will output the function. */
7702 if (SYMBOL_REF_DECL (x))
7703 mark_decl_referenced (SYMBOL_REF_DECL (x));
7704
7705 #if TARGET_MACHO
7706 if (MACHOPIC_INDIRECT
7707 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7708 name = machopic_indirection_name (x, /*stub_p=*/true);
7709 #endif
7710 assemble_name (file, name);
7711 }
7712 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7713 fputs ("@PLT", file);
7714 break;
7715
7716 case LABEL_REF:
7717 x = XEXP (x, 0);
7718 /* FALLTHRU */
7719 case CODE_LABEL:
7720 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7721 assemble_name (asm_out_file, buf);
7722 break;
7723
7724 case CONST_INT:
7725 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7726 break;
7727
7728 case CONST:
7729 /* This used to output parentheses around the expression,
7730 but that does not work on the 386 (either ATT or BSD assembler). */
7731 output_pic_addr_const (file, XEXP (x, 0), code);
7732 break;
7733
7734 case CONST_DOUBLE:
7735 if (GET_MODE (x) == VOIDmode)
7736 {
7737 /* We can use %d if the number is <32 bits and positive. */
7738 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7739 fprintf (file, "0x%lx%08lx",
7740 (unsigned long) CONST_DOUBLE_HIGH (x),
7741 (unsigned long) CONST_DOUBLE_LOW (x));
7742 else
7743 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7744 }
7745 else
7746 /* We can't handle floating point constants;
7747 PRINT_OPERAND must handle them. */
7748 output_operand_lossage ("floating constant misused");
7749 break;
7750
7751 case PLUS:
7752 /* Some assemblers need integer constants to appear first. */
7753 if (CONST_INT_P (XEXP (x, 0)))
7754 {
7755 output_pic_addr_const (file, XEXP (x, 0), code);
7756 putc ('+', file);
7757 output_pic_addr_const (file, XEXP (x, 1), code);
7758 }
7759 else
7760 {
7761 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7762 output_pic_addr_const (file, XEXP (x, 1), code);
7763 putc ('+', file);
7764 output_pic_addr_const (file, XEXP (x, 0), code);
7765 }
7766 break;
7767
7768 case MINUS:
7769 if (!TARGET_MACHO)
7770 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7771 output_pic_addr_const (file, XEXP (x, 0), code);
7772 putc ('-', file);
7773 output_pic_addr_const (file, XEXP (x, 1), code);
7774 if (!TARGET_MACHO)
7775 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7776 break;
7777
7778 case UNSPEC:
7779 gcc_assert (XVECLEN (x, 0) == 1);
7780 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7781 switch (XINT (x, 1))
7782 {
7783 case UNSPEC_GOT:
7784 fputs ("@GOT", file);
7785 break;
7786 case UNSPEC_GOTOFF:
7787 fputs ("@GOTOFF", file);
7788 break;
7789 case UNSPEC_PLTOFF:
7790 fputs ("@PLTOFF", file);
7791 break;
7792 case UNSPEC_GOTPCREL:
7793 fputs ("@GOTPCREL(%rip)", file);
7794 break;
7795 case UNSPEC_GOTTPOFF:
7796 /* FIXME: This might be @TPOFF in Sun ld too. */
7797 fputs ("@GOTTPOFF", file);
7798 break;
7799 case UNSPEC_TPOFF:
7800 fputs ("@TPOFF", file);
7801 break;
7802 case UNSPEC_NTPOFF:
7803 if (TARGET_64BIT)
7804 fputs ("@TPOFF", file);
7805 else
7806 fputs ("@NTPOFF", file);
7807 break;
7808 case UNSPEC_DTPOFF:
7809 fputs ("@DTPOFF", file);
7810 break;
7811 case UNSPEC_GOTNTPOFF:
7812 if (TARGET_64BIT)
7813 fputs ("@GOTTPOFF(%rip)", file);
7814 else
7815 fputs ("@GOTNTPOFF", file);
7816 break;
7817 case UNSPEC_INDNTPOFF:
7818 fputs ("@INDNTPOFF", file);
7819 break;
7820 default:
7821 output_operand_lossage ("invalid UNSPEC as operand");
7822 break;
7823 }
7824 break;
7825
7826 default:
7827 output_operand_lossage ("invalid expression as operand");
7828 }
7829 }
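/* For example, (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)) is
   printed as "x@GOTOFF", and (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL)
   as "x@GOTPCREL(%rip)". */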
7830
7831 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7832 We need to emit DTP-relative relocations. */
7833
7834 static void
7835 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7836 {
7837 fputs (ASM_LONG, file);
7838 output_addr_const (file, x);
7839 fputs ("@DTPOFF", file);
7840 switch (size)
7841 {
7842 case 4:
7843 break;
7844 case 8:
7845 fputs (", 0", file);
7846 break;
7847 default:
7848 gcc_unreachable ();
7849 }
7850 }
7851
7852 /* In the name of slightly smaller debug output, and to cater to
7853 general assembler lossage, recognize PIC+GOTOFF and turn it back
7854 into a direct symbol reference.
7855
7856 On Darwin, this is necessary to avoid a crash, because Darwin
7857 has a different PIC label for each routine but the DWARF debugging
7858 information is not associated with any particular routine, so it's
7859 necessary to remove references to the PIC label from RTL stored by
7860 the DWARF output code. */
7861
7862 static rtx
7863 ix86_delegitimize_address (rtx orig_x)
7864 {
7865 rtx x = orig_x;
7866 /* reg_addend is NULL or a multiple of some register. */
7867 rtx reg_addend = NULL_RTX;
7868 /* const_addend is NULL or a const_int. */
7869 rtx const_addend = NULL_RTX;
7870 /* This is the result, or NULL. */
7871 rtx result = NULL_RTX;
7872
7873 if (MEM_P (x))
7874 x = XEXP (x, 0);
7875
7876 if (TARGET_64BIT)
7877 {
7878 if (GET_CODE (x) != CONST
7879 || GET_CODE (XEXP (x, 0)) != UNSPEC
7880 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7881 || !MEM_P (orig_x))
7882 return orig_x;
7883 return XVECEXP (XEXP (x, 0), 0, 0);
7884 }
7885
7886 if (GET_CODE (x) != PLUS
7887 || GET_CODE (XEXP (x, 1)) != CONST)
7888 return orig_x;
7889
7890 if (REG_P (XEXP (x, 0))
7891 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7892 /* %ebx + GOT/GOTOFF */
7893 ;
7894 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7895 {
7896 /* %ebx + %reg * scale + GOT/GOTOFF */
7897 reg_addend = XEXP (x, 0);
7898 if (REG_P (XEXP (reg_addend, 0))
7899 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7900 reg_addend = XEXP (reg_addend, 1);
7901 else if (REG_P (XEXP (reg_addend, 1))
7902 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7903 reg_addend = XEXP (reg_addend, 0);
7904 else
7905 return orig_x;
7906 if (!REG_P (reg_addend)
7907 && GET_CODE (reg_addend) != MULT
7908 && GET_CODE (reg_addend) != ASHIFT)
7909 return orig_x;
7910 }
7911 else
7912 return orig_x;
7913
7914 x = XEXP (XEXP (x, 1), 0);
7915 if (GET_CODE (x) == PLUS
7916 && CONST_INT_P (XEXP (x, 1)))
7917 {
7918 const_addend = XEXP (x, 1);
7919 x = XEXP (x, 0);
7920 }
7921
7922 if (GET_CODE (x) == UNSPEC
7923 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7924 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7925 result = XVECEXP (x, 0, 0);
7926
7927 if (TARGET_MACHO && darwin_local_data_pic (x)
7928 && !MEM_P (orig_x))
7929 result = XEXP (x, 0);
7930
7931 if (! result)
7932 return orig_x;
7933
7934 if (const_addend)
7935 result = gen_rtx_PLUS (Pmode, result, const_addend);
7936 if (reg_addend)
7937 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7938 return result;
7939 }
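/* For instance, the 32-bit PIC load
   (mem (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOT))))
   (with %ebx as the PIC register and "x" an example symbol) is turned
   back into just (symbol_ref "x") for the debug output. */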
7940 \f
7941 static void
7942 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7943 int fp, FILE *file)
7944 {
7945 const char *suffix;
7946
7947 if (mode == CCFPmode || mode == CCFPUmode)
7948 {
7949 enum rtx_code second_code, bypass_code;
7950 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7951 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7952 code = ix86_fp_compare_code_to_integer (code);
7953 mode = CCmode;
7954 }
7955 if (reverse)
7956 code = reverse_condition (code);
7957
7958 switch (code)
7959 {
7960 case EQ:
7961 suffix = "e";
7962 break;
7963 case NE:
7964 suffix = "ne";
7965 break;
7966 case GT:
7967 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7968 suffix = "g";
7969 break;
7970 case GTU:
7971 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7972 Those same assemblers have the same but opposite lossage on cmov. */
7973 gcc_assert (mode == CCmode);
7974 suffix = fp ? "nbe" : "a";
7975 break;
7976 case LT:
7977 switch (mode)
7978 {
7979 case CCNOmode:
7980 case CCGOCmode:
7981 suffix = "s";
7982 break;
7983
7984 case CCmode:
7985 case CCGCmode:
7986 suffix = "l";
7987 break;
7988
7989 default:
7990 gcc_unreachable ();
7991 }
7992 break;
7993 case LTU:
7994 gcc_assert (mode == CCmode);
7995 suffix = "b";
7996 break;
7997 case GE:
7998 switch (mode)
7999 {
8000 case CCNOmode:
8001 case CCGOCmode:
8002 suffix = "ns";
8003 break;
8004
8005 case CCmode:
8006 case CCGCmode:
8007 suffix = "ge";
8008 break;
8009
8010 default:
8011 gcc_unreachable ();
8012 }
8013 break;
8014 case GEU:
8015 /* ??? As above. */
8016 gcc_assert (mode == CCmode);
8017 suffix = fp ? "nb" : "ae";
8018 break;
8019 case LE:
8020 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8021 suffix = "le";
8022 break;
8023 case LEU:
8024 gcc_assert (mode == CCmode);
8025 suffix = "be";
8026 break;
8027 case UNORDERED:
8028 suffix = fp ? "u" : "p";
8029 break;
8030 case ORDERED:
8031 suffix = fp ? "nu" : "np";
8032 break;
8033 default:
8034 gcc_unreachable ();
8035 }
8036 fputs (suffix, file);
8037 }
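/* E.g. a signed greater-than comparison in CCGCmode prints as "g"
   (so the caller can emit "setg" or "jg"), and with REVERSE set the
   same comparison prints as "le". */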
8038
8039 /* Print the name of register X to FILE based on its machine mode and number.
8040 If CODE is 'w', pretend the mode is HImode.
8041 If CODE is 'b', pretend the mode is QImode.
8042 If CODE is 'k', pretend the mode is SImode.
8043 If CODE is 'q', pretend the mode is DImode.
8044 If CODE is 'h', pretend the reg is the 'high' byte register.
8045 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8046
8047 void
8048 print_reg (rtx x, int code, FILE *file)
8049 {
8050 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8051 && REGNO (x) != FRAME_POINTER_REGNUM
8052 && REGNO (x) != FLAGS_REG
8053 && REGNO (x) != FPSR_REG
8054 && REGNO (x) != FPCR_REG);
8055
8056 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8057 putc ('%', file);
8058
8059 if (code == 'w' || MMX_REG_P (x))
8060 code = 2;
8061 else if (code == 'b')
8062 code = 1;
8063 else if (code == 'k')
8064 code = 4;
8065 else if (code == 'q')
8066 code = 8;
8067 else if (code == 'y')
8068 code = 3;
8069 else if (code == 'h')
8070 code = 0;
8071 else
8072 code = GET_MODE_SIZE (GET_MODE (x));
8073
8074 /* Irritatingly, the AMD extended registers use a different naming
8075 convention from the normal registers. */
8076 if (REX_INT_REG_P (x))
8077 {
8078 gcc_assert (TARGET_64BIT);
8079 switch (code)
8080 {
8081 case 0:
8082 error ("extended registers have no high halves");
8083 break;
8084 case 1:
8085 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8086 break;
8087 case 2:
8088 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8089 break;
8090 case 4:
8091 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8092 break;
8093 case 8:
8094 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8095 break;
8096 default:
8097 error ("unsupported operand size for extended register");
8098 break;
8099 }
8100 return;
8101 }
8102 switch (code)
8103 {
8104 case 3:
8105 if (STACK_TOP_P (x))
8106 {
8107 fputs ("st(0)", file);
8108 break;
8109 }
8110 /* FALLTHRU */
8111 case 8:
8112 case 4:
8113 case 12:
8114 if (! ANY_FP_REG_P (x))
8115 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8116 /* FALLTHRU */
8117 case 16:
8118 case 2:
8119 normal:
8120 fputs (hi_reg_name[REGNO (x)], file);
8121 break;
8122 case 1:
8123 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8124 goto normal;
8125 fputs (qi_reg_name[REGNO (x)], file);
8126 break;
8127 case 0:
8128 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8129 goto normal;
8130 fputs (qi_high_reg_name[REGNO (x)], file);
8131 break;
8132 default:
8133 gcc_unreachable ();
8134 }
8135 }
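/* For example, in AT&T syntax register eax printed with code 'b' gives
   "%al", with 'w' gives "%ax", and with 'q' gives "%rax" in 64-bit mode;
   an extended register such as r8 printed with code 'k' gives "%r8d". */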
8136
8137 /* Locate some local-dynamic symbol still in use by this function
8138 so that we can print its name in some tls_local_dynamic_base
8139 pattern. */
8140
8141 static const char *
8142 get_some_local_dynamic_name (void)
8143 {
8144 rtx insn;
8145
8146 if (cfun->machine->some_ld_name)
8147 return cfun->machine->some_ld_name;
8148
8149 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8150 if (INSN_P (insn)
8151 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8152 return cfun->machine->some_ld_name;
8153
8154 gcc_unreachable ();
8155 }
8156
8157 static int
8158 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8159 {
8160 rtx x = *px;
8161
8162 if (GET_CODE (x) == SYMBOL_REF
8163 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8164 {
8165 cfun->machine->some_ld_name = XSTR (x, 0);
8166 return 1;
8167 }
8168
8169 return 0;
8170 }
8171
8172 /* Meaning of CODE:
8173 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8174 C -- print opcode suffix for set/cmov insn.
8175 c -- like C, but print reversed condition
8176 F,f -- likewise, but for floating-point.
8177 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8178 otherwise nothing
8179 R -- print the prefix for register names.
8180 z -- print the opcode suffix for the size of the current operand.
8181 * -- print a star (in certain assembler syntax)
8182 A -- print an absolute memory reference.
8183 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8184 s -- print a shift double count, followed by the assembler's argument
8185 delimiter.
8186 b -- print the QImode name of the register for the indicated operand.
8187 %b0 would print %al if operands[0] is reg 0.
8188 w -- likewise, print the HImode name of the register.
8189 k -- likewise, print the SImode name of the register.
8190 q -- likewise, print the DImode name of the register.
8191 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8192 y -- print "st(0)" instead of "st" as a register.
8193 D -- print condition for SSE cmp instruction.
8194 P -- if PIC, print an @PLT suffix.
8195 X -- don't print any sort of PIC '@' suffix for a symbol.
8196 & -- print some in-use local-dynamic symbol name.
8197 H -- print a memory address offset by 8; used for sse high-parts
8198 */
8199
8200 void
8201 print_operand (FILE *file, rtx x, int code)
8202 {
8203 if (code)
8204 {
8205 switch (code)
8206 {
8207 case '*':
8208 if (ASSEMBLER_DIALECT == ASM_ATT)
8209 putc ('*', file);
8210 return;
8211
8212 case '&':
8213 assemble_name (file, get_some_local_dynamic_name ());
8214 return;
8215
8216 case 'A':
8217 switch (ASSEMBLER_DIALECT)
8218 {
8219 case ASM_ATT:
8220 putc ('*', file);
8221 break;
8222
8223 case ASM_INTEL:
8224 /* Intel syntax. For absolute addresses, registers should not
8225 be surrounded by brackets. */
8226 if (!REG_P (x))
8227 {
8228 putc ('[', file);
8229 PRINT_OPERAND (file, x, 0);
8230 putc (']', file);
8231 return;
8232 }
8233 break;
8234
8235 default:
8236 gcc_unreachable ();
8237 }
8238
8239 PRINT_OPERAND (file, x, 0);
8240 return;
8241
8242
8243 case 'L':
8244 if (ASSEMBLER_DIALECT == ASM_ATT)
8245 putc ('l', file);
8246 return;
8247
8248 case 'W':
8249 if (ASSEMBLER_DIALECT == ASM_ATT)
8250 putc ('w', file);
8251 return;
8252
8253 case 'B':
8254 if (ASSEMBLER_DIALECT == ASM_ATT)
8255 putc ('b', file);
8256 return;
8257
8258 case 'Q':
8259 if (ASSEMBLER_DIALECT == ASM_ATT)
8260 putc ('l', file);
8261 return;
8262
8263 case 'S':
8264 if (ASSEMBLER_DIALECT == ASM_ATT)
8265 putc ('s', file);
8266 return;
8267
8268 case 'T':
8269 if (ASSEMBLER_DIALECT == ASM_ATT)
8270 putc ('t', file);
8271 return;
8272
8273 case 'z':
8274 /* 387 opcodes don't get size suffixes if the operands are
8275 registers. */
8276 if (STACK_REG_P (x))
8277 return;
8278
8279 /* Likewise if using Intel opcodes. */
8280 if (ASSEMBLER_DIALECT == ASM_INTEL)
8281 return;
8282
8283 /* Derive the opcode suffix from the size of the operand. */
8284 switch (GET_MODE_SIZE (GET_MODE (x)))
8285 {
8286 case 1:
8287 putc ('b', file);
8288 return;
8289
8290 case 2:
8291 #ifdef HAVE_GAS_FILDS_FISTS
8292 putc ('s', file);
8293 #endif
8294 return;
8295
8296 case 4:
8297 if (GET_MODE (x) == SFmode)
8298 {
8299 putc ('s', file);
8300 return;
8301 }
8302 else
8303 putc ('l', file);
8304 return;
8305
8306 case 12:
8307 case 16:
8308 putc ('t', file);
8309 return;
8310
8311 case 8:
8312 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8313 {
8314 #ifdef GAS_MNEMONICS
8315 putc ('q', file);
8316 #else
8317 putc ('l', file);
8318 putc ('l', file);
8319 #endif
8320 }
8321 else
8322 putc ('l', file);
8323 return;
8324
8325 default:
8326 gcc_unreachable ();
8327 }
8328
8329 case 'b':
8330 case 'w':
8331 case 'k':
8332 case 'q':
8333 case 'h':
8334 case 'y':
8335 case 'X':
8336 case 'P':
8337 break;
8338
8339 case 's':
8340 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8341 {
8342 PRINT_OPERAND (file, x, 0);
8343 putc (',', file);
8344 }
8345 return;
8346
8347 case 'D':
8348 /* A little bit of brain damage here. The SSE compare instructions
8349 use completely different names for the comparisons than the
8350 fp conditional moves do. */
8351 switch (GET_CODE (x))
8352 {
8353 case EQ:
8354 case UNEQ:
8355 fputs ("eq", file);
8356 break;
8357 case LT:
8358 case UNLT:
8359 fputs ("lt", file);
8360 break;
8361 case LE:
8362 case UNLE:
8363 fputs ("le", file);
8364 break;
8365 case UNORDERED:
8366 fputs ("unord", file);
8367 break;
8368 case NE:
8369 case LTGT:
8370 fputs ("neq", file);
8371 break;
8372 case UNGE:
8373 case GE:
8374 fputs ("nlt", file);
8375 break;
8376 case UNGT:
8377 case GT:
8378 fputs ("nle", file);
8379 break;
8380 case ORDERED:
8381 fputs ("ord", file);
8382 break;
8383 default:
8384 gcc_unreachable ();
8385 }
8386 return;
8387 case 'O':
8388 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8389 if (ASSEMBLER_DIALECT == ASM_ATT)
8390 {
8391 switch (GET_MODE (x))
8392 {
8393 case HImode: putc ('w', file); break;
8394 case SImode:
8395 case SFmode: putc ('l', file); break;
8396 case DImode:
8397 case DFmode: putc ('q', file); break;
8398 default: gcc_unreachable ();
8399 }
8400 putc ('.', file);
8401 }
8402 #endif
8403 return;
8404 case 'C':
8405 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8406 return;
8407 case 'F':
8408 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8409 if (ASSEMBLER_DIALECT == ASM_ATT)
8410 putc ('.', file);
8411 #endif
8412 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8413 return;
8414
8415 /* Like above, but reverse condition */
8416 case 'c':
8417 /* Check to see if argument to %c is really a constant
8418 and not a condition code which needs to be reversed. */
8419 if (!COMPARISON_P (x))
8420 {
8421 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8422 return;
8423 }
8424 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8425 return;
8426 case 'f':
8427 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8428 if (ASSEMBLER_DIALECT == ASM_ATT)
8429 putc ('.', file);
8430 #endif
8431 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8432 return;
8433
8434 case 'H':
8435 /* It doesn't actually matter what mode we use here, as we're
8436 only going to use this for printing. */
8437 x = adjust_address_nv (x, DImode, 8);
8438 break;
8439
8440 case '+':
8441 {
8442 rtx x;
8443
8444 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8445 return;
8446
8447 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8448 if (x)
8449 {
8450 int pred_val = INTVAL (XEXP (x, 0));
8451
8452 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8453 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8454 {
8455 int taken = pred_val > REG_BR_PROB_BASE / 2;
8456 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8457
8458 /* Emit hints only in the case where the default branch prediction
8459 heuristics would fail. */
8460 if (taken != cputaken)
8461 {
8462 /* We use 3e (DS) prefix for taken branches and
8463 2e (CS) prefix for not taken branches. */
8464 if (taken)
8465 fputs ("ds ; ", file);
8466 else
8467 fputs ("cs ; ", file);
8468 }
8469 }
8470 }
8471 return;
8472 }
8473 default:
8474 output_operand_lossage ("invalid operand code '%c'", code);
8475 }
8476 }
8477
8478 if (REG_P (x))
8479 print_reg (x, code, file);
8480
8481 else if (MEM_P (x))
8482 {
8483 /* No `byte ptr' prefix for call instructions. */
8484 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8485 {
8486 const char * size;
8487 switch (GET_MODE_SIZE (GET_MODE (x)))
8488 {
8489 case 1: size = "BYTE"; break;
8490 case 2: size = "WORD"; break;
8491 case 4: size = "DWORD"; break;
8492 case 8: size = "QWORD"; break;
8493 case 12: size = "XWORD"; break;
8494 case 16: size = "XMMWORD"; break;
8495 default:
8496 gcc_unreachable ();
8497 }
8498
8499 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8500 if (code == 'b')
8501 size = "BYTE";
8502 else if (code == 'w')
8503 size = "WORD";
8504 else if (code == 'k')
8505 size = "DWORD";
8506
8507 fputs (size, file);
8508 fputs (" PTR ", file);
8509 }
8510
8511 x = XEXP (x, 0);
8512 /* Avoid (%rip) for call operands. */
8513 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8514 && !CONST_INT_P (x))
8515 output_addr_const (file, x);
8516 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8517 output_operand_lossage ("invalid constraints for operand");
8518 else
8519 output_address (x);
8520 }
8521
8522 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8523 {
8524 REAL_VALUE_TYPE r;
8525 long l;
8526
8527 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8528 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8529
8530 if (ASSEMBLER_DIALECT == ASM_ATT)
8531 putc ('$', file);
8532 fprintf (file, "0x%08lx", l);
8533 }
8534
8535 /* These float cases don't actually occur as immediate operands. */
8536 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8537 {
8538 char dstr[30];
8539
8540 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8541 fprintf (file, "%s", dstr);
8542 }
8543
8544 else if (GET_CODE (x) == CONST_DOUBLE
8545 && GET_MODE (x) == XFmode)
8546 {
8547 char dstr[30];
8548
8549 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8550 fprintf (file, "%s", dstr);
8551 }
8552
8553 else
8554 {
8555 /* We have patterns that allow zero sets of memory, for instance.
8556 In 64-bit mode, we should probably support all 8-byte vectors,
8557 since we can in fact encode that into an immediate. */
8558 if (GET_CODE (x) == CONST_VECTOR)
8559 {
8560 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8561 x = const0_rtx;
8562 }
8563
8564 if (code != 'P')
8565 {
8566 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8567 {
8568 if (ASSEMBLER_DIALECT == ASM_ATT)
8569 putc ('$', file);
8570 }
8571 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8572 || GET_CODE (x) == LABEL_REF)
8573 {
8574 if (ASSEMBLER_DIALECT == ASM_ATT)
8575 putc ('$', file);
8576 else
8577 fputs ("OFFSET FLAT:", file);
8578 }
8579 }
8580 if (CONST_INT_P (x))
8581 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8582 else if (flag_pic)
8583 output_pic_addr_const (file, x, code);
8584 else
8585 output_addr_const (file, x);
8586 }
8587 }
8588 \f
8589 /* Print a memory operand whose address is ADDR. */
8590
8591 void
8592 print_operand_address (FILE *file, rtx addr)
8593 {
8594 struct ix86_address parts;
8595 rtx base, index, disp;
8596 int scale;
8597 int ok = ix86_decompose_address (addr, &parts);
8598
8599 gcc_assert (ok);
8600
8601 base = parts.base;
8602 index = parts.index;
8603 disp = parts.disp;
8604 scale = parts.scale;
8605
8606 switch (parts.seg)
8607 {
8608 case SEG_DEFAULT:
8609 break;
8610 case SEG_FS:
8611 case SEG_GS:
8612 if (USER_LABEL_PREFIX[0] == 0)
8613 putc ('%', file);
8614 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8615 break;
8616 default:
8617 gcc_unreachable ();
8618 }
8619
8620 if (!base && !index)
8621 {
8622 /* A displacement-only address requires special attention. */
8623
8624 if (CONST_INT_P (disp))
8625 {
8626 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8627 {
8628 if (USER_LABEL_PREFIX[0] == 0)
8629 putc ('%', file);
8630 fputs ("ds:", file);
8631 }
8632 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8633 }
8634 else if (flag_pic)
8635 output_pic_addr_const (file, disp, 0);
8636 else
8637 output_addr_const (file, disp);
8638
8639 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8640 if (TARGET_64BIT)
8641 {
8642 if (GET_CODE (disp) == CONST
8643 && GET_CODE (XEXP (disp, 0)) == PLUS
8644 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8645 disp = XEXP (XEXP (disp, 0), 0);
8646 if (GET_CODE (disp) == LABEL_REF
8647 || (GET_CODE (disp) == SYMBOL_REF
8648 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8649 fputs ("(%rip)", file);
8650 }
8651 }
8652 else
8653 {
8654 if (ASSEMBLER_DIALECT == ASM_ATT)
8655 {
8656 if (disp)
8657 {
8658 if (flag_pic)
8659 output_pic_addr_const (file, disp, 0);
8660 else if (GET_CODE (disp) == LABEL_REF)
8661 output_asm_label (disp);
8662 else
8663 output_addr_const (file, disp);
8664 }
8665
8666 putc ('(', file);
8667 if (base)
8668 print_reg (base, 0, file);
8669 if (index)
8670 {
8671 putc (',', file);
8672 print_reg (index, 0, file);
8673 if (scale != 1)
8674 fprintf (file, ",%d", scale);
8675 }
8676 putc (')', file);
8677 }
8678 else
8679 {
8680 rtx offset = NULL_RTX;
8681
8682 if (disp)
8683 {
8684 /* Pull out the offset of a symbol; print any symbol itself. */
8685 if (GET_CODE (disp) == CONST
8686 && GET_CODE (XEXP (disp, 0)) == PLUS
8687 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8688 {
8689 offset = XEXP (XEXP (disp, 0), 1);
8690 disp = gen_rtx_CONST (VOIDmode,
8691 XEXP (XEXP (disp, 0), 0));
8692 }
8693
8694 if (flag_pic)
8695 output_pic_addr_const (file, disp, 0);
8696 else if (GET_CODE (disp) == LABEL_REF)
8697 output_asm_label (disp);
8698 else if (CONST_INT_P (disp))
8699 offset = disp;
8700 else
8701 output_addr_const (file, disp);
8702 }
8703
8704 putc ('[', file);
8705 if (base)
8706 {
8707 print_reg (base, 0, file);
8708 if (offset)
8709 {
8710 if (INTVAL (offset) >= 0)
8711 putc ('+', file);
8712 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8713 }
8714 }
8715 else if (offset)
8716 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8717 else
8718 putc ('0', file);
8719
8720 if (index)
8721 {
8722 putc ('+', file);
8723 print_reg (index, 0, file);
8724 if (scale != 1)
8725 fprintf (file, "*%d", scale);
8726 }
8727 putc (']', file);
8728 }
8729 }
8730 }
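/* For the address with base %ebx, index %eax, scale 4 and displacement
   12 (example registers only), the AT&T branch above prints
   "12(%ebx,%eax,4)", while the Intel branch prints the same address
   bracketed as base + displacement + index*scale, e.g. [ebx+12+eax*4]. */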
8731
8732 bool
8733 output_addr_const_extra (FILE *file, rtx x)
8734 {
8735 rtx op;
8736
8737 if (GET_CODE (x) != UNSPEC)
8738 return false;
8739
8740 op = XVECEXP (x, 0, 0);
8741 switch (XINT (x, 1))
8742 {
8743 case UNSPEC_GOTTPOFF:
8744 output_addr_const (file, op);
8745 /* FIXME: This might be @TPOFF in Sun ld. */
8746 fputs ("@GOTTPOFF", file);
8747 break;
8748 case UNSPEC_TPOFF:
8749 output_addr_const (file, op);
8750 fputs ("@TPOFF", file);
8751 break;
8752 case UNSPEC_NTPOFF:
8753 output_addr_const (file, op);
8754 if (TARGET_64BIT)
8755 fputs ("@TPOFF", file);
8756 else
8757 fputs ("@NTPOFF", file);
8758 break;
8759 case UNSPEC_DTPOFF:
8760 output_addr_const (file, op);
8761 fputs ("@DTPOFF", file);
8762 break;
8763 case UNSPEC_GOTNTPOFF:
8764 output_addr_const (file, op);
8765 if (TARGET_64BIT)
8766 fputs ("@GOTTPOFF(%rip)", file);
8767 else
8768 fputs ("@GOTNTPOFF", file);
8769 break;
8770 case UNSPEC_INDNTPOFF:
8771 output_addr_const (file, op);
8772 fputs ("@INDNTPOFF", file);
8773 break;
8774
8775 default:
8776 return false;
8777 }
8778
8779 return true;
8780 }
8781 \f
8782 /* Split one or more DImode RTL references into pairs of SImode
8783 references. The RTL can be REG, offsettable MEM, integer constant, or
8784 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8785 split and "num" is its length. lo_half and hi_half are output arrays
8786 that parallel "operands". */
8787
8788 void
8789 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8790 {
8791 while (num--)
8792 {
8793 rtx op = operands[num];
8794
8795 /* simplify_subreg refuses to split volatile memory addresses,
8796 but we still have to handle them. */
8797 if (MEM_P (op))
8798 {
8799 lo_half[num] = adjust_address (op, SImode, 0);
8800 hi_half[num] = adjust_address (op, SImode, 4);
8801 }
8802 else
8803 {
8804 lo_half[num] = simplify_gen_subreg (SImode, op,
8805 GET_MODE (op) == VOIDmode
8806 ? DImode : GET_MODE (op), 0);
8807 hi_half[num] = simplify_gen_subreg (SImode, op,
8808 GET_MODE (op) == VOIDmode
8809 ? DImode : GET_MODE (op), 4);
8810 }
8811 }
8812 }
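/* For example, a DImode pseudo splits into two SImode subregs at byte
   offsets 0 and 4, while a DImode MEM at address A splits into SImode
   MEMs at A and A+4. */
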
8813 /* Split one or more TImode RTL references into pairs of DImode
8814 references. The RTL can be REG, offsettable MEM, integer constant, or
8815 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8816 split and "num" is its length. lo_half and hi_half are output arrays
8817 that parallel "operands". */
8818
8819 void
8820 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8821 {
8822 while (num--)
8823 {
8824 rtx op = operands[num];
8825
8826 /* simplify_subreg refuses to split volatile memory addresses, but we
8827 still have to handle them. */
8828 if (MEM_P (op))
8829 {
8830 lo_half[num] = adjust_address (op, DImode, 0);
8831 hi_half[num] = adjust_address (op, DImode, 8);
8832 }
8833 else
8834 {
8835 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8836 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8837 }
8838 }
8839 }
8840 \f
8841 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8842 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8843 is the expression of the binary operation. The output may either be
8844 emitted here, or returned to the caller, like all output_* functions.
8845
8846 There is no guarantee that the operands are the same mode, as they
8847 might be within FLOAT or FLOAT_EXTEND expressions. */
8848
8849 #ifndef SYSV386_COMPAT
8850 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8851 wants to fix the assemblers because that causes incompatibility
8852 with gcc. No-one wants to fix gcc because that causes
8853 incompatibility with assemblers... You can use the option of
8854 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8855 #define SYSV386_COMPAT 1
8856 #endif
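
/* Concretely, SYSV386_COMPAT only changes the AT&T halves of the fsub/fdiv
   templates below: for the same hardware operation the AT&T output may use
   the plain mnemonic where the Intel output uses the reversed one (e.g.
   "{p\t%0, %2|rp\t%2, %0}"), because the affected assemblers swap the
   meaning of fsub{r}/fdiv{r} when the destination is not %st(0).  */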
8857
8858 const char *
8859 output_387_binary_op (rtx insn, rtx *operands)
8860 {
8861 static char buf[30];
8862 const char *p;
8863 const char *ssep;
8864 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8865
8866 #ifdef ENABLE_CHECKING
8867 /* Even if we do not want to check the inputs, this documents the input
8868 constraints, which helps in understanding the following code. */
8869 if (STACK_REG_P (operands[0])
8870 && ((REG_P (operands[1])
8871 && REGNO (operands[0]) == REGNO (operands[1])
8872 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8873 || (REG_P (operands[2])
8874 && REGNO (operands[0]) == REGNO (operands[2])
8875 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8876 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8877 ; /* ok */
8878 else
8879 gcc_assert (is_sse);
8880 #endif
8881
8882 switch (GET_CODE (operands[3]))
8883 {
8884 case PLUS:
8885 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8886 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8887 p = "fiadd";
8888 else
8889 p = "fadd";
8890 ssep = "add";
8891 break;
8892
8893 case MINUS:
8894 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8895 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8896 p = "fisub";
8897 else
8898 p = "fsub";
8899 ssep = "sub";
8900 break;
8901
8902 case MULT:
8903 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8904 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8905 p = "fimul";
8906 else
8907 p = "fmul";
8908 ssep = "mul";
8909 break;
8910
8911 case DIV:
8912 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8913 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8914 p = "fidiv";
8915 else
8916 p = "fdiv";
8917 ssep = "div";
8918 break;
8919
8920 default:
8921 gcc_unreachable ();
8922 }
8923
8924 if (is_sse)
8925 {
8926 strcpy (buf, ssep);
8927 if (GET_MODE (operands[0]) == SFmode)
8928 strcat (buf, "ss\t{%2, %0|%0, %2}");
8929 else
8930 strcat (buf, "sd\t{%2, %0|%0, %2}");
8931 return buf;
8932 }
8933 strcpy (buf, p);
8934
8935 switch (GET_CODE (operands[3]))
8936 {
8937 case MULT:
8938 case PLUS:
8939 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8940 {
8941 rtx temp = operands[2];
8942 operands[2] = operands[1];
8943 operands[1] = temp;
8944 }
8945
8946 /* We now know that operands[0] == operands[1]. */
8947
8948 if (MEM_P (operands[2]))
8949 {
8950 p = "%z2\t%2";
8951 break;
8952 }
8953
8954 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8955 {
8956 if (STACK_TOP_P (operands[0]))
8957 /* How is it that we are storing to a dead operand[2]?
8958 Well, presumably operands[1] is dead too. We can't
8959 store the result to st(0) as st(0) gets popped on this
8960 instruction. Instead store to operands[2] (which I
8961 think has to be st(1)). st(1) will be popped later.
8962 gcc <= 2.8.1 didn't have this check and generated
8963 assembly code that the Unixware assembler rejected. */
8964 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8965 else
8966 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8967 break;
8968 }
8969
8970 if (STACK_TOP_P (operands[0]))
8971 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8972 else
8973 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8974 break;
8975
8976 case MINUS:
8977 case DIV:
8978 if (MEM_P (operands[1]))
8979 {
8980 p = "r%z1\t%1";
8981 break;
8982 }
8983
8984 if (MEM_P (operands[2]))
8985 {
8986 p = "%z2\t%2";
8987 break;
8988 }
8989
8990 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8991 {
8992 #if SYSV386_COMPAT
8993 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8994 derived assemblers, confusingly reverse the direction of
8995 the operation for fsub{r} and fdiv{r} when the
8996 destination register is not st(0). The Intel assembler
8997 doesn't have this brain damage. Read !SYSV386_COMPAT to
8998 figure out what the hardware really does. */
8999 if (STACK_TOP_P (operands[0]))
9000 p = "{p\t%0, %2|rp\t%2, %0}";
9001 else
9002 p = "{rp\t%2, %0|p\t%0, %2}";
9003 #else
9004 if (STACK_TOP_P (operands[0]))
9005 /* As above for fmul/fadd, we can't store to st(0). */
9006 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9007 else
9008 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9009 #endif
9010 break;
9011 }
9012
9013 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9014 {
9015 #if SYSV386_COMPAT
9016 if (STACK_TOP_P (operands[0]))
9017 p = "{rp\t%0, %1|p\t%1, %0}";
9018 else
9019 p = "{p\t%1, %0|rp\t%0, %1}";
9020 #else
9021 if (STACK_TOP_P (operands[0]))
9022 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9023 else
9024 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9025 #endif
9026 break;
9027 }
9028
9029 if (STACK_TOP_P (operands[0]))
9030 {
9031 if (STACK_TOP_P (operands[1]))
9032 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9033 else
9034 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9035 break;
9036 }
9037 else if (STACK_TOP_P (operands[1]))
9038 {
9039 #if SYSV386_COMPAT
9040 p = "{\t%1, %0|r\t%0, %1}";
9041 #else
9042 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9043 #endif
9044 }
9045 else
9046 {
9047 #if SYSV386_COMPAT
9048 p = "{r\t%2, %0|\t%0, %2}";
9049 #else
9050 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9051 #endif
9052 }
9053 break;
9054
9055 default:
9056 gcc_unreachable ();
9057 }
9058
9059 strcat (buf, p);
9060 return buf;
9061 }
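
/* For example, an SFmode PLUS with SSE operands returns
   "addss\t{%2, %0|%0, %2}", while a DFmode MULT on the 387 with
   operands[0] == operands[1] == %st(0) and a register operands[2] that
   does not die returns "fmul\t{%y2, %0|%0, %y2}".  */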
9062
9063 /* Return needed mode for entity in optimize_mode_switching pass. */
9064
9065 int
9066 ix86_mode_needed (int entity, rtx insn)
9067 {
9068 enum attr_i387_cw mode;
9069
9070 /* The mode UNINITIALIZED is used to store the control word after a
9071 function call or ASM pattern. The mode ANY specifies that the function
9072 has no requirements on the control word and makes no changes to the
9073 bits we are interested in. */
9074
9075 if (CALL_P (insn)
9076 || (NONJUMP_INSN_P (insn)
9077 && (asm_noperands (PATTERN (insn)) >= 0
9078 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9079 return I387_CW_UNINITIALIZED;
9080
9081 if (recog_memoized (insn) < 0)
9082 return I387_CW_ANY;
9083
9084 mode = get_attr_i387_cw (insn);
9085
9086 switch (entity)
9087 {
9088 case I387_TRUNC:
9089 if (mode == I387_CW_TRUNC)
9090 return mode;
9091 break;
9092
9093 case I387_FLOOR:
9094 if (mode == I387_CW_FLOOR)
9095 return mode;
9096 break;
9097
9098 case I387_CEIL:
9099 if (mode == I387_CW_CEIL)
9100 return mode;
9101 break;
9102
9103 case I387_MASK_PM:
9104 if (mode == I387_CW_MASK_PM)
9105 return mode;
9106 break;
9107
9108 default:
9109 gcc_unreachable ();
9110 }
9111
9112 return I387_CW_ANY;
9113 }
9114
9115 /* Output code to initialize the control word copies used by the trunc?f?i
9116 and rounding patterns. MODE selects which rounding or masking variant
9117 to set up; the original control word is first saved in SLOT_CW_STORED. */
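/* For reference, in the 387 control word bits 10-11 form the rounding
   control field (00 = to nearest, 01 = toward -inf, 10 = toward +inf,
   11 = toward zero) and bit 5 is the precision exception mask; hence the
   or/and masks 0x0c00, 0x0400, 0x0800 and 0x0020 used below.  */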
9118
9119 void
9120 emit_i387_cw_initialization (int mode)
9121 {
9122 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9123 rtx new_mode;
9124
9125 int slot;
9126
9127 rtx reg = gen_reg_rtx (HImode);
9128
9129 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9130 emit_move_insn (reg, copy_rtx (stored_mode));
9131
9132 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9133 {
9134 switch (mode)
9135 {
9136 case I387_CW_TRUNC:
9137 /* round toward zero (truncate) */
9138 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9139 slot = SLOT_CW_TRUNC;
9140 break;
9141
9142 case I387_CW_FLOOR:
9143 /* round down toward -oo */
9144 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9145 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9146 slot = SLOT_CW_FLOOR;
9147 break;
9148
9149 case I387_CW_CEIL:
9150 /* round up toward +oo */
9151 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9152 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9153 slot = SLOT_CW_CEIL;
9154 break;
9155
9156 case I387_CW_MASK_PM:
9157 /* mask precision exception for nearbyint() */
9158 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9159 slot = SLOT_CW_MASK_PM;
9160 break;
9161
9162 default:
9163 gcc_unreachable ();
9164 }
9165 }
9166 else
9167 {
9168 switch (mode)
9169 {
9170 case I387_CW_TRUNC:
9171 /* round toward zero (truncate) */
9172 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9173 slot = SLOT_CW_TRUNC;
9174 break;
9175
9176 case I387_CW_FLOOR:
9177 /* round down toward -oo */
9178 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9179 slot = SLOT_CW_FLOOR;
9180 break;
9181
9182 case I387_CW_CEIL:
9183 /* round up toward +oo */
9184 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9185 slot = SLOT_CW_CEIL;
9186 break;
9187
9188 case I387_CW_MASK_PM:
9189 /* mask precision exception for nearbyint() */
9190 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9191 slot = SLOT_CW_MASK_PM;
9192 break;
9193
9194 default:
9195 gcc_unreachable ();
9196 }
9197 }
9198
9199 gcc_assert (slot < MAX_386_STACK_LOCALS);
9200
9201 new_mode = assign_386_stack_local (HImode, slot);
9202 emit_move_insn (new_mode, reg);
9203 }
9204
9205 /* Output code for INSN to convert a float to a signed int. OPERANDS
9206 are the insn operands. The output may be [HSD]Imode and the input
9207 operand may be [SDX]Fmode. */
9208
9209 const char *
9210 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9211 {
9212 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9213 int dimode_p = GET_MODE (operands[0]) == DImode;
9214 int round_mode = get_attr_i387_cw (insn);
9215
9216 /* Jump through a hoop or two for DImode, since the hardware has no
9217 non-popping instruction. We used to do this a different way, but
9218 that was somewhat fragile and broke with post-reload splitters. */
9219 if ((dimode_p || fisttp) && !stack_top_dies)
9220 output_asm_insn ("fld\t%y1", operands);
9221
9222 gcc_assert (STACK_TOP_P (operands[1]));
9223 gcc_assert (MEM_P (operands[0]));
9224
9225 if (fisttp)
9226 output_asm_insn ("fisttp%z0\t%0", operands);
9227 else
9228 {
9229 if (round_mode != I387_CW_ANY)
9230 output_asm_insn ("fldcw\t%3", operands);
9231 if (stack_top_dies || dimode_p)
9232 output_asm_insn ("fistp%z0\t%0", operands);
9233 else
9234 output_asm_insn ("fist%z0\t%0", operands);
9235 if (round_mode != I387_CW_ANY)
9236 output_asm_insn ("fldcw\t%2", operands);
9237 }
9238
9239 return "";
9240 }
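
/* For a truncating conversion without fisttp this emits, roughly,
   "fldcw %3; fistp%z0 %0; fldcw %2", where %3 is the control word with the
   requested rounding mode and %2 is the saved original control word, both
   set up by emit_i387_cw_initialization above.  */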
9241
9242 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9243 have the values zero or one, indicates the ffreep insn's operand
9244 from the OPERANDS array. */
9245
9246 static const char *
9247 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9248 {
9249 if (TARGET_USE_FFREEP)
9250 #if HAVE_AS_IX86_FFREEP
9251 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9252 #else
9253 {
9254 static char retval[] = ".word\t0xc_df";
9255 int regno = REGNO (operands[opno]);
9256
9257 gcc_assert (FP_REGNO_P (regno));
9258
9259 retval[9] = '0' + (regno - FIRST_STACK_REG);
9260 return retval;
9261 }
9262 #endif
9263
9264 return opno ? "fstp\t%y1" : "fstp\t%y0";
9265 }
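
/* ffreep %st(N) is encoded as the bytes 0xDF 0xC0+N, so patching retval[9]
   above turns ".word 0xc_df" into e.g. ".word 0xc1df", which assembles
   (little-endian) to the two bytes DF C1, i.e. ffreep %st(1).  */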
9266
9267
9268 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9269 should be used. UNORDERED_P is true when fucom should be used. */
9270
9271 const char *
9272 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9273 {
9274 int stack_top_dies;
9275 rtx cmp_op0, cmp_op1;
9276 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9277
9278 if (eflags_p)
9279 {
9280 cmp_op0 = operands[0];
9281 cmp_op1 = operands[1];
9282 }
9283 else
9284 {
9285 cmp_op0 = operands[1];
9286 cmp_op1 = operands[2];
9287 }
9288
9289 if (is_sse)
9290 {
9291 if (GET_MODE (operands[0]) == SFmode)
9292 if (unordered_p)
9293 return "ucomiss\t{%1, %0|%0, %1}";
9294 else
9295 return "comiss\t{%1, %0|%0, %1}";
9296 else
9297 if (unordered_p)
9298 return "ucomisd\t{%1, %0|%0, %1}";
9299 else
9300 return "comisd\t{%1, %0|%0, %1}";
9301 }
9302
9303 gcc_assert (STACK_TOP_P (cmp_op0));
9304
9305 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9306
9307 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9308 {
9309 if (stack_top_dies)
9310 {
9311 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9312 return output_387_ffreep (operands, 1);
9313 }
9314 else
9315 return "ftst\n\tfnstsw\t%0";
9316 }
9317
9318 if (STACK_REG_P (cmp_op1)
9319 && stack_top_dies
9320 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9321 && REGNO (cmp_op1) != FIRST_STACK_REG)
9322 {
9323 /* If the top of the 387 stack dies, and the other operand is
9324 also a stack register that dies, then this must be a
9325 `fcompp' float compare. */
9326
9327 if (eflags_p)
9328 {
9329 /* There is no double popping fcomi variant. Fortunately,
9330 eflags is immune from the fstp's cc clobbering. */
9331 if (unordered_p)
9332 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9333 else
9334 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9335 return output_387_ffreep (operands, 0);
9336 }
9337 else
9338 {
9339 if (unordered_p)
9340 return "fucompp\n\tfnstsw\t%0";
9341 else
9342 return "fcompp\n\tfnstsw\t%0";
9343 }
9344 }
9345 else
9346 {
9347 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9348
9349 static const char * const alt[16] =
9350 {
9351 "fcom%z2\t%y2\n\tfnstsw\t%0",
9352 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9353 "fucom%z2\t%y2\n\tfnstsw\t%0",
9354 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9355
9356 "ficom%z2\t%y2\n\tfnstsw\t%0",
9357 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9358 NULL,
9359 NULL,
9360
9361 "fcomi\t{%y1, %0|%0, %y1}",
9362 "fcomip\t{%y1, %0|%0, %y1}",
9363 "fucomi\t{%y1, %0|%0, %y1}",
9364 "fucomip\t{%y1, %0|%0, %y1}",
9365
9366 NULL,
9367 NULL,
9368 NULL,
9369 NULL
9370 };
9371
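/* For example, an ordered fcomi compare where the top of the stack dies
   gives mask = (1 << 3) | 1 = 9, selecting "fcomip\t{%y1, %0|%0, %y1}"
   from the table above.  */
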
9372 int mask;
9373 const char *ret;
9374
9375 mask = eflags_p << 3;
9376 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9377 mask |= unordered_p << 1;
9378 mask |= stack_top_dies;
9379
9380 gcc_assert (mask < 16);
9381 ret = alt[mask];
9382 gcc_assert (ret);
9383
9384 return ret;
9385 }
9386 }
9387
9388 void
9389 ix86_output_addr_vec_elt (FILE *file, int value)
9390 {
9391 const char *directive = ASM_LONG;
9392
9393 #ifdef ASM_QUAD
9394 if (TARGET_64BIT)
9395 directive = ASM_QUAD;
9396 #else
9397 gcc_assert (!TARGET_64BIT);
9398 #endif
9399
9400 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9401 }
9402
9403 void
9404 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9405 {
9406 const char *directive = ASM_LONG;
9407
9408 #ifdef ASM_QUAD
9409 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9410 directive = ASM_QUAD;
9411 #else
9412 gcc_assert (!TARGET_64BIT);
9413 #endif
9414 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9415 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9416 fprintf (file, "%s%s%d-%s%d\n",
9417 directive, LPREFIX, value, LPREFIX, rel);
9418 else if (HAVE_AS_GOTOFF_IN_DATA)
9419 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9420 #if TARGET_MACHO
9421 else if (TARGET_MACHO)
9422 {
9423 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9424 machopic_output_function_base_name (file);
9425 fprintf(file, "\n");
9426 }
9427 #endif
9428 else
9429 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9430 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9431 }
9432 \f
9433 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9434 for the target. */
9435
9436 void
9437 ix86_expand_clear (rtx dest)
9438 {
9439 rtx tmp;
9440
9441 /* We play register width games, which are only valid after reload. */
9442 gcc_assert (reload_completed);
9443
9444 /* Avoid HImode and its attendant prefix byte. */
9445 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9446 dest = gen_rtx_REG (SImode, REGNO (dest));
9447
9448 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9449
9450 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9451 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9452 {
9453 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9454 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9455 }
9456
9457 emit_insn (tmp);
9458 }
9459
9460 /* X is an unchanging MEM. If it is a constant pool reference, return
9461 the constant pool rtx, else NULL. */
9462
9463 rtx
9464 maybe_get_pool_constant (rtx x)
9465 {
9466 x = ix86_delegitimize_address (XEXP (x, 0));
9467
9468 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9469 return get_pool_constant (x);
9470
9471 return NULL_RTX;
9472 }
9473
9474 void
9475 ix86_expand_move (enum machine_mode mode, rtx operands[])
9476 {
9477 int strict = (reload_in_progress || reload_completed);
9478 rtx op0, op1;
9479 enum tls_model model;
9480
9481 op0 = operands[0];
9482 op1 = operands[1];
9483
9484 if (GET_CODE (op1) == SYMBOL_REF)
9485 {
9486 model = SYMBOL_REF_TLS_MODEL (op1);
9487 if (model)
9488 {
9489 op1 = legitimize_tls_address (op1, model, true);
9490 op1 = force_operand (op1, op0);
9491 if (op1 == op0)
9492 return;
9493 }
9494 }
9495 else if (GET_CODE (op1) == CONST
9496 && GET_CODE (XEXP (op1, 0)) == PLUS
9497 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9498 {
9499 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9500 if (model)
9501 {
9502 rtx addend = XEXP (XEXP (op1, 0), 1);
9503 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9504 op1 = force_operand (op1, NULL);
9505 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9506 op0, 1, OPTAB_DIRECT);
9507 if (op1 == op0)
9508 return;
9509 }
9510 }
9511
9512 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9513 {
9514 if (TARGET_MACHO && !TARGET_64BIT)
9515 {
9516 #if TARGET_MACHO
9517 if (MACHOPIC_PURE)
9518 {
9519 rtx temp = ((reload_in_progress
9520 || ((op0 && REG_P (op0))
9521 && mode == Pmode))
9522 ? op0 : gen_reg_rtx (Pmode));
9523 op1 = machopic_indirect_data_reference (op1, temp);
9524 op1 = machopic_legitimize_pic_address (op1, mode,
9525 temp == op1 ? 0 : temp);
9526 }
9527 else if (MACHOPIC_INDIRECT)
9528 op1 = machopic_indirect_data_reference (op1, 0);
9529 if (op0 == op1)
9530 return;
9531 #endif
9532 }
9533 else
9534 {
9535 if (MEM_P (op0))
9536 op1 = force_reg (Pmode, op1);
9537 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9538 {
9539 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9540 op1 = legitimize_pic_address (op1, reg);
9541 if (op0 == op1)
9542 return;
9543 }
9544 }
9545 }
9546 else
9547 {
9548 if (MEM_P (op0)
9549 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9550 || !push_operand (op0, mode))
9551 && MEM_P (op1))
9552 op1 = force_reg (mode, op1);
9553
9554 if (push_operand (op0, mode)
9555 && ! general_no_elim_operand (op1, mode))
9556 op1 = copy_to_mode_reg (mode, op1);
9557
9558 /* Force large constants in 64-bit compilation into a register
9559 so that they get CSEed. */
9560 if (TARGET_64BIT && mode == DImode
9561 && immediate_operand (op1, mode)
9562 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9563 && !register_operand (op0, mode)
9564 && optimize && !reload_completed && !reload_in_progress)
9565 op1 = copy_to_mode_reg (mode, op1);
9566
9567 if (FLOAT_MODE_P (mode))
9568 {
9569 /* If we are loading a floating point constant to a register,
9570 force the value to memory now, since we'll get better code
9571 out the back end. */
9572
9573 if (strict)
9574 ;
9575 else if (GET_CODE (op1) == CONST_DOUBLE)
9576 {
9577 op1 = validize_mem (force_const_mem (mode, op1));
9578 if (!register_operand (op0, mode))
9579 {
9580 rtx temp = gen_reg_rtx (mode);
9581 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9582 emit_move_insn (op0, temp);
9583 return;
9584 }
9585 }
9586 }
9587 }
9588
9589 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9590 }
9591
9592 void
9593 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9594 {
9595 rtx op0 = operands[0], op1 = operands[1];
9596
9597 /* Force constants other than zero into memory. We do not know how
9598 the instructions used to build constants modify the upper 64 bits
9599 of the register; once we have that information we may be able
9600 to handle some of them more efficiently. */
9601 if ((reload_in_progress | reload_completed) == 0
9602 && register_operand (op0, mode)
9603 && CONSTANT_P (op1)
9604 && standard_sse_constant_p (op1) <= 0)
9605 op1 = validize_mem (force_const_mem (mode, op1));
9606
9607 /* Make operand1 a register if it isn't already. */
9608 if (!no_new_pseudos
9609 && !register_operand (op0, mode)
9610 && !register_operand (op1, mode))
9611 {
9612 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9613 return;
9614 }
9615
9616 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9617 }
9618
9619 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9620 straight to ix86_expand_vector_move. */
9621 /* Code generation for scalar reg-reg moves of single and double precision data:
9622 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9623 movaps reg, reg
9624 else
9625 movss reg, reg
9626 if (x86_sse_partial_reg_dependency == true)
9627 movapd reg, reg
9628 else
9629 movsd reg, reg
9630
9631 Code generation for scalar loads of double precision data:
9632 if (x86_sse_split_regs == true)
9633 movlpd mem, reg (gas syntax)
9634 else
9635 movsd mem, reg
9636
9637 Code generation for unaligned packed loads of single precision data
9638 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9639 if (x86_sse_unaligned_move_optimal)
9640 movups mem, reg
9641
9642 if (x86_sse_partial_reg_dependency == true)
9643 {
9644 xorps reg, reg
9645 movlps mem, reg
9646 movhps mem+8, reg
9647 }
9648 else
9649 {
9650 movlps mem, reg
9651 movhps mem+8, reg
9652 }
9653
9654 Code generation for unaligned packed loads of double precision data
9655 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9656 if (x86_sse_unaligned_move_optimal)
9657 movupd mem, reg
9658
9659 if (x86_sse_split_regs == true)
9660 {
9661 movlpd mem, reg
9662 movhpd mem+8, reg
9663 }
9664 else
9665 {
9666 movsd mem, reg
9667 movhpd mem+8, reg
9668 }
9669 */
9670
9671 void
9672 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9673 {
9674 rtx op0, op1, m;
9675
9676 op0 = operands[0];
9677 op1 = operands[1];
9678
9679 if (MEM_P (op1))
9680 {
9681 /* If we're optimizing for size, movups is the smallest. */
9682 if (optimize_size)
9683 {
9684 op0 = gen_lowpart (V4SFmode, op0);
9685 op1 = gen_lowpart (V4SFmode, op1);
9686 emit_insn (gen_sse_movups (op0, op1));
9687 return;
9688 }
9689
9690 /* ??? If we have typed data, then it would appear that using
9691 movdqu is the only way to get unaligned data loaded with
9692 integer type. */
9693 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9694 {
9695 op0 = gen_lowpart (V16QImode, op0);
9696 op1 = gen_lowpart (V16QImode, op1);
9697 emit_insn (gen_sse2_movdqu (op0, op1));
9698 return;
9699 }
9700
9701 if (TARGET_SSE2 && mode == V2DFmode)
9702 {
9703 rtx zero;
9704
9705 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9706 {
9707 op0 = gen_lowpart (V2DFmode, op0);
9708 op1 = gen_lowpart (V2DFmode, op1);
9709 emit_insn (gen_sse2_movupd (op0, op1));
9710 return;
9711 }
9712
9713 /* When SSE registers are split into halves, we can avoid
9714 writing to the top half twice. */
9715 if (TARGET_SSE_SPLIT_REGS)
9716 {
9717 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9718 zero = op0;
9719 }
9720 else
9721 {
9722 /* ??? Not sure about the best option for the Intel chips.
9723 The following would seem to satisfy; the register is
9724 entirely cleared, breaking the dependency chain. We
9725 then store to the upper half, with a dependency depth
9726 of one. A rumor has it that Intel recommends two movsd
9727 followed by an unpacklpd, but this is unconfirmed. And
9728 given that the dependency depth of the unpacklpd would
9729 still be one, I'm not sure why this would be better. */
9730 zero = CONST0_RTX (V2DFmode);
9731 }
9732
9733 m = adjust_address (op1, DFmode, 0);
9734 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9735 m = adjust_address (op1, DFmode, 8);
9736 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9737 }
9738 else
9739 {
9740 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9741 {
9742 op0 = gen_lowpart (V4SFmode, op0);
9743 op1 = gen_lowpart (V4SFmode, op1);
9744 emit_insn (gen_sse_movups (op0, op1));
9745 return;
9746 }
9747
9748 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9749 emit_move_insn (op0, CONST0_RTX (mode));
9750 else
9751 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9752
9753 if (mode != V4SFmode)
9754 op0 = gen_lowpart (V4SFmode, op0);
9755 m = adjust_address (op1, V2SFmode, 0);
9756 emit_insn (gen_sse_loadlps (op0, op0, m));
9757 m = adjust_address (op1, V2SFmode, 8);
9758 emit_insn (gen_sse_loadhps (op0, op0, m));
9759 }
9760 }
9761 else if (MEM_P (op0))
9762 {
9763 /* If we're optimizing for size, movups is the smallest. */
9764 if (optimize_size)
9765 {
9766 op0 = gen_lowpart (V4SFmode, op0);
9767 op1 = gen_lowpart (V4SFmode, op1);
9768 emit_insn (gen_sse_movups (op0, op1));
9769 return;
9770 }
9771
9772 /* ??? Similar to the above, only less clear because of the
9773 "typeless stores" issue. */
9774 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9775 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9776 {
9777 op0 = gen_lowpart (V16QImode, op0);
9778 op1 = gen_lowpart (V16QImode, op1);
9779 emit_insn (gen_sse2_movdqu (op0, op1));
9780 return;
9781 }
9782
9783 if (TARGET_SSE2 && mode == V2DFmode)
9784 {
9785 m = adjust_address (op0, DFmode, 0);
9786 emit_insn (gen_sse2_storelpd (m, op1));
9787 m = adjust_address (op0, DFmode, 8);
9788 emit_insn (gen_sse2_storehpd (m, op1));
9789 }
9790 else
9791 {
9792 if (mode != V4SFmode)
9793 op1 = gen_lowpart (V4SFmode, op1);
9794 m = adjust_address (op0, V2SFmode, 0);
9795 emit_insn (gen_sse_storelps (m, op1));
9796 m = adjust_address (op0, V2SFmode, 8);
9797 emit_insn (gen_sse_storehps (m, op1));
9798 }
9799 }
9800 else
9801 gcc_unreachable ();
9802 }
9803
9804 /* Expand a push in MODE. This is some mode for which we do not support
9805 proper push instructions, at least from the registers that we expect
9806 the value to live in. */
9807
9808 void
9809 ix86_expand_push (enum machine_mode mode, rtx x)
9810 {
9811 rtx tmp;
9812
9813 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9814 GEN_INT (-GET_MODE_SIZE (mode)),
9815 stack_pointer_rtx, 1, OPTAB_DIRECT);
9816 if (tmp != stack_pointer_rtx)
9817 emit_move_insn (stack_pointer_rtx, tmp);
9818
9819 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9820 emit_move_insn (tmp, x);
9821 }
9822
9823 /* Helper function of ix86_fixup_binary_operands to canonicalize
9824 operand order. Returns true if the operands should be swapped. */
9825
9826 static bool
9827 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9828 rtx operands[])
9829 {
9830 rtx dst = operands[0];
9831 rtx src1 = operands[1];
9832 rtx src2 = operands[2];
9833
9834 /* If the operation is not commutative, we can't do anything. */
9835 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9836 return false;
9837
9838 /* Highest priority is that src1 should match dst. */
9839 if (rtx_equal_p (dst, src1))
9840 return false;
9841 if (rtx_equal_p (dst, src2))
9842 return true;
9843
9844 /* Next highest priority is that immediate constants come second. */
9845 if (immediate_operand (src2, mode))
9846 return false;
9847 if (immediate_operand (src1, mode))
9848 return true;
9849
9850 /* Lowest priority is that memory references should come second. */
9851 if (MEM_P (src2))
9852 return false;
9853 if (MEM_P (src1))
9854 return true;
9855
9856 return false;
9857 }
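
/* For example, expanding (plus:SI (const_int 4) (reg B)) with a destination
   that matches neither source swaps the operands so that the immediate
   becomes src2; and if the destination already equals src2, the operands
   are swapped so that src1 matches the destination.  */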
9858
9859
9860 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9861 destination to use for the operation. If different from the true
9862 destination in operands[0], a copy operation will be required. */
9863
9864 rtx
9865 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9866 rtx operands[])
9867 {
9868 rtx dst = operands[0];
9869 rtx src1 = operands[1];
9870 rtx src2 = operands[2];
9871
9872 /* Canonicalize operand order. */
9873 if (ix86_swap_binary_operands_p (code, mode, operands))
9874 {
9875 rtx temp = src1;
9876 src1 = src2;
9877 src2 = temp;
9878 }
9879
9880 /* Both source operands cannot be in memory. */
9881 if (MEM_P (src1) && MEM_P (src2))
9882 {
9883 /* Optimization: Only read from memory once. */
9884 if (rtx_equal_p (src1, src2))
9885 {
9886 src2 = force_reg (mode, src2);
9887 src1 = src2;
9888 }
9889 else
9890 src2 = force_reg (mode, src2);
9891 }
9892
9893 /* If the destination is memory, and we do not have matching source
9894 operands, do things in registers. */
9895 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9896 dst = gen_reg_rtx (mode);
9897
9898 /* Source 1 cannot be a constant. */
9899 if (CONSTANT_P (src1))
9900 src1 = force_reg (mode, src1);
9901
9902 /* Source 1 cannot be a non-matching memory. */
9903 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9904 src1 = force_reg (mode, src1);
9905
9906 operands[1] = src1;
9907 operands[2] = src2;
9908 return dst;
9909 }
9910
9911 /* Similarly, but assume that the destination has already been
9912 set up properly. */
9913
9914 void
9915 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9916 enum machine_mode mode, rtx operands[])
9917 {
9918 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9919 gcc_assert (dst == operands[0]);
9920 }
9921
9922 /* Attempt to expand a binary operator. Make the expansion closer to the
9923 actual machine than just general_operand, which would allow 3 separate
9924 memory references (one output, two inputs) in a single insn. */
9925
9926 void
9927 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9928 rtx operands[])
9929 {
9930 rtx src1, src2, dst, op, clob;
9931
9932 dst = ix86_fixup_binary_operands (code, mode, operands);
9933 src1 = operands[1];
9934 src2 = operands[2];
9935
9936 /* Emit the instruction. */
9937
9938 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9939 if (reload_in_progress)
9940 {
9941 /* Reload doesn't know about the flags register, and doesn't know that
9942 it doesn't want to clobber it. We can only do this with PLUS. */
9943 gcc_assert (code == PLUS);
9944 emit_insn (op);
9945 }
9946 else
9947 {
9948 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9949 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9950 }
9951
9952 /* Fix up the destination if needed. */
9953 if (dst != operands[0])
9954 emit_move_insn (operands[0], dst);
9955 }
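
/* Outside of reload the instruction is emitted with an explicit flags
   clobber, e.g.
     (parallel [(set (reg A) (plus:SI (reg A) (reg B)))
                (clobber (reg:CC FLAGS_REG))])
   so later passes know the arithmetic destroys the condition codes.  */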
9956
9957 /* Return TRUE or FALSE depending on whether the binary operator meets the
9958 appropriate constraints. */
9959
9960 int
9961 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9962 rtx operands[3])
9963 {
9964 rtx dst = operands[0];
9965 rtx src1 = operands[1];
9966 rtx src2 = operands[2];
9967
9968 /* Both source operands cannot be in memory. */
9969 if (MEM_P (src1) && MEM_P (src2))
9970 return 0;
9971
9972 /* Canonicalize operand order for commutative operators. */
9973 if (ix86_swap_binary_operands_p (code, mode, operands))
9974 {
9975 rtx temp = src1;
9976 src1 = src2;
9977 src2 = temp;
9978 }
9979
9980 /* If the destination is memory, we must have a matching source operand. */
9981 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9982 return 0;
9983
9984 /* Source 1 cannot be a constant. */
9985 if (CONSTANT_P (src1))
9986 return 0;
9987
9988 /* Source 1 cannot be a non-matching memory. */
9989 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9990 return 0;
9991
9992 return 1;
9993 }
9994
9995 /* Attempt to expand a unary operator. Make the expansion closer to the
9996 actual machine than just general_operand, which would allow 2 separate
9997 memory references (one output, one input) in a single insn. */
9998
9999 void
10000 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10001 rtx operands[])
10002 {
10003 int matching_memory;
10004 rtx src, dst, op, clob;
10005
10006 dst = operands[0];
10007 src = operands[1];
10008
10009 /* If the destination is memory, and we do not have matching source
10010 operands, do things in registers. */
10011 matching_memory = 0;
10012 if (MEM_P (dst))
10013 {
10014 if (rtx_equal_p (dst, src))
10015 matching_memory = 1;
10016 else
10017 dst = gen_reg_rtx (mode);
10018 }
10019
10020 /* When the source operand is memory, the destination must match. */
10021 if (MEM_P (src) && !matching_memory)
10022 src = force_reg (mode, src);
10023
10024 /* Emit the instruction. */
10025
10026 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10027 if (reload_in_progress || code == NOT)
10028 {
10029 /* Reload doesn't know about the flags register, and doesn't know that
10030 it doesn't want to clobber it. */
10031 gcc_assert (code == NOT);
10032 emit_insn (op);
10033 }
10034 else
10035 {
10036 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10037 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10038 }
10039
10040 /* Fix up the destination if needed. */
10041 if (dst != operands[0])
10042 emit_move_insn (operands[0], dst);
10043 }
10044
10045 /* Return TRUE or FALSE depending on whether the unary operator meets the
10046 appropriate constraints. */
10047
10048 int
10049 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10050 enum machine_mode mode ATTRIBUTE_UNUSED,
10051 rtx operands[2] ATTRIBUTE_UNUSED)
10052 {
10053 /* If one of operands is memory, source and destination must match. */
10054 if ((MEM_P (operands[0])
10055 || MEM_P (operands[1]))
10056 && ! rtx_equal_p (operands[0], operands[1]))
10057 return FALSE;
10058 return TRUE;
10059 }
10060
10061 /* Post-reload splitter for converting an SF or DFmode value in an
10062 SSE register into an unsigned SImode value. */
10063
10064 void
10065 ix86_split_convert_uns_si_sse (rtx operands[])
10066 {
10067 enum machine_mode vecmode;
10068 rtx value, large, zero_or_two31, input, two31, x;
10069
10070 large = operands[1];
10071 zero_or_two31 = operands[2];
10072 input = operands[3];
10073 two31 = operands[4];
10074 vecmode = GET_MODE (large);
10075 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10076
10077 /* Load up the value into the low element. We must ensure that the other
10078 elements are valid floats -- zero is the easiest such value. */
10079 if (MEM_P (input))
10080 {
10081 if (vecmode == V4SFmode)
10082 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10083 else
10084 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10085 }
10086 else
10087 {
10088 input = gen_rtx_REG (vecmode, REGNO (input));
10089 emit_move_insn (value, CONST0_RTX (vecmode));
10090 if (vecmode == V4SFmode)
10091 emit_insn (gen_sse_movss (value, value, input));
10092 else
10093 emit_insn (gen_sse2_movsd (value, value, input));
10094 }
10095
10096 emit_move_insn (large, two31);
10097 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10098
10099 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10100 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10101
10102 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10103 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10104
10105 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10106 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10107
10108 large = gen_rtx_REG (V4SImode, REGNO (large));
10109 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10110
10111 x = gen_rtx_REG (V4SImode, REGNO (value));
10112 if (vecmode == V4SFmode)
10113 emit_insn (gen_sse2_cvttps2dq (x, value));
10114 else
10115 emit_insn (gen_sse2_cvttpd2dq (x, value));
10116 value = x;
10117
10118 emit_insn (gen_xorv4si3 (value, value, large));
10119 }
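
/* Worked example: converting 4000000000.0f.  The input is >= 2^31, so
   zero_or_two31 = 2^31 and the subtraction leaves 1852516352.0; cvttps2dq
   gives 1852516352, and xoring with the shifted mask 0x80000000 restores
   0xee6b2800 = 4000000000.  */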
10120
10121 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10122 Expects the 64-bit DImode to be supplied in a pair of integral
10123 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10124 -mfpmath=sse, !optimize_size only. */
10125
10126 void
10127 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10128 {
10129 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10130 rtx int_xmm, fp_xmm;
10131 rtx biases, exponents;
10132 rtx x;
10133
10134 int_xmm = gen_reg_rtx (V4SImode);
10135 if (TARGET_INTER_UNIT_MOVES)
10136 emit_insn (gen_movdi_to_sse (int_xmm, input));
10137 else if (TARGET_SSE_SPLIT_REGS)
10138 {
10139 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10140 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10141 }
10142 else
10143 {
10144 x = gen_reg_rtx (V2DImode);
10145 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10146 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10147 }
10148
10149 x = gen_rtx_CONST_VECTOR (V4SImode,
10150 gen_rtvec (4, GEN_INT (0x43300000UL),
10151 GEN_INT (0x45300000UL),
10152 const0_rtx, const0_rtx));
10153 exponents = validize_mem (force_const_mem (V4SImode, x));
10154
10155 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10156 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10157
10158 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
10159 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10160 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10161 (0x1.0p84 + double(fp_value_hi_xmm)).
10162 Note these exponents differ by 32. */
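
/* For example, with hi = 1 and lo = 0 the two doubles are 0x1.0p84 + 2^32
   and 0x1.0p52 + 0; after the bias subtraction below they become 2^32 and
   0.0, and the final add produces 4294967296.0.  */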
10163
10164 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10165
10166 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10167 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10168 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10169 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10170 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10171 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10172 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10173 biases = validize_mem (force_const_mem (V2DFmode, biases));
10174 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10175
10176 /* Add the upper and lower DFmode values together. */
10177 if (TARGET_SSE3)
10178 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10179 else
10180 {
10181 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10182 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10183 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10184 }
10185
10186 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10187 }
10188
10189 /* Convert an unsigned SImode value into a DFmode. Only currently used
10190 for SSE, but applicable anywhere. */
10191
10192 void
10193 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10194 {
10195 REAL_VALUE_TYPE TWO31r;
10196 rtx x, fp;
10197
10198 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10199 NULL, 1, OPTAB_DIRECT);
10200
10201 fp = gen_reg_rtx (DFmode);
10202 emit_insn (gen_floatsidf2 (fp, x));
10203
10204 real_ldexp (&TWO31r, &dconst1, 31);
10205 x = const_double_from_real_value (TWO31r, DFmode);
10206
10207 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10208 if (x != target)
10209 emit_move_insn (target, x);
10210 }
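
/* Worked example: input 3000000000.  Adding -2^31 wraps to the signed
   value 852516352, floatsidf yields 852516352.0, and adding TWO31r
   (2147483648.0) gives 3000000000.0.  */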
10211
10212 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10213 32-bit mode; otherwise we have a direct convert instruction. */
10214
10215 void
10216 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10217 {
10218 REAL_VALUE_TYPE TWO32r;
10219 rtx fp_lo, fp_hi, x;
10220
10221 fp_lo = gen_reg_rtx (DFmode);
10222 fp_hi = gen_reg_rtx (DFmode);
10223
10224 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10225
10226 real_ldexp (&TWO32r, &dconst1, 32);
10227 x = const_double_from_real_value (TWO32r, DFmode);
10228 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10229
10230 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10231
10232 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10233 0, OPTAB_DIRECT);
10234 if (x != target)
10235 emit_move_insn (target, x);
10236 }
10237
10238 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10239 For x86_32, -mfpmath=sse, !optimize_size only. */
10240 void
10241 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10242 {
10243 REAL_VALUE_TYPE ONE16r;
10244 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10245
10246 real_ldexp (&ONE16r, &dconst1, 16);
10247 x = const_double_from_real_value (ONE16r, SFmode);
10248 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10249 NULL, 0, OPTAB_DIRECT);
10250 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10251 NULL, 0, OPTAB_DIRECT);
10252 fp_hi = gen_reg_rtx (SFmode);
10253 fp_lo = gen_reg_rtx (SFmode);
10254 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10255 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10256 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10257 0, OPTAB_DIRECT);
10258 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10259 0, OPTAB_DIRECT);
10260 if (!rtx_equal_p (target, fp_hi))
10261 emit_move_insn (target, fp_hi);
10262 }
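
/* Worked example: input 0x00090003.  Then int_lo = 3, int_hi = 9, and the
   result is 9 * 65536.0 + 3.0 = 589827.0, exactly representable in
   SFmode.  */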
10263
10264 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10265 then replicate the value for all elements of the vector
10266 register. */
10267
10268 rtx
10269 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10270 {
10271 rtvec v;
10272 switch (mode)
10273 {
10274 case SFmode:
10275 if (vect)
10276 v = gen_rtvec (4, value, value, value, value);
10277 else
10278 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10279 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10280 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10281
10282 case DFmode:
10283 if (vect)
10284 v = gen_rtvec (2, value, value);
10285 else
10286 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10287 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10288
10289 default:
10290 gcc_unreachable ();
10291 }
10292 }
10293
10294 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10295 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10296 true, then replicate the mask for all elements of the vector register.
10297 If INVERT is true, then create a mask excluding the sign bit. */
10298
10299 rtx
10300 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10301 {
10302 enum machine_mode vec_mode;
10303 HOST_WIDE_INT hi, lo;
10304 int shift = 63;
10305 rtx v;
10306 rtx mask;
10307
10308 /* Find the sign bit, sign extended to 2*HWI. */
10309 if (mode == SFmode)
10310 lo = 0x80000000, hi = lo < 0;
10311 else if (HOST_BITS_PER_WIDE_INT >= 64)
10312 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10313 else
10314 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10315
10316 if (invert)
10317 lo = ~lo, hi = ~hi;
10318
10319 /* Force this value into the low part of a fp vector constant. */
10320 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10321 mask = gen_lowpart (mode, mask);
10322
10323 v = ix86_build_const_vector (mode, vect, mask);
10324 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10325 return force_reg (vec_mode, v);
10326 }
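
/* For DFmode this yields a vector whose element is 0x8000000000000000
   (or 0x7fffffffffffffff when INVERT), and for SFmode 0x80000000
   (or 0x7fffffff), i.e. exactly the IEEE sign bit or its complement.  */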
10327
10328 /* Generate code for floating point ABS or NEG. */
10329
10330 void
10331 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10332 rtx operands[])
10333 {
10334 rtx mask, set, use, clob, dst, src;
10335 bool matching_memory;
10336 bool use_sse = false;
10337 bool vector_mode = VECTOR_MODE_P (mode);
10338 enum machine_mode elt_mode = mode;
10339
10340 if (vector_mode)
10341 {
10342 elt_mode = GET_MODE_INNER (mode);
10343 use_sse = true;
10344 }
10345 else if (TARGET_SSE_MATH)
10346 use_sse = SSE_FLOAT_MODE_P (mode);
10347
10348 /* NEG and ABS performed with SSE use bitwise mask operations.
10349 Create the appropriate mask now. */
10350 if (use_sse)
10351 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10352 else
10353 mask = NULL_RTX;
10354
10355 dst = operands[0];
10356 src = operands[1];
10357
10358 /* If the destination is memory, and we don't have matching source
10359 operands or we're using the x87, do things in registers. */
10360 matching_memory = false;
10361 if (MEM_P (dst))
10362 {
10363 if (use_sse && rtx_equal_p (dst, src))
10364 matching_memory = true;
10365 else
10366 dst = gen_reg_rtx (mode);
10367 }
10368 if (MEM_P (src) && !matching_memory)
10369 src = force_reg (mode, src);
10370
10371 if (vector_mode)
10372 {
10373 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10374 set = gen_rtx_SET (VOIDmode, dst, set);
10375 emit_insn (set);
10376 }
10377 else
10378 {
10379 set = gen_rtx_fmt_e (code, mode, src);
10380 set = gen_rtx_SET (VOIDmode, dst, set);
10381 if (mask)
10382 {
10383 use = gen_rtx_USE (VOIDmode, mask);
10384 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10385 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10386 gen_rtvec (3, set, use, clob)));
10387 }
10388 else
10389 emit_insn (set);
10390 }
10391
10392 if (dst != operands[0])
10393 emit_move_insn (operands[0], dst);
10394 }
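
/* With SSE this reduces NEG to an XOR with the sign-bit mask and ABS to
   an AND with the inverted mask, typically emitted as xorps/xorpd or
   andps/andpd on the chosen vector mode.  */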
10395
10396 /* Expand a copysign operation. Special case operand 0 being a constant. */
10397
10398 void
10399 ix86_expand_copysign (rtx operands[])
10400 {
10401 enum machine_mode mode, vmode;
10402 rtx dest, op0, op1, mask, nmask;
10403
10404 dest = operands[0];
10405 op0 = operands[1];
10406 op1 = operands[2];
10407
10408 mode = GET_MODE (dest);
10409 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10410
10411 if (GET_CODE (op0) == CONST_DOUBLE)
10412 {
10413 rtvec v;
10414
10415 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10416 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10417
10418 if (op0 == CONST0_RTX (mode))
10419 op0 = CONST0_RTX (vmode);
10420 else
10421 {
10422 if (mode == SFmode)
10423 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10424 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10425 else
10426 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10427 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10428 }
10429
10430 mask = ix86_build_signbit_mask (mode, 0, 0);
10431
10432 if (mode == SFmode)
10433 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10434 else
10435 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10436 }
10437 else
10438 {
10439 nmask = ix86_build_signbit_mask (mode, 0, 1);
10440 mask = ix86_build_signbit_mask (mode, 0, 0);
10441
10442 if (mode == SFmode)
10443 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10444 else
10445 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10446 }
10447 }
10448
10449 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10450 be a constant, and so has already been expanded into a vector constant. */
10451
10452 void
10453 ix86_split_copysign_const (rtx operands[])
10454 {
10455 enum machine_mode mode, vmode;
10456 rtx dest, op0, op1, mask, x;
10457
10458 dest = operands[0];
10459 op0 = operands[1];
10460 op1 = operands[2];
10461 mask = operands[3];
10462
10463 mode = GET_MODE (dest);
10464 vmode = GET_MODE (mask);
10465
10466 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10467 x = gen_rtx_AND (vmode, dest, mask);
10468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10469
10470 if (op0 != CONST0_RTX (vmode))
10471 {
10472 x = gen_rtx_IOR (vmode, dest, op0);
10473 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10474 }
10475 }
10476
10477 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10478 so we have to do two masks. */
10479
10480 void
10481 ix86_split_copysign_var (rtx operands[])
10482 {
10483 enum machine_mode mode, vmode;
10484 rtx dest, scratch, op0, op1, mask, nmask, x;
10485
10486 dest = operands[0];
10487 scratch = operands[1];
10488 op0 = operands[2];
10489 op1 = operands[3];
10490 nmask = operands[4];
10491 mask = operands[5];
10492
10493 mode = GET_MODE (dest);
10494 vmode = GET_MODE (mask);
10495
10496 if (rtx_equal_p (op0, op1))
10497 {
10498 /* Shouldn't happen often (it's useless, obviously), but when it does
10499 we'd generate incorrect code if we continue below. */
10500 emit_move_insn (dest, op0);
10501 return;
10502 }
10503
10504 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10505 {
10506 gcc_assert (REGNO (op1) == REGNO (scratch));
10507
10508 x = gen_rtx_AND (vmode, scratch, mask);
10509 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10510
10511 dest = mask;
10512 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10513 x = gen_rtx_NOT (vmode, dest);
10514 x = gen_rtx_AND (vmode, x, op0);
10515 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10516 }
10517 else
10518 {
10519 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10520 {
10521 x = gen_rtx_AND (vmode, scratch, mask);
10522 }
10523 else /* alternative 2,4 */
10524 {
10525 gcc_assert (REGNO (mask) == REGNO (scratch));
10526 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10527 x = gen_rtx_AND (vmode, scratch, op1);
10528 }
10529 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10530
10531 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10532 {
10533 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10534 x = gen_rtx_AND (vmode, dest, nmask);
10535 }
10536 else /* alternative 3,4 */
10537 {
10538 gcc_assert (REGNO (nmask) == REGNO (dest));
10539 dest = nmask;
10540 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10541 x = gen_rtx_AND (vmode, dest, op0);
10542 }
10543 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10544 }
10545
10546 x = gen_rtx_IOR (vmode, dest, scratch);
10547 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10548 }
10549
10550 /* Return TRUE or FALSE depending on whether the first SET in INSN
10551 has source and destination with matching CC modes, and whether the
10552 CC mode is at least as constrained as REQ_MODE. */
10553
10554 int
10555 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10556 {
10557 rtx set;
10558 enum machine_mode set_mode;
10559
10560 set = PATTERN (insn);
10561 if (GET_CODE (set) == PARALLEL)
10562 set = XVECEXP (set, 0, 0);
10563 gcc_assert (GET_CODE (set) == SET);
10564 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10565
10566 set_mode = GET_MODE (SET_DEST (set));
10567 switch (set_mode)
10568 {
10569 case CCNOmode:
10570 if (req_mode != CCNOmode
10571 && (req_mode != CCmode
10572 || XEXP (SET_SRC (set), 1) != const0_rtx))
10573 return 0;
10574 break;
10575 case CCmode:
10576 if (req_mode == CCGCmode)
10577 return 0;
10578 /* FALLTHRU */
10579 case CCGCmode:
10580 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10581 return 0;
10582 /* FALLTHRU */
10583 case CCGOCmode:
10584 if (req_mode == CCZmode)
10585 return 0;
10586 /* FALLTHRU */
10587 case CCZmode:
10588 break;
10589
10590 default:
10591 gcc_unreachable ();
10592 }
10593
10594 return (GET_MODE (SET_SRC (set)) == set_mode);
10595 }
10596
10597 /* Generate insn patterns to do an integer compare of OPERANDS. */
10598
10599 static rtx
10600 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10601 {
10602 enum machine_mode cmpmode;
10603 rtx tmp, flags;
10604
10605 cmpmode = SELECT_CC_MODE (code, op0, op1);
10606 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10607
10608 /* This is very simple, but making the interface the same as in the
10609 FP case makes the rest of the code easier. */
10610 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10611 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10612
10613 /* Return the test that should be put into the flags user, i.e.
10614 the bcc, scc, or cmov instruction. */
10615 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10616 }
10617
10618 /* Figure out whether to use ordered or unordered fp comparisons.
10619 Return the appropriate mode to use. */
10620
10621 enum machine_mode
10622 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10623 {
10624 /* ??? In order to make all comparisons reversible, we do all comparisons
10625 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10626 between trapping and nontrapping forms of comparisons, we can make inequality
10627 comparisons trapping again, since it results in better code when using
10628 FCOM based compares. */
10629 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10630 }
10631
10632 enum machine_mode
10633 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10634 {
10635 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10636 return ix86_fp_compare_mode (code);
10637 switch (code)
10638 {
10639 /* Only zero flag is needed. */
10640 case EQ: /* ZF=0 */
10641 case NE: /* ZF!=0 */
10642 return CCZmode;
10643 /* Codes needing carry flag. */
10644 case GEU: /* CF=0 */
10645 case GTU: /* CF=0 & ZF=0 */
10646 case LTU: /* CF=1 */
10647 case LEU: /* CF=1 | ZF=1 */
10648 return CCmode;
10649 /* Codes possibly doable only with sign flag when
10650 comparing against zero. */
10651 case GE: /* SF=OF or SF=0 */
10652 case LT: /* SF<>OF or SF=1 */
10653 if (op1 == const0_rtx)
10654 return CCGOCmode;
10655 else
10656 /* For other cases Carry flag is not required. */
10657 return CCGCmode;
10658 /* Codes doable only with the sign flag when comparing
10659 against zero, but we lack a jump instruction for them,
10660 so we need to use relational tests against overflow,
10661 which thus needs to be zero. */
10662 case GT: /* ZF=0 & SF=OF */
10663 case LE: /* ZF=1 | SF<>OF */
10664 if (op1 == const0_rtx)
10665 return CCNOmode;
10666 else
10667 return CCGCmode;
10668 /* The strcmp pattern does (use flags), and combine may ask us for the
10669 proper mode. */
10670 case USE:
10671 return CCmode;
10672 default:
10673 gcc_unreachable ();
10674 }
10675 }
10676
10677 /* Return the fixed registers used for condition codes. */
10678
10679 static bool
10680 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10681 {
10682 *p1 = FLAGS_REG;
10683 *p2 = FPSR_REG;
10684 return true;
10685 }
10686
10687 /* If two condition code modes are compatible, return a condition code
10688 mode which is compatible with both. Otherwise, return
10689 VOIDmode. */
10690
10691 static enum machine_mode
10692 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10693 {
10694 if (m1 == m2)
10695 return m1;
10696
10697 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10698 return VOIDmode;
10699
10700 if ((m1 == CCGCmode && m2 == CCGOCmode)
10701 || (m1 == CCGOCmode && m2 == CCGCmode))
10702 return CCGCmode;
10703
10704 switch (m1)
10705 {
10706 default:
10707 gcc_unreachable ();
10708
10709 case CCmode:
10710 case CCGCmode:
10711 case CCGOCmode:
10712 case CCNOmode:
10713 case CCZmode:
10714 switch (m2)
10715 {
10716 default:
10717 return VOIDmode;
10718
10719 case CCmode:
10720 case CCGCmode:
10721 case CCGOCmode:
10722 case CCNOmode:
10723 case CCZmode:
10724 return CCmode;
10725 }
10726
10727 case CCFPmode:
10728 case CCFPUmode:
10729 /* These are only compatible with themselves, which we already
10730 checked above. */
10731 return VOIDmode;
10732 }
10733 }
10734
10735 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10736
10737 int
10738 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10739 {
10740 enum rtx_code swapped_code = swap_condition (code);
10741 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10742 || (ix86_fp_comparison_cost (swapped_code)
10743 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10744 }
10745
10746 /* Swap, force into registers, or otherwise massage the two operands
10747 to a fp comparison. The operands are updated in place; the new
10748 comparison code is returned. */
10749
10750 static enum rtx_code
10751 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10752 {
10753 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10754 rtx op0 = *pop0, op1 = *pop1;
10755 enum machine_mode op_mode = GET_MODE (op0);
10756 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10757
10758 /* All of the unordered compare instructions only work on registers.
10759 The same is true of the fcomi compare instructions. The XFmode
10760 compare instructions require registers except when comparing
10761 against zero or when converting operand 1 from fixed point to
10762 floating point. */
10763
10764 if (!is_sse
10765 && (fpcmp_mode == CCFPUmode
10766 || (op_mode == XFmode
10767 && ! (standard_80387_constant_p (op0) == 1
10768 || standard_80387_constant_p (op1) == 1)
10769 && GET_CODE (op1) != FLOAT)
10770 || ix86_use_fcomi_compare (code)))
10771 {
10772 op0 = force_reg (op_mode, op0);
10773 op1 = force_reg (op_mode, op1);
10774 }
10775 else
10776 {
10777 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10778 things around if they appear profitable, otherwise force op0
10779 into a register. */
10780
10781 if (standard_80387_constant_p (op0) == 0
10782 || (MEM_P (op0)
10783 && ! (standard_80387_constant_p (op1) == 0
10784 || MEM_P (op1))))
10785 {
10786 rtx tmp;
10787 tmp = op0, op0 = op1, op1 = tmp;
10788 code = swap_condition (code);
10789 }
10790
10791 if (!REG_P (op0))
10792 op0 = force_reg (op_mode, op0);
10793
10794 if (CONSTANT_P (op1))
10795 {
10796 int tmp = standard_80387_constant_p (op1);
10797 if (tmp == 0)
10798 op1 = validize_mem (force_const_mem (op_mode, op1));
10799 else if (tmp == 1)
10800 {
10801 if (TARGET_CMOVE)
10802 op1 = force_reg (op_mode, op1);
10803 }
10804 else
10805 op1 = force_reg (op_mode, op1);
10806 }
10807 }
10808
10809 /* Try to rearrange the comparison to make it cheaper. */
10810 if (ix86_fp_comparison_cost (code)
10811 > ix86_fp_comparison_cost (swap_condition (code))
10812 && (REG_P (op1) || !no_new_pseudos))
10813 {
10814 rtx tmp;
10815 tmp = op0, op0 = op1, op1 = tmp;
10816 code = swap_condition (code);
10817 if (!REG_P (op0))
10818 op0 = force_reg (op_mode, op0);
10819 }
10820
10821 *pop0 = op0;
10822 *pop1 = op1;
10823 return code;
10824 }
10825
10826 /* Convert the comparison codes we use to represent an FP comparison
10827 into the integer code that will result in a proper branch. Return
10828 UNKNOWN if no such code is available. */
10829
10830 enum rtx_code
10831 ix86_fp_compare_code_to_integer (enum rtx_code code)
10832 {
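/* After an fcomi or fnstsw/sahf style compare the FP result is reflected
in CF and ZF exactly as after an unsigned integer compare, so each FP
code maps to its unsigned counterpart. */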
10833 switch (code)
10834 {
10835 case GT:
10836 return GTU;
10837 case GE:
10838 return GEU;
10839 case ORDERED:
10840 case UNORDERED:
10841 return code;
10842 break;
10843 case UNEQ:
10844 return EQ;
10845 break;
10846 case UNLT:
10847 return LTU;
10848 break;
10849 case UNLE:
10850 return LEU;
10851 break;
10852 case LTGT:
10853 return NE;
10854 break;
10855 default:
10856 return UNKNOWN;
10857 }
10858 }
10859
10860 /* Split the comparison code CODE into comparisons we can do using
10861 branch instructions. BYPASS_CODE is the comparison code for the
10862 branch that will branch around FIRST_CODE and SECOND_CODE. If one
10863 of the branches is not required, its code is set to UNKNOWN.
10864 We never require more than two branches. */
10865
10866 void
10867 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10868 enum rtx_code *first_code,
10869 enum rtx_code *second_code)
10870 {
10871 *first_code = code;
10872 *bypass_code = UNKNOWN;
10873 *second_code = UNKNOWN;
10874
10875 /* The fcomi comparison sets flags as follows:
10876
10877 cmp ZF PF CF
10878 > 0 0 0
10879 < 0 0 1
10880 = 1 0 0
10881 un 1 1 1 */
10882
10883 switch (code)
10884 {
10885 case GT: /* GTU - CF=0 & ZF=0 */
10886 case GE: /* GEU - CF=0 */
10887 case ORDERED: /* PF=0 */
10888 case UNORDERED: /* PF=1 */
10889 case UNEQ: /* EQ - ZF=1 */
10890 case UNLT: /* LTU - CF=1 */
10891 case UNLE: /* LEU - CF=1 | ZF=1 */
10892 case LTGT: /* EQ - ZF=0 */
10893 break;
10894 case LT: /* LTU - CF=1 - fails on unordered */
10895 *first_code = UNLT;
10896 *bypass_code = UNORDERED;
10897 break;
10898 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10899 *first_code = UNLE;
10900 *bypass_code = UNORDERED;
10901 break;
10902 case EQ: /* EQ - ZF=1 - fails on unordered */
10903 *first_code = UNEQ;
10904 *bypass_code = UNORDERED;
10905 break;
10906 case NE: /* NE - ZF=0 - fails on unordered */
10907 *first_code = LTGT;
10908 *second_code = UNORDERED;
10909 break;
10910 case UNGE: /* GEU - CF=0 - fails on unordered */
10911 *first_code = GE;
10912 *second_code = UNORDERED;
10913 break;
10914 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10915 *first_code = GT;
10916 *second_code = UNORDERED;
10917 break;
10918 default:
10919 gcc_unreachable ();
10920 }
10921 if (!TARGET_IEEE_FP)
10922 {
10923 *second_code = UNKNOWN;
10924 *bypass_code = UNKNOWN;
10925 }
10926 }
10927
10928 /* Return the cost of a comparison done with fcom plus arithmetic operations on AX.
10929 All of the following functions use the number of instructions as the cost metric.
10930 In the future this should be tweaked to compute bytes for optimize_size and
10931 to take into account the performance of various instructions on various CPUs. */
10932 static int
10933 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10934 {
10935 if (!TARGET_IEEE_FP)
10936 return 4;
10937 /* The cost of code output by ix86_expand_fp_compare. */
10938 switch (code)
10939 {
10940 case UNLE:
10941 case UNLT:
10942 case LTGT:
10943 case GT:
10944 case GE:
10945 case UNORDERED:
10946 case ORDERED:
10947 case UNEQ:
10948 return 4;
10949 break;
10950 case LT:
10951 case NE:
10952 case EQ:
10953 case UNGE:
10954 return 5;
10955 break;
10956 case LE:
10957 case UNGT:
10958 return 6;
10959 break;
10960 default:
10961 gcc_unreachable ();
10962 }
10963 }
10964
10965 /* Return cost of comparison done using fcomi operation.
10966 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10967 static int
10968 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10969 {
10970 enum rtx_code bypass_code, first_code, second_code;
10971 /* Return arbitrarily high cost when instruction is not supported - this
10972 prevents gcc from using it. */
10973 if (!TARGET_CMOVE)
10974 return 1024;
10975 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10976 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10977 }
10978
10979 /* Return cost of comparison done using sahf operation.
10980 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10981 static int
10982 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10983 {
10984 enum rtx_code bypass_code, first_code, second_code;
10985 /* Return an arbitrarily high cost when the instruction is not preferred - this
10986 keeps gcc from using it. */
10987 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10988 return 1024;
10989 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10990 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10991 }
10992
10993 /* Compute cost of the comparison done using any method.
10994 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10995 static int
10996 ix86_fp_comparison_cost (enum rtx_code code)
10997 {
10998 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10999 int min;
11000
11001 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11002 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11003
11004 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11005 if (min > sahf_cost)
11006 min = sahf_cost;
11007 if (min > fcomi_cost)
11008 min = fcomi_cost;
11009 return min;
11010 }
11011
11012 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11013
11014 static rtx
11015 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11016 rtx *second_test, rtx *bypass_test)
11017 {
11018 enum machine_mode fpcmp_mode, intcmp_mode;
11019 rtx tmp, tmp2;
11020 int cost = ix86_fp_comparison_cost (code);
11021 enum rtx_code bypass_code, first_code, second_code;
11022
11023 fpcmp_mode = ix86_fp_compare_mode (code);
11024 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11025
11026 if (second_test)
11027 *second_test = NULL_RTX;
11028 if (bypass_test)
11029 *bypass_test = NULL_RTX;
11030
11031 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11032
11033 /* Do fcomi/sahf based test when profitable. */
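/* fcomi (available whenever cmov is) writes the flags directly; otherwise
do fcom/fnstsw into a scratch register and copy AH into the flags with
sahf. */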
11034 if ((TARGET_CMOVE || TARGET_SAHF)
11035 && (bypass_code == UNKNOWN || bypass_test)
11036 && (second_code == UNKNOWN || second_test)
11037 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11038 {
11039 if (TARGET_CMOVE)
11040 {
11041 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11042 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11043 tmp);
11044 emit_insn (tmp);
11045 }
11046 else
11047 {
11048 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11049 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11050 if (!scratch)
11051 scratch = gen_reg_rtx (HImode);
11052 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11053 emit_insn (gen_x86_sahf_1 (scratch));
11054 }
11055
11056 /* The FP codes work out to act like unsigned. */
11057 intcmp_mode = fpcmp_mode;
11058 code = first_code;
11059 if (bypass_code != UNKNOWN)
11060 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11061 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11062 const0_rtx);
11063 if (second_code != UNKNOWN)
11064 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11065 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11066 const0_rtx);
11067 }
11068 else
11069 {
11070 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11071 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11072 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11073 if (!scratch)
11074 scratch = gen_reg_rtx (HImode);
11075 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11076
11077 /* In the unordered case, we have to check C2 for NaNs, which
11078 doesn't happen to work out to anything nice combination-wise.
11079 So do some bit twiddling on the value we've got in AH to come
11080 up with an appropriate set of condition codes. */
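/* In AH the status word bits are C0 = 0x01, C2 = 0x04 and C3 = 0x40, so
masks such as 0x45 below test C3, C2 and C0 together. */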
11081
11082 intcmp_mode = CCNOmode;
11083 switch (code)
11084 {
11085 case GT:
11086 case UNGT:
11087 if (code == GT || !TARGET_IEEE_FP)
11088 {
11089 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11090 code = EQ;
11091 }
11092 else
11093 {
11094 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11095 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11096 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11097 intcmp_mode = CCmode;
11098 code = GEU;
11099 }
11100 break;
11101 case LT:
11102 case UNLT:
11103 if (code == LT && TARGET_IEEE_FP)
11104 {
11105 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11106 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11107 intcmp_mode = CCmode;
11108 code = EQ;
11109 }
11110 else
11111 {
11112 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11113 code = NE;
11114 }
11115 break;
11116 case GE:
11117 case UNGE:
11118 if (code == GE || !TARGET_IEEE_FP)
11119 {
11120 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11121 code = EQ;
11122 }
11123 else
11124 {
11125 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11126 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11127 GEN_INT (0x01)));
11128 code = NE;
11129 }
11130 break;
11131 case LE:
11132 case UNLE:
11133 if (code == LE && TARGET_IEEE_FP)
11134 {
11135 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11136 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11137 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11138 intcmp_mode = CCmode;
11139 code = LTU;
11140 }
11141 else
11142 {
11143 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11144 code = NE;
11145 }
11146 break;
11147 case EQ:
11148 case UNEQ:
11149 if (code == EQ && TARGET_IEEE_FP)
11150 {
11151 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11152 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11153 intcmp_mode = CCmode;
11154 code = EQ;
11155 }
11156 else
11157 {
11158 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11159 code = NE;
11160 break;
11161 }
11162 break;
11163 case NE:
11164 case LTGT:
11165 if (code == NE && TARGET_IEEE_FP)
11166 {
11167 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11168 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11169 GEN_INT (0x40)));
11170 code = NE;
11171 }
11172 else
11173 {
11174 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11175 code = EQ;
11176 }
11177 break;
11178
11179 case UNORDERED:
11180 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11181 code = NE;
11182 break;
11183 case ORDERED:
11184 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11185 code = EQ;
11186 break;
11187
11188 default:
11189 gcc_unreachable ();
11190 }
11191 }
11192
11193 /* Return the test that should be put into the flags user, i.e.
11194 the bcc, scc, or cmov instruction. */
11195 return gen_rtx_fmt_ee (code, VOIDmode,
11196 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11197 const0_rtx);
11198 }
11199
11200 rtx
11201 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11202 {
11203 rtx op0, op1, ret;
11204 op0 = ix86_compare_op0;
11205 op1 = ix86_compare_op1;
11206
11207 if (second_test)
11208 *second_test = NULL_RTX;
11209 if (bypass_test)
11210 *bypass_test = NULL_RTX;
11211
11212 if (ix86_compare_emitted)
11213 {
11214 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11215 ix86_compare_emitted = NULL_RTX;
11216 }
11217 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11218 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11219 second_test, bypass_test);
11220 else
11221 ret = ix86_expand_int_compare (code, op0, op1);
11222
11223 return ret;
11224 }
11225
11226 /* Return true if the CODE will result in a nontrivial jump sequence. */
11227 bool
11228 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11229 {
11230 enum rtx_code bypass_code, first_code, second_code;
11231 if (!TARGET_CMOVE)
11232 return true;
11233 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11234 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11235 }
11236
11237 void
11238 ix86_expand_branch (enum rtx_code code, rtx label)
11239 {
11240 rtx tmp;
11241
11242 /* If we have emitted a compare insn, go straight to simple.
11243 ix86_expand_compare won't emit anything if ix86_compare_emitted
11244 is non NULL. */
11245 if (ix86_compare_emitted)
11246 goto simple;
11247
11248 switch (GET_MODE (ix86_compare_op0))
11249 {
11250 case QImode:
11251 case HImode:
11252 case SImode:
11253 simple:
11254 tmp = ix86_expand_compare (code, NULL, NULL);
11255 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11256 gen_rtx_LABEL_REF (VOIDmode, label),
11257 pc_rtx);
11258 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11259 return;
11260
11261 case SFmode:
11262 case DFmode:
11263 case XFmode:
11264 {
11265 rtvec vec;
11266 int use_fcomi;
11267 enum rtx_code bypass_code, first_code, second_code;
11268
11269 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11270 &ix86_compare_op1);
11271
11272 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11273
11274 /* Check whether we will use the natural sequence with one jump. If
11275 so, we can expand the jump early. Otherwise delay expansion by
11276 creating a compound insn so as not to confuse the optimizers. */
11277 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11278 && TARGET_CMOVE)
11279 {
11280 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11281 gen_rtx_LABEL_REF (VOIDmode, label),
11282 pc_rtx, NULL_RTX, NULL_RTX);
11283 }
11284 else
11285 {
11286 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11287 ix86_compare_op0, ix86_compare_op1);
11288 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11289 gen_rtx_LABEL_REF (VOIDmode, label),
11290 pc_rtx);
11291 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11292
11293 use_fcomi = ix86_use_fcomi_compare (code);
11294 vec = rtvec_alloc (3 + !use_fcomi);
11295 RTVEC_ELT (vec, 0) = tmp;
11296 RTVEC_ELT (vec, 1)
11297 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11298 RTVEC_ELT (vec, 2)
11299 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11300 if (! use_fcomi)
11301 RTVEC_ELT (vec, 3)
11302 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11303
11304 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11305 }
11306 return;
11307 }
11308
11309 case DImode:
11310 if (TARGET_64BIT)
11311 goto simple;
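/* FALLTHRU */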
11312 case TImode:
11313 /* Expand DImode branch into multiple compare+branch. */
11314 {
11315 rtx lo[2], hi[2], label2;
11316 enum rtx_code code1, code2, code3;
11317 enum machine_mode submode;
11318
11319 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11320 {
11321 tmp = ix86_compare_op0;
11322 ix86_compare_op0 = ix86_compare_op1;
11323 ix86_compare_op1 = tmp;
11324 code = swap_condition (code);
11325 }
11326 if (GET_MODE (ix86_compare_op0) == DImode)
11327 {
11328 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11329 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11330 submode = SImode;
11331 }
11332 else
11333 {
11334 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11335 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11336 submode = DImode;
11337 }
11338
11339 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11340 avoid two branches. This costs one extra insn, so disable when
11341 optimizing for size. */
11342
11343 if ((code == EQ || code == NE)
11344 && (!optimize_size
11345 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11346 {
11347 rtx xor0, xor1;
11348
11349 xor1 = hi[0];
11350 if (hi[1] != const0_rtx)
11351 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11352 NULL_RTX, 0, OPTAB_WIDEN);
11353
11354 xor0 = lo[0];
11355 if (lo[1] != const0_rtx)
11356 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11357 NULL_RTX, 0, OPTAB_WIDEN);
11358
11359 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11360 NULL_RTX, 0, OPTAB_WIDEN);
11361
11362 ix86_compare_op0 = tmp;
11363 ix86_compare_op1 = const0_rtx;
11364 ix86_expand_branch (code, label);
11365 return;
11366 }
11367
11368 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11369 op1 is a constant and the low word is zero, then we can just
11370 examine the high word. */
11371
11372 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11373 switch (code)
11374 {
11375 case LT: case LTU: case GE: case GEU:
11376 ix86_compare_op0 = hi[0];
11377 ix86_compare_op1 = hi[1];
11378 ix86_expand_branch (code, label);
11379 return;
11380 default:
11381 break;
11382 }
11383
11384 /* Otherwise, we need two or three jumps. */
11385
11386 label2 = gen_label_rtx ();
11387
11388 code1 = code;
11389 code2 = swap_condition (code);
11390 code3 = unsigned_condition (code);
11391
11392 switch (code)
11393 {
11394 case LT: case GT: case LTU: case GTU:
11395 break;
11396
11397 case LE: code1 = LT; code2 = GT; break;
11398 case GE: code1 = GT; code2 = LT; break;
11399 case LEU: code1 = LTU; code2 = GTU; break;
11400 case GEU: code1 = GTU; code2 = LTU; break;
11401
11402 case EQ: code1 = UNKNOWN; code2 = NE; break;
11403 case NE: code2 = UNKNOWN; break;
11404
11405 default:
11406 gcc_unreachable ();
11407 }
11408
11409 /*
11410 * a < b =>
11411 * if (hi(a) < hi(b)) goto true;
11412 * if (hi(a) > hi(b)) goto false;
11413 * if (lo(a) < lo(b)) goto true;
11414 * false:
11415 */
11416
11417 ix86_compare_op0 = hi[0];
11418 ix86_compare_op1 = hi[1];
11419
11420 if (code1 != UNKNOWN)
11421 ix86_expand_branch (code1, label);
11422 if (code2 != UNKNOWN)
11423 ix86_expand_branch (code2, label2);
11424
11425 ix86_compare_op0 = lo[0];
11426 ix86_compare_op1 = lo[1];
11427 ix86_expand_branch (code3, label);
11428
11429 if (code2 != UNKNOWN)
11430 emit_label (label2);
11431 return;
11432 }
11433
11434 default:
11435 gcc_unreachable ();
11436 }
11437 }
11438
11439 /* Split branch based on floating point condition. */
11440 void
11441 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11442 rtx target1, rtx target2, rtx tmp, rtx pushed)
11443 {
11444 rtx second, bypass;
11445 rtx label = NULL_RTX;
11446 rtx condition;
11447 int bypass_probability = -1, second_probability = -1, probability = -1;
11448 rtx i;
11449
11450 if (target2 != pc_rtx)
11451 {
11452 rtx tmp = target2;
11453 code = reverse_condition_maybe_unordered (code);
11454 target2 = target1;
11455 target1 = tmp;
11456 }
11457
11458 condition = ix86_expand_fp_compare (code, op1, op2,
11459 tmp, &second, &bypass);
11460
11461 /* Remove pushed operand from stack. */
11462 if (pushed)
11463 ix86_free_from_memory (GET_MODE (pushed));
11464
11465 if (split_branch_probability >= 0)
11466 {
11467 /* Distribute the probabilities across the jumps.
11468 Assume that BYPASS and SECOND always test
11469 for UNORDERED. */
11470 probability = split_branch_probability;
11471
11472 /* A value of 1 is low enough that there is no need for the probability
11473 to be updated. Later we may run some experiments and see
11474 whether unordered values are more frequent in practice. */
11475 if (bypass)
11476 bypass_probability = 1;
11477 if (second)
11478 second_probability = 1;
11479 }
11480 if (bypass != NULL_RTX)
11481 {
11482 label = gen_label_rtx ();
11483 i = emit_jump_insn (gen_rtx_SET
11484 (VOIDmode, pc_rtx,
11485 gen_rtx_IF_THEN_ELSE (VOIDmode,
11486 bypass,
11487 gen_rtx_LABEL_REF (VOIDmode,
11488 label),
11489 pc_rtx)));
11490 if (bypass_probability >= 0)
11491 REG_NOTES (i)
11492 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11493 GEN_INT (bypass_probability),
11494 REG_NOTES (i));
11495 }
11496 i = emit_jump_insn (gen_rtx_SET
11497 (VOIDmode, pc_rtx,
11498 gen_rtx_IF_THEN_ELSE (VOIDmode,
11499 condition, target1, target2)));
11500 if (probability >= 0)
11501 REG_NOTES (i)
11502 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11503 GEN_INT (probability),
11504 REG_NOTES (i));
11505 if (second != NULL_RTX)
11506 {
11507 i = emit_jump_insn (gen_rtx_SET
11508 (VOIDmode, pc_rtx,
11509 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11510 target2)));
11511 if (second_probability >= 0)
11512 REG_NOTES (i)
11513 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11514 GEN_INT (second_probability),
11515 REG_NOTES (i));
11516 }
11517 if (label != NULL_RTX)
11518 emit_label (label);
11519 }
11520
11521 int
11522 ix86_expand_setcc (enum rtx_code code, rtx dest)
11523 {
11524 rtx ret, tmp, tmpreg, equiv;
11525 rtx second_test, bypass_test;
11526
11527 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11528 return 0; /* FAIL */
11529
11530 gcc_assert (GET_MODE (dest) == QImode);
11531
11532 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11533 PUT_MODE (ret, QImode);
11534
11535 tmp = dest;
11536 tmpreg = dest;
11537
11538 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
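/* Fold any extra unordered test into the boolean result: a bypass test
is reversed and ANDed in, while a second test is ORed in. */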
11539 if (bypass_test || second_test)
11540 {
11541 rtx test = second_test;
11542 int bypass = 0;
11543 rtx tmp2 = gen_reg_rtx (QImode);
11544 if (bypass_test)
11545 {
11546 gcc_assert (!second_test);
11547 test = bypass_test;
11548 bypass = 1;
11549 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11550 }
11551 PUT_MODE (test, QImode);
11552 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11553
11554 if (bypass)
11555 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11556 else
11557 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11558 }
11559
11560 /* Attach a REG_EQUAL note describing the comparison result. */
11561 if (ix86_compare_op0 && ix86_compare_op1)
11562 {
11563 equiv = simplify_gen_relational (code, QImode,
11564 GET_MODE (ix86_compare_op0),
11565 ix86_compare_op0, ix86_compare_op1);
11566 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11567 }
11568
11569 return 1; /* DONE */
11570 }
11571
11572 /* Expand a comparison setting or clearing the carry flag. Return true
11573 when successful, and set *POP to the comparison operation. */
11574 static bool
11575 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11576 {
11577 enum machine_mode mode =
11578 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11579
11580 /* Do not handle DImode compares that go through special path. Also we can't
11581 deal with FP compares yet. This is possible to add. */
11582 if (mode == (TARGET_64BIT ? TImode : DImode))
11583 return false;
11584 if (FLOAT_MODE_P (mode))
11585 {
11586 rtx second_test = NULL, bypass_test = NULL;
11587 rtx compare_op, compare_seq;
11588
11589 /* Shortcut: following common codes never translate into carry flag compares. */
11590 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11591 || code == ORDERED || code == UNORDERED)
11592 return false;
11593
11594 /* These comparisons require zero flag; swap operands so they won't. */
11595 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11596 && !TARGET_IEEE_FP)
11597 {
11598 rtx tmp = op0;
11599 op0 = op1;
11600 op1 = tmp;
11601 code = swap_condition (code);
11602 }
11603
11604 /* Try to expand the comparison and verify that we end up with a carry-flag-based
11605 comparison. This fails to be true only when we decide to expand the
11606 comparison using arithmetic, which is not a very common scenario. */
11607 start_sequence ();
11608 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11609 &second_test, &bypass_test);
11610 compare_seq = get_insns ();
11611 end_sequence ();
11612
11613 if (second_test || bypass_test)
11614 return false;
11615 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11616 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11617 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11618 else
11619 code = GET_CODE (compare_op);
11620 if (code != LTU && code != GEU)
11621 return false;
11622 emit_insn (compare_seq);
11623 *pop = compare_op;
11624 return true;
11625 }
11626 if (!INTEGRAL_MODE_P (mode))
11627 return false;
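/* Rewrite the comparison into an equivalent LTU or GEU test against a
possibly adjusted OP1, so that its outcome is reflected entirely in the
carry flag. */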
11628 switch (code)
11629 {
11630 case LTU:
11631 case GEU:
11632 break;
11633
11634 /* Convert a==0 into (unsigned)a<1. */
11635 case EQ:
11636 case NE:
11637 if (op1 != const0_rtx)
11638 return false;
11639 op1 = const1_rtx;
11640 code = (code == EQ ? LTU : GEU);
11641 break;
11642
11643 /* Convert a>b into b<a or a>=b+1. */
11644 case GTU:
11645 case LEU:
11646 if (CONST_INT_P (op1))
11647 {
11648 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11649 /* Bail out on overflow. We could still swap the operands, but that
11650 would force loading the constant into a register. */
11651 if (op1 == const0_rtx
11652 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11653 return false;
11654 code = (code == GTU ? GEU : LTU);
11655 }
11656 else
11657 {
11658 rtx tmp = op1;
11659 op1 = op0;
11660 op0 = tmp;
11661 code = (code == GTU ? LTU : GEU);
11662 }
11663 break;
11664
11665 /* Convert a>=0 into (unsigned)a<0x80000000. */
11666 case LT:
11667 case GE:
11668 if (mode == DImode || op1 != const0_rtx)
11669 return false;
11670 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11671 code = (code == LT ? GEU : LTU);
11672 break;
11673 case LE:
11674 case GT:
11675 if (mode == DImode || op1 != constm1_rtx)
11676 return false;
11677 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11678 code = (code == LE ? GEU : LTU);
11679 break;
11680
11681 default:
11682 return false;
11683 }
11684 /* Swapping operands may cause a constant to appear as the first operand. */
11685 if (!nonimmediate_operand (op0, VOIDmode))
11686 {
11687 if (no_new_pseudos)
11688 return false;
11689 op0 = force_reg (mode, op0);
11690 }
11691 ix86_compare_op0 = op0;
11692 ix86_compare_op1 = op1;
11693 *pop = ix86_expand_compare (code, NULL, NULL);
11694 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11695 return true;
11696 }
11697
11698 int
11699 ix86_expand_int_movcc (rtx operands[])
11700 {
11701 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11702 rtx compare_seq, compare_op;
11703 rtx second_test, bypass_test;
11704 enum machine_mode mode = GET_MODE (operands[0]);
11705 bool sign_bit_compare_p = false;
11706
11707 start_sequence ();
11708 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11709 compare_seq = get_insns ();
11710 end_sequence ();
11711
11712 compare_code = GET_CODE (compare_op);
11713
11714 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11715 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11716 sign_bit_compare_p = true;
11717
11718 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11719 HImode insns, we'd be swallowed in word prefix ops. */
11720
11721 if ((mode != HImode || TARGET_FAST_PREFIX)
11722 && (mode != (TARGET_64BIT ? TImode : DImode))
11723 && CONST_INT_P (operands[2])
11724 && CONST_INT_P (operands[3]))
11725 {
11726 rtx out = operands[0];
11727 HOST_WIDE_INT ct = INTVAL (operands[2]);
11728 HOST_WIDE_INT cf = INTVAL (operands[3]);
11729 HOST_WIDE_INT diff;
11730
11731 diff = ct - cf;
11732 /* Sign bit compares are better done using shifts than by using
11733 sbb. */
11734 if (sign_bit_compare_p
11735 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11736 ix86_compare_op1, &compare_op))
11737 {
11738 /* Detect overlap between destination and compare sources. */
11739 rtx tmp = out;
11740
11741 if (!sign_bit_compare_p)
11742 {
11743 bool fpcmp = false;
11744
11745 compare_code = GET_CODE (compare_op);
11746
11747 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11748 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11749 {
11750 fpcmp = true;
11751 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11752 }
11753
11754 /* To simplify rest of code, restrict to the GEU case. */
11755 if (compare_code == LTU)
11756 {
11757 HOST_WIDE_INT tmp = ct;
11758 ct = cf;
11759 cf = tmp;
11760 compare_code = reverse_condition (compare_code);
11761 code = reverse_condition (code);
11762 }
11763 else
11764 {
11765 if (fpcmp)
11766 PUT_CODE (compare_op,
11767 reverse_condition_maybe_unordered
11768 (GET_CODE (compare_op)));
11769 else
11770 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11771 }
11772 diff = ct - cf;
11773
11774 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11775 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11776 tmp = gen_reg_rtx (mode);
11777
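/* The x86_mov*cc_0_m1 patterns expand to the sbb reg,reg idiom: the
result is all ones when the carry flag is set and zero otherwise. */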
11778 if (mode == DImode)
11779 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11780 else
11781 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11782 }
11783 else
11784 {
11785 if (code == GT || code == GE)
11786 code = reverse_condition (code);
11787 else
11788 {
11789 HOST_WIDE_INT tmp = ct;
11790 ct = cf;
11791 cf = tmp;
11792 diff = ct - cf;
11793 }
11794 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11795 ix86_compare_op1, VOIDmode, 0, -1);
11796 }
11797
11798 if (diff == 1)
11799 {
11800 /*
11801 * cmpl op0,op1
11802 * sbbl dest,dest
11803 * [addl dest, ct]
11804 *
11805 * Size 5 - 8.
11806 */
11807 if (ct)
11808 tmp = expand_simple_binop (mode, PLUS,
11809 tmp, GEN_INT (ct),
11810 copy_rtx (tmp), 1, OPTAB_DIRECT);
11811 }
11812 else if (cf == -1)
11813 {
11814 /*
11815 * cmpl op0,op1
11816 * sbbl dest,dest
11817 * orl $ct, dest
11818 *
11819 * Size 8.
11820 */
11821 tmp = expand_simple_binop (mode, IOR,
11822 tmp, GEN_INT (ct),
11823 copy_rtx (tmp), 1, OPTAB_DIRECT);
11824 }
11825 else if (diff == -1 && ct)
11826 {
11827 /*
11828 * cmpl op0,op1
11829 * sbbl dest,dest
11830 * notl dest
11831 * [addl dest, cf]
11832 *
11833 * Size 8 - 11.
11834 */
11835 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11836 if (cf)
11837 tmp = expand_simple_binop (mode, PLUS,
11838 copy_rtx (tmp), GEN_INT (cf),
11839 copy_rtx (tmp), 1, OPTAB_DIRECT);
11840 }
11841 else
11842 {
11843 /*
11844 * cmpl op0,op1
11845 * sbbl dest,dest
11846 * [notl dest]
11847 * andl cf - ct, dest
11848 * [addl dest, ct]
11849 *
11850 * Size 8 - 11.
11851 */
11852
11853 if (cf == 0)
11854 {
11855 cf = ct;
11856 ct = 0;
11857 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11858 }
11859
11860 tmp = expand_simple_binop (mode, AND,
11861 copy_rtx (tmp),
11862 gen_int_mode (cf - ct, mode),
11863 copy_rtx (tmp), 1, OPTAB_DIRECT);
11864 if (ct)
11865 tmp = expand_simple_binop (mode, PLUS,
11866 copy_rtx (tmp), GEN_INT (ct),
11867 copy_rtx (tmp), 1, OPTAB_DIRECT);
11868 }
11869
11870 if (!rtx_equal_p (tmp, out))
11871 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11872
11873 return 1; /* DONE */
11874 }
11875
11876 if (diff < 0)
11877 {
11878 HOST_WIDE_INT tmp;
11879 tmp = ct, ct = cf, cf = tmp;
11880 diff = -diff;
11881 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11882 {
11883 /* We may be reversing an unordered compare to a normal compare, which
11884 is not valid in general (we may convert a non-trapping condition
11885 to a trapping one); however, on i386 we currently emit all
11886 comparisons unordered. */
11887 compare_code = reverse_condition_maybe_unordered (compare_code);
11888 code = reverse_condition_maybe_unordered (code);
11889 }
11890 else
11891 {
11892 compare_code = reverse_condition (compare_code);
11893 code = reverse_condition (code);
11894 }
11895 }
11896
11897 compare_code = UNKNOWN;
11898 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11899 && CONST_INT_P (ix86_compare_op1))
11900 {
11901 if (ix86_compare_op1 == const0_rtx
11902 && (code == LT || code == GE))
11903 compare_code = code;
11904 else if (ix86_compare_op1 == constm1_rtx)
11905 {
11906 if (code == LE)
11907 compare_code = LT;
11908 else if (code == GT)
11909 compare_code = GE;
11910 }
11911 }
11912
11913 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11914 if (compare_code != UNKNOWN
11915 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11916 && (cf == -1 || ct == -1))
11917 {
11918 /* If lea code below could be used, only optimize
11919 if it results in a 2 insn sequence. */
11920
11921 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11922 || diff == 3 || diff == 5 || diff == 9)
11923 || (compare_code == LT && ct == -1)
11924 || (compare_code == GE && cf == -1))
11925 {
11926 /*
11927 * notl op1 (if necessary)
11928 * sarl $31, op1
11929 * orl cf, op1
11930 */
11931 if (ct != -1)
11932 {
11933 cf = ct;
11934 ct = -1;
11935 code = reverse_condition (code);
11936 }
11937
11938 out = emit_store_flag (out, code, ix86_compare_op0,
11939 ix86_compare_op1, VOIDmode, 0, -1);
11940
11941 out = expand_simple_binop (mode, IOR,
11942 out, GEN_INT (cf),
11943 out, 1, OPTAB_DIRECT);
11944 if (out != operands[0])
11945 emit_move_insn (operands[0], out);
11946
11947 return 1; /* DONE */
11948 }
11949 }
11950
11951
11952 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11953 || diff == 3 || diff == 5 || diff == 9)
11954 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11955 && (mode != DImode
11956 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11957 {
11958 /*
11959 * xorl dest,dest
11960 * cmpl op1,op2
11961 * setcc dest
11962 * lea cf(dest*(ct-cf)),dest
11963 *
11964 * Size 14.
11965 *
11966 * This also catches the degenerate setcc-only case.
11967 */
11968
11969 rtx tmp;
11970 int nops;
11971
11972 out = emit_store_flag (out, code, ix86_compare_op0,
11973 ix86_compare_op1, VOIDmode, 0, 1);
11974
11975 nops = 0;
11976 /* On x86_64 the lea instruction operates on Pmode, so we need
11977 to get the arithmetic done in the proper mode to match. */
11978 if (diff == 1)
11979 tmp = copy_rtx (out);
11980 else
11981 {
11982 rtx out1;
11983 out1 = copy_rtx (out);
11984 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11985 nops++;
11986 if (diff & 1)
11987 {
11988 tmp = gen_rtx_PLUS (mode, tmp, out1);
11989 nops++;
11990 }
11991 }
11992 if (cf != 0)
11993 {
11994 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11995 nops++;
11996 }
11997 if (!rtx_equal_p (tmp, out))
11998 {
11999 if (nops == 1)
12000 out = force_operand (tmp, copy_rtx (out));
12001 else
12002 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12003 }
12004 if (!rtx_equal_p (out, operands[0]))
12005 emit_move_insn (operands[0], copy_rtx (out));
12006
12007 return 1; /* DONE */
12008 }
12009
12010 /*
12011 * General case: Jumpful:
12012 * xorl dest,dest cmpl op1, op2
12013 * cmpl op1, op2 movl ct, dest
12014 * setcc dest jcc 1f
12015 * decl dest movl cf, dest
12016 * andl (cf-ct),dest 1:
12017 * addl ct,dest
12018 *
12019 * Size 20. Size 14.
12020 *
12021 * This is reasonably steep, but branch mispredict costs are
12022 * high on modern cpus, so consider failing only if optimizing
12023 * for space.
12024 */
12025
12026 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12027 && BRANCH_COST >= 2)
12028 {
12029 if (cf == 0)
12030 {
12031 cf = ct;
12032 ct = 0;
12033 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
12034 /* We may be reversing an unordered compare to a normal compare,
12035 which is not valid in general (we may convert a non-trapping
12036 condition to a trapping one); however, on i386 we currently
12037 emit all comparisons unordered. */
12038 code = reverse_condition_maybe_unordered (code);
12039 else
12040 {
12041 code = reverse_condition (code);
12042 if (compare_code != UNKNOWN)
12043 compare_code = reverse_condition (compare_code);
12044 }
12045 }
12046
12047 if (compare_code != UNKNOWN)
12048 {
12049 /* notl op1 (if needed)
12050 sarl $31, op1
12051 andl (cf-ct), op1
12052 addl ct, op1
12053
12054 For x < 0 (resp. x <= -1) there will be no notl,
12055 so if possible swap the constants to get rid of the
12056 complement.
12057 True/false will be -1/0 while code below (store flag
12058 followed by decrement) is 0/-1, so the constants need
12059 to be exchanged once more. */
12060
12061 if (compare_code == GE || !cf)
12062 {
12063 code = reverse_condition (code);
12064 compare_code = LT;
12065 }
12066 else
12067 {
12068 HOST_WIDE_INT tmp = cf;
12069 cf = ct;
12070 ct = tmp;
12071 }
12072
12073 out = emit_store_flag (out, code, ix86_compare_op0,
12074 ix86_compare_op1, VOIDmode, 0, -1);
12075 }
12076 else
12077 {
12078 out = emit_store_flag (out, code, ix86_compare_op0,
12079 ix86_compare_op1, VOIDmode, 0, 1);
12080
12081 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12082 copy_rtx (out), 1, OPTAB_DIRECT);
12083 }
12084
12085 out = expand_simple_binop (mode, AND, copy_rtx (out),
12086 gen_int_mode (cf - ct, mode),
12087 copy_rtx (out), 1, OPTAB_DIRECT);
12088 if (ct)
12089 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12090 copy_rtx (out), 1, OPTAB_DIRECT);
12091 if (!rtx_equal_p (out, operands[0]))
12092 emit_move_insn (operands[0], copy_rtx (out));
12093
12094 return 1; /* DONE */
12095 }
12096 }
12097
12098 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12099 {
12100 /* Try a few things more with specific constants and a variable. */
12101
12102 optab op;
12103 rtx var, orig_out, out, tmp;
12104
12105 if (BRANCH_COST <= 2)
12106 return 0; /* FAIL */
12107
12108 /* If one of the two operands is an interesting constant, load a
12109 constant with the above and mask it in with a logical operation. */
12110
12111 if (CONST_INT_P (operands[2]))
12112 {
12113 var = operands[3];
12114 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12115 operands[3] = constm1_rtx, op = and_optab;
12116 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12117 operands[3] = const0_rtx, op = ior_optab;
12118 else
12119 return 0; /* FAIL */
12120 }
12121 else if (CONST_INT_P (operands[3]))
12122 {
12123 var = operands[2];
12124 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12125 operands[2] = constm1_rtx, op = and_optab;
12126 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12127 operands[2] = const0_rtx, op = ior_optab;
12128 else
12129 return 0; /* FAIL */
12130 }
12131 else
12132 return 0; /* FAIL */
12133
12134 orig_out = operands[0];
12135 tmp = gen_reg_rtx (mode);
12136 operands[0] = tmp;
12137
12138 /* Recurse to get the constant loaded. */
12139 if (ix86_expand_int_movcc (operands) == 0)
12140 return 0; /* FAIL */
12141
12142 /* Mask in the interesting variable. */
12143 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12144 OPTAB_WIDEN);
12145 if (!rtx_equal_p (out, orig_out))
12146 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12147
12148 return 1; /* DONE */
12149 }
12150
12151 /*
12152 * For comparison with above,
12153 *
12154 * movl cf,dest
12155 * movl ct,tmp
12156 * cmpl op1,op2
12157 * cmovcc tmp,dest
12158 *
12159 * Size 15.
12160 */
12161
12162 if (! nonimmediate_operand (operands[2], mode))
12163 operands[2] = force_reg (mode, operands[2]);
12164 if (! nonimmediate_operand (operands[3], mode))
12165 operands[3] = force_reg (mode, operands[3]);
12166
12167 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12168 {
12169 rtx tmp = gen_reg_rtx (mode);
12170 emit_move_insn (tmp, operands[3]);
12171 operands[3] = tmp;
12172 }
12173 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12174 {
12175 rtx tmp = gen_reg_rtx (mode);
12176 emit_move_insn (tmp, operands[2]);
12177 operands[2] = tmp;
12178 }
12179
12180 if (! register_operand (operands[2], VOIDmode)
12181 && (mode == QImode
12182 || ! register_operand (operands[3], VOIDmode)))
12183 operands[2] = force_reg (mode, operands[2]);
12184
12185 if (mode == QImode
12186 && ! register_operand (operands[3], VOIDmode))
12187 operands[3] = force_reg (mode, operands[3]);
12188
12189 emit_insn (compare_seq);
12190 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12191 gen_rtx_IF_THEN_ELSE (mode,
12192 compare_op, operands[2],
12193 operands[3])));
12194 if (bypass_test)
12195 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12196 gen_rtx_IF_THEN_ELSE (mode,
12197 bypass_test,
12198 copy_rtx (operands[3]),
12199 copy_rtx (operands[0]))));
12200 if (second_test)
12201 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12202 gen_rtx_IF_THEN_ELSE (mode,
12203 second_test,
12204 copy_rtx (operands[2]),
12205 copy_rtx (operands[0]))));
12206
12207 return 1; /* DONE */
12208 }
12209
12210 /* Swap, force into registers, or otherwise massage the two operands
12211 to an sse comparison with a mask result. Thus we differ a bit from
12212 ix86_prepare_fp_compare_args which expects to produce a flags result.
12213
12214 The DEST operand exists to help determine whether to commute commutative
12215 operators. The POP0/POP1 operands are updated in place. The new
12216 comparison code is returned, or UNKNOWN if not implementable. */
12217
12218 static enum rtx_code
12219 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12220 rtx *pop0, rtx *pop1)
12221 {
12222 rtx tmp;
12223
12224 switch (code)
12225 {
12226 case LTGT:
12227 case UNEQ:
12228 /* We have no LTGT as an operator. We could implement it with
12229 NE & ORDERED, but this requires an extra temporary. It's
12230 not clear that it's worth it. */
12231 return UNKNOWN;
12232
12233 case LT:
12234 case LE:
12235 case UNGT:
12236 case UNGE:
12237 /* These are supported directly. */
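/* They correspond to the lt, le, nlt and nle cmpps/cmpss predicates. */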
12238 break;
12239
12240 case EQ:
12241 case NE:
12242 case UNORDERED:
12243 case ORDERED:
12244 /* For commutative operators, try to canonicalize the destination
12245 operand to be first in the comparison - this helps reload to
12246 avoid extra moves. */
12247 if (!dest || !rtx_equal_p (dest, *pop1))
12248 break;
12249 /* FALLTHRU */
12250
12251 case GE:
12252 case GT:
12253 case UNLE:
12254 case UNLT:
12255 /* These are not supported directly. Swap the comparison operands
12256 to transform into something that is supported. */
12257 tmp = *pop0;
12258 *pop0 = *pop1;
12259 *pop1 = tmp;
12260 code = swap_condition (code);
12261 break;
12262
12263 default:
12264 gcc_unreachable ();
12265 }
12266
12267 return code;
12268 }
12269
12270 /* Detect conditional moves that exactly match min/max operational
12271 semantics. Note that this is IEEE safe, as long as we don't
12272 interchange the operands.
12273
12274 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12275 and TRUE if the operation is successful and instructions are emitted. */
12276
12277 static bool
12278 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12279 rtx cmp_op1, rtx if_true, rtx if_false)
12280 {
12281 enum machine_mode mode;
12282 bool is_min;
12283 rtx tmp;
12284
12285 if (code == LT)
12286 ;
12287 else if (code == UNGE)
12288 {
12289 tmp = if_true;
12290 if_true = if_false;
12291 if_false = tmp;
12292 }
12293 else
12294 return false;
12295
12296 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12297 is_min = true;
12298 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12299 is_min = false;
12300 else
12301 return false;
12302
12303 mode = GET_MODE (dest);
12304
12305 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12306 but MODE may be a vector mode and thus not appropriate. */
12307 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12308 {
12309 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12310 rtvec v;
12311
12312 if_true = force_reg (mode, if_true);
12313 v = gen_rtvec (2, if_true, if_false);
12314 tmp = gen_rtx_UNSPEC (mode, v, u);
12315 }
12316 else
12317 {
12318 code = is_min ? SMIN : SMAX;
12319 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12320 }
12321
12322 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12323 return true;
12324 }
12325
12326 /* Expand an sse vector comparison. Return the register with the result. */
12327
12328 static rtx
12329 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12330 rtx op_true, rtx op_false)
12331 {
12332 enum machine_mode mode = GET_MODE (dest);
12333 rtx x;
12334
12335 cmp_op0 = force_reg (mode, cmp_op0);
12336 if (!nonimmediate_operand (cmp_op1, mode))
12337 cmp_op1 = force_reg (mode, cmp_op1);
12338
12339 if (optimize
12340 || reg_overlap_mentioned_p (dest, op_true)
12341 || reg_overlap_mentioned_p (dest, op_false))
12342 dest = gen_reg_rtx (mode);
12343
12344 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12345 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12346
12347 return dest;
12348 }
12349
12350 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12351 operations. This is used for both scalar and vector conditional moves. */
12352
12353 static void
12354 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12355 {
12356 enum machine_mode mode = GET_MODE (dest);
12357 rtx t2, t3, x;
12358
12359 if (op_false == CONST0_RTX (mode))
12360 {
12361 op_true = force_reg (mode, op_true);
12362 x = gen_rtx_AND (mode, cmp, op_true);
12363 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12364 }
12365 else if (op_true == CONST0_RTX (mode))
12366 {
12367 op_false = force_reg (mode, op_false);
12368 x = gen_rtx_NOT (mode, cmp);
12369 x = gen_rtx_AND (mode, x, op_false);
12370 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12371 }
12372 else
12373 {
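/* General case: dest = (cmp & op_true) | (~cmp & op_false). */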
12374 op_true = force_reg (mode, op_true);
12375 op_false = force_reg (mode, op_false);
12376
12377 t2 = gen_reg_rtx (mode);
12378 if (optimize)
12379 t3 = gen_reg_rtx (mode);
12380 else
12381 t3 = dest;
12382
12383 x = gen_rtx_AND (mode, op_true, cmp);
12384 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12385
12386 x = gen_rtx_NOT (mode, cmp);
12387 x = gen_rtx_AND (mode, x, op_false);
12388 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12389
12390 x = gen_rtx_IOR (mode, t3, t2);
12391 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12392 }
12393 }
12394
12395 /* Expand a floating-point conditional move. Return true if successful. */
12396
12397 int
12398 ix86_expand_fp_movcc (rtx operands[])
12399 {
12400 enum machine_mode mode = GET_MODE (operands[0]);
12401 enum rtx_code code = GET_CODE (operands[1]);
12402 rtx tmp, compare_op, second_test, bypass_test;
12403
12404 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12405 {
12406 enum machine_mode cmode;
12407
12408 /* Since we've no cmove for sse registers, don't force bad register
12409 allocation just to gain access to it. Deny movcc when the
12410 comparison mode doesn't match the move mode. */
12411 cmode = GET_MODE (ix86_compare_op0);
12412 if (cmode == VOIDmode)
12413 cmode = GET_MODE (ix86_compare_op1);
12414 if (cmode != mode)
12415 return 0;
12416
12417 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12418 &ix86_compare_op0,
12419 &ix86_compare_op1);
12420 if (code == UNKNOWN)
12421 return 0;
12422
12423 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12424 ix86_compare_op1, operands[2],
12425 operands[3]))
12426 return 1;
12427
12428 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12429 ix86_compare_op1, operands[2], operands[3]);
12430 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12431 return 1;
12432 }
12433
12434 /* The floating point conditional move instructions don't directly
12435 support conditions resulting from a signed integer comparison. */
12436
12437 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12438
12442 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12443 {
12444 gcc_assert (!second_test && !bypass_test);
12445 tmp = gen_reg_rtx (QImode);
12446 ix86_expand_setcc (code, tmp);
12447 code = NE;
12448 ix86_compare_op0 = tmp;
12449 ix86_compare_op1 = const0_rtx;
12450 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12451 }
12452 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12453 {
12454 tmp = gen_reg_rtx (mode);
12455 emit_move_insn (tmp, operands[3]);
12456 operands[3] = tmp;
12457 }
12458 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12459 {
12460 tmp = gen_reg_rtx (mode);
12461 emit_move_insn (tmp, operands[2]);
12462 operands[2] = tmp;
12463 }
12464
12465 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12466 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12467 operands[2], operands[3])));
12468 if (bypass_test)
12469 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12470 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12471 operands[3], operands[0])));
12472 if (second_test)
12473 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12474 gen_rtx_IF_THEN_ELSE (mode, second_test,
12475 operands[2], operands[0])));
12476
12477 return 1;
12478 }
12479
12480 /* Expand a floating-point vector conditional move; a vcond operation
12481 rather than a movcc operation. */
12482
12483 bool
12484 ix86_expand_fp_vcond (rtx operands[])
12485 {
12486 enum rtx_code code = GET_CODE (operands[3]);
12487 rtx cmp;
12488
12489 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12490 &operands[4], &operands[5]);
12491 if (code == UNKNOWN)
12492 return false;
12493
12494 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12495 operands[5], operands[1], operands[2]))
12496 return true;
12497
12498 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12499 operands[1], operands[2]);
12500 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12501 return true;
12502 }
12503
12504 /* Expand a signed integral vector conditional move. */
12505
12506 bool
12507 ix86_expand_int_vcond (rtx operands[])
12508 {
12509 enum machine_mode mode = GET_MODE (operands[0]);
12510 enum rtx_code code = GET_CODE (operands[3]);
12511 bool negate = false;
12512 rtx x, cop0, cop1;
12513
12514 cop0 = operands[4];
12515 cop1 = operands[5];
12516
12517 /* Canonicalize the comparison to EQ, GT, GTU. */
12518 switch (code)
12519 {
12520 case EQ:
12521 case GT:
12522 case GTU:
12523 break;
12524
12525 case NE:
12526 case LE:
12527 case LEU:
12528 code = reverse_condition (code);
12529 negate = true;
12530 break;
12531
12532 case GE:
12533 case GEU:
12534 code = reverse_condition (code);
12535 negate = true;
12536 /* FALLTHRU */
12537
12538 case LT:
12539 case LTU:
12540 code = swap_condition (code);
12541 x = cop0, cop0 = cop1, cop1 = x;
12542 break;
12543
12544 default:
12545 gcc_unreachable ();
12546 }
12547
12548 /* Unsigned parallel compare is not supported by the hardware. Play some
12549 tricks to turn this into a signed comparison against 0. */
12550 if (code == GTU)
12551 {
12552 cop0 = force_reg (mode, cop0);
12553
12554 switch (mode)
12555 {
12556 case V4SImode:
12557 {
12558 rtx t1, t2, mask;
12559
12560 /* Perform a parallel modulo subtraction. */
12561 t1 = gen_reg_rtx (mode);
12562 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12563
12564 /* Extract the original sign bit of op0. */
12565 mask = GEN_INT (-0x80000000);
12566 mask = gen_rtx_CONST_VECTOR (mode,
12567 gen_rtvec (4, mask, mask, mask, mask));
12568 mask = force_reg (mode, mask);
12569 t2 = gen_reg_rtx (mode);
12570 emit_insn (gen_andv4si3 (t2, cop0, mask));
12571
12572 /* XOR it back into the result of the subtraction. This results
12573 in the sign bit set iff we saw unsigned underflow. */
12574 x = gen_reg_rtx (mode);
12575 emit_insn (gen_xorv4si3 (x, t1, t2));
12576
12577 code = GT;
12578 }
12579 break;
12580
12581 case V16QImode:
12582 case V8HImode:
12583 /* Perform a parallel unsigned saturating subtraction. */
12584 x = gen_reg_rtx (mode);
12585 emit_insn (gen_rtx_SET (VOIDmode, x,
12586 gen_rtx_US_MINUS (mode, cop0, cop1)));
12587
12588 code = EQ;
12589 negate = !negate;
12590 break;
12591
12592 default:
12593 gcc_unreachable ();
12594 }
12595
12596 cop0 = x;
12597 cop1 = CONST0_RTX (mode);
12598 }
12599
12600 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12601 operands[1+negate], operands[2-negate]);
12602
12603 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12604 operands[2-negate]);
12605 return true;
12606 }
12607
12608 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12609 true if we should do zero extension, else sign extension. HIGH_P is
12610 true if we want the N/2 high elements, else the low elements. */
12611
12612 void
12613 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12614 {
12615 enum machine_mode imode = GET_MODE (operands[1]);
12616 rtx (*unpack)(rtx, rtx, rtx);
12617 rtx se, dest;
12618
12619 switch (imode)
12620 {
12621 case V16QImode:
12622 if (high_p)
12623 unpack = gen_vec_interleave_highv16qi;
12624 else
12625 unpack = gen_vec_interleave_lowv16qi;
12626 break;
12627 case V8HImode:
12628 if (high_p)
12629 unpack = gen_vec_interleave_highv8hi;
12630 else
12631 unpack = gen_vec_interleave_lowv8hi;
12632 break;
12633 case V4SImode:
12634 if (high_p)
12635 unpack = gen_vec_interleave_highv4si;
12636 else
12637 unpack = gen_vec_interleave_lowv4si;
12638 break;
12639 default:
12640 gcc_unreachable ();
12641 }
12642
12643 dest = gen_lowpart (imode, operands[0]);
12644
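/* The second interleave operand supplies the high half of each widened
element: zero when zero-extending, or a mask of the element's sign bits
(computed as 0 > OP1) when sign-extending. */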
12645 if (unsigned_p)
12646 se = force_reg (imode, CONST0_RTX (imode));
12647 else
12648 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12649 operands[1], pc_rtx, pc_rtx);
12650
12651 emit_insn (unpack (dest, operands[1], se));
12652 }
12653
12654 /* Expand conditional increment or decrement using adc/sbb instructions.
12655 The default case using setcc followed by the conditional move can be
12656 done by generic code. */
12657 int
12658 ix86_expand_int_addcc (rtx operands[])
12659 {
12660 enum rtx_code code = GET_CODE (operands[1]);
12661 rtx compare_op;
12662 rtx val = const0_rtx;
12663 bool fpcmp = false;
12664 enum machine_mode mode = GET_MODE (operands[0]);
12665
12666 if (operands[3] != const1_rtx
12667 && operands[3] != constm1_rtx)
12668 return 0;
12669 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12670 ix86_compare_op1, &compare_op))
12671 return 0;
12672 code = GET_CODE (compare_op);
12673
12674 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12675 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12676 {
12677 fpcmp = true;
12678 code = ix86_fp_compare_code_to_integer (code);
12679 }
12680
12681 if (code != LTU)
12682 {
12683 val = constm1_rtx;
12684 if (fpcmp)
12685 PUT_CODE (compare_op,
12686 reverse_condition_maybe_unordered
12687 (GET_CODE (compare_op)));
12688 else
12689 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12690 }
12691 PUT_MODE (compare_op, mode);
12692
12693 /* Construct either adc or sbb insn. */
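/* VAL and the (possibly reversed) carry condition were chosen above so
that adding or subtracting VAL together with the carry yields
OPERANDS[2] plus or minus one exactly when the original condition
holds, and OPERANDS[2] unchanged otherwise. */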
12694 if ((code == LTU) == (operands[3] == constm1_rtx))
12695 {
12696 switch (GET_MODE (operands[0]))
12697 {
12698 case QImode:
12699 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12700 break;
12701 case HImode:
12702 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12703 break;
12704 case SImode:
12705 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12706 break;
12707 case DImode:
12708 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12709 break;
12710 default:
12711 gcc_unreachable ();
12712 }
12713 }
12714 else
12715 {
12716 switch (GET_MODE (operands[0]))
12717 {
12718 case QImode:
12719 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12720 break;
12721 case HImode:
12722 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12723 break;
12724 case SImode:
12725 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12726 break;
12727 case DImode:
12728 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12729 break;
12730 default:
12731 gcc_unreachable ();
12732 }
12733 }
12734 return 1; /* DONE */
12735 }
12736
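/* Editorial example (a sketch, not generated verbatim by the expander): the
   kind of source pattern ix86_expand_int_addcc targets is a conditional
   increment or decrement by one, e.g. with unsigned a, b:

       x += (a < b);     =>   cmp  a, b        carry set when a < b
                              adc  x, 0
       x -= (a < b);     =>   cmp  a, b
                              sbb  x, 0

   The unsigned comparison leaves its result in the carry flag, so the
   conditional +/-1 folds into a single adc/sbb instead of setcc + cmove.  */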
12737
12738 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12739 works for floating point parameters and non-offsettable memories.
12740 For pushes, it returns just stack offsets; the values will be saved
12741 in the right order. Maximally three parts are generated. */
12742
12743 static int
12744 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12745 {
12746 int size;
12747
12748 if (!TARGET_64BIT)
12749 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12750 else
12751 size = (GET_MODE_SIZE (mode) + 4) / 8;
12752
12753 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12754 gcc_assert (size >= 2 && size <= 3);
12755
12756 /* Optimize constant pool reference to immediates. This is used by fp
12757 moves, that force all constants to memory to allow combining. */
12758 if (MEM_P (operand) && MEM_READONLY_P (operand))
12759 {
12760 rtx tmp = maybe_get_pool_constant (operand);
12761 if (tmp)
12762 operand = tmp;
12763 }
12764
12765 if (MEM_P (operand) && !offsettable_memref_p (operand))
12766 {
12767 /* The only non-offsettable memories we handle are pushes. */
12768 int ok = push_operand (operand, VOIDmode);
12769
12770 gcc_assert (ok);
12771
12772 operand = copy_rtx (operand);
12773 PUT_MODE (operand, Pmode);
12774 parts[0] = parts[1] = parts[2] = operand;
12775 return size;
12776 }
12777
12778 if (GET_CODE (operand) == CONST_VECTOR)
12779 {
12780 enum machine_mode imode = int_mode_for_mode (mode);
12781 /* Caution: if we looked through a constant pool memory above,
12782 the operand may actually have a different mode now. That's
12783 ok, since we want to pun this all the way back to an integer. */
12784 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12785 gcc_assert (operand != NULL);
12786 mode = imode;
12787 }
12788
12789 if (!TARGET_64BIT)
12790 {
12791 if (mode == DImode)
12792 split_di (&operand, 1, &parts[0], &parts[1]);
12793 else
12794 {
12795 if (REG_P (operand))
12796 {
12797 gcc_assert (reload_completed);
12798 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12799 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12800 if (size == 3)
12801 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12802 }
12803 else if (offsettable_memref_p (operand))
12804 {
12805 operand = adjust_address (operand, SImode, 0);
12806 parts[0] = operand;
12807 parts[1] = adjust_address (operand, SImode, 4);
12808 if (size == 3)
12809 parts[2] = adjust_address (operand, SImode, 8);
12810 }
12811 else if (GET_CODE (operand) == CONST_DOUBLE)
12812 {
12813 REAL_VALUE_TYPE r;
12814 long l[4];
12815
12816 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12817 switch (mode)
12818 {
12819 case XFmode:
12820 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12821 parts[2] = gen_int_mode (l[2], SImode);
12822 break;
12823 case DFmode:
12824 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12825 break;
12826 default:
12827 gcc_unreachable ();
12828 }
12829 parts[1] = gen_int_mode (l[1], SImode);
12830 parts[0] = gen_int_mode (l[0], SImode);
12831 }
12832 else
12833 gcc_unreachable ();
12834 }
12835 }
12836 else
12837 {
12838 if (mode == TImode)
12839 split_ti (&operand, 1, &parts[0], &parts[1]);
12840 if (mode == XFmode || mode == TFmode)
12841 {
12842 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12843 if (REG_P (operand))
12844 {
12845 gcc_assert (reload_completed);
12846 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12847 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12848 }
12849 else if (offsettable_memref_p (operand))
12850 {
12851 operand = adjust_address (operand, DImode, 0);
12852 parts[0] = operand;
12853 parts[1] = adjust_address (operand, upper_mode, 8);
12854 }
12855 else if (GET_CODE (operand) == CONST_DOUBLE)
12856 {
12857 REAL_VALUE_TYPE r;
12858 long l[4];
12859
12860 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12861 real_to_target (l, &r, mode);
12862
12863 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12864 if (HOST_BITS_PER_WIDE_INT >= 64)
12865 parts[0]
12866 = gen_int_mode
12867 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12868 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12869 DImode);
12870 else
12871 parts[0] = immed_double_const (l[0], l[1], DImode);
12872
12873 if (upper_mode == SImode)
12874 parts[1] = gen_int_mode (l[2], SImode);
12875 else if (HOST_BITS_PER_WIDE_INT >= 64)
12876 parts[1]
12877 = gen_int_mode
12878 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12879 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12880 DImode);
12881 else
12882 parts[1] = immed_double_const (l[2], l[3], DImode);
12883 }
12884 else
12885 gcc_unreachable ();
12886 }
12887 }
12888
12889 return size;
12890 }
12891
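/* Editorial example (sketch): on a 32-bit target a DFmode constant such as
   1.0 has the IEEE-754 image 0x3ff0000000000000, so ix86_split_to_parts
   returns the two SImode immediates

       parts[0] = 0x00000000     low 32 bits
       parts[1] = 0x3ff00000     high 32 bits

   matching the little-endian memory layout the split moves must preserve.  */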
12892 /* Emit insns to perform a move or push of DI, DF, and XF values.
12893 All required insns are emitted directly, so the caller does not need
12894 to emit any further moves. Operands 2-4 receive the destination parts
12895 in the correct order; operands 5-7 receive the corresponding source parts. */
12896
12897 void
12898 ix86_split_long_move (rtx operands[])
12899 {
12900 rtx part[2][3];
12901 int nparts;
12902 int push = 0;
12903 int collisions = 0;
12904 enum machine_mode mode = GET_MODE (operands[0]);
12905
12906 /* The DFmode expanders may ask us to move a double.
12907 For a 64-bit target this is a single move. By hiding the fact
12908 here we simplify i386.md splitters. */
12909 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12910 {
12911 /* Optimize constant pool reference to immediates. This is used by
12912 fp moves, that force all constants to memory to allow combining. */
12913
12914 if (MEM_P (operands[1])
12915 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12916 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12917 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12918 if (push_operand (operands[0], VOIDmode))
12919 {
12920 operands[0] = copy_rtx (operands[0]);
12921 PUT_MODE (operands[0], Pmode);
12922 }
12923 else
12924 operands[0] = gen_lowpart (DImode, operands[0]);
12925 operands[1] = gen_lowpart (DImode, operands[1]);
12926 emit_move_insn (operands[0], operands[1]);
12927 return;
12928 }
12929
12930 /* The only non-offsettable memory we handle is push. */
12931 if (push_operand (operands[0], VOIDmode))
12932 push = 1;
12933 else
12934 gcc_assert (!MEM_P (operands[0])
12935 || offsettable_memref_p (operands[0]));
12936
12937 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12938 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12939
12940 /* When emitting a push, take care of source operands on the stack. */
12941 if (push && MEM_P (operands[1])
12942 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12943 {
12944 if (nparts == 3)
12945 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12946 XEXP (part[1][2], 0));
12947 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12948 XEXP (part[1][1], 0));
12949 }
12950
12951 /* We need to do the copy in the right order in case an address register
12952 of the source overlaps the destination. */
12953 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12954 {
12955 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12956 collisions++;
12957 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12958 collisions++;
12959 if (nparts == 3
12960 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12961 collisions++;
12962
12963 /* Collision in the middle part can be handled by reordering. */
12964 if (collisions == 1 && nparts == 3
12965 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12966 {
12967 rtx tmp;
12968 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12969 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12970 }
12971
12972 /* If there are more collisions, we can't handle it by reordering.
12973 Do an lea to the last part and use only one colliding move. */
12974 else if (collisions > 1)
12975 {
12976 rtx base;
12977
12978 collisions = 1;
12979
12980 base = part[0][nparts - 1];
12981
12982 /* Handle the case when the last part isn't valid for lea.
12983 Happens in 64-bit mode storing the 12-byte XFmode. */
12984 if (GET_MODE (base) != Pmode)
12985 base = gen_rtx_REG (Pmode, REGNO (base));
12986
12987 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12988 part[1][0] = replace_equiv_address (part[1][0], base);
12989 part[1][1] = replace_equiv_address (part[1][1],
12990 plus_constant (base, UNITS_PER_WORD));
12991 if (nparts == 3)
12992 part[1][2] = replace_equiv_address (part[1][2],
12993 plus_constant (base, 8));
12994 }
12995 }
12996
12997 if (push)
12998 {
12999 if (!TARGET_64BIT)
13000 {
13001 if (nparts == 3)
13002 {
13003 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13004 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13005 emit_move_insn (part[0][2], part[1][2]);
13006 }
13007 }
13008 else
13009 {
13010 /* In 64-bit mode we don't have a 32-bit push available. If this is a
13011 register, that is OK - we will just use the larger counterpart. We also
13012 retype the memory - this comes from an attempt to avoid a REX prefix
13013 when moving the second half of a TFmode value. */
13014 if (GET_MODE (part[1][1]) == SImode)
13015 {
13016 switch (GET_CODE (part[1][1]))
13017 {
13018 case MEM:
13019 part[1][1] = adjust_address (part[1][1], DImode, 0);
13020 break;
13021
13022 case REG:
13023 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13024 break;
13025
13026 default:
13027 gcc_unreachable ();
13028 }
13029
13030 if (GET_MODE (part[1][0]) == SImode)
13031 part[1][0] = part[1][1];
13032 }
13033 }
13034 emit_move_insn (part[0][1], part[1][1]);
13035 emit_move_insn (part[0][0], part[1][0]);
13036 return;
13037 }
13038
13039 /* Choose correct order to not overwrite the source before it is copied. */
13040 if ((REG_P (part[0][0])
13041 && REG_P (part[1][1])
13042 && (REGNO (part[0][0]) == REGNO (part[1][1])
13043 || (nparts == 3
13044 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13045 || (collisions > 0
13046 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13047 {
13048 if (nparts == 3)
13049 {
13050 operands[2] = part[0][2];
13051 operands[3] = part[0][1];
13052 operands[4] = part[0][0];
13053 operands[5] = part[1][2];
13054 operands[6] = part[1][1];
13055 operands[7] = part[1][0];
13056 }
13057 else
13058 {
13059 operands[2] = part[0][1];
13060 operands[3] = part[0][0];
13061 operands[5] = part[1][1];
13062 operands[6] = part[1][0];
13063 }
13064 }
13065 else
13066 {
13067 if (nparts == 3)
13068 {
13069 operands[2] = part[0][0];
13070 operands[3] = part[0][1];
13071 operands[4] = part[0][2];
13072 operands[5] = part[1][0];
13073 operands[6] = part[1][1];
13074 operands[7] = part[1][2];
13075 }
13076 else
13077 {
13078 operands[2] = part[0][0];
13079 operands[3] = part[0][1];
13080 operands[5] = part[1][0];
13081 operands[6] = part[1][1];
13082 }
13083 }
13084
13085 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13086 if (optimize_size)
13087 {
13088 if (CONST_INT_P (operands[5])
13089 && operands[5] != const0_rtx
13090 && REG_P (operands[2]))
13091 {
13092 if (CONST_INT_P (operands[6])
13093 && INTVAL (operands[6]) == INTVAL (operands[5]))
13094 operands[6] = operands[2];
13095
13096 if (nparts == 3
13097 && CONST_INT_P (operands[7])
13098 && INTVAL (operands[7]) == INTVAL (operands[5]))
13099 operands[7] = operands[2];
13100 }
13101
13102 if (nparts == 3
13103 && CONST_INT_P (operands[6])
13104 && operands[6] != const0_rtx
13105 && REG_P (operands[3])
13106 && CONST_INT_P (operands[7])
13107 && INTVAL (operands[7]) == INTVAL (operands[6]))
13108 operands[7] = operands[3];
13109 }
13110
13111 emit_move_insn (operands[2], operands[5]);
13112 emit_move_insn (operands[3], operands[6]);
13113 if (nparts == 3)
13114 emit_move_insn (operands[4], operands[7]);
13115
13116 return;
13117 }
13118
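/* Editorial example (sketch): the collision handling above matters for a
   32-bit DImode load whose address register is also a destination half,

       (set (reg:DI ax) (mem:DI (reg:SI ax)))

   Loading the low half first would clobber the address, so the parts are
   reordered to emit

       movl 4(%eax), %edx
       movl (%eax), %eax

   rather than the reverse.  */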
13119 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13120 left shift by a constant, either using a single shift or
13121 a sequence of add instructions. */
13122
13123 static void
13124 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13125 {
13126 if (count == 1)
13127 {
13128 emit_insn ((mode == DImode
13129 ? gen_addsi3
13130 : gen_adddi3) (operand, operand, operand));
13131 }
13132 else if (!optimize_size
13133 && count * ix86_cost->add <= ix86_cost->shift_const)
13134 {
13135 int i;
13136 for (i=0; i<count; i++)
13137 {
13138 emit_insn ((mode == DImode
13139 ? gen_addsi3
13140 : gen_adddi3) (operand, operand, operand));
13141 }
13142 }
13143 else
13144 emit_insn ((mode == DImode
13145 ? gen_ashlsi3
13146 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13147 }
13148
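/* Editorial example (sketch): with cost tables where two additions are no
   more expensive than a constant shift, a shift of a half by 2 may come out
   as

       x <<= 2;   =>   addl %eax, %eax
                       addl %eax, %eax

   while at -Os, or when the additions would cost more, a single sall/shlq
   is used instead (a count of 1 is always emitted as an add).  */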
13149 void
13150 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13151 {
13152 rtx low[2], high[2];
13153 int count;
13154 const int single_width = mode == DImode ? 32 : 64;
13155
13156 if (CONST_INT_P (operands[2]))
13157 {
13158 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13159 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13160
13161 if (count >= single_width)
13162 {
13163 emit_move_insn (high[0], low[1]);
13164 emit_move_insn (low[0], const0_rtx);
13165
13166 if (count > single_width)
13167 ix86_expand_ashl_const (high[0], count - single_width, mode);
13168 }
13169 else
13170 {
13171 if (!rtx_equal_p (operands[0], operands[1]))
13172 emit_move_insn (operands[0], operands[1]);
13173 emit_insn ((mode == DImode
13174 ? gen_x86_shld_1
13175 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13176 ix86_expand_ashl_const (low[0], count, mode);
13177 }
13178 return;
13179 }
13180
13181 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13182
13183 if (operands[1] == const1_rtx)
13184 {
13185 /* Assuming we've chosen QImode-capable registers, then 1 << N
13186 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13187 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13188 {
13189 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13190
13191 ix86_expand_clear (low[0]);
13192 ix86_expand_clear (high[0]);
13193 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13194
13195 d = gen_lowpart (QImode, low[0]);
13196 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13197 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13198 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13199
13200 d = gen_lowpart (QImode, high[0]);
13201 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13202 s = gen_rtx_NE (QImode, flags, const0_rtx);
13203 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13204 }
13205
13206 /* Otherwise, we can get the same results by manually performing
13207 a bit extract operation on bit 5/6, and then performing the two
13208 shifts. The two methods of getting 0/1 into low/high are exactly
13209 the same size. Avoiding the shift in the bit extract case helps
13210 pentium4 a bit; no one else seems to care much either way. */
13211 else
13212 {
13213 rtx x;
13214
13215 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13216 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13217 else
13218 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13219 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13220
13221 emit_insn ((mode == DImode
13222 ? gen_lshrsi3
13223 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13224 emit_insn ((mode == DImode
13225 ? gen_andsi3
13226 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13227 emit_move_insn (low[0], high[0]);
13228 emit_insn ((mode == DImode
13229 ? gen_xorsi3
13230 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13231 }
13232
13233 emit_insn ((mode == DImode
13234 ? gen_ashlsi3
13235 : gen_ashldi3) (low[0], low[0], operands[2]));
13236 emit_insn ((mode == DImode
13237 ? gen_ashlsi3
13238 : gen_ashldi3) (high[0], high[0], operands[2]));
13239 return;
13240 }
13241
13242 if (operands[1] == constm1_rtx)
13243 {
13244 /* For -1 << N, we can avoid the shld instruction, because we
13245 know that we're shifting 0...31/63 ones into a -1. */
13246 emit_move_insn (low[0], constm1_rtx);
13247 if (optimize_size)
13248 emit_move_insn (high[0], low[0]);
13249 else
13250 emit_move_insn (high[0], constm1_rtx);
13251 }
13252 else
13253 {
13254 if (!rtx_equal_p (operands[0], operands[1]))
13255 emit_move_insn (operands[0], operands[1]);
13256
13257 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13258 emit_insn ((mode == DImode
13259 ? gen_x86_shld_1
13260 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13261 }
13262
13263 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13264
13265 if (TARGET_CMOVE && scratch)
13266 {
13267 ix86_expand_clear (scratch);
13268 emit_insn ((mode == DImode
13269 ? gen_x86_shift_adj_1
13270 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13271 }
13272 else
13273 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13274 }
13275
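/* Editorial sketch of the variable-count DImode case above on a 32-bit
   target (value in %edx:%eax, count in %cl, no cmove scratch available):

       shldl %cl, %eax, %edx      high = high:low << (cl & 31)
       sall  %cl, %eax            low  = low << (cl & 31)
       ...x86_shift_adj_2: test bit 5 of %cl and, if set, move %eax
          into %edx and clear %eax, modelling shift counts >= 32.

   With TARGET_CMOVE and a scratch register the adjustment is done with two
   cmove instructions instead of a conditional branch.  */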
13276 void
13277 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13278 {
13279 rtx low[2], high[2];
13280 int count;
13281 const int single_width = mode == DImode ? 32 : 64;
13282
13283 if (CONST_INT_P (operands[2]))
13284 {
13285 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13286 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13287
13288 if (count == single_width * 2 - 1)
13289 {
13290 emit_move_insn (high[0], high[1]);
13291 emit_insn ((mode == DImode
13292 ? gen_ashrsi3
13293 : gen_ashrdi3) (high[0], high[0],
13294 GEN_INT (single_width - 1)));
13295 emit_move_insn (low[0], high[0]);
13296
13297 }
13298 else if (count >= single_width)
13299 {
13300 emit_move_insn (low[0], high[1]);
13301 emit_move_insn (high[0], low[0]);
13302 emit_insn ((mode == DImode
13303 ? gen_ashrsi3
13304 : gen_ashrdi3) (high[0], high[0],
13305 GEN_INT (single_width - 1)));
13306 if (count > single_width)
13307 emit_insn ((mode == DImode
13308 ? gen_ashrsi3
13309 : gen_ashrdi3) (low[0], low[0],
13310 GEN_INT (count - single_width)));
13311 }
13312 else
13313 {
13314 if (!rtx_equal_p (operands[0], operands[1]))
13315 emit_move_insn (operands[0], operands[1]);
13316 emit_insn ((mode == DImode
13317 ? gen_x86_shrd_1
13318 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13319 emit_insn ((mode == DImode
13320 ? gen_ashrsi3
13321 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13322 }
13323 }
13324 else
13325 {
13326 if (!rtx_equal_p (operands[0], operands[1]))
13327 emit_move_insn (operands[0], operands[1]);
13328
13329 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13330
13331 emit_insn ((mode == DImode
13332 ? gen_x86_shrd_1
13333 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13334 emit_insn ((mode == DImode
13335 ? gen_ashrsi3
13336 : gen_ashrdi3) (high[0], high[0], operands[2]));
13337
13338 if (TARGET_CMOVE && scratch)
13339 {
13340 emit_move_insn (scratch, high[0]);
13341 emit_insn ((mode == DImode
13342 ? gen_ashrsi3
13343 : gen_ashrdi3) (scratch, scratch,
13344 GEN_INT (single_width - 1)));
13345 emit_insn ((mode == DImode
13346 ? gen_x86_shift_adj_1
13347 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13348 scratch));
13349 }
13350 else
13351 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13352 }
13353 }
13354
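/* Editorial example (sketch): the count == 2 * single_width - 1 case above
   reduces a 32-bit DImode arithmetic shift x >> 63 to a pure sign fill,

       movl  src_high, %edx
       sarl  $31, %edx            %edx = 0 or -1, the sign of x
       movl  %edx, %eax           low half gets the same sign fill

   so both halves become 0 or -1 without any shrd.  */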
13355 void
13356 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13357 {
13358 rtx low[2], high[2];
13359 int count;
13360 const int single_width = mode == DImode ? 32 : 64;
13361
13362 if (CONST_INT_P (operands[2]))
13363 {
13364 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13365 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13366
13367 if (count >= single_width)
13368 {
13369 emit_move_insn (low[0], high[1]);
13370 ix86_expand_clear (high[0]);
13371
13372 if (count > single_width)
13373 emit_insn ((mode == DImode
13374 ? gen_lshrsi3
13375 : gen_lshrdi3) (low[0], low[0],
13376 GEN_INT (count - single_width)));
13377 }
13378 else
13379 {
13380 if (!rtx_equal_p (operands[0], operands[1]))
13381 emit_move_insn (operands[0], operands[1]);
13382 emit_insn ((mode == DImode
13383 ? gen_x86_shrd_1
13384 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13385 emit_insn ((mode == DImode
13386 ? gen_lshrsi3
13387 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13388 }
13389 }
13390 else
13391 {
13392 if (!rtx_equal_p (operands[0], operands[1]))
13393 emit_move_insn (operands[0], operands[1]);
13394
13395 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13396
13397 emit_insn ((mode == DImode
13398 ? gen_x86_shrd_1
13399 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13400 emit_insn ((mode == DImode
13401 ? gen_lshrsi3
13402 : gen_lshrdi3) (high[0], high[0], operands[2]));
13403
13404 /* Heh. By reversing the arguments, we can reuse this pattern. */
13405 if (TARGET_CMOVE && scratch)
13406 {
13407 ix86_expand_clear (scratch);
13408 emit_insn ((mode == DImode
13409 ? gen_x86_shift_adj_1
13410 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13411 scratch));
13412 }
13413 else
13414 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13415 }
13416 }
13417
13418 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13419 static void
13420 predict_jump (int prob)
13421 {
13422 rtx insn = get_last_insn ();
13423 gcc_assert (JUMP_P (insn));
13424 REG_NOTES (insn)
13425 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13426 GEN_INT (prob),
13427 REG_NOTES (insn));
13428 }
13429
13430 /* Helper function for the string operations below. Test VARIABLE whether
13431 it is aligned to VALUE bytes. If true, jump to the label. */
13432 static rtx
13433 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13434 {
13435 rtx label = gen_label_rtx ();
13436 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13437 if (GET_MODE (variable) == DImode)
13438 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13439 else
13440 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13441 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13442 1, label);
13443 if (epilogue)
13444 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13445 else
13446 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13447 return label;
13448 }
13449
13450 /* Adjust COUNTER by the VALUE. */
13451 static void
13452 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13453 {
13454 if (GET_MODE (countreg) == DImode)
13455 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13456 else
13457 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13458 }
13459
13460 /* Zero extend possibly SImode EXP to Pmode register. */
13461 rtx
13462 ix86_zero_extend_to_Pmode (rtx exp)
13463 {
13464 rtx r;
13465 if (GET_MODE (exp) == VOIDmode)
13466 return force_reg (Pmode, exp);
13467 if (GET_MODE (exp) == Pmode)
13468 return copy_to_mode_reg (Pmode, exp);
13469 r = gen_reg_rtx (Pmode);
13470 emit_insn (gen_zero_extendsidi2 (r, exp));
13471 return r;
13472 }
13473
13474 /* Divide COUNTREG by SCALE. */
13475 static rtx
13476 scale_counter (rtx countreg, int scale)
13477 {
13478 rtx sc;
13479 rtx piece_size_mask;
13480
13481 if (scale == 1)
13482 return countreg;
13483 if (CONST_INT_P (countreg))
13484 return GEN_INT (INTVAL (countreg) / scale);
13485 gcc_assert (REG_P (countreg));
13486
13487 piece_size_mask = GEN_INT (scale - 1);
13488 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13489 GEN_INT (exact_log2 (scale)),
13490 NULL, 1, OPTAB_DIRECT);
13491 return sc;
13492 }
13493
13494 /* Return mode for the memcpy/memset loop counter. Prefer SImode over DImode
13495 for constant loop counts. */
13496
13497 static enum machine_mode
13498 counter_mode (rtx count_exp)
13499 {
13500 if (GET_MODE (count_exp) != VOIDmode)
13501 return GET_MODE (count_exp);
13502 if (GET_CODE (count_exp) != CONST_INT)
13503 return Pmode;
13504 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13505 return DImode;
13506 return SImode;
13507 }
13508
13509 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13510 to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times; the
13511 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
13512 the equivalent loop to set memory to VALUE (assumed to be in MODE).
13513
13514 The size is rounded down to a whole number of chunks moved at once.
13515 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
13516
13517
13518 static void
13519 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13520 rtx destptr, rtx srcptr, rtx value,
13521 rtx count, enum machine_mode mode, int unroll,
13522 int expected_size)
13523 {
13524 rtx out_label, top_label, iter, tmp;
13525 enum machine_mode iter_mode = counter_mode (count);
13526 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13527 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13528 rtx size;
13529 rtx x_addr;
13530 rtx y_addr;
13531 int i;
13532
13533 top_label = gen_label_rtx ();
13534 out_label = gen_label_rtx ();
13535 iter = gen_reg_rtx (iter_mode);
13536
13537 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13538 NULL, 1, OPTAB_DIRECT);
13539 /* Those two should combine. */
13540 if (piece_size == const1_rtx)
13541 {
13542 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13543 true, out_label);
13544 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13545 }
13546 emit_move_insn (iter, const0_rtx);
13547
13548 emit_label (top_label);
13549
13550 tmp = convert_modes (Pmode, iter_mode, iter, true);
13551 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13552 destmem = change_address (destmem, mode, x_addr);
13553
13554 if (srcmem)
13555 {
13556 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13557 srcmem = change_address (srcmem, mode, y_addr);
13558
13559 /* When unrolling for chips that reorder memory reads and writes,
13560 we can save registers by using a single temporary.
13561 Also, using 4 temporaries is overkill in 32-bit mode. */
13562 if (!TARGET_64BIT && 0)
13563 {
13564 for (i = 0; i < unroll; i++)
13565 {
13566 if (i)
13567 {
13568 destmem =
13569 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13570 srcmem =
13571 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13572 }
13573 emit_move_insn (destmem, srcmem);
13574 }
13575 }
13576 else
13577 {
13578 rtx tmpreg[4];
13579 gcc_assert (unroll <= 4);
13580 for (i = 0; i < unroll; i++)
13581 {
13582 tmpreg[i] = gen_reg_rtx (mode);
13583 if (i)
13584 {
13585 srcmem =
13586 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13587 }
13588 emit_move_insn (tmpreg[i], srcmem);
13589 }
13590 for (i = 0; i < unroll; i++)
13591 {
13592 if (i)
13593 {
13594 destmem =
13595 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13596 }
13597 emit_move_insn (destmem, tmpreg[i]);
13598 }
13599 }
13600 }
13601 else
13602 for (i = 0; i < unroll; i++)
13603 {
13604 if (i)
13605 destmem =
13606 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13607 emit_move_insn (destmem, value);
13608 }
13609
13610 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13611 true, OPTAB_LIB_WIDEN);
13612 if (tmp != iter)
13613 emit_move_insn (iter, tmp);
13614
13615 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13616 true, top_label);
13617 if (expected_size != -1)
13618 {
13619 expected_size /= GET_MODE_SIZE (mode) * unroll;
13620 if (expected_size == 0)
13621 predict_jump (0);
13622 else if (expected_size > REG_BR_PROB_BASE)
13623 predict_jump (REG_BR_PROB_BASE - 1);
13624 else
13625 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13626 }
13627 else
13628 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13629 iter = ix86_zero_extend_to_Pmode (iter);
13630 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13631 true, OPTAB_LIB_WIDEN);
13632 if (tmp != destptr)
13633 emit_move_insn (destptr, tmp);
13634 if (srcptr)
13635 {
13636 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13637 true, OPTAB_LIB_WIDEN);
13638 if (tmp != srcptr)
13639 emit_move_insn (srcptr, tmp);
13640 }
13641 emit_label (out_label);
13642 }
13643
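/* Editorial sketch (a C-level view of the loop emitted above, assuming a
   copy with MODE of SImode and UNROLL of 1; the real code works on RTL and
   also handles the memset variant and branch prediction notes):

       size = count & ~(chunk - 1);       chunk = mode size * unroll
       iter = 0;
       do
         {
           *(int *) (dest + iter) = *(int *) (src + iter);
           iter += chunk;
         }
       while (iter < size);
       dest += iter;
       src  += iter;

   For chunk == 1 an extra test skips the loop entirely when size is 0.  */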
13644 /* Output "rep; mov" instruction.
13645 Arguments have same meaning as for previous function */
13646 static void
13647 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13648 rtx destptr, rtx srcptr,
13649 rtx count,
13650 enum machine_mode mode)
13651 {
13652 rtx destexp;
13653 rtx srcexp;
13654 rtx countreg;
13655
13656 /* If the size is known, it is shorter to use rep movs. */
13657 if (mode == QImode && CONST_INT_P (count)
13658 && !(INTVAL (count) & 3))
13659 mode = SImode;
13660
13661 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13662 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13663 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13664 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13665 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13666 if (mode != QImode)
13667 {
13668 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13669 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13670 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13671 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13672 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13673 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13674 }
13675 else
13676 {
13677 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13678 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13679 }
13680 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13681 destexp, srcexp));
13682 }
13683
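/* Editorial example (sketch): for a copy whose byte count is a known
   multiple of 4, the function above emits the classic sequence

       movl  $count/4, %ecx
       rep movsl                  copies %ecx SImode chunks from %esi to %edi

   The count is scaled by scale_counter, and DESTEXP/SRCEXP describe the
   final pointer values for the rep_mov pattern.  */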
13684 /* Output "rep; stos" instruction.
13685 Arguments have same meaning as for previous function */
13686 static void
13687 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13688 rtx count,
13689 enum machine_mode mode)
13690 {
13691 rtx destexp;
13692 rtx countreg;
13693
13694 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13695 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13696 value = force_reg (mode, gen_lowpart (mode, value));
13697 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13698 if (mode != QImode)
13699 {
13700 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13701 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13702 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13703 }
13704 else
13705 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13706 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13707 }
13708
13709 static void
13710 emit_strmov (rtx destmem, rtx srcmem,
13711 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13712 {
13713 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13714 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13715 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13716 }
13717
13718 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13719 static void
13720 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13721 rtx destptr, rtx srcptr, rtx count, int max_size)
13722 {
13723 rtx src, dest;
13724 if (CONST_INT_P (count))
13725 {
13726 HOST_WIDE_INT countval = INTVAL (count);
13727 int offset = 0;
13728
13729 if ((countval & 0x10) && max_size > 16)
13730 {
13731 if (TARGET_64BIT)
13732 {
13733 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13734 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13735 }
13736 else
13737 gcc_unreachable ();
13738 offset += 16;
13739 }
13740 if ((countval & 0x08) && max_size > 8)
13741 {
13742 if (TARGET_64BIT)
13743 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13744 else
13745 {
13746 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13747 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13748 }
13749 offset += 8;
13750 }
13751 if ((countval & 0x04) && max_size > 4)
13752 {
13753 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13754 offset += 4;
13755 }
13756 if ((countval & 0x02) && max_size > 2)
13757 {
13758 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13759 offset += 2;
13760 }
13761 if ((countval & 0x01) && max_size > 1)
13762 {
13763 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13764 offset += 1;
13765 }
13766 return;
13767 }
13768 if (max_size > 8)
13769 {
13770 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13771 count, 1, OPTAB_DIRECT);
13772 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13773 count, QImode, 1, 4);
13774 return;
13775 }
13776
13777 /* When single-insn stringops are available, we can cheaply advance the
13778 dest and src pointers with them. Otherwise we save code size by
13779 maintaining an offset (zero is readily available from the preceding rep
13780 operation) and using x86 addressing modes.
13781 if (TARGET_SINGLE_STRINGOP)
13782 {
13783 if (max_size > 4)
13784 {
13785 rtx label = ix86_expand_aligntest (count, 4, true);
13786 src = change_address (srcmem, SImode, srcptr);
13787 dest = change_address (destmem, SImode, destptr);
13788 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13789 emit_label (label);
13790 LABEL_NUSES (label) = 1;
13791 }
13792 if (max_size > 2)
13793 {
13794 rtx label = ix86_expand_aligntest (count, 2, true);
13795 src = change_address (srcmem, HImode, srcptr);
13796 dest = change_address (destmem, HImode, destptr);
13797 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13798 emit_label (label);
13799 LABEL_NUSES (label) = 1;
13800 }
13801 if (max_size > 1)
13802 {
13803 rtx label = ix86_expand_aligntest (count, 1, true);
13804 src = change_address (srcmem, QImode, srcptr);
13805 dest = change_address (destmem, QImode, destptr);
13806 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13807 emit_label (label);
13808 LABEL_NUSES (label) = 1;
13809 }
13810 }
13811 else
13812 {
13813 rtx offset = force_reg (Pmode, const0_rtx);
13814 rtx tmp;
13815
13816 if (max_size > 4)
13817 {
13818 rtx label = ix86_expand_aligntest (count, 4, true);
13819 src = change_address (srcmem, SImode, srcptr);
13820 dest = change_address (destmem, SImode, destptr);
13821 emit_move_insn (dest, src);
13822 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13823 true, OPTAB_LIB_WIDEN);
13824 if (tmp != offset)
13825 emit_move_insn (offset, tmp);
13826 emit_label (label);
13827 LABEL_NUSES (label) = 1;
13828 }
13829 if (max_size > 2)
13830 {
13831 rtx label = ix86_expand_aligntest (count, 2, true);
13832 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13833 src = change_address (srcmem, HImode, tmp);
13834 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13835 dest = change_address (destmem, HImode, tmp);
13836 emit_move_insn (dest, src);
13837 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13838 true, OPTAB_LIB_WIDEN);
13839 if (tmp != offset)
13840 emit_move_insn (offset, tmp);
13841 emit_label (label);
13842 LABEL_NUSES (label) = 1;
13843 }
13844 if (max_size > 1)
13845 {
13846 rtx label = ix86_expand_aligntest (count, 1, true);
13847 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13848 src = change_address (srcmem, QImode, tmp);
13849 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13850 dest = change_address (destmem, QImode, tmp);
13851 emit_move_insn (dest, src);
13852 emit_label (label);
13853 LABEL_NUSES (label) = 1;
13854 }
13855 }
13856 }
13857
13858 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13859 static void
13860 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13861 rtx count, int max_size)
13862 {
13863 count =
13864 expand_simple_binop (counter_mode (count), AND, count,
13865 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13866 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13867 gen_lowpart (QImode, value), count, QImode,
13868 1, max_size / 2);
13869 }
13870
13871 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13872 static void
13873 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13874 {
13875 rtx dest;
13876
13877 if (CONST_INT_P (count))
13878 {
13879 HOST_WIDE_INT countval = INTVAL (count);
13880 int offset = 0;
13881
13882 if ((countval & 0x10) && max_size > 16)
13883 {
13884 if (TARGET_64BIT)
13885 {
13886 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13887 emit_insn (gen_strset (destptr, dest, value));
13888 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13889 emit_insn (gen_strset (destptr, dest, value));
13890 }
13891 else
13892 gcc_unreachable ();
13893 offset += 16;
13894 }
13895 if ((countval & 0x08) && max_size > 8)
13896 {
13897 if (TARGET_64BIT)
13898 {
13899 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13900 emit_insn (gen_strset (destptr, dest, value));
13901 }
13902 else
13903 {
13904 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13905 emit_insn (gen_strset (destptr, dest, value));
13906 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13907 emit_insn (gen_strset (destptr, dest, value));
13908 }
13909 offset += 8;
13910 }
13911 if ((countval & 0x04) && max_size > 4)
13912 {
13913 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13914 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13915 offset += 4;
13916 }
13917 if ((countval & 0x02) && max_size > 2)
13918 {
13919 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13920 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13921 offset += 2;
13922 }
13923 if ((countval & 0x01) && max_size > 1)
13924 {
13925 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13926 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13927 offset += 1;
13928 }
13929 return;
13930 }
13931 if (max_size > 32)
13932 {
13933 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13934 return;
13935 }
13936 if (max_size > 16)
13937 {
13938 rtx label = ix86_expand_aligntest (count, 16, true);
13939 if (TARGET_64BIT)
13940 {
13941 dest = change_address (destmem, DImode, destptr);
13942 emit_insn (gen_strset (destptr, dest, value));
13943 emit_insn (gen_strset (destptr, dest, value));
13944 }
13945 else
13946 {
13947 dest = change_address (destmem, SImode, destptr);
13948 emit_insn (gen_strset (destptr, dest, value));
13949 emit_insn (gen_strset (destptr, dest, value));
13950 emit_insn (gen_strset (destptr, dest, value));
13951 emit_insn (gen_strset (destptr, dest, value));
13952 }
13953 emit_label (label);
13954 LABEL_NUSES (label) = 1;
13955 }
13956 if (max_size > 8)
13957 {
13958 rtx label = ix86_expand_aligntest (count, 8, true);
13959 if (TARGET_64BIT)
13960 {
13961 dest = change_address (destmem, DImode, destptr);
13962 emit_insn (gen_strset (destptr, dest, value));
13963 }
13964 else
13965 {
13966 dest = change_address (destmem, SImode, destptr);
13967 emit_insn (gen_strset (destptr, dest, value));
13968 emit_insn (gen_strset (destptr, dest, value));
13969 }
13970 emit_label (label);
13971 LABEL_NUSES (label) = 1;
13972 }
13973 if (max_size > 4)
13974 {
13975 rtx label = ix86_expand_aligntest (count, 4, true);
13976 dest = change_address (destmem, SImode, destptr);
13977 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13978 emit_label (label);
13979 LABEL_NUSES (label) = 1;
13980 }
13981 if (max_size > 2)
13982 {
13983 rtx label = ix86_expand_aligntest (count, 2, true);
13984 dest = change_address (destmem, HImode, destptr);
13985 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13986 emit_label (label);
13987 LABEL_NUSES (label) = 1;
13988 }
13989 if (max_size > 1)
13990 {
13991 rtx label = ix86_expand_aligntest (count, 1, true);
13992 dest = change_address (destmem, QImode, destptr);
13993 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13994 emit_label (label);
13995 LABEL_NUSES (label) = 1;
13996 }
13997 }
13998
13999 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14000 DESIRED_ALIGNMENT. */
14001 static void
14002 expand_movmem_prologue (rtx destmem, rtx srcmem,
14003 rtx destptr, rtx srcptr, rtx count,
14004 int align, int desired_alignment)
14005 {
14006 if (align <= 1 && desired_alignment > 1)
14007 {
14008 rtx label = ix86_expand_aligntest (destptr, 1, false);
14009 srcmem = change_address (srcmem, QImode, srcptr);
14010 destmem = change_address (destmem, QImode, destptr);
14011 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14012 ix86_adjust_counter (count, 1);
14013 emit_label (label);
14014 LABEL_NUSES (label) = 1;
14015 }
14016 if (align <= 2 && desired_alignment > 2)
14017 {
14018 rtx label = ix86_expand_aligntest (destptr, 2, false);
14019 srcmem = change_address (srcmem, HImode, srcptr);
14020 destmem = change_address (destmem, HImode, destptr);
14021 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14022 ix86_adjust_counter (count, 2);
14023 emit_label (label);
14024 LABEL_NUSES (label) = 1;
14025 }
14026 if (align <= 4 && desired_alignment > 4)
14027 {
14028 rtx label = ix86_expand_aligntest (destptr, 4, false);
14029 srcmem = change_address (srcmem, SImode, srcptr);
14030 destmem = change_address (destmem, SImode, destptr);
14031 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14032 ix86_adjust_counter (count, 4);
14033 emit_label (label);
14034 LABEL_NUSES (label) = 1;
14035 }
14036 gcc_assert (desired_alignment <= 8);
14037 }
14038
14039 /* Store enough of VALUE to DEST to align DEST, known to be aligned by ALIGN, to
14040 DESIRED_ALIGNMENT. */
14041 static void
14042 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14043 int align, int desired_alignment)
14044 {
14045 if (align <= 1 && desired_alignment > 1)
14046 {
14047 rtx label = ix86_expand_aligntest (destptr, 1, false);
14048 destmem = change_address (destmem, QImode, destptr);
14049 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14050 ix86_adjust_counter (count, 1);
14051 emit_label (label);
14052 LABEL_NUSES (label) = 1;
14053 }
14054 if (align <= 2 && desired_alignment > 2)
14055 {
14056 rtx label = ix86_expand_aligntest (destptr, 2, false);
14057 destmem = change_address (destmem, HImode, destptr);
14058 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14059 ix86_adjust_counter (count, 2);
14060 emit_label (label);
14061 LABEL_NUSES (label) = 1;
14062 }
14063 if (align <= 4 && desired_alignment > 4)
14064 {
14065 rtx label = ix86_expand_aligntest (destptr, 4, false);
14066 destmem = change_address (destmem, SImode, destptr);
14067 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14068 ix86_adjust_counter (count, 4);
14069 emit_label (label);
14070 LABEL_NUSES (label) = 1;
14071 }
14072 gcc_assert (desired_alignment <= 8);
14073 }
14074
14075 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14076 static enum stringop_alg
14077 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14078 int *dynamic_check)
14079 {
14080 const struct stringop_algs * algs;
14081
14082 *dynamic_check = -1;
14083 if (memset)
14084 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14085 else
14086 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14087 if (stringop_alg != no_stringop)
14088 return stringop_alg;
14089 /* rep; movsb or rep; movsl is the smallest variant. */
14090 else if (optimize_size)
14091 {
14092 if (!count || (count & 3))
14093 return rep_prefix_1_byte;
14094 else
14095 return rep_prefix_4_byte;
14096 }
14097 /* Very tiny blocks are best handled via the loop; REP is expensive to
14098 set up. */
14099 else if (expected_size != -1 && expected_size < 4)
14100 return loop_1_byte;
14101 else if (expected_size != -1)
14102 {
14103 unsigned int i;
14104 enum stringop_alg alg = libcall;
14105 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14106 {
14107 gcc_assert (algs->size[i].max);
14108 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14109 {
14110 if (algs->size[i].alg != libcall)
14111 alg = algs->size[i].alg;
14112 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14113 last non-libcall inline algorithm. */
14114 if (TARGET_INLINE_ALL_STRINGOPS)
14115 {
14116 /* When the current size is best copied by a libcall, but we are
14117 still forced to inline, run the heuristic below that will pick
14118 code for medium-sized blocks. */
14119 if (alg != libcall)
14120 return alg;
14121 break;
14122 }
14123 else
14124 return algs->size[i].alg;
14125 }
14126 }
14127 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14128 }
14129 /* When asked to inline the call anyway, try to pick a meaningful choice.
14130 We look for the maximal size of block that is faster to copy by hand and
14131 take blocks of at most that size, guessing that the average size will
14132 be roughly half of the block.
14133
14134 If this turns out to be bad, we might simply specify the preferred
14135 choice in ix86_costs. */
14136 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14137 && algs->unknown_size == libcall)
14138 {
14139 int max = -1;
14140 enum stringop_alg alg;
14141 int i;
14142
14143 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14144 if (algs->size[i].alg != libcall && algs->size[i].alg)
14145 max = algs->size[i].max;
14146 if (max == -1)
14147 max = 4096;
14148 alg = decide_alg (count, max / 2, memset, dynamic_check);
14149 gcc_assert (*dynamic_check == -1);
14150 gcc_assert (alg != libcall);
14151 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14152 *dynamic_check = max;
14153 return alg;
14154 }
14155 return algs->unknown_size;
14156 }
14157
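/* Editorial example (sketch, using a hypothetical cost table):

       {{256, loop}, {1024, rep_prefix_4_byte}, {-1, libcall}}

   With an expected size of 700, the loop above returns rep_prefix_4_byte,
   the first entry whose max covers the size.  With an unknown size it falls
   back to algs->unknown_size, unless one of the inline-all-stringops options
   triggers the fallback at the end of decide_alg that picks a maximum size
   and requests a dynamic check.  */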
14158 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14159 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14160 static int
14161 decide_alignment (int align,
14162 enum stringop_alg alg,
14163 int expected_size)
14164 {
14165 int desired_align = 0;
14166 switch (alg)
14167 {
14168 case no_stringop:
14169 gcc_unreachable ();
14170 case loop:
14171 case unrolled_loop:
14172 desired_align = GET_MODE_SIZE (Pmode);
14173 break;
14174 case rep_prefix_8_byte:
14175 desired_align = 8;
14176 break;
14177 case rep_prefix_4_byte:
14178 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14179 copying a whole cache line at once. */
14180 if (TARGET_PENTIUMPRO)
14181 desired_align = 8;
14182 else
14183 desired_align = 4;
14184 break;
14185 case rep_prefix_1_byte:
14186 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14187 copying a whole cache line at once. */
14188 if (TARGET_PENTIUMPRO)
14189 desired_align = 8;
14190 else
14191 desired_align = 1;
14192 break;
14193 case loop_1_byte:
14194 desired_align = 1;
14195 break;
14196 case libcall:
14197 return 0;
14198 }
14199
14200 if (optimize_size)
14201 desired_align = 1;
14202 if (desired_align < align)
14203 desired_align = align;
14204 if (expected_size != -1 && expected_size < 4)
14205 desired_align = align;
14206 return desired_align;
14207 }
14208
14209 /* Return the smallest power of 2 greater than VAL. */
14210 static int
14211 smallest_pow2_greater_than (int val)
14212 {
14213 int ret = 1;
14214 while (ret <= val)
14215 ret <<= 1;
14216 return ret;
14217 }
14218
14219 /* Expand string move (memcpy) operation. Use i386 string operations when
14220 profitable. ix86_expand_setmem contains similar code. The code depends upon
14221 architecture, block size and alignment, but always has the same
14222 overall structure:
14223
14224 1) Prologue guard: Conditional that jumps up to epilogues for small
14225 blocks that can be handled by epilogue alone. This is faster but
14226 also needed for correctness, since prologue assume the block is larger
14227 than the desired alignment.
14228
14229 Optional dynamic check for size and libcall for large
14230 blocks is emitted here too, with -minline-stringops-dynamically.
14231
14232 2) Prologue: copy first few bytes in order to get destination aligned
14233 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14234 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14235 We emit either a jump tree on power of two sized blocks, or a byte loop.
14236
14237 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14238 with specified algorithm.
14239
14240 4) Epilogue: code copying tail of the block that is too small to be
14241 handled by main body (or up to size guarded by prologue guard). */
14242
14243 int
14244 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14245 rtx expected_align_exp, rtx expected_size_exp)
14246 {
14247 rtx destreg;
14248 rtx srcreg;
14249 rtx label = NULL;
14250 rtx tmp;
14251 rtx jump_around_label = NULL;
14252 HOST_WIDE_INT align = 1;
14253 unsigned HOST_WIDE_INT count = 0;
14254 HOST_WIDE_INT expected_size = -1;
14255 int size_needed = 0, epilogue_size_needed;
14256 int desired_align = 0;
14257 enum stringop_alg alg;
14258 int dynamic_check;
14259
14260 if (CONST_INT_P (align_exp))
14261 align = INTVAL (align_exp);
14262 /* i386 can do misaligned access at reasonably increased cost. */
14263 if (CONST_INT_P (expected_align_exp)
14264 && INTVAL (expected_align_exp) > align)
14265 align = INTVAL (expected_align_exp);
14266 if (CONST_INT_P (count_exp))
14267 count = expected_size = INTVAL (count_exp);
14268 if (CONST_INT_P (expected_size_exp) && count == 0)
14269 expected_size = INTVAL (expected_size_exp);
14270
14271 /* Step 0: Decide on preferred algorithm, desired alignment and
14272 size of chunks to be copied by main loop. */
14273
14274 alg = decide_alg (count, expected_size, false, &dynamic_check);
14275 desired_align = decide_alignment (align, alg, expected_size);
14276
14277 if (!TARGET_ALIGN_STRINGOPS)
14278 align = desired_align;
14279
14280 if (alg == libcall)
14281 return 0;
14282 gcc_assert (alg != no_stringop);
14283 if (!count)
14284 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14285 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14286 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14287 switch (alg)
14288 {
14289 case libcall:
14290 case no_stringop:
14291 gcc_unreachable ();
14292 case loop:
14293 size_needed = GET_MODE_SIZE (Pmode);
14294 break;
14295 case unrolled_loop:
14296 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14297 break;
14298 case rep_prefix_8_byte:
14299 size_needed = 8;
14300 break;
14301 case rep_prefix_4_byte:
14302 size_needed = 4;
14303 break;
14304 case rep_prefix_1_byte:
14305 case loop_1_byte:
14306 size_needed = 1;
14307 break;
14308 }
14309
14310 epilogue_size_needed = size_needed;
14311
14312 /* Step 1: Prologue guard. */
14313
14314 /* Alignment code needs count to be in register. */
14315 if (CONST_INT_P (count_exp) && desired_align > align)
14316 {
14317 enum machine_mode mode = SImode;
14318 if (TARGET_64BIT && (count & ~0xffffffff))
14319 mode = DImode;
14320 count_exp = force_reg (mode, count_exp);
14321 }
14322 gcc_assert (desired_align >= 1 && align >= 1);
14323
14324 /* Ensure that alignment prologue won't copy past end of block. */
14325 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14326 {
14327 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14328 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14329 Make sure it is power of 2. */
14330 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14331
14332 label = gen_label_rtx ();
14333 emit_cmp_and_jump_insns (count_exp,
14334 GEN_INT (epilogue_size_needed),
14335 LTU, 0, counter_mode (count_exp), 1, label);
14336 if (GET_CODE (count_exp) == CONST_INT)
14337 ;
14338 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14339 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14340 else
14341 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14342 }
14343 /* Emit code to decide on runtime whether library call or inline should be
14344 used. */
14345 if (dynamic_check != -1)
14346 {
14347 rtx hot_label = gen_label_rtx ();
14348 jump_around_label = gen_label_rtx ();
14349 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14350 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14351 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14352 emit_block_move_via_libcall (dst, src, count_exp, false);
14353 emit_jump (jump_around_label);
14354 emit_label (hot_label);
14355 }
14356
14357 /* Step 2: Alignment prologue. */
14358
14359 if (desired_align > align)
14360 {
14361 /* Except for the first move in the epilogue, we no longer know
14362 the constant offset in the aliasing info. It doesn't seem worth
14363 the pain to maintain it for the first move, so throw away
14364 the info early. */
14365 src = change_address (src, BLKmode, srcreg);
14366 dst = change_address (dst, BLKmode, destreg);
14367 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14368 desired_align);
14369 }
14370 if (label && size_needed == 1)
14371 {
14372 emit_label (label);
14373 LABEL_NUSES (label) = 1;
14374 label = NULL;
14375 }
14376
14377 /* Step 3: Main loop. */
14378
14379 switch (alg)
14380 {
14381 case libcall:
14382 case no_stringop:
14383 gcc_unreachable ();
14384 case loop_1_byte:
14385 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14386 count_exp, QImode, 1, expected_size);
14387 break;
14388 case loop:
14389 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14390 count_exp, Pmode, 1, expected_size);
14391 break;
14392 case unrolled_loop:
14393 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14394 registers for 4 temporaries anyway. */
14395 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14396 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14397 expected_size);
14398 break;
14399 case rep_prefix_8_byte:
14400 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14401 DImode);
14402 break;
14403 case rep_prefix_4_byte:
14404 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14405 SImode);
14406 break;
14407 case rep_prefix_1_byte:
14408 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14409 QImode);
14410 break;
14411 }
14412 /* Properly adjust the offsets of src and dest memory for aliasing. */
14413 if (CONST_INT_P (count_exp))
14414 {
14415 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14416 (count / size_needed) * size_needed);
14417 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14418 (count / size_needed) * size_needed);
14419 }
14420 else
14421 {
14422 src = change_address (src, BLKmode, srcreg);
14423 dst = change_address (dst, BLKmode, destreg);
14424 }
14425
14426 /* Step 4: Epilogue to copy the remaining bytes. */
14427
14428 if (label)
14429 {
14430 /* When the main loop is done, COUNT_EXP might hold original count,
14431 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14432 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14433 bytes. Compensate if needed. */
14434
14435 if (size_needed < epilogue_size_needed)
14436 {
14437 tmp =
14438 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14439 GEN_INT (size_needed - 1), count_exp, 1,
14440 OPTAB_DIRECT);
14441 if (tmp != count_exp)
14442 emit_move_insn (count_exp, tmp);
14443 }
14444 emit_label (label);
14445 LABEL_NUSES (label) = 1;
14446 }
14447
14448 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14449 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14450 epilogue_size_needed);
14451 if (jump_around_label)
14452 emit_label (jump_around_label);
14453 return 1;
14454 }
14455
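/* Editorial sketch of the overall shape of code ix86_expand_movmem emits for
   a variable-sized copy with alg == rep_prefix_4_byte and desired alignment
   of 4 (pseudo assembly; the exact code depends on options and sizes):

       cmpl  $4, count            1) prologue guard: small blocks jump
       jb    .Lepilogue              straight to the epilogue
       ...                        2) alignment prologue: copy 1-3 bytes
                                     until the destination is 4-aligned
       shrl  $2, %ecx             3) main body
       rep movsl
     .Lepilogue:
       ...                        4) epilogue: copy the remaining
                                     count & 3 bytes
*/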
14456 /* Helper function for memset. For a QImode value 0xXY produce
14457 0xXYXYXYXY of the width specified by MODE. This is essentially
14458 a * 0x01010101, but we can do slightly better than
14459 synth_mult by unwinding the sequence by hand on CPUs with
14460 slow multiply. */
14461 static rtx
14462 promote_duplicated_reg (enum machine_mode mode, rtx val)
14463 {
14464 enum machine_mode valmode = GET_MODE (val);
14465 rtx tmp;
14466 int nops = mode == DImode ? 3 : 2;
14467
14468 gcc_assert (mode == SImode || mode == DImode);
14469 if (val == const0_rtx)
14470 return copy_to_mode_reg (mode, const0_rtx);
14471 if (CONST_INT_P (val))
14472 {
14473 HOST_WIDE_INT v = INTVAL (val) & 255;
14474
14475 v |= v << 8;
14476 v |= v << 16;
14477 if (mode == DImode)
14478 v |= (v << 16) << 16;
14479 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14480 }
14481
14482 if (valmode == VOIDmode)
14483 valmode = QImode;
14484 if (valmode != QImode)
14485 val = gen_lowpart (QImode, val);
14486 if (mode == QImode)
14487 return val;
14488 if (!TARGET_PARTIAL_REG_STALL)
14489 nops--;
14490 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14491 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14492 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14493 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14494 {
14495 rtx reg = convert_modes (mode, QImode, val, true);
14496 tmp = promote_duplicated_reg (mode, const1_rtx);
14497 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14498 OPTAB_DIRECT);
14499 }
14500 else
14501 {
14502 rtx reg = convert_modes (mode, QImode, val, true);
14503
14504 if (!TARGET_PARTIAL_REG_STALL)
14505 if (mode == SImode)
14506 emit_insn (gen_movsi_insv_1 (reg, reg));
14507 else
14508 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14509 else
14510 {
14511 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14512 NULL, 1, OPTAB_DIRECT);
14513 reg =
14514 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14515 }
14516 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14517 NULL, 1, OPTAB_DIRECT);
14518 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14519 if (mode == SImode)
14520 return reg;
14521 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14522 NULL, 1, OPTAB_DIRECT);
14523 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14524 return reg;
14525 }
14526 }
14527
14528 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14529 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14530 alignment from ALIGN to DESIRED_ALIGN. */
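/* Editorial example: with size_needed == 8 on a 64-bit target the value is
   promoted to DImode (0xAB -> 0xABABABABABABABAB); with size_needed == 4 on
   a 32-bit target it is promoted to SImode; with size_needed == 1 and no
   extra alignment work the value is returned unpromoted.  */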
14531 static rtx
14532 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14533 {
14534 rtx promoted_val;
14535
14536 if (TARGET_64BIT
14537 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14538 promoted_val = promote_duplicated_reg (DImode, val);
14539 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14540 promoted_val = promote_duplicated_reg (SImode, val);
14541 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14542 promoted_val = promote_duplicated_reg (HImode, val);
14543 else
14544 promoted_val = val;
14545
14546 return promoted_val;
14547 }
14548
14549 /* Expand string set operation (memset/bzero). Use i386 string operations
14550 when profitable. See the ix86_expand_movmem comment for an explanation
14551 of the individual steps performed. */
14552 int
14553 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14554 rtx expected_align_exp, rtx expected_size_exp)
14555 {
14556 rtx destreg;
14557 rtx label = NULL;
14558 rtx tmp;
14559 rtx jump_around_label = NULL;
14560 HOST_WIDE_INT align = 1;
14561 unsigned HOST_WIDE_INT count = 0;
14562 HOST_WIDE_INT expected_size = -1;
14563 int size_needed = 0, epilogue_size_needed;
14564 int desired_align = 0;
14565 enum stringop_alg alg;
14566 rtx promoted_val = NULL;
14567 bool force_loopy_epilogue = false;
14568 int dynamic_check;
14569
14570 if (CONST_INT_P (align_exp))
14571 align = INTVAL (align_exp);
14572 /* i386 can do misaligned access at a reasonable extra cost. */
14573 if (CONST_INT_P (expected_align_exp)
14574 && INTVAL (expected_align_exp) > align)
14575 align = INTVAL (expected_align_exp);
14576 if (CONST_INT_P (count_exp))
14577 count = expected_size = INTVAL (count_exp);
14578 if (CONST_INT_P (expected_size_exp) && count == 0)
14579 expected_size = INTVAL (expected_size_exp);
14580
14581 /* Step 0: Decide on preferred algorithm, desired alignment and
14582 size of chunks to be copied by main loop. */
14583
14584 alg = decide_alg (count, expected_size, true, &dynamic_check);
14585 desired_align = decide_alignment (align, alg, expected_size);
14586
14587 if (!TARGET_ALIGN_STRINGOPS)
14588 align = desired_align;
14589
14590 if (alg == libcall)
14591 return 0;
14592 gcc_assert (alg != no_stringop);
14593 if (!count)
14594 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14595 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14596 switch (alg)
14597 {
14598 case libcall:
14599 case no_stringop:
14600 gcc_unreachable ();
14601 case loop:
14602 size_needed = GET_MODE_SIZE (Pmode);
14603 break;
14604 case unrolled_loop:
14605 size_needed = GET_MODE_SIZE (Pmode) * 4;
14606 break;
14607 case rep_prefix_8_byte:
14608 size_needed = 8;
14609 break;
14610 case rep_prefix_4_byte:
14611 size_needed = 4;
14612 break;
14613 case rep_prefix_1_byte:
14614 case loop_1_byte:
14615 size_needed = 1;
14616 break;
14617 }
14618 epilogue_size_needed = size_needed;
14619
14620 /* Step 1: Prologue guard. */
14621
14622 /* Alignment code needs count to be in register. */
14623 if (CONST_INT_P (count_exp) && desired_align > align)
14624 {
14625 enum machine_mode mode = SImode;
14626 if (TARGET_64BIT && (count & ~0xffffffff))
14627 mode = DImode;
14628 count_exp = force_reg (mode, count_exp);
14629 }
14630 /* Do the cheap promotion to allow better CSE across the
14631 main loop and epilogue (i.e. one load of the big constant in
14632 front of all the code). */
14633 if (CONST_INT_P (val_exp))
14634 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14635 desired_align, align);
14636 /* Ensure that alignment prologue won't copy past end of block. */
14637 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14638 {
14639 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14640 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14641 Make sure it is a power of 2. */
14642 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
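/* Editorial example: for the 8-byte rep-prefix algorithm size_needed is 8,
   so, assuming no extra alignment adjustment is required, the value above
   starts as 7 and is rounded up to the power of two 8 here.  */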
14643
14644 /* To improve performance for small blocks, we jump around the VAL
14645 promoting code. This means that if the promoted VAL is not constant,
14646 we might not use it in the epilogue and have to fall back to the byte
14647 loop variant. */
14648 if (epilogue_size_needed > 2 && !promoted_val)
14649 force_loopy_epilogue = true;
14650 label = gen_label_rtx ();
14651 emit_cmp_and_jump_insns (count_exp,
14652 GEN_INT (epilogue_size_needed),
14653 LTU, 0, counter_mode (count_exp), 1, label);
14654 if (GET_CODE (count_exp) == CONST_INT)
14655 ;
14656 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14657 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14658 else
14659 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14660 }
14661 if (dynamic_check != -1)
14662 {
14663 rtx hot_label = gen_label_rtx ();
14664 jump_around_label = gen_label_rtx ();
14665 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14666 LEU, 0, counter_mode (count_exp), 1, hot_label);
14667 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14668 set_storage_via_libcall (dst, count_exp, val_exp, false);
14669 emit_jump (jump_around_label);
14670 emit_label (hot_label);
14671 }
14672
14673 /* Step 2: Alignment prologue. */
14674
14675 /* Do the expensive promotion once we branched off the small blocks. */
14676 if (!promoted_val)
14677 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14678 desired_align, align);
14679 gcc_assert (desired_align >= 1 && align >= 1);
14680
14681 if (desired_align > align)
14682 {
14683 /* Except for the first move in the epilogue, we no longer know
14684 the constant offset in aliasing info. It doesn't seem worth
14685 the pain to maintain it for the first move, so throw away
14686 the info early. */
14687 dst = change_address (dst, BLKmode, destreg);
14688 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14689 desired_align);
14690 }
14691 if (label && size_needed == 1)
14692 {
14693 emit_label (label);
14694 LABEL_NUSES (label) = 1;
14695 label = NULL;
14696 }
14697
14698 /* Step 3: Main loop. */
14699
14700 switch (alg)
14701 {
14702 case libcall:
14703 case no_stringop:
14704 gcc_unreachable ();
14705 case loop_1_byte:
14706 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14707 count_exp, QImode, 1, expected_size);
14708 break;
14709 case loop:
14710 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14711 count_exp, Pmode, 1, expected_size);
14712 break;
14713 case unrolled_loop:
14714 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14715 count_exp, Pmode, 4, expected_size);
14716 break;
14717 case rep_prefix_8_byte:
14718 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14719 DImode);
14720 break;
14721 case rep_prefix_4_byte:
14722 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14723 SImode);
14724 break;
14725 case rep_prefix_1_byte:
14726 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14727 QImode);
14728 break;
14729 }
14730 /* Properly adjust the offset of the dest memory for aliasing. */
14731 if (CONST_INT_P (count_exp))
14732 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14733 (count / size_needed) * size_needed);
14734 else
14735 dst = change_address (dst, BLKmode, destreg);
14736
14737 /* Step 4: Epilogue to copy the remaining bytes. */
14738
14739 if (label)
14740 {
14741 /* When the main loop is done, COUNT_EXP might hold the original count,
14742 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14743 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14744 bytes. Compensate if needed. */
14745
14746 if (size_needed < desired_align - align)
14747 {
14748 tmp =
14749 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14750 GEN_INT (size_needed - 1), count_exp, 1,
14751 OPTAB_DIRECT);
14752 size_needed = desired_align - align + 1;
14753 if (tmp != count_exp)
14754 emit_move_insn (count_exp, tmp);
14755 }
14756 emit_label (label);
14757 LABEL_NUSES (label) = 1;
14758 }
14759 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14760 {
14761 if (force_loopy_epilogue)
14762 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14763 size_needed);
14764 else
14765 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14766 size_needed);
14767 }
14768 if (jump_around_label)
14769 emit_label (jump_around_label);
14770 return 1;
14771 }
14772
14773 /* Expand strlen. */
14774 int
14775 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14776 {
14777 rtx addr, scratch1, scratch2, scratch3, scratch4;
14778
14779 /* The generic case of the strlen expander is long. Avoid expanding it
14780 unless TARGET_INLINE_ALL_STRINGOPS. */
14781
14782 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14783 && !TARGET_INLINE_ALL_STRINGOPS
14784 && !optimize_size
14785 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14786 return 0;
14787
14788 addr = force_reg (Pmode, XEXP (src, 0));
14789 scratch1 = gen_reg_rtx (Pmode);
14790
14791 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14792 && !optimize_size)
14793 {
14794 /* Well, it seems that some optimizer does not combine a call like
14795 foo (strlen (bar), strlen (bar));
14796 when the move and the subtraction are done here. It does calculate
14797 the length just once when these instructions are done inside
14798 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
14799 often used and I use one fewer register for the lifetime of
14800 output_strlen_unroll(), this is better. */
14801
14802 emit_move_insn (out, addr);
14803
14804 ix86_expand_strlensi_unroll_1 (out, src, align);
14805
14806 /* strlensi_unroll_1 returns the address of the zero at the end of
14807 the string, like memchr(), so compute the length by subtracting
14808 the start address. */
14809 if (TARGET_64BIT)
14810 emit_insn (gen_subdi3 (out, out, addr));
14811 else
14812 emit_insn (gen_subsi3 (out, out, addr));
14813 }
14814 else
14815 {
14816 rtx unspec;
14817 scratch2 = gen_reg_rtx (Pmode);
14818 scratch3 = gen_reg_rtx (Pmode);
14819 scratch4 = force_reg (Pmode, constm1_rtx);
14820
14821 emit_move_insn (scratch3, addr);
14822 eoschar = force_reg (QImode, eoschar);
14823
14824 src = replace_equiv_address_nv (src, scratch3);
14825
14826 /* If .md starts supporting :P, this can be done in .md. */
14827 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14828 scratch4), UNSPEC_SCAS);
14829 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14830 if (TARGET_64BIT)
14831 {
14832 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14833 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14834 }
14835 else
14836 {
14837 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14838 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14839 }
14840 }
14841 return 1;
14842 }
14843
14844 /* Expand the appropriate insns for doing strlen if not just doing
14845 repnz; scasb
14846
14847 out = result, initialized with the start address
14848 align_rtx = alignment of the address.
14849 scratch = scratch register, initialized with the start address when
14850 not aligned, otherwise undefined
14851
14852 This is just the body. It needs the initializations mentioned above and
14853 some address computing at the end. These things are done in i386.md. */
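/* Editorial sketch of the idea in C (assumes 32-bit little-endian words;
   not part of the original source):

     while ((uintptr_t) p & 3)                  // prologue: align to 4 bytes
       if (*p++ == 0) goto done;
     for (;;)                                   // main loop: 4 bytes at a time
       {
         unsigned int w = *(unsigned int *) p;
         p += 4;
         if ((w - 0x01010101U) & ~w & 0x80808080U)
           break;                                // some byte of W is zero
       }
     // locate the zero byte inside W and back P up accordingly
*/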
14854
14855 static void
14856 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14857 {
14858 int align;
14859 rtx tmp;
14860 rtx align_2_label = NULL_RTX;
14861 rtx align_3_label = NULL_RTX;
14862 rtx align_4_label = gen_label_rtx ();
14863 rtx end_0_label = gen_label_rtx ();
14864 rtx mem;
14865 rtx tmpreg = gen_reg_rtx (SImode);
14866 rtx scratch = gen_reg_rtx (SImode);
14867 rtx cmp;
14868
14869 align = 0;
14870 if (CONST_INT_P (align_rtx))
14871 align = INTVAL (align_rtx);
14872
14873 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14874
14875 /* Is there a known alignment and is it less than 4? */
14876 if (align < 4)
14877 {
14878 rtx scratch1 = gen_reg_rtx (Pmode);
14879 emit_move_insn (scratch1, out);
14880 /* Is there a known alignment and is it not 2? */
14881 if (align != 2)
14882 {
14883 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14884 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14885
14886 /* Leave just the two lower bits of the address. */
14887 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14888 NULL_RTX, 0, OPTAB_WIDEN);
14889
14890 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14891 Pmode, 1, align_4_label);
14892 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14893 Pmode, 1, align_2_label);
14894 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14895 Pmode, 1, align_3_label);
14896 }
14897 else
14898 {
14899 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14900 check whether it is aligned to a 4-byte boundary. */
14901
14902 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14903 NULL_RTX, 0, OPTAB_WIDEN);
14904
14905 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14906 Pmode, 1, align_4_label);
14907 }
14908
14909 mem = change_address (src, QImode, out);
14910
14911 /* Now compare the bytes. */
14912
14913 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14914 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14915 QImode, 1, end_0_label);
14916
14917 /* Increment the address. */
14918 if (TARGET_64BIT)
14919 emit_insn (gen_adddi3 (out, out, const1_rtx));
14920 else
14921 emit_insn (gen_addsi3 (out, out, const1_rtx));
14922
14923 /* Not needed with an alignment of 2 */
14924 if (align != 2)
14925 {
14926 emit_label (align_2_label);
14927
14928 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14929 end_0_label);
14930
14931 if (TARGET_64BIT)
14932 emit_insn (gen_adddi3 (out, out, const1_rtx));
14933 else
14934 emit_insn (gen_addsi3 (out, out, const1_rtx));
14935
14936 emit_label (align_3_label);
14937 }
14938
14939 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14940 end_0_label);
14941
14942 if (TARGET_64BIT)
14943 emit_insn (gen_adddi3 (out, out, const1_rtx));
14944 else
14945 emit_insn (gen_addsi3 (out, out, const1_rtx));
14946 }
14947
14948 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14949 align this loop: it only makes the code larger and does not
14950 speed it up. */
14951 emit_label (align_4_label);
14952
14953 mem = change_address (src, SImode, out);
14954 emit_move_insn (scratch, mem);
14955 if (TARGET_64BIT)
14956 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14957 else
14958 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14959
14960 /* This formula yields a nonzero result iff one of the bytes is zero.
14961 This saves three branches inside the loop and many cycles. */
14962
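/* Worked example (editorial): for scratch = 0x41420043, which contains a
   zero byte, scratch + (-0x01010101) = 0x4040FF42, ~scratch = 0xBEBDFFBC,
   their AND is 0x0000FF00, and masking with 0x80808080 leaves 0x00008000,
   i.e. nonzero.  A word with no zero byte produces 0 instead.  */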
14963 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14964 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14965 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14966 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14967 gen_int_mode (0x80808080, SImode)));
14968 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14969 align_4_label);
14970
14971 if (TARGET_CMOVE)
14972 {
14973 rtx reg = gen_reg_rtx (SImode);
14974 rtx reg2 = gen_reg_rtx (Pmode);
14975 emit_move_insn (reg, tmpreg);
14976 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14977
14978 /* If zero is not in the first two bytes, move two bytes forward. */
14979 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14980 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14981 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14982 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14983 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14984 reg,
14985 tmpreg)));
14986 /* Emit lea manually to avoid clobbering of flags. */
14987 emit_insn (gen_rtx_SET (SImode, reg2,
14988 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14989
14990 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14991 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14992 emit_insn (gen_rtx_SET (VOIDmode, out,
14993 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14994 reg2,
14995 out)));
14996
14997 }
14998 else
14999 {
15000 rtx end_2_label = gen_label_rtx ();
15001 /* Is zero in the first two bytes? */
15002
15003 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15004 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15005 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15006 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15007 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15008 pc_rtx);
15009 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15010 JUMP_LABEL (tmp) = end_2_label;
15011
15012 /* Not in the first two. Move two bytes forward. */
15013 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15014 if (TARGET_64BIT)
15015 emit_insn (gen_adddi3 (out, out, const2_rtx));
15016 else
15017 emit_insn (gen_addsi3 (out, out, const2_rtx));
15018
15019 emit_label (end_2_label);
15020
15021 }
15022
15023 /* Avoid a branch while fixing up the byte position. */
15024 tmpreg = gen_lowpart (QImode, tmpreg);
15025 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15026 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15027 if (TARGET_64BIT)
15028 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15029 else
15030 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15031
15032 emit_label (end_0_label);
15033 }
15034
15035 /* For a given symbol (function), construct code to compute the address of
15036 its PLT entry in the large x86-64 PIC model. */
15037 rtx
15038 construct_plt_address (rtx symbol)
15039 {
15040 rtx tmp = gen_reg_rtx (Pmode);
15041 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15042
15043 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15044 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15045
15046 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15047 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15048 return tmp;
15049 }
15050
15051 void
15052 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15053 rtx callarg2 ATTRIBUTE_UNUSED,
15054 rtx pop, int sibcall)
15055 {
15056 rtx use = NULL, call;
15057
15058 if (pop == const0_rtx)
15059 pop = NULL;
15060 gcc_assert (!TARGET_64BIT || !pop);
15061
15062 if (TARGET_MACHO && !TARGET_64BIT)
15063 {
15064 #if TARGET_MACHO
15065 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15066 fnaddr = machopic_indirect_call_target (fnaddr);
15067 #endif
15068 }
15069 else
15070 {
15071 /* Static functions and indirect calls don't need the pic register. */
15072 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15073 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15074 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15075 use_reg (&use, pic_offset_table_rtx);
15076 }
15077
15078 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15079 {
15080 rtx al = gen_rtx_REG (QImode, 0);
15081 emit_move_insn (al, callarg2);
15082 use_reg (&use, al);
15083 }
15084
15085 if (ix86_cmodel == CM_LARGE_PIC
15086 && GET_CODE (fnaddr) == MEM
15087 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15088 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15089 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15090 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15091 {
15092 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15093 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15094 }
15095 if (sibcall && TARGET_64BIT
15096 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15097 {
15098 rtx addr;
15099 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15100 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15101 emit_move_insn (fnaddr, addr);
15102 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15103 }
15104
15105 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15106 if (retval)
15107 call = gen_rtx_SET (VOIDmode, retval, call);
15108 if (pop)
15109 {
15110 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15111 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15112 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15113 }
15114
15115 call = emit_call_insn (call);
15116 if (use)
15117 CALL_INSN_FUNCTION_USAGE (call) = use;
15118 }
15119
15120 \f
15121 /* Clear stack slot assignments remembered from previous functions.
15122 This is called from INIT_EXPANDERS once before RTL is emitted for each
15123 function. */
15124
15125 static struct machine_function *
15126 ix86_init_machine_status (void)
15127 {
15128 struct machine_function *f;
15129
15130 f = ggc_alloc_cleared (sizeof (struct machine_function));
15131 f->use_fast_prologue_epilogue_nregs = -1;
15132 f->tls_descriptor_call_expanded_p = 0;
15133
15134 return f;
15135 }
15136
15137 /* Return a MEM corresponding to a stack slot with mode MODE.
15138 Allocate a new slot if necessary.
15139
15140 The RTL for a function can have several slots available: N is
15141 which slot to use. */
15142
15143 rtx
15144 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15145 {
15146 struct stack_local_entry *s;
15147
15148 gcc_assert (n < MAX_386_STACK_LOCALS);
15149
15150 for (s = ix86_stack_locals; s; s = s->next)
15151 if (s->mode == mode && s->n == n)
15152 return copy_rtx (s->rtl);
15153
15154 s = (struct stack_local_entry *)
15155 ggc_alloc (sizeof (struct stack_local_entry));
15156 s->n = n;
15157 s->mode = mode;
15158 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15159
15160 s->next = ix86_stack_locals;
15161 ix86_stack_locals = s;
15162 return s->rtl;
15163 }
15164
15165 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15166
15167 static GTY(()) rtx ix86_tls_symbol;
15168 rtx
15169 ix86_tls_get_addr (void)
15170 {
15171
15172 if (!ix86_tls_symbol)
15173 {
15174 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15175 (TARGET_ANY_GNU_TLS
15176 && !TARGET_64BIT)
15177 ? "___tls_get_addr"
15178 : "__tls_get_addr");
15179 }
15180
15181 return ix86_tls_symbol;
15182 }
15183
15184 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15185
15186 static GTY(()) rtx ix86_tls_module_base_symbol;
15187 rtx
15188 ix86_tls_module_base (void)
15189 {
15190
15191 if (!ix86_tls_module_base_symbol)
15192 {
15193 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15194 "_TLS_MODULE_BASE_");
15195 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15196 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15197 }
15198
15199 return ix86_tls_module_base_symbol;
15200 }
15201 \f
15202 /* Calculate the length of the memory address in the instruction
15203 encoding. Does not include the one-byte modrm, opcode, or prefix. */
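/* Illustrative values (editorial, 32-bit addressing): (%eax) -> 0,
   (%esp) -> 1 (SIB byte), 4(%eax) -> 1 (8-bit displacement),
   4(%eax,%ebx) -> 2 (disp8 + SIB), a bare symbol or constant address -> 4,
   0x12345678(%eax) -> 4.  */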
15204
15205 int
15206 memory_address_length (rtx addr)
15207 {
15208 struct ix86_address parts;
15209 rtx base, index, disp;
15210 int len;
15211 int ok;
15212
15213 if (GET_CODE (addr) == PRE_DEC
15214 || GET_CODE (addr) == POST_INC
15215 || GET_CODE (addr) == PRE_MODIFY
15216 || GET_CODE (addr) == POST_MODIFY)
15217 return 0;
15218
15219 ok = ix86_decompose_address (addr, &parts);
15220 gcc_assert (ok);
15221
15222 if (parts.base && GET_CODE (parts.base) == SUBREG)
15223 parts.base = SUBREG_REG (parts.base);
15224 if (parts.index && GET_CODE (parts.index) == SUBREG)
15225 parts.index = SUBREG_REG (parts.index);
15226
15227 base = parts.base;
15228 index = parts.index;
15229 disp = parts.disp;
15230 len = 0;
15231
15232 /* Rule of thumb:
15233 - esp as the base always wants an index,
15234 - ebp as the base always wants a displacement. */
15235
15236 /* Register Indirect. */
15237 if (base && !index && !disp)
15238 {
15239 /* esp (for its index) and ebp (for its displacement) need
15240 the two-byte modrm form. */
15241 if (addr == stack_pointer_rtx
15242 || addr == arg_pointer_rtx
15243 || addr == frame_pointer_rtx
15244 || addr == hard_frame_pointer_rtx)
15245 len = 1;
15246 }
15247
15248 /* Direct Addressing. */
15249 else if (disp && !base && !index)
15250 len = 4;
15251
15252 else
15253 {
15254 /* Find the length of the displacement constant. */
15255 if (disp)
15256 {
15257 if (base && satisfies_constraint_K (disp))
15258 len = 1;
15259 else
15260 len = 4;
15261 }
15262 /* ebp always wants a displacement. */
15263 else if (base == hard_frame_pointer_rtx)
15264 len = 1;
15265
15266 /* An index requires the two-byte modrm form.... */
15267 if (index
15268 /* ...like esp, which always wants an index. */
15269 || base == stack_pointer_rtx
15270 || base == arg_pointer_rtx
15271 || base == frame_pointer_rtx)
15272 len += 1;
15273 }
15274
15275 return len;
15276 }
15277
15278 /* Compute the default value for the "length_immediate" attribute. When
15279 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
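/* Editorial example: an instruction whose only constant operand is a general
   32-bit immediate contributes 4 bytes; if SHORTFORM is set and the operand
   satisfies constraint K (a signed 8-bit value), it contributes 1 byte; a
   DImode immediate still counts as 4 bytes because it is encoded as a 32-bit
   sign-extended value.  */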
15280 int
15281 ix86_attr_length_immediate_default (rtx insn, int shortform)
15282 {
15283 int len = 0;
15284 int i;
15285 extract_insn_cached (insn);
15286 for (i = recog_data.n_operands - 1; i >= 0; --i)
15287 if (CONSTANT_P (recog_data.operand[i]))
15288 {
15289 gcc_assert (!len);
15290 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15291 len = 1;
15292 else
15293 {
15294 switch (get_attr_mode (insn))
15295 {
15296 case MODE_QI:
15297 len+=1;
15298 break;
15299 case MODE_HI:
15300 len+=2;
15301 break;
15302 case MODE_SI:
15303 len+=4;
15304 break;
15305 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15306 case MODE_DI:
15307 len+=4;
15308 break;
15309 default:
15310 fatal_insn ("unknown insn mode", insn);
15311 }
15312 }
15313 }
15314 return len;
15315 }
15316 /* Compute default value for "length_address" attribute. */
15317 int
15318 ix86_attr_length_address_default (rtx insn)
15319 {
15320 int i;
15321
15322 if (get_attr_type (insn) == TYPE_LEA)
15323 {
15324 rtx set = PATTERN (insn);
15325
15326 if (GET_CODE (set) == PARALLEL)
15327 set = XVECEXP (set, 0, 0);
15328
15329 gcc_assert (GET_CODE (set) == SET);
15330
15331 return memory_address_length (SET_SRC (set));
15332 }
15333
15334 extract_insn_cached (insn);
15335 for (i = recog_data.n_operands - 1; i >= 0; --i)
15336 if (MEM_P (recog_data.operand[i]))
15337 {
15338 return memory_address_length (XEXP (recog_data.operand[i], 0));
15340 }
15341 return 0;
15342 }
15343 \f
15344 /* Return the maximum number of instructions a cpu can issue. */
15345
15346 static int
15347 ix86_issue_rate (void)
15348 {
15349 switch (ix86_tune)
15350 {
15351 case PROCESSOR_PENTIUM:
15352 case PROCESSOR_K6:
15353 return 2;
15354
15355 case PROCESSOR_PENTIUMPRO:
15356 case PROCESSOR_PENTIUM4:
15357 case PROCESSOR_ATHLON:
15358 case PROCESSOR_K8:
15359 case PROCESSOR_AMDFAM10:
15360 case PROCESSOR_NOCONA:
15361 case PROCESSOR_GENERIC32:
15362 case PROCESSOR_GENERIC64:
15363 return 3;
15364
15365 case PROCESSOR_CORE2:
15366 return 4;
15367
15368 default:
15369 return 1;
15370 }
15371 }
15372
15373 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
15374 by DEP_INSN and nothing else that DEP_INSN sets. */
15375
15376 static int
15377 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15378 {
15379 rtx set, set2;
15380
15381 /* Simplify the test for uninteresting insns. */
15382 if (insn_type != TYPE_SETCC
15383 && insn_type != TYPE_ICMOV
15384 && insn_type != TYPE_FCMOV
15385 && insn_type != TYPE_IBR)
15386 return 0;
15387
15388 if ((set = single_set (dep_insn)) != 0)
15389 {
15390 set = SET_DEST (set);
15391 set2 = NULL_RTX;
15392 }
15393 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15394 && XVECLEN (PATTERN (dep_insn), 0) == 2
15395 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15396 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15397 {
15398 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15399 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15400 }
15401 else
15402 return 0;
15403
15404 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15405 return 0;
15406
15407 /* This test is true if the dependent insn reads the flags but
15408 not any other potentially set register. */
15409 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15410 return 0;
15411
15412 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15413 return 0;
15414
15415 return 1;
15416 }
15417
15418 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15419 address with operands set by DEP_INSN. */
15420
15421 static int
15422 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15423 {
15424 rtx addr;
15425
15426 if (insn_type == TYPE_LEA
15427 && TARGET_PENTIUM)
15428 {
15429 addr = PATTERN (insn);
15430
15431 if (GET_CODE (addr) == PARALLEL)
15432 addr = XVECEXP (addr, 0, 0);
15433
15434 gcc_assert (GET_CODE (addr) == SET);
15435
15436 addr = SET_SRC (addr);
15437 }
15438 else
15439 {
15440 int i;
15441 extract_insn_cached (insn);
15442 for (i = recog_data.n_operands - 1; i >= 0; --i)
15443 if (MEM_P (recog_data.operand[i]))
15444 {
15445 addr = XEXP (recog_data.operand[i], 0);
15446 goto found;
15447 }
15448 return 0;
15449 found:;
15450 }
15451
15452 return modified_in_p (addr, dep_insn);
15453 }
15454
15455 static int
15456 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15457 {
15458 enum attr_type insn_type, dep_insn_type;
15459 enum attr_memory memory;
15460 rtx set, set2;
15461 int dep_insn_code_number;
15462
15463 /* Anti and output dependencies have zero cost on all CPUs. */
15464 if (REG_NOTE_KIND (link) != 0)
15465 return 0;
15466
15467 dep_insn_code_number = recog_memoized (dep_insn);
15468
15469 /* If we can't recognize the insns, we can't really do anything. */
15470 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15471 return cost;
15472
15473 insn_type = get_attr_type (insn);
15474 dep_insn_type = get_attr_type (dep_insn);
15475
15476 switch (ix86_tune)
15477 {
15478 case PROCESSOR_PENTIUM:
15479 /* Address Generation Interlock adds a cycle of latency. */
15480 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15481 cost += 1;
15482
15483 /* ??? Compares pair with jump/setcc. */
15484 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15485 cost = 0;
15486
15487 /* Floating point stores require value to be ready one cycle earlier. */
15488 if (insn_type == TYPE_FMOV
15489 && get_attr_memory (insn) == MEMORY_STORE
15490 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15491 cost += 1;
15492 break;
15493
15494 case PROCESSOR_PENTIUMPRO:
15495 memory = get_attr_memory (insn);
15496
15497 /* INT->FP conversion is expensive. */
15498 if (get_attr_fp_int_src (dep_insn))
15499 cost += 5;
15500
15501 /* There is one cycle extra latency between an FP op and a store. */
15502 if (insn_type == TYPE_FMOV
15503 && (set = single_set (dep_insn)) != NULL_RTX
15504 && (set2 = single_set (insn)) != NULL_RTX
15505 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15506 && MEM_P (SET_DEST (set2)))
15507 cost += 1;
15508
15509 /* Show the ability of the reorder buffer to hide the latency of a load by
15510 executing it in parallel with the previous instruction, in case the
15511 previous instruction is not needed to compute the address. */
15512 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15513 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15514 {
15515 /* Claim moves to take one cycle, as the core can issue one load
15516 at a time and the next load can start a cycle later. */
15517 if (dep_insn_type == TYPE_IMOV
15518 || dep_insn_type == TYPE_FMOV)
15519 cost = 1;
15520 else if (cost > 1)
15521 cost--;
15522 }
15523 break;
15524
15525 case PROCESSOR_K6:
15526 memory = get_attr_memory (insn);
15527
15528 /* The esp dependency is resolved before the instruction is really
15529 finished. */
15530 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15531 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15532 return 1;
15533
15534 /* INT->FP conversion is expensive. */
15535 if (get_attr_fp_int_src (dep_insn))
15536 cost += 5;
15537
15538 /* Show the ability of the reorder buffer to hide the latency of a load by
15539 executing it in parallel with the previous instruction, in case the
15540 previous instruction is not needed to compute the address. */
15541 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15542 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15543 {
15544 /* Claim moves to take one cycle, as the core can issue one load
15545 at a time and the next load can start a cycle later. */
15546 if (dep_insn_type == TYPE_IMOV
15547 || dep_insn_type == TYPE_FMOV)
15548 cost = 1;
15549 else if (cost > 2)
15550 cost -= 2;
15551 else
15552 cost = 1;
15553 }
15554 break;
15555
15556 case PROCESSOR_ATHLON:
15557 case PROCESSOR_K8:
15558 case PROCESSOR_AMDFAM10:
15559 case PROCESSOR_GENERIC32:
15560 case PROCESSOR_GENERIC64:
15561 memory = get_attr_memory (insn);
15562
15563 /* Show the ability of the reorder buffer to hide the latency of a load by
15564 executing it in parallel with the previous instruction, in case the
15565 previous instruction is not needed to compute the address. */
15566 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15567 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15568 {
15569 enum attr_unit unit = get_attr_unit (insn);
15570 int loadcost = 3;
15571
15572 /* Because of the difference between the length of integer and
15573 floating unit pipeline preparation stages, the memory operands
15574 for floating point are cheaper.
15575
15576 ??? For Athlon the difference is most probably 2. */
15577 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15578 loadcost = 3;
15579 else
15580 loadcost = TARGET_ATHLON ? 2 : 0;
15581
15582 if (cost >= loadcost)
15583 cost -= loadcost;
15584 else
15585 cost = 0;
15586 }
15587
15588 default:
15589 break;
15590 }
15591
15592 return cost;
15593 }
15594
15595 /* How many alternative schedules to try. This should be as wide as the
15596 scheduling freedom in the DFA, but no wider. Making this value too
15597 large results in extra work for the scheduler. */
15598
15599 static int
15600 ia32_multipass_dfa_lookahead (void)
15601 {
15602 if (ix86_tune == PROCESSOR_PENTIUM)
15603 return 2;
15604
15605 if (ix86_tune == PROCESSOR_PENTIUMPRO
15606 || ix86_tune == PROCESSOR_K6)
15607 return 1;
15608
15609 else
15610 return 0;
15611 }
15612
15613 \f
15614 /* Compute the alignment given to a constant that is being placed in memory.
15615 EXP is the constant and ALIGN is the alignment that the object would
15616 ordinarily have.
15617 The value of this function is used instead of that alignment to align
15618 the object. */
15619
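/* Editorial examples: a DFmode REAL_CST that would otherwise get only 32-bit
   alignment is bumped to 64 bits; a constant whose mode satisfies
   ALIGN_MODE_128 is bumped to 128 bits; and, when not optimizing for size, a
   STRING_CST of length 31 or more is aligned to BITS_PER_WORD.  */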
15620 int
15621 ix86_constant_alignment (tree exp, int align)
15622 {
15623 if (TREE_CODE (exp) == REAL_CST)
15624 {
15625 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15626 return 64;
15627 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15628 return 128;
15629 }
15630 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15631 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15632 return BITS_PER_WORD;
15633
15634 return align;
15635 }
15636
15637 /* Compute the alignment for a static variable.
15638 TYPE is the data type, and ALIGN is the alignment that
15639 the object would ordinarily have. The value of this function is used
15640 instead of that alignment to align the object. */
15641
15642 int
15643 ix86_data_alignment (tree type, int align)
15644 {
15645 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15646
15647 if (AGGREGATE_TYPE_P (type)
15648 && TYPE_SIZE (type)
15649 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15650 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15651 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15652 && align < max_align)
15653 align = max_align;
15654
15655 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
15656 to a 16-byte boundary. */
15657 if (TARGET_64BIT)
15658 {
15659 if (AGGREGATE_TYPE_P (type)
15660 && TYPE_SIZE (type)
15661 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15662 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15663 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15664 return 128;
15665 }
15666
15667 if (TREE_CODE (type) == ARRAY_TYPE)
15668 {
15669 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15670 return 64;
15671 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15672 return 128;
15673 }
15674 else if (TREE_CODE (type) == COMPLEX_TYPE)
15675 {
15676
15677 if (TYPE_MODE (type) == DCmode && align < 64)
15678 return 64;
15679 if (TYPE_MODE (type) == XCmode && align < 128)
15680 return 128;
15681 }
15682 else if ((TREE_CODE (type) == RECORD_TYPE
15683 || TREE_CODE (type) == UNION_TYPE
15684 || TREE_CODE (type) == QUAL_UNION_TYPE)
15685 && TYPE_FIELDS (type))
15686 {
15687 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15688 return 64;
15689 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15690 return 128;
15691 }
15692 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15693 || TREE_CODE (type) == INTEGER_TYPE)
15694 {
15695 if (TYPE_MODE (type) == DFmode && align < 64)
15696 return 64;
15697 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15698 return 128;
15699 }
15700
15701 return align;
15702 }
15703
15704 /* Compute the alignment for a local variable.
15705 TYPE is the data type, and ALIGN is the alignment that
15706 the object would ordinarily have. The value of this macro is used
15707 instead of that alignment to align the object. */
15708
15709 int
15710 ix86_local_alignment (tree type, int align)
15711 {
15712 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
15713 to a 16-byte boundary. */
15714 if (TARGET_64BIT)
15715 {
15716 if (AGGREGATE_TYPE_P (type)
15717 && TYPE_SIZE (type)
15718 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15719 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15720 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15721 return 128;
15722 }
15723 if (TREE_CODE (type) == ARRAY_TYPE)
15724 {
15725 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15726 return 64;
15727 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15728 return 128;
15729 }
15730 else if (TREE_CODE (type) == COMPLEX_TYPE)
15731 {
15732 if (TYPE_MODE (type) == DCmode && align < 64)
15733 return 64;
15734 if (TYPE_MODE (type) == XCmode && align < 128)
15735 return 128;
15736 }
15737 else if ((TREE_CODE (type) == RECORD_TYPE
15738 || TREE_CODE (type) == UNION_TYPE
15739 || TREE_CODE (type) == QUAL_UNION_TYPE)
15740 && TYPE_FIELDS (type))
15741 {
15742 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15743 return 64;
15744 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15745 return 128;
15746 }
15747 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15748 || TREE_CODE (type) == INTEGER_TYPE)
15749 {
15750
15751 if (TYPE_MODE (type) == DFmode && align < 64)
15752 return 64;
15753 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15754 return 128;
15755 }
15756 return align;
15757 }
15758 \f
15759 /* Emit RTL insns to initialize the variable parts of a trampoline.
15760 FNADDR is an RTX for the address of the function's pure code.
15761 CXT is an RTX for the static chain value for the function. */
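/* Editorial note on the layout built below.  The 32-bit trampoline is

     b9 <cxt>    movl  $CXT, %ecx      ; static chain
     e9 <disp>   jmp   FNADDR          ; disp = FNADDR - (TRAMP + 10)

   while the 64-bit variant loads FNADDR into %r11 and CXT into %r10
   (via movl or movabs) and ends with jmp *%r11.  */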
15762 void
15763 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15764 {
15765 if (!TARGET_64BIT)
15766 {
15767 /* Compute offset from the end of the jmp to the target function. */
15768 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15769 plus_constant (tramp, 10),
15770 NULL_RTX, 1, OPTAB_DIRECT);
15771 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15772 gen_int_mode (0xb9, QImode));
15773 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15774 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15775 gen_int_mode (0xe9, QImode));
15776 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15777 }
15778 else
15779 {
15780 int offset = 0;
15781 /* Try to load address using shorter movl instead of movabs.
15782 We may want to support movq for kernel mode, but the kernel does not use
15783 trampolines at the moment. */
15784 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15785 {
15786 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15787 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15788 gen_int_mode (0xbb41, HImode));
15789 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15790 gen_lowpart (SImode, fnaddr));
15791 offset += 6;
15792 }
15793 else
15794 {
15795 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15796 gen_int_mode (0xbb49, HImode));
15797 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15798 fnaddr);
15799 offset += 10;
15800 }
15801 /* Load static chain using movabs to r10. */
15802 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15803 gen_int_mode (0xba49, HImode));
15804 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15805 cxt);
15806 offset += 10;
15807 /* Jump to r11. */
15808 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15809 gen_int_mode (0xff49, HImode));
15810 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15811 gen_int_mode (0xe3, QImode));
15812 offset += 3;
15813 gcc_assert (offset <= TRAMPOLINE_SIZE);
15814 }
15815
15816 #ifdef ENABLE_EXECUTE_STACK
15817 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15818 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15819 #endif
15820 }
15821 \f
15822 /* Codes for all the SSE/MMX builtins. */
15823 enum ix86_builtins
15824 {
15825 IX86_BUILTIN_ADDPS,
15826 IX86_BUILTIN_ADDSS,
15827 IX86_BUILTIN_DIVPS,
15828 IX86_BUILTIN_DIVSS,
15829 IX86_BUILTIN_MULPS,
15830 IX86_BUILTIN_MULSS,
15831 IX86_BUILTIN_SUBPS,
15832 IX86_BUILTIN_SUBSS,
15833
15834 IX86_BUILTIN_CMPEQPS,
15835 IX86_BUILTIN_CMPLTPS,
15836 IX86_BUILTIN_CMPLEPS,
15837 IX86_BUILTIN_CMPGTPS,
15838 IX86_BUILTIN_CMPGEPS,
15839 IX86_BUILTIN_CMPNEQPS,
15840 IX86_BUILTIN_CMPNLTPS,
15841 IX86_BUILTIN_CMPNLEPS,
15842 IX86_BUILTIN_CMPNGTPS,
15843 IX86_BUILTIN_CMPNGEPS,
15844 IX86_BUILTIN_CMPORDPS,
15845 IX86_BUILTIN_CMPUNORDPS,
15846 IX86_BUILTIN_CMPEQSS,
15847 IX86_BUILTIN_CMPLTSS,
15848 IX86_BUILTIN_CMPLESS,
15849 IX86_BUILTIN_CMPNEQSS,
15850 IX86_BUILTIN_CMPNLTSS,
15851 IX86_BUILTIN_CMPNLESS,
15852 IX86_BUILTIN_CMPNGTSS,
15853 IX86_BUILTIN_CMPNGESS,
15854 IX86_BUILTIN_CMPORDSS,
15855 IX86_BUILTIN_CMPUNORDSS,
15856
15857 IX86_BUILTIN_COMIEQSS,
15858 IX86_BUILTIN_COMILTSS,
15859 IX86_BUILTIN_COMILESS,
15860 IX86_BUILTIN_COMIGTSS,
15861 IX86_BUILTIN_COMIGESS,
15862 IX86_BUILTIN_COMINEQSS,
15863 IX86_BUILTIN_UCOMIEQSS,
15864 IX86_BUILTIN_UCOMILTSS,
15865 IX86_BUILTIN_UCOMILESS,
15866 IX86_BUILTIN_UCOMIGTSS,
15867 IX86_BUILTIN_UCOMIGESS,
15868 IX86_BUILTIN_UCOMINEQSS,
15869
15870 IX86_BUILTIN_CVTPI2PS,
15871 IX86_BUILTIN_CVTPS2PI,
15872 IX86_BUILTIN_CVTSI2SS,
15873 IX86_BUILTIN_CVTSI642SS,
15874 IX86_BUILTIN_CVTSS2SI,
15875 IX86_BUILTIN_CVTSS2SI64,
15876 IX86_BUILTIN_CVTTPS2PI,
15877 IX86_BUILTIN_CVTTSS2SI,
15878 IX86_BUILTIN_CVTTSS2SI64,
15879
15880 IX86_BUILTIN_MAXPS,
15881 IX86_BUILTIN_MAXSS,
15882 IX86_BUILTIN_MINPS,
15883 IX86_BUILTIN_MINSS,
15884
15885 IX86_BUILTIN_LOADUPS,
15886 IX86_BUILTIN_STOREUPS,
15887 IX86_BUILTIN_MOVSS,
15888
15889 IX86_BUILTIN_MOVHLPS,
15890 IX86_BUILTIN_MOVLHPS,
15891 IX86_BUILTIN_LOADHPS,
15892 IX86_BUILTIN_LOADLPS,
15893 IX86_BUILTIN_STOREHPS,
15894 IX86_BUILTIN_STORELPS,
15895
15896 IX86_BUILTIN_MASKMOVQ,
15897 IX86_BUILTIN_MOVMSKPS,
15898 IX86_BUILTIN_PMOVMSKB,
15899
15900 IX86_BUILTIN_MOVNTPS,
15901 IX86_BUILTIN_MOVNTQ,
15902
15903 IX86_BUILTIN_LOADDQU,
15904 IX86_BUILTIN_STOREDQU,
15905
15906 IX86_BUILTIN_PACKSSWB,
15907 IX86_BUILTIN_PACKSSDW,
15908 IX86_BUILTIN_PACKUSWB,
15909
15910 IX86_BUILTIN_PADDB,
15911 IX86_BUILTIN_PADDW,
15912 IX86_BUILTIN_PADDD,
15913 IX86_BUILTIN_PADDQ,
15914 IX86_BUILTIN_PADDSB,
15915 IX86_BUILTIN_PADDSW,
15916 IX86_BUILTIN_PADDUSB,
15917 IX86_BUILTIN_PADDUSW,
15918 IX86_BUILTIN_PSUBB,
15919 IX86_BUILTIN_PSUBW,
15920 IX86_BUILTIN_PSUBD,
15921 IX86_BUILTIN_PSUBQ,
15922 IX86_BUILTIN_PSUBSB,
15923 IX86_BUILTIN_PSUBSW,
15924 IX86_BUILTIN_PSUBUSB,
15925 IX86_BUILTIN_PSUBUSW,
15926
15927 IX86_BUILTIN_PAND,
15928 IX86_BUILTIN_PANDN,
15929 IX86_BUILTIN_POR,
15930 IX86_BUILTIN_PXOR,
15931
15932 IX86_BUILTIN_PAVGB,
15933 IX86_BUILTIN_PAVGW,
15934
15935 IX86_BUILTIN_PCMPEQB,
15936 IX86_BUILTIN_PCMPEQW,
15937 IX86_BUILTIN_PCMPEQD,
15938 IX86_BUILTIN_PCMPGTB,
15939 IX86_BUILTIN_PCMPGTW,
15940 IX86_BUILTIN_PCMPGTD,
15941
15942 IX86_BUILTIN_PMADDWD,
15943
15944 IX86_BUILTIN_PMAXSW,
15945 IX86_BUILTIN_PMAXUB,
15946 IX86_BUILTIN_PMINSW,
15947 IX86_BUILTIN_PMINUB,
15948
15949 IX86_BUILTIN_PMULHUW,
15950 IX86_BUILTIN_PMULHW,
15951 IX86_BUILTIN_PMULLW,
15952
15953 IX86_BUILTIN_PSADBW,
15954 IX86_BUILTIN_PSHUFW,
15955
15956 IX86_BUILTIN_PSLLW,
15957 IX86_BUILTIN_PSLLD,
15958 IX86_BUILTIN_PSLLQ,
15959 IX86_BUILTIN_PSRAW,
15960 IX86_BUILTIN_PSRAD,
15961 IX86_BUILTIN_PSRLW,
15962 IX86_BUILTIN_PSRLD,
15963 IX86_BUILTIN_PSRLQ,
15964 IX86_BUILTIN_PSLLWI,
15965 IX86_BUILTIN_PSLLDI,
15966 IX86_BUILTIN_PSLLQI,
15967 IX86_BUILTIN_PSRAWI,
15968 IX86_BUILTIN_PSRADI,
15969 IX86_BUILTIN_PSRLWI,
15970 IX86_BUILTIN_PSRLDI,
15971 IX86_BUILTIN_PSRLQI,
15972
15973 IX86_BUILTIN_PUNPCKHBW,
15974 IX86_BUILTIN_PUNPCKHWD,
15975 IX86_BUILTIN_PUNPCKHDQ,
15976 IX86_BUILTIN_PUNPCKLBW,
15977 IX86_BUILTIN_PUNPCKLWD,
15978 IX86_BUILTIN_PUNPCKLDQ,
15979
15980 IX86_BUILTIN_SHUFPS,
15981
15982 IX86_BUILTIN_RCPPS,
15983 IX86_BUILTIN_RCPSS,
15984 IX86_BUILTIN_RSQRTPS,
15985 IX86_BUILTIN_RSQRTSS,
15986 IX86_BUILTIN_SQRTPS,
15987 IX86_BUILTIN_SQRTSS,
15988
15989 IX86_BUILTIN_UNPCKHPS,
15990 IX86_BUILTIN_UNPCKLPS,
15991
15992 IX86_BUILTIN_ANDPS,
15993 IX86_BUILTIN_ANDNPS,
15994 IX86_BUILTIN_ORPS,
15995 IX86_BUILTIN_XORPS,
15996
15997 IX86_BUILTIN_EMMS,
15998 IX86_BUILTIN_LDMXCSR,
15999 IX86_BUILTIN_STMXCSR,
16000 IX86_BUILTIN_SFENCE,
16001
16002 /* 3DNow! Original */
16003 IX86_BUILTIN_FEMMS,
16004 IX86_BUILTIN_PAVGUSB,
16005 IX86_BUILTIN_PF2ID,
16006 IX86_BUILTIN_PFACC,
16007 IX86_BUILTIN_PFADD,
16008 IX86_BUILTIN_PFCMPEQ,
16009 IX86_BUILTIN_PFCMPGE,
16010 IX86_BUILTIN_PFCMPGT,
16011 IX86_BUILTIN_PFMAX,
16012 IX86_BUILTIN_PFMIN,
16013 IX86_BUILTIN_PFMUL,
16014 IX86_BUILTIN_PFRCP,
16015 IX86_BUILTIN_PFRCPIT1,
16016 IX86_BUILTIN_PFRCPIT2,
16017 IX86_BUILTIN_PFRSQIT1,
16018 IX86_BUILTIN_PFRSQRT,
16019 IX86_BUILTIN_PFSUB,
16020 IX86_BUILTIN_PFSUBR,
16021 IX86_BUILTIN_PI2FD,
16022 IX86_BUILTIN_PMULHRW,
16023
16024 /* 3DNow! Athlon Extensions */
16025 IX86_BUILTIN_PF2IW,
16026 IX86_BUILTIN_PFNACC,
16027 IX86_BUILTIN_PFPNACC,
16028 IX86_BUILTIN_PI2FW,
16029 IX86_BUILTIN_PSWAPDSI,
16030 IX86_BUILTIN_PSWAPDSF,
16031
16032 /* SSE2 */
16033 IX86_BUILTIN_ADDPD,
16034 IX86_BUILTIN_ADDSD,
16035 IX86_BUILTIN_DIVPD,
16036 IX86_BUILTIN_DIVSD,
16037 IX86_BUILTIN_MULPD,
16038 IX86_BUILTIN_MULSD,
16039 IX86_BUILTIN_SUBPD,
16040 IX86_BUILTIN_SUBSD,
16041
16042 IX86_BUILTIN_CMPEQPD,
16043 IX86_BUILTIN_CMPLTPD,
16044 IX86_BUILTIN_CMPLEPD,
16045 IX86_BUILTIN_CMPGTPD,
16046 IX86_BUILTIN_CMPGEPD,
16047 IX86_BUILTIN_CMPNEQPD,
16048 IX86_BUILTIN_CMPNLTPD,
16049 IX86_BUILTIN_CMPNLEPD,
16050 IX86_BUILTIN_CMPNGTPD,
16051 IX86_BUILTIN_CMPNGEPD,
16052 IX86_BUILTIN_CMPORDPD,
16053 IX86_BUILTIN_CMPUNORDPD,
16054 IX86_BUILTIN_CMPNEPD,
16055 IX86_BUILTIN_CMPEQSD,
16056 IX86_BUILTIN_CMPLTSD,
16057 IX86_BUILTIN_CMPLESD,
16058 IX86_BUILTIN_CMPNEQSD,
16059 IX86_BUILTIN_CMPNLTSD,
16060 IX86_BUILTIN_CMPNLESD,
16061 IX86_BUILTIN_CMPORDSD,
16062 IX86_BUILTIN_CMPUNORDSD,
16063 IX86_BUILTIN_CMPNESD,
16064
16065 IX86_BUILTIN_COMIEQSD,
16066 IX86_BUILTIN_COMILTSD,
16067 IX86_BUILTIN_COMILESD,
16068 IX86_BUILTIN_COMIGTSD,
16069 IX86_BUILTIN_COMIGESD,
16070 IX86_BUILTIN_COMINEQSD,
16071 IX86_BUILTIN_UCOMIEQSD,
16072 IX86_BUILTIN_UCOMILTSD,
16073 IX86_BUILTIN_UCOMILESD,
16074 IX86_BUILTIN_UCOMIGTSD,
16075 IX86_BUILTIN_UCOMIGESD,
16076 IX86_BUILTIN_UCOMINEQSD,
16077
16078 IX86_BUILTIN_MAXPD,
16079 IX86_BUILTIN_MAXSD,
16080 IX86_BUILTIN_MINPD,
16081 IX86_BUILTIN_MINSD,
16082
16083 IX86_BUILTIN_ANDPD,
16084 IX86_BUILTIN_ANDNPD,
16085 IX86_BUILTIN_ORPD,
16086 IX86_BUILTIN_XORPD,
16087
16088 IX86_BUILTIN_SQRTPD,
16089 IX86_BUILTIN_SQRTSD,
16090
16091 IX86_BUILTIN_UNPCKHPD,
16092 IX86_BUILTIN_UNPCKLPD,
16093
16094 IX86_BUILTIN_SHUFPD,
16095
16096 IX86_BUILTIN_LOADUPD,
16097 IX86_BUILTIN_STOREUPD,
16098 IX86_BUILTIN_MOVSD,
16099
16100 IX86_BUILTIN_LOADHPD,
16101 IX86_BUILTIN_LOADLPD,
16102
16103 IX86_BUILTIN_CVTDQ2PD,
16104 IX86_BUILTIN_CVTDQ2PS,
16105
16106 IX86_BUILTIN_CVTPD2DQ,
16107 IX86_BUILTIN_CVTPD2PI,
16108 IX86_BUILTIN_CVTPD2PS,
16109 IX86_BUILTIN_CVTTPD2DQ,
16110 IX86_BUILTIN_CVTTPD2PI,
16111
16112 IX86_BUILTIN_CVTPI2PD,
16113 IX86_BUILTIN_CVTSI2SD,
16114 IX86_BUILTIN_CVTSI642SD,
16115
16116 IX86_BUILTIN_CVTSD2SI,
16117 IX86_BUILTIN_CVTSD2SI64,
16118 IX86_BUILTIN_CVTSD2SS,
16119 IX86_BUILTIN_CVTSS2SD,
16120 IX86_BUILTIN_CVTTSD2SI,
16121 IX86_BUILTIN_CVTTSD2SI64,
16122
16123 IX86_BUILTIN_CVTPS2DQ,
16124 IX86_BUILTIN_CVTPS2PD,
16125 IX86_BUILTIN_CVTTPS2DQ,
16126
16127 IX86_BUILTIN_MOVNTI,
16128 IX86_BUILTIN_MOVNTPD,
16129 IX86_BUILTIN_MOVNTDQ,
16130
16131 /* SSE2 MMX */
16132 IX86_BUILTIN_MASKMOVDQU,
16133 IX86_BUILTIN_MOVMSKPD,
16134 IX86_BUILTIN_PMOVMSKB128,
16135
16136 IX86_BUILTIN_PACKSSWB128,
16137 IX86_BUILTIN_PACKSSDW128,
16138 IX86_BUILTIN_PACKUSWB128,
16139
16140 IX86_BUILTIN_PADDB128,
16141 IX86_BUILTIN_PADDW128,
16142 IX86_BUILTIN_PADDD128,
16143 IX86_BUILTIN_PADDQ128,
16144 IX86_BUILTIN_PADDSB128,
16145 IX86_BUILTIN_PADDSW128,
16146 IX86_BUILTIN_PADDUSB128,
16147 IX86_BUILTIN_PADDUSW128,
16148 IX86_BUILTIN_PSUBB128,
16149 IX86_BUILTIN_PSUBW128,
16150 IX86_BUILTIN_PSUBD128,
16151 IX86_BUILTIN_PSUBQ128,
16152 IX86_BUILTIN_PSUBSB128,
16153 IX86_BUILTIN_PSUBSW128,
16154 IX86_BUILTIN_PSUBUSB128,
16155 IX86_BUILTIN_PSUBUSW128,
16156
16157 IX86_BUILTIN_PAND128,
16158 IX86_BUILTIN_PANDN128,
16159 IX86_BUILTIN_POR128,
16160 IX86_BUILTIN_PXOR128,
16161
16162 IX86_BUILTIN_PAVGB128,
16163 IX86_BUILTIN_PAVGW128,
16164
16165 IX86_BUILTIN_PCMPEQB128,
16166 IX86_BUILTIN_PCMPEQW128,
16167 IX86_BUILTIN_PCMPEQD128,
16168 IX86_BUILTIN_PCMPGTB128,
16169 IX86_BUILTIN_PCMPGTW128,
16170 IX86_BUILTIN_PCMPGTD128,
16171
16172 IX86_BUILTIN_PMADDWD128,
16173
16174 IX86_BUILTIN_PMAXSW128,
16175 IX86_BUILTIN_PMAXUB128,
16176 IX86_BUILTIN_PMINSW128,
16177 IX86_BUILTIN_PMINUB128,
16178
16179 IX86_BUILTIN_PMULUDQ,
16180 IX86_BUILTIN_PMULUDQ128,
16181 IX86_BUILTIN_PMULHUW128,
16182 IX86_BUILTIN_PMULHW128,
16183 IX86_BUILTIN_PMULLW128,
16184
16185 IX86_BUILTIN_PSADBW128,
16186 IX86_BUILTIN_PSHUFHW,
16187 IX86_BUILTIN_PSHUFLW,
16188 IX86_BUILTIN_PSHUFD,
16189
16190 IX86_BUILTIN_PSLLW128,
16191 IX86_BUILTIN_PSLLD128,
16192 IX86_BUILTIN_PSLLQ128,
16193 IX86_BUILTIN_PSRAW128,
16194 IX86_BUILTIN_PSRAD128,
16195 IX86_BUILTIN_PSRLW128,
16196 IX86_BUILTIN_PSRLD128,
16197 IX86_BUILTIN_PSRLQ128,
16198 IX86_BUILTIN_PSLLDQI128,
16199 IX86_BUILTIN_PSLLWI128,
16200 IX86_BUILTIN_PSLLDI128,
16201 IX86_BUILTIN_PSLLQI128,
16202 IX86_BUILTIN_PSRAWI128,
16203 IX86_BUILTIN_PSRADI128,
16204 IX86_BUILTIN_PSRLDQI128,
16205 IX86_BUILTIN_PSRLWI128,
16206 IX86_BUILTIN_PSRLDI128,
16207 IX86_BUILTIN_PSRLQI128,
16208
16209 IX86_BUILTIN_PUNPCKHBW128,
16210 IX86_BUILTIN_PUNPCKHWD128,
16211 IX86_BUILTIN_PUNPCKHDQ128,
16212 IX86_BUILTIN_PUNPCKHQDQ128,
16213 IX86_BUILTIN_PUNPCKLBW128,
16214 IX86_BUILTIN_PUNPCKLWD128,
16215 IX86_BUILTIN_PUNPCKLDQ128,
16216 IX86_BUILTIN_PUNPCKLQDQ128,
16217
16218 IX86_BUILTIN_CLFLUSH,
16219 IX86_BUILTIN_MFENCE,
16220 IX86_BUILTIN_LFENCE,
16221
16222 /* Prescott New Instructions. */
16223 IX86_BUILTIN_ADDSUBPS,
16224 IX86_BUILTIN_HADDPS,
16225 IX86_BUILTIN_HSUBPS,
16226 IX86_BUILTIN_MOVSHDUP,
16227 IX86_BUILTIN_MOVSLDUP,
16228 IX86_BUILTIN_ADDSUBPD,
16229 IX86_BUILTIN_HADDPD,
16230 IX86_BUILTIN_HSUBPD,
16231 IX86_BUILTIN_LDDQU,
16232
16233 IX86_BUILTIN_MONITOR,
16234 IX86_BUILTIN_MWAIT,
16235
16236 /* SSSE3. */
16237 IX86_BUILTIN_PHADDW,
16238 IX86_BUILTIN_PHADDD,
16239 IX86_BUILTIN_PHADDSW,
16240 IX86_BUILTIN_PHSUBW,
16241 IX86_BUILTIN_PHSUBD,
16242 IX86_BUILTIN_PHSUBSW,
16243 IX86_BUILTIN_PMADDUBSW,
16244 IX86_BUILTIN_PMULHRSW,
16245 IX86_BUILTIN_PSHUFB,
16246 IX86_BUILTIN_PSIGNB,
16247 IX86_BUILTIN_PSIGNW,
16248 IX86_BUILTIN_PSIGND,
16249 IX86_BUILTIN_PALIGNR,
16250 IX86_BUILTIN_PABSB,
16251 IX86_BUILTIN_PABSW,
16252 IX86_BUILTIN_PABSD,
16253
16254 IX86_BUILTIN_PHADDW128,
16255 IX86_BUILTIN_PHADDD128,
16256 IX86_BUILTIN_PHADDSW128,
16257 IX86_BUILTIN_PHSUBW128,
16258 IX86_BUILTIN_PHSUBD128,
16259 IX86_BUILTIN_PHSUBSW128,
16260 IX86_BUILTIN_PMADDUBSW128,
16261 IX86_BUILTIN_PMULHRSW128,
16262 IX86_BUILTIN_PSHUFB128,
16263 IX86_BUILTIN_PSIGNB128,
16264 IX86_BUILTIN_PSIGNW128,
16265 IX86_BUILTIN_PSIGND128,
16266 IX86_BUILTIN_PALIGNR128,
16267 IX86_BUILTIN_PABSB128,
16268 IX86_BUILTIN_PABSW128,
16269 IX86_BUILTIN_PABSD128,
16270
16271 /* AMDFAM10 - SSE4A New Instructions. */
16272 IX86_BUILTIN_MOVNTSD,
16273 IX86_BUILTIN_MOVNTSS,
16274 IX86_BUILTIN_EXTRQI,
16275 IX86_BUILTIN_EXTRQ,
16276 IX86_BUILTIN_INSERTQI,
16277 IX86_BUILTIN_INSERTQ,
16278
16279 IX86_BUILTIN_VEC_INIT_V2SI,
16280 IX86_BUILTIN_VEC_INIT_V4HI,
16281 IX86_BUILTIN_VEC_INIT_V8QI,
16282 IX86_BUILTIN_VEC_EXT_V2DF,
16283 IX86_BUILTIN_VEC_EXT_V2DI,
16284 IX86_BUILTIN_VEC_EXT_V4SF,
16285 IX86_BUILTIN_VEC_EXT_V4SI,
16286 IX86_BUILTIN_VEC_EXT_V8HI,
16287 IX86_BUILTIN_VEC_EXT_V2SI,
16288 IX86_BUILTIN_VEC_EXT_V4HI,
16289 IX86_BUILTIN_VEC_SET_V8HI,
16290 IX86_BUILTIN_VEC_SET_V4HI,
16291
16292 IX86_BUILTIN_MAX
16293 };
16294
16295 /* Table for the ix86 builtin decls. */
16296 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16297
16298 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16299 * only if target_flags includes one of the bits in MASK. Store the function
16300 * decl in the ix86_builtins array.
16301 * Return the function decl, or NULL_TREE if the builtin was not added. */
16302
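/* Editorial example of the shape of a typical call (the exact argument names
   are illustrative, not quoted from this file):

     def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
                  void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);

   The decl is recorded in ix86_builtins[IX86_BUILTIN_LDMXCSR] only when
   MASK_SSE is present in target_flags.  */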
16303 static inline tree
16304 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16305 {
16306 tree decl = NULL_TREE;
16307
16308 if (mask & target_flags
16309 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16310 {
16311 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16312 NULL, NULL_TREE);
16313 ix86_builtins[(int) code] = decl;
16314 }
16315
16316 return decl;
16317 }
16318
16319 /* Like def_builtin, but also marks the function decl "const". */
16320
16321 static inline tree
16322 def_builtin_const (int mask, const char *name, tree type,
16323 enum ix86_builtins code)
16324 {
16325 tree decl = def_builtin (mask, name, type, code);
16326 if (decl)
16327 TREE_READONLY (decl) = 1;
16328 return decl;
16329 }
16330
16331 /* Bits for builtin_description.flag. */
16332
16333 /* Set when we don't support the comparison natively, and should
16334 swap the comparison operands in order to support it. */
16335 #define BUILTIN_DESC_SWAP_OPERANDS 1
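/* Editor's note: SSE has no packed "greater than" compare predicate, so the
   tables below describe, e.g., __builtin_ia32_cmpgtps with rtx code LT plus
   this flag; the expander then swaps the two operands, using the fact that
   a > b is equivalent to b < a.  */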
16336
16337 struct builtin_description
16338 {
16339 const unsigned int mask;
16340 const enum insn_code icode;
16341 const char *const name;
16342 const enum ix86_builtins code;
16343 const enum rtx_code comparison;
16344 const unsigned int flag;
16345 };
16346
16347 static const struct builtin_description bdesc_comi[] =
16348 {
16349 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16350 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16351 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16352 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16353 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16354 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16355 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16356 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16357 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16358 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16359 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16360 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16362 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16363 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16364 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16365 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16366 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16367 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16368 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16369 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16370 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16371 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16372 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16373 };
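/* Illustrative use (editor's note): each entry above becomes a builtin that
   compares the low elements of its two vector operands and returns the
   result as an int, e.g. (assuming SSE is enabled and a, b are the 4-float
   vector arguments)

     int lt = __builtin_ia32_comilt (a, b);

   The init code below gives the SSE2 (*sd) variants the v2df argument type
   instead of v4sf.  */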
16374
16375 static const struct builtin_description bdesc_2arg[] =
16376 {
16377 /* SSE */
16378 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16379 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16380 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16381 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16382 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16383 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16384 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16385 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16386
16387 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16388 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16389 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16390 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16391 BUILTIN_DESC_SWAP_OPERANDS },
16392 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16393 BUILTIN_DESC_SWAP_OPERANDS },
16394 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16395 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16396 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16397 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16398 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16399 BUILTIN_DESC_SWAP_OPERANDS },
16400 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16401 BUILTIN_DESC_SWAP_OPERANDS },
16402 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16403 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16404 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16405 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16406 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16407 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16408 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16409 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16410 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16411 BUILTIN_DESC_SWAP_OPERANDS },
16412 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16413 BUILTIN_DESC_SWAP_OPERANDS },
16414 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16415
16416 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16417 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16418 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16419 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16420
16421 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16422 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16423 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16424 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16425
16426 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16427 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16428 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16429 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16430 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16431
16432 /* MMX */
16433 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16434 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16435 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16436 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16437 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16438 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16439 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16440 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16441
16442 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16443 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16444 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16445 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16446 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16447 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16448 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16449 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16450
16451 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16452 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16453 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16454
16455 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16456 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16457 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16458 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16459
16460 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16461 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16462
16463 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16464 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16465 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16466 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16467 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16468 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16469
16470 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16471 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16472 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16473 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16474
16475 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16476 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16477 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16478 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16479 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16480 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16481
16482 /* Special. */
16483 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16484 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16485 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16486
16487 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16488 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16489 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16490
16491 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16492 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16493 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16494 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16495 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16496 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16497
16498 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16499 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16500 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16501 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16502 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16503 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16504
16505 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16506 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16507 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16508 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16509
16510 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16511 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16512
16513 /* SSE2 */
16514 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16515 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16516 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16517 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16518 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16519 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16520 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16521 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16522
16523 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16524 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16525 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16526 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16527 BUILTIN_DESC_SWAP_OPERANDS },
16528 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16529 BUILTIN_DESC_SWAP_OPERANDS },
16530 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16531 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16532 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16533 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16534 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16535 BUILTIN_DESC_SWAP_OPERANDS },
16536 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16537 BUILTIN_DESC_SWAP_OPERANDS },
16538 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16539 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16540 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16541 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16542 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16543 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16544 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16545 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16546 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16547
16548 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16549 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16550 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16551 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16552
16553 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16554 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16555 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16556 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16557
16558 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16559 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16560 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16561
16562 /* SSE2 MMX */
16563 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16564 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16565 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16566 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16567 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16568 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16569 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16570 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16571
16572 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16573 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16574 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16577 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16578 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16579 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16580
16581 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16582 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16583
16584 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16585 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16586 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16587 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16588
16589 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16590 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16591
16592 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16594 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16595 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16596 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16598
16599 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16600 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16601 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16602 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16603
16604 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16605 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16606 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16607 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16608 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16610 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16611 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16612
16613 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16614 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16615 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16616
16617 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16618 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16619
16620 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16621 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16622
16623 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16624 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16625 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16626
16627 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16628 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16630
16631 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16632 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16633
16634 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16635
16636 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16637 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16638 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16639 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16640
16641 /* SSE3 MMX */
16642 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16643 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16644 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16645 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16646 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16647 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16648
16649 /* SSSE3 */
16650 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16651 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16652 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16653 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16654 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16655 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16656 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16657 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16658 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16659 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16660 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16661 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16662 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16663 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16664 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16665 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16666 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16667 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16668 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16669 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16670 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16671 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16672 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16673 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16674 };
16675
16676 static const struct builtin_description bdesc_1arg[] =
16677 {
16678 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16679 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16680
16681 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16682 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16683 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16684
16685 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16686 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16687 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16688 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16689 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16690 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16691
16692 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16693 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16694
16695 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16696
16697 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16698 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16699
16700 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16701 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16702 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16703 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16704 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16705
16706 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16707
16708 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16709 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16710 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16711 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16712
16713 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16714 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16715 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16716
16717 /* SSE3 */
16718 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16719 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16720
16721 /* SSSE3 */
16722 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16723 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16724 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16725 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16726 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16727 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16728 };
16729
16730 static void
16731 ix86_init_builtins (void)
16732 {
16733 if (TARGET_MMX)
16734 ix86_init_mmx_sse_builtins ();
16735 }
16736
16737 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16738 is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
16739 are defined. */
16740 static void
16741 ix86_init_mmx_sse_builtins (void)
16742 {
16743 const struct builtin_description * d;
16744 size_t i;
16745
16746 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16747 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16748 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16749 tree V2DI_type_node
16750 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16751 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16752 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16753 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16754 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16755 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16756 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16757
16758 tree pchar_type_node = build_pointer_type (char_type_node);
16759 tree pcchar_type_node = build_pointer_type (
16760 build_type_variant (char_type_node, 1, 0));
16761 tree pfloat_type_node = build_pointer_type (float_type_node);
16762 tree pcfloat_type_node = build_pointer_type (
16763 build_type_variant (float_type_node, 1, 0));
16764 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16765 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16766 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16767
16768 /* Comparisons. */
16769 tree int_ftype_v4sf_v4sf
16770 = build_function_type_list (integer_type_node,
16771 V4SF_type_node, V4SF_type_node, NULL_TREE);
16772 tree v4si_ftype_v4sf_v4sf
16773 = build_function_type_list (V4SI_type_node,
16774 V4SF_type_node, V4SF_type_node, NULL_TREE);
16775 /* MMX/SSE/integer conversions. */
16776 tree int_ftype_v4sf
16777 = build_function_type_list (integer_type_node,
16778 V4SF_type_node, NULL_TREE);
16779 tree int64_ftype_v4sf
16780 = build_function_type_list (long_long_integer_type_node,
16781 V4SF_type_node, NULL_TREE);
16782 tree int_ftype_v8qi
16783 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16784 tree v4sf_ftype_v4sf_int
16785 = build_function_type_list (V4SF_type_node,
16786 V4SF_type_node, integer_type_node, NULL_TREE);
16787 tree v4sf_ftype_v4sf_int64
16788 = build_function_type_list (V4SF_type_node,
16789 V4SF_type_node, long_long_integer_type_node,
16790 NULL_TREE);
16791 tree v4sf_ftype_v4sf_v2si
16792 = build_function_type_list (V4SF_type_node,
16793 V4SF_type_node, V2SI_type_node, NULL_TREE);
16794
16795 /* Miscellaneous. */
16796 tree v8qi_ftype_v4hi_v4hi
16797 = build_function_type_list (V8QI_type_node,
16798 V4HI_type_node, V4HI_type_node, NULL_TREE);
16799 tree v4hi_ftype_v2si_v2si
16800 = build_function_type_list (V4HI_type_node,
16801 V2SI_type_node, V2SI_type_node, NULL_TREE);
16802 tree v4sf_ftype_v4sf_v4sf_int
16803 = build_function_type_list (V4SF_type_node,
16804 V4SF_type_node, V4SF_type_node,
16805 integer_type_node, NULL_TREE);
16806 tree v2si_ftype_v4hi_v4hi
16807 = build_function_type_list (V2SI_type_node,
16808 V4HI_type_node, V4HI_type_node, NULL_TREE);
16809 tree v4hi_ftype_v4hi_int
16810 = build_function_type_list (V4HI_type_node,
16811 V4HI_type_node, integer_type_node, NULL_TREE);
16812 tree v4hi_ftype_v4hi_di
16813 = build_function_type_list (V4HI_type_node,
16814 V4HI_type_node, long_long_unsigned_type_node,
16815 NULL_TREE);
16816 tree v2si_ftype_v2si_di
16817 = build_function_type_list (V2SI_type_node,
16818 V2SI_type_node, long_long_unsigned_type_node,
16819 NULL_TREE);
16820 tree void_ftype_void
16821 = build_function_type (void_type_node, void_list_node);
16822 tree void_ftype_unsigned
16823 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16824 tree void_ftype_unsigned_unsigned
16825 = build_function_type_list (void_type_node, unsigned_type_node,
16826 unsigned_type_node, NULL_TREE);
16827 tree void_ftype_pcvoid_unsigned_unsigned
16828 = build_function_type_list (void_type_node, const_ptr_type_node,
16829 unsigned_type_node, unsigned_type_node,
16830 NULL_TREE);
16831 tree unsigned_ftype_void
16832 = build_function_type (unsigned_type_node, void_list_node);
16833 tree v2si_ftype_v4sf
16834 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16835 /* Loads/stores. */
16836 tree void_ftype_v8qi_v8qi_pchar
16837 = build_function_type_list (void_type_node,
16838 V8QI_type_node, V8QI_type_node,
16839 pchar_type_node, NULL_TREE);
16840 tree v4sf_ftype_pcfloat
16841 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16842 /* @@@ the type is bogus */
16843 tree v4sf_ftype_v4sf_pv2si
16844 = build_function_type_list (V4SF_type_node,
16845 V4SF_type_node, pv2si_type_node, NULL_TREE);
16846 tree void_ftype_pv2si_v4sf
16847 = build_function_type_list (void_type_node,
16848 pv2si_type_node, V4SF_type_node, NULL_TREE);
16849 tree void_ftype_pfloat_v4sf
16850 = build_function_type_list (void_type_node,
16851 pfloat_type_node, V4SF_type_node, NULL_TREE);
16852 tree void_ftype_pdi_di
16853 = build_function_type_list (void_type_node,
16854 pdi_type_node, long_long_unsigned_type_node,
16855 NULL_TREE);
16856 tree void_ftype_pv2di_v2di
16857 = build_function_type_list (void_type_node,
16858 pv2di_type_node, V2DI_type_node, NULL_TREE);
16859 /* Normal vector unops. */
16860 tree v4sf_ftype_v4sf
16861 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16862 tree v16qi_ftype_v16qi
16863 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16864 tree v8hi_ftype_v8hi
16865 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16866 tree v4si_ftype_v4si
16867 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16868 tree v8qi_ftype_v8qi
16869 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16870 tree v4hi_ftype_v4hi
16871 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16872
16873 /* Normal vector binops. */
16874 tree v4sf_ftype_v4sf_v4sf
16875 = build_function_type_list (V4SF_type_node,
16876 V4SF_type_node, V4SF_type_node, NULL_TREE);
16877 tree v8qi_ftype_v8qi_v8qi
16878 = build_function_type_list (V8QI_type_node,
16879 V8QI_type_node, V8QI_type_node, NULL_TREE);
16880 tree v4hi_ftype_v4hi_v4hi
16881 = build_function_type_list (V4HI_type_node,
16882 V4HI_type_node, V4HI_type_node, NULL_TREE);
16883 tree v2si_ftype_v2si_v2si
16884 = build_function_type_list (V2SI_type_node,
16885 V2SI_type_node, V2SI_type_node, NULL_TREE);
16886 tree di_ftype_di_di
16887 = build_function_type_list (long_long_unsigned_type_node,
16888 long_long_unsigned_type_node,
16889 long_long_unsigned_type_node, NULL_TREE);
16890
16891 tree di_ftype_di_di_int
16892 = build_function_type_list (long_long_unsigned_type_node,
16893 long_long_unsigned_type_node,
16894 long_long_unsigned_type_node,
16895 integer_type_node, NULL_TREE);
16896
16897 tree v2si_ftype_v2sf
16898 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16899 tree v2sf_ftype_v2si
16900 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16901 tree v2si_ftype_v2si
16902 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16903 tree v2sf_ftype_v2sf
16904 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16905 tree v2sf_ftype_v2sf_v2sf
16906 = build_function_type_list (V2SF_type_node,
16907 V2SF_type_node, V2SF_type_node, NULL_TREE);
16908 tree v2si_ftype_v2sf_v2sf
16909 = build_function_type_list (V2SI_type_node,
16910 V2SF_type_node, V2SF_type_node, NULL_TREE);
16911 tree pint_type_node = build_pointer_type (integer_type_node);
16912 tree pdouble_type_node = build_pointer_type (double_type_node);
16913 tree pcdouble_type_node = build_pointer_type (
16914 build_type_variant (double_type_node, 1, 0));
16915 tree int_ftype_v2df_v2df
16916 = build_function_type_list (integer_type_node,
16917 V2DF_type_node, V2DF_type_node, NULL_TREE);
16918
16919 tree void_ftype_pcvoid
16920 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16921 tree v4sf_ftype_v4si
16922 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16923 tree v4si_ftype_v4sf
16924 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16925 tree v2df_ftype_v4si
16926 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16927 tree v4si_ftype_v2df
16928 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16929 tree v2si_ftype_v2df
16930 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16931 tree v4sf_ftype_v2df
16932 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16933 tree v2df_ftype_v2si
16934 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16935 tree v2df_ftype_v4sf
16936 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16937 tree int_ftype_v2df
16938 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16939 tree int64_ftype_v2df
16940 = build_function_type_list (long_long_integer_type_node,
16941 V2DF_type_node, NULL_TREE);
16942 tree v2df_ftype_v2df_int
16943 = build_function_type_list (V2DF_type_node,
16944 V2DF_type_node, integer_type_node, NULL_TREE);
16945 tree v2df_ftype_v2df_int64
16946 = build_function_type_list (V2DF_type_node,
16947 V2DF_type_node, long_long_integer_type_node,
16948 NULL_TREE);
16949 tree v4sf_ftype_v4sf_v2df
16950 = build_function_type_list (V4SF_type_node,
16951 V4SF_type_node, V2DF_type_node, NULL_TREE);
16952 tree v2df_ftype_v2df_v4sf
16953 = build_function_type_list (V2DF_type_node,
16954 V2DF_type_node, V4SF_type_node, NULL_TREE);
16955 tree v2df_ftype_v2df_v2df_int
16956 = build_function_type_list (V2DF_type_node,
16957 V2DF_type_node, V2DF_type_node,
16958 integer_type_node,
16959 NULL_TREE);
16960 tree v2df_ftype_v2df_pcdouble
16961 = build_function_type_list (V2DF_type_node,
16962 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16963 tree void_ftype_pdouble_v2df
16964 = build_function_type_list (void_type_node,
16965 pdouble_type_node, V2DF_type_node, NULL_TREE);
16966 tree void_ftype_pint_int
16967 = build_function_type_list (void_type_node,
16968 pint_type_node, integer_type_node, NULL_TREE);
16969 tree void_ftype_v16qi_v16qi_pchar
16970 = build_function_type_list (void_type_node,
16971 V16QI_type_node, V16QI_type_node,
16972 pchar_type_node, NULL_TREE);
16973 tree v2df_ftype_pcdouble
16974 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16975 tree v2df_ftype_v2df_v2df
16976 = build_function_type_list (V2DF_type_node,
16977 V2DF_type_node, V2DF_type_node, NULL_TREE);
16978 tree v16qi_ftype_v16qi_v16qi
16979 = build_function_type_list (V16QI_type_node,
16980 V16QI_type_node, V16QI_type_node, NULL_TREE);
16981 tree v8hi_ftype_v8hi_v8hi
16982 = build_function_type_list (V8HI_type_node,
16983 V8HI_type_node, V8HI_type_node, NULL_TREE);
16984 tree v4si_ftype_v4si_v4si
16985 = build_function_type_list (V4SI_type_node,
16986 V4SI_type_node, V4SI_type_node, NULL_TREE);
16987 tree v2di_ftype_v2di_v2di
16988 = build_function_type_list (V2DI_type_node,
16989 V2DI_type_node, V2DI_type_node, NULL_TREE);
16990 tree v2di_ftype_v2df_v2df
16991 = build_function_type_list (V2DI_type_node,
16992 V2DF_type_node, V2DF_type_node, NULL_TREE);
16993 tree v2df_ftype_v2df
16994 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16995 tree v2di_ftype_v2di_int
16996 = build_function_type_list (V2DI_type_node,
16997 V2DI_type_node, integer_type_node, NULL_TREE);
16998 tree v2di_ftype_v2di_v2di_int
16999 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17000 V2DI_type_node, integer_type_node, NULL_TREE);
17001 tree v4si_ftype_v4si_int
17002 = build_function_type_list (V4SI_type_node,
17003 V4SI_type_node, integer_type_node, NULL_TREE);
17004 tree v8hi_ftype_v8hi_int
17005 = build_function_type_list (V8HI_type_node,
17006 V8HI_type_node, integer_type_node, NULL_TREE);
17007 tree v8hi_ftype_v8hi_v2di
17008 = build_function_type_list (V8HI_type_node,
17009 V8HI_type_node, V2DI_type_node, NULL_TREE);
17010 tree v4si_ftype_v4si_v2di
17011 = build_function_type_list (V4SI_type_node,
17012 V4SI_type_node, V2DI_type_node, NULL_TREE);
17013 tree v4si_ftype_v8hi_v8hi
17014 = build_function_type_list (V4SI_type_node,
17015 V8HI_type_node, V8HI_type_node, NULL_TREE);
17016 tree di_ftype_v8qi_v8qi
17017 = build_function_type_list (long_long_unsigned_type_node,
17018 V8QI_type_node, V8QI_type_node, NULL_TREE);
17019 tree di_ftype_v2si_v2si
17020 = build_function_type_list (long_long_unsigned_type_node,
17021 V2SI_type_node, V2SI_type_node, NULL_TREE);
17022 tree v2di_ftype_v16qi_v16qi
17023 = build_function_type_list (V2DI_type_node,
17024 V16QI_type_node, V16QI_type_node, NULL_TREE);
17025 tree v2di_ftype_v4si_v4si
17026 = build_function_type_list (V2DI_type_node,
17027 V4SI_type_node, V4SI_type_node, NULL_TREE);
17028 tree int_ftype_v16qi
17029 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17030 tree v16qi_ftype_pcchar
17031 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17032 tree void_ftype_pchar_v16qi
17033 = build_function_type_list (void_type_node,
17034 pchar_type_node, V16QI_type_node, NULL_TREE);
17035
17036 tree v2di_ftype_v2di_unsigned_unsigned
17037 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17038 unsigned_type_node, unsigned_type_node,
17039 NULL_TREE);
17040 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17041 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17042 unsigned_type_node, unsigned_type_node,
17043 NULL_TREE);
17044 tree v2di_ftype_v2di_v16qi
17045 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17046 NULL_TREE);
17047
17048 tree float80_type;
17049 tree float128_type;
17050 tree ftype;
17051
17052 /* The __float80 type. */
17053 if (TYPE_MODE (long_double_type_node) == XFmode)
17054 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17055 "__float80");
17056 else
17057 {
17058 /* Build an 80-bit REAL_TYPE when long double is not already XFmode. */
17059 float80_type = make_node (REAL_TYPE);
17060 TYPE_PRECISION (float80_type) = 80;
17061 layout_type (float80_type);
17062 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17063 }
17064
17065 if (TARGET_64BIT)
17066 {
17067 float128_type = make_node (REAL_TYPE);
17068 TYPE_PRECISION (float128_type) = 128;
17069 layout_type (float128_type);
17070 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17071 }
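/* Editor's sketch: once registered, these names act as ordinary type
   specifiers in user code, e.g.

     __float80  e;     80-bit extended precision
     __float128 q;     only available under the TARGET_64BIT check above

   with availability following exactly the conditions tested just above.  */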
17072
17073 /* Add all builtins that are more or less simple operations on two
17074 operands. */
17075 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17076 {
17077 /* Use one of the operands; the target can have a different mode for
17078 mask-generating compares. */
17079 enum machine_mode mode;
17080 tree type;
17081
17082 if (d->name == 0)
17083 continue;
17084 mode = insn_data[d->icode].operand[1].mode;
17085
17086 switch (mode)
17087 {
17088 case V16QImode:
17089 type = v16qi_ftype_v16qi_v16qi;
17090 break;
17091 case V8HImode:
17092 type = v8hi_ftype_v8hi_v8hi;
17093 break;
17094 case V4SImode:
17095 type = v4si_ftype_v4si_v4si;
17096 break;
17097 case V2DImode:
17098 type = v2di_ftype_v2di_v2di;
17099 break;
17100 case V2DFmode:
17101 type = v2df_ftype_v2df_v2df;
17102 break;
17103 case V4SFmode:
17104 type = v4sf_ftype_v4sf_v4sf;
17105 break;
17106 case V8QImode:
17107 type = v8qi_ftype_v8qi_v8qi;
17108 break;
17109 case V4HImode:
17110 type = v4hi_ftype_v4hi_v4hi;
17111 break;
17112 case V2SImode:
17113 type = v2si_ftype_v2si_v2si;
17114 break;
17115 case DImode:
17116 type = di_ftype_di_di;
17117 break;
17118
17119 default:
17120 gcc_unreachable ();
17121 }
17122
17123 /* Override for comparisons. */
17124 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17125 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17126 type = v4si_ftype_v4sf_v4sf;
17127
17128 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17129 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17130 type = v2di_ftype_v2df_v2df;
17131
17132 def_builtin (d->mask, d->name, type, d->code);
17133 }
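/* Worked example (editor's sketch, not in the original source): for the
   bdesc_2arg entry { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps",
   IX86_BUILTIN_ADDPS, 0, 0 }, operand 1 of the insn has mode V4SFmode, so
   the switch above selects v4sf_ftype_v4sf_v4sf and the builtin is
   registered as a function taking and returning vectors of four floats.
   The mask-generating compares are the exception handled by the override
   just before the def_builtin call: CODE_FOR_sse_maskcmpv4sf3 produces a
   bit mask, so those builtins get v4si_ftype_v4sf_v4sf instead.  */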
17134
17135 /* Add all builtins that are more or less simple operations on 1 operand. */
17136 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17137 {
17138 enum machine_mode mode;
17139 tree type;
17140
17141 if (d->name == 0)
17142 continue;
17143 mode = insn_data[d->icode].operand[1].mode;
17144
17145 switch (mode)
17146 {
17147 case V16QImode:
17148 type = v16qi_ftype_v16qi;
17149 break;
17150 case V8HImode:
17151 type = v8hi_ftype_v8hi;
17152 break;
17153 case V4SImode:
17154 type = v4si_ftype_v4si;
17155 break;
17156 case V2DFmode:
17157 type = v2df_ftype_v2df;
17158 break;
17159 case V4SFmode:
17160 type = v4sf_ftype_v4sf;
17161 break;
17162 case V8QImode:
17163 type = v8qi_ftype_v8qi;
17164 break;
17165 case V4HImode:
17166 type = v4hi_ftype_v4hi;
17167 break;
17168 case V2SImode:
17169 type = v2si_ftype_v2si;
17170 break;
17171
17172 default:
17173 gcc_unreachable ();
17174 }
17175
17176 def_builtin (d->mask, d->name, type, d->code);
17177 }
17178
17179 /* Add the remaining MMX insns with somewhat more complicated types. */
17180 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17181 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17182 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17183 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17184
17185 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17186 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17187 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17188
17189 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17190 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17191
17192 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17193 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17194
17195 /* comi/ucomi insns. */
17196 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17197 if (d->mask == MASK_SSE2)
17198 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17199 else
17200 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17201
17202 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17203 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17204 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17205
17206 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17207 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17208 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17209 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17210 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17211 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17212 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17213 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17214 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17215 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17216 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17217
17218 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17219
17220 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17221 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17222
17223 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17224 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17225 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17226 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17227
17228 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17229 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17230 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17231 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17232
17233 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17234
17235 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17236
17237 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17238 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17239 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17240 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17241 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17242 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17243
17244 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17245
17246 /* Original 3DNow! */
17247 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17248 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17249 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17256 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17257 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17258 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17259 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17260 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17261 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17262 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17263 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17264 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17265 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17266 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17267
17268 /* 3DNow! extension as used in the Athlon CPU. */
17269 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17270 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17271 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17272 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17273 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17274 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17275
17276 /* SSE2 */
17277 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17278
17279 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17280 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17281
17282 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17283 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17284
17285 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17286 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17287 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17288 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17289 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17290
17291 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17292 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17293 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17294 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17295
17296 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17297 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17298
17299 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17300
17301 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17302 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17303
17304 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17305 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17306 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17307 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17308 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17309
17310 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17311
17312 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17313 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17314 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17315 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17316
17317 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17318 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17319 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17320
17321 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17322 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17323 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17324 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17325
17326 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17327 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17328 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17329
17330 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17331 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17332
17333 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17334 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17335
17336 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17337 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17338 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17339
17340 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17341 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17342 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17343
17344 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17345 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17346
17347 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17348 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17349 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17350 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17351
17352 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17353 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17354 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17355 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17356
17357 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17358 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17359
17360 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17361
17362 /* Prescott New Instructions. */
17363 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17364 void_ftype_pcvoid_unsigned_unsigned,
17365 IX86_BUILTIN_MONITOR);
17366 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17367 void_ftype_unsigned_unsigned,
17368 IX86_BUILTIN_MWAIT);
17369 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17370 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17371
17372 /* SSSE3. */
17373 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17374 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17375 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17376 IX86_BUILTIN_PALIGNR);
17377
17378 	  /* AMDFAM10 SSE4A new built-ins.  */
17379 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17380 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17381 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17382 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17383 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17384 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17385 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17386 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17387 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17388 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17389 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17390 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17391
17392 /* Access to the vec_init patterns. */
17393 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17394 integer_type_node, NULL_TREE);
17395 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17396 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17397
17398 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17399 short_integer_type_node,
17400 short_integer_type_node,
17401 short_integer_type_node, NULL_TREE);
17402 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17403 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17404
17405 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17406 char_type_node, char_type_node,
17407 char_type_node, char_type_node,
17408 char_type_node, char_type_node,
17409 char_type_node, NULL_TREE);
17410 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17411 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17412
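  /* Illustrative sketch (editorial note, not part of the original source):
     the vec_init builtins defined above are what an mmintrin.h-style wrapper
     would call to build an MMX vector without exposing vec_init patterns for
     MMX modes.  Roughly, assuming a hypothetical wrapper my_setr_pi16:

       static __inline __m64
       my_setr_pi16 (short w0, short w1, short w2, short w3)
       {
         return (__m64) __builtin_ia32_vec_init_v4hi (w0, w1, w2, w3);
       }

     The wrapper name and element order are assumptions; only the builtin
     name and its four short arguments come from the definition above.  */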
17413 /* Access to the vec_extract patterns. */
17414 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17415 integer_type_node, NULL_TREE);
17416 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17417 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17418
17419 ftype = build_function_type_list (long_long_integer_type_node,
17420 V2DI_type_node, integer_type_node,
17421 NULL_TREE);
17422 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17423 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17424
17425 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17426 integer_type_node, NULL_TREE);
17427 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17428 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17429
17430 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17431 integer_type_node, NULL_TREE);
17432 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17433 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17434
17435 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17436 integer_type_node, NULL_TREE);
17437 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17438 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17439
17440 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17441 integer_type_node, NULL_TREE);
17442 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17443 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17444
17445 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17446 integer_type_node, NULL_TREE);
17447 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17448 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17449
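  /* Illustrative sketch (editorial note): a typical consumer of the vec_ext
     builtins defined above is an intrinsic that reads one element of a
     vector with a constant selector, e.g. for V8HImode:

       static __inline int
       my_extract_epi16 (__m128i a, const int n)
       {
         return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) a, n);
       }

     The wrapper name is hypothetical; the selector must be an in-range
     constant, which get_element_number below enforces with an error.  */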
17450 /* Access to the vec_set patterns. */
17451 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17452 intHI_type_node,
17453 integer_type_node, NULL_TREE);
17454 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17455 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17456
17457 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17458 intHI_type_node,
17459 integer_type_node, NULL_TREE);
17460 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17461 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17462 }
17463
17464 /* Errors in the source file can cause expand_expr to return const0_rtx
17465 where we expect a vector. To avoid crashing, use one of the vector
17466 clear instructions. */
17467 static rtx
17468 safe_vector_operand (rtx x, enum machine_mode mode)
17469 {
17470 if (x == const0_rtx)
17471 x = CONST0_RTX (mode);
17472 return x;
17473 }
17474
17475 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17476
17477 static rtx
17478 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17479 {
17480 rtx pat, xops[3];
17481 tree arg0 = CALL_EXPR_ARG (exp, 0);
17482 tree arg1 = CALL_EXPR_ARG (exp, 1);
17483 rtx op0 = expand_normal (arg0);
17484 rtx op1 = expand_normal (arg1);
17485 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17486 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17487 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17488
17489 if (VECTOR_MODE_P (mode0))
17490 op0 = safe_vector_operand (op0, mode0);
17491 if (VECTOR_MODE_P (mode1))
17492 op1 = safe_vector_operand (op1, mode1);
17493
17494 if (optimize || !target
17495 || GET_MODE (target) != tmode
17496 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17497 target = gen_reg_rtx (tmode);
17498
17499 if (GET_MODE (op1) == SImode && mode1 == TImode)
17500 {
17501 rtx x = gen_reg_rtx (V4SImode);
17502 emit_insn (gen_sse2_loadd (x, op1));
17503 op1 = gen_lowpart (TImode, x);
17504 }
17505
17506 /* The insn must want input operands in the same modes as the
17507 result. */
17508 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17509 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17510
17511 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17512 op0 = copy_to_mode_reg (mode0, op0);
17513 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17514 op1 = copy_to_mode_reg (mode1, op1);
17515
17516 /* ??? Using ix86_fixup_binary_operands is problematic when
17517 we've got mismatched modes. Fake it. */
17518
17519 xops[0] = target;
17520 xops[1] = op0;
17521 xops[2] = op1;
17522
17523 if (tmode == mode0 && tmode == mode1)
17524 {
17525 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17526 op0 = xops[1];
17527 op1 = xops[2];
17528 }
17529 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17530 {
17531 op0 = force_reg (mode0, op0);
17532 op1 = force_reg (mode1, op1);
17533 target = gen_reg_rtx (tmode);
17534 }
17535
17536 pat = GEN_FCN (icode) (target, op0, op1);
17537 if (! pat)
17538 return 0;
17539 emit_insn (pat);
17540 return target;
17541 }
17542
17543 /* Subroutine of ix86_expand_builtin to take care of stores. */
17544
17545 static rtx
17546 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17547 {
17548 rtx pat;
17549 tree arg0 = CALL_EXPR_ARG (exp, 0);
17550 tree arg1 = CALL_EXPR_ARG (exp, 1);
17551 rtx op0 = expand_normal (arg0);
17552 rtx op1 = expand_normal (arg1);
17553 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17554 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17555
17556 if (VECTOR_MODE_P (mode1))
17557 op1 = safe_vector_operand (op1, mode1);
17558
17559 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17560 op1 = copy_to_mode_reg (mode1, op1);
17561
17562 pat = GEN_FCN (icode) (op0, op1);
17563 if (pat)
17564 emit_insn (pat);
17565 return 0;
17566 }
17567
17568 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17569
17570 static rtx
17571 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17572 rtx target, int do_load)
17573 {
17574 rtx pat;
17575 tree arg0 = CALL_EXPR_ARG (exp, 0);
17576 rtx op0 = expand_normal (arg0);
17577 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17578 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17579
17580 if (optimize || !target
17581 || GET_MODE (target) != tmode
17582 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17583 target = gen_reg_rtx (tmode);
17584 if (do_load)
17585 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17586 else
17587 {
17588 if (VECTOR_MODE_P (mode0))
17589 op0 = safe_vector_operand (op0, mode0);
17590
17591 if ((optimize && !register_operand (op0, mode0))
17592 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17593 op0 = copy_to_mode_reg (mode0, op0);
17594 }
17595
17596 pat = GEN_FCN (icode) (target, op0);
17597 if (! pat)
17598 return 0;
17599 emit_insn (pat);
17600 return target;
17601 }
17602
17603 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17604 sqrtss, rsqrtss, rcpss. */
17605
17606 static rtx
17607 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17608 {
17609 rtx pat;
17610 tree arg0 = CALL_EXPR_ARG (exp, 0);
17611 rtx op1, op0 = expand_normal (arg0);
17612 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17613 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17614
17615 if (optimize || !target
17616 || GET_MODE (target) != tmode
17617 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17618 target = gen_reg_rtx (tmode);
17619
17620 if (VECTOR_MODE_P (mode0))
17621 op0 = safe_vector_operand (op0, mode0);
17622
17623 if ((optimize && !register_operand (op0, mode0))
17624 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17625 op0 = copy_to_mode_reg (mode0, op0);
17626
17627 op1 = op0;
17628 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17629 op1 = copy_to_mode_reg (mode0, op1);
17630
17631 pat = GEN_FCN (icode) (target, op0, op1);
17632 if (! pat)
17633 return 0;
17634 emit_insn (pat);
17635 return target;
17636 }
17637
17638 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17639
17640 static rtx
17641 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17642 rtx target)
17643 {
17644 rtx pat;
17645 tree arg0 = CALL_EXPR_ARG (exp, 0);
17646 tree arg1 = CALL_EXPR_ARG (exp, 1);
17647 rtx op0 = expand_normal (arg0);
17648 rtx op1 = expand_normal (arg1);
17649 rtx op2;
17650 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17651 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17652 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17653 enum rtx_code comparison = d->comparison;
17654
17655 if (VECTOR_MODE_P (mode0))
17656 op0 = safe_vector_operand (op0, mode0);
17657 if (VECTOR_MODE_P (mode1))
17658 op1 = safe_vector_operand (op1, mode1);
17659
17660 /* Swap operands if we have a comparison that isn't available in
17661 hardware. */
17662 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17663 {
17664 rtx tmp = gen_reg_rtx (mode1);
17665 emit_move_insn (tmp, op1);
17666 op1 = op0;
17667 op0 = tmp;
17668 }
17669
17670 if (optimize || !target
17671 || GET_MODE (target) != tmode
17672 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17673 target = gen_reg_rtx (tmode);
17674
17675 if ((optimize && !register_operand (op0, mode0))
17676 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17677 op0 = copy_to_mode_reg (mode0, op0);
17678 if ((optimize && !register_operand (op1, mode1))
17679 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17680 op1 = copy_to_mode_reg (mode1, op1);
17681
17682 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17683 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17684 if (! pat)
17685 return 0;
17686 emit_insn (pat);
17687 return target;
17688 }
17689
17690 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17691
17692 static rtx
17693 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17694 rtx target)
17695 {
17696 rtx pat;
17697 tree arg0 = CALL_EXPR_ARG (exp, 0);
17698 tree arg1 = CALL_EXPR_ARG (exp, 1);
17699 rtx op0 = expand_normal (arg0);
17700 rtx op1 = expand_normal (arg1);
17701 rtx op2;
17702 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17703 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17704 enum rtx_code comparison = d->comparison;
17705
17706 if (VECTOR_MODE_P (mode0))
17707 op0 = safe_vector_operand (op0, mode0);
17708 if (VECTOR_MODE_P (mode1))
17709 op1 = safe_vector_operand (op1, mode1);
17710
17711 /* Swap operands if we have a comparison that isn't available in
17712 hardware. */
17713 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17714 {
17715 rtx tmp = op1;
17716 op1 = op0;
17717 op0 = tmp;
17718 }
17719
17720 target = gen_reg_rtx (SImode);
17721 emit_move_insn (target, const0_rtx);
17722 target = gen_rtx_SUBREG (QImode, target, 0);
17723
17724 if ((optimize && !register_operand (op0, mode0))
17725 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17726 op0 = copy_to_mode_reg (mode0, op0);
17727 if ((optimize && !register_operand (op1, mode1))
17728 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17729 op1 = copy_to_mode_reg (mode1, op1);
17730
17731 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17732 pat = GEN_FCN (d->icode) (op0, op1);
17733 if (! pat)
17734 return 0;
17735 emit_insn (pat);
17736 emit_insn (gen_rtx_SET (VOIDmode,
17737 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17738 gen_rtx_fmt_ee (comparison, QImode,
17739 SET_DEST (pat),
17740 const0_rtx)));
17741
17742 return SUBREG_REG (target);
17743 }
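/* Illustrative sketch (editorial note): for a comi builtin such as
   __builtin_ia32_comieq, the expansion above first clears a fresh SImode
   pseudo, emits the compare insn, and then sets the low byte from the flags,
   roughly:

     (set (reg:SI tmp) (const_int 0))
     ... comiss/comisd compare of op0 and op1, setting the flags ...
     (set (strict_low_part (subreg:QI (reg:SI tmp) 0))
          (eq:QI (flags) (const_int 0)))

   This is a simplified approximation of the RTL that ix86_expand_sse_comi
   emits; only the overall shape is intended.  */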
17744
17745 /* Return the integer constant in ARG. Constrain it to be in the range
17746 of the subparts of VEC_TYPE; issue an error if not. */
17747
17748 static int
17749 get_element_number (tree vec_type, tree arg)
17750 {
17751 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17752
17753 if (!host_integerp (arg, 1)
17754 || (elt = tree_low_cst (arg, 1), elt > max))
17755 {
17756 error ("selector must be an integer constant in the range 0..%wi", max);
17757 return 0;
17758 }
17759
17760 return elt;
17761 }
17762
17763 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17764 ix86_expand_vector_init. We DO have language-level syntax for this, in
17765 the form of (type){ init-list }. Except that since we can't place emms
17766 instructions from inside the compiler, we can't allow the use of MMX
17767 registers unless the user explicitly asks for it. So we do *not* define
17768 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17769    we have builtins invoked by mmintrin.h that give us license to emit
17770 these sorts of instructions. */
17771
17772 static rtx
17773 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17774 {
17775 enum machine_mode tmode = TYPE_MODE (type);
17776 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17777 int i, n_elt = GET_MODE_NUNITS (tmode);
17778 rtvec v = rtvec_alloc (n_elt);
17779
17780 gcc_assert (VECTOR_MODE_P (tmode));
17781 gcc_assert (call_expr_nargs (exp) == n_elt);
17782
17783 for (i = 0; i < n_elt; ++i)
17784 {
17785 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17786 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17787 }
17788
17789 if (!target || !register_operand (target, tmode))
17790 target = gen_reg_rtx (tmode);
17791
17792 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17793 return target;
17794 }
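/* Illustrative sketch (editorial note): the language-level form mentioned in
   the comment above and the builtin handled here build the same value, e.g.

     __v2si v = (__v2si) { x, y };
     __m64  w = (__m64) __builtin_ia32_vec_init_v2si (x, y);

   Both produce a V2SImode vector; the builtin form exists so that mmintrin.h
   can ask for MMX registers explicitly, as explained above.  The variable
   names are purely illustrative.  */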
17795
17796 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17797 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17798 had a language-level syntax for referencing vector elements. */
17799
17800 static rtx
17801 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17802 {
17803 enum machine_mode tmode, mode0;
17804 tree arg0, arg1;
17805 int elt;
17806 rtx op0;
17807
17808 arg0 = CALL_EXPR_ARG (exp, 0);
17809 arg1 = CALL_EXPR_ARG (exp, 1);
17810
17811 op0 = expand_normal (arg0);
17812 elt = get_element_number (TREE_TYPE (arg0), arg1);
17813
17814 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17815 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17816 gcc_assert (VECTOR_MODE_P (mode0));
17817
17818 op0 = force_reg (mode0, op0);
17819
17820 if (optimize || !target || !register_operand (target, tmode))
17821 target = gen_reg_rtx (tmode);
17822
17823 ix86_expand_vector_extract (true, target, op0, elt);
17824
17825 return target;
17826 }
17827
17828 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17829 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17830 a language-level syntax for referencing vector elements. */
17831
17832 static rtx
17833 ix86_expand_vec_set_builtin (tree exp)
17834 {
17835 enum machine_mode tmode, mode1;
17836 tree arg0, arg1, arg2;
17837 int elt;
17838 rtx op0, op1;
17839
17840 arg0 = CALL_EXPR_ARG (exp, 0);
17841 arg1 = CALL_EXPR_ARG (exp, 1);
17842 arg2 = CALL_EXPR_ARG (exp, 2);
17843
17844 tmode = TYPE_MODE (TREE_TYPE (arg0));
17845 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17846 gcc_assert (VECTOR_MODE_P (tmode));
17847
17848 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17849 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17850 elt = get_element_number (TREE_TYPE (arg0), arg2);
17851
17852 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17853 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17854
17855 op0 = force_reg (tmode, op0);
17856 op1 = force_reg (mode1, op1);
17857
17858 ix86_expand_vector_set (true, op0, op1, elt);
17859
17860 return op0;
17861 }
17862
17863 /* Expand an expression EXP that calls a built-in function,
17864 with result going to TARGET if that's convenient
17865 (and in mode MODE if that's convenient).
17866 SUBTARGET may be used as the target for computing one of EXP's operands.
17867 IGNORE is nonzero if the value is to be ignored. */
17868
17869 static rtx
17870 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17871 enum machine_mode mode ATTRIBUTE_UNUSED,
17872 int ignore ATTRIBUTE_UNUSED)
17873 {
17874 const struct builtin_description *d;
17875 size_t i;
17876 enum insn_code icode;
17877 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17878 tree arg0, arg1, arg2, arg3;
17879 rtx op0, op1, op2, op3, pat;
17880 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17881 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17882
17883 switch (fcode)
17884 {
17885 case IX86_BUILTIN_EMMS:
17886 emit_insn (gen_mmx_emms ());
17887 return 0;
17888
17889 case IX86_BUILTIN_SFENCE:
17890 emit_insn (gen_sse_sfence ());
17891 return 0;
17892
17893 case IX86_BUILTIN_MASKMOVQ:
17894 case IX86_BUILTIN_MASKMOVDQU:
17895 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17896 ? CODE_FOR_mmx_maskmovq
17897 : CODE_FOR_sse2_maskmovdqu);
17898 /* Note the arg order is different from the operand order. */
17899 arg1 = CALL_EXPR_ARG (exp, 0);
17900 arg2 = CALL_EXPR_ARG (exp, 1);
17901 arg0 = CALL_EXPR_ARG (exp, 2);
17902 op0 = expand_normal (arg0);
17903 op1 = expand_normal (arg1);
17904 op2 = expand_normal (arg2);
17905 mode0 = insn_data[icode].operand[0].mode;
17906 mode1 = insn_data[icode].operand[1].mode;
17907 mode2 = insn_data[icode].operand[2].mode;
17908
17909 op0 = force_reg (Pmode, op0);
17910 op0 = gen_rtx_MEM (mode1, op0);
17911
17912 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17913 op0 = copy_to_mode_reg (mode0, op0);
17914 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17915 op1 = copy_to_mode_reg (mode1, op1);
17916 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17917 op2 = copy_to_mode_reg (mode2, op2);
17918 pat = GEN_FCN (icode) (op0, op1, op2);
17919 if (! pat)
17920 return 0;
17921 emit_insn (pat);
17922 return 0;
17923
17924 case IX86_BUILTIN_SQRTSS:
17925 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17926 case IX86_BUILTIN_RSQRTSS:
17927 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17928 case IX86_BUILTIN_RCPSS:
17929 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17930
17931 case IX86_BUILTIN_LOADUPS:
17932 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17933
17934 case IX86_BUILTIN_STOREUPS:
17935 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17936
17937 case IX86_BUILTIN_LOADHPS:
17938 case IX86_BUILTIN_LOADLPS:
17939 case IX86_BUILTIN_LOADHPD:
17940 case IX86_BUILTIN_LOADLPD:
17941 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17942 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17943 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17944 : CODE_FOR_sse2_loadlpd);
17945 arg0 = CALL_EXPR_ARG (exp, 0);
17946 arg1 = CALL_EXPR_ARG (exp, 1);
17947 op0 = expand_normal (arg0);
17948 op1 = expand_normal (arg1);
17949 tmode = insn_data[icode].operand[0].mode;
17950 mode0 = insn_data[icode].operand[1].mode;
17951 mode1 = insn_data[icode].operand[2].mode;
17952
17953 op0 = force_reg (mode0, op0);
17954 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17955 if (optimize || target == 0
17956 || GET_MODE (target) != tmode
17957 || !register_operand (target, tmode))
17958 target = gen_reg_rtx (tmode);
17959 pat = GEN_FCN (icode) (target, op0, op1);
17960 if (! pat)
17961 return 0;
17962 emit_insn (pat);
17963 return target;
17964
17965 case IX86_BUILTIN_STOREHPS:
17966 case IX86_BUILTIN_STORELPS:
17967 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17968 : CODE_FOR_sse_storelps);
17969 arg0 = CALL_EXPR_ARG (exp, 0);
17970 arg1 = CALL_EXPR_ARG (exp, 1);
17971 op0 = expand_normal (arg0);
17972 op1 = expand_normal (arg1);
17973 mode0 = insn_data[icode].operand[0].mode;
17974 mode1 = insn_data[icode].operand[1].mode;
17975
17976 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17977 op1 = force_reg (mode1, op1);
17978
17979 pat = GEN_FCN (icode) (op0, op1);
17980 if (! pat)
17981 return 0;
17982 emit_insn (pat);
17983 return const0_rtx;
17984
17985 case IX86_BUILTIN_MOVNTPS:
17986 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17987 case IX86_BUILTIN_MOVNTQ:
17988 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17989
17990 case IX86_BUILTIN_LDMXCSR:
17991 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17992 target = assign_386_stack_local (SImode, SLOT_TEMP);
17993 emit_move_insn (target, op0);
17994 emit_insn (gen_sse_ldmxcsr (target));
17995 return 0;
17996
17997 case IX86_BUILTIN_STMXCSR:
17998 target = assign_386_stack_local (SImode, SLOT_TEMP);
17999 emit_insn (gen_sse_stmxcsr (target));
18000 return copy_to_mode_reg (SImode, target);
18001
18002 case IX86_BUILTIN_SHUFPS:
18003 case IX86_BUILTIN_SHUFPD:
18004 icode = (fcode == IX86_BUILTIN_SHUFPS
18005 ? CODE_FOR_sse_shufps
18006 : CODE_FOR_sse2_shufpd);
18007 arg0 = CALL_EXPR_ARG (exp, 0);
18008 arg1 = CALL_EXPR_ARG (exp, 1);
18009 arg2 = CALL_EXPR_ARG (exp, 2);
18010 op0 = expand_normal (arg0);
18011 op1 = expand_normal (arg1);
18012 op2 = expand_normal (arg2);
18013 tmode = insn_data[icode].operand[0].mode;
18014 mode0 = insn_data[icode].operand[1].mode;
18015 mode1 = insn_data[icode].operand[2].mode;
18016 mode2 = insn_data[icode].operand[3].mode;
18017
18018 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18019 op0 = copy_to_mode_reg (mode0, op0);
18020 if ((optimize && !register_operand (op1, mode1))
18021 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18022 op1 = copy_to_mode_reg (mode1, op1);
18023 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18024 {
18025 /* @@@ better error message */
18026 error ("mask must be an immediate");
18027 return gen_reg_rtx (tmode);
18028 }
18029 if (optimize || target == 0
18030 || GET_MODE (target) != tmode
18031 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18032 target = gen_reg_rtx (tmode);
18033 pat = GEN_FCN (icode) (target, op0, op1, op2);
18034 if (! pat)
18035 return 0;
18036 emit_insn (pat);
18037 return target;
18038
18039 case IX86_BUILTIN_PSHUFW:
18040 case IX86_BUILTIN_PSHUFD:
18041 case IX86_BUILTIN_PSHUFHW:
18042 case IX86_BUILTIN_PSHUFLW:
18043 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18044 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18045 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18046 : CODE_FOR_mmx_pshufw);
18047 arg0 = CALL_EXPR_ARG (exp, 0);
18048 arg1 = CALL_EXPR_ARG (exp, 1);
18049 op0 = expand_normal (arg0);
18050 op1 = expand_normal (arg1);
18051 tmode = insn_data[icode].operand[0].mode;
18052 mode1 = insn_data[icode].operand[1].mode;
18053 mode2 = insn_data[icode].operand[2].mode;
18054
18055 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18056 op0 = copy_to_mode_reg (mode1, op0);
18057 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18058 {
18059 /* @@@ better error message */
18060 error ("mask must be an immediate");
18061 return const0_rtx;
18062 }
18063 if (target == 0
18064 || GET_MODE (target) != tmode
18065 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18066 target = gen_reg_rtx (tmode);
18067 pat = GEN_FCN (icode) (target, op0, op1);
18068 if (! pat)
18069 return 0;
18070 emit_insn (pat);
18071 return target;
18072
18073 case IX86_BUILTIN_PSLLDQI128:
18074 case IX86_BUILTIN_PSRLDQI128:
18075 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18076 : CODE_FOR_sse2_lshrti3);
18077 arg0 = CALL_EXPR_ARG (exp, 0);
18078 arg1 = CALL_EXPR_ARG (exp, 1);
18079 op0 = expand_normal (arg0);
18080 op1 = expand_normal (arg1);
18081 tmode = insn_data[icode].operand[0].mode;
18082 mode1 = insn_data[icode].operand[1].mode;
18083 mode2 = insn_data[icode].operand[2].mode;
18084
18085 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18086 {
18087 op0 = copy_to_reg (op0);
18088 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18089 }
18090 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18091 {
18092 error ("shift must be an immediate");
18093 return const0_rtx;
18094 }
18095 target = gen_reg_rtx (V2DImode);
18096 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18097 if (! pat)
18098 return 0;
18099 emit_insn (pat);
18100 return target;
18101
18102 case IX86_BUILTIN_FEMMS:
18103 emit_insn (gen_mmx_femms ());
18104 return NULL_RTX;
18105
18106 case IX86_BUILTIN_PAVGUSB:
18107 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18108
18109 case IX86_BUILTIN_PF2ID:
18110 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18111
18112 case IX86_BUILTIN_PFACC:
18113 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18114
18115 case IX86_BUILTIN_PFADD:
18116 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18117
18118 case IX86_BUILTIN_PFCMPEQ:
18119 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18120
18121 case IX86_BUILTIN_PFCMPGE:
18122 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18123
18124 case IX86_BUILTIN_PFCMPGT:
18125 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18126
18127 case IX86_BUILTIN_PFMAX:
18128 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18129
18130 case IX86_BUILTIN_PFMIN:
18131 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18132
18133 case IX86_BUILTIN_PFMUL:
18134 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18135
18136 case IX86_BUILTIN_PFRCP:
18137 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18138
18139 case IX86_BUILTIN_PFRCPIT1:
18140 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18141
18142 case IX86_BUILTIN_PFRCPIT2:
18143 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18144
18145 case IX86_BUILTIN_PFRSQIT1:
18146 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18147
18148 case IX86_BUILTIN_PFRSQRT:
18149 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18150
18151 case IX86_BUILTIN_PFSUB:
18152 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18153
18154 case IX86_BUILTIN_PFSUBR:
18155 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18156
18157 case IX86_BUILTIN_PI2FD:
18158 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18159
18160 case IX86_BUILTIN_PMULHRW:
18161 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18162
18163 case IX86_BUILTIN_PF2IW:
18164 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18165
18166 case IX86_BUILTIN_PFNACC:
18167 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18168
18169 case IX86_BUILTIN_PFPNACC:
18170 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18171
18172 case IX86_BUILTIN_PI2FW:
18173 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18174
18175 case IX86_BUILTIN_PSWAPDSI:
18176 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18177
18178 case IX86_BUILTIN_PSWAPDSF:
18179 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18180
18181 case IX86_BUILTIN_SQRTSD:
18182 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18183 case IX86_BUILTIN_LOADUPD:
18184 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18185 case IX86_BUILTIN_STOREUPD:
18186 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18187
18188 case IX86_BUILTIN_MFENCE:
18189 emit_insn (gen_sse2_mfence ());
18190 return 0;
18191 case IX86_BUILTIN_LFENCE:
18192 emit_insn (gen_sse2_lfence ());
18193 return 0;
18194
18195 case IX86_BUILTIN_CLFLUSH:
18196 arg0 = CALL_EXPR_ARG (exp, 0);
18197 op0 = expand_normal (arg0);
18198 icode = CODE_FOR_sse2_clflush;
18199 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18200 op0 = copy_to_mode_reg (Pmode, op0);
18201
18202 emit_insn (gen_sse2_clflush (op0));
18203 return 0;
18204
18205 case IX86_BUILTIN_MOVNTPD:
18206 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18207 case IX86_BUILTIN_MOVNTDQ:
18208 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18209 case IX86_BUILTIN_MOVNTI:
18210 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18211
18212 case IX86_BUILTIN_LOADDQU:
18213 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18214 case IX86_BUILTIN_STOREDQU:
18215 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18216
18217 case IX86_BUILTIN_MONITOR:
18218 arg0 = CALL_EXPR_ARG (exp, 0);
18219 arg1 = CALL_EXPR_ARG (exp, 1);
18220 arg2 = CALL_EXPR_ARG (exp, 2);
18221 op0 = expand_normal (arg0);
18222 op1 = expand_normal (arg1);
18223 op2 = expand_normal (arg2);
18224 if (!REG_P (op0))
18225 op0 = copy_to_mode_reg (Pmode, op0);
18226 if (!REG_P (op1))
18227 op1 = copy_to_mode_reg (SImode, op1);
18228 if (!REG_P (op2))
18229 op2 = copy_to_mode_reg (SImode, op2);
18230 if (!TARGET_64BIT)
18231 emit_insn (gen_sse3_monitor (op0, op1, op2));
18232 else
18233 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18234 return 0;
18235
18236 case IX86_BUILTIN_MWAIT:
18237 arg0 = CALL_EXPR_ARG (exp, 0);
18238 arg1 = CALL_EXPR_ARG (exp, 1);
18239 op0 = expand_normal (arg0);
18240 op1 = expand_normal (arg1);
18241 if (!REG_P (op0))
18242 op0 = copy_to_mode_reg (SImode, op0);
18243 if (!REG_P (op1))
18244 op1 = copy_to_mode_reg (SImode, op1);
18245 emit_insn (gen_sse3_mwait (op0, op1));
18246 return 0;
18247
18248 case IX86_BUILTIN_LDDQU:
18249 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18250 target, 1);
18251
18252 case IX86_BUILTIN_PALIGNR:
18253 case IX86_BUILTIN_PALIGNR128:
18254 if (fcode == IX86_BUILTIN_PALIGNR)
18255 {
18256 icode = CODE_FOR_ssse3_palignrdi;
18257 mode = DImode;
18258 }
18259 else
18260 {
18261 icode = CODE_FOR_ssse3_palignrti;
18262 mode = V2DImode;
18263 }
18264 arg0 = CALL_EXPR_ARG (exp, 0);
18265 arg1 = CALL_EXPR_ARG (exp, 1);
18266 arg2 = CALL_EXPR_ARG (exp, 2);
18267 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18268 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18269 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18270 tmode = insn_data[icode].operand[0].mode;
18271 mode1 = insn_data[icode].operand[1].mode;
18272 mode2 = insn_data[icode].operand[2].mode;
18273 mode3 = insn_data[icode].operand[3].mode;
18274
18275 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18276 {
18277 op0 = copy_to_reg (op0);
18278 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18279 }
18280 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18281 {
18282 op1 = copy_to_reg (op1);
18283 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18284 }
18285 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18286 {
18287 error ("shift must be an immediate");
18288 return const0_rtx;
18289 }
18290 target = gen_reg_rtx (mode);
18291 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18292 op0, op1, op2);
18293 if (! pat)
18294 return 0;
18295 emit_insn (pat);
18296 return target;
18297
18298 case IX86_BUILTIN_MOVNTSD:
18299 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18300
18301 case IX86_BUILTIN_MOVNTSS:
18302 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18303
18304 case IX86_BUILTIN_INSERTQ:
18305 case IX86_BUILTIN_EXTRQ:
18306 icode = (fcode == IX86_BUILTIN_EXTRQ
18307 ? CODE_FOR_sse4a_extrq
18308 : CODE_FOR_sse4a_insertq);
18309 arg0 = CALL_EXPR_ARG (exp, 0);
18310 arg1 = CALL_EXPR_ARG (exp, 1);
18311 op0 = expand_normal (arg0);
18312 op1 = expand_normal (arg1);
18313 tmode = insn_data[icode].operand[0].mode;
18314 mode1 = insn_data[icode].operand[1].mode;
18315 mode2 = insn_data[icode].operand[2].mode;
18316 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18317 op0 = copy_to_mode_reg (mode1, op0);
18318 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18319 op1 = copy_to_mode_reg (mode2, op1);
18320 if (optimize || target == 0
18321 || GET_MODE (target) != tmode
18322 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18323 target = gen_reg_rtx (tmode);
18324 pat = GEN_FCN (icode) (target, op0, op1);
18325 if (! pat)
18326 return NULL_RTX;
18327 emit_insn (pat);
18328 return target;
18329
18330 case IX86_BUILTIN_EXTRQI:
18331 icode = CODE_FOR_sse4a_extrqi;
18332 arg0 = CALL_EXPR_ARG (exp, 0);
18333 arg1 = CALL_EXPR_ARG (exp, 1);
18334 arg2 = CALL_EXPR_ARG (exp, 2);
18335 op0 = expand_normal (arg0);
18336 op1 = expand_normal (arg1);
18337 op2 = expand_normal (arg2);
18338 tmode = insn_data[icode].operand[0].mode;
18339 mode1 = insn_data[icode].operand[1].mode;
18340 mode2 = insn_data[icode].operand[2].mode;
18341 mode3 = insn_data[icode].operand[3].mode;
18342 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18343 op0 = copy_to_mode_reg (mode1, op0);
18344 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18345 {
18346 error ("index mask must be an immediate");
18347 return gen_reg_rtx (tmode);
18348 }
18349 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18350 {
18351 error ("length mask must be an immediate");
18352 return gen_reg_rtx (tmode);
18353 }
18354 if (optimize || target == 0
18355 || GET_MODE (target) != tmode
18356 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18357 target = gen_reg_rtx (tmode);
18358 pat = GEN_FCN (icode) (target, op0, op1, op2);
18359 if (! pat)
18360 return NULL_RTX;
18361 emit_insn (pat);
18362 return target;
18363
18364 case IX86_BUILTIN_INSERTQI:
18365 icode = CODE_FOR_sse4a_insertqi;
18366 arg0 = CALL_EXPR_ARG (exp, 0);
18367 arg1 = CALL_EXPR_ARG (exp, 1);
18368 arg2 = CALL_EXPR_ARG (exp, 2);
18369 arg3 = CALL_EXPR_ARG (exp, 3);
18370 op0 = expand_normal (arg0);
18371 op1 = expand_normal (arg1);
18372 op2 = expand_normal (arg2);
18373 op3 = expand_normal (arg3);
18374 tmode = insn_data[icode].operand[0].mode;
18375 mode1 = insn_data[icode].operand[1].mode;
18376 mode2 = insn_data[icode].operand[2].mode;
18377 mode3 = insn_data[icode].operand[3].mode;
18378 mode4 = insn_data[icode].operand[4].mode;
18379
18380 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18381 op0 = copy_to_mode_reg (mode1, op0);
18382
18383 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18384 op1 = copy_to_mode_reg (mode2, op1);
18385
18386 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18387 {
18388 error ("index mask must be an immediate");
18389 return gen_reg_rtx (tmode);
18390 }
18391 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18392 {
18393 error ("length mask must be an immediate");
18394 return gen_reg_rtx (tmode);
18395 }
18396 if (optimize || target == 0
18397 || GET_MODE (target) != tmode
18398 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18399 target = gen_reg_rtx (tmode);
18400 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18401 if (! pat)
18402 return NULL_RTX;
18403 emit_insn (pat);
18404 return target;
18405
18406 case IX86_BUILTIN_VEC_INIT_V2SI:
18407 case IX86_BUILTIN_VEC_INIT_V4HI:
18408 case IX86_BUILTIN_VEC_INIT_V8QI:
18409 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18410
18411 case IX86_BUILTIN_VEC_EXT_V2DF:
18412 case IX86_BUILTIN_VEC_EXT_V2DI:
18413 case IX86_BUILTIN_VEC_EXT_V4SF:
18414 case IX86_BUILTIN_VEC_EXT_V4SI:
18415 case IX86_BUILTIN_VEC_EXT_V8HI:
18416 case IX86_BUILTIN_VEC_EXT_V2SI:
18417 case IX86_BUILTIN_VEC_EXT_V4HI:
18418 return ix86_expand_vec_ext_builtin (exp, target);
18419
18420 case IX86_BUILTIN_VEC_SET_V8HI:
18421 case IX86_BUILTIN_VEC_SET_V4HI:
18422 return ix86_expand_vec_set_builtin (exp);
18423
18424 default:
18425 break;
18426 }
18427
18428 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18429 if (d->code == fcode)
18430 {
18431 /* Compares are treated specially. */
18432 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18433 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18434 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18435 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18436 return ix86_expand_sse_compare (d, exp, target);
18437
18438 return ix86_expand_binop_builtin (d->icode, exp, target);
18439 }
18440
18441 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18442 if (d->code == fcode)
18443 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18444
18445 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18446 if (d->code == fcode)
18447 return ix86_expand_sse_comi (d, exp, target);
18448
18449 gcc_unreachable ();
18450 }
18451
18452 /* Returns a function decl for a vectorized version of the builtin function
18453 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18454 if it is not available. */
18455
18456 static tree
18457 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18458 tree type_in)
18459 {
18460 enum machine_mode in_mode, out_mode;
18461 int in_n, out_n;
18462
18463 if (TREE_CODE (type_out) != VECTOR_TYPE
18464 || TREE_CODE (type_in) != VECTOR_TYPE)
18465 return NULL_TREE;
18466
18467 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18468 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18469 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18470 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18471
18472 switch (fn)
18473 {
18474 case BUILT_IN_SQRT:
18475 if (out_mode == DFmode && out_n == 2
18476 && in_mode == DFmode && in_n == 2)
18477 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18478 return NULL_TREE;
18479
18480 case BUILT_IN_SQRTF:
18481 if (out_mode == SFmode && out_n == 4
18482 && in_mode == SFmode && in_n == 4)
18483 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18484 return NULL_TREE;
18485
18486 case BUILT_IN_LRINTF:
18487 if (out_mode == SImode && out_n == 4
18488 && in_mode == SFmode && in_n == 4)
18489 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18490 return NULL_TREE;
18491
18492 default:
18493 ;
18494 }
18495
18496 return NULL_TREE;
18497 }
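/* Illustrative sketch (editorial note): the hook above lets the vectorizer
   replace a scalar math call with a vector builtin when the modes match.
   For example, a loop such as

     for (i = 0; i < n; i++)
       out[i] = __builtin_sqrt (in[i]);

   vectorized with two doubles per iteration asks for BUILT_IN_SQRT with
   V2DFmode in and out, and receives the decl recorded for
   IX86_BUILTIN_SQRTPD (the sqrtpd instruction).  The loop is a schematic
   example only.  */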
18498
18499 /* Returns a decl of a function that implements conversion of the
18500 input vector of type TYPE, or NULL_TREE if it is not available. */
18501
18502 static tree
18503 ix86_builtin_conversion (enum tree_code code, tree type)
18504 {
18505 if (TREE_CODE (type) != VECTOR_TYPE)
18506 return NULL_TREE;
18507
18508 switch (code)
18509 {
18510 case FLOAT_EXPR:
18511 switch (TYPE_MODE (type))
18512 {
18513 case V4SImode:
18514 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18515 default:
18516 return NULL_TREE;
18517 }
18518
18519 case FIX_TRUNC_EXPR:
18520 switch (TYPE_MODE (type))
18521 {
18522 case V4SFmode:
18523 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18524 default:
18525 return NULL_TREE;
18526 }
18527 default:
18528 return NULL_TREE;
18529
18530 }
18531 }
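/* Illustrative sketch (editorial note): the conversion hook above is queried
   by the vectorizer for whole-vector int/float conversions.  For instance,
   vectorizing

     for (i = 0; i < n; i++)
       f[i] = (float) s[i];

   with four ints per iteration asks for FLOAT_EXPR on V4SImode and receives
   the decl for IX86_BUILTIN_CVTDQ2PS; the reverse truncating conversion on
   V4SFmode maps to IX86_BUILTIN_CVTTPS2DQ.  The loop is schematic only.  */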
18532
18533 /* Store OPERAND to memory after reload has completed.  This means
18534 that we can't easily use assign_stack_local. */
18535 rtx
18536 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18537 {
18538 rtx result;
18539
18540 gcc_assert (reload_completed);
18541 if (TARGET_RED_ZONE)
18542 {
18543 result = gen_rtx_MEM (mode,
18544 gen_rtx_PLUS (Pmode,
18545 stack_pointer_rtx,
18546 GEN_INT (-RED_ZONE_SIZE)));
18547 emit_move_insn (result, operand);
18548 }
18549 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18550 {
18551 switch (mode)
18552 {
18553 case HImode:
18554 case SImode:
18555 operand = gen_lowpart (DImode, operand);
18556 /* FALLTHRU */
18557 case DImode:
18558 emit_insn (
18559 gen_rtx_SET (VOIDmode,
18560 gen_rtx_MEM (DImode,
18561 gen_rtx_PRE_DEC (DImode,
18562 stack_pointer_rtx)),
18563 operand));
18564 break;
18565 default:
18566 gcc_unreachable ();
18567 }
18568 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18569 }
18570 else
18571 {
18572 switch (mode)
18573 {
18574 case DImode:
18575 {
18576 rtx operands[2];
18577 split_di (&operand, 1, operands, operands + 1);
18578 emit_insn (
18579 gen_rtx_SET (VOIDmode,
18580 gen_rtx_MEM (SImode,
18581 gen_rtx_PRE_DEC (Pmode,
18582 stack_pointer_rtx)),
18583 operands[1]));
18584 emit_insn (
18585 gen_rtx_SET (VOIDmode,
18586 gen_rtx_MEM (SImode,
18587 gen_rtx_PRE_DEC (Pmode,
18588 stack_pointer_rtx)),
18589 operands[0]));
18590 }
18591 break;
18592 case HImode:
18593 /* Store HImodes as SImodes. */
18594 operand = gen_lowpart (SImode, operand);
18595 /* FALLTHRU */
18596 case SImode:
18597 emit_insn (
18598 gen_rtx_SET (VOIDmode,
18599 gen_rtx_MEM (GET_MODE (operand),
18600 gen_rtx_PRE_DEC (SImode,
18601 stack_pointer_rtx)),
18602 operand));
18603 break;
18604 default:
18605 gcc_unreachable ();
18606 }
18607 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18608 }
18609 return result;
18610 }
18611
18612 /* Free the operand from memory.  */
18613 void
18614 ix86_free_from_memory (enum machine_mode mode)
18615 {
18616 if (!TARGET_RED_ZONE)
18617 {
18618 int size;
18619
18620 if (mode == DImode || TARGET_64BIT)
18621 size = 8;
18622 else
18623 size = 4;
18624 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18625 	 to a pop or add instruction if registers are available.  */
18626 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18627 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18628 GEN_INT (size))));
18629 }
18630 }
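/* Illustrative sketch (editorial note): ix86_force_to_memory and
   ix86_free_from_memory are intended to be used as a pair after reload,
   roughly:

     rtx mem = ix86_force_to_memory (SImode, operand);
     ... emit insns that read MEM ...
     ix86_free_from_memory (SImode);

   Outside the red zone this pushes the value with a pre-decrement of the
   stack pointer and later releases the slot with an add (emitted as LEA) of
   the matching size.  The middle line is schematic; only the two calls are
   taken from the functions above.  */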
18631
18632 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18633 QImode must go into class Q_REGS.
18634    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
18635 movdf to do mem-to-mem moves through integer regs. */
18636 enum reg_class
18637 ix86_preferred_reload_class (rtx x, enum reg_class class)
18638 {
18639 enum machine_mode mode = GET_MODE (x);
18640
18641 /* We're only allowed to return a subclass of CLASS. Many of the
18642 following checks fail for NO_REGS, so eliminate that early. */
18643 if (class == NO_REGS)
18644 return NO_REGS;
18645
18646 /* All classes can load zeros. */
18647 if (x == CONST0_RTX (mode))
18648 return class;
18649
18650 /* Force constants into memory if we are loading a (nonzero) constant into
18651 an MMX or SSE register. This is because there are no MMX/SSE instructions
18652 to load from a constant. */
18653 if (CONSTANT_P (x)
18654 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18655 return NO_REGS;
18656
18657 /* Prefer SSE regs only, if we can use them for math. */
18658 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18659 return SSE_CLASS_P (class) ? class : NO_REGS;
18660
18661 /* Floating-point constants need more complex checks. */
18662 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18663 {
18664 /* General regs can load everything. */
18665 if (reg_class_subset_p (class, GENERAL_REGS))
18666 return class;
18667
18668 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18669 zero above. We only want to wind up preferring 80387 registers if
18670 we plan on doing computation with them. */
18671 if (TARGET_80387
18672 && standard_80387_constant_p (x))
18673 {
18674 /* Limit class to non-sse. */
18675 if (class == FLOAT_SSE_REGS)
18676 return FLOAT_REGS;
18677 if (class == FP_TOP_SSE_REGS)
18678 return FP_TOP_REG;
18679 if (class == FP_SECOND_SSE_REGS)
18680 return FP_SECOND_REG;
18681 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18682 return class;
18683 }
18684
18685 return NO_REGS;
18686 }
18687
18688 /* Generally when we see PLUS here, it's the function invariant
18689      (plus soft-fp const_int), which can only be computed into general
18690 regs. */
18691 if (GET_CODE (x) == PLUS)
18692 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18693
18694 /* QImode constants are easy to load, but non-constant QImode data
18695 must go into Q_REGS. */
18696 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18697 {
18698 if (reg_class_subset_p (class, Q_REGS))
18699 return class;
18700 if (reg_class_subset_p (Q_REGS, class))
18701 return Q_REGS;
18702 return NO_REGS;
18703 }
18704
18705 return class;
18706 }
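/* Illustrative sketch (editorial note): some concrete answers implied by the
   function above, assuming an x87-enabled target:

     (const_double 3.14159) into SSE_REGS   -> NO_REGS (constant goes to memory)
     (const_double 1.0)     into FLOAT_REGS -> FLOAT_REGS (fld1 can load it)
     (const_int 0)          into any class  -> the class itself (zeros are easy)

   The middle line assumes standard_80387_constant_p accepts 1.0, which is
   the fld1 case; the examples are otherwise schematic.  */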
18707
18708 /* Discourage putting floating-point values in SSE registers unless
18709 SSE math is being used, and likewise for the 387 registers. */
18710 enum reg_class
18711 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18712 {
18713 enum machine_mode mode = GET_MODE (x);
18714
18715 /* Restrict the output reload class to the register bank that we are doing
18716    math on.  If we would rather not return a subset of CLASS, returning NO_REGS
18717    rejects this alternative; if reload cannot do that, it still uses its own choice.  */
18718 mode = GET_MODE (x);
18719 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18720 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18721
18722 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18723 {
18724 if (class == FP_TOP_SSE_REGS)
18725 return FP_TOP_REG;
18726 else if (class == FP_SECOND_SSE_REGS)
18727 return FP_SECOND_REG;
18728 else
18729 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18730 }
18731
18732 return class;
18733 }
18734
18735 /* If we are copying between general and FP registers, we need a memory
18736 location. The same is true for SSE and MMX registers.
18737
18738    The macro can't work reliably when one of the CLASSES is a class containing
18739    registers from multiple units (SSE, MMX, integer).  We avoid this by never
18740    combining those units in a single alternative in the machine description.
18741 Ensure that this constraint holds to avoid unexpected surprises.
18742
18743 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18744 enforce these sanity checks. */
18745
18746 int
18747 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18748 enum machine_mode mode, int strict)
18749 {
18750 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18751 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18752 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18753 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18754 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18755 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18756 {
18757 gcc_assert (!strict);
18758 return true;
18759 }
18760
18761 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18762 return true;
18763
18764   /* ??? This is a lie.  We do have moves between mmx/general, and between
18765      mmx/sse2.  But by saying we need secondary memory we discourage the
18766 register allocator from using the mmx registers unless needed. */
18767 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18768 return true;
18769
18770 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18771 {
18772 /* SSE1 doesn't have any direct moves from other classes. */
18773 if (!TARGET_SSE2)
18774 return true;
18775
18776 /* If the target says that inter-unit moves are more expensive
18777 than moving through memory, then don't generate them. */
18778 if (!TARGET_INTER_UNIT_MOVES)
18779 return true;
18780
18781 /* Between SSE and general, we have moves no larger than word size. */
18782 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18783 return true;
18784 }
18785
18786 return false;
18787 }
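/* Illustrative sketch (editorial note): sample answers from the predicate
   above, assuming a 32-bit target with SSE2 and inter-unit moves enabled:

     GENERAL_REGS <-> SSE_REGS,   SImode -> false (a direct movd suffices)
     GENERAL_REGS <-> SSE_REGS,   DImode -> true  (wider than the word size)
     GENERAL_REGS <-> MMX_REGS,   any    -> true  (deliberately discouraged)
     GENERAL_REGS <-> FLOAT_REGS, any    -> true  (no direct x87 moves)

   These restate the checks above for a few cases; they are not an
   exhaustive table.  */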
18788
18789 /* Return true if the registers in CLASS cannot represent the change from
18790 modes FROM to TO. */
18791
18792 bool
18793 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18794 enum reg_class class)
18795 {
18796 if (from == to)
18797 return false;
18798
18799 /* x87 registers can't do subreg at all, as all values are reformatted
18800 to extended precision. */
18801 if (MAYBE_FLOAT_CLASS_P (class))
18802 return true;
18803
18804 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18805 {
18806 /* Vector registers do not support QI or HImode loads. If we don't
18807 disallow a change to these modes, reload will assume it's ok to
18808 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18809 the vec_dupv4hi pattern. */
18810 if (GET_MODE_SIZE (from) < 4)
18811 return true;
18812
18813 /* Vector registers do not support subreg with nonzero offsets, which
18814 are otherwise valid for integer registers. Since we can't see
18815 whether we have a nonzero offset from here, prohibit all
18816 nonparadoxical subregs changing size. */
18817 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18818 return true;
18819 }
18820
18821 return false;
18822 }
18823
18824 /* Return the cost of moving data from a register in class CLASS1 to
18825 one in class CLASS2.
18826
18827 It is not required that the cost always equal 2 when FROM is the same as TO;
18828 on some machines it is expensive to move between registers if they are not
18829 general registers. */
18830
18831 int
18832 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18833 enum reg_class class2)
18834 {
18835 /* In case we require secondary memory, compute cost of the store followed
18836 by load. In order to avoid bad register allocation choices, we need
18837 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18838
18839 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18840 {
18841 int cost = 1;
18842
18843 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18844 MEMORY_MOVE_COST (mode, class1, 1));
18845 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18846 MEMORY_MOVE_COST (mode, class2, 1));
18847
18848       /* When copying from a general purpose register we may emit multiple
18849          stores followed by a single load, causing a memory size mismatch stall.
18850          Count this as an arbitrarily high cost of 20.  */
18851 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18852 cost += 20;
18853
18854 /* In the case of FP/MMX moves, the registers actually overlap, and we
18855 have to switch modes in order to treat them differently. */
18856 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18857 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18858 cost += 20;
18859
18860 return cost;
18861 }
18862
18863 /* Moves between SSE/MMX and integer unit are expensive. */
18864 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18865 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18866 return ix86_cost->mmxsse_to_integer;
18867 if (MAYBE_FLOAT_CLASS_P (class1))
18868 return ix86_cost->fp_move;
18869 if (MAYBE_SSE_CLASS_P (class1))
18870 return ix86_cost->sse_move;
18871 if (MAYBE_MMX_CLASS_P (class1))
18872 return ix86_cost->mmx_move;
18873 return 2;
18874 }
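/* A worked example of the secondary-memory branch above, with made-up
   numbers (the real values come from the active cost table): moving an
   SImode value between MMX_REGS and GENERAL_REGS always requires
   secondary memory, so the cost is 1 plus the larger of the load/store
   costs for each class.  With illustrative MEMORY_MOVE_COST values of 6
   for MMX_REGS and 4 for GENERAL_REGS that gives 1 + 6 + 4 = 11,
   deliberately no cheaper than going through memory.  A plain
   GENERAL_REGS to GENERAL_REGS copy falls through to the final
   "return 2".  */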
18875
18876 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18877
18878 bool
18879 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18880 {
18881 /* Flags and only flags can hold CCmode values. */
18882 if (CC_REGNO_P (regno))
18883 return GET_MODE_CLASS (mode) == MODE_CC;
18884 if (GET_MODE_CLASS (mode) == MODE_CC
18885 || GET_MODE_CLASS (mode) == MODE_RANDOM
18886 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18887 return 0;
18888 if (FP_REGNO_P (regno))
18889 return VALID_FP_MODE_P (mode);
18890 if (SSE_REGNO_P (regno))
18891 {
18892 /* We implement the move patterns for all vector modes into and
18893 out of SSE registers, even when no operation instructions
18894 are available. */
18895 return (VALID_SSE_REG_MODE (mode)
18896 || VALID_SSE2_REG_MODE (mode)
18897 || VALID_MMX_REG_MODE (mode)
18898 || VALID_MMX_REG_MODE_3DNOW (mode));
18899 }
18900 if (MMX_REGNO_P (regno))
18901 {
18902 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18903 so if the register is available at all, then we can move data of
18904 the given mode into or out of it. */
18905 return (VALID_MMX_REG_MODE (mode)
18906 || VALID_MMX_REG_MODE_3DNOW (mode));
18907 }
18908
18909 if (mode == QImode)
18910 {
18911 /* Take care for QImode values - they can be in non-QI regs,
18912 but then they do cause partial register stalls. */
18913 if (regno < 4 || TARGET_64BIT)
18914 return 1;
18915 if (!TARGET_PARTIAL_REG_STALL)
18916 return 1;
18917 return reload_in_progress || reload_completed;
18918 }
18919 /* We handle both integers and floats in the general purpose registers. */
18920 else if (VALID_INT_MODE_P (mode))
18921 return 1;
18922 else if (VALID_FP_MODE_P (mode))
18923 return 1;
18924 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18925 on to use that value in smaller contexts, this can easily force a
18926 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18927 supporting DImode, allow it. */
18928 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18929 return 1;
18930
18931 return 0;
18932 }
18933
18934 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18935 tieable integer mode. */
18936
18937 static bool
18938 ix86_tieable_integer_mode_p (enum machine_mode mode)
18939 {
18940 switch (mode)
18941 {
18942 case HImode:
18943 case SImode:
18944 return true;
18945
18946 case QImode:
18947 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18948
18949 case DImode:
18950 return TARGET_64BIT;
18951
18952 default:
18953 return false;
18954 }
18955 }
18956
18957 /* Return true if MODE1 is accessible in a register that can hold MODE2
18958 without copying. That is, all register classes that can hold MODE2
18959 can also hold MODE1. */
18960
18961 bool
18962 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18963 {
18964 if (mode1 == mode2)
18965 return true;
18966
18967 if (ix86_tieable_integer_mode_p (mode1)
18968 && ix86_tieable_integer_mode_p (mode2))
18969 return true;
18970
18971 /* MODE2 being XFmode implies fp stack or general regs, which means we
18972 can tie any smaller floating point modes to it. Note that we do not
18973 tie this with TFmode. */
18974 if (mode2 == XFmode)
18975 return mode1 == SFmode || mode1 == DFmode;
18976
18977 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18978 that we can tie it with SFmode. */
18979 if (mode2 == DFmode)
18980 return mode1 == SFmode;
18981
18982 /* If MODE2 is only appropriate for an SSE register, then tie with
18983 any other mode acceptable to SSE registers. */
18984 if (GET_MODE_SIZE (mode2) == 16
18985 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18986 return (GET_MODE_SIZE (mode1) == 16
18987 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18988
18989 /* If MODE2 is appropriate for an MMX register, then tie
18990 with any other mode acceptable to MMX registers. */
18991 if (GET_MODE_SIZE (mode2) == 8
18992 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18993 return (GET_MODE_SIZE (mode1) == 8
18994 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18995
18996 return false;
18997 }
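/* Some illustrative consequences of the rules above (examples only, on
   an SSE-capable target): ix86_modes_tieable_p (HImode, SImode) is
   always true; ix86_modes_tieable_p (QImode, SImode) holds only when
   partial register stalls are not a concern; ix86_modes_tieable_p
   (SFmode, DFmode) is true while ix86_modes_tieable_p (DImode, DFmode)
   is not; and two 16-byte vector modes such as V4SFmode and V2DImode
   tie with each other because both are acceptable to SSE registers.  */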
18998
18999 /* Return the cost of moving data of mode M between a
19000 register and memory. A value of 2 is the default; this cost is
19001 relative to those in `REGISTER_MOVE_COST'.
19002
19003 If moving between registers and memory is more expensive than
19004 between two registers, you should define this macro to express the
19005 relative cost.
19006
19007 Also model the increased cost of moving QImode registers in
19008 non-Q_REGS classes.
19009 */
19010 int
19011 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19012 {
19013 if (FLOAT_CLASS_P (class))
19014 {
19015 int index;
19016 switch (mode)
19017 {
19018 case SFmode:
19019 index = 0;
19020 break;
19021 case DFmode:
19022 index = 1;
19023 break;
19024 case XFmode:
19025 index = 2;
19026 break;
19027 default:
19028 return 100;
19029 }
19030 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19031 }
19032 if (SSE_CLASS_P (class))
19033 {
19034 int index;
19035 switch (GET_MODE_SIZE (mode))
19036 {
19037 case 4:
19038 index = 0;
19039 break;
19040 case 8:
19041 index = 1;
19042 break;
19043 case 16:
19044 index = 2;
19045 break;
19046 default:
19047 return 100;
19048 }
19049 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19050 }
19051 if (MMX_CLASS_P (class))
19052 {
19053 int index;
19054 switch (GET_MODE_SIZE (mode))
19055 {
19056 case 4:
19057 index = 0;
19058 break;
19059 case 8:
19060 index = 1;
19061 break;
19062 default:
19063 return 100;
19064 }
19065 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19066 }
19067 switch (GET_MODE_SIZE (mode))
19068 {
19069 case 1:
19070 if (in)
19071 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19072 : ix86_cost->movzbl_load);
19073 else
19074 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19075 : ix86_cost->int_store[0] + 4);
19076 break;
19077 case 2:
19078 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19079 default:
19080 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19081 if (mode == TFmode)
19082 mode = XFmode;
19083 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19084 * (((int) GET_MODE_SIZE (mode)
19085 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19086 }
19087 }
19088
19089 /* Compute a (partial) cost for rtx X. Return true if the complete
19090 cost has been computed, and false if subexpressions should be
19091 scanned. In either case, *TOTAL contains the cost result. */
19092
19093 static bool
19094 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19095 {
19096 enum machine_mode mode = GET_MODE (x);
19097
19098 switch (code)
19099 {
19100 case CONST_INT:
19101 case CONST:
19102 case LABEL_REF:
19103 case SYMBOL_REF:
19104 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19105 *total = 3;
19106 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19107 *total = 2;
19108 else if (flag_pic && SYMBOLIC_CONST (x)
19109 && (!TARGET_64BIT
19110 || (GET_CODE (x) != LABEL_REF
19111 && (GET_CODE (x) != SYMBOL_REF
19112 || !SYMBOL_REF_LOCAL_P (x)))))
19113 *total = 1;
19114 else
19115 *total = 0;
19116 return true;
19117
19118 case CONST_DOUBLE:
19119 if (mode == VOIDmode)
19120 *total = 0;
19121 else
19122 switch (standard_80387_constant_p (x))
19123 {
19124 case 1: /* 0.0 */
19125 *total = 1;
19126 break;
19127 default: /* Other constants */
19128 *total = 2;
19129 break;
19130 case 0:
19131 case -1:
19132 /* Start with (MEM (SYMBOL_REF)), since that's where
19133 it'll probably end up. Add a penalty for size. */
19134 *total = (COSTS_N_INSNS (1)
19135 + (flag_pic != 0 && !TARGET_64BIT)
19136 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19137 break;
19138 }
19139 return true;
19140
19141 case ZERO_EXTEND:
19142 /* The zero extension is often completely free on x86_64, so make
19143 it as cheap as possible. */
19144 if (TARGET_64BIT && mode == DImode
19145 && GET_MODE (XEXP (x, 0)) == SImode)
19146 *total = 1;
19147 else if (TARGET_ZERO_EXTEND_WITH_AND)
19148 *total = ix86_cost->add;
19149 else
19150 *total = ix86_cost->movzx;
19151 return false;
19152
19153 case SIGN_EXTEND:
19154 *total = ix86_cost->movsx;
19155 return false;
19156
19157 case ASHIFT:
19158 if (CONST_INT_P (XEXP (x, 1))
19159 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19160 {
19161 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19162 if (value == 1)
19163 {
19164 *total = ix86_cost->add;
19165 return false;
19166 }
19167 if ((value == 2 || value == 3)
19168 && ix86_cost->lea <= ix86_cost->shift_const)
19169 {
19170 *total = ix86_cost->lea;
19171 return false;
19172 }
19173 }
19174 /* FALLTHRU */
19175
19176 case ROTATE:
19177 case ASHIFTRT:
19178 case LSHIFTRT:
19179 case ROTATERT:
19180 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19181 {
19182 if (CONST_INT_P (XEXP (x, 1)))
19183 {
19184 if (INTVAL (XEXP (x, 1)) > 32)
19185 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19186 else
19187 *total = ix86_cost->shift_const * 2;
19188 }
19189 else
19190 {
19191 if (GET_CODE (XEXP (x, 1)) == AND)
19192 *total = ix86_cost->shift_var * 2;
19193 else
19194 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19195 }
19196 }
19197 else
19198 {
19199 if (CONST_INT_P (XEXP (x, 1)))
19200 *total = ix86_cost->shift_const;
19201 else
19202 *total = ix86_cost->shift_var;
19203 }
19204 return false;
19205
19206 case MULT:
19207 if (FLOAT_MODE_P (mode))
19208 {
19209 *total = ix86_cost->fmul;
19210 return false;
19211 }
19212 else
19213 {
19214 rtx op0 = XEXP (x, 0);
19215 rtx op1 = XEXP (x, 1);
19216 int nbits;
19217 if (CONST_INT_P (XEXP (x, 1)))
19218 {
19219 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19220 for (nbits = 0; value != 0; value &= value - 1)
19221 nbits++;
19222 }
19223 else
19224 /* This is arbitrary. */
19225 nbits = 7;
19226
19227 /* Compute costs correctly for widening multiplication. */
19228 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19229 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19230 == GET_MODE_SIZE (mode))
19231 {
19232 int is_mulwiden = 0;
19233 enum machine_mode inner_mode = GET_MODE (op0);
19234
19235 if (GET_CODE (op0) == GET_CODE (op1))
19236 is_mulwiden = 1, op1 = XEXP (op1, 0);
19237 else if (CONST_INT_P (op1))
19238 {
19239 if (GET_CODE (op0) == SIGN_EXTEND)
19240 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19241 == INTVAL (op1);
19242 else
19243 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19244 }
19245
19246 if (is_mulwiden)
19247 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19248 }
19249
19250 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19251 + nbits * ix86_cost->mult_bit
19252 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19253
19254 return true;
19255 }
19256
19257 case DIV:
19258 case UDIV:
19259 case MOD:
19260 case UMOD:
19261 if (FLOAT_MODE_P (mode))
19262 *total = ix86_cost->fdiv;
19263 else
19264 *total = ix86_cost->divide[MODE_INDEX (mode)];
19265 return false;
19266
19267 case PLUS:
19268 if (FLOAT_MODE_P (mode))
19269 *total = ix86_cost->fadd;
19270 else if (GET_MODE_CLASS (mode) == MODE_INT
19271 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19272 {
19273 if (GET_CODE (XEXP (x, 0)) == PLUS
19274 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19275 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19276 && CONSTANT_P (XEXP (x, 1)))
19277 {
19278 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19279 if (val == 2 || val == 4 || val == 8)
19280 {
19281 *total = ix86_cost->lea;
19282 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19283 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19284 outer_code);
19285 *total += rtx_cost (XEXP (x, 1), outer_code);
19286 return true;
19287 }
19288 }
19289 else if (GET_CODE (XEXP (x, 0)) == MULT
19290 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19291 {
19292 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19293 if (val == 2 || val == 4 || val == 8)
19294 {
19295 *total = ix86_cost->lea;
19296 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19297 *total += rtx_cost (XEXP (x, 1), outer_code);
19298 return true;
19299 }
19300 }
19301 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19302 {
19303 *total = ix86_cost->lea;
19304 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19305 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19306 *total += rtx_cost (XEXP (x, 1), outer_code);
19307 return true;
19308 }
19309 }
19310 /* FALLTHRU */
19311
19312 case MINUS:
19313 if (FLOAT_MODE_P (mode))
19314 {
19315 *total = ix86_cost->fadd;
19316 return false;
19317 }
19318 /* FALLTHRU */
19319
19320 case AND:
19321 case IOR:
19322 case XOR:
19323 if (!TARGET_64BIT && mode == DImode)
19324 {
19325 *total = (ix86_cost->add * 2
19326 + (rtx_cost (XEXP (x, 0), outer_code)
19327 << (GET_MODE (XEXP (x, 0)) != DImode))
19328 + (rtx_cost (XEXP (x, 1), outer_code)
19329 << (GET_MODE (XEXP (x, 1)) != DImode)));
19330 return true;
19331 }
19332 /* FALLTHRU */
19333
19334 case NEG:
19335 if (FLOAT_MODE_P (mode))
19336 {
19337 *total = ix86_cost->fchs;
19338 return false;
19339 }
19340 /* FALLTHRU */
19341
19342 case NOT:
19343 if (!TARGET_64BIT && mode == DImode)
19344 *total = ix86_cost->add * 2;
19345 else
19346 *total = ix86_cost->add;
19347 return false;
19348
19349 case COMPARE:
19350 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19351 && XEXP (XEXP (x, 0), 1) == const1_rtx
19352 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19353 && XEXP (x, 1) == const0_rtx)
19354 {
19355 /* This kind of construct is implemented using test[bwl].
19356 Treat it as if we had an AND. */
19357 *total = (ix86_cost->add
19358 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19359 + rtx_cost (const1_rtx, outer_code));
19360 return true;
19361 }
19362 return false;
19363
19364 case FLOAT_EXTEND:
19365 if (!TARGET_SSE_MATH
19366 || mode == XFmode
19367 || (mode == DFmode && !TARGET_SSE2))
19368 *total = 0;
19369 return false;
19370
19371 case ABS:
19372 if (FLOAT_MODE_P (mode))
19373 *total = ix86_cost->fabs;
19374 return false;
19375
19376 case SQRT:
19377 if (FLOAT_MODE_P (mode))
19378 *total = ix86_cost->fsqrt;
19379 return false;
19380
19381 case UNSPEC:
19382 if (XINT (x, 1) == UNSPEC_TP)
19383 *total = 0;
19384 return false;
19385
19386 default:
19387 return false;
19388 }
19389 }
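/* Example of the PLUS handling above (illustrative RTL only): an
   address-style computation such as

     (plus:SI (plus:SI (mult:SI (reg:SI 1) (const_int 4))
                       (reg:SI 2))
              (const_int 12))

   matches the first pattern because the scale is 2, 4 or 8, so it is
   costed as one lea plus the costs of the two registers and the
   displacement, rather than as a shift followed by two additions.  */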
19390
19391 #if TARGET_MACHO
19392
19393 static int current_machopic_label_num;
19394
19395 /* Given a symbol name and its associated stub, write out the
19396 definition of the stub. */
19397
19398 void
19399 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19400 {
19401 unsigned int length;
19402 char *binder_name, *symbol_name, lazy_ptr_name[32];
19403 int label = ++current_machopic_label_num;
19404
19405 /* For 64-bit we shouldn't get here. */
19406 gcc_assert (!TARGET_64BIT);
19407
19408 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19409 symb = (*targetm.strip_name_encoding) (symb);
19410
19411 length = strlen (stub);
19412 binder_name = alloca (length + 32);
19413 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19414
19415 length = strlen (symb);
19416 symbol_name = alloca (length + 32);
19417 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19418
19419 sprintf (lazy_ptr_name, "L%d$lz", label);
19420
19421 if (MACHOPIC_PURE)
19422 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19423 else
19424 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19425
19426 fprintf (file, "%s:\n", stub);
19427 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19428
19429 if (MACHOPIC_PURE)
19430 {
19431 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19432 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19433 fprintf (file, "\tjmp\t*%%edx\n");
19434 }
19435 else
19436 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19437
19438 fprintf (file, "%s:\n", binder_name);
19439
19440 if (MACHOPIC_PURE)
19441 {
19442 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19443 fprintf (file, "\tpushl\t%%eax\n");
19444 }
19445 else
19446 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19447
19448 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19449
19450 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19451 fprintf (file, "%s:\n", lazy_ptr_name);
19452 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19453 fprintf (file, "\t.long %s\n", binder_name);
19454 }
19455
19456 void
19457 darwin_x86_file_end (void)
19458 {
19459 darwin_file_end ();
19460 ix86_file_end ();
19461 }
19462 #endif /* TARGET_MACHO */
19463
19464 /* Order the registers for register allocator. */
19465
19466 void
19467 x86_order_regs_for_local_alloc (void)
19468 {
19469 int pos = 0;
19470 int i;
19471
19472 /* First allocate the local general purpose registers. */
19473 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19474 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19475 reg_alloc_order [pos++] = i;
19476
19477 /* Global general purpose registers. */
19478 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19479 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19480 reg_alloc_order [pos++] = i;
19481
19482 /* x87 registers come first in case we are doing FP math
19483 using them. */
19484 if (!TARGET_SSE_MATH)
19485 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19486 reg_alloc_order [pos++] = i;
19487
19488 /* SSE registers. */
19489 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19490 reg_alloc_order [pos++] = i;
19491 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19492 reg_alloc_order [pos++] = i;
19493
19494 /* x87 registers. */
19495 if (TARGET_SSE_MATH)
19496 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19497 reg_alloc_order [pos++] = i;
19498
19499 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19500 reg_alloc_order [pos++] = i;
19501
19502 /* Initialize the rest of the array, as we do not allocate some registers
19503 at all. */
19504 while (pos < FIRST_PSEUDO_REGISTER)
19505 reg_alloc_order [pos++] = 0;
19506 }
19507
19508 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19509 struct attribute_spec.handler. */
19510 static tree
19511 ix86_handle_struct_attribute (tree *node, tree name,
19512 tree args ATTRIBUTE_UNUSED,
19513 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19514 {
19515 tree *type = NULL;
19516 if (DECL_P (*node))
19517 {
19518 if (TREE_CODE (*node) == TYPE_DECL)
19519 type = &TREE_TYPE (*node);
19520 }
19521 else
19522 type = node;
19523
19524 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19525 || TREE_CODE (*type) == UNION_TYPE)))
19526 {
19527 warning (OPT_Wattributes, "%qs attribute ignored",
19528 IDENTIFIER_POINTER (name));
19529 *no_add_attrs = true;
19530 }
19531
19532 else if ((is_attribute_p ("ms_struct", name)
19533 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19534 || ((is_attribute_p ("gcc_struct", name)
19535 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19536 {
19537 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19538 IDENTIFIER_POINTER (name));
19539 *no_add_attrs = true;
19540 }
19541
19542 return NULL_TREE;
19543 }
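/* The attributes handled above are written in source code roughly like
   this (illustrative snippet only):

     struct __attribute__ ((ms_struct)) s1 { char c; int i : 4; };
     struct __attribute__ ((gcc_struct)) s2 { char c; int i : 4; };

   Requesting both on one type makes the handler emit the "incompatible
   attribute ignored" warning, and using either on something that is not
   a struct or union type just drops the attribute with a warning.  */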
19544
19545 static bool
19546 ix86_ms_bitfield_layout_p (tree record_type)
19547 {
19548 return (TARGET_MS_BITFIELD_LAYOUT &&
19549 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19550 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19551 }
19552
19553 /* Returns an expression indicating where the this parameter is
19554 located on entry to the FUNCTION. */
19555
19556 static rtx
19557 x86_this_parameter (tree function)
19558 {
19559 tree type = TREE_TYPE (function);
19560
19561 if (TARGET_64BIT)
19562 {
19563 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19564 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19565 }
19566
19567 if (ix86_function_regparm (type, function) > 0)
19568 {
19569 tree parm;
19570
19571 parm = TYPE_ARG_TYPES (type);
19572 /* Figure out whether or not the function has a variable number of
19573 arguments. */
19574 for (; parm; parm = TREE_CHAIN (parm))
19575 if (TREE_VALUE (parm) == void_type_node)
19576 break;
19577 /* If not, the this parameter is in the first argument. */
19578 if (parm)
19579 {
19580 int regno = 0;
19581 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19582 regno = 2;
19583 return gen_rtx_REG (SImode, regno);
19584 }
19585 }
19586
19587 if (aggregate_value_p (TREE_TYPE (type), type))
19588 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19589 else
19590 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19591 }
19592
19593 /* Determine whether x86_output_mi_thunk can succeed. */
19594
19595 static bool
19596 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19597 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19598 HOST_WIDE_INT vcall_offset, tree function)
19599 {
19600 /* 64-bit can handle anything. */
19601 if (TARGET_64BIT)
19602 return true;
19603
19604 /* For 32-bit, everything's fine if we have one free register. */
19605 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19606 return true;
19607
19608 /* Need a free register for vcall_offset. */
19609 if (vcall_offset)
19610 return false;
19611
19612 /* Need a free register for GOT references. */
19613 if (flag_pic && !(*targetm.binds_local_p) (function))
19614 return false;
19615
19616 /* Otherwise ok. */
19617 return true;
19618 }
19619
19620 /* Output the assembler code for a thunk function. THUNK_DECL is the
19621 declaration for the thunk function itself, FUNCTION is the decl for
19622 the target function. DELTA is an immediate constant offset to be
19623 added to THIS. If VCALL_OFFSET is nonzero, the word at
19624 *(*this + vcall_offset) should be added to THIS. */
19625
19626 static void
19627 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19628 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19629 HOST_WIDE_INT vcall_offset, tree function)
19630 {
19631 rtx xops[3];
19632 rtx this = x86_this_parameter (function);
19633 rtx this_reg, tmp;
19634
19635 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19636 pull it in now and let DELTA benefit. */
19637 if (REG_P (this))
19638 this_reg = this;
19639 else if (vcall_offset)
19640 {
19641 /* Put the this parameter into %eax. */
19642 xops[0] = this;
19643 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19644 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19645 }
19646 else
19647 this_reg = NULL_RTX;
19648
19649 /* Adjust the this parameter by a fixed constant. */
19650 if (delta)
19651 {
19652 xops[0] = GEN_INT (delta);
19653 xops[1] = this_reg ? this_reg : this;
19654 if (TARGET_64BIT)
19655 {
19656 if (!x86_64_general_operand (xops[0], DImode))
19657 {
19658 tmp = gen_rtx_REG (DImode, R10_REG);
19659 xops[1] = tmp;
19660 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19661 xops[0] = tmp;
19662 xops[1] = this;
19663 }
19664 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19665 }
19666 else
19667 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19668 }
19669
19670 /* Adjust the this parameter by a value stored in the vtable. */
19671 if (vcall_offset)
19672 {
19673 if (TARGET_64BIT)
19674 tmp = gen_rtx_REG (DImode, R10_REG);
19675 else
19676 {
19677 int tmp_regno = 2 /* ECX */;
19678 if (lookup_attribute ("fastcall",
19679 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19680 tmp_regno = 0 /* EAX */;
19681 tmp = gen_rtx_REG (SImode, tmp_regno);
19682 }
19683
19684 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19685 xops[1] = tmp;
19686 if (TARGET_64BIT)
19687 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19688 else
19689 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19690
19691 /* Adjust the this parameter. */
19692 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19693 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19694 {
19695 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19696 xops[0] = GEN_INT (vcall_offset);
19697 xops[1] = tmp2;
19698 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19699 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19700 }
19701 xops[1] = this_reg;
19702 if (TARGET_64BIT)
19703 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19704 else
19705 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19706 }
19707
19708 /* If necessary, drop THIS back to its stack slot. */
19709 if (this_reg && this_reg != this)
19710 {
19711 xops[0] = this_reg;
19712 xops[1] = this;
19713 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19714 }
19715
19716 xops[0] = XEXP (DECL_RTL (function), 0);
19717 if (TARGET_64BIT)
19718 {
19719 if (!flag_pic || (*targetm.binds_local_p) (function))
19720 output_asm_insn ("jmp\t%P0", xops);
19721 else
19722 {
19723 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19724 tmp = gen_rtx_CONST (Pmode, tmp);
19725 tmp = gen_rtx_MEM (QImode, tmp);
19726 xops[0] = tmp;
19727 output_asm_insn ("jmp\t%A0", xops);
19728 }
19729 }
19730 else
19731 {
19732 if (!flag_pic || (*targetm.binds_local_p) (function))
19733 output_asm_insn ("jmp\t%P0", xops);
19734 else
19735 #if TARGET_MACHO
19736 if (TARGET_MACHO)
19737 {
19738 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19739 tmp = (gen_rtx_SYMBOL_REF
19740 (Pmode,
19741 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19742 tmp = gen_rtx_MEM (QImode, tmp);
19743 xops[0] = tmp;
19744 output_asm_insn ("jmp\t%0", xops);
19745 }
19746 else
19747 #endif /* TARGET_MACHO */
19748 {
19749 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19750 output_set_got (tmp, NULL_RTX);
19751
19752 xops[1] = tmp;
19753 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19754 output_asm_insn ("jmp\t{*}%1", xops);
19755 }
19756 }
19757 }
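/* For reference, the thunk emitted above behaves roughly like the
   following C sketch, where DELTA, VCALL_OFFSET and FUNCTION stand for
   the parameters of this function (the real thing is emitted as asm so
   the tail call leaves the argument registers and stack untouched):

     this = (char *) this + DELTA;
     if (VCALL_OFFSET != 0)
       this = (char *) this
              + *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);

   and then a direct or indirect jmp to FUNCTION with the adjusted THIS
   left in place.  */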
19758
19759 static void
19760 x86_file_start (void)
19761 {
19762 default_file_start ();
19763 #if TARGET_MACHO
19764 darwin_file_start ();
19765 #endif
19766 if (X86_FILE_START_VERSION_DIRECTIVE)
19767 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19768 if (X86_FILE_START_FLTUSED)
19769 fputs ("\t.global\t__fltused\n", asm_out_file);
19770 if (ix86_asm_dialect == ASM_INTEL)
19771 fputs ("\t.intel_syntax\n", asm_out_file);
19772 }
19773
19774 int
19775 x86_field_alignment (tree field, int computed)
19776 {
19777 enum machine_mode mode;
19778 tree type = TREE_TYPE (field);
19779
19780 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19781 return computed;
19782 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19783 ? get_inner_array_type (type) : type);
19784 if (mode == DFmode || mode == DCmode
19785 || GET_MODE_CLASS (mode) == MODE_INT
19786 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19787 return MIN (32, computed);
19788 return computed;
19789 }
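/* Example of the effect above (illustrative): given

     struct s { char c; double d; };

   the natural alignment of the double is 8, but on 32-bit x86 without
   -malign-double the code above caps the field alignment at 32 bits, so
   d is placed at offset 4 and sizeof (struct s) is 12 rather than 16.  */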
19790
19791 /* Output assembler code to FILE to increment profiler label # LABELNO
19792 for profiling a function entry. */
19793 void
19794 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19795 {
19796 if (TARGET_64BIT)
19797 if (flag_pic)
19798 {
19799 #ifndef NO_PROFILE_COUNTERS
19800 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19801 #endif
19802 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19803 }
19804 else
19805 {
19806 #ifndef NO_PROFILE_COUNTERS
19807 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19808 #endif
19809 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19810 }
19811 else if (flag_pic)
19812 {
19813 #ifndef NO_PROFILE_COUNTERS
19814 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19815 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19816 #endif
19817 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19818 }
19819 else
19820 {
19821 #ifndef NO_PROFILE_COUNTERS
19822 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19823 PROFILE_COUNT_REGISTER);
19824 #endif
19825 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19826 }
19827 }
19828
19829 /* We don't have exact information about the insn sizes, but we may assume
19830 quite safely that we are informed about all 1 byte insns and memory
19831 address sizes. This is enough to eliminate unnecessary padding in
19832 99% of cases. */
19833
19834 static int
19835 min_insn_size (rtx insn)
19836 {
19837 int l = 0;
19838
19839 if (!INSN_P (insn) || !active_insn_p (insn))
19840 return 0;
19841
19842 /* Discard alignments we've emitted and jump instructions. */
19843 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19844 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19845 return 0;
19846 if (JUMP_P (insn)
19847 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19848 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19849 return 0;
19850
19851 /* Important case - calls are always 5 bytes.
19852 It is common to have many calls in a row. */
19853 if (CALL_P (insn)
19854 && symbolic_reference_mentioned_p (PATTERN (insn))
19855 && !SIBLING_CALL_P (insn))
19856 return 5;
19857 if (get_attr_length (insn) <= 1)
19858 return 1;
19859
19860 /* For normal instructions we may rely on the sizes of addresses
19861 and the presence of a symbol to require 4 bytes of encoding.
19862 This is not the case for jumps, where references are PC relative. */
19863 if (!JUMP_P (insn))
19864 {
19865 l = get_attr_length_address (insn);
19866 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19867 l = 4;
19868 }
19869 if (l)
19870 return 1+l;
19871 else
19872 return 2;
19873 }
19874
19875 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19876 window. */
19877
19878 static void
19879 ix86_avoid_jump_misspredicts (void)
19880 {
19881 rtx insn, start = get_insns ();
19882 int nbytes = 0, njumps = 0;
19883 int isjump = 0;
19884
19885 /* Look for all minimal intervals of instructions containing 4 jumps.
19886 The intervals are bounded by START and INSN. NBYTES is the total
19887 size of instructions in the interval including INSN and not including
19888 START. When NBYTES is smaller than 16 bytes, it is possible
19889 that the end of START and INSN ends up in the same 16byte page.
19890
19891 The smallest offset in the page at which INSN can start is the case where
19892 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19893 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19894 */
19895 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19896 {
19897
19898 nbytes += min_insn_size (insn);
19899 if (dump_file)
19900 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19901 INSN_UID (insn), min_insn_size (insn));
19902 if ((JUMP_P (insn)
19903 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19904 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19905 || CALL_P (insn))
19906 njumps++;
19907 else
19908 continue;
19909
19910 while (njumps > 3)
19911 {
19912 start = NEXT_INSN (start);
19913 if ((JUMP_P (start)
19914 && GET_CODE (PATTERN (start)) != ADDR_VEC
19915 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19916 || CALL_P (start))
19917 njumps--, isjump = 1;
19918 else
19919 isjump = 0;
19920 nbytes -= min_insn_size (start);
19921 }
19922 gcc_assert (njumps >= 0);
19923 if (dump_file)
19924 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19925 INSN_UID (start), INSN_UID (insn), nbytes);
19926
19927 if (njumps == 3 && isjump && nbytes < 16)
19928 {
19929 int padsize = 15 - nbytes + min_insn_size (insn);
19930
19931 if (dump_file)
19932 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19933 INSN_UID (insn), padsize);
19934 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19935 }
19936 }
19937 }
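/* Worked example of the padding computed above (numbers are purely
   illustrative): if the fourth branch closes an interval with
   NBYTES == 12 and that branch itself is 2 bytes long, then
   padsize = 15 - 12 + 2 = 5, i.e. up to 5 bytes of padding are emitted
   in front of it so that the four branches can no longer share a single
   16-byte fetch window.  */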
19938
19939 /* AMD Athlon works faster
19940 when RET is not the destination of a conditional jump or directly preceded
19941 by another jump instruction. We avoid the penalty by inserting a NOP just
19942 before the RET instructions in such cases. */
19943 static void
19944 ix86_pad_returns (void)
19945 {
19946 edge e;
19947 edge_iterator ei;
19948
19949 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19950 {
19951 basic_block bb = e->src;
19952 rtx ret = BB_END (bb);
19953 rtx prev;
19954 bool replace = false;
19955
19956 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19957 || !maybe_hot_bb_p (bb))
19958 continue;
19959 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19960 if (active_insn_p (prev) || LABEL_P (prev))
19961 break;
19962 if (prev && LABEL_P (prev))
19963 {
19964 edge e;
19965 edge_iterator ei;
19966
19967 FOR_EACH_EDGE (e, ei, bb->preds)
19968 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19969 && !(e->flags & EDGE_FALLTHRU))
19970 replace = true;
19971 }
19972 if (!replace)
19973 {
19974 prev = prev_active_insn (ret);
19975 if (prev
19976 && ((JUMP_P (prev) && any_condjump_p (prev))
19977 || CALL_P (prev)))
19978 replace = true;
19979 /* Empty functions get branch mispredict even when the jump destination
19980 is not visible to us. */
19981 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19982 replace = true;
19983 }
19984 if (replace)
19985 {
19986 emit_insn_before (gen_return_internal_long (), ret);
19987 delete_insn (ret);
19988 }
19989 }
19990 }
19991
19992 /* Implement machine specific optimizations. We implement padding of returns
19993 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19994 static void
19995 ix86_reorg (void)
19996 {
19997 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19998 ix86_pad_returns ();
19999 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20000 ix86_avoid_jump_misspredicts ();
20001 }
20002
20003 /* Return nonzero when QImode register that must be represented via REX prefix
20004 is used. */
20005 bool
20006 x86_extended_QIreg_mentioned_p (rtx insn)
20007 {
20008 int i;
20009 extract_insn_cached (insn);
20010 for (i = 0; i < recog_data.n_operands; i++)
20011 if (REG_P (recog_data.operand[i])
20012 && REGNO (recog_data.operand[i]) >= 4)
20013 return true;
20014 return false;
20015 }
20016
20017 /* Return nonzero when P points to register encoded via REX prefix.
20018 Called via for_each_rtx. */
20019 static int
20020 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20021 {
20022 unsigned int regno;
20023 if (!REG_P (*p))
20024 return 0;
20025 regno = REGNO (*p);
20026 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20027 }
20028
20029 /* Return true when INSN mentions register that must be encoded using REX
20030 prefix. */
20031 bool
20032 x86_extended_reg_mentioned_p (rtx insn)
20033 {
20034 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20035 }
20036
20037 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20038 optabs would emit if we didn't have TFmode patterns. */
20039
20040 void
20041 x86_emit_floatuns (rtx operands[2])
20042 {
20043 rtx neglab, donelab, i0, i1, f0, in, out;
20044 enum machine_mode mode, inmode;
20045
20046 inmode = GET_MODE (operands[1]);
20047 gcc_assert (inmode == SImode || inmode == DImode);
20048
20049 out = operands[0];
20050 in = force_reg (inmode, operands[1]);
20051 mode = GET_MODE (out);
20052 neglab = gen_label_rtx ();
20053 donelab = gen_label_rtx ();
20054 f0 = gen_reg_rtx (mode);
20055
20056 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20057
20058 expand_float (out, in, 0);
20059
20060 emit_jump_insn (gen_jump (donelab));
20061 emit_barrier ();
20062
20063 emit_label (neglab);
20064
20065 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20066 1, OPTAB_DIRECT);
20067 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20068 1, OPTAB_DIRECT);
20069 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20070
20071 expand_float (f0, i0, 0);
20072
20073 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20074
20075 emit_label (donelab);
20076 }
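/* The sequence emitted above is the usual unsigned-to-float idiom.  In
   plain C it corresponds roughly to the following sketch for the
   DImode-to-DFmode case (types are illustrative):

     double u64_to_double (unsigned long long x)
     {
       if ((long long) x >= 0)
         return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       return (double) (long long) half + (double) (long long) half;
     }

   Halving keeps the value in signed range; OR-ing the lost low bit back
   in preserves correct rounding, and the final addition doubles the
   result.  */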
20077 \f
20078 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20079 with all elements equal to VAR. Return true if successful. */
20080
20081 static bool
20082 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20083 rtx target, rtx val)
20084 {
20085 enum machine_mode smode, wsmode, wvmode;
20086 rtx x;
20087
20088 switch (mode)
20089 {
20090 case V2SImode:
20091 case V2SFmode:
20092 if (!mmx_ok)
20093 return false;
20094 /* FALLTHRU */
20095
20096 case V2DFmode:
20097 case V2DImode:
20098 case V4SFmode:
20099 case V4SImode:
20100 val = force_reg (GET_MODE_INNER (mode), val);
20101 x = gen_rtx_VEC_DUPLICATE (mode, val);
20102 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20103 return true;
20104
20105 case V4HImode:
20106 if (!mmx_ok)
20107 return false;
20108 if (TARGET_SSE || TARGET_3DNOW_A)
20109 {
20110 val = gen_lowpart (SImode, val);
20111 x = gen_rtx_TRUNCATE (HImode, val);
20112 x = gen_rtx_VEC_DUPLICATE (mode, x);
20113 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20114 return true;
20115 }
20116 else
20117 {
20118 smode = HImode;
20119 wsmode = SImode;
20120 wvmode = V2SImode;
20121 goto widen;
20122 }
20123
20124 case V8QImode:
20125 if (!mmx_ok)
20126 return false;
20127 smode = QImode;
20128 wsmode = HImode;
20129 wvmode = V4HImode;
20130 goto widen;
20131 case V8HImode:
20132 if (TARGET_SSE2)
20133 {
20134 rtx tmp1, tmp2;
20135 /* Extend HImode to SImode using a paradoxical SUBREG. */
20136 tmp1 = gen_reg_rtx (SImode);
20137 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20138 /* Insert the SImode value as low element of V4SImode vector. */
20139 tmp2 = gen_reg_rtx (V4SImode);
20140 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20141 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20142 CONST0_RTX (V4SImode),
20143 const1_rtx);
20144 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20145 /* Cast the V4SImode vector back to a V8HImode vector. */
20146 tmp1 = gen_reg_rtx (V8HImode);
20147 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20148 /* Duplicate the low short through the whole low SImode word. */
20149 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20150 /* Cast the V8HImode vector back to a V4SImode vector. */
20151 tmp2 = gen_reg_rtx (V4SImode);
20152 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20153 /* Replicate the low element of the V4SImode vector. */
20154 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20155 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20156 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20157 return true;
20158 }
20159 smode = HImode;
20160 wsmode = SImode;
20161 wvmode = V4SImode;
20162 goto widen;
20163 case V16QImode:
20164 if (TARGET_SSE2)
20165 {
20166 rtx tmp1, tmp2;
20167 /* Extend QImode to SImode using a paradoxical SUBREG. */
20168 tmp1 = gen_reg_rtx (SImode);
20169 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20170 /* Insert the SImode value as low element of V4SImode vector. */
20171 tmp2 = gen_reg_rtx (V4SImode);
20172 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20173 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20174 CONST0_RTX (V4SImode),
20175 const1_rtx);
20176 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20177 /* Cast the V4SImode vector back to a V16QImode vector. */
20178 tmp1 = gen_reg_rtx (V16QImode);
20179 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20180 /* Duplicate the low byte through the whole low SImode word. */
20181 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20182 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20183 /* Cast the V16QImode vector back to a V4SImode vector. */
20184 tmp2 = gen_reg_rtx (V4SImode);
20185 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20186 /* Replicate the low element of the V4SImode vector. */
20187 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20188 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20189 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20190 return true;
20191 }
20192 smode = QImode;
20193 wsmode = HImode;
20194 wvmode = V8HImode;
20195 goto widen;
20196 widen:
20197 /* Replicate the value once into the next wider mode and recurse. */
20198 val = convert_modes (wsmode, smode, val, true);
20199 x = expand_simple_binop (wsmode, ASHIFT, val,
20200 GEN_INT (GET_MODE_BITSIZE (smode)),
20201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20202 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20203
20204 x = gen_reg_rtx (wvmode);
20205 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20206 gcc_unreachable ();
20207 emit_move_insn (target, gen_lowpart (mode, x));
20208 return true;
20209
20210 default:
20211 return false;
20212 }
20213 }
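/* Illustration of the "widen" strategy above (a C-like sketch, assuming
   the usual fixed-width integer types): to splat a QImode value V
   across V8QImode when neither SSE nor 3DNow!-A shuffles are usable, V
   is first replicated into progressively wider scalars,

     hi = (uint16_t) v | ((uint16_t) v << 8);
     si = (uint32_t) hi | ((uint32_t) hi << 16);

   and the resulting SImode word is then broadcast as a V2SImode vector,
   which leaves the same byte in every lane.  */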
20214
20215 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20216 whose ONE_VAR element is VAR, and other elements are zero. Return true
20217 if successful. */
20218
20219 static bool
20220 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20221 rtx target, rtx var, int one_var)
20222 {
20223 enum machine_mode vsimode;
20224 rtx new_target;
20225 rtx x, tmp;
20226
20227 switch (mode)
20228 {
20229 case V2SFmode:
20230 case V2SImode:
20231 if (!mmx_ok)
20232 return false;
20233 /* FALLTHRU */
20234
20235 case V2DFmode:
20236 case V2DImode:
20237 if (one_var != 0)
20238 return false;
20239 var = force_reg (GET_MODE_INNER (mode), var);
20240 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20241 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20242 return true;
20243
20244 case V4SFmode:
20245 case V4SImode:
20246 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20247 new_target = gen_reg_rtx (mode);
20248 else
20249 new_target = target;
20250 var = force_reg (GET_MODE_INNER (mode), var);
20251 x = gen_rtx_VEC_DUPLICATE (mode, var);
20252 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20253 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20254 if (one_var != 0)
20255 {
20256 /* We need to shuffle the value to the correct position, so
20257 create a new pseudo to store the intermediate result. */
20258
20259 /* With SSE2, we can use the integer shuffle insns. */
20260 if (mode != V4SFmode && TARGET_SSE2)
20261 {
20262 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20263 GEN_INT (1),
20264 GEN_INT (one_var == 1 ? 0 : 1),
20265 GEN_INT (one_var == 2 ? 0 : 1),
20266 GEN_INT (one_var == 3 ? 0 : 1)));
20267 if (target != new_target)
20268 emit_move_insn (target, new_target);
20269 return true;
20270 }
20271
20272 /* Otherwise convert the intermediate result to V4SFmode and
20273 use the SSE1 shuffle instructions. */
20274 if (mode != V4SFmode)
20275 {
20276 tmp = gen_reg_rtx (V4SFmode);
20277 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20278 }
20279 else
20280 tmp = new_target;
20281
20282 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20283 GEN_INT (1),
20284 GEN_INT (one_var == 1 ? 0 : 1),
20285 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20286 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20287
20288 if (mode != V4SFmode)
20289 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20290 else if (tmp != target)
20291 emit_move_insn (target, tmp);
20292 }
20293 else if (target != new_target)
20294 emit_move_insn (target, new_target);
20295 return true;
20296
20297 case V8HImode:
20298 case V16QImode:
20299 vsimode = V4SImode;
20300 goto widen;
20301 case V4HImode:
20302 case V8QImode:
20303 if (!mmx_ok)
20304 return false;
20305 vsimode = V2SImode;
20306 goto widen;
20307 widen:
20308 if (one_var != 0)
20309 return false;
20310
20311 /* Zero extend the variable element to SImode and recurse. */
20312 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20313
20314 x = gen_reg_rtx (vsimode);
20315 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20316 var, one_var))
20317 gcc_unreachable ();
20318
20319 emit_move_insn (target, gen_lowpart (mode, x));
20320 return true;
20321
20322 default:
20323 return false;
20324 }
20325 }
20326
20327 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20328 consisting of the values in VALS. It is known that all elements
20329 except ONE_VAR are constants. Return true if successful. */
20330
20331 static bool
20332 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20333 rtx target, rtx vals, int one_var)
20334 {
20335 rtx var = XVECEXP (vals, 0, one_var);
20336 enum machine_mode wmode;
20337 rtx const_vec, x;
20338
20339 const_vec = copy_rtx (vals);
20340 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20341 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20342
20343 switch (mode)
20344 {
20345 case V2DFmode:
20346 case V2DImode:
20347 case V2SFmode:
20348 case V2SImode:
20349 /* For the two element vectors, it's just as easy to use
20350 the general case. */
20351 return false;
20352
20353 case V4SFmode:
20354 case V4SImode:
20355 case V8HImode:
20356 case V4HImode:
20357 break;
20358
20359 case V16QImode:
20360 wmode = V8HImode;
20361 goto widen;
20362 case V8QImode:
20363 wmode = V4HImode;
20364 goto widen;
20365 widen:
20366 /* There's no way to set one QImode entry easily. Combine
20367 the variable value with its adjacent constant value, and
20368 promote to an HImode set. */
20369 x = XVECEXP (vals, 0, one_var ^ 1);
20370 if (one_var & 1)
20371 {
20372 var = convert_modes (HImode, QImode, var, true);
20373 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20374 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20375 x = GEN_INT (INTVAL (x) & 0xff);
20376 }
20377 else
20378 {
20379 var = convert_modes (HImode, QImode, var, true);
20380 x = gen_int_mode (INTVAL (x) << 8, HImode);
20381 }
20382 if (x != const0_rtx)
20383 var = expand_simple_binop (HImode, IOR, var, x, var,
20384 1, OPTAB_LIB_WIDEN);
20385
20386 x = gen_reg_rtx (wmode);
20387 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20388 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20389
20390 emit_move_insn (target, gen_lowpart (mode, x));
20391 return true;
20392
20393 default:
20394 return false;
20395 }
20396
20397 emit_move_insn (target, const_vec);
20398 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20399 return true;
20400 }
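/* Example of the QImode combining above (illustrative, little-endian):
   to set byte 3 of a V8QImode vector whose other bytes are constants,
   the variable byte is merged with its constant neighbour (byte 2) into
   a single HImode element,

     elt = ((uint16_t) var << 8) | (neighbour & 0xff);

   after which the problem is handed back to ix86_expand_vector_set as a
   store of element 1 of the corresponding V4HImode vector.  */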
20401
20402 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20403 all values variable, and none identical. */
20404
20405 static void
20406 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20407 rtx target, rtx vals)
20408 {
20409 enum machine_mode half_mode = GET_MODE_INNER (mode);
20410 rtx op0 = NULL, op1 = NULL;
20411 bool use_vec_concat = false;
20412
20413 switch (mode)
20414 {
20415 case V2SFmode:
20416 case V2SImode:
20417 if (!mmx_ok && !TARGET_SSE)
20418 break;
20419 /* FALLTHRU */
20420
20421 case V2DFmode:
20422 case V2DImode:
20423 /* For the two element vectors, we always implement VEC_CONCAT. */
20424 op0 = XVECEXP (vals, 0, 0);
20425 op1 = XVECEXP (vals, 0, 1);
20426 use_vec_concat = true;
20427 break;
20428
20429 case V4SFmode:
20430 half_mode = V2SFmode;
20431 goto half;
20432 case V4SImode:
20433 half_mode = V2SImode;
20434 goto half;
20435 half:
20436 {
20437 rtvec v;
20438
20439 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20440 Recurse to load the two halves. */
20441
20442 op0 = gen_reg_rtx (half_mode);
20443 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20444 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20445
20446 op1 = gen_reg_rtx (half_mode);
20447 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20448 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20449
20450 use_vec_concat = true;
20451 }
20452 break;
20453
20454 case V8HImode:
20455 case V16QImode:
20456 case V4HImode:
20457 case V8QImode:
20458 break;
20459
20460 default:
20461 gcc_unreachable ();
20462 }
20463
20464 if (use_vec_concat)
20465 {
20466 if (!register_operand (op0, half_mode))
20467 op0 = force_reg (half_mode, op0);
20468 if (!register_operand (op1, half_mode))
20469 op1 = force_reg (half_mode, op1);
20470
20471 emit_insn (gen_rtx_SET (VOIDmode, target,
20472 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20473 }
20474 else
20475 {
20476 int i, j, n_elts, n_words, n_elt_per_word;
20477 enum machine_mode inner_mode;
20478 rtx words[4], shift;
20479
20480 inner_mode = GET_MODE_INNER (mode);
20481 n_elts = GET_MODE_NUNITS (mode);
20482 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20483 n_elt_per_word = n_elts / n_words;
20484 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20485
20486 for (i = 0; i < n_words; ++i)
20487 {
20488 rtx word = NULL_RTX;
20489
20490 for (j = 0; j < n_elt_per_word; ++j)
20491 {
20492 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20493 elt = convert_modes (word_mode, inner_mode, elt, true);
20494
20495 if (j == 0)
20496 word = elt;
20497 else
20498 {
20499 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20500 word, 1, OPTAB_LIB_WIDEN);
20501 word = expand_simple_binop (word_mode, IOR, word, elt,
20502 word, 1, OPTAB_LIB_WIDEN);
20503 }
20504 }
20505
20506 words[i] = word;
20507 }
20508
20509 if (n_words == 1)
20510 emit_move_insn (target, gen_lowpart (mode, words[0]));
20511 else if (n_words == 2)
20512 {
20513 rtx tmp = gen_reg_rtx (mode);
20514 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20515 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20516 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20517 emit_move_insn (target, tmp);
20518 }
20519 else if (n_words == 4)
20520 {
20521 rtx tmp = gen_reg_rtx (V4SImode);
20522 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20523 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20524 emit_move_insn (target, gen_lowpart (mode, tmp));
20525 }
20526 else
20527 gcc_unreachable ();
20528 }
20529 }
20530
20531 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20532 instructions unless MMX_OK is true. */
20533
20534 void
20535 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20536 {
20537 enum machine_mode mode = GET_MODE (target);
20538 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20539 int n_elts = GET_MODE_NUNITS (mode);
20540 int n_var = 0, one_var = -1;
20541 bool all_same = true, all_const_zero = true;
20542 int i;
20543 rtx x;
20544
20545 for (i = 0; i < n_elts; ++i)
20546 {
20547 x = XVECEXP (vals, 0, i);
20548 if (!CONSTANT_P (x))
20549 n_var++, one_var = i;
20550 else if (x != CONST0_RTX (inner_mode))
20551 all_const_zero = false;
20552 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20553 all_same = false;
20554 }
20555
20556 /* Constants are best loaded from the constant pool. */
20557 if (n_var == 0)
20558 {
20559 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20560 return;
20561 }
20562
20563 /* If all values are identical, broadcast the value. */
20564 if (all_same
20565 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20566 XVECEXP (vals, 0, 0)))
20567 return;
20568
20569 /* Values where only one field is non-constant are best loaded from
20570 the pool and overwritten via move later. */
20571 if (n_var == 1)
20572 {
20573 if (all_const_zero
20574 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20575 XVECEXP (vals, 0, one_var),
20576 one_var))
20577 return;
20578
20579 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20580 return;
20581 }
20582
20583 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20584 }
20585
20586 void
20587 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20588 {
20589 enum machine_mode mode = GET_MODE (target);
20590 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20591 bool use_vec_merge = false;
20592 rtx tmp;
20593
20594 switch (mode)
20595 {
20596 case V2SFmode:
20597 case V2SImode:
20598 if (mmx_ok)
20599 {
20600 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20601 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20602 if (elt == 0)
20603 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20604 else
20605 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20606 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20607 return;
20608 }
20609 break;
20610
20611 case V2DFmode:
20612 case V2DImode:
20613 {
20614 rtx op0, op1;
20615
20616 /* For the two element vectors, we implement a VEC_CONCAT with
20617 the extraction of the other element. */
20618
20619 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20620 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20621
20622 if (elt == 0)
20623 op0 = val, op1 = tmp;
20624 else
20625 op0 = tmp, op1 = val;
20626
20627 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20628 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20629 }
20630 return;
20631
20632 case V4SFmode:
20633 switch (elt)
20634 {
20635 case 0:
20636 use_vec_merge = true;
20637 break;
20638
20639 case 1:
20640 /* tmp = target = A B C D */
20641 tmp = copy_to_reg (target);
20642 /* target = A A B B */
20643 emit_insn (gen_sse_unpcklps (target, target, target));
20644 /* target = X A B B */
20645 ix86_expand_vector_set (false, target, val, 0);
20646 /* target = A X C D */
20647 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20648 GEN_INT (1), GEN_INT (0),
20649 GEN_INT (2+4), GEN_INT (3+4)));
20650 return;
20651
20652 case 2:
20653 /* tmp = target = A B C D */
20654 tmp = copy_to_reg (target);
20655 /* tmp = X B C D */
20656 ix86_expand_vector_set (false, tmp, val, 0);
20657 /* target = A B X D */
20658 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20659 GEN_INT (0), GEN_INT (1),
20660 GEN_INT (0+4), GEN_INT (3+4)));
20661 return;
20662
20663 case 3:
20664 /* tmp = target = A B C D */
20665 tmp = copy_to_reg (target);
20666 /* tmp = X B C D */
20667 ix86_expand_vector_set (false, tmp, val, 0);
20668 /* target = A B C X */
20669 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20670 GEN_INT (0), GEN_INT (1),
20671 GEN_INT (2+4), GEN_INT (0+4)));
20672 return;
20673
20674 default:
20675 gcc_unreachable ();
20676 }
20677 break;
20678
20679 case V4SImode:
20680 /* Element 0 handled by vec_merge below. */
20681 if (elt == 0)
20682 {
20683 use_vec_merge = true;
20684 break;
20685 }
20686
20687 if (TARGET_SSE2)
20688 {
20689 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20690 store into element 0, then shuffle them back. */
20691
20692 rtx order[4];
20693
20694 order[0] = GEN_INT (elt);
20695 order[1] = const1_rtx;
20696 order[2] = const2_rtx;
20697 order[3] = GEN_INT (3);
20698 order[elt] = const0_rtx;
20699
20700 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20701 order[1], order[2], order[3]));
20702
20703 ix86_expand_vector_set (false, target, val, 0);
20704
20705 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20706 order[1], order[2], order[3]));
20707 }
20708 else
20709 {
20710 /* For SSE1, we have to reuse the V4SF code. */
20711 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20712 gen_lowpart (SFmode, val), elt);
20713 }
20714 return;
20715
20716 case V8HImode:
20717 use_vec_merge = TARGET_SSE2;
20718 break;
20719 case V4HImode:
20720 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20721 break;
20722
20723 case V16QImode:
20724 case V8QImode:
20725 default:
20726 break;
20727 }
20728
20729 if (use_vec_merge)
20730 {
20731 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20732 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20733 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20734 }
20735 else
20736 {
20737 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20738
20739 emit_move_insn (mem, target);
20740
20741 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20742 emit_move_insn (tmp, val);
20743
20744 emit_move_insn (target, mem);
20745 }
20746 }
20747
20748 void
20749 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20750 {
20751 enum machine_mode mode = GET_MODE (vec);
20752 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20753 bool use_vec_extr = false;
20754 rtx tmp;
20755
20756 switch (mode)
20757 {
20758 case V2SImode:
20759 case V2SFmode:
20760 if (!mmx_ok)
20761 break;
20762 /* FALLTHRU */
20763
20764 case V2DFmode:
20765 case V2DImode:
20766 use_vec_extr = true;
20767 break;
20768
20769 case V4SFmode:
20770 switch (elt)
20771 {
20772 case 0:
20773 tmp = vec;
20774 break;
20775
20776 case 1:
20777 case 3:
20778 tmp = gen_reg_rtx (mode);
20779 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20780 GEN_INT (elt), GEN_INT (elt),
20781 GEN_INT (elt+4), GEN_INT (elt+4)));
20782 break;
20783
20784 case 2:
20785 tmp = gen_reg_rtx (mode);
20786 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20787 break;
20788
20789 default:
20790 gcc_unreachable ();
20791 }
20792 vec = tmp;
20793 use_vec_extr = true;
20794 elt = 0;
20795 break;
20796
20797 case V4SImode:
20798 if (TARGET_SSE2)
20799 {
20800 switch (elt)
20801 {
20802 case 0:
20803 tmp = vec;
20804 break;
20805
20806 case 1:
20807 case 3:
20808 tmp = gen_reg_rtx (mode);
20809 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20810 GEN_INT (elt), GEN_INT (elt),
20811 GEN_INT (elt), GEN_INT (elt)));
20812 break;
20813
20814 case 2:
20815 tmp = gen_reg_rtx (mode);
20816 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20817 break;
20818
20819 default:
20820 gcc_unreachable ();
20821 }
20822 vec = tmp;
20823 use_vec_extr = true;
20824 elt = 0;
20825 }
20826 else
20827 {
20828 /* For SSE1, we have to reuse the V4SF code. */
20829 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20830 gen_lowpart (V4SFmode, vec), elt);
20831 return;
20832 }
20833 break;
20834
20835 case V8HImode:
20836 use_vec_extr = TARGET_SSE2;
20837 break;
20838 case V4HImode:
20839 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20840 break;
20841
20842 case V16QImode:
20843 case V8QImode:
20844 /* ??? Could extract the appropriate HImode element and shift. */
20845 default:
20846 break;
20847 }
20848
20849 if (use_vec_extr)
20850 {
20851 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20852 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20853
20854 /* Let the rtl optimizers know about the zero extension performed. */
20855 if (inner_mode == HImode)
20856 {
20857 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20858 target = gen_lowpart (SImode, target);
20859 }
20860
20861 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20862 }
20863 else
20864 {
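 /* No direct extraction pattern is available: spill the vector to a
    stack temporary and load the requested element from its byte
    offset.  */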
20865 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20866
20867 emit_move_insn (mem, vec);
20868
20869 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20870 emit_move_insn (target, tmp);
20871 }
20872 }
20873
20874 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20875 pattern to reduce; DEST is the destination; IN is the input vector. */
20876
20877 void
20878 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20879 {
20880 rtx tmp1, tmp2, tmp3;
20881
20882 tmp1 = gen_reg_rtx (V4SFmode);
20883 tmp2 = gen_reg_rtx (V4SFmode);
20884 tmp3 = gen_reg_rtx (V4SFmode);
20885
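 /* For example, with IN = { a, b, c, d } and FN the addition pattern:
      tmp1 = { c, d, c, d }              (movhlps)
      tmp2 = { a+c, b+d, c+c, d+d }      (fn)
      tmp3 = { b+d, b+d, b+d, b+d }      (shufps)
      dest = { a+b+c+d, ... }            (fn)
    so element 0 of DEST holds the full reduction.  */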
20886 emit_insn (gen_sse_movhlps (tmp1, in, in));
20887 emit_insn (fn (tmp2, tmp1, in));
20888
20889 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20890 GEN_INT (1), GEN_INT (1),
20891 GEN_INT (1+4), GEN_INT (1+4)));
20892 emit_insn (fn (dest, tmp2, tmp3));
20893 }
20894 \f
20895 /* Target hook for scalar_mode_supported_p. */
20896 static bool
20897 ix86_scalar_mode_supported_p (enum machine_mode mode)
20898 {
20899 if (DECIMAL_FLOAT_MODE_P (mode))
20900 return true;
20901 else
20902 return default_scalar_mode_supported_p (mode);
20903 }
20904
20905 /* Implements target hook vector_mode_supported_p. */
20906 static bool
20907 ix86_vector_mode_supported_p (enum machine_mode mode)
20908 {
20909 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20910 return true;
20911 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20912 return true;
20913 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20914 return true;
20915 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20916 return true;
20917 return false;
20918 }
20919
20920 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20921
20922 We do this in the new i386 backend to maintain source compatibility
20923 with the old cc0-based compiler. */
20924
20925 static tree
20926 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20927 tree inputs ATTRIBUTE_UNUSED,
20928 tree clobbers)
20929 {
20930 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20931 clobbers);
20932 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20933 clobbers);
20934 return clobbers;
20935 }
20936
20937 /* Return true if this goes in large data/bss. */
20938
20939 static bool
20940 ix86_in_large_data_p (tree exp)
20941 {
20942 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20943 return false;
20944
20945 /* Functions are never large data. */
20946 if (TREE_CODE (exp) == FUNCTION_DECL)
20947 return false;
20948
20949 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20950 {
20951 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20952 if (strcmp (section, ".ldata") == 0
20953 || strcmp (section, ".lbss") == 0)
20954 return true;
20955 return false;
20956 }
20957 else
20958 {
20959 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20960
20961 /* If this is an incomplete type with size 0, then we can't put it
20962 in data because it might be too big when completed. */
20963 if (!size || size > ix86_section_threshold)
20964 return true;
20965 }
20966
20967 return false;
20968 }
20969 static void
20970 ix86_encode_section_info (tree decl, rtx rtl, int first)
20971 {
20972 default_encode_section_info (decl, rtl, first);
20973
20974 if (TREE_CODE (decl) == VAR_DECL
20975 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20976 && ix86_in_large_data_p (decl))
20977 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20978 }
20979
20980 /* Worker function for REVERSE_CONDITION. */
20981
20982 enum rtx_code
20983 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20984 {
20985 return (mode != CCFPmode && mode != CCFPUmode
20986 ? reverse_condition (code)
20987 : reverse_condition_maybe_unordered (code));
20988 }
20989
20990 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20991 to OPERANDS[0]. */
20992
20993 const char *
20994 output_387_reg_move (rtx insn, rtx *operands)
20995 {
20996 if (REG_P (operands[1])
20997 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20998 {
20999 if (REGNO (operands[0]) == FIRST_STACK_REG)
21000 return output_387_ffreep (operands, 0);
21001 return "fstp\t%y0";
21002 }
21003 if (STACK_TOP_P (operands[0]))
21004 return "fld%z1\t%y1";
21005 return "fst\t%y0";
21006 }
21007
21008 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
21009 the FP status register is set. */
21010
21011 void
21012 ix86_emit_fp_unordered_jump (rtx label)
21013 {
21014 rtx reg = gen_reg_rtx (HImode);
21015 rtx temp;
21016
21017 emit_insn (gen_x86_fnstsw_1 (reg));
21018
21019 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21020 {
21021 emit_insn (gen_x86_sahf_1 (reg));
21022
21023 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21024 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21025 }
21026 else
21027 {
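 /* The constant 0x04 selects bit 2 of the status word's high byte,
    i.e. the C2 flag (bit 10 of the full word), so a nonzero test
    result means C2 is set.  */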
21028 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21029
21030 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21031 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21032 }
21033
21034 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21035 gen_rtx_LABEL_REF (VOIDmode, label),
21036 pc_rtx);
21037 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21038
21039 emit_jump_insn (temp);
21040 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21041 }
21042
21043 /* Output code to perform a log1p XFmode calculation. */
21044
21045 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
21046 {
21047 rtx label1 = gen_label_rtx ();
21048 rtx label2 = gen_label_rtx ();
21049
21050 rtx tmp = gen_reg_rtx (XFmode);
21051 rtx tmp2 = gen_reg_rtx (XFmode);
21052
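 /* The threshold 0.29289... is 1 - sqrt(2)/2, the limit of the
    documented argument range of fyl2xp1.  Within that range we use
    fyl2xp1 (op1, ln2) = ln (1 + op1) directly; outside it we form
    1 + op1 explicitly and use fyl2x instead.  */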
21053 emit_insn (gen_absxf2 (tmp, op1));
21054 emit_insn (gen_cmpxf (tmp,
21055 CONST_DOUBLE_FROM_REAL_VALUE (
21056 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21057 XFmode)));
21058 emit_jump_insn (gen_bge (label1));
21059
21060 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21061 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21062 emit_jump (label2);
21063
21064 emit_label (label1);
21065 emit_move_insn (tmp, CONST1_RTX (XFmode));
21066 emit_insn (gen_addxf3 (tmp, op1, tmp));
21067 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21068 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21069
21070 emit_label (label2);
21071 }
21072
21073 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21074
21075 static void
21076 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21077 tree decl)
21078 {
21079 /* With Binutils 2.15, the "@unwind" marker must be specified on
21080 every occurrence of the ".eh_frame" section, not just the first
21081 one. */
21082 if (TARGET_64BIT
21083 && strcmp (name, ".eh_frame") == 0)
21084 {
21085 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21086 flags & SECTION_WRITE ? "aw" : "a");
21087 return;
21088 }
21089 default_elf_asm_named_section (name, flags, decl);
21090 }
21091
21092 /* Return the mangling of TYPE if it is an extended fundamental type. */
21093
21094 static const char *
21095 ix86_mangle_fundamental_type (tree type)
21096 {
21097 switch (TYPE_MODE (type))
21098 {
21099 case TFmode:
21100 /* __float128 is "g". */
21101 return "g";
21102 case XFmode:
21103 /* "long double" or __float80 is "e". */
21104 return "e";
21105 default:
21106 return NULL;
21107 }
21108 }
21109
21110 /* For 32-bit code we can save PIC register setup by using
21111 the __stack_chk_fail_local hidden function instead of calling
21112 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21113 register, so it is better to call __stack_chk_fail directly. */
21114
21115 static tree
21116 ix86_stack_protect_fail (void)
21117 {
21118 return TARGET_64BIT
21119 ? default_external_stack_protect_fail ()
21120 : default_hidden_stack_protect_fail ();
21121 }
21122
21123 /* Select a format to encode pointers in exception handling data. CODE
21124 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21125 true if the symbol may be affected by dynamic relocations.
21126
21127 ??? All x86 object file formats are capable of representing this.
21128 After all, the relocation needed is the same as for the call insn.
21129 Whether or not a particular assembler allows us to enter such, I
21130 guess we'll have to see. */
21131 int
21132 asm_preferred_eh_data_format (int code, int global)
21133 {
21134 if (flag_pic)
21135 {
21136 int type = DW_EH_PE_sdata8;
21137 if (!TARGET_64BIT
21138 || ix86_cmodel == CM_SMALL_PIC
21139 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21140 type = DW_EH_PE_sdata4;
21141 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21142 }
21143 if (ix86_cmodel == CM_SMALL
21144 || (ix86_cmodel == CM_MEDIUM && code))
21145 return DW_EH_PE_udata4;
21146 return DW_EH_PE_absptr;
21147 }
21148 \f
21149 /* Expand copysign from SIGN to the positive value ABS_VALUE
21150 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21151 the sign-bit. */
21152 static void
21153 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21154 {
21155 enum machine_mode mode = GET_MODE (sign);
21156 rtx sgn = gen_reg_rtx (mode);
21157 if (mask == NULL_RTX)
21158 {
21159 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21160 if (!VECTOR_MODE_P (mode))
21161 {
21162 /* We need to generate a scalar mode mask in this case. */
21163 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21164 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21165 mask = gen_reg_rtx (mode);
21166 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21167 }
21168 }
21169 else
21170 mask = gen_rtx_NOT (mode, mask);
21171 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21172 gen_rtx_AND (mode, mask, sign)));
21173 emit_insn (gen_rtx_SET (VOIDmode, result,
21174 gen_rtx_IOR (mode, abs_value, sgn)));
21175 }
21176
21177 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21178 mask for masking out the sign-bit is stored in *SMASK, if that is
21179 non-null. */
21180 static rtx
21181 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21182 {
21183 enum machine_mode mode = GET_MODE (op0);
21184 rtx xa, mask;
21185
21186 xa = gen_reg_rtx (mode);
21187 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21188 if (!VECTOR_MODE_P (mode))
21189 {
21190 /* We need to generate a scalar mode mask in this case. */
21191 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21192 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21193 mask = gen_reg_rtx (mode);
21194 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21195 }
21196 emit_insn (gen_rtx_SET (VOIDmode, xa,
21197 gen_rtx_AND (mode, op0, mask)));
21198
21199 if (smask)
21200 *smask = mask;
21201
21202 return xa;
21203 }
21204
21205 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21206 swapping the operands if SWAP_OPERANDS is true. The expanded
21207 code is a forward jump to a newly created label in case the
21208 comparison is true. The generated label rtx is returned. */
21209 static rtx
21210 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21211 bool swap_operands)
21212 {
21213 rtx label, tmp;
21214
21215 if (swap_operands)
21216 {
21217 tmp = op0;
21218 op0 = op1;
21219 op1 = tmp;
21220 }
21221
21222 label = gen_label_rtx ();
21223 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21224 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21225 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21226 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21227 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21228 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21229 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21230 JUMP_LABEL (tmp) = label;
21231
21232 return label;
21233 }
21234
21235 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
21236 using comparison code CODE. Operands are swapped for the comparison if
21237 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
21238 static rtx
21239 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21240 bool swap_operands)
21241 {
21242 enum machine_mode mode = GET_MODE (op0);
21243 rtx mask = gen_reg_rtx (mode);
21244
21245 if (swap_operands)
21246 {
21247 rtx tmp = op0;
21248 op0 = op1;
21249 op1 = tmp;
21250 }
21251
21252 if (mode == DFmode)
21253 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21254 gen_rtx_fmt_ee (code, mode, op0, op1)));
21255 else
21256 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21257 gen_rtx_fmt_ee (code, mode, op0, op1)));
21258
21259 return mask;
21260 }
21261
21262 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21263 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21264 static rtx
21265 ix86_gen_TWO52 (enum machine_mode mode)
21266 {
21267 REAL_VALUE_TYPE TWO52r;
21268 rtx TWO52;
21269
21270 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21271 TWO52 = const_double_from_real_value (TWO52r, mode);
21272 TWO52 = force_reg (mode, TWO52);
21273
21274 return TWO52;
21275 }
21276
21277 /* Expand SSE sequence for computing lround from OP1 storing
21278 into OP0. */
21279 void
21280 ix86_expand_lround (rtx op0, rtx op1)
21281 {
21282 /* C code for the stuff we're doing below:
21283 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21284 return (long)tmp;
21285 */
21286 enum machine_mode mode = GET_MODE (op1);
21287 const struct real_format *fmt;
21288 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21289 rtx adj;
21290
21291 /* load nextafter (0.5, 0.0) */
21292 fmt = REAL_MODE_FORMAT (mode);
21293 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21294 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
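 /* pred_half = 0.5 - 2**(-p-1) is the largest representable value
    below 0.5, i.e. the nextafter (0.5, 0.0) of the pseudocode above;
    using it instead of exactly 0.5 keeps the addition from rounding
    inputs just below a halfway point over that point.  */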
21295
21296 /* adj = copysign (0.5, op1) */
21297 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21298 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21299
21300 /* adj = op1 + adj */
21301 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21302
21303 /* op0 = (imode)adj */
21304 expand_fix (op0, adj, 0);
21305 }
21306
21307 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
21308 into OP0. */
21309 void
21310 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21311 {
21312 /* C code for the stuff we're doing below (for do_floor):
21313 xi = (long)op1;
21314 xi -= (double)xi > op1 ? 1 : 0;
21315 return xi;
21316 */
21317 enum machine_mode fmode = GET_MODE (op1);
21318 enum machine_mode imode = GET_MODE (op0);
21319 rtx ireg, freg, label, tmp;
21320
21321 /* reg = (long)op1 */
21322 ireg = gen_reg_rtx (imode);
21323 expand_fix (ireg, op1, 0);
21324
21325 /* freg = (double)reg */
21326 freg = gen_reg_rtx (fmode);
21327 expand_float (freg, ireg, 0);
21328
21329 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21330 label = ix86_expand_sse_compare_and_jump (UNLE,
21331 freg, op1, !do_floor);
21332 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21333 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21334 emit_move_insn (ireg, tmp);
21335
21336 emit_label (label);
21337 LABEL_NUSES (label) = 1;
21338
21339 emit_move_insn (op0, ireg);
21340 }
21341
21342 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21343 result in OPERAND0. */
21344 void
21345 ix86_expand_rint (rtx operand0, rtx operand1)
21346 {
21347 /* C code for the stuff we're doing below:
21348 xa = fabs (operand1);
21349 if (!isless (xa, 2**52))
21350 return operand1;
21351 xa = xa + 2**52 - 2**52;
21352 return copysign (xa, operand1);
21353 */
21354 enum machine_mode mode = GET_MODE (operand0);
21355 rtx res, xa, label, TWO52, mask;
21356
21357 res = gen_reg_rtx (mode);
21358 emit_move_insn (res, operand1);
21359
21360 /* xa = abs (operand1) */
21361 xa = ix86_expand_sse_fabs (res, &mask);
21362
21363 /* if (!isless (xa, TWO52)) goto label; */
21364 TWO52 = ix86_gen_TWO52 (mode);
21365 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21366
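 /* Adding and then subtracting TWO52 (2**52 for DFmode, 2**23 for
    SFmode) rounds xa to an integer in the current rounding mode:
    for 0 <= xa < TWO52 the sum's unit in the last place is 1.0, so
    the fractional bits are rounded away.  */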
21367 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21368 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21369
21370 ix86_sse_copysign_to_positive (res, xa, res, mask);
21371
21372 emit_label (label);
21373 LABEL_NUSES (label) = 1;
21374
21375 emit_move_insn (operand0, res);
21376 }
21377
21378 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21379 into OPERAND0. */
21380 void
21381 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21382 {
21383 /* C code for the stuff we expand below.
21384 double xa = fabs (x), x2;
21385 if (!isless (xa, TWO52))
21386 return x;
21387 xa = xa + TWO52 - TWO52;
21388 x2 = copysign (xa, x);
21389 Compensate. Floor:
21390 if (x2 > x)
21391 x2 -= 1;
21392 Compensate. Ceil:
21393 if (x2 < x)
21394 x2 -= -1;
21395 return x2;
21396 */
21397 enum machine_mode mode = GET_MODE (operand0);
21398 rtx xa, TWO52, tmp, label, one, res, mask;
21399
21400 TWO52 = ix86_gen_TWO52 (mode);
21401
21402 /* Temporary for holding the result, initialized to the input
21403 operand to ease control flow. */
21404 res = gen_reg_rtx (mode);
21405 emit_move_insn (res, operand1);
21406
21407 /* xa = abs (operand1) */
21408 xa = ix86_expand_sse_fabs (res, &mask);
21409
21410 /* if (!isless (xa, TWO52)) goto label; */
21411 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21412
21413 /* xa = xa + TWO52 - TWO52; */
21414 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21415 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21416
21417 /* xa = copysign (xa, operand1) */
21418 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21419
21420 /* generate 1.0 or -1.0 */
21421 one = force_reg (mode,
21422 const_double_from_real_value (do_floor
21423 ? dconst1 : dconstm1, mode));
21424
21425 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21426 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21427 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21428 gen_rtx_AND (mode, one, tmp)));
21429 /* We always need to subtract here to preserve signed zero. */
21430 tmp = expand_simple_binop (mode, MINUS,
21431 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21432 emit_move_insn (res, tmp);
21433
21434 emit_label (label);
21435 LABEL_NUSES (label) = 1;
21436
21437 emit_move_insn (operand0, res);
21438 }
21439
21440 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21441 into OPERAND0. */
21442 void
21443 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21444 {
21445 /* C code for the stuff we expand below.
21446 double xa = fabs (x), x2;
21447 if (!isless (xa, TWO52))
21448 return x;
21449 x2 = (double)(long)x;
21450 Compensate. Floor:
21451 if (x2 > x)
21452 x2 -= 1;
21453 Compensate. Ceil:
21454 if (x2 < x)
21455 x2 += 1;
21456 if (HONOR_SIGNED_ZEROS (mode))
21457 return copysign (x2, x);
21458 return x2;
21459 */
21460 enum machine_mode mode = GET_MODE (operand0);
21461 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21462
21463 TWO52 = ix86_gen_TWO52 (mode);
21464
21465 /* Temporary for holding the result, initialized to the input
21466 operand to ease control flow. */
21467 res = gen_reg_rtx (mode);
21468 emit_move_insn (res, operand1);
21469
21470 /* xa = abs (operand1) */
21471 xa = ix86_expand_sse_fabs (res, &mask);
21472
21473 /* if (!isless (xa, TWO52)) goto label; */
21474 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21475
21476 /* xa = (double)(long)x */
21477 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21478 expand_fix (xi, res, 0);
21479 expand_float (xa, xi, 0);
21480
21481 /* generate 1.0 */
21482 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21483
21484 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21485 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21486 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21487 gen_rtx_AND (mode, one, tmp)));
21488 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21489 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21490 emit_move_insn (res, tmp);
21491
21492 if (HONOR_SIGNED_ZEROS (mode))
21493 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21494
21495 emit_label (label);
21496 LABEL_NUSES (label) = 1;
21497
21498 emit_move_insn (operand0, res);
21499 }
21500
21501 /* Expand SSE sequence for computing round from OPERAND1 storing
21502 into OPERAND0. This sequence works without relying on DImode truncation
21503 via cvttsd2siq, which is only available on 64-bit targets. */
21504 void
21505 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21506 {
21507 /* C code for the stuff we expand below.
21508 double xa = fabs (x), xa2, x2;
21509 if (!isless (xa, TWO52))
21510 return x;
21511 Using the absolute value and copying back sign makes
21512 -0.0 -> -0.0 correct.
21513 xa2 = xa + TWO52 - TWO52;
21514 Compensate.
21515 dxa = xa2 - xa;
21516 if (dxa <= -0.5)
21517 xa2 += 1;
21518 else if (dxa > 0.5)
21519 xa2 -= 1;
21520 x2 = copysign (xa2, x);
21521 return x2;
21522 */
21523 enum machine_mode mode = GET_MODE (operand0);
21524 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21525
21526 TWO52 = ix86_gen_TWO52 (mode);
21527
21528 /* Temporary for holding the result, initialized to the input
21529 operand to ease control flow. */
21530 res = gen_reg_rtx (mode);
21531 emit_move_insn (res, operand1);
21532
21533 /* xa = abs (operand1) */
21534 xa = ix86_expand_sse_fabs (res, &mask);
21535
21536 /* if (!isless (xa, TWO52)) goto label; */
21537 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21538
21539 /* xa2 = xa + TWO52 - TWO52; */
21540 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21541 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21542
21543 /* dxa = xa2 - xa; */
21544 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21545
21546 /* generate 0.5, 1.0 and -0.5 */
21547 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21548 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21549 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21550 0, OPTAB_DIRECT);
21551
21552 /* Compensate. */
21553 tmp = gen_reg_rtx (mode);
21554 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21555 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21556 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21557 gen_rtx_AND (mode, one, tmp)));
21558 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21559 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21560 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21561 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21562 gen_rtx_AND (mode, one, tmp)));
21563 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21564
21565 /* res = copysign (xa2, operand1) */
21566 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21567
21568 emit_label (label);
21569 LABEL_NUSES (label) = 1;
21570
21571 emit_move_insn (operand0, res);
21572 }
21573
21574 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21575 into OPERAND0. */
21576 void
21577 ix86_expand_trunc (rtx operand0, rtx operand1)
21578 {
21579 /* C code for SSE variant we expand below.
21580 double xa = fabs (x), x2;
21581 if (!isless (xa, TWO52))
21582 return x;
21583 x2 = (double)(long)x;
21584 if (HONOR_SIGNED_ZEROS (mode))
21585 return copysign (x2, x);
21586 return x2;
21587 */
21588 enum machine_mode mode = GET_MODE (operand0);
21589 rtx xa, xi, TWO52, label, res, mask;
21590
21591 TWO52 = ix86_gen_TWO52 (mode);
21592
21593 /* Temporary for holding the result, initialized to the input
21594 operand to ease control flow. */
21595 res = gen_reg_rtx (mode);
21596 emit_move_insn (res, operand1);
21597
21598 /* xa = abs (operand1) */
21599 xa = ix86_expand_sse_fabs (res, &mask);
21600
21601 /* if (!isless (xa, TWO52)) goto label; */
21602 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21603
21604 /* x = (double)(long)x */
21605 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21606 expand_fix (xi, res, 0);
21607 expand_float (res, xi, 0);
21608
21609 if (HONOR_SIGNED_ZEROS (mode))
21610 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21611
21612 emit_label (label);
21613 LABEL_NUSES (label) = 1;
21614
21615 emit_move_insn (operand0, res);
21616 }
21617
21618 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21619 into OPERAND0. */
21620 void
21621 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21622 {
21623 enum machine_mode mode = GET_MODE (operand0);
21624 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21625
21626 /* C code for SSE variant we expand below.
21627 double xa = fabs (x), xa2, x2;
21628 if (!isless (xa, TWO52))
21629 return x;
21630 xa2 = xa + TWO52 - TWO52;
21631 Compensate:
21632 if (xa2 > xa)
21633 xa2 -= 1.0;
21634 x2 = copysign (xa2, x);
21635 return x2;
21636 */
21637
21638 TWO52 = ix86_gen_TWO52 (mode);
21639
21640 /* Temporary for holding the result, initialized to the input
21641 operand to ease control flow. */
21642 res = gen_reg_rtx (mode);
21643 emit_move_insn (res, operand1);
21644
21645 /* xa = abs (operand1) */
21646 xa = ix86_expand_sse_fabs (res, &smask);
21647
21648 /* if (!isless (xa, TWO52)) goto label; */
21649 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21650
21651 /* res = xa + TWO52 - TWO52; */
21652 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21653 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21654 emit_move_insn (res, tmp);
21655
21656 /* generate 1.0 */
21657 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21658
21659 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21660 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21661 emit_insn (gen_rtx_SET (VOIDmode, mask,
21662 gen_rtx_AND (mode, mask, one)));
21663 tmp = expand_simple_binop (mode, MINUS,
21664 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21665 emit_move_insn (res, tmp);
21666
21667 /* res = copysign (res, operand1) */
21668 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21669
21670 emit_label (label);
21671 LABEL_NUSES (label) = 1;
21672
21673 emit_move_insn (operand0, res);
21674 }
21675
21676 /* Expand SSE sequence for computing round from OPERAND1 storing
21677 into OPERAND0. */
21678 void
21679 ix86_expand_round (rtx operand0, rtx operand1)
21680 {
21681 /* C code for the stuff we're doing below:
21682 double xa = fabs (x);
21683 if (!isless (xa, TWO52))
21684 return x;
21685 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21686 return copysign (xa, x);
21687 */
21688 enum machine_mode mode = GET_MODE (operand0);
21689 rtx res, TWO52, xa, label, xi, half, mask;
21690 const struct real_format *fmt;
21691 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21692
21693 /* Temporary for holding the result, initialized to the input
21694 operand to ease control flow. */
21695 res = gen_reg_rtx (mode);
21696 emit_move_insn (res, operand1);
21697
21698 TWO52 = ix86_gen_TWO52 (mode);
21699 xa = ix86_expand_sse_fabs (res, &mask);
21700 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21701
21702 /* load nextafter (0.5, 0.0) */
21703 fmt = REAL_MODE_FORMAT (mode);
21704 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21705 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21706
21707 /* xa = xa + 0.5 */
21708 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21709 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21710
21711 /* xa = (double)(int64_t)xa */
21712 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21713 expand_fix (xi, xa, 0);
21714 expand_float (xa, xi, 0);
21715
21716 /* res = copysign (xa, operand1) */
21717 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21718
21719 emit_label (label);
21720 LABEL_NUSES (label) = 1;
21721
21722 emit_move_insn (operand0, res);
21723 }
21724
21725 #include "gt-i386.h"