[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return the index of the given mode in the multiply and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
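/* Illustrative sketch of how MODE_INDEX is meant to be used: it maps QImode,
   HImode, SImode and DImode to rows 0-3 of the 5-entry per-mode cost arrays
   in the tables below, and everything else to the catch-all row 4.  The
   field name mult_init here is an assumption made only for the example, not
   necessarily the real struct processor_costs member name.  */
#if 0
static int
example_mult_cost (const struct processor_costs *c, enum machine_mode mode)
{
  /* Index the "cost of starting multiply" table by the operand mode.  */
  return c->mult_init[MODE_INDEX (mode)];
}
#endif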
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
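/* Worked out under the assumption stated above (COSTS_N_INSNS (N) == (N) * 4):
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so in the size table below a
   two-byte instruction such as an add is costed the same as one average
   instruction in the speed-oriented tables.  */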
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 (See the sketch after this table for how these size/algorithm pairs are consulted.) */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
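/* The memcpy/memset descriptors above and in the other cost tables pair a
   maximum block size with the algorithm to use up to that size; an entry
   with max == -1 is the unbounded fallback.  The sketch below shows how such
   a table could be consulted.  It is illustrative only: the type and
   function names are made up and deliberately simplified, not the real
   stringop_algs declarations from i386.h.  */
#if 0
enum example_alg { ex_libcall, ex_loop, ex_unrolled_loop, ex_rep_prefix };

struct example_strategy { long max; enum example_alg alg; };

/* Walk the (max, alg) pairs in order and return the first algorithm whose
   size bound covers COUNT; a max of -1 matches any size.  */
static enum example_alg
example_decide_alg (const struct example_strategy *table, long count)
{
  int i;
  for (i = 0; ; i++)
    if (table[i].max == -1 || count <= table[i].max)
      return table[i].alg;
}
#endif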
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches completely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instructions for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, the libcall can use
599 nontemporal accesses and beat the inline version considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches completely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, the libcall
673 can use nontemporal accesses and beat the inline version considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 that cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_GEODE (1<<PROCESSOR_GEODE)
988 #define m_K6_GEODE (m_K6 | m_GEODE)
989 #define m_K6 (1<<PROCESSOR_K6)
990 #define m_ATHLON (1<<PROCESSOR_ATHLON)
991 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
992 #define m_K8 (1<<PROCESSOR_K8)
993 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
994 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
995 #define m_NOCONA (1<<PROCESSOR_NOCONA)
996 #define m_CORE2 (1<<PROCESSOR_CORE2)
997 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
998 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
999 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1000 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
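/* Each m_* constant above is a one-bit mask identifying a processor, and the
   const int x86_* tuning flags below OR those masks together to record which
   processors a heuristic applies to.  A flag is then tested against the bit
   of the processor currently being tuned for.  The macro names below are
   made up for illustration; the real TARGET_* test macros live in i386.h.  */
#if 0
#define EXAMPLE_TUNEMASK (1 << (int) ix86_tune)
#define EXAMPLE_TARGET_USE_LEAVE (x86_use_leave & EXAMPLE_TUNEMASK)
#endif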
1001
1002 /* Generic instruction choice should be the common subset of supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1004
1005 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
1006 Generic64 seems like a good code-size tradeoff. We can't enable it for 32-bit
1007 generic because it does not work well with PPro based chips. */
1008 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
1009 | m_GENERIC64;
1010 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1011 | m_NOCONA | m_CORE2 | m_GENERIC;
1012 const int x86_zero_extend_with_and = m_486 | m_PENT;
1013 /* Enable to zero extend integer registers to avoid partial dependencies */
1014 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1015 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
1016 const int x86_double_with_add = ~m_386;
1017 const int x86_use_bit_test = m_386;
1018 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
1019 | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1021 | m_NOCONA;
1022 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
1023 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
1024 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1025 /* Branch hints were put in P4 based on simulation results. But
1026 after P4 was made, no performance benefit was observed with
1027 branch hints; they also increase the code size. As a result,
1028 icc never generates branch hints. */
1029 const int x86_branch_hints = 0;
1030 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
1032 /* We probably ought to watch for partial register stalls on the Generic32
1033 compilation setting as well. However, in the current implementation the
1034 partial register stalls are not eliminated very well - they can
1035 be introduced via subregs synthesized by combine and can happen
1036 in caller/callee saving sequences.
1037 Because this option pays back little on PPro based chips and conflicts
1038 with the partial register dependencies used by Athlon/P4 based chips, it is
1039 better to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall = m_PPRO;
1041 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
1042 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
1043 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
1044 | m_CORE2 | m_GENERIC);
1045 const int x86_use_mov0 = m_K6;
1046 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
1047 const int x86_read_modify_write = ~m_PENT;
1048 const int x86_read_modify = ~(m_PENT | m_PPRO);
1049 const int x86_split_long_moves = m_PPRO;
1050 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
1051 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1052 /* m_PENT4 ? */
1053 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
1054 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
1055 const int x86_qimode_math = ~(0);
1056 const int x86_promote_qi_regs = 0;
1057 /* On PPro this flag is meant to avoid partial register stalls. Just like
1058 x86_partial_reg_stall, this option might be considered for Generic32
1059 if our scheme for avoiding partial stalls were more effective. */
1060 const int x86_himode_math = ~(m_PPRO);
1061 const int x86_promote_hi_regs = m_PPRO;
1062 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1063 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1064 | m_CORE2 | m_GENERIC;
1065 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1066 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1067 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
1068 | m_CORE2 | m_GENERIC;
1069 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1070 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1071 /* Enable if integer moves are preferred for DFmode copies */
1072 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1073 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
1074 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1075 | m_CORE2 | m_GENERIC;
1076 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1077 | m_CORE2 | m_GENERIC;
1078 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1079 for outgoing arguments will be computed and placed into the variable
1080 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1081 for each call; instead, the function prologue should increase the stack frame
1082 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1083 not proper. */
1084 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
1085 | m_NOCONA | m_PPRO | m_CORE2
1086 | m_GENERIC;
1087 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1088 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1089 const int x86_shift1 = ~m_486;
1090 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
1091 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1092 | m_NOCONA | m_CORE2 | m_GENERIC;
1093 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
1094 that treat 128bit SSE registers as single units and K8 based chips that
1095 divide SSE registers into two 64bit halves.
1096 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
1097 to allow register renaming on 128bit SSE units, but usually results in one
1098 extra microop on 64bit SSE units. Experimental results show that disabling
1099 this option on P4 brings over a 20% SPECfp regression, while enabling it on
1100 K8 brings roughly a 2.4% regression that can be partly masked by careful
1101 scheduling of moves. */
1102 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1103 | m_GENERIC | m_AMDFAM10;
1104 /* Set for machines where the type and dependencies are resolved on SSE
1105 register parts instead of whole registers, so we may maintain just the
1106 lower part of scalar values in the proper format, leaving the upper part
1107 undefined. */
1108 const int x86_sse_split_regs = m_ATHLON_K8;
1109 /* Code generation for scalar reg-reg moves of single and double precision data:
1110 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1111 movaps reg, reg
1112 else
1113 movss reg, reg
1114 if (x86_sse_partial_reg_dependency == true)
1115 movapd reg, reg
1116 else
1117 movsd reg, reg
1118
1119 Code generation for scalar loads of double precision data:
1120 if (x86_sse_split_regs == true)
1121 movlpd mem, reg (gas syntax)
1122 else
1123 movsd mem, reg
1124
1125 Code generation for unaligned packed loads of single precision data
1126 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1127 if (x86_sse_unaligned_move_optimal)
1128 movups mem, reg
1129
1130 if (x86_sse_partial_reg_dependency == true)
1131 {
1132 xorps reg, reg
1133 movlps mem, reg
1134 movhps mem+8, reg
1135 }
1136 else
1137 {
1138 movlps mem, reg
1139 movhps mem+8, reg
1140 }
1141
1142 Code generation for unaligned packed loads of double precision data
1143 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1144 if (x86_sse_unaligned_move_optimal)
1145 movupd mem, reg
1146
1147 if (x86_sse_split_regs == true)
1148 {
1149 movlpd mem, reg
1150 movhpd mem+8, reg
1151 }
1152 else
1153 {
1154 movsd mem, reg
1155 movhpd mem+8, reg
1156 }
1157 */
1158 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
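/* A minimal sketch of the first case in the table above (scalar SFmode
   reg-reg copies): chips with partial-register dependencies or split SSE
   registers prefer the full 128-bit movaps, while the others can use movss.
   The function name and boolean parameters are made up for illustration;
   the real decisions are made in the move expanders and insn templates.  */
#if 0
static const char *
example_sf_regreg_mnemonic (int partial_reg_dependency, int split_regs)
{
  return (partial_reg_dependency || split_regs) ? "movaps" : "movss";
}
#endif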
1159 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1160 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1161 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1162 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1163
1164 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
1165
1166 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1167 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1168 /* Some CPU cores are not able to predict more than 4 branch instructions in
1169 the 16 byte window. */
1170 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1171 | m_NOCONA | m_CORE2 | m_GENERIC;
1172 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1173 | m_CORE2 | m_GENERIC;
1174 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1175 /* Compare and exchange was added for 80486. */
1176 const int x86_cmpxchg = ~m_386;
1177 /* Compare and exchange 8 bytes was added for pentium. */
1178 const int x86_cmpxchg8b = ~(m_386 | m_486);
1179 /* Exchange and add was added for 80486. */
1180 const int x86_xadd = ~m_386;
1181 /* Byteswap was added for 80486. */
1182 const int x86_bswap = ~m_386;
1183 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1184
1185 static enum stringop_alg stringop_alg = no_stringop;
1186
1187 /* In case the average insn count for a single function invocation is
1188 lower than this constant, emit fast (but longer) prologue and
1189 epilogue code. */
1190 #define FAST_PROLOGUE_INSN_COUNT 20
1191
1192 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1193 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1194 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1195 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1196
1197 /* Array of the smallest class containing reg number REGNO, indexed by
1198 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1199
1200 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1201 {
1202 /* ax, dx, cx, bx */
1203 AREG, DREG, CREG, BREG,
1204 /* si, di, bp, sp */
1205 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1206 /* FP registers */
1207 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1208 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1209 /* arg pointer */
1210 NON_Q_REGS,
1211 /* flags, fpsr, fpcr, frame */
1212 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1213 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1214 SSE_REGS, SSE_REGS,
1215 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1216 MMX_REGS, MMX_REGS,
1217 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1218 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1219 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1220 SSE_REGS, SSE_REGS,
1221 };
1222
1223 /* The "default" register map used in 32bit mode. */
1224
1225 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1226 {
1227 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1228 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1229 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1234 };
1235
1236 static int const x86_64_int_parameter_registers[6] =
1237 {
1238 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1239 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1240 };
1241
1242 static int const x86_64_int_return_registers[4] =
1243 {
1244 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1245 };
1246
1247 /* The "default" register map used in 64bit mode. */
1248 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1249 {
1250 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1251 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1252 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1253 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1254 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1255 8,9,10,11,12,13,14,15, /* extended integer registers */
1256 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1257 };
1258
1259 /* Define the register numbers to be used in Dwarf debugging information.
1260 The SVR4 reference port C compiler uses the following register numbers
1261 in its Dwarf output code:
1262 0 for %eax (gcc regno = 0)
1263 1 for %ecx (gcc regno = 2)
1264 2 for %edx (gcc regno = 1)
1265 3 for %ebx (gcc regno = 3)
1266 4 for %esp (gcc regno = 7)
1267 5 for %ebp (gcc regno = 6)
1268 6 for %esi (gcc regno = 4)
1269 7 for %edi (gcc regno = 5)
1270 The following three DWARF register numbers are never generated by
1271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1272 believes these numbers have these meanings.
1273 8 for %eip (no gcc equivalent)
1274 9 for %eflags (gcc regno = 17)
1275 10 for %trapno (no gcc equivalent)
1276 It is not at all clear how we should number the FP stack registers
1277 for the x86 architecture. If the version of SDB on x86/svr4 were
1278 a bit less brain dead with respect to floating-point then we would
1279 have a precedent to follow with respect to DWARF register numbers
1280 for x86 FP registers, but the SDB on x86/svr4 is so completely
1281 broken with respect to FP registers that it is hardly worth thinking
1282 of it as something to strive for compatibility with.
1283 The version of x86/svr4 SDB I have at the moment does (partially)
1284 seem to believe that DWARF register number 11 is associated with
1285 the x86 register %st(0), but that's about all. Higher DWARF
1286 register numbers don't seem to be associated with anything in
1287 particular, and even for DWARF regno 11, SDB only seems to under-
1288 stand that it should say that a variable lives in %st(0) (when
1289 asked via an `=' command) if we said it was in DWARF regno 11,
1290 but SDB still prints garbage when asked for the value of the
1291 variable in question (via a `/' command).
1292 (Also note that the labels SDB prints for various FP stack regs
1293 when doing an `x' command are all wrong.)
1294 Note that these problems generally don't affect the native SVR4
1295 C compiler because it doesn't allow the use of -O with -g and
1296 because when it is *not* optimizing, it allocates a memory
1297 location for each floating-point variable, and the memory
1298 location is what gets described in the DWARF AT_location
1299 attribute for the variable in question.
1300 Regardless of the severe mental illness of the x86/svr4 SDB, we
1301 do something sensible here and we use the following DWARF
1302 register numbers. Note that these are all stack-top-relative
1303 numbers.
1304 11 for %st(0) (gcc regno = 8)
1305 12 for %st(1) (gcc regno = 9)
1306 13 for %st(2) (gcc regno = 10)
1307 14 for %st(3) (gcc regno = 11)
1308 15 for %st(4) (gcc regno = 12)
1309 16 for %st(5) (gcc regno = 13)
1310 17 for %st(6) (gcc regno = 14)
1311 18 for %st(7) (gcc regno = 15)
1312 */
1313 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1314 {
1315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1317 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1320 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1321 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1322 };
1323
1324 /* Test and compare insns in i386.md store the information needed to
1325 generate branch and scc insns here. */
1326
1327 rtx ix86_compare_op0 = NULL_RTX;
1328 rtx ix86_compare_op1 = NULL_RTX;
1329 rtx ix86_compare_emitted = NULL_RTX;
1330
1331 /* Size of the register save area. */
1332 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
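/* Worked example, assuming the usual x86-64 values REGPARM_MAX == 6,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8: the register save area
   is 6 * 8 + 8 * 16 = 176 bytes.  */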
1333
1334 /* Define the structure for the machine field in struct function. */
1335
1336 struct stack_local_entry GTY(())
1337 {
1338 unsigned short mode;
1339 unsigned short n;
1340 rtx rtl;
1341 struct stack_local_entry *next;
1342 };
1343
1344 /* Structure describing stack frame layout.
1345 Stack grows downward:
1346
1347 [arguments]
1348 <- ARG_POINTER
1349 saved pc
1350
1351 saved frame pointer if frame_pointer_needed
1352 <- HARD_FRAME_POINTER
1353 [saved regs]
1354
1355 [padding1] \
1356 )
1357 [va_arg registers] (
1358 > to_allocate <- FRAME_POINTER
1359 [frame] (
1360 )
1361 [padding2] /
1362 */
1363 struct ix86_frame
1364 {
1365 int nregs;
1366 int padding1;
1367 int va_arg_size;
1368 HOST_WIDE_INT frame;
1369 int padding2;
1370 int outgoing_arguments_size;
1371 int red_zone_size;
1372
1373 HOST_WIDE_INT to_allocate;
1374 /* The offsets relative to ARG_POINTER. */
1375 HOST_WIDE_INT frame_pointer_offset;
1376 HOST_WIDE_INT hard_frame_pointer_offset;
1377 HOST_WIDE_INT stack_pointer_offset;
1378
1379 /* When save_regs_using_mov is set, emit prologue using
1380 move instead of push instructions. */
1381 bool save_regs_using_mov;
1382 };
1383
1384 /* Code model option. */
1385 enum cmodel ix86_cmodel;
1386 /* Asm dialect. */
1387 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1388 /* TLS dialects. */
1389 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1390
1391 /* Which unit we are generating floating point math for. */
1392 enum fpmath_unit ix86_fpmath;
1393
1394 /* Which cpu are we scheduling for. */
1395 enum processor_type ix86_tune;
1396 /* Which instruction set architecture to use. */
1397 enum processor_type ix86_arch;
1398
1399 /* True if the SSE prefetch instruction is not a NOP. */
1400 int x86_prefetch_sse;
1401
1402 /* True if cmpxchg16b is supported. */
1403 int x86_cmpxchg16b;
1404
1405 /* ix86_regparm_string as a number */
1406 static int ix86_regparm;
1407
1408 /* -mstackrealign option */
1409 extern int ix86_force_align_arg_pointer;
1410 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1411
1412 /* Preferred alignment for stack boundary in bits. */
1413 unsigned int ix86_preferred_stack_boundary;
1414
1415 /* Values 1-5: see jump.c */
1416 int ix86_branch_cost;
1417
1418 /* Variables which are this size or smaller are put in the data/bss
1419 or ldata/lbss sections. */
1420
1421 int ix86_section_threshold = 65536;
1422
1423 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1424 char internal_label_prefix[16];
1425 int internal_label_prefix_len;
1426 \f
1427 static bool ix86_handle_option (size_t, const char *, int);
1428 static void output_pic_addr_const (FILE *, rtx, int);
1429 static void put_condition_code (enum rtx_code, enum machine_mode,
1430 int, int, FILE *);
1431 static const char *get_some_local_dynamic_name (void);
1432 static int get_some_local_dynamic_name_1 (rtx *, void *);
1433 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1434 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1435 rtx *);
1436 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1437 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1438 enum machine_mode);
1439 static rtx get_thread_pointer (int);
1440 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1441 static void get_pc_thunk_name (char [32], unsigned int);
1442 static rtx gen_push (rtx);
1443 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1444 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1445 static struct machine_function * ix86_init_machine_status (void);
1446 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1447 static int ix86_nsaved_regs (void);
1448 static void ix86_emit_save_regs (void);
1449 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1450 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1451 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1452 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1453 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1454 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1455 static int ix86_issue_rate (void);
1456 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1457 static int ia32_multipass_dfa_lookahead (void);
1458 static void ix86_init_mmx_sse_builtins (void);
1459 static rtx x86_this_parameter (tree);
1460 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1461 HOST_WIDE_INT, tree);
1462 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1463 static void x86_file_start (void);
1464 static void ix86_reorg (void);
1465 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1466 static tree ix86_build_builtin_va_list (void);
1467 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1468 tree, int *, int);
1469 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1470 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1471 static bool ix86_vector_mode_supported_p (enum machine_mode);
1472
1473 static int ix86_address_cost (rtx);
1474 static bool ix86_cannot_force_const_mem (rtx);
1475 static rtx ix86_delegitimize_address (rtx);
1476
1477 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1478
1479 struct builtin_description;
1480 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1481 tree, rtx);
1482 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1483 tree, rtx);
1484 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1485 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1486 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1487 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1488 static rtx safe_vector_operand (rtx, enum machine_mode);
1489 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1490 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1491 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1492 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1493 static int ix86_fp_comparison_cost (enum rtx_code code);
1494 static unsigned int ix86_select_alt_pic_regnum (void);
1495 static int ix86_save_reg (unsigned int, int);
1496 static void ix86_compute_frame_layout (struct ix86_frame *);
1497 static int ix86_comp_type_attributes (tree, tree);
1498 static int ix86_function_regparm (tree, tree);
1499 const struct attribute_spec ix86_attribute_table[];
1500 static bool ix86_function_ok_for_sibcall (tree, tree);
1501 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1502 static int ix86_value_regno (enum machine_mode, tree, tree);
1503 static bool contains_128bit_aligned_vector_p (tree);
1504 static rtx ix86_struct_value_rtx (tree, int);
1505 static bool ix86_ms_bitfield_layout_p (tree);
1506 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1507 static int extended_reg_mentioned_1 (rtx *, void *);
1508 static bool ix86_rtx_costs (rtx, int, int, int *);
1509 static int min_insn_size (rtx);
1510 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1511 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1512 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1513 tree, bool);
1514 static void ix86_init_builtins (void);
1515 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1516 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1517 static tree ix86_builtin_conversion (enum tree_code, tree);
1518 static const char *ix86_mangle_fundamental_type (tree);
1519 static tree ix86_stack_protect_fail (void);
1520 static rtx ix86_internal_arg_pointer (void);
1521 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1522 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1523 rtx, rtx, int);
1524
1525 /* This function is only used on Solaris. */
1526 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1527 ATTRIBUTE_UNUSED;
1528
1529 /* Register class used for passing given 64bit part of the argument.
1530 These represent classes as documented by the PS ABI, with the exception
1531 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1532 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1533
1534 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1535 whenever possible, i.e. when the upper half of the slot contains only padding.
1536 */
1537 enum x86_64_reg_class
1538 {
1539 X86_64_NO_CLASS,
1540 X86_64_INTEGER_CLASS,
1541 X86_64_INTEGERSI_CLASS,
1542 X86_64_SSE_CLASS,
1543 X86_64_SSESF_CLASS,
1544 X86_64_SSEDF_CLASS,
1545 X86_64_SSEUP_CLASS,
1546 X86_64_X87_CLASS,
1547 X86_64_X87UP_CLASS,
1548 X86_64_COMPLEX_X87_CLASS,
1549 X86_64_MEMORY_CLASS
1550 };
1551 static const char * const x86_64_reg_class_name[] = {
1552 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1553 "sseup", "x87", "x87up", "cplx87", "no"
1554 };
1555
1556 #define MAX_CLASSES 4
1557
1558 /* Table of constants used by fldpi, fldln2, etc.... */
1559 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1560 static bool ext_80387_constants_init = 0;
1561 static void init_ext_80387_constants (void);
1562 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1563 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1564 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1565 static section *x86_64_elf_select_section (tree decl, int reloc,
1566 unsigned HOST_WIDE_INT align)
1567 ATTRIBUTE_UNUSED;
1568 \f
1569 /* Initialize the GCC target structure. */
1570 #undef TARGET_ATTRIBUTE_TABLE
1571 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1572 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1573 # undef TARGET_MERGE_DECL_ATTRIBUTES
1574 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1575 #endif
1576
1577 #undef TARGET_COMP_TYPE_ATTRIBUTES
1578 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1579
1580 #undef TARGET_INIT_BUILTINS
1581 #define TARGET_INIT_BUILTINS ix86_init_builtins
1582 #undef TARGET_EXPAND_BUILTIN
1583 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1584
1585 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1586 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1587 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1588 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1589
1590 #undef TARGET_ASM_FUNCTION_EPILOGUE
1591 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1592
1593 #undef TARGET_ENCODE_SECTION_INFO
1594 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1595 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1596 #else
1597 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1598 #endif
1599
1600 #undef TARGET_ASM_OPEN_PAREN
1601 #define TARGET_ASM_OPEN_PAREN ""
1602 #undef TARGET_ASM_CLOSE_PAREN
1603 #define TARGET_ASM_CLOSE_PAREN ""
1604
1605 #undef TARGET_ASM_ALIGNED_HI_OP
1606 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1607 #undef TARGET_ASM_ALIGNED_SI_OP
1608 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1609 #ifdef ASM_QUAD
1610 #undef TARGET_ASM_ALIGNED_DI_OP
1611 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1612 #endif
1613
1614 #undef TARGET_ASM_UNALIGNED_HI_OP
1615 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1616 #undef TARGET_ASM_UNALIGNED_SI_OP
1617 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1618 #undef TARGET_ASM_UNALIGNED_DI_OP
1619 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1620
1621 #undef TARGET_SCHED_ADJUST_COST
1622 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1623 #undef TARGET_SCHED_ISSUE_RATE
1624 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1625 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1626 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1627 ia32_multipass_dfa_lookahead
1628
1629 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1630 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1631
1632 #ifdef HAVE_AS_TLS
1633 #undef TARGET_HAVE_TLS
1634 #define TARGET_HAVE_TLS true
1635 #endif
1636 #undef TARGET_CANNOT_FORCE_CONST_MEM
1637 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1638 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1639 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1640
1641 #undef TARGET_DELEGITIMIZE_ADDRESS
1642 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1643
1644 #undef TARGET_MS_BITFIELD_LAYOUT_P
1645 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1646
1647 #if TARGET_MACHO
1648 #undef TARGET_BINDS_LOCAL_P
1649 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1650 #endif
1651
1652 #undef TARGET_ASM_OUTPUT_MI_THUNK
1653 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1654 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1655 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1656
1657 #undef TARGET_ASM_FILE_START
1658 #define TARGET_ASM_FILE_START x86_file_start
1659
1660 #undef TARGET_DEFAULT_TARGET_FLAGS
1661 #define TARGET_DEFAULT_TARGET_FLAGS \
1662 (TARGET_DEFAULT \
1663 | TARGET_64BIT_DEFAULT \
1664 | TARGET_SUBTARGET_DEFAULT \
1665 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1666
1667 #undef TARGET_HANDLE_OPTION
1668 #define TARGET_HANDLE_OPTION ix86_handle_option
1669
1670 #undef TARGET_RTX_COSTS
1671 #define TARGET_RTX_COSTS ix86_rtx_costs
1672 #undef TARGET_ADDRESS_COST
1673 #define TARGET_ADDRESS_COST ix86_address_cost
1674
1675 #undef TARGET_FIXED_CONDITION_CODE_REGS
1676 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1677 #undef TARGET_CC_MODES_COMPATIBLE
1678 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1679
1680 #undef TARGET_MACHINE_DEPENDENT_REORG
1681 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1682
1683 #undef TARGET_BUILD_BUILTIN_VA_LIST
1684 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1685
1686 #undef TARGET_MD_ASM_CLOBBERS
1687 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1688
1689 #undef TARGET_PROMOTE_PROTOTYPES
1690 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1691 #undef TARGET_STRUCT_VALUE_RTX
1692 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1693 #undef TARGET_SETUP_INCOMING_VARARGS
1694 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1695 #undef TARGET_MUST_PASS_IN_STACK
1696 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1697 #undef TARGET_PASS_BY_REFERENCE
1698 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1699 #undef TARGET_INTERNAL_ARG_POINTER
1700 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1701 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1702 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1703
1704 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1705 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1706
1707 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1708 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1709
1710 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1711 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1712
1713 #ifdef HAVE_AS_TLS
1714 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1715 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1716 #endif
1717
1718 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1719 #undef TARGET_INSERT_ATTRIBUTES
1720 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1721 #endif
1722
1723 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1724 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1725
1726 #undef TARGET_STACK_PROTECT_FAIL
1727 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1728
1729 #undef TARGET_FUNCTION_VALUE
1730 #define TARGET_FUNCTION_VALUE ix86_function_value
1731
1732 struct gcc_target targetm = TARGET_INITIALIZER;
1733
1734 \f
1735 /* The svr4 ABI for the i386 says that records and unions are returned
1736 in memory. */
1737 #ifndef DEFAULT_PCC_STRUCT_RETURN
1738 #define DEFAULT_PCC_STRUCT_RETURN 1
1739 #endif
1740
1741 /* Implement TARGET_HANDLE_OPTION. */
1742
1743 static bool
1744 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1745 {
1746 switch (code)
1747 {
1748 case OPT_m3dnow:
1749 if (!value)
1750 {
1751 target_flags &= ~MASK_3DNOW_A;
1752 target_flags_explicit |= MASK_3DNOW_A;
1753 }
1754 return true;
1755
1756 case OPT_mmmx:
1757 if (!value)
1758 {
1759 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1760 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1761 }
1762 return true;
1763
1764 case OPT_msse:
1765 if (!value)
1766 {
1767 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1768 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1769 }
1770 return true;
1771
1772 case OPT_msse2:
1773 if (!value)
1774 {
1775 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1776 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1777 }
1778 return true;
1779
1780 case OPT_msse3:
1781 if (!value)
1782 {
1783 target_flags &= ~MASK_SSE4A;
1784 target_flags_explicit |= MASK_SSE4A;
1785 }
1786 return true;
1787
1788 default:
1789 return true;
1790 }
1791 }
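/* Illustration of how the negative options above cascade (assuming options
   are processed left to right): "-msse3 -mno-sse2" ends up with neither SSE2
   nor SSE3 enabled, because handling -mno-sse2 clears MASK_SSE2 and, per the
   OPT_msse2 case above, MASK_SSE3 and MASK_SSE4A as well.  */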
1792
1793 /* Sometimes certain combinations of command options do not make
1794 sense on a particular target machine. You can define a macro
1795 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1796 defined, is executed once just after all the command options have
1797 been parsed.
1798
1799 Don't use this macro to turn on various extra optimizations for
1800 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1801
1802 void
1803 override_options (void)
1804 {
1805 int i;
1806 int ix86_tune_defaulted = 0;
1807
1808 /* Comes from final.c -- no real reason to change it. */
1809 #define MAX_CODE_ALIGN 16
1810
1811 static struct ptt
1812 {
1813 const struct processor_costs *cost; /* Processor costs */
1814 const int target_enable; /* Target flags to enable. */
1815 const int target_disable; /* Target flags to disable. */
1816 const int align_loop; /* Default alignments. */
1817 const int align_loop_max_skip;
1818 const int align_jump;
1819 const int align_jump_max_skip;
1820 const int align_func;
1821 }
1822 const processor_target_table[PROCESSOR_max] =
1823 {
1824 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1825 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1826 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1827 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1828 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1829 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1830 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1831 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1832 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1833 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1834 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1835 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1836 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1837 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1838 };
1839
1840 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1841 static struct pta
1842 {
1843 const char *const name; /* processor name or nickname. */
1844 const enum processor_type processor;
1845 const enum pta_flags
1846 {
1847 PTA_SSE = 1,
1848 PTA_SSE2 = 2,
1849 PTA_SSE3 = 4,
1850 PTA_MMX = 8,
1851 PTA_PREFETCH_SSE = 16,
1852 PTA_3DNOW = 32,
1853 PTA_3DNOW_A = 64,
1854 PTA_64BIT = 128,
1855 PTA_SSSE3 = 256,
1856 PTA_CX16 = 512,
1857 PTA_POPCNT = 1024,
1858 PTA_ABM = 2048,
1859 PTA_SSE4A = 4096
1860 } flags;
1861 }
1862 const processor_alias_table[] =
1863 {
1864 {"i386", PROCESSOR_I386, 0},
1865 {"i486", PROCESSOR_I486, 0},
1866 {"i586", PROCESSOR_PENTIUM, 0},
1867 {"pentium", PROCESSOR_PENTIUM, 0},
1868 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1869 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1870 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1871 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1872 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1873 {"i686", PROCESSOR_PENTIUMPRO, 0},
1874 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1875 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1876 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1877 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1878 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1879 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1880 | PTA_MMX | PTA_PREFETCH_SSE},
1881 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1882 | PTA_MMX | PTA_PREFETCH_SSE},
1883 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1884 | PTA_MMX | PTA_PREFETCH_SSE},
1885 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1886 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1887 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1888 | PTA_64BIT | PTA_MMX
1889 | PTA_PREFETCH_SSE | PTA_CX16},
1890 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1891 | PTA_3DNOW_A},
1892 {"k6", PROCESSOR_K6, PTA_MMX},
1893 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1894 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1895 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1896 | PTA_3DNOW_A},
1897 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1898 | PTA_3DNOW | PTA_3DNOW_A},
1899 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1900 | PTA_3DNOW_A | PTA_SSE},
1901 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1902 | PTA_3DNOW_A | PTA_SSE},
1903 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1904 | PTA_3DNOW_A | PTA_SSE},
1905 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1906 | PTA_SSE | PTA_SSE2 },
1907 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1908 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1909 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1910 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1911 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1912 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1913 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1914 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1915 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1916 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1917 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1918 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1919 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1920 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1921 };
1922
1923 int const pta_size = ARRAY_SIZE (processor_alias_table);
1924
1925 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1926 SUBTARGET_OVERRIDE_OPTIONS;
1927 #endif
1928
1929 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1930 SUBSUBTARGET_OVERRIDE_OPTIONS;
1931 #endif
1932
1933 /* -fPIC is the default for 64-bit Mach-O. */
1934 if (TARGET_MACHO && TARGET_64BIT)
1935 flag_pic = 2;
1936
1937 /* Set the default values for switches whose default depends on TARGET_64BIT
1938 in case they weren't overwritten by command line options. */
1939 if (TARGET_64BIT)
1940 {
1941 /* Mach-O doesn't support omitting the frame pointer for now. */
1942 if (flag_omit_frame_pointer == 2)
1943 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1944 if (flag_asynchronous_unwind_tables == 2)
1945 flag_asynchronous_unwind_tables = 1;
1946 if (flag_pcc_struct_return == 2)
1947 flag_pcc_struct_return = 0;
1948 }
1949 else
1950 {
1951 if (flag_omit_frame_pointer == 2)
1952 flag_omit_frame_pointer = 0;
1953 if (flag_asynchronous_unwind_tables == 2)
1954 flag_asynchronous_unwind_tables = 0;
1955 if (flag_pcc_struct_return == 2)
1956 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1957 }
1958
1959 /* Need to check -mtune=generic first. */
1960 if (ix86_tune_string)
1961 {
1962 if (!strcmp (ix86_tune_string, "generic")
1963 || !strcmp (ix86_tune_string, "i686")
1964 /* As special support for cross compilers we read -mtune=native
1965 as -mtune=generic. With native compilers we won't see the
1966 -mtune=native, as it was changed by the driver. */
1967 || !strcmp (ix86_tune_string, "native"))
1968 {
1969 if (TARGET_64BIT)
1970 ix86_tune_string = "generic64";
1971 else
1972 ix86_tune_string = "generic32";
1973 }
1974 else if (!strncmp (ix86_tune_string, "generic", 7))
1975 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1976 }
1977 else
1978 {
1979 if (ix86_arch_string)
1980 ix86_tune_string = ix86_arch_string;
1981 if (!ix86_tune_string)
1982 {
1983 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1984 ix86_tune_defaulted = 1;
1985 }
1986
1987 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1988 need to use a sensible tune option. */
1989 if (!strcmp (ix86_tune_string, "generic")
1990 || !strcmp (ix86_tune_string, "x86-64")
1991 || !strcmp (ix86_tune_string, "i686"))
1992 {
1993 if (TARGET_64BIT)
1994 ix86_tune_string = "generic64";
1995 else
1996 ix86_tune_string = "generic32";
1997 }
1998 }
1999 if (ix86_stringop_string)
2000 {
2001 if (!strcmp (ix86_stringop_string, "rep_byte"))
2002 stringop_alg = rep_prefix_1_byte;
2003 else if (!strcmp (ix86_stringop_string, "libcall"))
2004 stringop_alg = libcall;
2005 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2006 stringop_alg = rep_prefix_4_byte;
2007 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2008 stringop_alg = rep_prefix_8_byte;
2009 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2010 stringop_alg = loop_1_byte;
2011 else if (!strcmp (ix86_stringop_string, "loop"))
2012 stringop_alg = loop;
2013 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2014 stringop_alg = unrolled_loop;
2015 else
2016 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2017 }
2018 if (!strcmp (ix86_tune_string, "x86-64"))
2019 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2020 "-mtune=generic instead as appropriate.");
2021
2022 if (!ix86_arch_string)
2023 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2024 if (!strcmp (ix86_arch_string, "generic"))
2025 error ("generic CPU can be used only for -mtune= switch");
2026 if (!strncmp (ix86_arch_string, "generic", 7))
2027 error ("bad value (%s) for -march= switch", ix86_arch_string);
2028
2029 if (ix86_cmodel_string != 0)
2030 {
2031 if (!strcmp (ix86_cmodel_string, "small"))
2032 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2033 else if (!strcmp (ix86_cmodel_string, "medium"))
2034 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2035 else if (flag_pic)
2036 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2037 else if (!strcmp (ix86_cmodel_string, "32"))
2038 ix86_cmodel = CM_32;
2039 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2040 ix86_cmodel = CM_KERNEL;
2041 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2042 ix86_cmodel = CM_LARGE;
2043 else
2044 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2045 }
2046 else
2047 {
2048 ix86_cmodel = CM_32;
2049 if (TARGET_64BIT)
2050 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2051 }
2052 if (ix86_asm_string != 0)
2053 {
2054 if (! TARGET_MACHO
2055 && !strcmp (ix86_asm_string, "intel"))
2056 ix86_asm_dialect = ASM_INTEL;
2057 else if (!strcmp (ix86_asm_string, "att"))
2058 ix86_asm_dialect = ASM_ATT;
2059 else
2060 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2061 }
2062 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2063 error ("code model %qs not supported in the %s bit mode",
2064 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2065 if (ix86_cmodel == CM_LARGE)
2066 sorry ("code model %<large%> not supported yet");
2067 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2068 sorry ("%i-bit mode not compiled in",
2069 (target_flags & MASK_64BIT) ? 64 : 32);
2070
2071 for (i = 0; i < pta_size; i++)
2072 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2073 {
2074 ix86_arch = processor_alias_table[i].processor;
2075 /* Default cpu tuning to the architecture. */
2076 ix86_tune = ix86_arch;
2077 if (processor_alias_table[i].flags & PTA_MMX
2078 && !(target_flags_explicit & MASK_MMX))
2079 target_flags |= MASK_MMX;
2080 if (processor_alias_table[i].flags & PTA_3DNOW
2081 && !(target_flags_explicit & MASK_3DNOW))
2082 target_flags |= MASK_3DNOW;
2083 if (processor_alias_table[i].flags & PTA_3DNOW_A
2084 && !(target_flags_explicit & MASK_3DNOW_A))
2085 target_flags |= MASK_3DNOW_A;
2086 if (processor_alias_table[i].flags & PTA_SSE
2087 && !(target_flags_explicit & MASK_SSE))
2088 target_flags |= MASK_SSE;
2089 if (processor_alias_table[i].flags & PTA_SSE2
2090 && !(target_flags_explicit & MASK_SSE2))
2091 target_flags |= MASK_SSE2;
2092 if (processor_alias_table[i].flags & PTA_SSE3
2093 && !(target_flags_explicit & MASK_SSE3))
2094 target_flags |= MASK_SSE3;
2095 if (processor_alias_table[i].flags & PTA_SSSE3
2096 && !(target_flags_explicit & MASK_SSSE3))
2097 target_flags |= MASK_SSSE3;
2098 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2099 x86_prefetch_sse = true;
2100 if (processor_alias_table[i].flags & PTA_CX16)
2101 x86_cmpxchg16b = true;
2102 if (processor_alias_table[i].flags & PTA_POPCNT
2103 && !(target_flags_explicit & MASK_POPCNT))
2104 target_flags |= MASK_POPCNT;
2105 if (processor_alias_table[i].flags & PTA_ABM
2106 && !(target_flags_explicit & MASK_ABM))
2107 target_flags |= MASK_ABM;
2108 if (processor_alias_table[i].flags & PTA_SSE4A
2109 && !(target_flags_explicit & MASK_SSE4A))
2110 target_flags |= MASK_SSE4A;
2111 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2112 error ("CPU you selected does not support x86-64 "
2113 "instruction set");
2114 break;
2115 }
2116
2117 if (i == pta_size)
2118 error ("bad value (%s) for -march= switch", ix86_arch_string);
2119
2120 for (i = 0; i < pta_size; i++)
2121 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2122 {
2123 ix86_tune = processor_alias_table[i].processor;
2124 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2125 {
2126 if (ix86_tune_defaulted)
2127 {
2128 ix86_tune_string = "x86-64";
2129 for (i = 0; i < pta_size; i++)
2130 if (! strcmp (ix86_tune_string,
2131 processor_alias_table[i].name))
2132 break;
2133 ix86_tune = processor_alias_table[i].processor;
2134 }
2135 else
2136 error ("CPU you selected does not support x86-64 "
2137 "instruction set");
2138 }
2139 /* Intel CPUs have always interpreted SSE prefetch instructions as
2140 NOPs; so, we can enable SSE prefetch instructions even when
2141 -mtune (rather than -march) points us to a processor that has them.
2142 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2143 higher processors. */
2144 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2145 x86_prefetch_sse = true;
2146 break;
2147 }
2148 if (i == pta_size)
2149 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2150
2151 if (optimize_size)
2152 ix86_cost = &size_cost;
2153 else
2154 ix86_cost = processor_target_table[ix86_tune].cost;
2155 target_flags |= processor_target_table[ix86_tune].target_enable;
2156 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2157
2158 /* Arrange to set up i386_stack_locals for all functions. */
2159 init_machine_status = ix86_init_machine_status;
2160
2161 /* Validate -mregparm= value. */
2162 if (ix86_regparm_string)
2163 {
2164 i = atoi (ix86_regparm_string);
2165 if (i < 0 || i > REGPARM_MAX)
2166 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2167 else
2168 ix86_regparm = i;
2169 }
2170 else
2171 if (TARGET_64BIT)
2172 ix86_regparm = REGPARM_MAX;
2173
2174 /* If the user has provided any of the -malign-* options,
2175 warn and use that value only if -falign-* is not set.
2176 Remove this code in GCC 3.2 or later. */
2177 if (ix86_align_loops_string)
2178 {
2179 warning (0, "-malign-loops is obsolete, use -falign-loops");
2180 if (align_loops == 0)
2181 {
2182 i = atoi (ix86_align_loops_string);
2183 if (i < 0 || i > MAX_CODE_ALIGN)
2184 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2185 else
2186 align_loops = 1 << i;
2187 }
2188 }
2189
2190 if (ix86_align_jumps_string)
2191 {
2192 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2193 if (align_jumps == 0)
2194 {
2195 i = atoi (ix86_align_jumps_string);
2196 if (i < 0 || i > MAX_CODE_ALIGN)
2197 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2198 else
2199 align_jumps = 1 << i;
2200 }
2201 }
2202
2203 if (ix86_align_funcs_string)
2204 {
2205 warning (0, "-malign-functions is obsolete, use -falign-functions");
2206 if (align_functions == 0)
2207 {
2208 i = atoi (ix86_align_funcs_string);
2209 if (i < 0 || i > MAX_CODE_ALIGN)
2210 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2211 else
2212 align_functions = 1 << i;
2213 }
2214 }
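/* Note that the -malign-* arguments above are log2 exponents: for example
   -malign-loops=4 yields align_loops = 1 << 4 = 16, whereas the preferred
   -falign-* options take the alignment value itself.  */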
2215
2216 /* Default align_* from the processor table. */
2217 if (align_loops == 0)
2218 {
2219 align_loops = processor_target_table[ix86_tune].align_loop;
2220 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2221 }
2222 if (align_jumps == 0)
2223 {
2224 align_jumps = processor_target_table[ix86_tune].align_jump;
2225 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2226 }
2227 if (align_functions == 0)
2228 {
2229 align_functions = processor_target_table[ix86_tune].align_func;
2230 }
2231
2232 /* Validate -mbranch-cost= value, or provide default. */
2233 ix86_branch_cost = ix86_cost->branch_cost;
2234 if (ix86_branch_cost_string)
2235 {
2236 i = atoi (ix86_branch_cost_string);
2237 if (i < 0 || i > 5)
2238 error ("-mbranch-cost=%d is not between 0 and 5", i);
2239 else
2240 ix86_branch_cost = i;
2241 }
2242 if (ix86_section_threshold_string)
2243 {
2244 i = atoi (ix86_section_threshold_string);
2245 if (i < 0)
2246 error ("-mlarge-data-threshold=%d is negative", i);
2247 else
2248 ix86_section_threshold = i;
2249 }
2250
2251 if (ix86_tls_dialect_string)
2252 {
2253 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2254 ix86_tls_dialect = TLS_DIALECT_GNU;
2255 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2256 ix86_tls_dialect = TLS_DIALECT_GNU2;
2257 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2258 ix86_tls_dialect = TLS_DIALECT_SUN;
2259 else
2260 error ("bad value (%s) for -mtls-dialect= switch",
2261 ix86_tls_dialect_string);
2262 }
2263
2264 /* Keep nonleaf frame pointers. */
2265 if (flag_omit_frame_pointer)
2266 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2267 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2268 flag_omit_frame_pointer = 1;
2269
2270 /* If we're doing fast math, we don't care about comparison order
2271 wrt NaNs. This lets us use a shorter comparison sequence. */
2272 if (flag_finite_math_only)
2273 target_flags &= ~MASK_IEEE_FP;
2274
2275 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2276 since the insns won't need emulation. */
2277 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2278 target_flags &= ~MASK_NO_FANCY_MATH_387;
2279
2280 /* Likewise, if the target doesn't have a 387, or we've specified
2281 software floating point, don't use 387 inline intrinsics. */
2282 if (!TARGET_80387)
2283 target_flags |= MASK_NO_FANCY_MATH_387;
2284
2285 /* Turn on SSE3 builtins for -mssse3. */
2286 if (TARGET_SSSE3)
2287 target_flags |= MASK_SSE3;
2288
2289 /* Turn on SSE3 builtins for -msse4a. */
2290 if (TARGET_SSE4A)
2291 target_flags |= MASK_SSE3;
2292
2293 /* Turn on SSE2 builtins for -msse3. */
2294 if (TARGET_SSE3)
2295 target_flags |= MASK_SSE2;
2296
2297 /* Turn on SSE builtins for -msse2. */
2298 if (TARGET_SSE2)
2299 target_flags |= MASK_SSE;
2300
2301 /* Turn on MMX builtins for -msse. */
2302 if (TARGET_SSE)
2303 {
2304 target_flags |= MASK_MMX & ~target_flags_explicit;
2305 x86_prefetch_sse = true;
2306 }
2307
2308 /* Turn on MMX builtins for 3Dnow. */
2309 if (TARGET_3DNOW)
2310 target_flags |= MASK_MMX;
2311
2312 /* Turn on POPCNT builtins for -mabm. */
2313 if (TARGET_ABM)
2314 target_flags |= MASK_POPCNT;
2315
2316 if (TARGET_64BIT)
2317 {
2318 if (TARGET_ALIGN_DOUBLE)
2319 error ("-malign-double makes no sense in the 64bit mode");
2320 if (TARGET_RTD)
2321 error ("-mrtd calling convention not supported in the 64bit mode");
2322
2323 /* Enable by default the SSE and MMX builtins. Do allow the user to
2324 explicitly disable any of these. In particular, disabling SSE and
2325 MMX for kernel code is extremely useful. */
2326 target_flags
2327 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2328 & ~target_flags_explicit);
2329 }
2330 else
2331 {
2332 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2333 when the programmer takes care to keep the stack from being destroyed. */
2334 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2335 target_flags |= MASK_NO_RED_ZONE;
2336 }
2337
2338 /* Validate -mpreferred-stack-boundary= value, or provide default.
2339 The default of 128 bits is for Pentium III's SSE __m128. We can't
2340 change it because of optimize_size. Otherwise, we can't mix object
2341 files compiled with -Os and -On. */
2342 ix86_preferred_stack_boundary = 128;
2343 if (ix86_preferred_stack_boundary_string)
2344 {
2345 i = atoi (ix86_preferred_stack_boundary_string);
2346 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2347 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2348 TARGET_64BIT ? 4 : 2);
2349 else
2350 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2351 }
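/* Worked example: -mpreferred-stack-boundary=4 gives
   (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the default 16-byte
   stack alignment mentioned above.  */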
2352
2353 /* Accept -msseregparm only if at least SSE support is enabled. */
2354 if (TARGET_SSEREGPARM
2355 && ! TARGET_SSE)
2356 error ("-msseregparm used without SSE enabled");
2357
2358 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2359
2360 if (ix86_fpmath_string != 0)
2361 {
2362 if (! strcmp (ix86_fpmath_string, "387"))
2363 ix86_fpmath = FPMATH_387;
2364 else if (! strcmp (ix86_fpmath_string, "sse"))
2365 {
2366 if (!TARGET_SSE)
2367 {
2368 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2369 ix86_fpmath = FPMATH_387;
2370 }
2371 else
2372 ix86_fpmath = FPMATH_SSE;
2373 }
2374 else if (! strcmp (ix86_fpmath_string, "387,sse")
2375 || ! strcmp (ix86_fpmath_string, "sse,387"))
2376 {
2377 if (!TARGET_SSE)
2378 {
2379 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2380 ix86_fpmath = FPMATH_387;
2381 }
2382 else if (!TARGET_80387)
2383 {
2384 warning (0, "387 instruction set disabled, using SSE arithmetics");
2385 ix86_fpmath = FPMATH_SSE;
2386 }
2387 else
2388 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2389 }
2390 else
2391 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2392 }
2393
2394 /* If the i387 is disabled, then do not return values in it. */
2395 if (!TARGET_80387)
2396 target_flags &= ~MASK_FLOAT_RETURNS;
2397
2398 if ((x86_accumulate_outgoing_args & TUNEMASK)
2399 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2400 && !optimize_size)
2401 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2402
2403 /* ??? Unwind info is not correct around the CFG unless either a frame
2404 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2405 unwind info generation to be aware of the CFG and propagating states
2406 around edges. */
2407 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2408 || flag_exceptions || flag_non_call_exceptions)
2409 && flag_omit_frame_pointer
2410 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2411 {
2412 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2413 warning (0, "unwind tables currently require either a frame pointer "
2414 "or -maccumulate-outgoing-args for correctness");
2415 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2416 }
2417
2418 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2419 {
2420 char *p;
2421 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2422 p = strchr (internal_label_prefix, 'X');
2423 internal_label_prefix_len = p - internal_label_prefix;
2424 *p = '\0';
2425 }
2426
2427 /* When no scheduling description is available, disable the scheduler pass
2428 so it won't slow down the compilation and make x87 code slower. */
2429 if (!TARGET_SCHEDULE)
2430 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2431
2432 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2433 set_param_value ("simultaneous-prefetches",
2434 ix86_cost->simultaneous_prefetches);
2435 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2436 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2437 }
2438 \f
2439 /* Switch to the appropriate section for output of DECL.
2440 DECL is either a `VAR_DECL' node or a constant of some sort.
2441 RELOC indicates whether forming the initial value of DECL requires
2442 link-time relocations. */
2443
2444 static section *
2445 x86_64_elf_select_section (tree decl, int reloc,
2446 unsigned HOST_WIDE_INT align)
2447 {
2448 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2449 && ix86_in_large_data_p (decl))
2450 {
2451 const char *sname = NULL;
2452 unsigned int flags = SECTION_WRITE;
2453 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2454 {
2455 case SECCAT_DATA:
2456 sname = ".ldata";
2457 break;
2458 case SECCAT_DATA_REL:
2459 sname = ".ldata.rel";
2460 break;
2461 case SECCAT_DATA_REL_LOCAL:
2462 sname = ".ldata.rel.local";
2463 break;
2464 case SECCAT_DATA_REL_RO:
2465 sname = ".ldata.rel.ro";
2466 break;
2467 case SECCAT_DATA_REL_RO_LOCAL:
2468 sname = ".ldata.rel.ro.local";
2469 break;
2470 case SECCAT_BSS:
2471 sname = ".lbss";
2472 flags |= SECTION_BSS;
2473 break;
2474 case SECCAT_RODATA:
2475 case SECCAT_RODATA_MERGE_STR:
2476 case SECCAT_RODATA_MERGE_STR_INIT:
2477 case SECCAT_RODATA_MERGE_CONST:
2478 sname = ".lrodata";
2479 flags = 0;
2480 break;
2481 case SECCAT_SRODATA:
2482 case SECCAT_SDATA:
2483 case SECCAT_SBSS:
2484 gcc_unreachable ();
2485 case SECCAT_TEXT:
2486 case SECCAT_TDATA:
2487 case SECCAT_TBSS:
2488 /* We don't split these for the medium model. Place them into
2489 default sections and hope for the best. */
2490 break;
2491 }
2492 if (sname)
2493 {
2494 /* We might get called with string constants, but get_named_section
2495 doesn't like them as they are not DECLs. Also, we need to set
2496 flags in that case. */
2497 if (!DECL_P (decl))
2498 return get_section (sname, flags, NULL);
2499 return get_named_section (decl, sname, reloc);
2500 }
2501 }
2502 return default_elf_select_section (decl, reloc, align);
2503 }
2504
2505 /* Build up a unique section name, expressed as a
2506 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2507 RELOC indicates whether the initial value of EXP requires
2508 link-time relocations. */
2509
2510 static void
2511 x86_64_elf_unique_section (tree decl, int reloc)
2512 {
2513 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2514 && ix86_in_large_data_p (decl))
2515 {
2516 const char *prefix = NULL;
2517 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2518 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2519
2520 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2521 {
2522 case SECCAT_DATA:
2523 case SECCAT_DATA_REL:
2524 case SECCAT_DATA_REL_LOCAL:
2525 case SECCAT_DATA_REL_RO:
2526 case SECCAT_DATA_REL_RO_LOCAL:
2527 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2528 break;
2529 case SECCAT_BSS:
2530 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2531 break;
2532 case SECCAT_RODATA:
2533 case SECCAT_RODATA_MERGE_STR:
2534 case SECCAT_RODATA_MERGE_STR_INIT:
2535 case SECCAT_RODATA_MERGE_CONST:
2536 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2537 break;
2538 case SECCAT_SRODATA:
2539 case SECCAT_SDATA:
2540 case SECCAT_SBSS:
2541 gcc_unreachable ();
2542 case SECCAT_TEXT:
2543 case SECCAT_TDATA:
2544 case SECCAT_TBSS:
2545 /* We don't split these for the medium model. Place them into
2546 default sections and hope for the best. */
2547 break;
2548 }
2549 if (prefix)
2550 {
2551 const char *name;
2552 size_t nlen, plen;
2553 char *string;
2554 plen = strlen (prefix);
2555
2556 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2557 name = targetm.strip_name_encoding (name);
2558 nlen = strlen (name);
2559
2560 string = alloca (nlen + plen + 1);
2561 memcpy (string, prefix, plen);
2562 memcpy (string + plen, name, nlen + 1);
2563
2564 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2565 return;
2566 }
2567 }
2568 default_unique_section (decl, reloc);
2569 }
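/* Illustration of the prefixes above (hypothetical decl name): a writable
   large-data variable "foo" gets the section name ".ldata.foo", or
   ".gnu.linkonce.ld.foo" when it is one-only and COMDAT groups are not
   available.  */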
2570
2571 #ifdef COMMON_ASM_OP
2572 /* This says how to output assembler code to declare an
2573 uninitialized external linkage data object.
2574
2575 For medium model x86-64 we need to use the .largecomm directive for
2576 large objects. */
2577 void
2578 x86_elf_aligned_common (FILE *file,
2579 const char *name, unsigned HOST_WIDE_INT size,
2580 int align)
2581 {
2582 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2583 && size > (unsigned int)ix86_section_threshold)
2584 fprintf (file, ".largecomm\t");
2585 else
2586 fprintf (file, "%s", COMMON_ASM_OP);
2587 assemble_name (file, name);
2588 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2589 size, align / BITS_PER_UNIT);
2590 }
2591 #endif
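/* Illustration (hypothetical name and values): for a large object the
   function above emits a line of the form ".largecomm name,size,align",
   e.g. ".largecomm big_array,1048576,32", where align is in bytes; smaller
   objects get the plain COMMON_ASM_OP form instead.  */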
2592 /* Utility function for targets to use in implementing
2593 ASM_OUTPUT_ALIGNED_BSS. */
2594
2595 void
2596 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2597 const char *name, unsigned HOST_WIDE_INT size,
2598 int align)
2599 {
2600 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2601 && size > (unsigned int)ix86_section_threshold)
2602 switch_to_section (get_named_section (decl, ".lbss", 0));
2603 else
2604 switch_to_section (bss_section);
2605 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2606 #ifdef ASM_DECLARE_OBJECT_NAME
2607 last_assemble_variable_decl = decl;
2608 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2609 #else
2610 /* Standard thing is just output label for the object. */
2611 ASM_OUTPUT_LABEL (file, name);
2612 #endif /* ASM_DECLARE_OBJECT_NAME */
2613 ASM_OUTPUT_SKIP (file, size ? size : 1);
2614 }
2615 \f
2616 void
2617 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2618 {
2619 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2620 make the problem with not enough registers even worse. */
2621 #ifdef INSN_SCHEDULING
2622 if (level > 1)
2623 flag_schedule_insns = 0;
2624 #endif
2625
2626 if (TARGET_MACHO)
2627 /* The Darwin libraries never set errno, so we might as well
2628 avoid calling them when that's the only reason we would. */
2629 flag_errno_math = 0;
2630
2631 /* The default values of these switches depend on TARGET_64BIT,
2632 which is not known at this moment. Mark these values with 2 and
2633 let the user override them. If no command line option specifies
2634 them, we will set the defaults in override_options. */
2635 if (optimize >= 1)
2636 flag_omit_frame_pointer = 2;
2637 flag_pcc_struct_return = 2;
2638 flag_asynchronous_unwind_tables = 2;
2639 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2640 SUBTARGET_OPTIMIZATION_OPTIONS;
2641 #endif
2642 }
2643 \f
2644 /* Table of valid machine attributes. */
2645 const struct attribute_spec ix86_attribute_table[] =
2646 {
2647 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2648 /* Stdcall attribute says callee is responsible for popping arguments
2649 if they are not variable. */
2650 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2651 /* Fastcall attribute says callee is responsible for popping arguments
2652 if they are not variable. */
2653 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2654 /* Cdecl attribute says the callee is a normal C declaration */
2655 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2656 /* Regparm attribute specifies how many integer arguments are to be
2657 passed in registers. */
2658 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2659 /* Sseregparm attribute says we are using x86_64 calling conventions
2660 for FP arguments. */
2661 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2662 /* force_align_arg_pointer says this function realigns the stack at entry. */
2663 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2664 false, true, true, ix86_handle_cconv_attribute },
2665 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2666 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2667 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2668 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2669 #endif
2670 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2671 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2672 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2673 SUBTARGET_ATTRIBUTE_TABLE,
2674 #endif
2675 { NULL, 0, 0, false, false, false, NULL }
2676 };
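/* Illustrative user-level usage of the calling convention attributes
   handled in this table (sketch only, not compiled here):

     int __attribute__ ((regparm (3))) add3 (int a, int b, int c);
     int __attribute__ ((fastcall)) fc (int a, int b);
     struct s { char c; int i; } __attribute__ ((ms_struct));

   regparm passes up to the given number of integer arguments in registers,
   fastcall passes the first two integer arguments in registers, and
   ms_struct requests the Microsoft struct/bitfield layout tested by
   ix86_ms_bitfield_layout_p.  */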
2677
2678 /* Decide whether we can make a sibling call to a function. DECL is the
2679 declaration of the function being targeted by the call and EXP is the
2680 CALL_EXPR representing the call. */
2681
2682 static bool
2683 ix86_function_ok_for_sibcall (tree decl, tree exp)
2684 {
2685 tree func;
2686 rtx a, b;
2687
2688 /* If we are generating position-independent code, we cannot sibcall
2689 optimize any indirect call, or a direct call to a global function,
2690 as the PLT requires %ebx be live. */
2691 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2692 return false;
2693
2694 if (decl)
2695 func = decl;
2696 else
2697 {
2698 func = TREE_TYPE (CALL_EXPR_FN (exp));
2699 if (POINTER_TYPE_P (func))
2700 func = TREE_TYPE (func);
2701 }
2702
2703 /* Check that the return value locations are the same. For example,
2704 if we are returning floats on the 80387 register stack, we cannot
2705 make a sibcall from a function that doesn't return a float to a
2706 function that does or, conversely, from a function that does return
2707 a float to a function that doesn't; the necessary stack adjustment
2708 would not be executed. This is also the place we notice
2709 differences in the return value ABI. Note that it is ok for one
2710 of the functions to have void return type as long as the return
2711 value of the other is passed in a register. */
2712 a = ix86_function_value (TREE_TYPE (exp), func, false);
2713 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2714 cfun->decl, false);
2715 if (STACK_REG_P (a) || STACK_REG_P (b))
2716 {
2717 if (!rtx_equal_p (a, b))
2718 return false;
2719 }
2720 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2721 ;
2722 else if (!rtx_equal_p (a, b))
2723 return false;
2724
2725 /* If this call is indirect, we'll need to be able to use a call-clobbered
2726 register for the address of the target function. Make sure that all
2727 such registers are not used for passing parameters. */
2728 if (!decl && !TARGET_64BIT)
2729 {
2730 tree type;
2731
2732 /* We're looking at the CALL_EXPR, we need the type of the function. */
2733 type = CALL_EXPR_FN (exp); /* pointer expression */
2734 type = TREE_TYPE (type); /* pointer type */
2735 type = TREE_TYPE (type); /* function type */
2736
2737 if (ix86_function_regparm (type, NULL) >= 3)
2738 {
2739 /* ??? Need to count the actual number of registers to be used,
2740 not the possible number of registers. Fix later. */
2741 return false;
2742 }
2743 }
2744
2745 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2746 /* Dllimport'd functions are also called indirectly. */
2747 if (decl && DECL_DLLIMPORT_P (decl)
2748 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2749 return false;
2750 #endif
2751
2752 /* If we force-aligned the stack, then sibcalling would unalign the
2753 stack, which may break the called function. */
2754 if (cfun->machine->force_align_arg_pointer)
2755 return false;
2756
2757 /* Otherwise okay. That also includes certain types of indirect calls. */
2758 return true;
2759 }
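/* Illustration of the PIC restriction above (assumed example).  With
   -m32 -fPIC, the tail call in

     extern int bar (int);
     int foo (int x) { return bar (x); }

   is not turned into a sibcall: bar does not bind locally, so the call
   goes through the PLT and requires %ebx to hold the GOT pointer.  */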
2760
2761 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2762 calling convention attributes;
2763 arguments as in struct attribute_spec.handler. */
2764
2765 static tree
2766 ix86_handle_cconv_attribute (tree *node, tree name,
2767 tree args,
2768 int flags ATTRIBUTE_UNUSED,
2769 bool *no_add_attrs)
2770 {
2771 if (TREE_CODE (*node) != FUNCTION_TYPE
2772 && TREE_CODE (*node) != METHOD_TYPE
2773 && TREE_CODE (*node) != FIELD_DECL
2774 && TREE_CODE (*node) != TYPE_DECL)
2775 {
2776 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2777 IDENTIFIER_POINTER (name));
2778 *no_add_attrs = true;
2779 return NULL_TREE;
2780 }
2781
2782 /* Can combine regparm with all attributes but fastcall. */
2783 if (is_attribute_p ("regparm", name))
2784 {
2785 tree cst;
2786
2787 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2788 {
2789 error ("fastcall and regparm attributes are not compatible");
2790 }
2791
2792 cst = TREE_VALUE (args);
2793 if (TREE_CODE (cst) != INTEGER_CST)
2794 {
2795 warning (OPT_Wattributes,
2796 "%qs attribute requires an integer constant argument",
2797 IDENTIFIER_POINTER (name));
2798 *no_add_attrs = true;
2799 }
2800 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2801 {
2802 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2803 IDENTIFIER_POINTER (name), REGPARM_MAX);
2804 *no_add_attrs = true;
2805 }
2806
2807 if (!TARGET_64BIT
2808 && lookup_attribute (ix86_force_align_arg_pointer_string,
2809 TYPE_ATTRIBUTES (*node))
2810 && compare_tree_int (cst, REGPARM_MAX-1) > 0)
2811 {
2812 error ("%s functions limited to %d register parameters",
2813 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2814 }
2815
2816 return NULL_TREE;
2817 }
2818
2819 if (TARGET_64BIT)
2820 {
2821 warning (OPT_Wattributes, "%qs attribute ignored",
2822 IDENTIFIER_POINTER (name));
2823 *no_add_attrs = true;
2824 return NULL_TREE;
2825 }
2826
2827 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2828 if (is_attribute_p ("fastcall", name))
2829 {
2830 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2831 {
2832 error ("fastcall and cdecl attributes are not compatible");
2833 }
2834 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2835 {
2836 error ("fastcall and stdcall attributes are not compatible");
2837 }
2838 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2839 {
2840 error ("fastcall and regparm attributes are not compatible");
2841 }
2842 }
2843
2844 /* Can combine stdcall with fastcall (redundant), regparm and
2845 sseregparm. */
2846 else if (is_attribute_p ("stdcall", name))
2847 {
2848 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2849 {
2850 error ("stdcall and cdecl attributes are not compatible");
2851 }
2852 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2853 {
2854 error ("stdcall and fastcall attributes are not compatible");
2855 }
2856 }
2857
2858 /* Can combine cdecl with regparm and sseregparm. */
2859 else if (is_attribute_p ("cdecl", name))
2860 {
2861 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2862 {
2863 error ("stdcall and cdecl attributes are not compatible");
2864 }
2865 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2866 {
2867 error ("fastcall and cdecl attributes are not compatible");
2868 }
2869 }
2870
2871 /* Can combine sseregparm with all attributes. */
2872
2873 return NULL_TREE;
2874 }
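/* Examples of combinations diagnosed by the handler above (sketch only):

     void __attribute__ ((fastcall, regparm (2))) f (int, int);  - error
     void __attribute__ ((stdcall, cdecl)) g (void);             - error
     void __attribute__ ((stdcall, sseregparm)) h (double);      - accepted

   fastcall/regparm and stdcall/cdecl are rejected as incompatible, while
   sseregparm may be combined with any of the others.  */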
2875
2876 /* Return 0 if the attributes for two types are incompatible, 1 if they
2877 are compatible, and 2 if they are nearly compatible (which causes a
2878 warning to be generated). */
2879
2880 static int
2881 ix86_comp_type_attributes (tree type1, tree type2)
2882 {
2883 /* Check for mismatch of non-default calling convention. */
2884 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2885
2886 if (TREE_CODE (type1) != FUNCTION_TYPE)
2887 return 1;
2888
2889 /* Check for mismatched fastcall/regparm types. */
2890 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2891 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2892 || (ix86_function_regparm (type1, NULL)
2893 != ix86_function_regparm (type2, NULL)))
2894 return 0;
2895
2896 /* Check for mismatched sseregparm types. */
2897 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2898 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2899 return 0;
2900
2901 /* Check for mismatched return types (cdecl vs stdcall). */
2902 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2903 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2904 return 0;
2905
2906 return 1;
2907 }
2908 \f
2909 /* Return the regparm value for a function with the indicated TYPE and DECL.
2910 DECL may be NULL when calling function indirectly
2911 or considering a libcall. */
2912
2913 static int
2914 ix86_function_regparm (tree type, tree decl)
2915 {
2916 tree attr;
2917 int regparm = ix86_regparm;
2918 bool user_convention = false;
2919
2920 if (!TARGET_64BIT)
2921 {
2922 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2923 if (attr)
2924 {
2925 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2926 user_convention = true;
2927 }
2928
2929 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2930 {
2931 regparm = 2;
2932 user_convention = true;
2933 }
2934
2935 /* Use register calling convention for local functions when possible. */
2936 if (!TARGET_64BIT && !user_convention && decl
2937 && flag_unit_at_a_time && !profile_flag)
2938 {
2939 struct cgraph_local_info *i = cgraph_local_info (decl);
2940 if (i && i->local)
2941 {
2942 int local_regparm, globals = 0, regno;
2943
2944 /* Make sure no regparm register is taken by a global register
2945 variable. */
2946 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2947 if (global_regs[local_regparm])
2948 break;
2949 /* We can't use regparm(3) for nested functions, as these use the
2950 static chain pointer in the third argument. */
2951 if (local_regparm == 3
2952 && decl_function_context (decl)
2953 && !DECL_NO_STATIC_CHAIN (decl))
2954 local_regparm = 2;
2955 /* If the function realigns its stack pointer, the
2956 prologue will clobber %ecx. If we've already
2957 generated code for the callee, the callee
2958 DECL_STRUCT_FUNCTION is gone, so we fall back to
2959 scanning the attributes for the self-realigning
2960 property. */
2961 if ((DECL_STRUCT_FUNCTION (decl)
2962 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2963 || (!DECL_STRUCT_FUNCTION (decl)
2964 && lookup_attribute (ix86_force_align_arg_pointer_string,
2965 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2966 local_regparm = 2;
2967 /* Each global register variable increases register pressure, so
2968 the more global reg vars there are, the less the regparm
2969 optimization can be used, unless requested by the user explicitly. */
2970 for (regno = 0; regno < 6; regno++)
2971 if (global_regs[regno])
2972 globals++;
2973 local_regparm
2974 = globals < local_regparm ? local_regparm - globals : 0;
2975
2976 if (local_regparm > regparm)
2977 regparm = local_regparm;
2978 }
2979 }
2980 }
2981 return regparm;
2982 }
2983
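/* Illustrative sketch of the regparm values computed above (hypothetical
   declarations, 32-bit target assumed):

     int __attribute__((regparm(3))) f (int a, int b, int c);
         a, b and c are passed in %eax, %edx and %ecx (regparm == 3).

     int __attribute__((fastcall)) g (int a, int b);
         regparm is forced to 2; a and b go in %ecx and %edx.

     static int h (int a, int b, int c);
         With -funit-at-a-time and no profiling, a purely local function
         may be promoted to regparm(3) automatically, unless global
         register variables or stack realignment take regparm registers
         away.  */
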
2984 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2985 DFmode (2) arguments in SSE registers for a function with the
2986 indicated TYPE and DECL. DECL may be NULL when calling function
2987 indirectly or considering a libcall. Otherwise return 0. */
2988
2989 static int
2990 ix86_function_sseregparm (tree type, tree decl)
2991 {
2992 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2993 by the sseregparm attribute. */
2994 if (TARGET_SSEREGPARM
2995 || (type
2996 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2997 {
2998 if (!TARGET_SSE)
2999 {
3000 if (decl)
3001 error ("Calling %qD with attribute sseregparm without "
3002 "SSE/SSE2 enabled", decl);
3003 else
3004 error ("Calling %qT with attribute sseregparm without "
3005 "SSE/SSE2 enabled", type);
3006 return 0;
3007 }
3008
3009 return 2;
3010 }
3011
3012 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3013 (and DFmode for SSE2) arguments in SSE registers,
3014 even for 32-bit targets. */
3015 if (!TARGET_64BIT && decl
3016 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3017 {
3018 struct cgraph_local_info *i = cgraph_local_info (decl);
3019 if (i && i->local)
3020 return TARGET_SSE2 ? 2 : 1;
3021 }
3022
3023 return 0;
3024 }
3025
3026 /* Return true if EAX is live at the start of the function. Used by
3027 ix86_expand_prologue to determine if we need special help before
3028 calling allocate_stack_worker. */
3029
3030 static bool
3031 ix86_eax_live_at_start_p (void)
3032 {
3033 /* Cheat. Don't bother working forward from ix86_function_regparm
3034 to the function type to whether an actual argument is located in
3035 eax. Instead just look at cfg info, which is still close enough
3036 to correct at this point. This gives false positives for broken
3037 functions that might use uninitialized data that happens to be
3038 allocated in eax, but who cares? */
3039 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3040 }
3041
3042 /* Value is the number of bytes of arguments automatically
3043 popped when returning from a subroutine call.
3044 FUNDECL is the declaration node of the function (as a tree),
3045 FUNTYPE is the data type of the function (as a tree),
3046 or for a library call it is an identifier node for the subroutine name.
3047 SIZE is the number of bytes of arguments passed on the stack.
3048
3049 On the 80386, the RTD insn may be used to pop them if the number
3050 of args is fixed, but if the number is variable then the caller
3051 must pop them all. RTD can't be used for library calls now
3052 because the library is compiled with the Unix compiler.
3053 Use of RTD is a selectable option, since it is incompatible with
3054 standard Unix calling sequences. If the option is not selected,
3055 the caller must always pop the args.
3056
3057 The attribute stdcall is equivalent to RTD on a per module basis. */
3058
3059 int
3060 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3061 {
3062 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3063
3064 /* Cdecl functions override -mrtd, and never pop the stack. */
3065 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3066
3067 /* Stdcall and fastcall functions will pop the stack if not
3068 variable args. */
3069 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3070 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3071 rtd = 1;
3072
3073 if (rtd
3074 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3075 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3076 == void_type_node)))
3077 return size;
3078 }
3079
3080 /* Lose any fake structure return argument if it is passed on the stack. */
3081 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3082 && !TARGET_64BIT
3083 && !KEEP_AGGREGATE_RETURN_POINTER)
3084 {
3085 int nregs = ix86_function_regparm (funtype, fundecl);
3086
3087 if (!nregs)
3088 return GET_MODE_SIZE (Pmode);
3089 }
3090
3091 return 0;
3092 }
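
/* Illustrative sketch of the values returned above (hypothetical
   declarations, 32-bit target assumed):

     int __attribute__((stdcall)) f (int a, int b);
         Fixed argument list, 8 bytes of arguments on the stack; the
         callee pops them, so 8 is returned (emitted as "ret $8").

     int __attribute__((cdecl)) g (int a, int b);
         The caller pops the arguments, so 0 is returned even with -mrtd.

     int __attribute__((stdcall)) h (int a, ...);
         Variable argument list, so the caller must pop; 0 is returned.  */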
3093 \f
3094 /* Argument support functions. */
3095
3096 /* Return true when register may be used to pass function parameters. */
3097 bool
3098 ix86_function_arg_regno_p (int regno)
3099 {
3100 int i;
3101 if (!TARGET_64BIT)
3102 {
3103 if (TARGET_MACHO)
3104 return (regno < REGPARM_MAX
3105 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3106 else
3107 return (regno < REGPARM_MAX
3108 || (TARGET_MMX && MMX_REGNO_P (regno)
3109 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3110 || (TARGET_SSE && SSE_REGNO_P (regno)
3111 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3112 }
3113
3114 if (TARGET_MACHO)
3115 {
3116 if (SSE_REGNO_P (regno) && TARGET_SSE)
3117 return true;
3118 }
3119 else
3120 {
3121 if (TARGET_SSE && SSE_REGNO_P (regno)
3122 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3123 return true;
3124 }
3125 /* RAX is used as a hidden argument to varargs functions. */
3126 if (!regno)
3127 return true;
3128 for (i = 0; i < REGPARM_MAX; i++)
3129 if (regno == x86_64_int_parameter_registers[i])
3130 return true;
3131 return false;
3132 }
3133
3134 /* Return true if we do not know how to pass TYPE solely in registers. */
3135
3136 static bool
3137 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3138 {
3139 if (must_pass_in_stack_var_size_or_pad (mode, type))
3140 return true;
3141
3142 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3143 The layout_type routine is crafty and tries to trick us into passing
3144 currently unsupported vector types on the stack by using TImode. */
3145 return (!TARGET_64BIT && mode == TImode
3146 && type && TREE_CODE (type) != VECTOR_TYPE);
3147 }
3148
3149 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3150 for a call to a function whose data type is FNTYPE.
3151 For a library call, FNTYPE is 0. */
3152
3153 void
3154 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3155 tree fntype, /* tree ptr for function decl */
3156 rtx libname, /* SYMBOL_REF of library name or 0 */
3157 tree fndecl)
3158 {
3159 static CUMULATIVE_ARGS zero_cum;
3160 tree param, next_param;
3161
3162 if (TARGET_DEBUG_ARG)
3163 {
3164 fprintf (stderr, "\ninit_cumulative_args (");
3165 if (fntype)
3166 fprintf (stderr, "fntype code = %s, ret code = %s",
3167 tree_code_name[(int) TREE_CODE (fntype)],
3168 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3169 else
3170 fprintf (stderr, "no fntype");
3171
3172 if (libname)
3173 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3174 }
3175
3176 *cum = zero_cum;
3177
3178 /* Set up the number of registers to use for passing arguments. */
3179 cum->nregs = ix86_regparm;
3180 if (TARGET_SSE)
3181 cum->sse_nregs = SSE_REGPARM_MAX;
3182 if (TARGET_MMX)
3183 cum->mmx_nregs = MMX_REGPARM_MAX;
3184 cum->warn_sse = true;
3185 cum->warn_mmx = true;
3186 cum->maybe_vaarg = false;
3187
3188 /* Use ecx and edx registers if function has fastcall attribute,
3189 else look for regparm information. */
3190 if (fntype && !TARGET_64BIT)
3191 {
3192 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3193 {
3194 cum->nregs = 2;
3195 cum->fastcall = 1;
3196 }
3197 else
3198 cum->nregs = ix86_function_regparm (fntype, fndecl);
3199 }
3200
3201 /* Set up the number of SSE registers used for passing SFmode
3202 and DFmode arguments. Warn for mismatching ABI. */
3203 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3204
3205 /* Determine if this function has variable arguments. This is
3206 indicated by the last argument being 'void_type_node' if there
3207 are no variable arguments. If there are variable arguments, then
3208 we won't pass anything in registers in 32-bit mode. */
3209
3210 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3211 {
3212 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3213 param != 0; param = next_param)
3214 {
3215 next_param = TREE_CHAIN (param);
3216 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3217 {
3218 if (!TARGET_64BIT)
3219 {
3220 cum->nregs = 0;
3221 cum->sse_nregs = 0;
3222 cum->mmx_nregs = 0;
3223 cum->warn_sse = 0;
3224 cum->warn_mmx = 0;
3225 cum->fastcall = 0;
3226 cum->float_in_sse = 0;
3227 }
3228 cum->maybe_vaarg = true;
3229 }
3230 }
3231 }
3232 if ((!fntype && !libname)
3233 || (fntype && !TYPE_ARG_TYPES (fntype)))
3234 cum->maybe_vaarg = true;
3235
3236 if (TARGET_DEBUG_ARG)
3237 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3238
3239 return;
3240 }
3241
3242 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3243 But in the case of vector types, it is some vector mode.
3244
3245 When we have only some of our vector isa extensions enabled, then there
3246 are some modes for which vector_mode_supported_p is false. For these
3247 modes, the generic vector support in gcc will choose some non-vector mode
3248 in order to implement the type. By computing the natural mode, we'll
3249 select the proper ABI location for the operand and not depend on whatever
3250 the middle-end decides to do with these vector types. */
3251
3252 static enum machine_mode
3253 type_natural_mode (tree type)
3254 {
3255 enum machine_mode mode = TYPE_MODE (type);
3256
3257 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3258 {
3259 HOST_WIDE_INT size = int_size_in_bytes (type);
3260 if ((size == 8 || size == 16)
3261 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3262 && TYPE_VECTOR_SUBPARTS (type) > 1)
3263 {
3264 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3265
3266 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3267 mode = MIN_MODE_VECTOR_FLOAT;
3268 else
3269 mode = MIN_MODE_VECTOR_INT;
3270
3271 /* Get the mode which has this inner mode and number of units. */
3272 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3273 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3274 && GET_MODE_INNER (mode) == innermode)
3275 return mode;
3276
3277 gcc_unreachable ();
3278 }
3279 }
3280
3281 return mode;
3282 }
3283
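/* Illustrative sketch (hypothetical typedef):

     typedef int v4si __attribute__((vector_size (16)));

   With SSE disabled, TYPE_MODE of v4si is not a vector mode (the
   middle end falls back to a non-vector representation), but
   type_natural_mode still returns V4SImode, so the argument is given
   its proper ABI location independently of that fallback.  */
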
3284 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3285 this may not agree with the mode that the type system has chosen for the
3286 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3287 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3288
3289 static rtx
3290 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3291 unsigned int regno)
3292 {
3293 rtx tmp;
3294
3295 if (orig_mode != BLKmode)
3296 tmp = gen_rtx_REG (orig_mode, regno);
3297 else
3298 {
3299 tmp = gen_rtx_REG (mode, regno);
3300 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3301 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3302 }
3303
3304 return tmp;
3305 }
3306
3307 /* x86-64 register passing implementation. See the x86-64 ABI for details. The
3308 goal of this code is to classify each eightbyte of the incoming argument by
3309 register class and assign registers accordingly. */
3310
3311 /* Return the union class of CLASS1 and CLASS2.
3312 See the x86-64 PS ABI for details. */
3313
3314 static enum x86_64_reg_class
3315 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3316 {
3317 /* Rule #1: If both classes are equal, this is the resulting class. */
3318 if (class1 == class2)
3319 return class1;
3320
3321 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3322 the other class. */
3323 if (class1 == X86_64_NO_CLASS)
3324 return class2;
3325 if (class2 == X86_64_NO_CLASS)
3326 return class1;
3327
3328 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3329 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3330 return X86_64_MEMORY_CLASS;
3331
3332 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3333 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3334 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3335 return X86_64_INTEGERSI_CLASS;
3336 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3337 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3338 return X86_64_INTEGER_CLASS;
3339
3340 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3341 MEMORY is used. */
3342 if (class1 == X86_64_X87_CLASS
3343 || class1 == X86_64_X87UP_CLASS
3344 || class1 == X86_64_COMPLEX_X87_CLASS
3345 || class2 == X86_64_X87_CLASS
3346 || class2 == X86_64_X87UP_CLASS
3347 || class2 == X86_64_COMPLEX_X87_CLASS)
3348 return X86_64_MEMORY_CLASS;
3349
3350 /* Rule #6: Otherwise class SSE is used. */
3351 return X86_64_SSE_CLASS;
3352 }
3353
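/* A few worked examples of the merge rules above (see the x86-64 psABI):

     merge_classes (X86_64_INTEGER_CLASS, X86_64_SSE_CLASS)
       -> X86_64_INTEGER_CLASS   (rule #4)
     merge_classes (X86_64_NO_CLASS, X86_64_SSESF_CLASS)
       -> X86_64_SSESF_CLASS     (rule #2)
     merge_classes (X86_64_X87_CLASS, X86_64_SSE_CLASS)
       -> X86_64_MEMORY_CLASS    (rule #5)  */
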
3354 /* Classify the argument of type TYPE and mode MODE.
3355 CLASSES will be filled by the register class used to pass each word
3356 of the operand. The number of words is returned. In case the parameter
3357 should be passed in memory, 0 is returned. As a special case for zero
3358 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3359
3360 BIT_OFFSET is used internally for handling records and specifies the
3361 offset, in bits modulo 256, to avoid overflow cases.
3362
3363 See the x86-64 PS ABI for details.
3364 */
3365
3366 static int
3367 classify_argument (enum machine_mode mode, tree type,
3368 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3369 {
3370 HOST_WIDE_INT bytes =
3371 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3372 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3373
3374 /* Variable sized entities are always passed/returned in memory. */
3375 if (bytes < 0)
3376 return 0;
3377
3378 if (mode != VOIDmode
3379 && targetm.calls.must_pass_in_stack (mode, type))
3380 return 0;
3381
3382 if (type && AGGREGATE_TYPE_P (type))
3383 {
3384 int i;
3385 tree field;
3386 enum x86_64_reg_class subclasses[MAX_CLASSES];
3387
3388 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3389 if (bytes > 16)
3390 return 0;
3391
3392 for (i = 0; i < words; i++)
3393 classes[i] = X86_64_NO_CLASS;
3394
3395 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3396 signal the memory class, so handle this as a special case. */
3397 if (!words)
3398 {
3399 classes[0] = X86_64_NO_CLASS;
3400 return 1;
3401 }
3402
3403 /* Classify each field of record and merge classes. */
3404 switch (TREE_CODE (type))
3405 {
3406 case RECORD_TYPE:
3407 /* And now merge the fields of structure. */
3408 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3409 {
3410 if (TREE_CODE (field) == FIELD_DECL)
3411 {
3412 int num;
3413
3414 if (TREE_TYPE (field) == error_mark_node)
3415 continue;
3416
3417 /* Bitfields are always classified as integer. Handle them
3418 early, since later code would consider them to be
3419 misaligned integers. */
3420 if (DECL_BIT_FIELD (field))
3421 {
3422 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3423 i < ((int_bit_position (field) + (bit_offset % 64))
3424 + tree_low_cst (DECL_SIZE (field), 0)
3425 + 63) / 8 / 8; i++)
3426 classes[i] =
3427 merge_classes (X86_64_INTEGER_CLASS,
3428 classes[i]);
3429 }
3430 else
3431 {
3432 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3433 TREE_TYPE (field), subclasses,
3434 (int_bit_position (field)
3435 + bit_offset) % 256);
3436 if (!num)
3437 return 0;
3438 for (i = 0; i < num; i++)
3439 {
3440 int pos =
3441 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3442 classes[i + pos] =
3443 merge_classes (subclasses[i], classes[i + pos]);
3444 }
3445 }
3446 }
3447 }
3448 break;
3449
3450 case ARRAY_TYPE:
3451 /* Arrays are handled as small records. */
3452 {
3453 int num;
3454 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3455 TREE_TYPE (type), subclasses, bit_offset);
3456 if (!num)
3457 return 0;
3458
3459 /* The partial classes are now full classes. */
3460 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3461 subclasses[0] = X86_64_SSE_CLASS;
3462 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3463 subclasses[0] = X86_64_INTEGER_CLASS;
3464
3465 for (i = 0; i < words; i++)
3466 classes[i] = subclasses[i % num];
3467
3468 break;
3469 }
3470 case UNION_TYPE:
3471 case QUAL_UNION_TYPE:
3472 /* Unions are similar to RECORD_TYPE but the offset is
3473 always 0. */
3474 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3475 {
3476 if (TREE_CODE (field) == FIELD_DECL)
3477 {
3478 int num;
3479
3480 if (TREE_TYPE (field) == error_mark_node)
3481 continue;
3482
3483 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3484 TREE_TYPE (field), subclasses,
3485 bit_offset);
3486 if (!num)
3487 return 0;
3488 for (i = 0; i < num; i++)
3489 classes[i] = merge_classes (subclasses[i], classes[i]);
3490 }
3491 }
3492 break;
3493
3494 default:
3495 gcc_unreachable ();
3496 }
3497
3498 /* Final merger cleanup. */
3499 for (i = 0; i < words; i++)
3500 {
3501 /* If one class is MEMORY, everything should be passed in
3502 memory. */
3503 if (classes[i] == X86_64_MEMORY_CLASS)
3504 return 0;
3505
3506 /* The X86_64_SSEUP_CLASS should be always preceded by
3507 X86_64_SSE_CLASS. */
3508 if (classes[i] == X86_64_SSEUP_CLASS
3509 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3510 classes[i] = X86_64_SSE_CLASS;
3511
3512 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3513 if (classes[i] == X86_64_X87UP_CLASS
3514 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3515 classes[i] = X86_64_SSE_CLASS;
3516 }
3517 return words;
3518 }
3519
3520 /* Compute alignment needed. We align all types to natural boundaries with
3521 the exception of XFmode that is aligned to 128 bits. */
3522 if (mode != VOIDmode && mode != BLKmode)
3523 {
3524 int mode_alignment = GET_MODE_BITSIZE (mode);
3525
3526 if (mode == XFmode)
3527 mode_alignment = 128;
3528 else if (mode == XCmode)
3529 mode_alignment = 256;
3530 if (COMPLEX_MODE_P (mode))
3531 mode_alignment /= 2;
3532 /* Misaligned fields are always returned in memory. */
3533 if (bit_offset % mode_alignment)
3534 return 0;
3535 }
3536
3537 /* For V1xx modes, just use the base mode. */
3538 if (VECTOR_MODE_P (mode)
3539 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3540 mode = GET_MODE_INNER (mode);
3541
3542 /* Classification of atomic types. */
3543 switch (mode)
3544 {
3545 case SDmode:
3546 case DDmode:
3547 classes[0] = X86_64_SSE_CLASS;
3548 return 1;
3549 case TDmode:
3550 classes[0] = X86_64_SSE_CLASS;
3551 classes[1] = X86_64_SSEUP_CLASS;
3552 return 2;
3553 case DImode:
3554 case SImode:
3555 case HImode:
3556 case QImode:
3557 case CSImode:
3558 case CHImode:
3559 case CQImode:
3560 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3561 classes[0] = X86_64_INTEGERSI_CLASS;
3562 else
3563 classes[0] = X86_64_INTEGER_CLASS;
3564 return 1;
3565 case CDImode:
3566 case TImode:
3567 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3568 return 2;
3569 case CTImode:
3570 return 0;
3571 case SFmode:
3572 if (!(bit_offset % 64))
3573 classes[0] = X86_64_SSESF_CLASS;
3574 else
3575 classes[0] = X86_64_SSE_CLASS;
3576 return 1;
3577 case DFmode:
3578 classes[0] = X86_64_SSEDF_CLASS;
3579 return 1;
3580 case XFmode:
3581 classes[0] = X86_64_X87_CLASS;
3582 classes[1] = X86_64_X87UP_CLASS;
3583 return 2;
3584 case TFmode:
3585 classes[0] = X86_64_SSE_CLASS;
3586 classes[1] = X86_64_SSEUP_CLASS;
3587 return 2;
3588 case SCmode:
3589 classes[0] = X86_64_SSE_CLASS;
3590 return 1;
3591 case DCmode:
3592 classes[0] = X86_64_SSEDF_CLASS;
3593 classes[1] = X86_64_SSEDF_CLASS;
3594 return 2;
3595 case XCmode:
3596 classes[0] = X86_64_COMPLEX_X87_CLASS;
3597 return 1;
3598 case TCmode:
3599 /* This mode is larger than 16 bytes. */
3600 return 0;
3601 case V4SFmode:
3602 case V4SImode:
3603 case V16QImode:
3604 case V8HImode:
3605 case V2DFmode:
3606 case V2DImode:
3607 classes[0] = X86_64_SSE_CLASS;
3608 classes[1] = X86_64_SSEUP_CLASS;
3609 return 2;
3610 case V2SFmode:
3611 case V2SImode:
3612 case V4HImode:
3613 case V8QImode:
3614 classes[0] = X86_64_SSE_CLASS;
3615 return 1;
3616 case BLKmode:
3617 case VOIDmode:
3618 return 0;
3619 default:
3620 gcc_assert (VECTOR_MODE_P (mode));
3621
3622 if (bytes > 16)
3623 return 0;
3624
3625 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3626
3627 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3628 classes[0] = X86_64_INTEGERSI_CLASS;
3629 else
3630 classes[0] = X86_64_INTEGER_CLASS;
3631 classes[1] = X86_64_INTEGER_CLASS;
3632 return 1 + (bytes > 8);
3633 }
3634 }
3635
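/* Worked examples for classify_argument (a sketch; hypothetical structs):

     struct a { double d; int i; };   16 bytes, two eightbytes:
       classes[0] = X86_64_SSEDF_CLASS (the double),
       classes[1] = X86_64_INTEGER_CLASS (the int); returns 2, so the
       struct is passed in one SSE and one integer register.

     struct b { long x[3]; };         24 bytes, larger than 16:
       returns 0, i.e. the struct is passed in memory.

     struct c { };                    zero sized (GNU extension):
       classes[0] = X86_64_NO_CLASS; returns 1.  */
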
3636 /* Examine the argument and return the number of registers required in each
3637 class. Return 0 iff the parameter should be passed in memory. */
3638 static int
3639 examine_argument (enum machine_mode mode, tree type, int in_return,
3640 int *int_nregs, int *sse_nregs)
3641 {
3642 enum x86_64_reg_class class[MAX_CLASSES];
3643 int n = classify_argument (mode, type, class, 0);
3644
3645 *int_nregs = 0;
3646 *sse_nregs = 0;
3647 if (!n)
3648 return 0;
3649 for (n--; n >= 0; n--)
3650 switch (class[n])
3651 {
3652 case X86_64_INTEGER_CLASS:
3653 case X86_64_INTEGERSI_CLASS:
3654 (*int_nregs)++;
3655 break;
3656 case X86_64_SSE_CLASS:
3657 case X86_64_SSESF_CLASS:
3658 case X86_64_SSEDF_CLASS:
3659 (*sse_nregs)++;
3660 break;
3661 case X86_64_NO_CLASS:
3662 case X86_64_SSEUP_CLASS:
3663 break;
3664 case X86_64_X87_CLASS:
3665 case X86_64_X87UP_CLASS:
3666 if (!in_return)
3667 return 0;
3668 break;
3669 case X86_64_COMPLEX_X87_CLASS:
3670 return in_return ? 2 : 0;
3671 case X86_64_MEMORY_CLASS:
3672 gcc_unreachable ();
3673 }
3674 return 1;
3675 }
3676
3677 /* Construct container for the argument used by GCC interface. See
3678 FUNCTION_ARG for the detailed description. */
3679
3680 static rtx
3681 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3682 tree type, int in_return, int nintregs, int nsseregs,
3683 const int *intreg, int sse_regno)
3684 {
3685 /* The following variables hold the static issued_error state. */
3686 static bool issued_sse_arg_error;
3687 static bool issued_sse_ret_error;
3688 static bool issued_x87_ret_error;
3689
3690 enum machine_mode tmpmode;
3691 int bytes =
3692 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3693 enum x86_64_reg_class class[MAX_CLASSES];
3694 int n;
3695 int i;
3696 int nexps = 0;
3697 int needed_sseregs, needed_intregs;
3698 rtx exp[MAX_CLASSES];
3699 rtx ret;
3700
3701 n = classify_argument (mode, type, class, 0);
3702 if (TARGET_DEBUG_ARG)
3703 {
3704 if (!n)
3705 fprintf (stderr, "Memory class\n");
3706 else
3707 {
3708 fprintf (stderr, "Classes:");
3709 for (i = 0; i < n; i++)
3710 {
3711 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3712 }
3713 fprintf (stderr, "\n");
3714 }
3715 }
3716 if (!n)
3717 return NULL;
3718 if (!examine_argument (mode, type, in_return, &needed_intregs,
3719 &needed_sseregs))
3720 return NULL;
3721 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3722 return NULL;
3723
3724 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3725 some less clueful developer tries to use floating-point anyway. */
3726 if (needed_sseregs && !TARGET_SSE)
3727 {
3728 if (in_return)
3729 {
3730 if (!issued_sse_ret_error)
3731 {
3732 error ("SSE register return with SSE disabled");
3733 issued_sse_ret_error = true;
3734 }
3735 }
3736 else if (!issued_sse_arg_error)
3737 {
3738 error ("SSE register argument with SSE disabled");
3739 issued_sse_arg_error = true;
3740 }
3741 return NULL;
3742 }
3743
3744 /* Likewise, error if the ABI requires us to return values in the
3745 x87 registers and the user specified -mno-80387. */
3746 if (!TARGET_80387 && in_return)
3747 for (i = 0; i < n; i++)
3748 if (class[i] == X86_64_X87_CLASS
3749 || class[i] == X86_64_X87UP_CLASS
3750 || class[i] == X86_64_COMPLEX_X87_CLASS)
3751 {
3752 if (!issued_x87_ret_error)
3753 {
3754 error ("x87 register return with x87 disabled");
3755 issued_x87_ret_error = true;
3756 }
3757 return NULL;
3758 }
3759
3760 /* First construct simple cases. Avoid SCmode, since we want to use
3761 a single register to pass this type. */
3762 if (n == 1 && mode != SCmode)
3763 switch (class[0])
3764 {
3765 case X86_64_INTEGER_CLASS:
3766 case X86_64_INTEGERSI_CLASS:
3767 return gen_rtx_REG (mode, intreg[0]);
3768 case X86_64_SSE_CLASS:
3769 case X86_64_SSESF_CLASS:
3770 case X86_64_SSEDF_CLASS:
3771 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3772 case X86_64_X87_CLASS:
3773 case X86_64_COMPLEX_X87_CLASS:
3774 return gen_rtx_REG (mode, FIRST_STACK_REG);
3775 case X86_64_NO_CLASS:
3776 /* Zero sized array, struct or class. */
3777 return NULL;
3778 default:
3779 gcc_unreachable ();
3780 }
3781 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3782 && mode != BLKmode)
3783 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3784 if (n == 2
3785 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3786 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3787 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3788 && class[1] == X86_64_INTEGER_CLASS
3789 && (mode == CDImode || mode == TImode || mode == TFmode)
3790 && intreg[0] + 1 == intreg[1])
3791 return gen_rtx_REG (mode, intreg[0]);
3792
3793 /* Otherwise figure out the entries of the PARALLEL. */
3794 for (i = 0; i < n; i++)
3795 {
3796 switch (class[i])
3797 {
3798 case X86_64_NO_CLASS:
3799 break;
3800 case X86_64_INTEGER_CLASS:
3801 case X86_64_INTEGERSI_CLASS:
3802 /* Merge TImodes on aligned occasions here too. */
3803 if (i * 8 + 8 > bytes)
3804 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3805 else if (class[i] == X86_64_INTEGERSI_CLASS)
3806 tmpmode = SImode;
3807 else
3808 tmpmode = DImode;
3809 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3810 if (tmpmode == BLKmode)
3811 tmpmode = DImode;
3812 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3813 gen_rtx_REG (tmpmode, *intreg),
3814 GEN_INT (i*8));
3815 intreg++;
3816 break;
3817 case X86_64_SSESF_CLASS:
3818 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (SFmode,
3820 SSE_REGNO (sse_regno)),
3821 GEN_INT (i*8));
3822 sse_regno++;
3823 break;
3824 case X86_64_SSEDF_CLASS:
3825 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3826 gen_rtx_REG (DFmode,
3827 SSE_REGNO (sse_regno)),
3828 GEN_INT (i*8));
3829 sse_regno++;
3830 break;
3831 case X86_64_SSE_CLASS:
3832 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3833 tmpmode = TImode;
3834 else
3835 tmpmode = DImode;
3836 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3837 gen_rtx_REG (tmpmode,
3838 SSE_REGNO (sse_regno)),
3839 GEN_INT (i*8));
3840 if (tmpmode == TImode)
3841 i++;
3842 sse_regno++;
3843 break;
3844 default:
3845 gcc_unreachable ();
3846 }
3847 }
3848
3849 /* Empty aligned struct, union or class. */
3850 if (nexps == 0)
3851 return NULL;
3852
3853 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3854 for (i = 0; i < nexps; i++)
3855 XVECEXP (ret, 0, i) = exp [i];
3856 return ret;
3857 }
3858
3859 /* Update the data in CUM to advance over an argument
3860 of mode MODE and data type TYPE.
3861 (TYPE is null for libcalls where that information may not be available.) */
3862
3863 void
3864 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3865 tree type, int named)
3866 {
3867 int bytes =
3868 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3869 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3870
3871 if (type)
3872 mode = type_natural_mode (type);
3873
3874 if (TARGET_DEBUG_ARG)
3875 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3876 "mode=%s, named=%d)\n\n",
3877 words, cum->words, cum->nregs, cum->sse_nregs,
3878 GET_MODE_NAME (mode), named);
3879
3880 if (TARGET_64BIT)
3881 {
3882 int int_nregs, sse_nregs;
3883 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3884 cum->words += words;
3885 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3886 {
3887 cum->nregs -= int_nregs;
3888 cum->sse_nregs -= sse_nregs;
3889 cum->regno += int_nregs;
3890 cum->sse_regno += sse_nregs;
3891 }
3892 else
3893 cum->words += words;
3894 }
3895 else
3896 {
3897 switch (mode)
3898 {
3899 default:
3900 break;
3901
3902 case BLKmode:
3903 if (bytes < 0)
3904 break;
3905 /* FALLTHRU */
3906
3907 case DImode:
3908 case SImode:
3909 case HImode:
3910 case QImode:
3911 cum->words += words;
3912 cum->nregs -= words;
3913 cum->regno += words;
3914
3915 if (cum->nregs <= 0)
3916 {
3917 cum->nregs = 0;
3918 cum->regno = 0;
3919 }
3920 break;
3921
3922 case DFmode:
3923 if (cum->float_in_sse < 2)
3924 break;
3925 case SFmode:
3926 if (cum->float_in_sse < 1)
3927 break;
3928 /* FALLTHRU */
3929
3930 case TImode:
3931 case V16QImode:
3932 case V8HImode:
3933 case V4SImode:
3934 case V2DImode:
3935 case V4SFmode:
3936 case V2DFmode:
3937 if (!type || !AGGREGATE_TYPE_P (type))
3938 {
3939 cum->sse_words += words;
3940 cum->sse_nregs -= 1;
3941 cum->sse_regno += 1;
3942 if (cum->sse_nregs <= 0)
3943 {
3944 cum->sse_nregs = 0;
3945 cum->sse_regno = 0;
3946 }
3947 }
3948 break;
3949
3950 case V8QImode:
3951 case V4HImode:
3952 case V2SImode:
3953 case V2SFmode:
3954 if (!type || !AGGREGATE_TYPE_P (type))
3955 {
3956 cum->mmx_words += words;
3957 cum->mmx_nregs -= 1;
3958 cum->mmx_regno += 1;
3959 if (cum->mmx_nregs <= 0)
3960 {
3961 cum->mmx_nregs = 0;
3962 cum->mmx_regno = 0;
3963 }
3964 }
3965 break;
3966 }
3967 }
3968 }
3969
3970 /* Define where to put the arguments to a function.
3971 Value is zero to push the argument on the stack,
3972 or a hard register in which to store the argument.
3973
3974 MODE is the argument's machine mode.
3975 TYPE is the data type of the argument (as a tree).
3976 This is null for libcalls where that information may
3977 not be available.
3978 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3979 the preceding args and about the function being called.
3980 NAMED is nonzero if this argument is a named parameter
3981 (otherwise it is an extra parameter matching an ellipsis). */
3982
3983 rtx
3984 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3985 tree type, int named)
3986 {
3987 enum machine_mode mode = orig_mode;
3988 rtx ret = NULL_RTX;
3989 int bytes =
3990 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3991 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3992 static bool warnedsse, warnedmmx;
3993
3994 /* To simplify the code below, represent vector types with a vector mode
3995 even if MMX/SSE are not active. */
3996 if (type && TREE_CODE (type) == VECTOR_TYPE)
3997 mode = type_natural_mode (type);
3998
3999 /* Handle a hidden AL argument containing the number of registers for varargs
4000 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4001 any AL settings. */
4002 if (mode == VOIDmode)
4003 {
4004 if (TARGET_64BIT)
4005 return GEN_INT (cum->maybe_vaarg
4006 ? (cum->sse_nregs < 0
4007 ? SSE_REGPARM_MAX
4008 : cum->sse_regno)
4009 : -1);
4010 else
4011 return constm1_rtx;
4012 }
4013 if (TARGET_64BIT)
4014 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4015 cum->sse_nregs,
4016 &x86_64_int_parameter_registers [cum->regno],
4017 cum->sse_regno);
4018 else
4019 switch (mode)
4020 {
4021 /* For now, pass fp/complex values on the stack. */
4022 default:
4023 break;
4024
4025 case BLKmode:
4026 if (bytes < 0)
4027 break;
4028 /* FALLTHRU */
4029 case DImode:
4030 case SImode:
4031 case HImode:
4032 case QImode:
4033 if (words <= cum->nregs)
4034 {
4035 int regno = cum->regno;
4036
4037 /* Fastcall allocates the first two DWORD (SImode) or
4038 smaller arguments to ECX and EDX. */
4039 if (cum->fastcall)
4040 {
4041 if (mode == BLKmode || mode == DImode)
4042 break;
4043
4044 /* ECX, not EAX, is the first allocated register. */
4045 if (regno == 0)
4046 regno = 2;
4047 }
4048 ret = gen_rtx_REG (mode, regno);
4049 }
4050 break;
4051 case DFmode:
4052 if (cum->float_in_sse < 2)
4053 break;
4054 case SFmode:
4055 if (cum->float_in_sse < 1)
4056 break;
4057 /* FALLTHRU */
4058 case TImode:
4059 case V16QImode:
4060 case V8HImode:
4061 case V4SImode:
4062 case V2DImode:
4063 case V4SFmode:
4064 case V2DFmode:
4065 if (!type || !AGGREGATE_TYPE_P (type))
4066 {
4067 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4068 {
4069 warnedsse = true;
4070 warning (0, "SSE vector argument without SSE enabled "
4071 "changes the ABI");
4072 }
4073 if (cum->sse_nregs)
4074 ret = gen_reg_or_parallel (mode, orig_mode,
4075 cum->sse_regno + FIRST_SSE_REG);
4076 }
4077 break;
4078 case V8QImode:
4079 case V4HImode:
4080 case V2SImode:
4081 case V2SFmode:
4082 if (!type || !AGGREGATE_TYPE_P (type))
4083 {
4084 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4085 {
4086 warnedmmx = true;
4087 warning (0, "MMX vector argument without MMX enabled "
4088 "changes the ABI");
4089 }
4090 if (cum->mmx_nregs)
4091 ret = gen_reg_or_parallel (mode, orig_mode,
4092 cum->mmx_regno + FIRST_MMX_REG);
4093 }
4094 break;
4095 }
4096
4097 if (TARGET_DEBUG_ARG)
4098 {
4099 fprintf (stderr,
4100 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4101 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4102
4103 if (ret)
4104 print_simple_rtl (stderr, ret);
4105 else
4106 fprintf (stderr, ", stack");
4107
4108 fprintf (stderr, " )\n");
4109 }
4110
4111 return ret;
4112 }
4113
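/* Illustrative sketch of the 32-bit register allocation above
   (hypothetical declarations):

     int __attribute__((fastcall)) f (int a, int b, int c);
         a -> %ecx, b -> %edx, c -> stack (DImode and BLKmode
         arguments never go in registers under fastcall).

     int __attribute__((regparm(3))) g (int a, long long b);
         a -> %eax; b occupies the %edx/%ecx pair (two words).

   Without an attribute and without local-function promotion,
   cum->nregs is 0 and every argument is pushed on the stack.  */
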
4114 /* A C expression that indicates when an argument must be passed by
4115 reference. If nonzero for an argument, a copy of that argument is
4116 made in memory and a pointer to the argument is passed instead of
4117 the argument itself. The pointer is passed in whatever way is
4118 appropriate for passing a pointer to that type. */
4119
4120 static bool
4121 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4122 enum machine_mode mode ATTRIBUTE_UNUSED,
4123 tree type, bool named ATTRIBUTE_UNUSED)
4124 {
4125 if (!TARGET_64BIT)
4126 return 0;
4127
4128 if (type && int_size_in_bytes (type) == -1)
4129 {
4130 if (TARGET_DEBUG_ARG)
4131 fprintf (stderr, "function_arg_pass_by_reference\n");
4132 return 1;
4133 }
4134
4135 return 0;
4136 }
4137
4138 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4139 ABI. Only called if TARGET_SSE. */
4140 static bool
4141 contains_128bit_aligned_vector_p (tree type)
4142 {
4143 enum machine_mode mode = TYPE_MODE (type);
4144 if (SSE_REG_MODE_P (mode)
4145 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4146 return true;
4147 if (TYPE_ALIGN (type) < 128)
4148 return false;
4149
4150 if (AGGREGATE_TYPE_P (type))
4151 {
4152 /* Walk the aggregates recursively. */
4153 switch (TREE_CODE (type))
4154 {
4155 case RECORD_TYPE:
4156 case UNION_TYPE:
4157 case QUAL_UNION_TYPE:
4158 {
4159 tree field;
4160
4161 /* Walk all the structure fields. */
4162 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4163 {
4164 if (TREE_CODE (field) == FIELD_DECL
4165 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4166 return true;
4167 }
4168 break;
4169 }
4170
4171 case ARRAY_TYPE:
4172 /* Just for use if some language passes arrays by value. */
4173 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4174 return true;
4175 break;
4176
4177 default:
4178 gcc_unreachable ();
4179 }
4180 }
4181 return false;
4182 }
4183
4184 /* Gives the alignment boundary, in bits, of an argument with the
4185 specified mode and type. */
4186
4187 int
4188 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4189 {
4190 int align;
4191 if (type)
4192 align = TYPE_ALIGN (type);
4193 else
4194 align = GET_MODE_ALIGNMENT (mode);
4195 if (align < PARM_BOUNDARY)
4196 align = PARM_BOUNDARY;
4197 if (!TARGET_64BIT)
4198 {
4199 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4200 make an exception for SSE modes since these require 128bit
4201 alignment.
4202
4203 The handling here differs from field_alignment. ICC aligns MMX
4204 arguments to 4 byte boundaries, while structure fields are aligned
4205 to 8 byte boundaries. */
4206 if (!TARGET_SSE)
4207 align = PARM_BOUNDARY;
4208 else if (!type)
4209 {
4210 if (!SSE_REG_MODE_P (mode))
4211 align = PARM_BOUNDARY;
4212 }
4213 else
4214 {
4215 if (!contains_128bit_aligned_vector_p (type))
4216 align = PARM_BOUNDARY;
4217 }
4218 }
4219 if (align > 128)
4220 align = 128;
4221 return align;
4222 }
4223
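/* Illustrative 32-bit examples, assuming SSE is enabled:

     int      -> 32 bits  (PARM_BOUNDARY)
     double   -> 32 bits  (no 128-bit aligned vector inside, so the
                           i386 default of 4-byte alignment applies)
     __m128   -> 128 bits (SSE mode kept at its natural alignment)

   The result is capped at 128 bits even for types declaring a larger
   alignment.  */
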
4224 /* Return true if N is a possible register number of function value. */
4225 bool
4226 ix86_function_value_regno_p (int regno)
4227 {
4228 if (TARGET_MACHO)
4229 {
4230 if (!TARGET_64BIT)
4231 {
4232 return ((regno) == 0
4233 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4234 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4235 }
4236 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4237 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4238 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4239 }
4240 else
4241 {
4242 if (regno == 0
4243 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4244 || (regno == FIRST_SSE_REG && TARGET_SSE))
4245 return true;
4246
4247 if (!TARGET_64BIT
4248 && (regno == FIRST_MMX_REG && TARGET_MMX))
4249 return true;
4250
4251 return false;
4252 }
4253 }
4254
4255 /* Define how to find the value returned by a function.
4256 VALTYPE is the data type of the value (as a tree).
4257 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4258 otherwise, FUNC is 0. */
4259 rtx
4260 ix86_function_value (tree valtype, tree fntype_or_decl,
4261 bool outgoing ATTRIBUTE_UNUSED)
4262 {
4263 enum machine_mode natmode = type_natural_mode (valtype);
4264
4265 if (TARGET_64BIT)
4266 {
4267 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4268 1, REGPARM_MAX, SSE_REGPARM_MAX,
4269 x86_64_int_return_registers, 0);
4270 /* For zero sized structures, construct_container returns NULL, but we
4271 need to keep the rest of the compiler happy by returning a meaningful value. */
4272 if (!ret)
4273 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4274 return ret;
4275 }
4276 else
4277 {
4278 tree fn = NULL_TREE, fntype;
4279 if (fntype_or_decl
4280 && DECL_P (fntype_or_decl))
4281 fn = fntype_or_decl;
4282 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4283 return gen_rtx_REG (TYPE_MODE (valtype),
4284 ix86_value_regno (natmode, fn, fntype));
4285 }
4286 }
4287
4288 /* Return true iff type is returned in memory. */
4289 int
4290 ix86_return_in_memory (tree type)
4291 {
4292 int needed_intregs, needed_sseregs, size;
4293 enum machine_mode mode = type_natural_mode (type);
4294
4295 if (TARGET_64BIT)
4296 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4297
4298 if (mode == BLKmode)
4299 return 1;
4300
4301 size = int_size_in_bytes (type);
4302
4303 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4304 return 0;
4305
4306 if (VECTOR_MODE_P (mode) || mode == TImode)
4307 {
4308 /* User-created vectors small enough to fit in EAX. */
4309 if (size < 8)
4310 return 0;
4311
4312 /* MMX/3dNow values are returned in MM0,
4313 except when it doesn't exist. */
4314 if (size == 8)
4315 return (TARGET_MMX ? 0 : 1);
4316
4317 /* SSE values are returned in XMM0, except when it doesn't exist. */
4318 if (size == 16)
4319 return (TARGET_SSE ? 0 : 1);
4320 }
4321
4322 if (mode == XFmode)
4323 return 0;
4324
4325 if (mode == TDmode)
4326 return 1;
4327
4328 if (size > 12)
4329 return 1;
4330 return 0;
4331 }
4332
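/* Illustrative examples (a sketch):

     64-bit:  struct { double d; int i; }  -> returned in registers
              struct { char buf[32]; }     -> returned in memory
                                              (larger than 16 bytes)
     32-bit:  long double (XFmode)         -> returned in %st(0)
              struct { int a, b, c, d; }   -> returned in memory
                                              (BLKmode aggregate)  */
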
4333 /* When returning SSE vector types, we have a choice of either
4334 (1) being abi incompatible with a -march switch, or
4335 (2) generating an error.
4336 Given no good solution, I think the safest thing is one warning.
4337 The user won't be able to use -Werror, but....
4338
4339 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4340 called in response to actually generating a caller or callee that
4341 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4342 via aggregate_value_p for general type probing from tree-ssa. */
4343
4344 static rtx
4345 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4346 {
4347 static bool warnedsse, warnedmmx;
4348
4349 if (type)
4350 {
4351 /* Look at the return type of the function, not the function type. */
4352 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4353
4354 if (!TARGET_SSE && !warnedsse)
4355 {
4356 if (mode == TImode
4357 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4358 {
4359 warnedsse = true;
4360 warning (0, "SSE vector return without SSE enabled "
4361 "changes the ABI");
4362 }
4363 }
4364
4365 if (!TARGET_MMX && !warnedmmx)
4366 {
4367 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4368 {
4369 warnedmmx = true;
4370 warning (0, "MMX vector return without MMX enabled "
4371 "changes the ABI");
4372 }
4373 }
4374 }
4375
4376 return NULL;
4377 }
4378
4379 /* Define how to find the value returned by a library function
4380 assuming the value has mode MODE. */
4381 rtx
4382 ix86_libcall_value (enum machine_mode mode)
4383 {
4384 if (TARGET_64BIT)
4385 {
4386 switch (mode)
4387 {
4388 case SFmode:
4389 case SCmode:
4390 case DFmode:
4391 case DCmode:
4392 case TFmode:
4393 case SDmode:
4394 case DDmode:
4395 case TDmode:
4396 return gen_rtx_REG (mode, FIRST_SSE_REG);
4397 case XFmode:
4398 case XCmode:
4399 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4400 case TCmode:
4401 return NULL;
4402 default:
4403 return gen_rtx_REG (mode, 0);
4404 }
4405 }
4406 else
4407 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4408 }
4409
4410 /* Given a mode, return the register to use for a return value. */
4411
4412 static int
4413 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4414 {
4415 gcc_assert (!TARGET_64BIT);
4416
4417 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4418 we normally prevent this case when mmx is not available. However
4419 some ABIs may require the result to be returned like DImode. */
4420 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4421 return TARGET_MMX ? FIRST_MMX_REG : 0;
4422
4423 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4424 we prevent this case when sse is not available. However some ABIs
4425 may require the result to be returned like integer TImode. */
4426 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4427 return TARGET_SSE ? FIRST_SSE_REG : 0;
4428
4429 /* Decimal floating point values can go in %eax, unlike other float modes. */
4430 if (DECIMAL_FLOAT_MODE_P (mode))
4431 return 0;
4432
4433 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4434 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4435 return 0;
4436
4437 /* Floating point return values in %st(0), except for local functions when
4438 SSE math is enabled or for functions with sseregparm attribute. */
4439 if ((func || fntype)
4440 && (mode == SFmode || mode == DFmode))
4441 {
4442 int sse_level = ix86_function_sseregparm (fntype, func);
4443 if ((sse_level >= 1 && mode == SFmode)
4444 || (sse_level == 2 && mode == DFmode))
4445 return FIRST_SSE_REG;
4446 }
4447
4448 return FIRST_FLOAT_REG;
4449 }
4450 \f
4451 /* Create the va_list data type. */
4452
4453 static tree
4454 ix86_build_builtin_va_list (void)
4455 {
4456 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4457
4458 /* For i386 we use plain pointer to argument area. */
4459 if (!TARGET_64BIT)
4460 return build_pointer_type (char_type_node);
4461
4462 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4463 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4464
4465 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4466 unsigned_type_node);
4467 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4468 unsigned_type_node);
4469 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4470 ptr_type_node);
4471 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4472 ptr_type_node);
4473
4474 va_list_gpr_counter_field = f_gpr;
4475 va_list_fpr_counter_field = f_fpr;
4476
4477 DECL_FIELD_CONTEXT (f_gpr) = record;
4478 DECL_FIELD_CONTEXT (f_fpr) = record;
4479 DECL_FIELD_CONTEXT (f_ovf) = record;
4480 DECL_FIELD_CONTEXT (f_sav) = record;
4481
4482 TREE_CHAIN (record) = type_decl;
4483 TYPE_NAME (record) = type_decl;
4484 TYPE_FIELDS (record) = f_gpr;
4485 TREE_CHAIN (f_gpr) = f_fpr;
4486 TREE_CHAIN (f_fpr) = f_ovf;
4487 TREE_CHAIN (f_ovf) = f_sav;
4488
4489 layout_type (record);
4490
4491 /* The correct type is an array type of one element. */
4492 return build_array_type (record, build_index_type (size_zero_node));
4493 }
4494
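/* For reference, the 64-bit va_list built above corresponds to the
   following C declaration (a one-element array of the record):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */
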
4495 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4496
4497 static void
4498 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4499 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4500 int no_rtl)
4501 {
4502 CUMULATIVE_ARGS next_cum;
4503 rtx save_area = NULL_RTX, mem;
4504 rtx label;
4505 rtx label_ref;
4506 rtx tmp_reg;
4507 rtx nsse_reg;
4508 int set;
4509 tree fntype;
4510 int stdarg_p;
4511 int i;
4512
4513 if (!TARGET_64BIT)
4514 return;
4515
4516 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4517 return;
4518
4519 /* Indicate to allocate space on the stack for varargs save area. */
4520 ix86_save_varrargs_registers = 1;
4521
4522 cfun->stack_alignment_needed = 128;
4523
4524 fntype = TREE_TYPE (current_function_decl);
4525 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4526 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4527 != void_type_node));
4528
4529 /* For varargs, we do not want to skip the dummy va_dcl argument.
4530 For stdargs, we do want to skip the last named argument. */
4531 next_cum = *cum;
4532 if (stdarg_p)
4533 function_arg_advance (&next_cum, mode, type, 1);
4534
4535 if (!no_rtl)
4536 save_area = frame_pointer_rtx;
4537
4538 set = get_varargs_alias_set ();
4539
4540 for (i = next_cum.regno;
4541 i < ix86_regparm
4542 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4543 i++)
4544 {
4545 mem = gen_rtx_MEM (Pmode,
4546 plus_constant (save_area, i * UNITS_PER_WORD));
4547 MEM_NOTRAP_P (mem) = 1;
4548 set_mem_alias_set (mem, set);
4549 emit_move_insn (mem, gen_rtx_REG (Pmode,
4550 x86_64_int_parameter_registers[i]));
4551 }
4552
4553 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4554 {
4555 /* Now emit code to save SSE registers. The AX parameter contains the
4556 number of SSE parameter registers used to call this function. We use
4557 the sse_prologue_save insn template that produces a computed jump across
4558 the SSE saves. We need some preparation work to get this working. */
4559
4560 label = gen_label_rtx ();
4561 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4562
4563 /* Compute the address to jump to:
4564 label - 4*eax + nnamed_sse_arguments*4 */
4565 tmp_reg = gen_reg_rtx (Pmode);
4566 nsse_reg = gen_reg_rtx (Pmode);
4567 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4568 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4569 gen_rtx_MULT (Pmode, nsse_reg,
4570 GEN_INT (4))));
4571 if (next_cum.sse_regno)
4572 emit_move_insn
4573 (nsse_reg,
4574 gen_rtx_CONST (DImode,
4575 gen_rtx_PLUS (DImode,
4576 label_ref,
4577 GEN_INT (next_cum.sse_regno * 4))));
4578 else
4579 emit_move_insn (nsse_reg, label_ref);
4580 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4581
4582 /* Compute the address of the memory block we save into. We always use a
4583 pointer pointing 127 bytes after the first byte to store - this is needed
4584 to keep the instruction size limited to 4 bytes. */
4585 tmp_reg = gen_reg_rtx (Pmode);
4586 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4587 plus_constant (save_area,
4588 8 * REGPARM_MAX + 127)));
4589 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4590 MEM_NOTRAP_P (mem) = 1;
4591 set_mem_alias_set (mem, set);
4592 set_mem_align (mem, BITS_PER_WORD);
4593
4594 /* And finally do the dirty job! */
4595 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4596 GEN_INT (next_cum.sse_regno), label));
4597 }
4598
4599 }
4600
4601 /* Implement va_start. */
4602
4603 void
4604 ix86_va_start (tree valist, rtx nextarg)
4605 {
4606 HOST_WIDE_INT words, n_gpr, n_fpr;
4607 tree f_gpr, f_fpr, f_ovf, f_sav;
4608 tree gpr, fpr, ovf, sav, t;
4609 tree type;
4610
4611 /* Only 64bit target needs something special. */
4612 if (!TARGET_64BIT)
4613 {
4614 std_expand_builtin_va_start (valist, nextarg);
4615 return;
4616 }
4617
4618 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4619 f_fpr = TREE_CHAIN (f_gpr);
4620 f_ovf = TREE_CHAIN (f_fpr);
4621 f_sav = TREE_CHAIN (f_ovf);
4622
4623 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4624 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4625 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4626 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4627 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4628
4629 /* Count number of gp and fp argument registers used. */
4630 words = current_function_args_info.words;
4631 n_gpr = current_function_args_info.regno;
4632 n_fpr = current_function_args_info.sse_regno;
4633
4634 if (TARGET_DEBUG_ARG)
4635 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4636 (int) words, (int) n_gpr, (int) n_fpr);
4637
4638 if (cfun->va_list_gpr_size)
4639 {
4640 type = TREE_TYPE (gpr);
4641 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4642 build_int_cst (type, n_gpr * 8));
4643 TREE_SIDE_EFFECTS (t) = 1;
4644 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4645 }
4646
4647 if (cfun->va_list_fpr_size)
4648 {
4649 type = TREE_TYPE (fpr);
4650 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4651 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4652 TREE_SIDE_EFFECTS (t) = 1;
4653 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4654 }
4655
4656 /* Find the overflow area. */
4657 type = TREE_TYPE (ovf);
4658 t = make_tree (type, virtual_incoming_args_rtx);
4659 if (words != 0)
4660 t = build2 (PLUS_EXPR, type, t,
4661 build_int_cst (type, words * UNITS_PER_WORD));
4662 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4663 TREE_SIDE_EFFECTS (t) = 1;
4664 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4665
4666 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4667 {
4668 /* Find the register save area.
4669 The prologue of the function saves it right above the stack frame. */
4670 type = TREE_TYPE (sav);
4671 t = make_tree (type, frame_pointer_rtx);
4672 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4673 TREE_SIDE_EFFECTS (t) = 1;
4674 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4675 }
4676 }
4677
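/* Sketch of the initial values stored above for a hypothetical 64-bit
   variadic function

     int f (const char *fmt, ...);

   whose only named argument is passed in a register:

     gp_offset         = 1 * 8                      (one GP register, %rdi, used)
     fp_offset         = 0 * 16 + 8 * REGPARM_MAX   (= 48, no SSE registers used)
     overflow_arg_area = address of the first stack-passed argument
     reg_save_area     = start of the save area laid out by the prologue
                         (six GP slots followed by the SSE slots).  */
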
4678 /* Implement va_arg. */
4679
4680 tree
4681 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4682 {
4683 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4684 tree f_gpr, f_fpr, f_ovf, f_sav;
4685 tree gpr, fpr, ovf, sav, t;
4686 int size, rsize;
4687 tree lab_false, lab_over = NULL_TREE;
4688 tree addr, t2;
4689 rtx container;
4690 int indirect_p = 0;
4691 tree ptrtype;
4692 enum machine_mode nat_mode;
4693
4694 /* Only 64bit target needs something special. */
4695 if (!TARGET_64BIT)
4696 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4697
4698 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4699 f_fpr = TREE_CHAIN (f_gpr);
4700 f_ovf = TREE_CHAIN (f_fpr);
4701 f_sav = TREE_CHAIN (f_ovf);
4702
4703 valist = build_va_arg_indirect_ref (valist);
4704 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4705 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4706 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4707 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4708
4709 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4710 if (indirect_p)
4711 type = build_pointer_type (type);
4712 size = int_size_in_bytes (type);
4713 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4714
4715 nat_mode = type_natural_mode (type);
4716 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4717 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4718
4719 /* Pull the value out of the saved registers. */
4720
4721 addr = create_tmp_var (ptr_type_node, "addr");
4722 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4723
4724 if (container)
4725 {
4726 int needed_intregs, needed_sseregs;
4727 bool need_temp;
4728 tree int_addr, sse_addr;
4729
4730 lab_false = create_artificial_label ();
4731 lab_over = create_artificial_label ();
4732
4733 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4734
4735 need_temp = (!REG_P (container)
4736 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4737 || TYPE_ALIGN (type) > 128));
4738
4739 /* In case we are passing a structure, verify that it is a consecutive block
4740 in the register save area. If not, we need to do moves. */
4741 if (!need_temp && !REG_P (container))
4742 {
4743 /* Verify that all registers are strictly consecutive. */
4744 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4745 {
4746 int i;
4747
4748 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4749 {
4750 rtx slot = XVECEXP (container, 0, i);
4751 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4752 || INTVAL (XEXP (slot, 1)) != i * 16)
4753 need_temp = 1;
4754 }
4755 }
4756 else
4757 {
4758 int i;
4759
4760 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4761 {
4762 rtx slot = XVECEXP (container, 0, i);
4763 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4764 || INTVAL (XEXP (slot, 1)) != i * 8)
4765 need_temp = 1;
4766 }
4767 }
4768 }
4769 if (!need_temp)
4770 {
4771 int_addr = addr;
4772 sse_addr = addr;
4773 }
4774 else
4775 {
4776 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4777 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4778 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4779 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4780 }
4781
4782 /* First ensure that we fit completely in registers. */
4783 if (needed_intregs)
4784 {
4785 t = build_int_cst (TREE_TYPE (gpr),
4786 (REGPARM_MAX - needed_intregs + 1) * 8);
4787 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4788 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4789 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4790 gimplify_and_add (t, pre_p);
4791 }
4792 if (needed_sseregs)
4793 {
4794 t = build_int_cst (TREE_TYPE (fpr),
4795 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4796 + REGPARM_MAX * 8);
4797 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4798 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4799 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4800 gimplify_and_add (t, pre_p);
4801 }
4802
4803 /* Compute index to start of area used for integer regs. */
4804 if (needed_intregs)
4805 {
4806 /* int_addr = gpr + sav; */
4807 t = fold_convert (ptr_type_node, gpr);
4808 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4809 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4810 gimplify_and_add (t, pre_p);
4811 }
4812 if (needed_sseregs)
4813 {
4814 /* sse_addr = fpr + sav; */
4815 t = fold_convert (ptr_type_node, fpr);
4816 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4817 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4818 gimplify_and_add (t, pre_p);
4819 }
4820 if (need_temp)
4821 {
4822 int i;
4823 tree temp = create_tmp_var (type, "va_arg_tmp");
4824
4825 /* addr = &temp; */
4826 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4827 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4828 gimplify_and_add (t, pre_p);
4829
4830 for (i = 0; i < XVECLEN (container, 0); i++)
4831 {
4832 rtx slot = XVECEXP (container, 0, i);
4833 rtx reg = XEXP (slot, 0);
4834 enum machine_mode mode = GET_MODE (reg);
4835 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4836 tree addr_type = build_pointer_type (piece_type);
4837 tree src_addr, src;
4838 int src_offset;
4839 tree dest_addr, dest;
4840
4841 if (SSE_REGNO_P (REGNO (reg)))
4842 {
4843 src_addr = sse_addr;
4844 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4845 }
4846 else
4847 {
4848 src_addr = int_addr;
4849 src_offset = REGNO (reg) * 8;
4850 }
4851 src_addr = fold_convert (addr_type, src_addr);
4852 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4853 size_int (src_offset)));
4854 src = build_va_arg_indirect_ref (src_addr);
4855
4856 dest_addr = fold_convert (addr_type, addr);
4857 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4858 size_int (INTVAL (XEXP (slot, 1)))));
4859 dest = build_va_arg_indirect_ref (dest_addr);
4860
4861 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4862 gimplify_and_add (t, pre_p);
4863 }
4864 }
4865
4866 if (needed_intregs)
4867 {
4868 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4869 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4870 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4871 gimplify_and_add (t, pre_p);
4872 }
4873 if (needed_sseregs)
4874 {
4875 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4876 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4877 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4878 gimplify_and_add (t, pre_p);
4879 }
4880
4881 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4882 gimplify_and_add (t, pre_p);
4883
4884 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4885 append_to_statement_list (t, pre_p);
4886 }
4887
4888 /* ... otherwise out of the overflow area. */
4889
4890 /* Care for on-stack alignment if needed. */
4891 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4892 || integer_zerop (TYPE_SIZE (type)))
4893 t = ovf;
4894 else
4895 {
4896 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4897 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4898 build_int_cst (TREE_TYPE (ovf), align - 1));
4899 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4900 build_int_cst (TREE_TYPE (t), -align));
4901 }
4902 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4903
4904 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4905 gimplify_and_add (t2, pre_p);
4906
4907 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4908 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4909 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4910 gimplify_and_add (t, pre_p);
4911
4912 if (container)
4913 {
4914 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4915 append_to_statement_list (t, pre_p);
4916 }
4917
4918 ptrtype = build_pointer_type (type);
4919 addr = fold_convert (ptrtype, addr);
4920
4921 if (indirect_p)
4922 addr = build_va_arg_indirect_ref (addr);
4923 return build_va_arg_indirect_ref (addr);
4924 }
4925 \f
4926 /* Return nonzero if OPNUM's MEM should be matched
4927 in movabs* patterns. */
4928
4929 int
4930 ix86_check_movabs (rtx insn, int opnum)
4931 {
4932 rtx set, mem;
4933
4934 set = PATTERN (insn);
4935 if (GET_CODE (set) == PARALLEL)
4936 set = XVECEXP (set, 0, 0);
4937 gcc_assert (GET_CODE (set) == SET);
4938 mem = XEXP (set, opnum);
4939 while (GET_CODE (mem) == SUBREG)
4940 mem = SUBREG_REG (mem);
4941 gcc_assert (MEM_P (mem));
4942 return (volatile_ok || !MEM_VOLATILE_P (mem));
4943 }
4944 \f
4945 /* Initialize the table of extra 80387 mathematical constants. */
4946
4947 static void
4948 init_ext_80387_constants (void)
4949 {
4950 static const char * cst[5] =
4951 {
4952 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4953 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4954 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4955 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4956 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4957 };
4958 int i;
4959
4960 for (i = 0; i < 5; i++)
4961 {
4962 real_from_string (&ext_80387_constants_table[i], cst[i]);
4963 /* Ensure each constant is rounded to XFmode precision. */
4964 real_convert (&ext_80387_constants_table[i],
4965 XFmode, &ext_80387_constants_table[i]);
4966 }
4967
4968 ext_80387_constants_init = 1;
4969 }
4970
4971 /* Return a nonzero code if the constant X is something that can be loaded
4972 with a special 80387 instruction, and zero otherwise. */
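/* For reference (summarizing standard_80387_constant_opcode below), the
   return codes map onto: 1 = fldz, 2 = fld1, 3 = fldlg2, 4 = fldln2,
   5 = fldl2e, 6 = fldl2t, 7 = fldpi, and 8/9 are the -0.0 and -1.0 cases
   that are split into fldz;fchs and fld1;fchs sequences.  */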
4973
4974 int
4975 standard_80387_constant_p (rtx x)
4976 {
4977 REAL_VALUE_TYPE r;
4978
4979 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4980 return -1;
4981
4982 if (x == CONST0_RTX (GET_MODE (x)))
4983 return 1;
4984 if (x == CONST1_RTX (GET_MODE (x)))
4985 return 2;
4986
4987 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4988
4989 /* For XFmode constants, try to find a special 80387 instruction when
4990 optimizing for size or on those CPUs that benefit from them. */
4991 if (GET_MODE (x) == XFmode
4992 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4993 {
4994 int i;
4995
4996 if (! ext_80387_constants_init)
4997 init_ext_80387_constants ();
4998
4999 for (i = 0; i < 5; i++)
5000 if (real_identical (&r, &ext_80387_constants_table[i]))
5001 return i + 3;
5002 }
5003
5004 /* Load of the constant -0.0 or -1.0 will be split as
5005 fldz;fchs or fld1;fchs sequence. */
5006 if (real_isnegzero (&r))
5007 return 8;
5008 if (real_identical (&r, &dconstm1))
5009 return 9;
5010
5011 return 0;
5012 }
5013
5014 /* Return the opcode of the special instruction to be used to load
5015 the constant X. */
5016
5017 const char *
5018 standard_80387_constant_opcode (rtx x)
5019 {
5020 switch (standard_80387_constant_p (x))
5021 {
5022 case 1:
5023 return "fldz";
5024 case 2:
5025 return "fld1";
5026 case 3:
5027 return "fldlg2";
5028 case 4:
5029 return "fldln2";
5030 case 5:
5031 return "fldl2e";
5032 case 6:
5033 return "fldl2t";
5034 case 7:
5035 return "fldpi";
5036 case 8:
5037 case 9:
5038 return "#";
5039 default:
5040 gcc_unreachable ();
5041 }
5042 }
5043
5044 /* Return the CONST_DOUBLE representing the 80387 constant that is
5045 loaded by the specified special instruction. The argument IDX
5046 matches the return value from standard_80387_constant_p. */
5047
5048 rtx
5049 standard_80387_constant_rtx (int idx)
5050 {
5051 int i;
5052
5053 if (! ext_80387_constants_init)
5054 init_ext_80387_constants ();
5055
5056 switch (idx)
5057 {
5058 case 3:
5059 case 4:
5060 case 5:
5061 case 6:
5062 case 7:
5063 i = idx - 3;
5064 break;
5065
5066 default:
5067 gcc_unreachable ();
5068 }
5069
5070 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5071 XFmode);
5072 }
5073
5074 /* Return 1 if MODE is a valid vector mode for SSE. */
5075 static int
5076 standard_sse_mode_p (enum machine_mode mode)
5077 {
5078 switch (mode)
5079 {
5080 case V16QImode:
5081 case V8HImode:
5082 case V4SImode:
5083 case V2DImode:
5084 case V4SFmode:
5085 case V2DFmode:
5086 return 1;
5087
5088 default:
5089 return 0;
5090 }
5091 }
5092
5093 /* Return 1 if X is an FP constant we can load into an SSE register
5094 without using memory. */
5095 int
5096 standard_sse_constant_p (rtx x)
5097 {
5098 enum machine_mode mode = GET_MODE (x);
5099
5100 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5101 return 1;
5102 if (vector_all_ones_operand (x, mode)
5103 && standard_sse_mode_p (mode))
5104 return TARGET_SSE2 ? 2 : -1;
5105
5106 return 0;
5107 }
5108
5109 /* Return the opcode of the special instruction to be used to load
5110 the constant X. */
5111
5112 const char *
5113 standard_sse_constant_opcode (rtx insn, rtx x)
5114 {
5115 switch (standard_sse_constant_p (x))
5116 {
5117 case 1:
5118 if (get_attr_mode (insn) == MODE_V4SF)
5119 return "xorps\t%0, %0";
5120 else if (get_attr_mode (insn) == MODE_V2DF)
5121 return "xorpd\t%0, %0";
5122 else
5123 return "pxor\t%0, %0";
5124 case 2:
5125 return "pcmpeqd\t%0, %0";
5126 }
5127 gcc_unreachable ();
5128 }
5129
5130 /* Return 1 if OP contains a symbol reference. */
5131
5132 int
5133 symbolic_reference_mentioned_p (rtx op)
5134 {
5135 const char *fmt;
5136 int i;
5137
5138 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5139 return 1;
5140
5141 fmt = GET_RTX_FORMAT (GET_CODE (op));
5142 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5143 {
5144 if (fmt[i] == 'E')
5145 {
5146 int j;
5147
5148 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5149 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5150 return 1;
5151 }
5152
5153 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5154 return 1;
5155 }
5156
5157 return 0;
5158 }
5159
5160 /* Return 1 if it is appropriate to emit `ret' instructions in the
5161 body of a function. Do this only if the epilogue is simple, needing a
5162 couple of insns. Prior to reloading, we can't tell how many registers
5163 must be saved, so return 0 then. Return 0 if there is no frame
5164 marker to de-allocate. */
5165
5166 int
5167 ix86_can_use_return_insn_p (void)
5168 {
5169 struct ix86_frame frame;
5170
5171 if (! reload_completed || frame_pointer_needed)
5172 return 0;
5173
5174 /* Don't allow more than 32768 bytes of arguments to be popped, since
5175 that's the most we handle with a single return instruction. */
5176 if (current_function_pops_args
5177 && current_function_args_size >= 32768)
5178 return 0;
5179
5180 ix86_compute_frame_layout (&frame);
5181 return frame.to_allocate == 0 && frame.nregs == 0;
5182 }
5183 \f
5184 /* Value should be nonzero if functions must have frame pointers.
5185 Zero means the frame pointer need not be set up (and parms may
5186 be accessed via the stack pointer) in functions that seem suitable. */
5187
5188 int
5189 ix86_frame_pointer_required (void)
5190 {
5191 /* If we accessed previous frames, then the generated code expects
5192 to be able to access the saved ebp value in our frame. */
5193 if (cfun->machine->accesses_prev_frame)
5194 return 1;
5195
5196 /* Several x86 os'es need a frame pointer for other reasons,
5197 usually pertaining to setjmp. */
5198 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5199 return 1;
5200
5201 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5202 the frame pointer by default. Turn it back on now if we've not
5203 got a leaf function. */
5204 if (TARGET_OMIT_LEAF_FRAME_POINTER
5205 && (!current_function_is_leaf
5206 || ix86_current_function_calls_tls_descriptor))
5207 return 1;
5208
5209 if (current_function_profile)
5210 return 1;
5211
5212 return 0;
5213 }
5214
5215 /* Record that the current function accesses previous call frames. */
5216
5217 void
5218 ix86_setup_frame_addresses (void)
5219 {
5220 cfun->machine->accesses_prev_frame = 1;
5221 }
5222 \f
5223 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5224 # define USE_HIDDEN_LINKONCE 1
5225 #else
5226 # define USE_HIDDEN_LINKONCE 0
5227 #endif
5228
5229 static int pic_labels_used;
5230
5231 /* Fills in the label name that should be used for a pc thunk for
5232 the given register. */
5233
5234 static void
5235 get_pc_thunk_name (char name[32], unsigned int regno)
5236 {
5237 gcc_assert (!TARGET_64BIT);
5238
5239 if (USE_HIDDEN_LINKONCE)
5240 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5241 else
5242 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5243 }
5244
5245
5246 /* For -fpic, emit the pc thunks that were used: each one loads its
5247 register with the return address of the caller and then returns. */
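/* Roughly, each emitted thunk is equivalent to

       __i686.get_pc_thunk.reg:
               movl    (%esp), %reg
               ret

   with the label form differing when USE_HIDDEN_LINKONCE is not
   available; see the output_asm_insn calls below.  */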
5248
5249 void
5250 ix86_file_end (void)
5251 {
5252 rtx xops[2];
5253 int regno;
5254
5255 for (regno = 0; regno < 8; ++regno)
5256 {
5257 char name[32];
5258
5259 if (! ((pic_labels_used >> regno) & 1))
5260 continue;
5261
5262 get_pc_thunk_name (name, regno);
5263
5264 #if TARGET_MACHO
5265 if (TARGET_MACHO)
5266 {
5267 switch_to_section (darwin_sections[text_coal_section]);
5268 fputs ("\t.weak_definition\t", asm_out_file);
5269 assemble_name (asm_out_file, name);
5270 fputs ("\n\t.private_extern\t", asm_out_file);
5271 assemble_name (asm_out_file, name);
5272 fputs ("\n", asm_out_file);
5273 ASM_OUTPUT_LABEL (asm_out_file, name);
5274 }
5275 else
5276 #endif
5277 if (USE_HIDDEN_LINKONCE)
5278 {
5279 tree decl;
5280
5281 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5282 error_mark_node);
5283 TREE_PUBLIC (decl) = 1;
5284 TREE_STATIC (decl) = 1;
5285 DECL_ONE_ONLY (decl) = 1;
5286
5287 (*targetm.asm_out.unique_section) (decl, 0);
5288 switch_to_section (get_named_section (decl, NULL, 0));
5289
5290 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5291 fputs ("\t.hidden\t", asm_out_file);
5292 assemble_name (asm_out_file, name);
5293 fputc ('\n', asm_out_file);
5294 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5295 }
5296 else
5297 {
5298 switch_to_section (text_section);
5299 ASM_OUTPUT_LABEL (asm_out_file, name);
5300 }
5301
5302 xops[0] = gen_rtx_REG (SImode, regno);
5303 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5304 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5305 output_asm_insn ("ret", xops);
5306 }
5307
5308 if (NEED_INDICATE_EXEC_STACK)
5309 file_end_indicate_exec_stack ();
5310 }
5311
5312 /* Emit code for the SET_GOT patterns. */
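/* A sketch of what this emits for PIC code (assuming %ebx as DEST):

     without deep branch prediction:   call    1f
                                    1: popl    %ebx
                                       addl    $_GLOBAL_OFFSET_TABLE_+[.-1b], %ebx

     with deep branch prediction:      call    __i686.get_pc_thunk.bx
                                       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   The non-PIC and Mach-O cases differ slightly; see the code below.  */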
5313
5314 const char *
5315 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5316 {
5317 rtx xops[3];
5318
5319 xops[0] = dest;
5320 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5321
5322 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5323 {
5324 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5325
5326 if (!flag_pic)
5327 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5328 else
5329 output_asm_insn ("call\t%a2", xops);
5330
5331 #if TARGET_MACHO
5332 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5333 is what will be referenced by the Mach-O PIC subsystem. */
5334 if (!label)
5335 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5336 #endif
5337
5338 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5339 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5340
5341 if (flag_pic)
5342 output_asm_insn ("pop{l}\t%0", xops);
5343 }
5344 else
5345 {
5346 char name[32];
5347 get_pc_thunk_name (name, REGNO (dest));
5348 pic_labels_used |= 1 << REGNO (dest);
5349
5350 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5351 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5352 output_asm_insn ("call\t%X2", xops);
5353 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5354 is what will be referenced by the Mach-O PIC subsystem. */
5355 #if TARGET_MACHO
5356 if (!label)
5357 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5358 else
5359 targetm.asm_out.internal_label (asm_out_file, "L",
5360 CODE_LABEL_NUMBER (label));
5361 #endif
5362 }
5363
5364 if (TARGET_MACHO)
5365 return "";
5366
5367 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5368 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5369 else
5370 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5371
5372 return "";
5373 }
5374
5375 /* Generate a "push" pattern for input ARG. */
5376
5377 static rtx
5378 gen_push (rtx arg)
5379 {
5380 return gen_rtx_SET (VOIDmode,
5381 gen_rtx_MEM (Pmode,
5382 gen_rtx_PRE_DEC (Pmode,
5383 stack_pointer_rtx)),
5384 arg);
5385 }
5386
5387 /* Return >= 0 if there is an unused call-clobbered register available
5388 for the entire function. */
5389
5390 static unsigned int
5391 ix86_select_alt_pic_regnum (void)
5392 {
5393 if (current_function_is_leaf && !current_function_profile
5394 && !ix86_current_function_calls_tls_descriptor)
5395 {
5396 int i;
5397 for (i = 2; i >= 0; --i)
5398 if (!regs_ever_live[i])
5399 return i;
5400 }
5401
5402 return INVALID_REGNUM;
5403 }
5404
5405 /* Return 1 if we need to save REGNO. */
5406 static int
5407 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5408 {
5409 if (pic_offset_table_rtx
5410 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5411 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5412 || current_function_profile
5413 || current_function_calls_eh_return
5414 || current_function_uses_const_pool))
5415 {
5416 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5417 return 0;
5418 return 1;
5419 }
5420
5421 if (current_function_calls_eh_return && maybe_eh_return)
5422 {
5423 unsigned i;
5424 for (i = 0; ; i++)
5425 {
5426 unsigned test = EH_RETURN_DATA_REGNO (i);
5427 if (test == INVALID_REGNUM)
5428 break;
5429 if (test == regno)
5430 return 1;
5431 }
5432 }
5433
5434 if (cfun->machine->force_align_arg_pointer
5435 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5436 return 1;
5437
5438 return (regs_ever_live[regno]
5439 && !call_used_regs[regno]
5440 && !fixed_regs[regno]
5441 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5442 }
5443
5444 /* Return number of registers to be saved on the stack. */
5445
5446 static int
5447 ix86_nsaved_regs (void)
5448 {
5449 int nregs = 0;
5450 int regno;
5451
5452 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5453 if (ix86_save_reg (regno, true))
5454 nregs++;
5455 return nregs;
5456 }
5457
5458 /* Return the offset between two registers, one to be eliminated, and the other
5459 its replacement, at the start of a routine. */
5460
5461 HOST_WIDE_INT
5462 ix86_initial_elimination_offset (int from, int to)
5463 {
5464 struct ix86_frame frame;
5465 ix86_compute_frame_layout (&frame);
5466
5467 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5468 return frame.hard_frame_pointer_offset;
5469 else if (from == FRAME_POINTER_REGNUM
5470 && to == HARD_FRAME_POINTER_REGNUM)
5471 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5472 else
5473 {
5474 gcc_assert (to == STACK_POINTER_REGNUM);
5475
5476 if (from == ARG_POINTER_REGNUM)
5477 return frame.stack_pointer_offset;
5478
5479 gcc_assert (from == FRAME_POINTER_REGNUM);
5480 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5481 }
5482 }
5483
5484 /* Fill structure ix86_frame about frame of currently computed function. */
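/* A rough sketch of the layout computed below, from higher to lower
   addresses (offsets are measured from the incoming argument pointer):

       return address
       saved frame pointer (if needed)      <- hard_frame_pointer_offset
       saved registers (frame->nregs words)
       va_arg register save area (if any)
       padding1
       local variables (get_frame_size ())  <- frame_pointer_offset
       outgoing arguments
       padding2                             <- stack_pointer_offset  */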
5485
5486 static void
5487 ix86_compute_frame_layout (struct ix86_frame *frame)
5488 {
5489 HOST_WIDE_INT total_size;
5490 unsigned int stack_alignment_needed;
5491 HOST_WIDE_INT offset;
5492 unsigned int preferred_alignment;
5493 HOST_WIDE_INT size = get_frame_size ();
5494
5495 frame->nregs = ix86_nsaved_regs ();
5496 total_size = size;
5497
5498 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5499 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5500
5501 /* During reload iterations the number of registers saved can change.
5502 Recompute the value as needed. Do not recompute when the number of
5503 registers did not change, as reload makes multiple calls to this function
5504 and does not expect the decision to change within a single iteration. */
5505 if (!optimize_size
5506 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5507 {
5508 int count = frame->nregs;
5509
5510 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5511 /* The fast prologue uses moves instead of pushes to save registers. This
5512 is significantly longer, but it also executes faster, as modern hardware
5513 can execute the moves in parallel but cannot do so for push/pop.
5514
5515 Be careful about choosing which prologue to emit: when the function takes
5516 many instructions to execute we may use the slow version, as well as when
5517 the function is known to be outside a hot spot (this is known with profile
5518 feedback only). Weight the size of the function by the number of registers
5519 to save, as it is cheap to use one or two push instructions but very
5520 slow to use many of them. */
5521 if (count)
5522 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5523 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5524 || (flag_branch_probabilities
5525 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5526 cfun->machine->use_fast_prologue_epilogue = false;
5527 else
5528 cfun->machine->use_fast_prologue_epilogue
5529 = !expensive_function_p (count);
5530 }
5531 if (TARGET_PROLOGUE_USING_MOVE
5532 && cfun->machine->use_fast_prologue_epilogue)
5533 frame->save_regs_using_mov = true;
5534 else
5535 frame->save_regs_using_mov = false;
5536
5537
5538 /* Skip return address and saved base pointer. */
5539 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5540
5541 frame->hard_frame_pointer_offset = offset;
5542
5543 /* Do some sanity checking of stack_alignment_needed and
5544 preferred_alignment, since the i386 port is the only one using these
5545 features and they may break easily. */
5546
5547 gcc_assert (!size || stack_alignment_needed);
5548 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5549 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5550 gcc_assert (stack_alignment_needed
5551 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5552
5553 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5554 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5555
5556 /* Register save area */
5557 offset += frame->nregs * UNITS_PER_WORD;
5558
5559 /* Va-arg area */
5560 if (ix86_save_varrargs_registers)
5561 {
5562 offset += X86_64_VARARGS_SIZE;
5563 frame->va_arg_size = X86_64_VARARGS_SIZE;
5564 }
5565 else
5566 frame->va_arg_size = 0;
5567
5568 /* Align start of frame for local function. */
5569 frame->padding1 = ((offset + stack_alignment_needed - 1)
5570 & -stack_alignment_needed) - offset;
5571
5572 offset += frame->padding1;
5573
5574 /* Frame pointer points here. */
5575 frame->frame_pointer_offset = offset;
5576
5577 offset += size;
5578
5579 /* Add the outgoing arguments area. It can be skipped if we eliminated
5580 all the function calls as dead code.
5581 Skipping is however impossible when the function calls alloca, as the
5582 alloca expander assumes that the last current_function_outgoing_args_size
5583 bytes of the stack frame are unused. */
5584 if (ACCUMULATE_OUTGOING_ARGS
5585 && (!current_function_is_leaf || current_function_calls_alloca
5586 || ix86_current_function_calls_tls_descriptor))
5587 {
5588 offset += current_function_outgoing_args_size;
5589 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5590 }
5591 else
5592 frame->outgoing_arguments_size = 0;
5593
5594 /* Align stack boundary. Only needed if we're calling another function
5595 or using alloca. */
5596 if (!current_function_is_leaf || current_function_calls_alloca
5597 || ix86_current_function_calls_tls_descriptor)
5598 frame->padding2 = ((offset + preferred_alignment - 1)
5599 & -preferred_alignment) - offset;
5600 else
5601 frame->padding2 = 0;
5602
5603 offset += frame->padding2;
5604
5605 /* We've reached end of stack frame. */
5606 frame->stack_pointer_offset = offset;
5607
5608 /* Size prologue needs to allocate. */
5609 frame->to_allocate =
5610 (size + frame->padding1 + frame->padding2
5611 + frame->outgoing_arguments_size + frame->va_arg_size);
5612
5613 if ((!frame->to_allocate && frame->nregs <= 1)
5614 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5615 frame->save_regs_using_mov = false;
5616
5617 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5618 && current_function_is_leaf
5619 && !ix86_current_function_calls_tls_descriptor)
5620 {
5621 frame->red_zone_size = frame->to_allocate;
5622 if (frame->save_regs_using_mov)
5623 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5624 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5625 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5626 }
5627 else
5628 frame->red_zone_size = 0;
5629 frame->to_allocate -= frame->red_zone_size;
5630 frame->stack_pointer_offset -= frame->red_zone_size;
5631 #if 0
5632 fprintf (stderr, "\n");
5633 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5634 fprintf (stderr, "size: %ld\n", (long)size);
5635 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5636 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5637 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5638 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5639 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5640 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5641 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5642 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5643 (long)frame->hard_frame_pointer_offset);
5644 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5645 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5646 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5647 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5648 #endif
5649 }
5650
5651 /* Emit code to save registers in the prologue. */
5652
5653 static void
5654 ix86_emit_save_regs (void)
5655 {
5656 unsigned int regno;
5657 rtx insn;
5658
5659 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5660 if (ix86_save_reg (regno, true))
5661 {
5662 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5663 RTX_FRAME_RELATED_P (insn) = 1;
5664 }
5665 }
5666
5667 /* Emit code to save registers using MOV insns. The first register
5668 is saved at POINTER + OFFSET. */
5669 static void
5670 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5671 {
5672 unsigned int regno;
5673 rtx insn;
5674
5675 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5676 if (ix86_save_reg (regno, true))
5677 {
5678 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5679 Pmode, offset),
5680 gen_rtx_REG (Pmode, regno));
5681 RTX_FRAME_RELATED_P (insn) = 1;
5682 offset += UNITS_PER_WORD;
5683 }
5684 }
5685
5686 /* Expand prologue or epilogue stack adjustment.
5687 The pattern exists to put a dependency on all ebp-based memory accesses.
5688 STYLE should be negative if instructions should be marked as frame related,
5689 zero if the %r11 register is live and cannot be freely used, and positive
5690 otherwise. */
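/* For example, ix86_expand_prologue below calls

     pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
                                GEN_INT (-allocate), -1);

   to allocate the frame, marking the adjustment as frame related.  */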
5691
5692 static void
5693 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5694 {
5695 rtx insn;
5696
5697 if (! TARGET_64BIT)
5698 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5699 else if (x86_64_immediate_operand (offset, DImode))
5700 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5701 else
5702 {
5703 rtx r11;
5704 /* r11 is used by indirect sibcall return as well, set before the
5705 epilogue and used after the epilogue. ATM indirect sibcall
5706 shouldn't be used together with huge frame sizes in one
5707 function because of the frame_size check in sibcall.c. */
5708 gcc_assert (style);
5709 r11 = gen_rtx_REG (DImode, R11_REG);
5710 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5711 if (style < 0)
5712 RTX_FRAME_RELATED_P (insn) = 1;
5713 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5714 offset));
5715 }
5716 if (style < 0)
5717 RTX_FRAME_RELATED_P (insn) = 1;
5718 }
5719
5720 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5721
5722 static rtx
5723 ix86_internal_arg_pointer (void)
5724 {
5725 bool has_force_align_arg_pointer =
5726 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5727 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5728 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5729 && DECL_NAME (current_function_decl)
5730 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5731 && DECL_FILE_SCOPE_P (current_function_decl))
5732 || ix86_force_align_arg_pointer
5733 || has_force_align_arg_pointer)
5734 {
5735 /* Nested functions can't realign the stack due to a register
5736 conflict. */
5737 if (DECL_CONTEXT (current_function_decl)
5738 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5739 {
5740 if (ix86_force_align_arg_pointer)
5741 warning (0, "-mstackrealign ignored for nested functions");
5742 if (has_force_align_arg_pointer)
5743 error ("%s not supported for nested functions",
5744 ix86_force_align_arg_pointer_string);
5745 return virtual_incoming_args_rtx;
5746 }
5747 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5748 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5749 }
5750 else
5751 return virtual_incoming_args_rtx;
5752 }
5753
5754 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5755 This is called from dwarf2out.c to emit call frame instructions
5756 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5757 static void
5758 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5759 {
5760 rtx unspec = SET_SRC (pattern);
5761 gcc_assert (GET_CODE (unspec) == UNSPEC);
5762
5763 switch (index)
5764 {
5765 case UNSPEC_REG_SAVE:
5766 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5767 SET_DEST (pattern));
5768 break;
5769 case UNSPEC_DEF_CFA:
5770 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5771 INTVAL (XVECEXP (unspec, 0, 0)));
5772 break;
5773 default:
5774 gcc_unreachable ();
5775 }
5776 }
5777
5778 /* Expand the prologue into a bunch of separate insns. */
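/* A sketch of the typical 32-bit prologue emitted here (push-based variant):

       pushl   %ebp
       movl    %esp, %ebp          (if frame_pointer_needed)
       pushl   <saved regs>        (or movs, when save_regs_using_mov)
       subl    $allocate, %esp
       <set_got sequence>          (if the PIC register is used)

   The details vary with red-zone use, stack probing and -mstackrealign.  */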
5779
5780 void
5781 ix86_expand_prologue (void)
5782 {
5783 rtx insn;
5784 bool pic_reg_used;
5785 struct ix86_frame frame;
5786 HOST_WIDE_INT allocate;
5787
5788 ix86_compute_frame_layout (&frame);
5789
5790 if (cfun->machine->force_align_arg_pointer)
5791 {
5792 rtx x, y;
5793
5794 /* Grab the argument pointer. */
5795 x = plus_constant (stack_pointer_rtx, 4);
5796 y = cfun->machine->force_align_arg_pointer;
5797 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5798 RTX_FRAME_RELATED_P (insn) = 1;
5799
5800 /* The unwind info consists of two parts: install the fafp as the cfa,
5801 and record the fafp as the "save register" of the stack pointer.
5802 The latter is there so that the unwinder can see where it should
5803 restore the stack pointer across the "and" insn. */
5804 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5805 x = gen_rtx_SET (VOIDmode, y, x);
5806 RTX_FRAME_RELATED_P (x) = 1;
5807 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5808 UNSPEC_REG_SAVE);
5809 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5810 RTX_FRAME_RELATED_P (y) = 1;
5811 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5812 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5813 REG_NOTES (insn) = x;
5814
5815 /* Align the stack. */
5816 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5817 GEN_INT (-16)));
5818
5819 /* And here we cheat like madmen with the unwind info. We force the
5820 cfa register back to sp+4, which is exactly what it was at the
5821 start of the function. Re-pushing the return address results in
5822 the return at the same spot relative to the cfa, and thus is
5823 correct wrt the unwind info. */
5824 x = cfun->machine->force_align_arg_pointer;
5825 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5826 insn = emit_insn (gen_push (x));
5827 RTX_FRAME_RELATED_P (insn) = 1;
5828
5829 x = GEN_INT (4);
5830 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5831 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5832 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5833 REG_NOTES (insn) = x;
5834 }
5835
5836 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5837 slower on all targets. Also sdb doesn't like it. */
5838
5839 if (frame_pointer_needed)
5840 {
5841 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5842 RTX_FRAME_RELATED_P (insn) = 1;
5843
5844 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5845 RTX_FRAME_RELATED_P (insn) = 1;
5846 }
5847
5848 allocate = frame.to_allocate;
5849
5850 if (!frame.save_regs_using_mov)
5851 ix86_emit_save_regs ();
5852 else
5853 allocate += frame.nregs * UNITS_PER_WORD;
5854
5855 /* When using the red zone we may start register saving before allocating
5856 the stack frame, saving one cycle of the prologue. */
5857 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5858 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5859 : stack_pointer_rtx,
5860 -frame.nregs * UNITS_PER_WORD);
5861
5862 if (allocate == 0)
5863 ;
5864 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5865 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5866 GEN_INT (-allocate), -1);
5867 else
5868 {
5869 /* Only valid for Win32. */
5870 rtx eax = gen_rtx_REG (SImode, 0);
5871 bool eax_live = ix86_eax_live_at_start_p ();
5872 rtx t;
5873
5874 gcc_assert (!TARGET_64BIT);
5875
5876 if (eax_live)
5877 {
5878 emit_insn (gen_push (eax));
5879 allocate -= 4;
5880 }
5881
5882 emit_move_insn (eax, GEN_INT (allocate));
5883
5884 insn = emit_insn (gen_allocate_stack_worker (eax));
5885 RTX_FRAME_RELATED_P (insn) = 1;
5886 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5887 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5888 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5889 t, REG_NOTES (insn));
5890
5891 if (eax_live)
5892 {
5893 if (frame_pointer_needed)
5894 t = plus_constant (hard_frame_pointer_rtx,
5895 allocate
5896 - frame.to_allocate
5897 - frame.nregs * UNITS_PER_WORD);
5898 else
5899 t = plus_constant (stack_pointer_rtx, allocate);
5900 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5901 }
5902 }
5903
5904 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5905 {
5906 if (!frame_pointer_needed || !frame.to_allocate)
5907 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5908 else
5909 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5910 -frame.nregs * UNITS_PER_WORD);
5911 }
5912
5913 pic_reg_used = false;
5914 if (pic_offset_table_rtx
5915 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5916 || current_function_profile))
5917 {
5918 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5919
5920 if (alt_pic_reg_used != INVALID_REGNUM)
5921 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5922
5923 pic_reg_used = true;
5924 }
5925
5926 if (pic_reg_used)
5927 {
5928 if (TARGET_64BIT)
5929 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5930 else
5931 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5932
5933 /* Even with accurate pre-reload life analysis, we can wind up
5934 deleting all references to the pic register after reload.
5935 Consider if cross-jumping unifies two sides of a branch
5936 controlled by a comparison vs the only read from a global.
5937 In which case, allow the set_got to be deleted, though we're
5938 too late to do anything about the ebx save in the prologue. */
5939 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5940 }
5941
5942 /* Prevent function calls from being scheduled before the call to mcount.
5943 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
5944 if (current_function_profile)
5945 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5946 }
5947
5948 /* Emit code to restore saved registers using MOV insns. First register
5949 is restored from POINTER + OFFSET. */
5950 static void
5951 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5952 int maybe_eh_return)
5953 {
5954 int regno;
5955 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5956
5957 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5958 if (ix86_save_reg (regno, maybe_eh_return))
5959 {
5960 /* Ensure that adjust_address won't be forced to produce pointer
5961 out of range allowed by x86-64 instruction set. */
5962 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5963 {
5964 rtx r11;
5965
5966 r11 = gen_rtx_REG (DImode, R11_REG);
5967 emit_move_insn (r11, GEN_INT (offset));
5968 emit_insn (gen_adddi3 (r11, r11, pointer));
5969 base_address = gen_rtx_MEM (Pmode, r11);
5970 offset = 0;
5971 }
5972 emit_move_insn (gen_rtx_REG (Pmode, regno),
5973 adjust_address (base_address, Pmode, offset));
5974 offset += UNITS_PER_WORD;
5975 }
5976 }
5977
5978 /* Restore function stack, frame, and registers. */
5979
5980 void
5981 ix86_expand_epilogue (int style)
5982 {
5983 int regno;
5984 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5985 struct ix86_frame frame;
5986 HOST_WIDE_INT offset;
5987
5988 ix86_compute_frame_layout (&frame);
5989
5990 /* Calculate start of saved registers relative to ebp. Special care
5991 must be taken for the normal return case of a function using
5992 eh_return: the eax and edx registers are marked as saved, but not
5993 restored along this path. */
5994 offset = frame.nregs;
5995 if (current_function_calls_eh_return && style != 2)
5996 offset -= 2;
5997 offset *= -UNITS_PER_WORD;
5998
5999 /* If we're only restoring one register and sp is not valid, then use a
6000 move instruction to restore the register, since it's less work than
6001 reloading sp and popping the register.
6002
6003 The default code results in a stack adjustment using an add/lea instruction,
6004 while this code results in a LEAVE instruction (or discrete equivalent),
6005 so it is profitable in some other cases as well, especially when there
6006 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6007 and there is exactly one register to pop. This heuristic may need some
6008 tuning in the future. */
6009 if ((!sp_valid && frame.nregs <= 1)
6010 || (TARGET_EPILOGUE_USING_MOVE
6011 && cfun->machine->use_fast_prologue_epilogue
6012 && (frame.nregs > 1 || frame.to_allocate))
6013 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6014 || (frame_pointer_needed && TARGET_USE_LEAVE
6015 && cfun->machine->use_fast_prologue_epilogue
6016 && frame.nregs == 1)
6017 || current_function_calls_eh_return)
6018 {
6019 /* Restore registers. We can use ebp or esp to address the memory
6020 locations. If both are available, default to ebp, since offsets
6021 are known to be small. The only exception is esp pointing directly
6022 to the end of the block of saved registers, where we may simplify
6023 the addressing mode. */
6024
6025 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6026 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6027 frame.to_allocate, style == 2);
6028 else
6029 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6030 offset, style == 2);
6031
6032 /* eh_return epilogues need %ecx added to the stack pointer. */
6033 if (style == 2)
6034 {
6035 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6036
6037 if (frame_pointer_needed)
6038 {
6039 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6040 tmp = plus_constant (tmp, UNITS_PER_WORD);
6041 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6042
6043 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6044 emit_move_insn (hard_frame_pointer_rtx, tmp);
6045
6046 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6047 const0_rtx, style);
6048 }
6049 else
6050 {
6051 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6052 tmp = plus_constant (tmp, (frame.to_allocate
6053 + frame.nregs * UNITS_PER_WORD));
6054 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6055 }
6056 }
6057 else if (!frame_pointer_needed)
6058 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6059 GEN_INT (frame.to_allocate
6060 + frame.nregs * UNITS_PER_WORD),
6061 style);
6062 /* If not an i386, mov & pop is faster than "leave". */
6063 else if (TARGET_USE_LEAVE || optimize_size
6064 || !cfun->machine->use_fast_prologue_epilogue)
6065 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6066 else
6067 {
6068 pro_epilogue_adjust_stack (stack_pointer_rtx,
6069 hard_frame_pointer_rtx,
6070 const0_rtx, style);
6071 if (TARGET_64BIT)
6072 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6073 else
6074 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6075 }
6076 }
6077 else
6078 {
6079 /* First step is to deallocate the stack frame so that we can
6080 pop the registers. */
6081 if (!sp_valid)
6082 {
6083 gcc_assert (frame_pointer_needed);
6084 pro_epilogue_adjust_stack (stack_pointer_rtx,
6085 hard_frame_pointer_rtx,
6086 GEN_INT (offset), style);
6087 }
6088 else if (frame.to_allocate)
6089 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6090 GEN_INT (frame.to_allocate), style);
6091
6092 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6093 if (ix86_save_reg (regno, false))
6094 {
6095 if (TARGET_64BIT)
6096 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6097 else
6098 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6099 }
6100 if (frame_pointer_needed)
6101 {
6102 /* Leave results in shorter dependency chains on CPUs that are
6103 able to grok it fast. */
6104 if (TARGET_USE_LEAVE)
6105 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6106 else if (TARGET_64BIT)
6107 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6108 else
6109 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6110 }
6111 }
6112
6113 if (cfun->machine->force_align_arg_pointer)
6114 {
6115 emit_insn (gen_addsi3 (stack_pointer_rtx,
6116 cfun->machine->force_align_arg_pointer,
6117 GEN_INT (-4)));
6118 }
6119
6120 /* Sibcall epilogues don't want a return instruction. */
6121 if (style == 0)
6122 return;
6123
6124 if (current_function_pops_args && current_function_args_size)
6125 {
6126 rtx popc = GEN_INT (current_function_pops_args);
6127
6128 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6129 return address, do an explicit add, and jump indirectly to the
6130 caller. */
6131
6132 if (current_function_pops_args >= 65536)
6133 {
6134 rtx ecx = gen_rtx_REG (SImode, 2);
6135
6136 /* There is no "pascal" calling convention in 64bit ABI. */
6137 gcc_assert (!TARGET_64BIT);
6138
6139 emit_insn (gen_popsi1 (ecx));
6140 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6141 emit_jump_insn (gen_return_indirect_internal (ecx));
6142 }
6143 else
6144 emit_jump_insn (gen_return_pop_internal (popc));
6145 }
6146 else
6147 emit_jump_insn (gen_return_internal ());
6148 }
6149
6150 /* Reset state that emitting the function's body may have modified. */
6151
6152 static void
6153 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6154 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6155 {
6156 if (pic_offset_table_rtx)
6157 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6158 #if TARGET_MACHO
6159 /* Mach-O doesn't support labels at the end of objects, so if
6160 it looks like we might want one, insert a NOP. */
6161 {
6162 rtx insn = get_last_insn ();
6163 while (insn
6164 && NOTE_P (insn)
6165 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6166 insn = PREV_INSN (insn);
6167 if (insn
6168 && (LABEL_P (insn)
6169 || (NOTE_P (insn)
6170 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6171 fputs ("\tnop\n", file);
6172 }
6173 #endif
6174
6175 }
6176 \f
6177 /* Extract the parts of an RTL expression that is a valid memory address
6178 for an instruction. Return 0 if the structure of the address is
6179 grossly off. Return -1 if the address contains ASHIFT, so it is not
6180 strictly valid, but is still used for computing the length of a lea instruction. */
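/* As a reminder, an ix86 address decomposes, roughly, into

       base + index * scale + disp     (with an optional segment override)

   so, e.g., 12(%ebx,%ecx,4) has base %ebx, index %ecx, scale 4 and disp 12,
   which map onto the fields of struct ix86_address filled in below.  */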
6181
6182 int
6183 ix86_decompose_address (rtx addr, struct ix86_address *out)
6184 {
6185 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6186 rtx base_reg, index_reg;
6187 HOST_WIDE_INT scale = 1;
6188 rtx scale_rtx = NULL_RTX;
6189 int retval = 1;
6190 enum ix86_address_seg seg = SEG_DEFAULT;
6191
6192 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6193 base = addr;
6194 else if (GET_CODE (addr) == PLUS)
6195 {
6196 rtx addends[4], op;
6197 int n = 0, i;
6198
6199 op = addr;
6200 do
6201 {
6202 if (n >= 4)
6203 return 0;
6204 addends[n++] = XEXP (op, 1);
6205 op = XEXP (op, 0);
6206 }
6207 while (GET_CODE (op) == PLUS);
6208 if (n >= 4)
6209 return 0;
6210 addends[n] = op;
6211
6212 for (i = n; i >= 0; --i)
6213 {
6214 op = addends[i];
6215 switch (GET_CODE (op))
6216 {
6217 case MULT:
6218 if (index)
6219 return 0;
6220 index = XEXP (op, 0);
6221 scale_rtx = XEXP (op, 1);
6222 break;
6223
6224 case UNSPEC:
6225 if (XINT (op, 1) == UNSPEC_TP
6226 && TARGET_TLS_DIRECT_SEG_REFS
6227 && seg == SEG_DEFAULT)
6228 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6229 else
6230 return 0;
6231 break;
6232
6233 case REG:
6234 case SUBREG:
6235 if (!base)
6236 base = op;
6237 else if (!index)
6238 index = op;
6239 else
6240 return 0;
6241 break;
6242
6243 case CONST:
6244 case CONST_INT:
6245 case SYMBOL_REF:
6246 case LABEL_REF:
6247 if (disp)
6248 return 0;
6249 disp = op;
6250 break;
6251
6252 default:
6253 return 0;
6254 }
6255 }
6256 }
6257 else if (GET_CODE (addr) == MULT)
6258 {
6259 index = XEXP (addr, 0); /* index*scale */
6260 scale_rtx = XEXP (addr, 1);
6261 }
6262 else if (GET_CODE (addr) == ASHIFT)
6263 {
6264 rtx tmp;
6265
6266 /* We're called for lea too, which implements ashift on occasion. */
6267 index = XEXP (addr, 0);
6268 tmp = XEXP (addr, 1);
6269 if (!CONST_INT_P (tmp))
6270 return 0;
6271 scale = INTVAL (tmp);
6272 if ((unsigned HOST_WIDE_INT) scale > 3)
6273 return 0;
6274 scale = 1 << scale;
6275 retval = -1;
6276 }
6277 else
6278 disp = addr; /* displacement */
6279
6280 /* Extract the integral value of scale. */
6281 if (scale_rtx)
6282 {
6283 if (!CONST_INT_P (scale_rtx))
6284 return 0;
6285 scale = INTVAL (scale_rtx);
6286 }
6287
6288 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6289 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6290
6291 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6292 if (base_reg && index_reg && scale == 1
6293 && (index_reg == arg_pointer_rtx
6294 || index_reg == frame_pointer_rtx
6295 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6296 {
6297 rtx tmp;
6298 tmp = base, base = index, index = tmp;
6299 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6300 }
6301
6302 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6303 if ((base_reg == hard_frame_pointer_rtx
6304 || base_reg == frame_pointer_rtx
6305 || base_reg == arg_pointer_rtx) && !disp)
6306 disp = const0_rtx;
6307
6308 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6309 Avoid this by transforming to [%esi+0]. */
6310 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6311 && base_reg && !index_reg && !disp
6312 && REG_P (base_reg)
6313 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6314 disp = const0_rtx;
6315
6316 /* Special case: encode reg+reg instead of reg*2. */
6317 if (!base && index && scale && scale == 2)
6318 base = index, base_reg = index_reg, scale = 1;
6319
6320 /* Special case: scaling cannot be encoded without base or displacement. */
6321 if (!base && !disp && index && scale != 1)
6322 disp = const0_rtx;
6323
6324 out->base = base;
6325 out->index = index;
6326 out->disp = disp;
6327 out->scale = scale;
6328 out->seg = seg;
6329
6330 return retval;
6331 }
6332 \f
6333 /* Return the cost of the memory address X.
6334 For i386, it is better to use a complex address than let gcc copy
6335 the address into a reg and make a new pseudo. But not if the address
6336 requires two regs - that would mean more pseudos with longer
6337 lifetimes. */
6338 static int
6339 ix86_address_cost (rtx x)
6340 {
6341 struct ix86_address parts;
6342 int cost = 1;
6343 int ok = ix86_decompose_address (x, &parts);
6344
6345 gcc_assert (ok);
6346
6347 if (parts.base && GET_CODE (parts.base) == SUBREG)
6348 parts.base = SUBREG_REG (parts.base);
6349 if (parts.index && GET_CODE (parts.index) == SUBREG)
6350 parts.index = SUBREG_REG (parts.index);
6351
6352 /* More complex memory references are better. */
6353 if (parts.disp && parts.disp != const0_rtx)
6354 cost--;
6355 if (parts.seg != SEG_DEFAULT)
6356 cost--;
6357
6358 /* Attempt to minimize number of registers in the address. */
6359 if ((parts.base
6360 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6361 || (parts.index
6362 && (!REG_P (parts.index)
6363 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6364 cost++;
6365
6366 if (parts.base
6367 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6368 && parts.index
6369 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6370 && parts.base != parts.index)
6371 cost++;
6372
6373 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6374 since its predecode logic can't detect the length of such instructions
6375 and they degenerate to vector decoded. Increase the cost of such
6376 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6377 to split such addresses or even to refuse them altogether.
6378
6379 The following addressing modes are affected:
6380 [base+scale*index]
6381 [scale*index+disp]
6382 [base+index]
6383
6384 The first and last cases may be avoidable by explicitly coding the zero
6385 into the memory address, but I don't have an AMD-K6 machine handy to
6386 check this theory. */
6387
6388 if (TARGET_K6
6389 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6390 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6391 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6392 cost += 10;
6393
6394 return cost;
6395 }
6396 \f
6397 /* If X is a machine specific address (i.e. a symbol or label being
6398 referenced as a displacement from the GOT implemented using an
6399 UNSPEC), then return the base term. Otherwise return X. */
6400
6401 rtx
6402 ix86_find_base_term (rtx x)
6403 {
6404 rtx term;
6405
6406 if (TARGET_64BIT)
6407 {
6408 if (GET_CODE (x) != CONST)
6409 return x;
6410 term = XEXP (x, 0);
6411 if (GET_CODE (term) == PLUS
6412 && (CONST_INT_P (XEXP (term, 1))
6413 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6414 term = XEXP (term, 0);
6415 if (GET_CODE (term) != UNSPEC
6416 || XINT (term, 1) != UNSPEC_GOTPCREL)
6417 return x;
6418
6419 term = XVECEXP (term, 0, 0);
6420
6421 if (GET_CODE (term) != SYMBOL_REF
6422 && GET_CODE (term) != LABEL_REF)
6423 return x;
6424
6425 return term;
6426 }
6427
6428 term = ix86_delegitimize_address (x);
6429
6430 if (GET_CODE (term) != SYMBOL_REF
6431 && GET_CODE (term) != LABEL_REF)
6432 return x;
6433
6434 return term;
6435 }
6436
6437 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6438 this is used to form addresses to local data when -fPIC is in
6439 use. */
6440
6441 static bool
6442 darwin_local_data_pic (rtx disp)
6443 {
6444 if (GET_CODE (disp) == MINUS)
6445 {
6446 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6447 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6448 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6449 {
6450 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6451 if (! strcmp (sym_name, "<pic base>"))
6452 return true;
6453 }
6454 }
6455
6456 return false;
6457 }
6458 \f
6459 /* Determine if a given RTX is a valid constant. We already know this
6460 satisfies CONSTANT_P. */
6461
6462 bool
6463 legitimate_constant_p (rtx x)
6464 {
6465 switch (GET_CODE (x))
6466 {
6467 case CONST:
6468 x = XEXP (x, 0);
6469
6470 if (GET_CODE (x) == PLUS)
6471 {
6472 if (!CONST_INT_P (XEXP (x, 1)))
6473 return false;
6474 x = XEXP (x, 0);
6475 }
6476
6477 if (TARGET_MACHO && darwin_local_data_pic (x))
6478 return true;
6479
6480 /* Only some unspecs are valid as "constants". */
6481 if (GET_CODE (x) == UNSPEC)
6482 switch (XINT (x, 1))
6483 {
6484 case UNSPEC_GOTOFF:
6485 return TARGET_64BIT;
6486 case UNSPEC_TPOFF:
6487 case UNSPEC_NTPOFF:
6488 x = XVECEXP (x, 0, 0);
6489 return (GET_CODE (x) == SYMBOL_REF
6490 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6491 case UNSPEC_DTPOFF:
6492 x = XVECEXP (x, 0, 0);
6493 return (GET_CODE (x) == SYMBOL_REF
6494 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6495 default:
6496 return false;
6497 }
6498
6499 /* We must have drilled down to a symbol. */
6500 if (GET_CODE (x) == LABEL_REF)
6501 return true;
6502 if (GET_CODE (x) != SYMBOL_REF)
6503 return false;
6504 /* FALLTHRU */
6505
6506 case SYMBOL_REF:
6507 /* TLS symbols are never valid. */
6508 if (SYMBOL_REF_TLS_MODEL (x))
6509 return false;
6510 break;
6511
6512 case CONST_DOUBLE:
6513 if (GET_MODE (x) == TImode
6514 && x != CONST0_RTX (TImode)
6515 && !TARGET_64BIT)
6516 return false;
6517 break;
6518
6519 case CONST_VECTOR:
6520 if (x == CONST0_RTX (GET_MODE (x)))
6521 return true;
6522 return false;
6523
6524 default:
6525 break;
6526 }
6527
6528 /* Otherwise we handle everything else in the move patterns. */
6529 return true;
6530 }
6531
6532 /* Determine if it's legal to put X into the constant pool. This
6533 is not possible for the address of thread-local symbols, which
6534 is checked above. */
6535
6536 static bool
6537 ix86_cannot_force_const_mem (rtx x)
6538 {
6539 /* We can always put integral constants and vectors in memory. */
6540 switch (GET_CODE (x))
6541 {
6542 case CONST_INT:
6543 case CONST_DOUBLE:
6544 case CONST_VECTOR:
6545 return false;
6546
6547 default:
6548 break;
6549 }
6550 return !legitimate_constant_p (x);
6551 }
6552
6553 /* Determine if a given RTX is a valid constant address. */
6554
6555 bool
6556 constant_address_p (rtx x)
6557 {
6558 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6559 }
6560
6561 /* Nonzero if the constant value X is a legitimate general operand
6562 when generating PIC code. It is given that flag_pic is on and
6563 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6564
6565 bool
6566 legitimate_pic_operand_p (rtx x)
6567 {
6568 rtx inner;
6569
6570 switch (GET_CODE (x))
6571 {
6572 case CONST:
6573 inner = XEXP (x, 0);
6574 if (GET_CODE (inner) == PLUS
6575 && CONST_INT_P (XEXP (inner, 1)))
6576 inner = XEXP (inner, 0);
6577
6578 /* Only some unspecs are valid as "constants". */
6579 if (GET_CODE (inner) == UNSPEC)
6580 switch (XINT (inner, 1))
6581 {
6582 case UNSPEC_GOTOFF:
6583 return TARGET_64BIT;
6584 case UNSPEC_TPOFF:
6585 x = XVECEXP (inner, 0, 0);
6586 return (GET_CODE (x) == SYMBOL_REF
6587 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6588 default:
6589 return false;
6590 }
6591 /* FALLTHRU */
6592
6593 case SYMBOL_REF:
6594 case LABEL_REF:
6595 return legitimate_pic_address_disp_p (x);
6596
6597 default:
6598 return true;
6599 }
6600 }
6601
6602 /* Determine if a given CONST RTX is a valid memory displacement
6603 in PIC mode. */
6604
6605 int
6606 legitimate_pic_address_disp_p (rtx disp)
6607 {
6608 bool saw_plus;
6609
6610 /* In 64bit mode we can allow direct addresses of symbols and labels
6611 when they are not dynamic symbols. */
6612 if (TARGET_64BIT)
6613 {
6614 rtx op0 = disp, op1;
6615
6616 switch (GET_CODE (disp))
6617 {
6618 case LABEL_REF:
6619 return true;
6620
6621 case CONST:
6622 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6623 break;
6624 op0 = XEXP (XEXP (disp, 0), 0);
6625 op1 = XEXP (XEXP (disp, 0), 1);
6626 if (!CONST_INT_P (op1)
6627 || INTVAL (op1) >= 16*1024*1024
6628 || INTVAL (op1) < -16*1024*1024)
6629 break;
6630 if (GET_CODE (op0) == LABEL_REF)
6631 return true;
6632 if (GET_CODE (op0) != SYMBOL_REF)
6633 break;
6634 /* FALLTHRU */
6635
6636 case SYMBOL_REF:
6637 /* TLS references should always be enclosed in UNSPEC. */
6638 if (SYMBOL_REF_TLS_MODEL (op0))
6639 return false;
6640 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6641 return true;
6642 break;
6643
6644 default:
6645 break;
6646 }
6647 }
6648 if (GET_CODE (disp) != CONST)
6649 return 0;
6650 disp = XEXP (disp, 0);
6651
6652 if (TARGET_64BIT)
6653 {
6654 /* It is not safe to allow PLUS expressions; the allowed distance of GOT
6655 references is limited. We should not need these anyway. */
6656 if (GET_CODE (disp) != UNSPEC
6657 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6658 && XINT (disp, 1) != UNSPEC_GOTOFF))
6659 return 0;
6660
6661 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6662 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6663 return 0;
6664 return 1;
6665 }
6666
6667 saw_plus = false;
6668 if (GET_CODE (disp) == PLUS)
6669 {
6670 if (!CONST_INT_P (XEXP (disp, 1)))
6671 return 0;
6672 disp = XEXP (disp, 0);
6673 saw_plus = true;
6674 }
6675
6676 if (TARGET_MACHO && darwin_local_data_pic (disp))
6677 return 1;
6678
6679 if (GET_CODE (disp) != UNSPEC)
6680 return 0;
6681
6682 switch (XINT (disp, 1))
6683 {
6684 case UNSPEC_GOT:
6685 if (saw_plus)
6686 return false;
6687 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6688 case UNSPEC_GOTOFF:
6689 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6690 While the ABI also specifies a 32bit relocation, we don't produce it
6691 in the small PIC model at all. */
6692 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6693 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6694 && !TARGET_64BIT)
6695 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6696 return false;
6697 case UNSPEC_GOTTPOFF:
6698 case UNSPEC_GOTNTPOFF:
6699 case UNSPEC_INDNTPOFF:
6700 if (saw_plus)
6701 return false;
6702 disp = XVECEXP (disp, 0, 0);
6703 return (GET_CODE (disp) == SYMBOL_REF
6704 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6705 case UNSPEC_NTPOFF:
6706 disp = XVECEXP (disp, 0, 0);
6707 return (GET_CODE (disp) == SYMBOL_REF
6708 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6709 case UNSPEC_DTPOFF:
6710 disp = XVECEXP (disp, 0, 0);
6711 return (GET_CODE (disp) == SYMBOL_REF
6712 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6713 }
6714
6715 return 0;
6716 }
6717
6718 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6719 memory address for an instruction. The MODE argument is the machine mode
6720 for the MEM expression that wants to use this address.
6721
6722 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6723 convert common non-canonical forms to canonical form so that they will
6724 be recognized. */
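/* For illustration, a typical canonical address has the shape
   base + index*scale + displacement, e.g. the RTL
     (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 12))
   for the AT&T operand 12(%base,%index,4).  */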
6725
6726 int
6727 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6728 {
6729 struct ix86_address parts;
6730 rtx base, index, disp;
6731 HOST_WIDE_INT scale;
6732 const char *reason = NULL;
6733 rtx reason_rtx = NULL_RTX;
6734
6735 if (TARGET_DEBUG_ADDR)
6736 {
6737 fprintf (stderr,
6738 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6739 GET_MODE_NAME (mode), strict);
6740 debug_rtx (addr);
6741 }
6742
6743 if (ix86_decompose_address (addr, &parts) <= 0)
6744 {
6745 reason = "decomposition failed";
6746 goto report_error;
6747 }
6748
6749 base = parts.base;
6750 index = parts.index;
6751 disp = parts.disp;
6752 scale = parts.scale;
6753
6754 /* Validate base register.
6755
6756 Don't allow SUBREGs that span more than a word here. It can lead to spill
6757 failures when the base is one word out of a two word structure, which is
6758 represented internally as a DImode int. */
6759
6760 if (base)
6761 {
6762 rtx reg;
6763 reason_rtx = base;
6764
6765 if (REG_P (base))
6766 reg = base;
6767 else if (GET_CODE (base) == SUBREG
6768 && REG_P (SUBREG_REG (base))
6769 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6770 <= UNITS_PER_WORD)
6771 reg = SUBREG_REG (base);
6772 else
6773 {
6774 reason = "base is not a register";
6775 goto report_error;
6776 }
6777
6778 if (GET_MODE (base) != Pmode)
6779 {
6780 reason = "base is not in Pmode";
6781 goto report_error;
6782 }
6783
6784 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6785 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6786 {
6787 reason = "base is not valid";
6788 goto report_error;
6789 }
6790 }
6791
6792 /* Validate index register.
6793
6794 Don't allow SUBREGs that span more than a word here -- same as above. */
6795
6796 if (index)
6797 {
6798 rtx reg;
6799 reason_rtx = index;
6800
6801 if (REG_P (index))
6802 reg = index;
6803 else if (GET_CODE (index) == SUBREG
6804 && REG_P (SUBREG_REG (index))
6805 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6806 <= UNITS_PER_WORD)
6807 reg = SUBREG_REG (index);
6808 else
6809 {
6810 reason = "index is not a register";
6811 goto report_error;
6812 }
6813
6814 if (GET_MODE (index) != Pmode)
6815 {
6816 reason = "index is not in Pmode";
6817 goto report_error;
6818 }
6819
6820 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6821 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6822 {
6823 reason = "index is not valid";
6824 goto report_error;
6825 }
6826 }
6827
6828 /* Validate scale factor. */
6829 if (scale != 1)
6830 {
6831 reason_rtx = GEN_INT (scale);
6832 if (!index)
6833 {
6834 reason = "scale without index";
6835 goto report_error;
6836 }
6837
6838 if (scale != 2 && scale != 4 && scale != 8)
6839 {
6840 reason = "scale is not a valid multiplier";
6841 goto report_error;
6842 }
6843 }
6844
6845 /* Validate displacement. */
6846 if (disp)
6847 {
6848 reason_rtx = disp;
6849
6850 if (GET_CODE (disp) == CONST
6851 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6852 switch (XINT (XEXP (disp, 0), 1))
6853 {
6854 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6855 used. While the ABI also specifies 32bit relocations, we don't
6856 produce them at all and use IP-relative addressing instead. */
6857 case UNSPEC_GOT:
6858 case UNSPEC_GOTOFF:
6859 gcc_assert (flag_pic);
6860 if (!TARGET_64BIT)
6861 goto is_legitimate_pic;
6862 reason = "64bit address unspec";
6863 goto report_error;
6864
6865 case UNSPEC_GOTPCREL:
6866 gcc_assert (flag_pic);
6867 goto is_legitimate_pic;
6868
6869 case UNSPEC_GOTTPOFF:
6870 case UNSPEC_GOTNTPOFF:
6871 case UNSPEC_INDNTPOFF:
6872 case UNSPEC_NTPOFF:
6873 case UNSPEC_DTPOFF:
6874 break;
6875
6876 default:
6877 reason = "invalid address unspec";
6878 goto report_error;
6879 }
6880
6881 else if (SYMBOLIC_CONST (disp)
6882 && (flag_pic
6883 || (TARGET_MACHO
6884 #if TARGET_MACHO
6885 && MACHOPIC_INDIRECT
6886 && !machopic_operand_p (disp)
6887 #endif
6888 )))
6889 {
6890
6891 is_legitimate_pic:
6892 if (TARGET_64BIT && (index || base))
6893 {
6894 /* foo@dtpoff(%rX) is ok. */
6895 if (GET_CODE (disp) != CONST
6896 || GET_CODE (XEXP (disp, 0)) != PLUS
6897 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6898 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6899 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6900 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6901 {
6902 reason = "non-constant pic memory reference";
6903 goto report_error;
6904 }
6905 }
6906 else if (! legitimate_pic_address_disp_p (disp))
6907 {
6908 reason = "displacement is an invalid pic construct";
6909 goto report_error;
6910 }
6911
6912 /* This code used to verify that a symbolic pic displacement
6913 includes the pic_offset_table_rtx register.
6914
6915 While this is a good idea, unfortunately these constructs may
6916 be created by the "adds using lea" optimization for incorrect
6917 code like:
6918
6919 int a;
6920 int foo(int i)
6921 {
6922 return *(&a+i);
6923 }
6924
6925 This code is nonsensical, but results in addressing the
6926 GOT table with pic_offset_table_rtx as base. We can't
6927 just refuse it easily, since it gets matched by the
6928 "addsi3" pattern, which is later split to lea when the
6929 output register differs from the input. While this
6930 could be handled by a separate addsi pattern for this case
6931 that never results in lea, disabling this test seems to be
6932 the easier and still correct fix for the crash. */
6933 }
6934 else if (GET_CODE (disp) != LABEL_REF
6935 && !CONST_INT_P (disp)
6936 && (GET_CODE (disp) != CONST
6937 || !legitimate_constant_p (disp))
6938 && (GET_CODE (disp) != SYMBOL_REF
6939 || !legitimate_constant_p (disp)))
6940 {
6941 reason = "displacement is not constant";
6942 goto report_error;
6943 }
6944 else if (TARGET_64BIT
6945 && !x86_64_immediate_operand (disp, VOIDmode))
6946 {
6947 reason = "displacement is out of range";
6948 goto report_error;
6949 }
6950 }
6951
6952 /* Everything looks valid. */
6953 if (TARGET_DEBUG_ADDR)
6954 fprintf (stderr, "Success.\n");
6955 return TRUE;
6956
6957 report_error:
6958 if (TARGET_DEBUG_ADDR)
6959 {
6960 fprintf (stderr, "Error: %s\n", reason);
6961 debug_rtx (reason_rtx);
6962 }
6963 return FALSE;
6964 }
6965 \f
6966 /* Return a unique alias set for the GOT. */
6967
6968 static HOST_WIDE_INT
6969 ix86_GOT_alias_set (void)
6970 {
6971 static HOST_WIDE_INT set = -1;
6972 if (set == -1)
6973 set = new_alias_set ();
6974 return set;
6975 }
6976
6977 /* Return a legitimate reference for ORIG (an address) using the
6978 register REG. If REG is 0, a new pseudo is generated.
6979
6980 There are two types of references that must be handled:
6981
6982 1. Global data references must load the address from the GOT, via
6983 the PIC reg. An insn is emitted to do this load, and the reg is
6984 returned.
6985
6986 2. Static data references, constant pool addresses, and code labels
6987 compute the address as an offset from the GOT, whose base is in
6988 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6989 differentiate them from global data objects. The returned
6990 address is the PIC reg + an unspec constant.
6991
6992 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6993 reg also appears in the address. */
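/* For illustration (32-bit case): the address of a global symbol is
   typically loaded from the GOT, e.g.
     movl	sym@GOT(%ebx), %reg
   while a local symbol is addressed as the PIC register plus a @GOTOFF
   constant, e.g.
     leal	sym@GOTOFF(%ebx), %reg
   matching the UNSPEC_GOT and UNSPEC_GOTOFF forms generated below.  */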
6994
6995 static rtx
6996 legitimize_pic_address (rtx orig, rtx reg)
6997 {
6998 rtx addr = orig;
6999 rtx new = orig;
7000 rtx base;
7001
7002 #if TARGET_MACHO
7003 if (TARGET_MACHO && !TARGET_64BIT)
7004 {
7005 if (reg == 0)
7006 reg = gen_reg_rtx (Pmode);
7007 /* Use the generic Mach-O PIC machinery. */
7008 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7009 }
7010 #endif
7011
7012 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7013 new = addr;
7014 else if (TARGET_64BIT
7015 && ix86_cmodel != CM_SMALL_PIC
7016 && local_symbolic_operand (addr, Pmode))
7017 {
7018 rtx tmpreg;
7019 /* This symbol may be referenced via a displacement from the PIC
7020 base address (@GOTOFF). */
7021
7022 if (reload_in_progress)
7023 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7024 if (GET_CODE (addr) == CONST)
7025 addr = XEXP (addr, 0);
7026 if (GET_CODE (addr) == PLUS)
7027 {
7028 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7029 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7030 }
7031 else
7032 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7033 new = gen_rtx_CONST (Pmode, new);
7034 if (!reg)
7035 tmpreg = gen_reg_rtx (Pmode);
7036 else
7037 tmpreg = reg;
7038 emit_move_insn (tmpreg, new);
7039
7040 if (reg != 0)
7041 {
7042 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7043 tmpreg, 1, OPTAB_DIRECT);
7044 new = reg;
7045 }
7046 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7047 }
7048 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7049 {
7050 /* This symbol may be referenced via a displacement from the PIC
7051 base address (@GOTOFF). */
7052
7053 if (reload_in_progress)
7054 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7055 if (GET_CODE (addr) == CONST)
7056 addr = XEXP (addr, 0);
7057 if (GET_CODE (addr) == PLUS)
7058 {
7059 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7060 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7061 }
7062 else
7063 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7064 new = gen_rtx_CONST (Pmode, new);
7065 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7066
7067 if (reg != 0)
7068 {
7069 emit_move_insn (reg, new);
7070 new = reg;
7071 }
7072 }
7073 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7074 {
7075 if (TARGET_64BIT)
7076 {
7077 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7078 new = gen_rtx_CONST (Pmode, new);
7079 new = gen_const_mem (Pmode, new);
7080 set_mem_alias_set (new, ix86_GOT_alias_set ());
7081
7082 if (reg == 0)
7083 reg = gen_reg_rtx (Pmode);
7084 /* Use gen_movsi directly, otherwise the address is loaded
7085 into a register for CSE. We don't want to CSE these addresses;
7086 instead we CSE addresses from the GOT table, so skip this. */
7087 emit_insn (gen_movsi (reg, new));
7088 new = reg;
7089 }
7090 else
7091 {
7092 /* This symbol must be referenced via a load from the
7093 Global Offset Table (@GOT). */
7094
7095 if (reload_in_progress)
7096 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7097 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7098 new = gen_rtx_CONST (Pmode, new);
7099 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7100 new = gen_const_mem (Pmode, new);
7101 set_mem_alias_set (new, ix86_GOT_alias_set ());
7102
7103 if (reg == 0)
7104 reg = gen_reg_rtx (Pmode);
7105 emit_move_insn (reg, new);
7106 new = reg;
7107 }
7108 }
7109 else
7110 {
7111 if (CONST_INT_P (addr)
7112 && !x86_64_immediate_operand (addr, VOIDmode))
7113 {
7114 if (reg)
7115 {
7116 emit_move_insn (reg, addr);
7117 new = reg;
7118 }
7119 else
7120 new = force_reg (Pmode, addr);
7121 }
7122 else if (GET_CODE (addr) == CONST)
7123 {
7124 addr = XEXP (addr, 0);
7125
7126 /* We must match what we generated before. Assume the only
7127 unspecs that can get here are ours. Not that we could do
7128 anything with them anyway.... */
7129 if (GET_CODE (addr) == UNSPEC
7130 || (GET_CODE (addr) == PLUS
7131 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7132 return orig;
7133 gcc_assert (GET_CODE (addr) == PLUS);
7134 }
7135 if (GET_CODE (addr) == PLUS)
7136 {
7137 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7138
7139 /* Check first to see if this is a constant offset from a @GOTOFF
7140 symbol reference. */
7141 if (local_symbolic_operand (op0, Pmode)
7142 && CONST_INT_P (op1))
7143 {
7144 if (!TARGET_64BIT)
7145 {
7146 if (reload_in_progress)
7147 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7148 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7149 UNSPEC_GOTOFF);
7150 new = gen_rtx_PLUS (Pmode, new, op1);
7151 new = gen_rtx_CONST (Pmode, new);
7152 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7153
7154 if (reg != 0)
7155 {
7156 emit_move_insn (reg, new);
7157 new = reg;
7158 }
7159 }
7160 else
7161 {
7162 if (INTVAL (op1) < -16*1024*1024
7163 || INTVAL (op1) >= 16*1024*1024)
7164 {
7165 if (!x86_64_immediate_operand (op1, Pmode))
7166 op1 = force_reg (Pmode, op1);
7167 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7168 }
7169 }
7170 }
7171 else
7172 {
7173 base = legitimize_pic_address (XEXP (addr, 0), reg);
7174 new = legitimize_pic_address (XEXP (addr, 1),
7175 base == reg ? NULL_RTX : reg);
7176
7177 if (CONST_INT_P (new))
7178 new = plus_constant (base, INTVAL (new));
7179 else
7180 {
7181 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7182 {
7183 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7184 new = XEXP (new, 1);
7185 }
7186 new = gen_rtx_PLUS (Pmode, base, new);
7187 }
7188 }
7189 }
7190 }
7191 return new;
7192 }
7193 \f
7194 /* Load the thread pointer. If TO_REG is true, force it into a register. */
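/* For illustration: on GNU/Linux the thread pointer is normally the %gs
   segment base in 32-bit mode and the %fs segment base in 64-bit mode, so
   TP-relative accesses can be emitted as e.g. %gs:offset.  */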
7195
7196 static rtx
7197 get_thread_pointer (int to_reg)
7198 {
7199 rtx tp, reg, insn;
7200
7201 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7202 if (!to_reg)
7203 return tp;
7204
7205 reg = gen_reg_rtx (Pmode);
7206 insn = gen_rtx_SET (VOIDmode, reg, tp);
7207 insn = emit_insn (insn);
7208
7209 return reg;
7210 }
7211
7212 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7213 false if we expect this to be used for a memory address and true if
7214 we expect to load the address into a register. */
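/* Roughly, the models handled below are:
     global-dynamic -- call the tls_get_addr helper to compute the address;
     local-dynamic  -- call the helper once for the module base, then add a
			@DTPOFF offset per variable;
     initial-exec   -- load the TP-relative offset from the GOT (@GOTTPOFF
			and friends) and combine it with the thread pointer;
     local-exec     -- combine a link-time constant (@TPOFF/@NTPOFF) with
			the thread pointer directly.  */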
7215
7216 static rtx
7217 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7218 {
7219 rtx dest, base, off, pic, tp;
7220 int type;
7221
7222 switch (model)
7223 {
7224 case TLS_MODEL_GLOBAL_DYNAMIC:
7225 dest = gen_reg_rtx (Pmode);
7226 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7227
7228 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7229 {
7230 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7231
7232 start_sequence ();
7233 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7234 insns = get_insns ();
7235 end_sequence ();
7236
7237 emit_libcall_block (insns, dest, rax, x);
7238 }
7239 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7240 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7241 else
7242 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7243
7244 if (TARGET_GNU2_TLS)
7245 {
7246 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7247
7248 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7249 }
7250 break;
7251
7252 case TLS_MODEL_LOCAL_DYNAMIC:
7253 base = gen_reg_rtx (Pmode);
7254 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7255
7256 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7257 {
7258 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7259
7260 start_sequence ();
7261 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7262 insns = get_insns ();
7263 end_sequence ();
7264
7265 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7266 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7267 emit_libcall_block (insns, base, rax, note);
7268 }
7269 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7270 emit_insn (gen_tls_local_dynamic_base_64 (base));
7271 else
7272 emit_insn (gen_tls_local_dynamic_base_32 (base));
7273
7274 if (TARGET_GNU2_TLS)
7275 {
7276 rtx x = ix86_tls_module_base ();
7277
7278 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7279 gen_rtx_MINUS (Pmode, x, tp));
7280 }
7281
7282 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7283 off = gen_rtx_CONST (Pmode, off);
7284
7285 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7286
7287 if (TARGET_GNU2_TLS)
7288 {
7289 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7290
7291 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7292 }
7293
7294 break;
7295
7296 case TLS_MODEL_INITIAL_EXEC:
7297 if (TARGET_64BIT)
7298 {
7299 pic = NULL;
7300 type = UNSPEC_GOTNTPOFF;
7301 }
7302 else if (flag_pic)
7303 {
7304 if (reload_in_progress)
7305 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7306 pic = pic_offset_table_rtx;
7307 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7308 }
7309 else if (!TARGET_ANY_GNU_TLS)
7310 {
7311 pic = gen_reg_rtx (Pmode);
7312 emit_insn (gen_set_got (pic));
7313 type = UNSPEC_GOTTPOFF;
7314 }
7315 else
7316 {
7317 pic = NULL;
7318 type = UNSPEC_INDNTPOFF;
7319 }
7320
7321 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7322 off = gen_rtx_CONST (Pmode, off);
7323 if (pic)
7324 off = gen_rtx_PLUS (Pmode, pic, off);
7325 off = gen_const_mem (Pmode, off);
7326 set_mem_alias_set (off, ix86_GOT_alias_set ());
7327
7328 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7329 {
7330 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7331 off = force_reg (Pmode, off);
7332 return gen_rtx_PLUS (Pmode, base, off);
7333 }
7334 else
7335 {
7336 base = get_thread_pointer (true);
7337 dest = gen_reg_rtx (Pmode);
7338 emit_insn (gen_subsi3 (dest, base, off));
7339 }
7340 break;
7341
7342 case TLS_MODEL_LOCAL_EXEC:
7343 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7344 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7345 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7346 off = gen_rtx_CONST (Pmode, off);
7347
7348 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7349 {
7350 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7351 return gen_rtx_PLUS (Pmode, base, off);
7352 }
7353 else
7354 {
7355 base = get_thread_pointer (true);
7356 dest = gen_reg_rtx (Pmode);
7357 emit_insn (gen_subsi3 (dest, base, off));
7358 }
7359 break;
7360
7361 default:
7362 gcc_unreachable ();
7363 }
7364
7365 return dest;
7366 }
7367
7368 /* Try machine-dependent ways of modifying an illegitimate address
7369 to be legitimate. If we find one, return the new, valid address.
7370 This macro is used in only one place: `memory_address' in explow.c.
7371
7372 OLDX is the address as it was before break_out_memory_refs was called.
7373 In some cases it is useful to look at this to decide what needs to be done.
7374
7375 MODE and WIN are passed so that this macro can use
7376 GO_IF_LEGITIMATE_ADDRESS.
7377
7378 It is always safe for this macro to do nothing. It exists to recognize
7379 opportunities to optimize the output.
7380
7381 For the 80386, we handle X+REG by loading X into a register R and
7382 using R+REG. R will go in a general reg and indexing will be used.
7383 However, if REG is a broken-out memory address or multiplication,
7384 nothing needs to be done because REG can certainly go in a general reg.
7385
7386 When -fpic is used, special handling is needed for symbolic references.
7387 See comments by legitimize_pic_address in i386.c for details. */
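/* For example, given (plus (symbol_ref "x") (reg)) we may emit
     movl	$x, %ecx
   and then use the (%ecx,%reg) base+index form, rather than forcing the
   whole sum into a single register.  */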
7388
7389 rtx
7390 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7391 {
7392 int changed = 0;
7393 unsigned log;
7394
7395 if (TARGET_DEBUG_ADDR)
7396 {
7397 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7398 GET_MODE_NAME (mode));
7399 debug_rtx (x);
7400 }
7401
7402 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7403 if (log)
7404 return legitimize_tls_address (x, log, false);
7405 if (GET_CODE (x) == CONST
7406 && GET_CODE (XEXP (x, 0)) == PLUS
7407 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7408 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7409 {
7410 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7411 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7412 }
7413
7414 if (flag_pic && SYMBOLIC_CONST (x))
7415 return legitimize_pic_address (x, 0);
7416
7417 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7418 if (GET_CODE (x) == ASHIFT
7419 && CONST_INT_P (XEXP (x, 1))
7420 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7421 {
7422 changed = 1;
7423 log = INTVAL (XEXP (x, 1));
7424 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7425 GEN_INT (1 << log));
7426 }
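/* For example, (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), matching the scaled-index part of an
   address.  */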
7427
7428 if (GET_CODE (x) == PLUS)
7429 {
7430 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7431
7432 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7433 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7434 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7435 {
7436 changed = 1;
7437 log = INTVAL (XEXP (XEXP (x, 0), 1));
7438 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7439 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7440 GEN_INT (1 << log));
7441 }
7442
7443 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7444 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7445 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7446 {
7447 changed = 1;
7448 log = INTVAL (XEXP (XEXP (x, 1), 1));
7449 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7450 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7451 GEN_INT (1 << log));
7452 }
7453
7454 /* Put multiply first if it isn't already. */
7455 if (GET_CODE (XEXP (x, 1)) == MULT)
7456 {
7457 rtx tmp = XEXP (x, 0);
7458 XEXP (x, 0) = XEXP (x, 1);
7459 XEXP (x, 1) = tmp;
7460 changed = 1;
7461 }
7462
7463 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7464 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7465 created by virtual register instantiation, register elimination, and
7466 similar optimizations. */
7467 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7468 {
7469 changed = 1;
7470 x = gen_rtx_PLUS (Pmode,
7471 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7472 XEXP (XEXP (x, 1), 0)),
7473 XEXP (XEXP (x, 1), 1));
7474 }
7475
7476 /* Canonicalize
7477 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7478 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7479 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7480 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7481 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7482 && CONSTANT_P (XEXP (x, 1)))
7483 {
7484 rtx constant;
7485 rtx other = NULL_RTX;
7486
7487 if (CONST_INT_P (XEXP (x, 1)))
7488 {
7489 constant = XEXP (x, 1);
7490 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7491 }
7492 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7493 {
7494 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7495 other = XEXP (x, 1);
7496 }
7497 else
7498 constant = 0;
7499
7500 if (constant)
7501 {
7502 changed = 1;
7503 x = gen_rtx_PLUS (Pmode,
7504 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7505 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7506 plus_constant (other, INTVAL (constant)));
7507 }
7508 }
7509
7510 if (changed && legitimate_address_p (mode, x, FALSE))
7511 return x;
7512
7513 if (GET_CODE (XEXP (x, 0)) == MULT)
7514 {
7515 changed = 1;
7516 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7517 }
7518
7519 if (GET_CODE (XEXP (x, 1)) == MULT)
7520 {
7521 changed = 1;
7522 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7523 }
7524
7525 if (changed
7526 && REG_P (XEXP (x, 1))
7527 && REG_P (XEXP (x, 0)))
7528 return x;
7529
7530 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7531 {
7532 changed = 1;
7533 x = legitimize_pic_address (x, 0);
7534 }
7535
7536 if (changed && legitimate_address_p (mode, x, FALSE))
7537 return x;
7538
7539 if (REG_P (XEXP (x, 0)))
7540 {
7541 rtx temp = gen_reg_rtx (Pmode);
7542 rtx val = force_operand (XEXP (x, 1), temp);
7543 if (val != temp)
7544 emit_move_insn (temp, val);
7545
7546 XEXP (x, 1) = temp;
7547 return x;
7548 }
7549
7550 else if (REG_P (XEXP (x, 1)))
7551 {
7552 rtx temp = gen_reg_rtx (Pmode);
7553 rtx val = force_operand (XEXP (x, 0), temp);
7554 if (val != temp)
7555 emit_move_insn (temp, val);
7556
7557 XEXP (x, 0) = temp;
7558 return x;
7559 }
7560 }
7561
7562 return x;
7563 }
7564 \f
7565 /* Print an integer constant expression in assembler syntax. Addition
7566 and subtraction are the only arithmetic that may appear in these
7567 expressions. FILE is the stdio stream to write to, X is the rtx, and
7568 CODE is the operand print code from the output string. */
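/* For example, (const (plus (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)
   (const_int 4))) comes out as "4+foo@GOTOFF": the UNSPEC selects the
   relocation suffix and the integer addend is printed first.  */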
7569
7570 static void
7571 output_pic_addr_const (FILE *file, rtx x, int code)
7572 {
7573 char buf[256];
7574
7575 switch (GET_CODE (x))
7576 {
7577 case PC:
7578 gcc_assert (flag_pic);
7579 putc ('.', file);
7580 break;
7581
7582 case SYMBOL_REF:
7583 output_addr_const (file, x);
7584 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7585 fputs ("@PLT", file);
7586 break;
7587
7588 case LABEL_REF:
7589 x = XEXP (x, 0);
7590 /* FALLTHRU */
7591 case CODE_LABEL:
7592 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7593 assemble_name (asm_out_file, buf);
7594 break;
7595
7596 case CONST_INT:
7597 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7598 break;
7599
7600 case CONST:
7601 /* This used to output parentheses around the expression,
7602 but that does not work on the 386 (either ATT or BSD assembler). */
7603 output_pic_addr_const (file, XEXP (x, 0), code);
7604 break;
7605
7606 case CONST_DOUBLE:
7607 if (GET_MODE (x) == VOIDmode)
7608 {
7609 /* We can use %d if the number is <32 bits and positive. */
7610 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7611 fprintf (file, "0x%lx%08lx",
7612 (unsigned long) CONST_DOUBLE_HIGH (x),
7613 (unsigned long) CONST_DOUBLE_LOW (x));
7614 else
7615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7616 }
7617 else
7618 /* We can't handle floating point constants;
7619 PRINT_OPERAND must handle them. */
7620 output_operand_lossage ("floating constant misused");
7621 break;
7622
7623 case PLUS:
7624 /* Some assemblers need integer constants to appear first. */
7625 if (CONST_INT_P (XEXP (x, 0)))
7626 {
7627 output_pic_addr_const (file, XEXP (x, 0), code);
7628 putc ('+', file);
7629 output_pic_addr_const (file, XEXP (x, 1), code);
7630 }
7631 else
7632 {
7633 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7634 output_pic_addr_const (file, XEXP (x, 1), code);
7635 putc ('+', file);
7636 output_pic_addr_const (file, XEXP (x, 0), code);
7637 }
7638 break;
7639
7640 case MINUS:
7641 if (!TARGET_MACHO)
7642 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7643 output_pic_addr_const (file, XEXP (x, 0), code);
7644 putc ('-', file);
7645 output_pic_addr_const (file, XEXP (x, 1), code);
7646 if (!TARGET_MACHO)
7647 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7648 break;
7649
7650 case UNSPEC:
7651 gcc_assert (XVECLEN (x, 0) == 1);
7652 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7653 switch (XINT (x, 1))
7654 {
7655 case UNSPEC_GOT:
7656 fputs ("@GOT", file);
7657 break;
7658 case UNSPEC_GOTOFF:
7659 fputs ("@GOTOFF", file);
7660 break;
7661 case UNSPEC_GOTPCREL:
7662 fputs ("@GOTPCREL(%rip)", file);
7663 break;
7664 case UNSPEC_GOTTPOFF:
7665 /* FIXME: This might be @TPOFF in Sun ld too. */
7666 fputs ("@GOTTPOFF", file);
7667 break;
7668 case UNSPEC_TPOFF:
7669 fputs ("@TPOFF", file);
7670 break;
7671 case UNSPEC_NTPOFF:
7672 if (TARGET_64BIT)
7673 fputs ("@TPOFF", file);
7674 else
7675 fputs ("@NTPOFF", file);
7676 break;
7677 case UNSPEC_DTPOFF:
7678 fputs ("@DTPOFF", file);
7679 break;
7680 case UNSPEC_GOTNTPOFF:
7681 if (TARGET_64BIT)
7682 fputs ("@GOTTPOFF(%rip)", file);
7683 else
7684 fputs ("@GOTNTPOFF", file);
7685 break;
7686 case UNSPEC_INDNTPOFF:
7687 fputs ("@INDNTPOFF", file);
7688 break;
7689 default:
7690 output_operand_lossage ("invalid UNSPEC as operand");
7691 break;
7692 }
7693 break;
7694
7695 default:
7696 output_operand_lossage ("invalid expression as operand");
7697 }
7698 }
7699
7700 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7701 We need to emit DTP-relative relocations. */
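/* For example, with the usual "\t.long\t" definition of ASM_LONG this emits
     .long foo@DTPOFF
   for SIZE 4, and appends ", 0" to widen the value to 8 bytes for SIZE 8.  */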
7702
7703 static void
7704 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7705 {
7706 fputs (ASM_LONG, file);
7707 output_addr_const (file, x);
7708 fputs ("@DTPOFF", file);
7709 switch (size)
7710 {
7711 case 4:
7712 break;
7713 case 8:
7714 fputs (", 0", file);
7715 break;
7716 default:
7717 gcc_unreachable ();
7718 }
7719 }
7720
7721 /* In the name of slightly smaller debug output, and to cater to
7722 general assembler lossage, recognize PIC+GOTOFF and turn it back
7723 into a direct symbol reference.
7724
7725 On Darwin, this is necessary to avoid a crash, because Darwin
7726 has a different PIC label for each routine but the DWARF debugging
7727 information is not associated with any particular routine, so it's
7728 necessary to remove references to the PIC label from RTL stored by
7729 the DWARF output code. */
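/* For example, (plus (reg %ebx) (const (unspec [(symbol_ref "x")]
   UNSPEC_GOTOFF))) is turned back into (symbol_ref "x"), with any register
   or constant addend from the original address re-attached afterwards.  */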
7730
7731 static rtx
7732 ix86_delegitimize_address (rtx orig_x)
7733 {
7734 rtx x = orig_x;
7735 /* reg_addend is NULL or a multiple of some register. */
7736 rtx reg_addend = NULL_RTX;
7737 /* const_addend is NULL or a const_int. */
7738 rtx const_addend = NULL_RTX;
7739 /* This is the result, or NULL. */
7740 rtx result = NULL_RTX;
7741
7742 if (MEM_P (x))
7743 x = XEXP (x, 0);
7744
7745 if (TARGET_64BIT)
7746 {
7747 if (GET_CODE (x) != CONST
7748 || GET_CODE (XEXP (x, 0)) != UNSPEC
7749 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7750 || !MEM_P (orig_x))
7751 return orig_x;
7752 return XVECEXP (XEXP (x, 0), 0, 0);
7753 }
7754
7755 if (GET_CODE (x) != PLUS
7756 || GET_CODE (XEXP (x, 1)) != CONST)
7757 return orig_x;
7758
7759 if (REG_P (XEXP (x, 0))
7760 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7761 /* %ebx + GOT/GOTOFF */
7762 ;
7763 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7764 {
7765 /* %ebx + %reg * scale + GOT/GOTOFF */
7766 reg_addend = XEXP (x, 0);
7767 if (REG_P (XEXP (reg_addend, 0))
7768 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7769 reg_addend = XEXP (reg_addend, 1);
7770 else if (REG_P (XEXP (reg_addend, 1))
7771 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7772 reg_addend = XEXP (reg_addend, 0);
7773 else
7774 return orig_x;
7775 if (!REG_P (reg_addend)
7776 && GET_CODE (reg_addend) != MULT
7777 && GET_CODE (reg_addend) != ASHIFT)
7778 return orig_x;
7779 }
7780 else
7781 return orig_x;
7782
7783 x = XEXP (XEXP (x, 1), 0);
7784 if (GET_CODE (x) == PLUS
7785 && CONST_INT_P (XEXP (x, 1)))
7786 {
7787 const_addend = XEXP (x, 1);
7788 x = XEXP (x, 0);
7789 }
7790
7791 if (GET_CODE (x) == UNSPEC
7792 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7793 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7794 result = XVECEXP (x, 0, 0);
7795
7796 if (TARGET_MACHO && darwin_local_data_pic (x)
7797 && !MEM_P (orig_x))
7798 result = XEXP (x, 0);
7799
7800 if (! result)
7801 return orig_x;
7802
7803 if (const_addend)
7804 result = gen_rtx_PLUS (Pmode, result, const_addend);
7805 if (reg_addend)
7806 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7807 return result;
7808 }
7809 \f
7810 static void
7811 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7812 int fp, FILE *file)
7813 {
7814 const char *suffix;
7815
7816 if (mode == CCFPmode || mode == CCFPUmode)
7817 {
7818 enum rtx_code second_code, bypass_code;
7819 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7820 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7821 code = ix86_fp_compare_code_to_integer (code);
7822 mode = CCmode;
7823 }
7824 if (reverse)
7825 code = reverse_condition (code);
7826
7827 switch (code)
7828 {
7829 case EQ:
7830 suffix = "e";
7831 break;
7832 case NE:
7833 suffix = "ne";
7834 break;
7835 case GT:
7836 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7837 suffix = "g";
7838 break;
7839 case GTU:
7840 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7841 Those same assemblers have the same but opposite lossage on cmov. */
7842 gcc_assert (mode == CCmode);
7843 suffix = fp ? "nbe" : "a";
7844 break;
7845 case LT:
7846 switch (mode)
7847 {
7848 case CCNOmode:
7849 case CCGOCmode:
7850 suffix = "s";
7851 break;
7852
7853 case CCmode:
7854 case CCGCmode:
7855 suffix = "l";
7856 break;
7857
7858 default:
7859 gcc_unreachable ();
7860 }
7861 break;
7862 case LTU:
7863 gcc_assert (mode == CCmode);
7864 suffix = "b";
7865 break;
7866 case GE:
7867 switch (mode)
7868 {
7869 case CCNOmode:
7870 case CCGOCmode:
7871 suffix = "ns";
7872 break;
7873
7874 case CCmode:
7875 case CCGCmode:
7876 suffix = "ge";
7877 break;
7878
7879 default:
7880 gcc_unreachable ();
7881 }
7882 break;
7883 case GEU:
7884 /* ??? As above. */
7885 gcc_assert (mode == CCmode);
7886 suffix = fp ? "nb" : "ae";
7887 break;
7888 case LE:
7889 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7890 suffix = "le";
7891 break;
7892 case LEU:
7893 gcc_assert (mode == CCmode);
7894 suffix = "be";
7895 break;
7896 case UNORDERED:
7897 suffix = fp ? "u" : "p";
7898 break;
7899 case ORDERED:
7900 suffix = fp ? "nu" : "np";
7901 break;
7902 default:
7903 gcc_unreachable ();
7904 }
7905 fputs (suffix, file);
7906 }
7907
7908 /* Print the name of register X to FILE based on its machine mode and number.
7909 If CODE is 'w', pretend the mode is HImode.
7910 If CODE is 'b', pretend the mode is QImode.
7911 If CODE is 'k', pretend the mode is SImode.
7912 If CODE is 'q', pretend the mode is DImode.
7913 If CODE is 'h', pretend the reg is the 'high' byte register.
7914 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7915
7916 void
7917 print_reg (rtx x, int code, FILE *file)
7918 {
7919 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7920 && REGNO (x) != FRAME_POINTER_REGNUM
7921 && REGNO (x) != FLAGS_REG
7922 && REGNO (x) != FPSR_REG
7923 && REGNO (x) != FPCR_REG);
7924
7925 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7926 putc ('%', file);
7927
7928 if (code == 'w' || MMX_REG_P (x))
7929 code = 2;
7930 else if (code == 'b')
7931 code = 1;
7932 else if (code == 'k')
7933 code = 4;
7934 else if (code == 'q')
7935 code = 8;
7936 else if (code == 'y')
7937 code = 3;
7938 else if (code == 'h')
7939 code = 0;
7940 else
7941 code = GET_MODE_SIZE (GET_MODE (x));
7942
7943 /* Irritatingly, the AMD extended registers use a different naming
7944 convention from the normal registers. */
7945 if (REX_INT_REG_P (x))
7946 {
7947 gcc_assert (TARGET_64BIT);
7948 switch (code)
7949 {
7950 case 0:
7951 error ("extended registers have no high halves");
7952 break;
7953 case 1:
7954 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7955 break;
7956 case 2:
7957 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7958 break;
7959 case 4:
7960 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7961 break;
7962 case 8:
7963 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7964 break;
7965 default:
7966 error ("unsupported operand size for extended register");
7967 break;
7968 }
7969 return;
7970 }
7971 switch (code)
7972 {
7973 case 3:
7974 if (STACK_TOP_P (x))
7975 {
7976 fputs ("st(0)", file);
7977 break;
7978 }
7979 /* FALLTHRU */
7980 case 8:
7981 case 4:
7982 case 12:
7983 if (! ANY_FP_REG_P (x))
7984 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7985 /* FALLTHRU */
7986 case 16:
7987 case 2:
7988 normal:
7989 fputs (hi_reg_name[REGNO (x)], file);
7990 break;
7991 case 1:
7992 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7993 goto normal;
7994 fputs (qi_reg_name[REGNO (x)], file);
7995 break;
7996 case 0:
7997 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7998 goto normal;
7999 fputs (qi_high_reg_name[REGNO (x)], file);
8000 break;
8001 default:
8002 gcc_unreachable ();
8003 }
8004 }
8005
8006 /* Locate some local-dynamic symbol still in use by this function
8007 so that we can print its name in some tls_local_dynamic_base
8008 pattern. */
8009
8010 static const char *
8011 get_some_local_dynamic_name (void)
8012 {
8013 rtx insn;
8014
8015 if (cfun->machine->some_ld_name)
8016 return cfun->machine->some_ld_name;
8017
8018 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8019 if (INSN_P (insn)
8020 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8021 return cfun->machine->some_ld_name;
8022
8023 gcc_unreachable ();
8024 }
8025
8026 static int
8027 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8028 {
8029 rtx x = *px;
8030
8031 if (GET_CODE (x) == SYMBOL_REF
8032 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8033 {
8034 cfun->machine->some_ld_name = XSTR (x, 0);
8035 return 1;
8036 }
8037
8038 return 0;
8039 }
8040
8041 /* Meaning of CODE:
8042 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8043 C -- print opcode suffix for set/cmov insn.
8044 c -- like C, but print reversed condition
8045 F,f -- likewise, but for floating-point.
8046 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8047 otherwise nothing
8048 R -- print the prefix for register names.
8049 z -- print the opcode suffix for the size of the current operand.
8050 * -- print a star (in certain assembler syntax)
8051 A -- print an absolute memory reference.
8052 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8053 s -- print a shift double count, followed by the assembler's argument
8054 delimiter.
8055 b -- print the QImode name of the register for the indicated operand.
8056 %b0 would print %al if operands[0] is reg 0.
8057 w -- likewise, print the HImode name of the register.
8058 k -- likewise, print the SImode name of the register.
8059 q -- likewise, print the DImode name of the register.
8060 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8061 y -- print "st(0)" instead of "st" as a register.
8062 D -- print condition for SSE cmp instruction.
8063 P -- if PIC, print an @PLT suffix.
8064 X -- don't print any sort of PIC '@' suffix for a symbol.
8065 & -- print some in-use local-dynamic symbol name.
8066 H -- print a memory address offset by 8; used for sse high-parts
8067 */
8068
8069 void
8070 print_operand (FILE *file, rtx x, int code)
8071 {
8072 if (code)
8073 {
8074 switch (code)
8075 {
8076 case '*':
8077 if (ASSEMBLER_DIALECT == ASM_ATT)
8078 putc ('*', file);
8079 return;
8080
8081 case '&':
8082 assemble_name (file, get_some_local_dynamic_name ());
8083 return;
8084
8085 case 'A':
8086 switch (ASSEMBLER_DIALECT)
8087 {
8088 case ASM_ATT:
8089 putc ('*', file);
8090 break;
8091
8092 case ASM_INTEL:
8093 /* Intel syntax. For absolute addresses, registers should not
8094 be surrounded by brackets. */
8095 if (!REG_P (x))
8096 {
8097 putc ('[', file);
8098 PRINT_OPERAND (file, x, 0);
8099 putc (']', file);
8100 return;
8101 }
8102 break;
8103
8104 default:
8105 gcc_unreachable ();
8106 }
8107
8108 PRINT_OPERAND (file, x, 0);
8109 return;
8110
8111
8112 case 'L':
8113 if (ASSEMBLER_DIALECT == ASM_ATT)
8114 putc ('l', file);
8115 return;
8116
8117 case 'W':
8118 if (ASSEMBLER_DIALECT == ASM_ATT)
8119 putc ('w', file);
8120 return;
8121
8122 case 'B':
8123 if (ASSEMBLER_DIALECT == ASM_ATT)
8124 putc ('b', file);
8125 return;
8126
8127 case 'Q':
8128 if (ASSEMBLER_DIALECT == ASM_ATT)
8129 putc ('l', file);
8130 return;
8131
8132 case 'S':
8133 if (ASSEMBLER_DIALECT == ASM_ATT)
8134 putc ('s', file);
8135 return;
8136
8137 case 'T':
8138 if (ASSEMBLER_DIALECT == ASM_ATT)
8139 putc ('t', file);
8140 return;
8141
8142 case 'z':
8143 /* 387 opcodes don't get size suffixes if the operands are
8144 registers. */
8145 if (STACK_REG_P (x))
8146 return;
8147
8148 /* Likewise if using Intel opcodes. */
8149 if (ASSEMBLER_DIALECT == ASM_INTEL)
8150 return;
8151
8152 /* This is the size of op from size of operand. */
8153 switch (GET_MODE_SIZE (GET_MODE (x)))
8154 {
8155 case 1:
8156 putc ('b', file);
8157 return;
8158
8159 case 2:
8160 #ifdef HAVE_GAS_FILDS_FISTS
8161 putc ('s', file);
8162 #endif
8163 return;
8164
8165 case 4:
8166 if (GET_MODE (x) == SFmode)
8167 {
8168 putc ('s', file);
8169 return;
8170 }
8171 else
8172 putc ('l', file);
8173 return;
8174
8175 case 12:
8176 case 16:
8177 putc ('t', file);
8178 return;
8179
8180 case 8:
8181 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8182 {
8183 #ifdef GAS_MNEMONICS
8184 putc ('q', file);
8185 #else
8186 putc ('l', file);
8187 putc ('l', file);
8188 #endif
8189 }
8190 else
8191 putc ('l', file);
8192 return;
8193
8194 default:
8195 gcc_unreachable ();
8196 }
8197
8198 case 'b':
8199 case 'w':
8200 case 'k':
8201 case 'q':
8202 case 'h':
8203 case 'y':
8204 case 'X':
8205 case 'P':
8206 break;
8207
8208 case 's':
8209 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8210 {
8211 PRINT_OPERAND (file, x, 0);
8212 putc (',', file);
8213 }
8214 return;
8215
8216 case 'D':
8217 /* A little bit of brain damage here. The SSE compare instructions
8218 use completely different names for the comparisons than the
8219 fp conditional moves do. */
8220 switch (GET_CODE (x))
8221 {
8222 case EQ:
8223 case UNEQ:
8224 fputs ("eq", file);
8225 break;
8226 case LT:
8227 case UNLT:
8228 fputs ("lt", file);
8229 break;
8230 case LE:
8231 case UNLE:
8232 fputs ("le", file);
8233 break;
8234 case UNORDERED:
8235 fputs ("unord", file);
8236 break;
8237 case NE:
8238 case LTGT:
8239 fputs ("neq", file);
8240 break;
8241 case UNGE:
8242 case GE:
8243 fputs ("nlt", file);
8244 break;
8245 case UNGT:
8246 case GT:
8247 fputs ("nle", file);
8248 break;
8249 case ORDERED:
8250 fputs ("ord", file);
8251 break;
8252 default:
8253 gcc_unreachable ();
8254 }
8255 return;
8256 case 'O':
8257 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8258 if (ASSEMBLER_DIALECT == ASM_ATT)
8259 {
8260 switch (GET_MODE (x))
8261 {
8262 case HImode: putc ('w', file); break;
8263 case SImode:
8264 case SFmode: putc ('l', file); break;
8265 case DImode:
8266 case DFmode: putc ('q', file); break;
8267 default: gcc_unreachable ();
8268 }
8269 putc ('.', file);
8270 }
8271 #endif
8272 return;
8273 case 'C':
8274 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8275 return;
8276 case 'F':
8277 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8278 if (ASSEMBLER_DIALECT == ASM_ATT)
8279 putc ('.', file);
8280 #endif
8281 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8282 return;
8283
8284 /* Like above, but reverse condition */
8285 case 'c':
8286 /* Check to see if argument to %c is really a constant
8287 and not a condition code which needs to be reversed. */
8288 if (!COMPARISON_P (x))
8289 {
8290 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8291 return;
8292 }
8293 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8294 return;
8295 case 'f':
8296 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8297 if (ASSEMBLER_DIALECT == ASM_ATT)
8298 putc ('.', file);
8299 #endif
8300 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8301 return;
8302
8303 case 'H':
8304 /* It doesn't actually matter what mode we use here, as we're
8305 only going to use this for printing. */
8306 x = adjust_address_nv (x, DImode, 8);
8307 break;
8308
8309 case '+':
8310 {
8311 rtx x;
8312
8313 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8314 return;
8315
8316 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8317 if (x)
8318 {
8319 int pred_val = INTVAL (XEXP (x, 0));
8320
8321 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8322 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8323 {
8324 int taken = pred_val > REG_BR_PROB_BASE / 2;
8325 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8326
8327 /* Emit hints only when the default branch prediction
8328 heuristics would fail. */
8329 if (taken != cputaken)
8330 {
8331 /* We use 3e (DS) prefix for taken branches and
8332 2e (CS) prefix for not taken branches. */
8333 if (taken)
8334 fputs ("ds ; ", file);
8335 else
8336 fputs ("cs ; ", file);
8337 }
8338 }
8339 }
8340 return;
8341 }
8342 default:
8343 output_operand_lossage ("invalid operand code '%c'", code);
8344 }
8345 }
8346
8347 if (REG_P (x))
8348 print_reg (x, code, file);
8349
8350 else if (MEM_P (x))
8351 {
8352 /* No `byte ptr' prefix for call instructions. */
8353 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8354 {
8355 const char * size;
8356 switch (GET_MODE_SIZE (GET_MODE (x)))
8357 {
8358 case 1: size = "BYTE"; break;
8359 case 2: size = "WORD"; break;
8360 case 4: size = "DWORD"; break;
8361 case 8: size = "QWORD"; break;
8362 case 12: size = "XWORD"; break;
8363 case 16: size = "XMMWORD"; break;
8364 default:
8365 gcc_unreachable ();
8366 }
8367
8368 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8369 if (code == 'b')
8370 size = "BYTE";
8371 else if (code == 'w')
8372 size = "WORD";
8373 else if (code == 'k')
8374 size = "DWORD";
8375
8376 fputs (size, file);
8377 fputs (" PTR ", file);
8378 }
8379
8380 x = XEXP (x, 0);
8381 /* Avoid (%rip) for call operands. */
8382 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8383 && !CONST_INT_P (x))
8384 output_addr_const (file, x);
8385 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8386 output_operand_lossage ("invalid constraints for operand");
8387 else
8388 output_address (x);
8389 }
8390
8391 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8392 {
8393 REAL_VALUE_TYPE r;
8394 long l;
8395
8396 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8397 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8398
8399 if (ASSEMBLER_DIALECT == ASM_ATT)
8400 putc ('$', file);
8401 fprintf (file, "0x%08lx", l);
8402 }
8403
8404 /* These float cases don't actually occur as immediate operands. */
8405 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8406 {
8407 char dstr[30];
8408
8409 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8410 fprintf (file, "%s", dstr);
8411 }
8412
8413 else if (GET_CODE (x) == CONST_DOUBLE
8414 && GET_MODE (x) == XFmode)
8415 {
8416 char dstr[30];
8417
8418 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8419 fprintf (file, "%s", dstr);
8420 }
8421
8422 else
8423 {
8424 /* We have patterns that allow zero sets of memory, for instance.
8425 In 64-bit mode, we should probably support all 8-byte vectors,
8426 since we can in fact encode that into an immediate. */
8427 if (GET_CODE (x) == CONST_VECTOR)
8428 {
8429 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8430 x = const0_rtx;
8431 }
8432
8433 if (code != 'P')
8434 {
8435 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8436 {
8437 if (ASSEMBLER_DIALECT == ASM_ATT)
8438 putc ('$', file);
8439 }
8440 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8441 || GET_CODE (x) == LABEL_REF)
8442 {
8443 if (ASSEMBLER_DIALECT == ASM_ATT)
8444 putc ('$', file);
8445 else
8446 fputs ("OFFSET FLAT:", file);
8447 }
8448 }
8449 if (CONST_INT_P (x))
8450 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8451 else if (flag_pic)
8452 output_pic_addr_const (file, x, code);
8453 else
8454 output_addr_const (file, x);
8455 }
8456 }
8457 \f
8458 /* Print a memory operand whose address is ADDR. */
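/* For example, a base + index*scale + disp address comes out roughly as
   "12(%eax,%ebx,4)" in AT&T syntax and as "[eax+12+ebx*4]" in Intel
   syntax.  */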
8459
8460 void
8461 print_operand_address (FILE *file, rtx addr)
8462 {
8463 struct ix86_address parts;
8464 rtx base, index, disp;
8465 int scale;
8466 int ok = ix86_decompose_address (addr, &parts);
8467
8468 gcc_assert (ok);
8469
8470 base = parts.base;
8471 index = parts.index;
8472 disp = parts.disp;
8473 scale = parts.scale;
8474
8475 switch (parts.seg)
8476 {
8477 case SEG_DEFAULT:
8478 break;
8479 case SEG_FS:
8480 case SEG_GS:
8481 if (USER_LABEL_PREFIX[0] == 0)
8482 putc ('%', file);
8483 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8484 break;
8485 default:
8486 gcc_unreachable ();
8487 }
8488
8489 if (!base && !index)
8490 {
8491 /* A displacement-only address requires special attention. */
8492
8493 if (CONST_INT_P (disp))
8494 {
8495 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8496 {
8497 if (USER_LABEL_PREFIX[0] == 0)
8498 putc ('%', file);
8499 fputs ("ds:", file);
8500 }
8501 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8502 }
8503 else if (flag_pic)
8504 output_pic_addr_const (file, disp, 0);
8505 else
8506 output_addr_const (file, disp);
8507
8508 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8509 if (TARGET_64BIT)
8510 {
8511 if (GET_CODE (disp) == CONST
8512 && GET_CODE (XEXP (disp, 0)) == PLUS
8513 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8514 disp = XEXP (XEXP (disp, 0), 0);
8515 if (GET_CODE (disp) == LABEL_REF
8516 || (GET_CODE (disp) == SYMBOL_REF
8517 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8518 fputs ("(%rip)", file);
8519 }
8520 }
8521 else
8522 {
8523 if (ASSEMBLER_DIALECT == ASM_ATT)
8524 {
8525 if (disp)
8526 {
8527 if (flag_pic)
8528 output_pic_addr_const (file, disp, 0);
8529 else if (GET_CODE (disp) == LABEL_REF)
8530 output_asm_label (disp);
8531 else
8532 output_addr_const (file, disp);
8533 }
8534
8535 putc ('(', file);
8536 if (base)
8537 print_reg (base, 0, file);
8538 if (index)
8539 {
8540 putc (',', file);
8541 print_reg (index, 0, file);
8542 if (scale != 1)
8543 fprintf (file, ",%d", scale);
8544 }
8545 putc (')', file);
8546 }
8547 else
8548 {
8549 rtx offset = NULL_RTX;
8550
8551 if (disp)
8552 {
8553 /* Pull out the offset of a symbol; print any symbol itself. */
8554 if (GET_CODE (disp) == CONST
8555 && GET_CODE (XEXP (disp, 0)) == PLUS
8556 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8557 {
8558 offset = XEXP (XEXP (disp, 0), 1);
8559 disp = gen_rtx_CONST (VOIDmode,
8560 XEXP (XEXP (disp, 0), 0));
8561 }
8562
8563 if (flag_pic)
8564 output_pic_addr_const (file, disp, 0);
8565 else if (GET_CODE (disp) == LABEL_REF)
8566 output_asm_label (disp);
8567 else if (CONST_INT_P (disp))
8568 offset = disp;
8569 else
8570 output_addr_const (file, disp);
8571 }
8572
8573 putc ('[', file);
8574 if (base)
8575 {
8576 print_reg (base, 0, file);
8577 if (offset)
8578 {
8579 if (INTVAL (offset) >= 0)
8580 putc ('+', file);
8581 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8582 }
8583 }
8584 else if (offset)
8585 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8586 else
8587 putc ('0', file);
8588
8589 if (index)
8590 {
8591 putc ('+', file);
8592 print_reg (index, 0, file);
8593 if (scale != 1)
8594 fprintf (file, "*%d", scale);
8595 }
8596 putc (']', file);
8597 }
8598 }
8599 }
8600
8601 bool
8602 output_addr_const_extra (FILE *file, rtx x)
8603 {
8604 rtx op;
8605
8606 if (GET_CODE (x) != UNSPEC)
8607 return false;
8608
8609 op = XVECEXP (x, 0, 0);
8610 switch (XINT (x, 1))
8611 {
8612 case UNSPEC_GOTTPOFF:
8613 output_addr_const (file, op);
8614 /* FIXME: This might be @TPOFF in Sun ld. */
8615 fputs ("@GOTTPOFF", file);
8616 break;
8617 case UNSPEC_TPOFF:
8618 output_addr_const (file, op);
8619 fputs ("@TPOFF", file);
8620 break;
8621 case UNSPEC_NTPOFF:
8622 output_addr_const (file, op);
8623 if (TARGET_64BIT)
8624 fputs ("@TPOFF", file);
8625 else
8626 fputs ("@NTPOFF", file);
8627 break;
8628 case UNSPEC_DTPOFF:
8629 output_addr_const (file, op);
8630 fputs ("@DTPOFF", file);
8631 break;
8632 case UNSPEC_GOTNTPOFF:
8633 output_addr_const (file, op);
8634 if (TARGET_64BIT)
8635 fputs ("@GOTTPOFF(%rip)", file);
8636 else
8637 fputs ("@GOTNTPOFF", file);
8638 break;
8639 case UNSPEC_INDNTPOFF:
8640 output_addr_const (file, op);
8641 fputs ("@INDNTPOFF", file);
8642 break;
8643
8644 default:
8645 return false;
8646 }
8647
8648 return true;
8649 }
8650 \f
8651 /* Split one or more DImode RTL references into pairs of SImode
8652 references. The RTL can be REG, offsettable MEM, integer constant, or
8653 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8654 split and "num" is its length. lo_half and hi_half are output arrays
8655 that parallel "operands". */
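/* For example, a DImode register pair is split into SImode subregs at byte
   offsets 0 and 4, while an offsettable MEM is split into two SImode
   references at its address and at address + 4.  */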
8656
8657 void
8658 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8659 {
8660 while (num--)
8661 {
8662 rtx op = operands[num];
8663
8664 /* simplify_subreg refuses to split volatile memory addresses,
8665 but we still have to handle them. */
8666 if (MEM_P (op))
8667 {
8668 lo_half[num] = adjust_address (op, SImode, 0);
8669 hi_half[num] = adjust_address (op, SImode, 4);
8670 }
8671 else
8672 {
8673 lo_half[num] = simplify_gen_subreg (SImode, op,
8674 GET_MODE (op) == VOIDmode
8675 ? DImode : GET_MODE (op), 0);
8676 hi_half[num] = simplify_gen_subreg (SImode, op,
8677 GET_MODE (op) == VOIDmode
8678 ? DImode : GET_MODE (op), 4);
8679 }
8680 }
8681 }
8682 /* Split one or more TImode RTL references into pairs of DImode
8683 references. The RTL can be REG, offsettable MEM, integer constant, or
8684 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8685 split and "num" is its length. lo_half and hi_half are output arrays
8686 that parallel "operands". */
8687
8688 void
8689 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8690 {
8691 while (num--)
8692 {
8693 rtx op = operands[num];
8694
8695 /* simplify_subreg refuses to split volatile memory addresses, but we
8696 still have to handle them. */
8697 if (MEM_P (op))
8698 {
8699 lo_half[num] = adjust_address (op, DImode, 0);
8700 hi_half[num] = adjust_address (op, DImode, 8);
8701 }
8702 else
8703 {
8704 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8705 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8706 }
8707 }
8708 }
8709 \f
8710 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8711 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8712 is the expression of the binary operation. The output may either be
8713 emitted here, or returned to the caller, like all output_* functions.
8714
8715 There is no guarantee that the operands are the same mode, as they
8716 might be within FLOAT or FLOAT_EXTEND expressions. */
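/* For example, an SFmode SSE addition comes out as the template
     addss\t{%2, %0|%0, %2}
   (the braces select between AT&T and Intel operand order), while the x87
   forms are built from "fadd", "fsub", etc. plus the suffixes chosen
   below.  */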
8717
8718 #ifndef SYSV386_COMPAT
8719 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8720 wants to fix the assemblers because that causes incompatibility
8721 with gcc. No-one wants to fix gcc because that causes
8722 incompatibility with assemblers... You can use the option of
8723 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8724 #define SYSV386_COMPAT 1
8725 #endif
8726
8727 const char *
8728 output_387_binary_op (rtx insn, rtx *operands)
8729 {
8730 static char buf[30];
8731 const char *p;
8732 const char *ssep;
8733 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8734
8735 #ifdef ENABLE_CHECKING
8736 /* Even if we do not want to check the inputs, this documents the input
8737 constraints, which helps in understanding the following code. */
8738 if (STACK_REG_P (operands[0])
8739 && ((REG_P (operands[1])
8740 && REGNO (operands[0]) == REGNO (operands[1])
8741 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8742 || (REG_P (operands[2])
8743 && REGNO (operands[0]) == REGNO (operands[2])
8744 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8745 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8746 ; /* ok */
8747 else
8748 gcc_assert (is_sse);
8749 #endif
8750
8751 switch (GET_CODE (operands[3]))
8752 {
8753 case PLUS:
8754 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8755 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8756 p = "fiadd";
8757 else
8758 p = "fadd";
8759 ssep = "add";
8760 break;
8761
8762 case MINUS:
8763 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8764 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8765 p = "fisub";
8766 else
8767 p = "fsub";
8768 ssep = "sub";
8769 break;
8770
8771 case MULT:
8772 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8773 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8774 p = "fimul";
8775 else
8776 p = "fmul";
8777 ssep = "mul";
8778 break;
8779
8780 case DIV:
8781 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8782 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8783 p = "fidiv";
8784 else
8785 p = "fdiv";
8786 ssep = "div";
8787 break;
8788
8789 default:
8790 gcc_unreachable ();
8791 }
8792
8793 if (is_sse)
8794 {
8795 strcpy (buf, ssep);
8796 if (GET_MODE (operands[0]) == SFmode)
8797 strcat (buf, "ss\t{%2, %0|%0, %2}");
8798 else
8799 strcat (buf, "sd\t{%2, %0|%0, %2}");
8800 return buf;
8801 }
8802 strcpy (buf, p);
8803
8804 switch (GET_CODE (operands[3]))
8805 {
8806 case MULT:
8807 case PLUS:
8808 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8809 {
8810 rtx temp = operands[2];
8811 operands[2] = operands[1];
8812 operands[1] = temp;
8813 }
8814
8815 /* We know operands[0] == operands[1]. */
8816
8817 if (MEM_P (operands[2]))
8818 {
8819 p = "%z2\t%2";
8820 break;
8821 }
8822
8823 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8824 {
8825 if (STACK_TOP_P (operands[0]))
8826 /* How is it that we are storing to a dead operand[2]?
8827 Well, presumably operands[1] is dead too. We can't
8828 store the result to st(0) as st(0) gets popped on this
8829 instruction. Instead store to operands[2] (which I
8830 think has to be st(1)). st(1) will be popped later.
8831 gcc <= 2.8.1 didn't have this check and generated
8832 assembly code that the Unixware assembler rejected. */
8833 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8834 else
8835 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8836 break;
8837 }
8838
8839 if (STACK_TOP_P (operands[0]))
8840 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8841 else
8842 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8843 break;
8844
8845 case MINUS:
8846 case DIV:
8847 if (MEM_P (operands[1]))
8848 {
8849 p = "r%z1\t%1";
8850 break;
8851 }
8852
8853 if (MEM_P (operands[2]))
8854 {
8855 p = "%z2\t%2";
8856 break;
8857 }
8858
8859 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8860 {
8861 #if SYSV386_COMPAT
8862 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8863 derived assemblers, confusingly reverse the direction of
8864 the operation for fsub{r} and fdiv{r} when the
8865 destination register is not st(0). The Intel assembler
8866 doesn't have this brain damage. Read !SYSV386_COMPAT to
8867 figure out what the hardware really does. */
8868 if (STACK_TOP_P (operands[0]))
8869 p = "{p\t%0, %2|rp\t%2, %0}";
8870 else
8871 p = "{rp\t%2, %0|p\t%0, %2}";
8872 #else
8873 if (STACK_TOP_P (operands[0]))
8874 /* As above for fmul/fadd, we can't store to st(0). */
8875 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8876 else
8877 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8878 #endif
8879 break;
8880 }
8881
8882 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8883 {
8884 #if SYSV386_COMPAT
8885 if (STACK_TOP_P (operands[0]))
8886 p = "{rp\t%0, %1|p\t%1, %0}";
8887 else
8888 p = "{p\t%1, %0|rp\t%0, %1}";
8889 #else
8890 if (STACK_TOP_P (operands[0]))
8891 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8892 else
8893 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8894 #endif
8895 break;
8896 }
8897
8898 if (STACK_TOP_P (operands[0]))
8899 {
8900 if (STACK_TOP_P (operands[1]))
8901 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8902 else
8903 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8904 break;
8905 }
8906 else if (STACK_TOP_P (operands[1]))
8907 {
8908 #if SYSV386_COMPAT
8909 p = "{\t%1, %0|r\t%0, %1}";
8910 #else
8911 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8912 #endif
8913 }
8914 else
8915 {
8916 #if SYSV386_COMPAT
8917 p = "{r\t%2, %0|\t%0, %2}";
8918 #else
8919 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8920 #endif
8921 }
8922 break;
8923
8924 default:
8925 gcc_unreachable ();
8926 }
8927
8928 strcat (buf, p);
8929 return buf;
8930 }
8931
8932 /* Return needed mode for entity in optimize_mode_switching pass. */
8933
8934 int
8935 ix86_mode_needed (int entity, rtx insn)
8936 {
8937 enum attr_i387_cw mode;
8938
8939 /* The mode UNINITIALIZED is used to store the control word after a
8940 function call or ASM pattern. The mode ANY specifies that the function
8941 has no requirements on the control word and makes no changes to the
8942 bits we are interested in. */
8943
8944 if (CALL_P (insn)
8945 || (NONJUMP_INSN_P (insn)
8946 && (asm_noperands (PATTERN (insn)) >= 0
8947 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8948 return I387_CW_UNINITIALIZED;
8949
8950 if (recog_memoized (insn) < 0)
8951 return I387_CW_ANY;
8952
8953 mode = get_attr_i387_cw (insn);
8954
8955 switch (entity)
8956 {
8957 case I387_TRUNC:
8958 if (mode == I387_CW_TRUNC)
8959 return mode;
8960 break;
8961
8962 case I387_FLOOR:
8963 if (mode == I387_CW_FLOOR)
8964 return mode;
8965 break;
8966
8967 case I387_CEIL:
8968 if (mode == I387_CW_CEIL)
8969 return mode;
8970 break;
8971
8972 case I387_MASK_PM:
8973 if (mode == I387_CW_MASK_PM)
8974 return mode;
8975 break;
8976
8977 default:
8978 gcc_unreachable ();
8979 }
8980
8981 return I387_CW_ANY;
8982 }
8983
8984 /* Output code to initialize the control word copies used by trunc?f?i and
8985 rounding patterns. MODE selects the rounding variant; the current control
8986 word is saved and a modified copy is stored in the stack slot for MODE. */
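/* The magic constants used below come from the i387 control word layout:
   the rounding-control field is bits 10-11, so 0x0c00 selects truncation,
   0x0400 round-down and 0x0800 round-up, while bit 5 (0x0020) is the
   precision-exception mask (PM) used for nearbyint(). */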
8987
8988 void
8989 emit_i387_cw_initialization (int mode)
8990 {
8991 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8992 rtx new_mode;
8993
8994 int slot;
8995
8996 rtx reg = gen_reg_rtx (HImode);
8997
8998 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8999 emit_move_insn (reg, copy_rtx (stored_mode));
9000
9001 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9002 {
9003 switch (mode)
9004 {
9005 case I387_CW_TRUNC:
9006 /* round toward zero (truncate) */
9007 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9008 slot = SLOT_CW_TRUNC;
9009 break;
9010
9011 case I387_CW_FLOOR:
9012 /* round down toward -oo */
9013 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9014 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9015 slot = SLOT_CW_FLOOR;
9016 break;
9017
9018 case I387_CW_CEIL:
9019 /* round up toward +oo */
9020 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9021 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9022 slot = SLOT_CW_CEIL;
9023 break;
9024
9025 case I387_CW_MASK_PM:
9026 /* mask precision exception for nearbyint() */
9027 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9028 slot = SLOT_CW_MASK_PM;
9029 break;
9030
9031 default:
9032 gcc_unreachable ();
9033 }
9034 }
9035 else
9036 {
9037 switch (mode)
9038 {
9039 case I387_CW_TRUNC:
9040 /* round toward zero (truncate) */
9041 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9042 slot = SLOT_CW_TRUNC;
9043 break;
9044
9045 case I387_CW_FLOOR:
9046 /* round down toward -oo */
9047 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9048 slot = SLOT_CW_FLOOR;
9049 break;
9050
9051 case I387_CW_CEIL:
9052 /* round up toward +oo */
9053 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9054 slot = SLOT_CW_CEIL;
9055 break;
9056
9057 case I387_CW_MASK_PM:
9058 /* mask precision exception for nearbyint() */
9059 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9060 slot = SLOT_CW_MASK_PM;
9061 break;
9062
9063 default:
9064 gcc_unreachable ();
9065 }
9066 }
9067
9068 gcc_assert (slot < MAX_386_STACK_LOCALS);
9069
9070 new_mode = assign_386_stack_local (HImode, slot);
9071 emit_move_insn (new_mode, reg);
9072 }
9073
9074 /* Output code for INSN to convert a float to a signed int. OPERANDS
9075 are the insn operands. The output may be [HSD]Imode and the input
9076 operand may be [SDX]Fmode. */
9077
9078 const char *
9079 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9080 {
9081 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9082 int dimode_p = GET_MODE (operands[0]) == DImode;
9083 int round_mode = get_attr_i387_cw (insn);
9084
9085 /* Jump through a hoop or two for DImode, since the hardware has no
9086 non-popping instruction. We used to do this a different way, but
9087 that was somewhat fragile and broke with post-reload splitters. */
9088 if ((dimode_p || fisttp) && !stack_top_dies)
9089 output_asm_insn ("fld\t%y1", operands);
9090
9091 gcc_assert (STACK_TOP_P (operands[1]));
9092 gcc_assert (MEM_P (operands[0]));
9093
9094 if (fisttp)
9095 output_asm_insn ("fisttp%z0\t%0", operands);
9096 else
9097 {
9098 if (round_mode != I387_CW_ANY)
9099 output_asm_insn ("fldcw\t%3", operands);
9100 if (stack_top_dies || dimode_p)
9101 output_asm_insn ("fistp%z0\t%0", operands);
9102 else
9103 output_asm_insn ("fist%z0\t%0", operands);
9104 if (round_mode != I387_CW_ANY)
9105 output_asm_insn ("fldcw\t%2", operands);
9106 }
9107
9108 return "";
9109 }
9110
9111 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9112 have the values zero or one, indicates the ffreep insn's operand
9113 from the OPERANDS array. */
9114
9115 static const char *
9116 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9117 {
9118 if (TARGET_USE_FFREEP)
9119 #if HAVE_AS_IX86_FFREEP
9120 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9121 #else
9122 {
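/* ffreep %st(N) encodes as the byte pair 0xDF 0xC0+N; emitting it as a
   little-endian .word (0xc0df ... 0xc7df) makes the insn usable even when
   the assembler does not recognize the mnemonic. */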
9123 static char retval[] = ".word\t0xc_df";
9124 int regno = REGNO (operands[opno]);
9125
9126 gcc_assert (FP_REGNO_P (regno));
9127
9128 retval[9] = '0' + (regno - FIRST_STACK_REG);
9129 return retval;
9130 }
9131 #endif
9132
9133 return opno ? "fstp\t%y1" : "fstp\t%y0";
9134 }
9135
9136
9137 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9138 should be used. UNORDERED_P is true when fucom should be used. */
9139
9140 const char *
9141 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9142 {
9143 int stack_top_dies;
9144 rtx cmp_op0, cmp_op1;
9145 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9146
9147 if (eflags_p)
9148 {
9149 cmp_op0 = operands[0];
9150 cmp_op1 = operands[1];
9151 }
9152 else
9153 {
9154 cmp_op0 = operands[1];
9155 cmp_op1 = operands[2];
9156 }
9157
9158 if (is_sse)
9159 {
9160 if (GET_MODE (operands[0]) == SFmode)
9161 if (unordered_p)
9162 return "ucomiss\t{%1, %0|%0, %1}";
9163 else
9164 return "comiss\t{%1, %0|%0, %1}";
9165 else
9166 if (unordered_p)
9167 return "ucomisd\t{%1, %0|%0, %1}";
9168 else
9169 return "comisd\t{%1, %0|%0, %1}";
9170 }
9171
9172 gcc_assert (STACK_TOP_P (cmp_op0));
9173
9174 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9175
9176 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9177 {
9178 if (stack_top_dies)
9179 {
9180 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9181 return output_387_ffreep (operands, 1);
9182 }
9183 else
9184 return "ftst\n\tfnstsw\t%0";
9185 }
9186
9187 if (STACK_REG_P (cmp_op1)
9188 && stack_top_dies
9189 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9190 && REGNO (cmp_op1) != FIRST_STACK_REG)
9191 {
9192 /* If both the top of the 387 stack and the other operand (which must
9193 also be a stack register) die in this insn, then this must be a
9194 `fcompp' float compare. */
9195
9196 if (eflags_p)
9197 {
9198 /* There is no double popping fcomi variant. Fortunately,
9199 eflags is immune from the fstp's cc clobbering. */
9200 if (unordered_p)
9201 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9202 else
9203 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9204 return output_387_ffreep (operands, 0);
9205 }
9206 else
9207 {
9208 if (unordered_p)
9209 return "fucompp\n\tfnstsw\t%0";
9210 else
9211 return "fcompp\n\tfnstsw\t%0";
9212 }
9213 }
9214 else
9215 {
9216 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9217
9218 static const char * const alt[16] =
9219 {
9220 "fcom%z2\t%y2\n\tfnstsw\t%0",
9221 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9222 "fucom%z2\t%y2\n\tfnstsw\t%0",
9223 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9224
9225 "ficom%z2\t%y2\n\tfnstsw\t%0",
9226 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9227 NULL,
9228 NULL,
9229
9230 "fcomi\t{%y1, %0|%0, %y1}",
9231 "fcomip\t{%y1, %0|%0, %y1}",
9232 "fucomi\t{%y1, %0|%0, %y1}",
9233 "fucomip\t{%y1, %0|%0, %y1}",
9234
9235 NULL,
9236 NULL,
9237 NULL,
9238 NULL
9239 };
9240
9241 int mask;
9242 const char *ret;
9243
9244 mask = eflags_p << 3;
9245 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9246 mask |= unordered_p << 1;
9247 mask |= stack_top_dies;
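/* For example, eflags_p = 1 with a non-integer operand, unordered_p = 1
   and a dying top-of-stack gives mask = 0b1011 = 11, selecting "fucomip". */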
9248
9249 gcc_assert (mask < 16);
9250 ret = alt[mask];
9251 gcc_assert (ret);
9252
9253 return ret;
9254 }
9255 }
9256
9257 void
9258 ix86_output_addr_vec_elt (FILE *file, int value)
9259 {
9260 const char *directive = ASM_LONG;
9261
9262 #ifdef ASM_QUAD
9263 if (TARGET_64BIT)
9264 directive = ASM_QUAD;
9265 #else
9266 gcc_assert (!TARGET_64BIT);
9267 #endif
9268
9269 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9270 }
9271
9272 void
9273 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9274 {
9275 if (TARGET_64BIT)
9276 fprintf (file, "%s%s%d-%s%d\n",
9277 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9278 else if (HAVE_AS_GOTOFF_IN_DATA)
9279 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9280 #if TARGET_MACHO
9281 else if (TARGET_MACHO)
9282 {
9283 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9284 machopic_output_function_base_name (file);
9285 fprintf(file, "\n");
9286 }
9287 #endif
9288 else
9289 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9290 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9291 }
9292 \f
9293 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9294 for the target. */
9295
9296 void
9297 ix86_expand_clear (rtx dest)
9298 {
9299 rtx tmp;
9300
9301 /* We play register width games, which are only valid after reload. */
9302 gcc_assert (reload_completed);
9303
9304 /* Avoid HImode and its attendant prefix byte. */
9305 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9306 dest = gen_rtx_REG (SImode, REGNO (dest));
9307
9308 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9309
9310 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9311 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9312 {
9313 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9314 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9315 }
9316
9317 emit_insn (tmp);
9318 }
9319
9320 /* X is an unchanging MEM. If it is a constant pool reference, return
9321 the constant pool rtx, else NULL. */
9322
9323 rtx
9324 maybe_get_pool_constant (rtx x)
9325 {
9326 x = ix86_delegitimize_address (XEXP (x, 0));
9327
9328 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9329 return get_pool_constant (x);
9330
9331 return NULL_RTX;
9332 }
9333
9334 void
9335 ix86_expand_move (enum machine_mode mode, rtx operands[])
9336 {
9337 int strict = (reload_in_progress || reload_completed);
9338 rtx op0, op1;
9339 enum tls_model model;
9340
9341 op0 = operands[0];
9342 op1 = operands[1];
9343
9344 if (GET_CODE (op1) == SYMBOL_REF)
9345 {
9346 model = SYMBOL_REF_TLS_MODEL (op1);
9347 if (model)
9348 {
9349 op1 = legitimize_tls_address (op1, model, true);
9350 op1 = force_operand (op1, op0);
9351 if (op1 == op0)
9352 return;
9353 }
9354 }
9355 else if (GET_CODE (op1) == CONST
9356 && GET_CODE (XEXP (op1, 0)) == PLUS
9357 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9358 {
9359 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9360 if (model)
9361 {
9362 rtx addend = XEXP (XEXP (op1, 0), 1);
9363 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9364 op1 = force_operand (op1, NULL);
9365 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9366 op0, 1, OPTAB_DIRECT);
9367 if (op1 == op0)
9368 return;
9369 }
9370 }
9371
9372 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9373 {
9374 if (TARGET_MACHO && !TARGET_64BIT)
9375 {
9376 #if TARGET_MACHO
9377 if (MACHOPIC_PURE)
9378 {
9379 rtx temp = ((reload_in_progress
9380 || ((op0 && REG_P (op0))
9381 && mode == Pmode))
9382 ? op0 : gen_reg_rtx (Pmode));
9383 op1 = machopic_indirect_data_reference (op1, temp);
9384 op1 = machopic_legitimize_pic_address (op1, mode,
9385 temp == op1 ? 0 : temp);
9386 }
9387 else if (MACHOPIC_INDIRECT)
9388 op1 = machopic_indirect_data_reference (op1, 0);
9389 if (op0 == op1)
9390 return;
9391 #endif
9392 }
9393 else
9394 {
9395 if (MEM_P (op0))
9396 op1 = force_reg (Pmode, op1);
9397 else
9398 op1 = legitimize_address (op1, op1, Pmode);
9399 }
9400 }
9401 else
9402 {
9403 if (MEM_P (op0)
9404 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9405 || !push_operand (op0, mode))
9406 && MEM_P (op1))
9407 op1 = force_reg (mode, op1);
9408
9409 if (push_operand (op0, mode)
9410 && ! general_no_elim_operand (op1, mode))
9411 op1 = copy_to_mode_reg (mode, op1);
9412
9413 /* Force large constants in 64-bit compilation into a register
9414 so they get CSEed. */
9415 if (TARGET_64BIT && mode == DImode
9416 && immediate_operand (op1, mode)
9417 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9418 && !register_operand (op0, mode)
9419 && optimize && !reload_completed && !reload_in_progress)
9420 op1 = copy_to_mode_reg (mode, op1);
9421
9422 if (FLOAT_MODE_P (mode))
9423 {
9424 /* If we are loading a floating point constant into a register,
9425 force the value to memory now, since we'll get better code
9426 out of the back end. */
9427
9428 if (strict)
9429 ;
9430 else if (GET_CODE (op1) == CONST_DOUBLE)
9431 {
9432 op1 = validize_mem (force_const_mem (mode, op1));
9433 if (!register_operand (op0, mode))
9434 {
9435 rtx temp = gen_reg_rtx (mode);
9436 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9437 emit_move_insn (op0, temp);
9438 return;
9439 }
9440 }
9441 }
9442 }
9443
9444 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9445 }
9446
9447 void
9448 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9449 {
9450 rtx op0 = operands[0], op1 = operands[1];
9451
9452 /* Force constants other than zero into memory. We do not know how
9453 the instructions used to build constants modify the upper 64 bits
9454 of the register; once we have that information we may be able
9455 to handle some of them more efficiently. */
9456 if ((reload_in_progress | reload_completed) == 0
9457 && register_operand (op0, mode)
9458 && CONSTANT_P (op1)
9459 && standard_sse_constant_p (op1) <= 0)
9460 op1 = validize_mem (force_const_mem (mode, op1));
9461
9462 /* Make operand1 a register if it isn't already. */
9463 if (!no_new_pseudos
9464 && !register_operand (op0, mode)
9465 && !register_operand (op1, mode))
9466 {
9467 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9468 return;
9469 }
9470
9471 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9472 }
9473
9474 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9475 straight to ix86_expand_vector_move. */
9476
9477 void
9478 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9479 {
9480 rtx op0, op1, m;
9481
9482 op0 = operands[0];
9483 op1 = operands[1];
9484
9485 if (MEM_P (op1))
9486 {
9487 /* If we're optimizing for size, movups is the smallest. */
9488 if (optimize_size)
9489 {
9490 op0 = gen_lowpart (V4SFmode, op0);
9491 op1 = gen_lowpart (V4SFmode, op1);
9492 emit_insn (gen_sse_movups (op0, op1));
9493 return;
9494 }
9495
9496 /* ??? If we have typed data, then it would appear that using
9497 movdqu is the only way to get unaligned data loaded with
9498 integer type. */
9499 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9500 {
9501 op0 = gen_lowpart (V16QImode, op0);
9502 op1 = gen_lowpart (V16QImode, op1);
9503 emit_insn (gen_sse2_movdqu (op0, op1));
9504 return;
9505 }
9506
9507 if (TARGET_SSE2 && mode == V2DFmode)
9508 {
9509 rtx zero;
9510
9511 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9512 {
9513 op0 = gen_lowpart (V2DFmode, op0);
9514 op1 = gen_lowpart (V2DFmode, op1);
9515 emit_insn (gen_sse2_movupd (op0, op1));
9516 return;
9517 }
9518
9519 /* When SSE registers are split into halves, we can avoid
9520 writing to the top half twice. */
9521 if (TARGET_SSE_SPLIT_REGS)
9522 {
9523 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9524 zero = op0;
9525 }
9526 else
9527 {
9528 /* ??? Not sure about the best option for the Intel chips.
9529 The following would seem to satisfy; the register is
9530 entirely cleared, breaking the dependency chain. We
9531 then store to the upper half, with a dependency depth
9532 of one. A rumor has it that Intel recommends two movsd
9533 followed by an unpacklpd, but this is unconfirmed. And
9534 given that the dependency depth of the unpacklpd would
9535 still be one, I'm not sure why this would be better. */
9536 zero = CONST0_RTX (V2DFmode);
9537 }
9538
9539 m = adjust_address (op1, DFmode, 0);
9540 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9541 m = adjust_address (op1, DFmode, 8);
9542 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9543 }
9544 else
9545 {
9546 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9547 {
9548 op0 = gen_lowpart (V4SFmode, op0);
9549 op1 = gen_lowpart (V4SFmode, op1);
9550 emit_insn (gen_sse_movups (op0, op1));
9551 return;
9552 }
9553
9554 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9555 emit_move_insn (op0, CONST0_RTX (mode));
9556 else
9557 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9558
9559 if (mode != V4SFmode)
9560 op0 = gen_lowpart (V4SFmode, op0);
9561 m = adjust_address (op1, V2SFmode, 0);
9562 emit_insn (gen_sse_loadlps (op0, op0, m));
9563 m = adjust_address (op1, V2SFmode, 8);
9564 emit_insn (gen_sse_loadhps (op0, op0, m));
9565 }
9566 }
9567 else if (MEM_P (op0))
9568 {
9569 /* If we're optimizing for size, movups is the smallest. */
9570 if (optimize_size)
9571 {
9572 op0 = gen_lowpart (V4SFmode, op0);
9573 op1 = gen_lowpart (V4SFmode, op1);
9574 emit_insn (gen_sse_movups (op0, op1));
9575 return;
9576 }
9577
9578 /* ??? Similar to above, only less clear because of the
9579 "typeless stores" issue. */
9580 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9581 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9582 {
9583 op0 = gen_lowpart (V16QImode, op0);
9584 op1 = gen_lowpart (V16QImode, op1);
9585 emit_insn (gen_sse2_movdqu (op0, op1));
9586 return;
9587 }
9588
9589 if (TARGET_SSE2 && mode == V2DFmode)
9590 {
9591 m = adjust_address (op0, DFmode, 0);
9592 emit_insn (gen_sse2_storelpd (m, op1));
9593 m = adjust_address (op0, DFmode, 8);
9594 emit_insn (gen_sse2_storehpd (m, op1));
9595 }
9596 else
9597 {
9598 if (mode != V4SFmode)
9599 op1 = gen_lowpart (V4SFmode, op1);
9600 m = adjust_address (op0, V2SFmode, 0);
9601 emit_insn (gen_sse_storelps (m, op1));
9602 m = adjust_address (op0, V2SFmode, 8);
9603 emit_insn (gen_sse_storehps (m, op1));
9604 }
9605 }
9606 else
9607 gcc_unreachable ();
9608 }
9609
9610 /* Expand a push in MODE. This is some mode for which we do not support
9611 proper push instructions, at least from the registers that we expect
9612 the value to live in. */
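/* The expansion is just a stack pointer adjustment followed by an ordinary
   move, roughly (illustrative) "sub $N, %esp" then "mov<mode> X, (%esp)"
   where N is GET_MODE_SIZE (mode). */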
9613
9614 void
9615 ix86_expand_push (enum machine_mode mode, rtx x)
9616 {
9617 rtx tmp;
9618
9619 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9620 GEN_INT (-GET_MODE_SIZE (mode)),
9621 stack_pointer_rtx, 1, OPTAB_DIRECT);
9622 if (tmp != stack_pointer_rtx)
9623 emit_move_insn (stack_pointer_rtx, tmp);
9624
9625 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9626 emit_move_insn (tmp, x);
9627 }
9628
9629 /* Helper function of ix86_fixup_binary_operands to canonicalize
9630 operand order. Returns true if the operands should be swapped. */
9631
9632 static bool
9633 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9634 rtx operands[])
9635 {
9636 rtx dst = operands[0];
9637 rtx src1 = operands[1];
9638 rtx src2 = operands[2];
9639
9640 /* If the operation is not commutative, we can't do anything. */
9641 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9642 return false;
9643
9644 /* Highest priority is that src1 should match dst. */
9645 if (rtx_equal_p (dst, src1))
9646 return false;
9647 if (rtx_equal_p (dst, src2))
9648 return true;
9649
9650 /* Next highest priority is that immediate constants come second. */
9651 if (immediate_operand (src2, mode))
9652 return false;
9653 if (immediate_operand (src1, mode))
9654 return true;
9655
9656 /* Lowest priority is that memory references should come second. */
9657 if (MEM_P (src2))
9658 return false;
9659 if (MEM_P (src1))
9660 return true;
9661
9662 return false;
9663 }
9664
9665
9666 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9667 destination to use for the operation. If different from the true
9668 destination in operands[0], a copy operation will be required. */
9669
9670 rtx
9671 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9672 rtx operands[])
9673 {
9674 rtx dst = operands[0];
9675 rtx src1 = operands[1];
9676 rtx src2 = operands[2];
9677
9678 /* Canonicalize operand order. */
9679 if (ix86_swap_binary_operands_p (code, mode, operands))
9680 {
9681 rtx temp = src1;
9682 src1 = src2;
9683 src2 = temp;
9684 }
9685
9686 /* Both source operands cannot be in memory. */
9687 if (MEM_P (src1) && MEM_P (src2))
9688 {
9689 /* Optimization: Only read from memory once. */
9690 if (rtx_equal_p (src1, src2))
9691 {
9692 src2 = force_reg (mode, src2);
9693 src1 = src2;
9694 }
9695 else
9696 src2 = force_reg (mode, src2);
9697 }
9698
9699 /* If the destination is memory, and we do not have matching source
9700 operands, do things in registers. */
9701 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9702 dst = gen_reg_rtx (mode);
9703
9704 /* Source 1 cannot be a constant. */
9705 if (CONSTANT_P (src1))
9706 src1 = force_reg (mode, src1);
9707
9708 /* Source 1 cannot be a non-matching memory. */
9709 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9710 src1 = force_reg (mode, src1);
9711
9712 operands[1] = src1;
9713 operands[2] = src2;
9714 return dst;
9715 }
9716
9717 /* Similarly, but assume that the destination has already been
9718 set up properly. */
9719
9720 void
9721 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9722 enum machine_mode mode, rtx operands[])
9723 {
9724 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9725 gcc_assert (dst == operands[0]);
9726 }
9727
9728 /* Attempt to expand a binary operator. Make the expansion closer to the
9729 actual machine than just general_operand, which would allow 3 separate
9730 memory references (one output, two input) in a single insn. */
9731
9732 void
9733 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9734 rtx operands[])
9735 {
9736 rtx src1, src2, dst, op, clob;
9737
9738 dst = ix86_fixup_binary_operands (code, mode, operands);
9739 src1 = operands[1];
9740 src2 = operands[2];
9741
9742 /* Emit the instruction. */
9743
9744 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9745 if (reload_in_progress)
9746 {
9747 /* Reload doesn't know about the flags register, and doesn't know that
9748 it doesn't want to clobber it. We can only do this with PLUS. */
9749 gcc_assert (code == PLUS);
9750 emit_insn (op);
9751 }
9752 else
9753 {
9754 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9755 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9756 }
9757
9758 /* Fix up the destination if needed. */
9759 if (dst != operands[0])
9760 emit_move_insn (operands[0], dst);
9761 }
9762
9763 /* Return TRUE or FALSE depending on whether the binary operator meets the
9764 appropriate constraints. */
9765
9766 int
9767 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9768 rtx operands[3])
9769 {
9770 rtx dst = operands[0];
9771 rtx src1 = operands[1];
9772 rtx src2 = operands[2];
9773
9774 /* Both source operands cannot be in memory. */
9775 if (MEM_P (src1) && MEM_P (src2))
9776 return 0;
9777
9778 /* Canonicalize operand order for commutative operators. */
9779 if (ix86_swap_binary_operands_p (code, mode, operands))
9780 {
9781 rtx temp = src1;
9782 src1 = src2;
9783 src2 = temp;
9784 }
9785
9786 /* If the destination is memory, we must have a matching source operand. */
9787 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9788 return 0;
9789
9790 /* Source 1 cannot be a constant. */
9791 if (CONSTANT_P (src1))
9792 return 0;
9793
9794 /* Source 1 cannot be a non-matching memory. */
9795 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9796 return 0;
9797
9798 return 1;
9799 }
9800
9801 /* Attempt to expand a unary operator. Make the expansion closer to the
9802 actual machine than just general_operand, which would allow 2 separate
9803 memory references (one output, one input) in a single insn. */
9804
9805 void
9806 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9807 rtx operands[])
9808 {
9809 int matching_memory;
9810 rtx src, dst, op, clob;
9811
9812 dst = operands[0];
9813 src = operands[1];
9814
9815 /* If the destination is memory, and we do not have matching source
9816 operands, do things in registers. */
9817 matching_memory = 0;
9818 if (MEM_P (dst))
9819 {
9820 if (rtx_equal_p (dst, src))
9821 matching_memory = 1;
9822 else
9823 dst = gen_reg_rtx (mode);
9824 }
9825
9826 /* When source operand is memory, destination must match. */
9827 if (MEM_P (src) && !matching_memory)
9828 src = force_reg (mode, src);
9829
9830 /* Emit the instruction. */
9831
9832 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9833 if (reload_in_progress || code == NOT)
9834 {
9835 /* Reload doesn't know about the flags register, and doesn't know that
9836 it doesn't want to clobber it. */
9837 gcc_assert (code == NOT);
9838 emit_insn (op);
9839 }
9840 else
9841 {
9842 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9843 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9844 }
9845
9846 /* Fix up the destination if needed. */
9847 if (dst != operands[0])
9848 emit_move_insn (operands[0], dst);
9849 }
9850
9851 /* Return TRUE or FALSE depending on whether the unary operator meets the
9852 appropriate constraints. */
9853
9854 int
9855 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9856 enum machine_mode mode ATTRIBUTE_UNUSED,
9857 rtx operands[2] ATTRIBUTE_UNUSED)
9858 {
9859 /* If one of the operands is memory, source and destination must match. */
9860 if ((MEM_P (operands[0])
9861 || MEM_P (operands[1]))
9862 && ! rtx_equal_p (operands[0], operands[1]))
9863 return FALSE;
9864 return TRUE;
9865 }
9866
9867 /* Post-reload splitter for converting an SF or DFmode value in an
9868 SSE register into an unsigned SImode. */
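/* Sketch of the method: values >= 2^31 cannot be converted directly by the
   signed cvtt instructions, so we build a mask of the lanes that are
   >= 2^31, subtract 2^31 from those lanes, do the signed conversion, and
   finally xor 0x80000000 back into the converted result for the masked
   lanes. */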
9869
9870 void
9871 ix86_split_convert_uns_si_sse (rtx operands[])
9872 {
9873 enum machine_mode vecmode;
9874 rtx value, large, zero_or_two31, input, two31, x;
9875
9876 large = operands[1];
9877 zero_or_two31 = operands[2];
9878 input = operands[3];
9879 two31 = operands[4];
9880 vecmode = GET_MODE (large);
9881 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9882
9883 /* Load up the value into the low element. We must ensure that the other
9884 elements are valid floats -- zero is the easiest such value. */
9885 if (MEM_P (input))
9886 {
9887 if (vecmode == V4SFmode)
9888 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9889 else
9890 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9891 }
9892 else
9893 {
9894 input = gen_rtx_REG (vecmode, REGNO (input));
9895 emit_move_insn (value, CONST0_RTX (vecmode));
9896 if (vecmode == V4SFmode)
9897 emit_insn (gen_sse_movss (value, value, input));
9898 else
9899 emit_insn (gen_sse2_movsd (value, value, input));
9900 }
9901
9902 emit_move_insn (large, two31);
9903 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9904
9905 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9906 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9907
9908 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9909 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9910
9911 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9912 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9913
9914 large = gen_rtx_REG (V4SImode, REGNO (large));
9915 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9916
9917 x = gen_rtx_REG (V4SImode, REGNO (value));
9918 if (vecmode == V4SFmode)
9919 emit_insn (gen_sse2_cvttps2dq (x, value));
9920 else
9921 emit_insn (gen_sse2_cvttpd2dq (x, value));
9922 value = x;
9923
9924 emit_insn (gen_xorv4si3 (value, value, large));
9925 }
9926
9927 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9928 Expects the 64-bit DImode to be supplied in a pair of integral
9929 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9930 -mfpmath=sse, !optimize_size only. */
9931
9932 void
9933 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9934 {
9935 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9936 rtx int_xmm, fp_xmm;
9937 rtx biases, exponents;
9938 rtx x;
9939
9940 int_xmm = gen_reg_rtx (V4SImode);
9941 if (TARGET_INTER_UNIT_MOVES)
9942 emit_insn (gen_movdi_to_sse (int_xmm, input));
9943 else if (TARGET_SSE_SPLIT_REGS)
9944 {
9945 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9946 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9947 }
9948 else
9949 {
9950 x = gen_reg_rtx (V2DImode);
9951 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9952 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9953 }
9954
9955 x = gen_rtx_CONST_VECTOR (V4SImode,
9956 gen_rtvec (4, GEN_INT (0x43300000UL),
9957 GEN_INT (0x45300000UL),
9958 const0_rtx, const0_rtx));
9959 exponents = validize_mem (force_const_mem (V4SImode, x));
9960
9961 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9962 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9963
9964 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9965 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9966 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9967 (0x1.0p84 + 0x1.0p32 * double(fp_value_hi_xmm)).
9968 Note these exponents differ by 32. */
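/* Worked example (illustrative): for the input 0x0000000100000002 we get
   lo = 2 and hi = 1, so the two doubles are 2^52 + 2 and 2^84 + 1 * 2^32;
   subtracting the 2^52 and 2^84 biases leaves 2.0 and 4294967296.0, whose
   sum is the expected 4294967298.0. */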
9969
9970 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9971
9972 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9973 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9974 real_ldexp (&bias_lo_rvt, &dconst1, 52);
9975 real_ldexp (&bias_hi_rvt, &dconst1, 84);
9976 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
9977 x = const_double_from_real_value (bias_hi_rvt, DFmode);
9978 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
9979 biases = validize_mem (force_const_mem (V2DFmode, biases));
9980 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
9981
9982 /* Add the upper and lower DFmode values together. */
9983 if (TARGET_SSE3)
9984 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
9985 else
9986 {
9987 x = copy_to_mode_reg (V2DFmode, fp_xmm);
9988 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
9989 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
9990 }
9991
9992 ix86_expand_vector_extract (false, target, fp_xmm, 0);
9993 }
9994
9995 /* Convert an unsigned SImode value into a DFmode. Only currently used
9996 for SSE, but applicable anywhere. */
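/* The trick: adding -2^31 to the unsigned input (with wraparound) re-biases
   it into the signed range, the signed int->double conversion is then
   exact, and adding 2^31.0 back as a double recovers the original unsigned
   value. */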
9997
9998 void
9999 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10000 {
10001 REAL_VALUE_TYPE TWO31r;
10002 rtx x, fp;
10003
10004 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10005 NULL, 1, OPTAB_DIRECT);
10006
10007 fp = gen_reg_rtx (DFmode);
10008 emit_insn (gen_floatsidf2 (fp, x));
10009
10010 real_ldexp (&TWO31r, &dconst1, 31);
10011 x = const_double_from_real_value (TWO31r, DFmode);
10012
10013 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10014 if (x != target)
10015 emit_move_insn (target, x);
10016 }
10017
10018 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10019 32-bit mode; otherwise we have a direct convert instruction. */
10020
10021 void
10022 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10023 {
10024 REAL_VALUE_TYPE TWO32r;
10025 rtx fp_lo, fp_hi, x;
10026
10027 fp_lo = gen_reg_rtx (DFmode);
10028 fp_hi = gen_reg_rtx (DFmode);
10029
10030 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10031
10032 real_ldexp (&TWO32r, &dconst1, 32);
10033 x = const_double_from_real_value (TWO32r, DFmode);
10034 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10035
10036 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10037
10038 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10039 0, OPTAB_DIRECT);
10040 if (x != target)
10041 emit_move_insn (target, x);
10042 }
10043
10044 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10045 For x86_32, -mfpmath=sse, !optimize_size only. */
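/* The input is split into its low and high 16-bit halves; each half
   converts to SFmode exactly (it fits in the significand), and the result
   is recombined as hi * 2^16 + lo with a single final rounding. */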
10046 void
10047 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10048 {
10049 REAL_VALUE_TYPE ONE16r;
10050 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10051
10052 real_ldexp (&ONE16r, &dconst1, 16);
10053 x = const_double_from_real_value (ONE16r, SFmode);
10054 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10055 NULL, 0, OPTAB_DIRECT);
10056 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10057 NULL, 0, OPTAB_DIRECT);
10058 fp_hi = gen_reg_rtx (SFmode);
10059 fp_lo = gen_reg_rtx (SFmode);
10060 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10061 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10062 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10063 0, OPTAB_DIRECT);
10064 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10065 0, OPTAB_DIRECT);
10066 if (!rtx_equal_p (target, fp_hi))
10067 emit_move_insn (target, fp_hi);
10068 }
10069
10070 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10071 then replicate the value for all elements of the vector
10072 register. */
10073
10074 rtx
10075 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10076 {
10077 rtvec v;
10078 switch (mode)
10079 {
10080 case SFmode:
10081 if (vect)
10082 v = gen_rtvec (4, value, value, value, value);
10083 else
10084 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10085 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10086 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10087
10088 case DFmode:
10089 if (vect)
10090 v = gen_rtvec (2, value, value);
10091 else
10092 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10093 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10094
10095 default:
10096 gcc_unreachable ();
10097 }
10098 }
10099
10100 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10101 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10102 true, then replicate the mask for all elements of the vector register.
10103 If INVERT is true, then create a mask excluding the sign bit. */
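/* Concretely, the per-element mask is 0x80000000 for SFmode and
   0x8000000000000000 for DFmode (or their complements 0x7fffffff and
   0x7fffffffffffffff when INVERT), loaded as a V4SF or V2DF constant. */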
10104
10105 rtx
10106 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10107 {
10108 enum machine_mode vec_mode;
10109 HOST_WIDE_INT hi, lo;
10110 int shift = 63;
10111 rtx v;
10112 rtx mask;
10113
10114 /* Find the sign bit, sign extended to 2*HWI. */
10115 if (mode == SFmode)
10116 lo = 0x80000000, hi = lo < 0;
10117 else if (HOST_BITS_PER_WIDE_INT >= 64)
10118 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10119 else
10120 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10121
10122 if (invert)
10123 lo = ~lo, hi = ~hi;
10124
10125 /* Force this value into the low part of a fp vector constant. */
10126 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10127 mask = gen_lowpart (mode, mask);
10128
10129 v = ix86_build_const_vector (mode, vect, mask);
10130 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10131 return force_reg (vec_mode, v);
10132 }
10133
10134 /* Generate code for floating point ABS or NEG. */
10135
10136 void
10137 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10138 rtx operands[])
10139 {
10140 rtx mask, set, use, clob, dst, src;
10141 bool matching_memory;
10142 bool use_sse = false;
10143 bool vector_mode = VECTOR_MODE_P (mode);
10144 enum machine_mode elt_mode = mode;
10145
10146 if (vector_mode)
10147 {
10148 elt_mode = GET_MODE_INNER (mode);
10149 use_sse = true;
10150 }
10151 else if (TARGET_SSE_MATH)
10152 use_sse = SSE_FLOAT_MODE_P (mode);
10153
10154 /* NEG and ABS performed with SSE use bitwise mask operations.
10155 Create the appropriate mask now. */
10156 if (use_sse)
10157 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10158 else
10159 mask = NULL_RTX;
10160
10161 dst = operands[0];
10162 src = operands[1];
10163
10164 /* If the destination is memory, and we don't have matching source
10165 operands or we're using the x87, do things in registers. */
10166 matching_memory = false;
10167 if (MEM_P (dst))
10168 {
10169 if (use_sse && rtx_equal_p (dst, src))
10170 matching_memory = true;
10171 else
10172 dst = gen_reg_rtx (mode);
10173 }
10174 if (MEM_P (src) && !matching_memory)
10175 src = force_reg (mode, src);
10176
10177 if (vector_mode)
10178 {
10179 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10180 set = gen_rtx_SET (VOIDmode, dst, set);
10181 emit_insn (set);
10182 }
10183 else
10184 {
10185 set = gen_rtx_fmt_e (code, mode, src);
10186 set = gen_rtx_SET (VOIDmode, dst, set);
10187 if (mask)
10188 {
10189 use = gen_rtx_USE (VOIDmode, mask);
10190 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10191 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10192 gen_rtvec (3, set, use, clob)));
10193 }
10194 else
10195 emit_insn (set);
10196 }
10197
10198 if (dst != operands[0])
10199 emit_move_insn (operands[0], dst);
10200 }
10201
10202 /* Expand a copysign operation. Special case operand 0 being a constant. */
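/* In terms of bit patterns, copysign computes
   (op0 & ~SIGNMASK) | (op1 & SIGNMASK), i.e. the magnitude of op0 with the
   sign bit of op1; the masks come from ix86_build_signbit_mask. */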
10203
10204 void
10205 ix86_expand_copysign (rtx operands[])
10206 {
10207 enum machine_mode mode, vmode;
10208 rtx dest, op0, op1, mask, nmask;
10209
10210 dest = operands[0];
10211 op0 = operands[1];
10212 op1 = operands[2];
10213
10214 mode = GET_MODE (dest);
10215 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10216
10217 if (GET_CODE (op0) == CONST_DOUBLE)
10218 {
10219 rtvec v;
10220
10221 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10222 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10223
10224 if (op0 == CONST0_RTX (mode))
10225 op0 = CONST0_RTX (vmode);
10226 else
10227 {
10228 if (mode == SFmode)
10229 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10230 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10231 else
10232 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10233 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10234 }
10235
10236 mask = ix86_build_signbit_mask (mode, 0, 0);
10237
10238 if (mode == SFmode)
10239 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10240 else
10241 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10242 }
10243 else
10244 {
10245 nmask = ix86_build_signbit_mask (mode, 0, 1);
10246 mask = ix86_build_signbit_mask (mode, 0, 0);
10247
10248 if (mode == SFmode)
10249 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10250 else
10251 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10252 }
10253 }
10254
10255 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10256 be a constant, and so has already been expanded into a vector constant. */
10257
10258 void
10259 ix86_split_copysign_const (rtx operands[])
10260 {
10261 enum machine_mode mode, vmode;
10262 rtx dest, op0, op1, mask, x;
10263
10264 dest = operands[0];
10265 op0 = operands[1];
10266 op1 = operands[2];
10267 mask = operands[3];
10268
10269 mode = GET_MODE (dest);
10270 vmode = GET_MODE (mask);
10271
10272 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10273 x = gen_rtx_AND (vmode, dest, mask);
10274 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10275
10276 if (op0 != CONST0_RTX (vmode))
10277 {
10278 x = gen_rtx_IOR (vmode, dest, op0);
10279 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10280 }
10281 }
10282
10283 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10284 so we have to do two masks. */
10285
10286 void
10287 ix86_split_copysign_var (rtx operands[])
10288 {
10289 enum machine_mode mode, vmode;
10290 rtx dest, scratch, op0, op1, mask, nmask, x;
10291
10292 dest = operands[0];
10293 scratch = operands[1];
10294 op0 = operands[2];
10295 op1 = operands[3];
10296 nmask = operands[4];
10297 mask = operands[5];
10298
10299 mode = GET_MODE (dest);
10300 vmode = GET_MODE (mask);
10301
10302 if (rtx_equal_p (op0, op1))
10303 {
10304 /* Shouldn't happen often (it's useless, obviously), but when it does
10305 we'd generate incorrect code if we continue below. */
10306 emit_move_insn (dest, op0);
10307 return;
10308 }
10309
10310 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10311 {
10312 gcc_assert (REGNO (op1) == REGNO (scratch));
10313
10314 x = gen_rtx_AND (vmode, scratch, mask);
10315 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10316
10317 dest = mask;
10318 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10319 x = gen_rtx_NOT (vmode, dest);
10320 x = gen_rtx_AND (vmode, x, op0);
10321 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10322 }
10323 else
10324 {
10325 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10326 {
10327 x = gen_rtx_AND (vmode, scratch, mask);
10328 }
10329 else /* alternative 2,4 */
10330 {
10331 gcc_assert (REGNO (mask) == REGNO (scratch));
10332 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10333 x = gen_rtx_AND (vmode, scratch, op1);
10334 }
10335 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10336
10337 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10338 {
10339 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10340 x = gen_rtx_AND (vmode, dest, nmask);
10341 }
10342 else /* alternative 3,4 */
10343 {
10344 gcc_assert (REGNO (nmask) == REGNO (dest));
10345 dest = nmask;
10346 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10347 x = gen_rtx_AND (vmode, dest, op0);
10348 }
10349 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10350 }
10351
10352 x = gen_rtx_IOR (vmode, dest, scratch);
10353 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10354 }
10355
10356 /* Return TRUE or FALSE depending on whether the first SET in INSN
10357 has source and destination with matching CC modes, and whether the
10358 CC mode is at least as constrained as REQ_MODE. */
10359
10360 int
10361 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10362 {
10363 rtx set;
10364 enum machine_mode set_mode;
10365
10366 set = PATTERN (insn);
10367 if (GET_CODE (set) == PARALLEL)
10368 set = XVECEXP (set, 0, 0);
10369 gcc_assert (GET_CODE (set) == SET);
10370 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10371
10372 set_mode = GET_MODE (SET_DEST (set));
10373 switch (set_mode)
10374 {
10375 case CCNOmode:
10376 if (req_mode != CCNOmode
10377 && (req_mode != CCmode
10378 || XEXP (SET_SRC (set), 1) != const0_rtx))
10379 return 0;
10380 break;
10381 case CCmode:
10382 if (req_mode == CCGCmode)
10383 return 0;
10384 /* FALLTHRU */
10385 case CCGCmode:
10386 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10387 return 0;
10388 /* FALLTHRU */
10389 case CCGOCmode:
10390 if (req_mode == CCZmode)
10391 return 0;
10392 /* FALLTHRU */
10393 case CCZmode:
10394 break;
10395
10396 default:
10397 gcc_unreachable ();
10398 }
10399
10400 return (GET_MODE (SET_SRC (set)) == set_mode);
10401 }
10402
10403 /* Generate insn patterns to do an integer compare of OPERANDS. */
10404
10405 static rtx
10406 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10407 {
10408 enum machine_mode cmpmode;
10409 rtx tmp, flags;
10410
10411 cmpmode = SELECT_CC_MODE (code, op0, op1);
10412 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10413
10414 /* This is very simple, but making the interface the same as in the
10415 FP case makes the rest of the code easier. */
10416 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10417 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10418
10419 /* Return the test that should be put into the flags user, i.e.
10420 the bcc, scc, or cmov instruction. */
10421 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10422 }
10423
10424 /* Figure out whether to use ordered or unordered fp comparisons.
10425 Return the appropriate mode to use. */
10426
10427 enum machine_mode
10428 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10429 {
10430 /* ??? In order to make all comparisons reversible, we do all comparisons
10431 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10432 trapping from nontrapping forms of comparisons, we can make inequality
10433 comparisons trapping again, since it results in better code when using
10434 FCOM based compares. */
10435 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10436 }
10437
10438 enum machine_mode
10439 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10440 {
10441 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10442 return ix86_fp_compare_mode (code);
10443 switch (code)
10444 {
10445 /* Only zero flag is needed. */
10446 case EQ: /* ZF=0 */
10447 case NE: /* ZF!=0 */
10448 return CCZmode;
10449 /* Codes needing carry flag. */
10450 case GEU: /* CF=0 */
10451 case GTU: /* CF=0 & ZF=0 */
10452 case LTU: /* CF=1 */
10453 case LEU: /* CF=1 | ZF=1 */
10454 return CCmode;
10455 /* Codes possibly doable only with sign flag when
10456 comparing against zero. */
10457 case GE: /* SF=OF or SF=0 */
10458 case LT: /* SF<>OF or SF=1 */
10459 if (op1 == const0_rtx)
10460 return CCGOCmode;
10461 else
10462 /* For other cases Carry flag is not required. */
10463 return CCGCmode;
10464 /* Codes doable only with the sign flag when comparing
10465 against zero, but for which we lack a jump instruction,
10466 so we need to use relational tests against the overflow flag,
10467 which thus needs to be zero. */
10468 case GT: /* ZF=0 & SF=OF */
10469 case LE: /* ZF=1 | SF<>OF */
10470 if (op1 == const0_rtx)
10471 return CCNOmode;
10472 else
10473 return CCGCmode;
10474 /* The strcmp pattern does (use flags), and combine may ask us for the
10475 proper mode. */
10476 case USE:
10477 return CCmode;
10478 default:
10479 gcc_unreachable ();
10480 }
10481 }
10482
10483 /* Return the fixed registers used for condition codes. */
10484
10485 static bool
10486 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10487 {
10488 *p1 = FLAGS_REG;
10489 *p2 = FPSR_REG;
10490 return true;
10491 }
10492
10493 /* If two condition code modes are compatible, return a condition code
10494 mode which is compatible with both. Otherwise, return
10495 VOIDmode. */
10496
10497 static enum machine_mode
10498 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10499 {
10500 if (m1 == m2)
10501 return m1;
10502
10503 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10504 return VOIDmode;
10505
10506 if ((m1 == CCGCmode && m2 == CCGOCmode)
10507 || (m1 == CCGOCmode && m2 == CCGCmode))
10508 return CCGCmode;
10509
10510 switch (m1)
10511 {
10512 default:
10513 gcc_unreachable ();
10514
10515 case CCmode:
10516 case CCGCmode:
10517 case CCGOCmode:
10518 case CCNOmode:
10519 case CCZmode:
10520 switch (m2)
10521 {
10522 default:
10523 return VOIDmode;
10524
10525 case CCmode:
10526 case CCGCmode:
10527 case CCGOCmode:
10528 case CCNOmode:
10529 case CCZmode:
10530 return CCmode;
10531 }
10532
10533 case CCFPmode:
10534 case CCFPUmode:
10535 /* These are only compatible with themselves, which we already
10536 checked above. */
10537 return VOIDmode;
10538 }
10539 }
10540
10541 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10542
10543 int
10544 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10545 {
10546 enum rtx_code swapped_code = swap_condition (code);
10547 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10548 || (ix86_fp_comparison_cost (swapped_code)
10549 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10550 }
10551
10552 /* Swap, force into registers, or otherwise massage the two operands
10553 to a fp comparison. The operands are updated in place; the new
10554 comparison code is returned. */
10555
10556 static enum rtx_code
10557 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10558 {
10559 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10560 rtx op0 = *pop0, op1 = *pop1;
10561 enum machine_mode op_mode = GET_MODE (op0);
10562 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10563
10564 /* All of the unordered compare instructions only work on registers.
10565 The same is true of the fcomi compare instructions. The XFmode
10566 compare instructions require registers except when comparing
10567 against zero or when converting operand 1 from fixed point to
10568 floating point. */
10569
10570 if (!is_sse
10571 && (fpcmp_mode == CCFPUmode
10572 || (op_mode == XFmode
10573 && ! (standard_80387_constant_p (op0) == 1
10574 || standard_80387_constant_p (op1) == 1)
10575 && GET_CODE (op1) != FLOAT)
10576 || ix86_use_fcomi_compare (code)))
10577 {
10578 op0 = force_reg (op_mode, op0);
10579 op1 = force_reg (op_mode, op1);
10580 }
10581 else
10582 {
10583 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10584 things around if they appear profitable, otherwise force op0
10585 into a register. */
10586
10587 if (standard_80387_constant_p (op0) == 0
10588 || (MEM_P (op0)
10589 && ! (standard_80387_constant_p (op1) == 0
10590 || MEM_P (op1))))
10591 {
10592 rtx tmp;
10593 tmp = op0, op0 = op1, op1 = tmp;
10594 code = swap_condition (code);
10595 }
10596
10597 if (!REG_P (op0))
10598 op0 = force_reg (op_mode, op0);
10599
10600 if (CONSTANT_P (op1))
10601 {
10602 int tmp = standard_80387_constant_p (op1);
10603 if (tmp == 0)
10604 op1 = validize_mem (force_const_mem (op_mode, op1));
10605 else if (tmp == 1)
10606 {
10607 if (TARGET_CMOVE)
10608 op1 = force_reg (op_mode, op1);
10609 }
10610 else
10611 op1 = force_reg (op_mode, op1);
10612 }
10613 }
10614
10615 /* Try to rearrange the comparison to make it cheaper. */
10616 if (ix86_fp_comparison_cost (code)
10617 > ix86_fp_comparison_cost (swap_condition (code))
10618 && (REG_P (op1) || !no_new_pseudos))
10619 {
10620 rtx tmp;
10621 tmp = op0, op0 = op1, op1 = tmp;
10622 code = swap_condition (code);
10623 if (!REG_P (op0))
10624 op0 = force_reg (op_mode, op0);
10625 }
10626
10627 *pop0 = op0;
10628 *pop1 = op1;
10629 return code;
10630 }
10631
10632 /* Convert the comparison codes we use to represent FP comparisons to the
10633 integer codes that will result in proper branches. Return UNKNOWN if no
10634 such code is available. */
10635
10636 enum rtx_code
10637 ix86_fp_compare_code_to_integer (enum rtx_code code)
10638 {
10639 switch (code)
10640 {
10641 case GT:
10642 return GTU;
10643 case GE:
10644 return GEU;
10645 case ORDERED:
10646 case UNORDERED:
10647 return code;
10648 break;
10649 case UNEQ:
10650 return EQ;
10651 break;
10652 case UNLT:
10653 return LTU;
10654 break;
10655 case UNLE:
10656 return LEU;
10657 break;
10658 case LTGT:
10659 return NE;
10660 break;
10661 default:
10662 return UNKNOWN;
10663 }
10664 }
10665
10666 /* Split comparison code CODE into comparisons we can do using branch
10667 instructions. BYPASS_CODE is the comparison code for a branch that will
10668 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10669 is not required, its value is set to UNKNOWN.
10670 We never require more than two branches. */
10671
10672 void
10673 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10674 enum rtx_code *first_code,
10675 enum rtx_code *second_code)
10676 {
10677 *first_code = code;
10678 *bypass_code = UNKNOWN;
10679 *second_code = UNKNOWN;
10680
10681 /* The fcomi comparison sets flags as follows:
10682
10683 cmp ZF PF CF
10684 > 0 0 0
10685 < 0 0 1
10686 = 1 0 0
10687 un 1 1 1 */
10688
10689 switch (code)
10690 {
10691 case GT: /* GTU - CF=0 & ZF=0 */
10692 case GE: /* GEU - CF=0 */
10693 case ORDERED: /* PF=0 */
10694 case UNORDERED: /* PF=1 */
10695 case UNEQ: /* EQ - ZF=1 */
10696 case UNLT: /* LTU - CF=1 */
10697 case UNLE: /* LEU - CF=1 | ZF=1 */
10698 case LTGT: /* EQ - ZF=0 */
10699 break;
10700 case LT: /* LTU - CF=1 - fails on unordered */
10701 *first_code = UNLT;
10702 *bypass_code = UNORDERED;
10703 break;
10704 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10705 *first_code = UNLE;
10706 *bypass_code = UNORDERED;
10707 break;
10708 case EQ: /* EQ - ZF=1 - fails on unordered */
10709 *first_code = UNEQ;
10710 *bypass_code = UNORDERED;
10711 break;
10712 case NE: /* NE - ZF=0 - fails on unordered */
10713 *first_code = LTGT;
10714 *second_code = UNORDERED;
10715 break;
10716 case UNGE: /* GEU - CF=0 - fails on unordered */
10717 *first_code = GE;
10718 *second_code = UNORDERED;
10719 break;
10720 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10721 *first_code = GT;
10722 *second_code = UNORDERED;
10723 break;
10724 default:
10725 gcc_unreachable ();
10726 }
10727 if (!TARGET_IEEE_FP)
10728 {
10729 *second_code = UNKNOWN;
10730 *bypass_code = UNKNOWN;
10731 }
10732 }
10733
10734 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10735 All the following functions use the number of instructions as the cost metric.
10736 In the future this should be tweaked to compute bytes for optimize_size and
10737 to take into account the performance of various instructions on various CPUs. */
10738 static int
10739 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10740 {
10741 if (!TARGET_IEEE_FP)
10742 return 4;
10743 /* The cost of code output by ix86_expand_fp_compare. */
10744 switch (code)
10745 {
10746 case UNLE:
10747 case UNLT:
10748 case LTGT:
10749 case GT:
10750 case GE:
10751 case UNORDERED:
10752 case ORDERED:
10753 case UNEQ:
10754 return 4;
10756 case LT:
10757 case NE:
10758 case EQ:
10759 case UNGE:
10760 return 5;
10762 case LE:
10763 case UNGT:
10764 return 6;
10766 default:
10767 gcc_unreachable ();
10768 }
10769 }
10770
10771 /* Return cost of comparison done using fcomi operation.
10772 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10773 static int
10774 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10775 {
10776 enum rtx_code bypass_code, first_code, second_code;
10777 /* Return arbitrarily high cost when instruction is not supported - this
10778 prevents gcc from using it. */
10779 if (!TARGET_CMOVE)
10780 return 1024;
10781 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
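  /* Two insns (fcomi + jcc); a bypass or second branch adds one more.  */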
10782 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10783 }
10784
10785 /* Return cost of comparison done using sahf operation.
10786 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10787 static int
10788 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10789 {
10790 enum rtx_code bypass_code, first_code, second_code;
10791 /* Return an arbitrarily high cost when the instruction is not preferred - this
10792 prevents gcc from using it. */
10793 if (!TARGET_USE_SAHF && !optimize_size)
10794 return 1024;
10795 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10796 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10797 }
10798
10799 /* Compute cost of the comparison done using any method.
10800 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10801 static int
10802 ix86_fp_comparison_cost (enum rtx_code code)
10803 {
10804 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10805 int min;
10806
10807 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10808 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10809
10810 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10811 if (min > sahf_cost)
10812 min = sahf_cost;
10813 if (min > fcomi_cost)
10814 min = fcomi_cost;
10815 return min;
10816 }
10817
10818 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10819
10820 static rtx
10821 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10822 rtx *second_test, rtx *bypass_test)
10823 {
10824 enum machine_mode fpcmp_mode, intcmp_mode;
10825 rtx tmp, tmp2;
10826 int cost = ix86_fp_comparison_cost (code);
10827 enum rtx_code bypass_code, first_code, second_code;
10828
10829 fpcmp_mode = ix86_fp_compare_mode (code);
10830 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10831
10832 if (second_test)
10833 *second_test = NULL_RTX;
10834 if (bypass_test)
10835 *bypass_test = NULL_RTX;
10836
10837 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10838
10839 /* Do fcomi/sahf based test when profitable. */
10840 if ((bypass_code == UNKNOWN || bypass_test)
10841 && (second_code == UNKNOWN || second_test)
10842 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10843 {
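      /* With TARGET_CMOVE we can use fcomi, which writes the flags directly;
	 otherwise emit fcom followed by fnstsw and sahf to get the flags.  */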
10844 if (TARGET_CMOVE)
10845 {
10846 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10847 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10848 tmp);
10849 emit_insn (tmp);
10850 }
10851 else
10852 {
10853 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10854 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10855 if (!scratch)
10856 scratch = gen_reg_rtx (HImode);
10857 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10858 emit_insn (gen_x86_sahf_1 (scratch));
10859 }
10860
10861 /* The FP codes work out to act like unsigned. */
10862 intcmp_mode = fpcmp_mode;
10863 code = first_code;
10864 if (bypass_code != UNKNOWN)
10865 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10866 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10867 const0_rtx);
10868 if (second_code != UNKNOWN)
10869 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10870 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10871 const0_rtx);
10872 }
10873 else
10874 {
10875 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10876 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10877 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10878 if (!scratch)
10879 scratch = gen_reg_rtx (HImode);
10880 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10881
10882 /* In the unordered case, we have to check C2 for NaN's, which
10883 doesn't happen to work out to anything nice combination-wise.
10884 So do some bit twiddling on the value we've got in AH to come
10885 up with an appropriate set of condition codes. */
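      /* After the fnstsw above, the high byte of the scratch (AH) holds the
	 FPU condition bits: C0 in bit 0, C2 in bit 2 and C3 in bit 6.
	 fcom sets C0/C2/C3 the same way fcomi sets CF/PF/ZF (see the table
	 in ix86_fp_comparison_codes), so the masks below (0x01, 0x04, 0x40,
	 0x45) simply select combinations of those bits.  */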
10886
10887 intcmp_mode = CCNOmode;
10888 switch (code)
10889 {
10890 case GT:
10891 case UNGT:
10892 if (code == GT || !TARGET_IEEE_FP)
10893 {
10894 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10895 code = EQ;
10896 }
10897 else
10898 {
10899 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10900 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10901 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10902 intcmp_mode = CCmode;
10903 code = GEU;
10904 }
10905 break;
10906 case LT:
10907 case UNLT:
10908 if (code == LT && TARGET_IEEE_FP)
10909 {
10910 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10911 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10912 intcmp_mode = CCmode;
10913 code = EQ;
10914 }
10915 else
10916 {
10917 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10918 code = NE;
10919 }
10920 break;
10921 case GE:
10922 case UNGE:
10923 if (code == GE || !TARGET_IEEE_FP)
10924 {
10925 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10926 code = EQ;
10927 }
10928 else
10929 {
10930 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10931 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10932 GEN_INT (0x01)));
10933 code = NE;
10934 }
10935 break;
10936 case LE:
10937 case UNLE:
10938 if (code == LE && TARGET_IEEE_FP)
10939 {
10940 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10941 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10942 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10943 intcmp_mode = CCmode;
10944 code = LTU;
10945 }
10946 else
10947 {
10948 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10949 code = NE;
10950 }
10951 break;
10952 case EQ:
10953 case UNEQ:
10954 if (code == EQ && TARGET_IEEE_FP)
10955 {
10956 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10957 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10958 intcmp_mode = CCmode;
10959 code = EQ;
10960 }
10961 else
10962 {
10963 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10964 code = NE;
10966 }
10967 break;
10968 case NE:
10969 case LTGT:
10970 if (code == NE && TARGET_IEEE_FP)
10971 {
10972 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10973 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10974 GEN_INT (0x40)));
10975 code = NE;
10976 }
10977 else
10978 {
10979 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10980 code = EQ;
10981 }
10982 break;
10983
10984 case UNORDERED:
10985 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10986 code = NE;
10987 break;
10988 case ORDERED:
10989 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10990 code = EQ;
10991 break;
10992
10993 default:
10994 gcc_unreachable ();
10995 }
10996 }
10997
10998 /* Return the test that should be put into the flags user, i.e.
10999 the bcc, scc, or cmov instruction. */
11000 return gen_rtx_fmt_ee (code, VOIDmode,
11001 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11002 const0_rtx);
11003 }
11004
11005 rtx
11006 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11007 {
11008 rtx op0, op1, ret;
11009 op0 = ix86_compare_op0;
11010 op1 = ix86_compare_op1;
11011
11012 if (second_test)
11013 *second_test = NULL_RTX;
11014 if (bypass_test)
11015 *bypass_test = NULL_RTX;
11016
11017 if (ix86_compare_emitted)
11018 {
11019 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11020 ix86_compare_emitted = NULL_RTX;
11021 }
11022 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11023 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11024 second_test, bypass_test);
11025 else
11026 ret = ix86_expand_int_compare (code, op0, op1);
11027
11028 return ret;
11029 }
11030
11031 /* Return true if the CODE will result in a nontrivial jump sequence. */
11032 bool
11033 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11034 {
11035 enum rtx_code bypass_code, first_code, second_code;
11036 if (!TARGET_CMOVE)
11037 return true;
11038 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11039 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11040 }
11041
11042 void
11043 ix86_expand_branch (enum rtx_code code, rtx label)
11044 {
11045 rtx tmp;
11046
11047 /* If we have emitted a compare insn, go straight to simple.
11048 ix86_expand_compare won't emit anything if ix86_compare_emitted
11049 is non-NULL. */
11050 if (ix86_compare_emitted)
11051 goto simple;
11052
11053 switch (GET_MODE (ix86_compare_op0))
11054 {
11055 case QImode:
11056 case HImode:
11057 case SImode:
11058 simple:
11059 tmp = ix86_expand_compare (code, NULL, NULL);
11060 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11061 gen_rtx_LABEL_REF (VOIDmode, label),
11062 pc_rtx);
11063 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11064 return;
11065
11066 case SFmode:
11067 case DFmode:
11068 case XFmode:
11069 {
11070 rtvec vec;
11071 int use_fcomi;
11072 enum rtx_code bypass_code, first_code, second_code;
11073
11074 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11075 &ix86_compare_op1);
11076
11077 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11078
11079 /* Check whether we will use the natural sequence with one jump. If
11080 so, we can expand the jump early. Otherwise delay expansion by
11081 creating a compound insn so as not to confuse the optimizers. */
11082 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11083 && TARGET_CMOVE)
11084 {
11085 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11086 gen_rtx_LABEL_REF (VOIDmode, label),
11087 pc_rtx, NULL_RTX, NULL_RTX);
11088 }
11089 else
11090 {
11091 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11092 ix86_compare_op0, ix86_compare_op1);
11093 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11094 gen_rtx_LABEL_REF (VOIDmode, label),
11095 pc_rtx);
11096 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11097
11098 use_fcomi = ix86_use_fcomi_compare (code);
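	    /* Besides the jump itself, the PARALLEL clobbers the flags and FP
	       status registers (hard regs 17 and 18); when fcomi cannot be
	       used, an extra HImode scratch is clobbered for the fnstsw
	       result.  */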
11099 vec = rtvec_alloc (3 + !use_fcomi);
11100 RTVEC_ELT (vec, 0) = tmp;
11101 RTVEC_ELT (vec, 1)
11102 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11103 RTVEC_ELT (vec, 2)
11104 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11105 if (! use_fcomi)
11106 RTVEC_ELT (vec, 3)
11107 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11108
11109 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11110 }
11111 return;
11112 }
11113
11114 case DImode:
11115 if (TARGET_64BIT)
11116 goto simple;
11117 case TImode:
11118 /* Expand a double-word (DImode or TImode) branch into multiple compare+branch. */
11119 {
11120 rtx lo[2], hi[2], label2;
11121 enum rtx_code code1, code2, code3;
11122 enum machine_mode submode;
11123
11124 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11125 {
11126 tmp = ix86_compare_op0;
11127 ix86_compare_op0 = ix86_compare_op1;
11128 ix86_compare_op1 = tmp;
11129 code = swap_condition (code);
11130 }
11131 if (GET_MODE (ix86_compare_op0) == DImode)
11132 {
11133 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11134 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11135 submode = SImode;
11136 }
11137 else
11138 {
11139 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11140 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11141 submode = DImode;
11142 }
11143
11144 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11145 avoid two branches. This costs one extra insn, so disable when
11146 optimizing for size. */
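	/* E.g. a DImode a == b test comes out roughly as
	     xorl hi(b),hi(a); xorl lo(b),lo(a); orl hi(a),lo(a); j[n]e label.  */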
11147
11148 if ((code == EQ || code == NE)
11149 && (!optimize_size
11150 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11151 {
11152 rtx xor0, xor1;
11153
11154 xor1 = hi[0];
11155 if (hi[1] != const0_rtx)
11156 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11157 NULL_RTX, 0, OPTAB_WIDEN);
11158
11159 xor0 = lo[0];
11160 if (lo[1] != const0_rtx)
11161 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11162 NULL_RTX, 0, OPTAB_WIDEN);
11163
11164 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11165 NULL_RTX, 0, OPTAB_WIDEN);
11166
11167 ix86_compare_op0 = tmp;
11168 ix86_compare_op1 = const0_rtx;
11169 ix86_expand_branch (code, label);
11170 return;
11171 }
11172
11173 /* Otherwise, if we are doing a less-than or greater-or-equal comparison
11174 and op1 is a constant whose low word is zero, then we can just
11175 examine the high word. */
11176
11177 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11178 switch (code)
11179 {
11180 case LT: case LTU: case GE: case GEU:
11181 ix86_compare_op0 = hi[0];
11182 ix86_compare_op1 = hi[1];
11183 ix86_expand_branch (code, label);
11184 return;
11185 default:
11186 break;
11187 }
11188
11189 /* Otherwise, we need two or three jumps. */
11190
11191 label2 = gen_label_rtx ();
11192
11193 code1 = code;
11194 code2 = swap_condition (code);
11195 code3 = unsigned_condition (code);
11196
11197 switch (code)
11198 {
11199 case LT: case GT: case LTU: case GTU:
11200 break;
11201
11202 case LE: code1 = LT; code2 = GT; break;
11203 case GE: code1 = GT; code2 = LT; break;
11204 case LEU: code1 = LTU; code2 = GTU; break;
11205 case GEU: code1 = GTU; code2 = LTU; break;
11206
11207 case EQ: code1 = UNKNOWN; code2 = NE; break;
11208 case NE: code2 = UNKNOWN; break;
11209
11210 default:
11211 gcc_unreachable ();
11212 }
11213
11214 /*
11215 * a < b =>
11216 * if (hi(a) < hi(b)) goto true;
11217 * if (hi(a) > hi(b)) goto false;
11218 * if (lo(a) < lo(b)) goto true;
11219 * false:
11220 */
11221
11222 ix86_compare_op0 = hi[0];
11223 ix86_compare_op1 = hi[1];
11224
11225 if (code1 != UNKNOWN)
11226 ix86_expand_branch (code1, label);
11227 if (code2 != UNKNOWN)
11228 ix86_expand_branch (code2, label2);
11229
11230 ix86_compare_op0 = lo[0];
11231 ix86_compare_op1 = lo[1];
11232 ix86_expand_branch (code3, label);
11233
11234 if (code2 != UNKNOWN)
11235 emit_label (label2);
11236 return;
11237 }
11238
11239 default:
11240 gcc_unreachable ();
11241 }
11242 }
11243
11244 /* Split branch based on floating point condition. */
11245 void
11246 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11247 rtx target1, rtx target2, rtx tmp, rtx pushed)
11248 {
11249 rtx second, bypass;
11250 rtx label = NULL_RTX;
11251 rtx condition;
11252 int bypass_probability = -1, second_probability = -1, probability = -1;
11253 rtx i;
11254
11255 if (target2 != pc_rtx)
11256 {
11257 rtx tmp = target2;
11258 code = reverse_condition_maybe_unordered (code);
11259 target2 = target1;
11260 target1 = tmp;
11261 }
11262
11263 condition = ix86_expand_fp_compare (code, op1, op2,
11264 tmp, &second, &bypass);
11265
11266 /* Remove pushed operand from stack. */
11267 if (pushed)
11268 ix86_free_from_memory (GET_MODE (pushed));
11269
11270 if (split_branch_probability >= 0)
11271 {
11272 /* Distribute the probabilities across the jumps.
11273 Assume that BYPASS and SECOND always test
11274 for UNORDERED. */
11275 probability = split_branch_probability;
11276
11277 /* A value of 1 is low enough that the probability does not need
11278 to be updated. Later we may run some experiments and see
11279 if unordered values are more frequent in practice. */
11280 if (bypass)
11281 bypass_probability = 1;
11282 if (second)
11283 second_probability = 1;
11284 }
11285 if (bypass != NULL_RTX)
11286 {
11287 label = gen_label_rtx ();
11288 i = emit_jump_insn (gen_rtx_SET
11289 (VOIDmode, pc_rtx,
11290 gen_rtx_IF_THEN_ELSE (VOIDmode,
11291 bypass,
11292 gen_rtx_LABEL_REF (VOIDmode,
11293 label),
11294 pc_rtx)));
11295 if (bypass_probability >= 0)
11296 REG_NOTES (i)
11297 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11298 GEN_INT (bypass_probability),
11299 REG_NOTES (i));
11300 }
11301 i = emit_jump_insn (gen_rtx_SET
11302 (VOIDmode, pc_rtx,
11303 gen_rtx_IF_THEN_ELSE (VOIDmode,
11304 condition, target1, target2)));
11305 if (probability >= 0)
11306 REG_NOTES (i)
11307 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11308 GEN_INT (probability),
11309 REG_NOTES (i));
11310 if (second != NULL_RTX)
11311 {
11312 i = emit_jump_insn (gen_rtx_SET
11313 (VOIDmode, pc_rtx,
11314 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11315 target2)));
11316 if (second_probability >= 0)
11317 REG_NOTES (i)
11318 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11319 GEN_INT (second_probability),
11320 REG_NOTES (i));
11321 }
11322 if (label != NULL_RTX)
11323 emit_label (label);
11324 }
11325
11326 int
11327 ix86_expand_setcc (enum rtx_code code, rtx dest)
11328 {
11329 rtx ret, tmp, tmpreg, equiv;
11330 rtx second_test, bypass_test;
11331
11332 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11333 return 0; /* FAIL */
11334
11335 gcc_assert (GET_MODE (dest) == QImode);
11336
11337 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11338 PUT_MODE (ret, QImode);
11339
11340 tmp = dest;
11341 tmpreg = dest;
11342
11343 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11344 if (bypass_test || second_test)
11345 {
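      /* The extra flag test is computed into a scratch and combined with the
	 main result: OR in a second test, or AND with the reversed bypass
	 (unordered) test.  */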
11346 rtx test = second_test;
11347 int bypass = 0;
11348 rtx tmp2 = gen_reg_rtx (QImode);
11349 if (bypass_test)
11350 {
11351 gcc_assert (!second_test);
11352 test = bypass_test;
11353 bypass = 1;
11354 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11355 }
11356 PUT_MODE (test, QImode);
11357 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11358
11359 if (bypass)
11360 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11361 else
11362 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11363 }
11364
11365 /* Attach a REG_EQUAL note describing the comparison result. */
11366 if (ix86_compare_op0 && ix86_compare_op1)
11367 {
11368 equiv = simplify_gen_relational (code, QImode,
11369 GET_MODE (ix86_compare_op0),
11370 ix86_compare_op0, ix86_compare_op1);
11371 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11372 }
11373
11374 return 1; /* DONE */
11375 }
11376
11377 /* Expand comparison setting or clearing carry flag. Return true when
11378 successful and set *POP to the resulting comparison. */
11379 static bool
11380 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11381 {
11382 enum machine_mode mode =
11383 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11384
11385 /* Do not handle double-word compares, which go through a special path.
11386 FP compares are handled below only when they reduce to a carry-flag test. */
11387 if (mode == (TARGET_64BIT ? TImode : DImode))
11388 return false;
11389 if (FLOAT_MODE_P (mode))
11390 {
11391 rtx second_test = NULL, bypass_test = NULL;
11392 rtx compare_op, compare_seq;
11393
11394 /* Shortcut: the following common codes never translate into carry-flag compares. */
11395 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11396 || code == ORDERED || code == UNORDERED)
11397 return false;
11398
11399 /* These comparisons require the zero flag; swap operands so they don't. */
11400 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11401 && !TARGET_IEEE_FP)
11402 {
11403 rtx tmp = op0;
11404 op0 = op1;
11405 op1 = tmp;
11406 code = swap_condition (code);
11407 }
11408
11409 /* Try to expand the comparison and verify that we end up with a carry-flag
11410 based comparison. This fails only when we decide to expand the
11411 comparison using arithmetic, which is not a common scenario. */
11412 start_sequence ();
11413 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11414 &second_test, &bypass_test);
11415 compare_seq = get_insns ();
11416 end_sequence ();
11417
11418 if (second_test || bypass_test)
11419 return false;
11420 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11421 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11422 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11423 else
11424 code = GET_CODE (compare_op);
11425 if (code != LTU && code != GEU)
11426 return false;
11427 emit_insn (compare_seq);
11428 *pop = compare_op;
11429 return true;
11430 }
11431 if (!INTEGRAL_MODE_P (mode))
11432 return false;
11433 switch (code)
11434 {
11435 case LTU:
11436 case GEU:
11437 break;
11438
11439 /* Convert a==0 into (unsigned)a<1. */
11440 case EQ:
11441 case NE:
11442 if (op1 != const0_rtx)
11443 return false;
11444 op1 = const1_rtx;
11445 code = (code == EQ ? LTU : GEU);
11446 break;
11447
11448 /* Convert a>b into b<a or a>=b+1. */
11449 case GTU:
11450 case LEU:
11451 if (CONST_INT_P (op1))
11452 {
11453 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11454 /* Bail out on overflow. We could still swap the operands, but that
11455 would force loading the constant into a register. */
11456 if (op1 == const0_rtx
11457 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11458 return false;
11459 code = (code == GTU ? GEU : LTU);
11460 }
11461 else
11462 {
11463 rtx tmp = op1;
11464 op1 = op0;
11465 op0 = tmp;
11466 code = (code == GTU ? LTU : GEU);
11467 }
11468 break;
11469
11470 /* Convert a>=0 into (unsigned)a<0x80000000. */
11471 case LT:
11472 case GE:
11473 if (mode == DImode || op1 != const0_rtx)
11474 return false;
11475 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11476 code = (code == LT ? GEU : LTU);
11477 break;
11478 case LE:
11479 case GT:
11480 if (mode == DImode || op1 != constm1_rtx)
11481 return false;
11482 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11483 code = (code == LE ? GEU : LTU);
11484 break;
11485
11486 default:
11487 return false;
11488 }
11489 /* Swapping operands may cause a constant to appear as the first operand. */
11490 if (!nonimmediate_operand (op0, VOIDmode))
11491 {
11492 if (no_new_pseudos)
11493 return false;
11494 op0 = force_reg (mode, op0);
11495 }
11496 ix86_compare_op0 = op0;
11497 ix86_compare_op1 = op1;
11498 *pop = ix86_expand_compare (code, NULL, NULL);
11499 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11500 return true;
11501 }
11502
11503 int
11504 ix86_expand_int_movcc (rtx operands[])
11505 {
11506 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11507 rtx compare_seq, compare_op;
11508 rtx second_test, bypass_test;
11509 enum machine_mode mode = GET_MODE (operands[0]);
11510 bool sign_bit_compare_p = false;
11511
11512 start_sequence ();
11513 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11514 compare_seq = get_insns ();
11515 end_sequence ();
11516
11517 compare_code = GET_CODE (compare_op);
11518
11519 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11520 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11521 sign_bit_compare_p = true;
11522
11523 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11524 HImode insns, we'd be swallowed in word prefix ops. */
11525
11526 if ((mode != HImode || TARGET_FAST_PREFIX)
11527 && (mode != (TARGET_64BIT ? TImode : DImode))
11528 && CONST_INT_P (operands[2])
11529 && CONST_INT_P (operands[3]))
11530 {
11531 rtx out = operands[0];
11532 HOST_WIDE_INT ct = INTVAL (operands[2]);
11533 HOST_WIDE_INT cf = INTVAL (operands[3]);
11534 HOST_WIDE_INT diff;
11535
11536 diff = ct - cf;
11537 /* Sign bit compares are better done using shifts than by using
11538 sbb. */
11539 if (sign_bit_compare_p
11540 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11541 ix86_compare_op1, &compare_op))
11542 {
11543 /* Detect overlap between destination and compare sources. */
11544 rtx tmp = out;
11545
11546 if (!sign_bit_compare_p)
11547 {
11548 bool fpcmp = false;
11549
11550 compare_code = GET_CODE (compare_op);
11551
11552 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11553 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11554 {
11555 fpcmp = true;
11556 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11557 }
11558
11559 /* To simplify the rest of the code, restrict to the GEU case. */
11560 if (compare_code == LTU)
11561 {
11562 HOST_WIDE_INT tmp = ct;
11563 ct = cf;
11564 cf = tmp;
11565 compare_code = reverse_condition (compare_code);
11566 code = reverse_condition (code);
11567 }
11568 else
11569 {
11570 if (fpcmp)
11571 PUT_CODE (compare_op,
11572 reverse_condition_maybe_unordered
11573 (GET_CODE (compare_op)));
11574 else
11575 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11576 }
11577 diff = ct - cf;
11578
11579 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11580 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11581 tmp = gen_reg_rtx (mode);
11582
11583 if (mode == DImode)
11584 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11585 else
11586 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11587 }
11588 else
11589 {
11590 if (code == GT || code == GE)
11591 code = reverse_condition (code);
11592 else
11593 {
11594 HOST_WIDE_INT tmp = ct;
11595 ct = cf;
11596 cf = tmp;
11597 diff = ct - cf;
11598 }
11599 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11600 ix86_compare_op1, VOIDmode, 0, -1);
11601 }
11602
11603 if (diff == 1)
11604 {
11605 /*
11606 * cmpl op0,op1
11607 * sbbl dest,dest
11608 * [addl dest, ct]
11609 *
11610 * Size 5 - 8.
11611 */
11612 if (ct)
11613 tmp = expand_simple_binop (mode, PLUS,
11614 tmp, GEN_INT (ct),
11615 copy_rtx (tmp), 1, OPTAB_DIRECT);
11616 }
11617 else if (cf == -1)
11618 {
11619 /*
11620 * cmpl op0,op1
11621 * sbbl dest,dest
11622 * orl $ct, dest
11623 *
11624 * Size 8.
11625 */
11626 tmp = expand_simple_binop (mode, IOR,
11627 tmp, GEN_INT (ct),
11628 copy_rtx (tmp), 1, OPTAB_DIRECT);
11629 }
11630 else if (diff == -1 && ct)
11631 {
11632 /*
11633 * cmpl op0,op1
11634 * sbbl dest,dest
11635 * notl dest
11636 * [addl dest, cf]
11637 *
11638 * Size 8 - 11.
11639 */
11640 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11641 if (cf)
11642 tmp = expand_simple_binop (mode, PLUS,
11643 copy_rtx (tmp), GEN_INT (cf),
11644 copy_rtx (tmp), 1, OPTAB_DIRECT);
11645 }
11646 else
11647 {
11648 /*
11649 * cmpl op0,op1
11650 * sbbl dest,dest
11651 * [notl dest]
11652 * andl cf - ct, dest
11653 * [addl dest, ct]
11654 *
11655 * Size 8 - 11.
11656 */
11657
11658 if (cf == 0)
11659 {
11660 cf = ct;
11661 ct = 0;
11662 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11663 }
11664
11665 tmp = expand_simple_binop (mode, AND,
11666 copy_rtx (tmp),
11667 gen_int_mode (cf - ct, mode),
11668 copy_rtx (tmp), 1, OPTAB_DIRECT);
11669 if (ct)
11670 tmp = expand_simple_binop (mode, PLUS,
11671 copy_rtx (tmp), GEN_INT (ct),
11672 copy_rtx (tmp), 1, OPTAB_DIRECT);
11673 }
11674
11675 if (!rtx_equal_p (tmp, out))
11676 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11677
11678 return 1; /* DONE */
11679 }
11680
11681 if (diff < 0)
11682 {
11683 HOST_WIDE_INT tmp;
11684 tmp = ct, ct = cf, cf = tmp;
11685 diff = -diff;
11686 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11687 {
11688 /* We may be reversing an unordered compare to a normal compare, which
11689 is not valid in general (we may convert a non-trapping condition
11690 to a trapping one); however, on i386 we currently emit all
11691 comparisons unordered. */
11692 compare_code = reverse_condition_maybe_unordered (compare_code);
11693 code = reverse_condition_maybe_unordered (code);
11694 }
11695 else
11696 {
11697 compare_code = reverse_condition (compare_code);
11698 code = reverse_condition (code);
11699 }
11700 }
11701
11702 compare_code = UNKNOWN;
11703 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11704 && CONST_INT_P (ix86_compare_op1))
11705 {
11706 if (ix86_compare_op1 == const0_rtx
11707 && (code == LT || code == GE))
11708 compare_code = code;
11709 else if (ix86_compare_op1 == constm1_rtx)
11710 {
11711 if (code == LE)
11712 compare_code = LT;
11713 else if (code == GT)
11714 compare_code = GE;
11715 }
11716 }
11717
11718 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11719 if (compare_code != UNKNOWN
11720 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11721 && (cf == -1 || ct == -1))
11722 {
11723 /* If the lea code below could be used, only optimize
11724 if it results in a 2-insn sequence. */
11725
11726 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11727 || diff == 3 || diff == 5 || diff == 9)
11728 || (compare_code == LT && ct == -1)
11729 || (compare_code == GE && cf == -1))
11730 {
11731 /*
11732 * notl op1 (if necessary)
11733 * sarl $31, op1
11734 * orl cf, op1
11735 */
11736 if (ct != -1)
11737 {
11738 cf = ct;
11739 ct = -1;
11740 code = reverse_condition (code);
11741 }
11742
11743 out = emit_store_flag (out, code, ix86_compare_op0,
11744 ix86_compare_op1, VOIDmode, 0, -1);
11745
11746 out = expand_simple_binop (mode, IOR,
11747 out, GEN_INT (cf),
11748 out, 1, OPTAB_DIRECT);
11749 if (out != operands[0])
11750 emit_move_insn (operands[0], out);
11751
11752 return 1; /* DONE */
11753 }
11754 }
11755
11756
11757 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11758 || diff == 3 || diff == 5 || diff == 9)
11759 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11760 && (mode != DImode
11761 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11762 {
11763 /*
11764 * xorl dest,dest
11765 * cmpl op1,op2
11766 * setcc dest
11767 * lea cf(dest*(ct-cf)),dest
11768 *
11769 * Size 14.
11770 *
11771 * This also catches the degenerate setcc-only case.
11772 */
11773
11774 rtx tmp;
11775 int nops;
11776
11777 out = emit_store_flag (out, code, ix86_compare_op0,
11778 ix86_compare_op1, VOIDmode, 0, 1);
11779
11780 nops = 0;
11781 /* On x86_64 the lea instruction operates on Pmode, so we need
11782 to get the arithmetic done in the proper mode to match. */
11783 if (diff == 1)
11784 tmp = copy_rtx (out);
11785 else
11786 {
11787 rtx out1;
11788 out1 = copy_rtx (out);
11789 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11790 nops++;
11791 if (diff & 1)
11792 {
11793 tmp = gen_rtx_PLUS (mode, tmp, out1);
11794 nops++;
11795 }
11796 }
11797 if (cf != 0)
11798 {
11799 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11800 nops++;
11801 }
11802 if (!rtx_equal_p (tmp, out))
11803 {
11804 if (nops == 1)
11805 out = force_operand (tmp, copy_rtx (out));
11806 else
11807 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11808 }
11809 if (!rtx_equal_p (out, operands[0]))
11810 emit_move_insn (operands[0], copy_rtx (out));
11811
11812 return 1; /* DONE */
11813 }
11814
11815 /*
11816 * General case: Jumpful:
11817 * xorl dest,dest cmpl op1, op2
11818 * cmpl op1, op2 movl ct, dest
11819 * setcc dest jcc 1f
11820 * decl dest movl cf, dest
11821 * andl (cf-ct),dest 1:
11822 * addl ct,dest
11823 *
11824 * Size 20. Size 14.
11825 *
11826 * This is reasonably steep, but branch mispredict costs are
11827 * high on modern cpus, so consider failing only if optimizing
11828 * for space.
11829 */
11830
11831 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11832 && BRANCH_COST >= 2)
11833 {
11834 if (cf == 0)
11835 {
11836 cf = ct;
11837 ct = 0;
11838 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11839 /* We may be reversing an unordered compare to a normal compare,
11840 which is not valid in general (we may convert a non-trapping
11841 condition to a trapping one); however, on i386 we currently
11842 emit all comparisons unordered. */
11843 code = reverse_condition_maybe_unordered (code);
11844 else
11845 {
11846 code = reverse_condition (code);
11847 if (compare_code != UNKNOWN)
11848 compare_code = reverse_condition (compare_code);
11849 }
11850 }
11851
11852 if (compare_code != UNKNOWN)
11853 {
11854 /* notl op1 (if needed)
11855 sarl $31, op1
11856 andl (cf-ct), op1
11857 addl ct, op1
11858
11859 For x < 0 (resp. x <= -1) there will be no notl,
11860 so if possible swap the constants to get rid of the
11861 complement.
11862 True/false will be -1/0 while code below (store flag
11863 followed by decrement) is 0/-1, so the constants need
11864 to be exchanged once more. */
11865
11866 if (compare_code == GE || !cf)
11867 {
11868 code = reverse_condition (code);
11869 compare_code = LT;
11870 }
11871 else
11872 {
11873 HOST_WIDE_INT tmp = cf;
11874 cf = ct;
11875 ct = tmp;
11876 }
11877
11878 out = emit_store_flag (out, code, ix86_compare_op0,
11879 ix86_compare_op1, VOIDmode, 0, -1);
11880 }
11881 else
11882 {
11883 out = emit_store_flag (out, code, ix86_compare_op0,
11884 ix86_compare_op1, VOIDmode, 0, 1);
11885
11886 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11887 copy_rtx (out), 1, OPTAB_DIRECT);
11888 }
11889
11890 out = expand_simple_binop (mode, AND, copy_rtx (out),
11891 gen_int_mode (cf - ct, mode),
11892 copy_rtx (out), 1, OPTAB_DIRECT);
11893 if (ct)
11894 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11895 copy_rtx (out), 1, OPTAB_DIRECT);
11896 if (!rtx_equal_p (out, operands[0]))
11897 emit_move_insn (operands[0], copy_rtx (out));
11898
11899 return 1; /* DONE */
11900 }
11901 }
11902
11903 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11904 {
11905 /* Try a few things more with specific constants and a variable. */
11906
11907 optab op;
11908 rtx var, orig_out, out, tmp;
11909
11910 if (BRANCH_COST <= 2)
11911 return 0; /* FAIL */
11912
11913 /* If one of the two operands is an interesting constant, turn the other into
11914 0 or -1, load that mask via the recursive call below, and mask the variable in with a logical operation. */
11915
11916 if (CONST_INT_P (operands[2]))
11917 {
11918 var = operands[3];
11919 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11920 operands[3] = constm1_rtx, op = and_optab;
11921 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11922 operands[3] = const0_rtx, op = ior_optab;
11923 else
11924 return 0; /* FAIL */
11925 }
11926 else if (CONST_INT_P (operands[3]))
11927 {
11928 var = operands[2];
11929 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11930 operands[2] = constm1_rtx, op = and_optab;
11931 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11932 operands[2] = const0_rtx, op = ior_optab;
11933 else
11934 return 0; /* FAIL */
11935 }
11936 else
11937 return 0; /* FAIL */
11938
11939 orig_out = operands[0];
11940 tmp = gen_reg_rtx (mode);
11941 operands[0] = tmp;
11942
11943 /* Recurse to get the constant loaded. */
11944 if (ix86_expand_int_movcc (operands) == 0)
11945 return 0; /* FAIL */
11946
11947 /* Mask in the interesting variable. */
11948 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11949 OPTAB_WIDEN);
11950 if (!rtx_equal_p (out, orig_out))
11951 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11952
11953 return 1; /* DONE */
11954 }
11955
11956 /*
11957 * For comparison with above,
11958 *
11959 * movl cf,dest
11960 * movl ct,tmp
11961 * cmpl op1,op2
11962 * cmovcc tmp,dest
11963 *
11964 * Size 15.
11965 */
11966
11967 if (! nonimmediate_operand (operands[2], mode))
11968 operands[2] = force_reg (mode, operands[2]);
11969 if (! nonimmediate_operand (operands[3], mode))
11970 operands[3] = force_reg (mode, operands[3]);
11971
11972 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11973 {
11974 rtx tmp = gen_reg_rtx (mode);
11975 emit_move_insn (tmp, operands[3]);
11976 operands[3] = tmp;
11977 }
11978 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11979 {
11980 rtx tmp = gen_reg_rtx (mode);
11981 emit_move_insn (tmp, operands[2]);
11982 operands[2] = tmp;
11983 }
11984
11985 if (! register_operand (operands[2], VOIDmode)
11986 && (mode == QImode
11987 || ! register_operand (operands[3], VOIDmode)))
11988 operands[2] = force_reg (mode, operands[2]);
11989
11990 if (mode == QImode
11991 && ! register_operand (operands[3], VOIDmode))
11992 operands[3] = force_reg (mode, operands[3]);
11993
11994 emit_insn (compare_seq);
11995 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11996 gen_rtx_IF_THEN_ELSE (mode,
11997 compare_op, operands[2],
11998 operands[3])));
11999 if (bypass_test)
12000 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12001 gen_rtx_IF_THEN_ELSE (mode,
12002 bypass_test,
12003 copy_rtx (operands[3]),
12004 copy_rtx (operands[0]))));
12005 if (second_test)
12006 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12007 gen_rtx_IF_THEN_ELSE (mode,
12008 second_test,
12009 copy_rtx (operands[2]),
12010 copy_rtx (operands[0]))));
12011
12012 return 1; /* DONE */
12013 }
12014
12015 /* Swap, force into registers, or otherwise massage the two operands
12016 to an sse comparison with a mask result. Thus we differ a bit from
12017 ix86_prepare_fp_compare_args which expects to produce a flags result.
12018
12019 The DEST operand exists to help determine whether to commute commutative
12020 operators. The POP0/POP1 operands are updated in place. The new
12021 comparison code is returned, or UNKNOWN if not implementable. */
12022
12023 static enum rtx_code
12024 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12025 rtx *pop0, rtx *pop1)
12026 {
12027 rtx tmp;
12028
12029 switch (code)
12030 {
12031 case LTGT:
12032 case UNEQ:
12033 /* We have no LTGT as an operator. We could implement it with
12034 NE & ORDERED, but this requires an extra temporary. It's
12035 not clear that it's worth it. */
12036 return UNKNOWN;
12037
12038 case LT:
12039 case LE:
12040 case UNGT:
12041 case UNGE:
12042 /* These are supported directly. */
12043 break;
12044
12045 case EQ:
12046 case NE:
12047 case UNORDERED:
12048 case ORDERED:
12049 /* For commutative operators, try to canonicalize the destination
12050 operand to be first in the comparison - this helps reload to
12051 avoid extra moves. */
12052 if (!dest || !rtx_equal_p (dest, *pop1))
12053 break;
12054 /* FALLTHRU */
12055
12056 case GE:
12057 case GT:
12058 case UNLE:
12059 case UNLT:
12060 /* These are not supported directly. Swap the comparison operands
12061 to transform into something that is supported. */
12062 tmp = *pop0;
12063 *pop0 = *pop1;
12064 *pop1 = tmp;
12065 code = swap_condition (code);
12066 break;
12067
12068 default:
12069 gcc_unreachable ();
12070 }
12071
12072 return code;
12073 }
12074
12075 /* Detect conditional moves that exactly match min/max operational
12076 semantics. Note that this is IEEE safe, as long as we don't
12077 interchange the operands.
12078
12079 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12080 and TRUE if the operation is successful and instructions are emitted. */
12081
12082 static bool
12083 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12084 rtx cmp_op1, rtx if_true, rtx if_false)
12085 {
12086 enum machine_mode mode;
12087 bool is_min;
12088 rtx tmp;
12089
12090 if (code == LT)
12091 ;
12092 else if (code == UNGE)
12093 {
12094 tmp = if_true;
12095 if_true = if_false;
12096 if_false = tmp;
12097 }
12098 else
12099 return false;
12100
12101 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12102 is_min = true;
12103 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12104 is_min = false;
12105 else
12106 return false;
12107
12108 mode = GET_MODE (dest);
12109
12110 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12111 but MODE may be a vector mode and thus not appropriate. */
12112 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12113 {
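	  /* Use an UNSPEC so the operand order is preserved: SMIN/SMAX are
	     commutative, but for the SSE min/max instructions the order
	     matters when NaNs or signed zeros are involved.  */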
12114 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12115 rtvec v;
12116
12117 if_true = force_reg (mode, if_true);
12118 v = gen_rtvec (2, if_true, if_false);
12119 tmp = gen_rtx_UNSPEC (mode, v, u);
12120 }
12121 else
12122 {
12123 code = is_min ? SMIN : SMAX;
12124 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12125 }
12126
12127 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12128 return true;
12129 }
12130
12131 /* Expand an sse vector comparison. Return the register with the result. */
12132
12133 static rtx
12134 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12135 rtx op_true, rtx op_false)
12136 {
12137 enum machine_mode mode = GET_MODE (dest);
12138 rtx x;
12139
12140 cmp_op0 = force_reg (mode, cmp_op0);
12141 if (!nonimmediate_operand (cmp_op1, mode))
12142 cmp_op1 = force_reg (mode, cmp_op1);
12143
12144 if (optimize
12145 || reg_overlap_mentioned_p (dest, op_true)
12146 || reg_overlap_mentioned_p (dest, op_false))
12147 dest = gen_reg_rtx (mode);
12148
12149 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12150 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12151
12152 return dest;
12153 }
12154
12155 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12156 operations. This is used for both scalar and vector conditional moves. */
12157
12158 static void
12159 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12160 {
12161 enum machine_mode mode = GET_MODE (dest);
12162 rtx t2, t3, x;
12163
12164 if (op_false == CONST0_RTX (mode))
12165 {
12166 op_true = force_reg (mode, op_true);
12167 x = gen_rtx_AND (mode, cmp, op_true);
12168 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12169 }
12170 else if (op_true == CONST0_RTX (mode))
12171 {
12172 op_false = force_reg (mode, op_false);
12173 x = gen_rtx_NOT (mode, cmp);
12174 x = gen_rtx_AND (mode, x, op_false);
12175 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12176 }
12177 else
12178 {
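	/* General case: dest = (cmp & op_true) | (~cmp & op_false).  */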
12179 op_true = force_reg (mode, op_true);
12180 op_false = force_reg (mode, op_false);
12181
12182 t2 = gen_reg_rtx (mode);
12183 if (optimize)
12184 t3 = gen_reg_rtx (mode);
12185 else
12186 t3 = dest;
12187
12188 x = gen_rtx_AND (mode, op_true, cmp);
12189 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12190
12191 x = gen_rtx_NOT (mode, cmp);
12192 x = gen_rtx_AND (mode, x, op_false);
12193 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12194
12195 x = gen_rtx_IOR (mode, t3, t2);
12196 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12197 }
12198 }
12199
12200 /* Expand a floating-point conditional move. Return true if successful. */
12201
12202 int
12203 ix86_expand_fp_movcc (rtx operands[])
12204 {
12205 enum machine_mode mode = GET_MODE (operands[0]);
12206 enum rtx_code code = GET_CODE (operands[1]);
12207 rtx tmp, compare_op, second_test, bypass_test;
12208
12209 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12210 {
12211 enum machine_mode cmode;
12212
12213 /* Since we have no cmove for sse registers, don't force bad register
12214 allocation just to gain access to it. Deny movcc when the
12215 comparison mode doesn't match the move mode. */
12216 cmode = GET_MODE (ix86_compare_op0);
12217 if (cmode == VOIDmode)
12218 cmode = GET_MODE (ix86_compare_op1);
12219 if (cmode != mode)
12220 return 0;
12221
12222 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12223 &ix86_compare_op0,
12224 &ix86_compare_op1);
12225 if (code == UNKNOWN)
12226 return 0;
12227
12228 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12229 ix86_compare_op1, operands[2],
12230 operands[3]))
12231 return 1;
12232
12233 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12234 ix86_compare_op1, operands[2], operands[3]);
12235 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12236 return 1;
12237 }
12238
12239 /* The floating point conditional move instructions don't directly
12240 support conditions resulting from a signed integer comparison. */
12241
12242 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12243
12247 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12248 {
12249 gcc_assert (!second_test && !bypass_test);
12250 tmp = gen_reg_rtx (QImode);
12251 ix86_expand_setcc (code, tmp);
12252 code = NE;
12253 ix86_compare_op0 = tmp;
12254 ix86_compare_op1 = const0_rtx;
12255 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12256 }
12257 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12258 {
12259 tmp = gen_reg_rtx (mode);
12260 emit_move_insn (tmp, operands[3]);
12261 operands[3] = tmp;
12262 }
12263 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12264 {
12265 tmp = gen_reg_rtx (mode);
12266 emit_move_insn (tmp, operands[2]);
12267 operands[2] = tmp;
12268 }
12269
12270 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12271 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12272 operands[2], operands[3])));
12273 if (bypass_test)
12274 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12275 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12276 operands[3], operands[0])));
12277 if (second_test)
12278 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12279 gen_rtx_IF_THEN_ELSE (mode, second_test,
12280 operands[2], operands[0])));
12281
12282 return 1;
12283 }
12284
12285 /* Expand a floating-point vector conditional move; a vcond operation
12286 rather than a movcc operation. */
12287
12288 bool
12289 ix86_expand_fp_vcond (rtx operands[])
12290 {
12291 enum rtx_code code = GET_CODE (operands[3]);
12292 rtx cmp;
12293
12294 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12295 &operands[4], &operands[5]);
12296 if (code == UNKNOWN)
12297 return false;
12298
12299 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12300 operands[5], operands[1], operands[2]))
12301 return true;
12302
12303 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12304 operands[1], operands[2]);
12305 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12306 return true;
12307 }
12308
12309 /* Expand a signed integral vector conditional move. */
12310
12311 bool
12312 ix86_expand_int_vcond (rtx operands[])
12313 {
12314 enum machine_mode mode = GET_MODE (operands[0]);
12315 enum rtx_code code = GET_CODE (operands[3]);
12316 bool negate = false;
12317 rtx x, cop0, cop1;
12318
12319 cop0 = operands[4];
12320 cop1 = operands[5];
12321
12322 /* Canonicalize the comparison to EQ, GT, GTU. */
12323 switch (code)
12324 {
12325 case EQ:
12326 case GT:
12327 case GTU:
12328 break;
12329
12330 case NE:
12331 case LE:
12332 case LEU:
12333 code = reverse_condition (code);
12334 negate = true;
12335 break;
12336
12337 case GE:
12338 case GEU:
12339 code = reverse_condition (code);
12340 negate = true;
12341 /* FALLTHRU */
12342
12343 case LT:
12344 case LTU:
12345 code = swap_condition (code);
12346 x = cop0, cop0 = cop1, cop1 = x;
12347 break;
12348
12349 default:
12350 gcc_unreachable ();
12351 }
12352
12353 /* Unsigned parallel compare is not supported by the hardware. Play some
12354 tricks to turn this into a signed comparison against 0. */
12355 if (code == GTU)
12356 {
12357 cop0 = force_reg (mode, cop0);
12358
12359 switch (mode)
12360 {
12361 case V4SImode:
12362 {
12363 rtx t1, t2, mask;
12364
12365 /* Perform a parallel modulo subtraction. */
12366 t1 = gen_reg_rtx (mode);
12367 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12368
12369 /* Extract the original sign bit of op0. */
12370 mask = GEN_INT (-0x80000000);
12371 mask = gen_rtx_CONST_VECTOR (mode,
12372 gen_rtvec (4, mask, mask, mask, mask));
12373 mask = force_reg (mode, mask);
12374 t2 = gen_reg_rtx (mode);
12375 emit_insn (gen_andv4si3 (t2, cop0, mask));
12376
12377 /* XOR it back into the result of the subtraction. This results
12378 in the sign bit set iff we saw unsigned underflow. */
12379 x = gen_reg_rtx (mode);
12380 emit_insn (gen_xorv4si3 (x, t1, t2));
12381
12382 code = GT;
12383 }
12384 break;
12385
12386 case V16QImode:
12387 case V8HImode:
12388 /* Perform a parallel unsigned saturating subtraction. */
12389 x = gen_reg_rtx (mode);
12390 emit_insn (gen_rtx_SET (VOIDmode, x,
12391 gen_rtx_US_MINUS (mode, cop0, cop1)));
12392
12393 code = EQ;
12394 negate = !negate;
12395 break;
12396
12397 default:
12398 gcc_unreachable ();
12399 }
12400
12401 cop0 = x;
12402 cop1 = CONST0_RTX (mode);
12403 }
12404
12405 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12406 operands[1+negate], operands[2-negate]);
12407
12408 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12409 operands[2-negate]);
12410 return true;
12411 }
12412
12413 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12414 true if we should do zero extension, else sign extension. HIGH_P is
12415 true if we want the N/2 high elements, else the low elements. */
12416
12417 void
12418 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12419 {
12420 enum machine_mode imode = GET_MODE (operands[1]);
12421 rtx (*unpack)(rtx, rtx, rtx);
12422 rtx se, dest;
12423
12424 switch (imode)
12425 {
12426 case V16QImode:
12427 if (high_p)
12428 unpack = gen_vec_interleave_highv16qi;
12429 else
12430 unpack = gen_vec_interleave_lowv16qi;
12431 break;
12432 case V8HImode:
12433 if (high_p)
12434 unpack = gen_vec_interleave_highv8hi;
12435 else
12436 unpack = gen_vec_interleave_lowv8hi;
12437 break;
12438 case V4SImode:
12439 if (high_p)
12440 unpack = gen_vec_interleave_highv4si;
12441 else
12442 unpack = gen_vec_interleave_lowv4si;
12443 break;
12444 default:
12445 gcc_unreachable ();
12446 }
12447
12448 dest = gen_lowpart (imode, operands[0]);
12449
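  /* The interleave partner supplies the extension bits: zero for a zero
     extension, or the mask 0 > x (all ones exactly for negative elements)
     for a sign extension.  */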
12450 if (unsigned_p)
12451 se = force_reg (imode, CONST0_RTX (imode));
12452 else
12453 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12454 operands[1], pc_rtx, pc_rtx);
12455
12456 emit_insn (unpack (dest, operands[1], se));
12457 }
12458
12459 /* Expand conditional increment or decrement using adc/sbb instructions.
12460 The default case using setcc followed by a conditional move can be
12461 done by generic code. */
12462 int
12463 ix86_expand_int_addcc (rtx operands[])
12464 {
12465 enum rtx_code code = GET_CODE (operands[1]);
12466 rtx compare_op;
12467 rtx val = const0_rtx;
12468 bool fpcmp = false;
12469 enum machine_mode mode = GET_MODE (operands[0]);
12470
12471 if (operands[3] != const1_rtx
12472 && operands[3] != constm1_rtx)
12473 return 0;
12474 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12475 ix86_compare_op1, &compare_op))
12476 return 0;
12477 code = GET_CODE (compare_op);
12478
12479 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12480 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12481 {
12482 fpcmp = true;
12483 code = ix86_fp_compare_code_to_integer (code);
12484 }
12485
12486 if (code != LTU)
12487 {
12488 val = constm1_rtx;
12489 if (fpcmp)
12490 PUT_CODE (compare_op,
12491 reverse_condition_maybe_unordered
12492 (GET_CODE (compare_op)));
12493 else
12494 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12495 }
12496 PUT_MODE (compare_op, mode);
12497
12498 /* Construct either adc or sbb insn. */
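  /* E.g. a conditional increment x += (a <u b) comes out roughly as
	cmpl b, a; adcl $0, x.  */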
12499 if ((code == LTU) == (operands[3] == constm1_rtx))
12500 {
12501 switch (GET_MODE (operands[0]))
12502 {
12503 case QImode:
12504 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12505 break;
12506 case HImode:
12507 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12508 break;
12509 case SImode:
12510 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12511 break;
12512 case DImode:
12513 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12514 break;
12515 default:
12516 gcc_unreachable ();
12517 }
12518 }
12519 else
12520 {
12521 switch (GET_MODE (operands[0]))
12522 {
12523 case QImode:
12524 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12525 break;
12526 case HImode:
12527 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12528 break;
12529 case SImode:
12530 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12531 break;
12532 case DImode:
12533 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12534 break;
12535 default:
12536 gcc_unreachable ();
12537 }
12538 }
12539 return 1; /* DONE */
12540 }
12541
12542
12543 /* Split OPERAND into word-sized parts stored in PARTS. Similar to split_di,
12544 but works for floating point parameters and non-offsettable memories.
12545 For pushes, it returns just stack offsets; the values will be saved
12546 in the right order. At most three parts are generated. */
12547
12548 static int
12549 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12550 {
12551 int size;
12552
12553 if (!TARGET_64BIT)
12554 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12555 else
12556 size = (GET_MODE_SIZE (mode) + 4) / 8;
12557
12558 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12559 gcc_assert (size >= 2 && size <= 3);
12560
12561 /* Optimize constant pool reference to immediates. This is used by fp
12562 moves, which force all constants to memory to allow combining. */
12563 if (MEM_P (operand) && MEM_READONLY_P (operand))
12564 {
12565 rtx tmp = maybe_get_pool_constant (operand);
12566 if (tmp)
12567 operand = tmp;
12568 }
12569
12570 if (MEM_P (operand) && !offsettable_memref_p (operand))
12571 {
12572 /* The only non-offsettable memories we handle are pushes. */
12573 int ok = push_operand (operand, VOIDmode);
12574
12575 gcc_assert (ok);
12576
12577 operand = copy_rtx (operand);
12578 PUT_MODE (operand, Pmode);
12579 parts[0] = parts[1] = parts[2] = operand;
12580 return size;
12581 }
12582
12583 if (GET_CODE (operand) == CONST_VECTOR)
12584 {
12585 enum machine_mode imode = int_mode_for_mode (mode);
12586 /* Caution: if we looked through a constant pool memory above,
12587 the operand may actually have a different mode now. That's
12588 ok, since we want to pun this all the way back to an integer. */
12589 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12590 gcc_assert (operand != NULL);
12591 mode = imode;
12592 }
12593
12594 if (!TARGET_64BIT)
12595 {
12596 if (mode == DImode)
12597 split_di (&operand, 1, &parts[0], &parts[1]);
12598 else
12599 {
12600 if (REG_P (operand))
12601 {
12602 gcc_assert (reload_completed);
12603 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12604 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12605 if (size == 3)
12606 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12607 }
12608 else if (offsettable_memref_p (operand))
12609 {
12610 operand = adjust_address (operand, SImode, 0);
12611 parts[0] = operand;
12612 parts[1] = adjust_address (operand, SImode, 4);
12613 if (size == 3)
12614 parts[2] = adjust_address (operand, SImode, 8);
12615 }
12616 else if (GET_CODE (operand) == CONST_DOUBLE)
12617 {
12618 REAL_VALUE_TYPE r;
12619 long l[4];
12620
12621 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12622 switch (mode)
12623 {
12624 case XFmode:
12625 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12626 parts[2] = gen_int_mode (l[2], SImode);
12627 break;
12628 case DFmode:
12629 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12630 break;
12631 default:
12632 gcc_unreachable ();
12633 }
12634 parts[1] = gen_int_mode (l[1], SImode);
12635 parts[0] = gen_int_mode (l[0], SImode);
12636 }
12637 else
12638 gcc_unreachable ();
12639 }
12640 }
12641 else
12642 {
12643 if (mode == TImode)
12644 split_ti (&operand, 1, &parts[0], &parts[1]);
12645 if (mode == XFmode || mode == TFmode)
12646 {
12647 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12648 if (REG_P (operand))
12649 {
12650 gcc_assert (reload_completed);
12651 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12652 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12653 }
12654 else if (offsettable_memref_p (operand))
12655 {
12656 operand = adjust_address (operand, DImode, 0);
12657 parts[0] = operand;
12658 parts[1] = adjust_address (operand, upper_mode, 8);
12659 }
12660 else if (GET_CODE (operand) == CONST_DOUBLE)
12661 {
12662 REAL_VALUE_TYPE r;
12663 long l[4];
12664
12665 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12666 real_to_target (l, &r, mode);
12667
12668 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12669 if (HOST_BITS_PER_WIDE_INT >= 64)
12670 parts[0]
12671 = gen_int_mode
12672 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12673 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12674 DImode);
12675 else
12676 parts[0] = immed_double_const (l[0], l[1], DImode);
12677
12678 if (upper_mode == SImode)
12679 parts[1] = gen_int_mode (l[2], SImode);
12680 else if (HOST_BITS_PER_WIDE_INT >= 64)
12681 parts[1]
12682 = gen_int_mode
12683 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12684 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12685 DImode);
12686 else
12687 parts[1] = immed_double_const (l[2], l[3], DImode);
12688 }
12689 else
12690 gcc_unreachable ();
12691 }
12692 }
12693
12694 return size;
12695 }
12696
12697 /* Emit insns to perform a move or push of DI, DF, and XF values.
12698 All required insns are emitted here. Operands 2-4 are set to the
12699 destination parts in the correct order; operands 5-7 are set to the
12700 corresponding source parts. */
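/* As an example, a 32-bit DImode move whose destination low register is
   also the source address register is emitted high part first, roughly

	movl 4(%eax), %edx
	movl (%eax), %eax

   so that the address is not clobbered before the last read.  */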
12701
12702 void
12703 ix86_split_long_move (rtx operands[])
12704 {
12705 rtx part[2][3];
12706 int nparts;
12707 int push = 0;
12708 int collisions = 0;
12709 enum machine_mode mode = GET_MODE (operands[0]);
12710
12711 /* The DFmode expanders may ask us to move a double.
12712 For a 64-bit target this is a single move. By handling that case
12713 here we simplify the i386.md splitters. */
12714 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12715 {
12716 /* Optimize constant pool references to immediates. This is used by
12717 fp moves that force all constants to memory to allow combining. */
12718
12719 if (MEM_P (operands[1])
12720 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12721 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12722 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12723 if (push_operand (operands[0], VOIDmode))
12724 {
12725 operands[0] = copy_rtx (operands[0]);
12726 PUT_MODE (operands[0], Pmode);
12727 }
12728 else
12729 operands[0] = gen_lowpart (DImode, operands[0]);
12730 operands[1] = gen_lowpart (DImode, operands[1]);
12731 emit_move_insn (operands[0], operands[1]);
12732 return;
12733 }
12734
12735 /* The only non-offsettable memory we handle is a push. */
12736 if (push_operand (operands[0], VOIDmode))
12737 push = 1;
12738 else
12739 gcc_assert (!MEM_P (operands[0])
12740 || offsettable_memref_p (operands[0]));
12741
12742 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12743 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12744
12745 /* When emitting a push, take care of source operands on the stack. */
12746 if (push && MEM_P (operands[1])
12747 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12748 {
12749 if (nparts == 3)
12750 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12751 XEXP (part[1][2], 0));
12752 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12753 XEXP (part[1][1], 0));
12754 }
12755
12756 /* We need to do the copy in the right order in case an address register
12757 of the source overlaps the destination. */
12758 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12759 {
12760 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12761 collisions++;
12762 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12763 collisions++;
12764 if (nparts == 3
12765 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12766 collisions++;
12767
12768 /* Collision in the middle part can be handled by reordering. */
12769 if (collisions == 1 && nparts == 3
12770 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12771 {
12772 rtx tmp;
12773 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12774 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12775 }
12776
12777 /* If there are more collisions, we can't handle them by reordering.
12778 Do an lea to the last part and use only one colliding move. */
12779 else if (collisions > 1)
12780 {
12781 rtx base;
12782
12783 collisions = 1;
12784
12785 base = part[0][nparts - 1];
12786
12787 /* Handle the case when the last part isn't valid for lea.
12788 Happens in 64-bit mode storing the 12-byte XFmode. */
12789 if (GET_MODE (base) != Pmode)
12790 base = gen_rtx_REG (Pmode, REGNO (base));
12791
12792 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12793 part[1][0] = replace_equiv_address (part[1][0], base);
12794 part[1][1] = replace_equiv_address (part[1][1],
12795 plus_constant (base, UNITS_PER_WORD));
12796 if (nparts == 3)
12797 part[1][2] = replace_equiv_address (part[1][2],
12798 plus_constant (base, 8));
12799 }
12800 }
12801
12802 if (push)
12803 {
12804 if (!TARGET_64BIT)
12805 {
12806 if (nparts == 3)
12807 {
12808 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12809 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12810 emit_move_insn (part[0][2], part[1][2]);
12811 }
12812 }
12813 else
12814 {
12815 /* In 64-bit mode we don't have a 32-bit push available. In case this is a
12816 register, it is OK - we will just use the larger counterpart. We also
12817 retype memory - this comes from an attempt to avoid the REX prefix on
12818 moving the second half of a TFmode value. */
12819 if (GET_MODE (part[1][1]) == SImode)
12820 {
12821 switch (GET_CODE (part[1][1]))
12822 {
12823 case MEM:
12824 part[1][1] = adjust_address (part[1][1], DImode, 0);
12825 break;
12826
12827 case REG:
12828 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12829 break;
12830
12831 default:
12832 gcc_unreachable ();
12833 }
12834
12835 if (GET_MODE (part[1][0]) == SImode)
12836 part[1][0] = part[1][1];
12837 }
12838 }
12839 emit_move_insn (part[0][1], part[1][1]);
12840 emit_move_insn (part[0][0], part[1][0]);
12841 return;
12842 }
12843
12844 /* Choose the correct order so as not to overwrite the source before it is copied. */
12845 if ((REG_P (part[0][0])
12846 && REG_P (part[1][1])
12847 && (REGNO (part[0][0]) == REGNO (part[1][1])
12848 || (nparts == 3
12849 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12850 || (collisions > 0
12851 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12852 {
12853 if (nparts == 3)
12854 {
12855 operands[2] = part[0][2];
12856 operands[3] = part[0][1];
12857 operands[4] = part[0][0];
12858 operands[5] = part[1][2];
12859 operands[6] = part[1][1];
12860 operands[7] = part[1][0];
12861 }
12862 else
12863 {
12864 operands[2] = part[0][1];
12865 operands[3] = part[0][0];
12866 operands[5] = part[1][1];
12867 operands[6] = part[1][0];
12868 }
12869 }
12870 else
12871 {
12872 if (nparts == 3)
12873 {
12874 operands[2] = part[0][0];
12875 operands[3] = part[0][1];
12876 operands[4] = part[0][2];
12877 operands[5] = part[1][0];
12878 operands[6] = part[1][1];
12879 operands[7] = part[1][2];
12880 }
12881 else
12882 {
12883 operands[2] = part[0][0];
12884 operands[3] = part[0][1];
12885 operands[5] = part[1][0];
12886 operands[6] = part[1][1];
12887 }
12888 }
12889
12890 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12891 if (optimize_size)
12892 {
12893 if (CONST_INT_P (operands[5])
12894 && operands[5] != const0_rtx
12895 && REG_P (operands[2]))
12896 {
12897 if (CONST_INT_P (operands[6])
12898 && INTVAL (operands[6]) == INTVAL (operands[5]))
12899 operands[6] = operands[2];
12900
12901 if (nparts == 3
12902 && CONST_INT_P (operands[7])
12903 && INTVAL (operands[7]) == INTVAL (operands[5]))
12904 operands[7] = operands[2];
12905 }
12906
12907 if (nparts == 3
12908 && CONST_INT_P (operands[6])
12909 && operands[6] != const0_rtx
12910 && REG_P (operands[3])
12911 && CONST_INT_P (operands[7])
12912 && INTVAL (operands[7]) == INTVAL (operands[6]))
12913 operands[7] = operands[3];
12914 }
12915
12916 emit_move_insn (operands[2], operands[5]);
12917 emit_move_insn (operands[3], operands[6]);
12918 if (nparts == 3)
12919 emit_move_insn (operands[4], operands[7]);
12920
12921 return;
12922 }
12923
12924 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12925 left shift by a constant, either using a single shift or
12926 a sequence of add instructions. */
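/* For instance, when the add-based sequence is cheaper according to
   ix86_cost, a shift of a 32-bit half by 2 may come out as two
   "addl %reg, %reg" instructions instead of one "shll $2, %reg";
   a count of 1 is always a single add.  */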
12927
12928 static void
12929 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12930 {
12931 if (count == 1)
12932 {
12933 emit_insn ((mode == DImode
12934 ? gen_addsi3
12935 : gen_adddi3) (operand, operand, operand));
12936 }
12937 else if (!optimize_size
12938 && count * ix86_cost->add <= ix86_cost->shift_const)
12939 {
12940 int i;
12941 for (i=0; i<count; i++)
12942 {
12943 emit_insn ((mode == DImode
12944 ? gen_addsi3
12945 : gen_adddi3) (operand, operand, operand));
12946 }
12947 }
12948 else
12949 emit_insn ((mode == DImode
12950 ? gen_ashlsi3
12951 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12952 }
12953
12954 void
12955 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12956 {
12957 rtx low[2], high[2];
12958 int count;
12959 const int single_width = mode == DImode ? 32 : 64;
12960
12961 if (CONST_INT_P (operands[2]))
12962 {
12963 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12964 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12965
12966 if (count >= single_width)
12967 {
12968 emit_move_insn (high[0], low[1]);
12969 emit_move_insn (low[0], const0_rtx);
12970
12971 if (count > single_width)
12972 ix86_expand_ashl_const (high[0], count - single_width, mode);
12973 }
12974 else
12975 {
12976 if (!rtx_equal_p (operands[0], operands[1]))
12977 emit_move_insn (operands[0], operands[1]);
12978 emit_insn ((mode == DImode
12979 ? gen_x86_shld_1
12980 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12981 ix86_expand_ashl_const (low[0], count, mode);
12982 }
12983 return;
12984 }
12985
12986 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12987
12988 if (operands[1] == const1_rtx)
12989 {
12990 /* Assuming we've chosen QImode-capable registers, then 1 << N
12991 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12992 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12993 {
12994 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12995
12996 ix86_expand_clear (low[0]);
12997 ix86_expand_clear (high[0]);
12998 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12999
13000 d = gen_lowpart (QImode, low[0]);
13001 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13002 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13003 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13004
13005 d = gen_lowpart (QImode, high[0]);
13006 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13007 s = gen_rtx_NE (QImode, flags, const0_rtx);
13008 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13009 }
13010
13011 /* Otherwise, we can get the same results by manually performing
13012 a bit extract operation on bit 5/6, and then performing the two
13013 shifts. The two methods of getting 0/1 into low/high are exactly
13014 the same size. Avoiding the shift in the bit extract case helps
13015 pentium4 a bit; no one else seems to care much either way. */
13016 else
13017 {
13018 rtx x;
13019
13020 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13021 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13022 else
13023 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13024 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13025
13026 emit_insn ((mode == DImode
13027 ? gen_lshrsi3
13028 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13029 emit_insn ((mode == DImode
13030 ? gen_andsi3
13031 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13032 emit_move_insn (low[0], high[0]);
13033 emit_insn ((mode == DImode
13034 ? gen_xorsi3
13035 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13036 }
13037
13038 emit_insn ((mode == DImode
13039 ? gen_ashlsi3
13040 : gen_ashldi3) (low[0], low[0], operands[2]));
13041 emit_insn ((mode == DImode
13042 ? gen_ashlsi3
13043 : gen_ashldi3) (high[0], high[0], operands[2]));
13044 return;
13045 }
13046
13047 if (operands[1] == constm1_rtx)
13048 {
13049 /* For -1 << N, we can avoid the shld instruction, because we
13050 know that we're shifting 0...31/63 ones into a -1. */
13051 emit_move_insn (low[0], constm1_rtx);
13052 if (optimize_size)
13053 emit_move_insn (high[0], low[0]);
13054 else
13055 emit_move_insn (high[0], constm1_rtx);
13056 }
13057 else
13058 {
13059 if (!rtx_equal_p (operands[0], operands[1]))
13060 emit_move_insn (operands[0], operands[1]);
13061
13062 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13063 emit_insn ((mode == DImode
13064 ? gen_x86_shld_1
13065 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13066 }
13067
13068 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13069
13070 if (TARGET_CMOVE && scratch)
13071 {
13072 ix86_expand_clear (scratch);
13073 emit_insn ((mode == DImode
13074 ? gen_x86_shift_adj_1
13075 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13076 }
13077 else
13078 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13079 }
13080
13081 void
13082 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13083 {
13084 rtx low[2], high[2];
13085 int count;
13086 const int single_width = mode == DImode ? 32 : 64;
13087
13088 if (CONST_INT_P (operands[2]))
13089 {
13090 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13091 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13092
13093 if (count == single_width * 2 - 1)
13094 {
13095 emit_move_insn (high[0], high[1]);
13096 emit_insn ((mode == DImode
13097 ? gen_ashrsi3
13098 : gen_ashrdi3) (high[0], high[0],
13099 GEN_INT (single_width - 1)));
13100 emit_move_insn (low[0], high[0]);
13101
13102 }
13103 else if (count >= single_width)
13104 {
13105 emit_move_insn (low[0], high[1]);
13106 emit_move_insn (high[0], low[0]);
13107 emit_insn ((mode == DImode
13108 ? gen_ashrsi3
13109 : gen_ashrdi3) (high[0], high[0],
13110 GEN_INT (single_width - 1)));
13111 if (count > single_width)
13112 emit_insn ((mode == DImode
13113 ? gen_ashrsi3
13114 : gen_ashrdi3) (low[0], low[0],
13115 GEN_INT (count - single_width)));
13116 }
13117 else
13118 {
13119 if (!rtx_equal_p (operands[0], operands[1]))
13120 emit_move_insn (operands[0], operands[1]);
13121 emit_insn ((mode == DImode
13122 ? gen_x86_shrd_1
13123 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13124 emit_insn ((mode == DImode
13125 ? gen_ashrsi3
13126 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13127 }
13128 }
13129 else
13130 {
13131 if (!rtx_equal_p (operands[0], operands[1]))
13132 emit_move_insn (operands[0], operands[1]);
13133
13134 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13135
13136 emit_insn ((mode == DImode
13137 ? gen_x86_shrd_1
13138 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13139 emit_insn ((mode == DImode
13140 ? gen_ashrsi3
13141 : gen_ashrdi3) (high[0], high[0], operands[2]));
13142
13143 if (TARGET_CMOVE && scratch)
13144 {
13145 emit_move_insn (scratch, high[0]);
13146 emit_insn ((mode == DImode
13147 ? gen_ashrsi3
13148 : gen_ashrdi3) (scratch, scratch,
13149 GEN_INT (single_width - 1)));
13150 emit_insn ((mode == DImode
13151 ? gen_x86_shift_adj_1
13152 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13153 scratch));
13154 }
13155 else
13156 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13157 }
13158 }
13159
13160 void
13161 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13162 {
13163 rtx low[2], high[2];
13164 int count;
13165 const int single_width = mode == DImode ? 32 : 64;
13166
13167 if (CONST_INT_P (operands[2]))
13168 {
13169 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13170 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13171
13172 if (count >= single_width)
13173 {
13174 emit_move_insn (low[0], high[1]);
13175 ix86_expand_clear (high[0]);
13176
13177 if (count > single_width)
13178 emit_insn ((mode == DImode
13179 ? gen_lshrsi3
13180 : gen_lshrdi3) (low[0], low[0],
13181 GEN_INT (count - single_width)));
13182 }
13183 else
13184 {
13185 if (!rtx_equal_p (operands[0], operands[1]))
13186 emit_move_insn (operands[0], operands[1]);
13187 emit_insn ((mode == DImode
13188 ? gen_x86_shrd_1
13189 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13190 emit_insn ((mode == DImode
13191 ? gen_lshrsi3
13192 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13193 }
13194 }
13195 else
13196 {
13197 if (!rtx_equal_p (operands[0], operands[1]))
13198 emit_move_insn (operands[0], operands[1]);
13199
13200 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13201
13202 emit_insn ((mode == DImode
13203 ? gen_x86_shrd_1
13204 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13205 emit_insn ((mode == DImode
13206 ? gen_lshrsi3
13207 : gen_lshrdi3) (high[0], high[0], operands[2]));
13208
13209 /* Heh. By reversing the arguments, we can reuse this pattern. */
13210 if (TARGET_CMOVE && scratch)
13211 {
13212 ix86_expand_clear (scratch);
13213 emit_insn ((mode == DImode
13214 ? gen_x86_shift_adj_1
13215 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13216 scratch));
13217 }
13218 else
13219 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13220 }
13221 }
13222
13223 /* Predict just emitted jump instruction to be taken with probability PROB. */
13224 static void
13225 predict_jump (int prob)
13226 {
13227 rtx insn = get_last_insn ();
13228 gcc_assert (JUMP_P (insn));
13229 REG_NOTES (insn)
13230 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13231 GEN_INT (prob),
13232 REG_NOTES (insn));
13233 }
13234
13235 /* Helper function for the string operations below. Test VARIABLE whether
13236 it is aligned to VALUE bytes. If it is, jump to the label. */
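/* E.g. ix86_expand_aligntest (ptr, 4, false) emits a test of bit 2 of
   PTR and a branch to the returned label that is taken when the bit is
   clear.  */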
13237 static rtx
13238 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13239 {
13240 rtx label = gen_label_rtx ();
13241 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13242 if (GET_MODE (variable) == DImode)
13243 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13244 else
13245 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13246 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13247 1, label);
13248 if (epilogue)
13249 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13250 else
13251 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13252 return label;
13253 }
13254
13255 /* Decrease COUNTREG by VALUE. */
13256 static void
13257 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13258 {
13259 if (GET_MODE (countreg) == DImode)
13260 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13261 else
13262 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13263 }
13264
13265 /* Zero-extend the possibly SImode EXP to a Pmode register. */
13266 rtx
13267 ix86_zero_extend_to_Pmode (rtx exp)
13268 {
13269 rtx r;
13270 if (GET_MODE (exp) == VOIDmode)
13271 return force_reg (Pmode, exp);
13272 if (GET_MODE (exp) == Pmode)
13273 return copy_to_mode_reg (Pmode, exp);
13274 r = gen_reg_rtx (Pmode);
13275 emit_insn (gen_zero_extendsidi2 (r, exp));
13276 return r;
13277 }
13278
13279 /* Divide COUNTREG by SCALE. */
13280 static rtx
13281 scale_counter (rtx countreg, int scale)
13282 {
13283 rtx sc;
13284 rtx piece_size_mask;
13285
13286 if (scale == 1)
13287 return countreg;
13288 if (CONST_INT_P (countreg))
13289 return GEN_INT (INTVAL (countreg) / scale);
13290 gcc_assert (REG_P (countreg));
13291
13292 piece_size_mask = GEN_INT (scale - 1);
13293 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13294 GEN_INT (exact_log2 (scale)),
13295 NULL, 1, OPTAB_DIRECT);
13296 return sc;
13297 }
13298
13299 /* When SRCPTR is non-NULL, output a simple loop to copy memory pointed to
13300 by SRCPTR to DESTPTR in chunks of MODE unrolled UNROLL times; the
13301 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13302 equivalent loop to set memory to VALUE (supposed to be in MODE).
13303
13304 The size is rounded down to a whole number of chunks moved at once.
13305 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
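/* Roughly, the emitted code has the shape

	size = count & ~(piece_size - 1);
	iter = 0;
     top:
	copy (or store) piece_size bytes at dest + iter [from src + iter];
	iter += piece_size;
	if (iter < size) goto top;
	dest += iter;  src += iter;
     out:

   where piece_size is GET_MODE_SIZE (MODE) * UNROLL.  */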
13306
13307
13308 static void
13309 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13310 rtx destptr, rtx srcptr, rtx value,
13311 rtx count, enum machine_mode mode, int unroll,
13312 int expected_size)
13313 {
13314 rtx out_label, top_label, iter, tmp;
13315 enum machine_mode iter_mode;
13316 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13317 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13318 rtx size;
13319 rtx x_addr;
13320 rtx y_addr;
13321 int i;
13322
13323 iter_mode = GET_MODE (count);
13324 if (iter_mode == VOIDmode)
13325 iter_mode = word_mode;
13326
13327 top_label = gen_label_rtx ();
13328 out_label = gen_label_rtx ();
13329 iter = gen_reg_rtx (iter_mode);
13330
13331 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13332 NULL, 1, OPTAB_DIRECT);
13333 /* Those two should combine. */
13334 if (piece_size == const1_rtx)
13335 {
13336 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13337 true, out_label);
13338 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13339 }
13340 emit_move_insn (iter, const0_rtx);
13341
13342 emit_label (top_label);
13343
13344 tmp = convert_modes (Pmode, iter_mode, iter, true);
13345 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13346 destmem = change_address (destmem, mode, x_addr);
13347
13348 if (srcmem)
13349 {
13350 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13351 srcmem = change_address (srcmem, mode, y_addr);
13352
13353 /* When unrolling for chips that reorder memory reads and writes,
13354 we can save registers by using a single temporary.
13355 Also, using 4 temporaries is overkill in 32-bit mode. */
13356 if (!TARGET_64BIT && 0)
13357 {
13358 for (i = 0; i < unroll; i++)
13359 {
13360 if (i)
13361 {
13362 destmem =
13363 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13364 srcmem =
13365 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13366 }
13367 emit_move_insn (destmem, srcmem);
13368 }
13369 }
13370 else
13371 {
13372 rtx tmpreg[4];
13373 gcc_assert (unroll <= 4);
13374 for (i = 0; i < unroll; i++)
13375 {
13376 tmpreg[i] = gen_reg_rtx (mode);
13377 if (i)
13378 {
13379 srcmem =
13380 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13381 }
13382 emit_move_insn (tmpreg[i], srcmem);
13383 }
13384 for (i = 0; i < unroll; i++)
13385 {
13386 if (i)
13387 {
13388 destmem =
13389 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13390 }
13391 emit_move_insn (destmem, tmpreg[i]);
13392 }
13393 }
13394 }
13395 else
13396 for (i = 0; i < unroll; i++)
13397 {
13398 if (i)
13399 destmem =
13400 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13401 emit_move_insn (destmem, value);
13402 }
13403
13404 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13405 true, OPTAB_LIB_WIDEN);
13406 if (tmp != iter)
13407 emit_move_insn (iter, tmp);
13408
13409 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13410 true, top_label);
13411 if (expected_size != -1)
13412 {
13413 expected_size /= GET_MODE_SIZE (mode) * unroll;
13414 if (expected_size == 0)
13415 predict_jump (0);
13416 else if (expected_size > REG_BR_PROB_BASE)
13417 predict_jump (REG_BR_PROB_BASE - 1);
13418 else
13419 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13420 }
13421 else
13422 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13423 iter = ix86_zero_extend_to_Pmode (iter);
13424 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13425 true, OPTAB_LIB_WIDEN);
13426 if (tmp != destptr)
13427 emit_move_insn (destptr, tmp);
13428 if (srcptr)
13429 {
13430 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13431 true, OPTAB_LIB_WIDEN);
13432 if (tmp != srcptr)
13433 emit_move_insn (srcptr, tmp);
13434 }
13435 emit_label (out_label);
13436 }
13437
13438 /* Output a "rep; mov" instruction.
13439 Arguments have the same meaning as for the previous function. */
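/* For a known count of, say, 64 bytes with MODE == SImode this roughly
   expands to

	movl $16, %ecx
	rep movsl

   i.e. the byte count is scaled down by the chunk size before the rep
   prefix is used.  */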
13440 static void
13441 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13442 rtx destptr, rtx srcptr,
13443 rtx count,
13444 enum machine_mode mode)
13445 {
13446 rtx destexp;
13447 rtx srcexp;
13448 rtx countreg;
13449
13450 /* If the size is known and a multiple of 4, it is shorter to use rep movsl. */
13451 if (mode == QImode && CONST_INT_P (count)
13452 && !(INTVAL (count) & 3))
13453 mode = SImode;
13454
13455 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13456 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13457 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13458 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13459 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13460 if (mode != QImode)
13461 {
13462 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13463 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13464 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13465 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13466 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13467 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13468 }
13469 else
13470 {
13471 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13472 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13473 }
13474 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13475 destexp, srcexp));
13476 }
13477
13478 /* Output a "rep; stos" instruction.
13479 Arguments have the same meaning as for the previous function. */
13480 static void
13481 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13482 rtx count,
13483 enum machine_mode mode)
13484 {
13485 rtx destexp;
13486 rtx countreg;
13487
13488 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13489 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13490 value = force_reg (mode, gen_lowpart (mode, value));
13491 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13492 if (mode != QImode)
13493 {
13494 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13495 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13496 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13497 }
13498 else
13499 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13500 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13501 }
13502
13503 static void
13504 emit_strmov (rtx destmem, rtx srcmem,
13505 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13506 {
13507 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13508 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13509 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13510 }
13511
13512 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
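/* With a constant count the tail is copied by descending power-of-two
   chunks; e.g. 7 remaining bytes on a 32-bit target become one SImode,
   one HImode and one QImode move.  */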
13513 static void
13514 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13515 rtx destptr, rtx srcptr, rtx count, int max_size)
13516 {
13517 rtx src, dest;
13518 if (CONST_INT_P (count))
13519 {
13520 HOST_WIDE_INT countval = INTVAL (count);
13521 int offset = 0;
13522
13523 if ((countval & 0x10) && max_size > 16)
13524 {
13525 if (TARGET_64BIT)
13526 {
13527 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13528 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13529 }
13530 else
13531 gcc_unreachable ();
13532 offset += 16;
13533 }
13534 if ((countval & 0x08) && max_size > 8)
13535 {
13536 if (TARGET_64BIT)
13537 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13538 else
13539 {
13540 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13541 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 4);
13542 }
13543 offset += 8;
13544 }
13545 if ((countval & 0x04) && max_size > 4)
13546 {
13547 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13548 offset += 4;
13549 }
13550 if ((countval & 0x02) && max_size > 2)
13551 {
13552 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13553 offset += 2;
13554 }
13555 if ((countval & 0x01) && max_size > 1)
13556 {
13557 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13558 offset += 1;
13559 }
13560 return;
13561 }
13562 if (max_size > 8)
13563 {
13564 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13565 count, 1, OPTAB_DIRECT);
13566 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13567 count, QImode, 1, 4);
13568 return;
13569 }
13570
13571 /* When single stringop instructions are available, we can cheaply increase
13572 the dest and src pointers. Otherwise we save code size by maintaining an
13573 offset (zero is readily available from the preceding rep operation) and
13574 using x86 addressing modes. */
13575 if (TARGET_SINGLE_STRINGOP)
13576 {
13577 if (max_size > 4)
13578 {
13579 rtx label = ix86_expand_aligntest (count, 4, true);
13580 src = change_address (srcmem, SImode, srcptr);
13581 dest = change_address (destmem, SImode, destptr);
13582 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13583 emit_label (label);
13584 LABEL_NUSES (label) = 1;
13585 }
13586 if (max_size > 2)
13587 {
13588 rtx label = ix86_expand_aligntest (count, 2, true);
13589 src = change_address (srcmem, HImode, srcptr);
13590 dest = change_address (destmem, HImode, destptr);
13591 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13592 emit_label (label);
13593 LABEL_NUSES (label) = 1;
13594 }
13595 if (max_size > 1)
13596 {
13597 rtx label = ix86_expand_aligntest (count, 1, true);
13598 src = change_address (srcmem, QImode, srcptr);
13599 dest = change_address (destmem, QImode, destptr);
13600 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13601 emit_label (label);
13602 LABEL_NUSES (label) = 1;
13603 }
13604 }
13605 else
13606 {
13607 rtx offset = force_reg (Pmode, const0_rtx);
13608 rtx tmp;
13609
13610 if (max_size > 4)
13611 {
13612 rtx label = ix86_expand_aligntest (count, 4, true);
13613 src = change_address (srcmem, SImode, srcptr);
13614 dest = change_address (destmem, SImode, destptr);
13615 emit_move_insn (dest, src);
13616 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13617 true, OPTAB_LIB_WIDEN);
13618 if (tmp != offset)
13619 emit_move_insn (offset, tmp);
13620 emit_label (label);
13621 LABEL_NUSES (label) = 1;
13622 }
13623 if (max_size > 2)
13624 {
13625 rtx label = ix86_expand_aligntest (count, 2, true);
13626 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13627 src = change_address (srcmem, HImode, tmp);
13628 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13629 dest = change_address (destmem, HImode, tmp);
13630 emit_move_insn (dest, src);
13631 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13632 true, OPTAB_LIB_WIDEN);
13633 if (tmp != offset)
13634 emit_move_insn (offset, tmp);
13635 emit_label (label);
13636 LABEL_NUSES (label) = 1;
13637 }
13638 if (max_size > 1)
13639 {
13640 rtx label = ix86_expand_aligntest (count, 1, true);
13641 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13642 src = change_address (srcmem, QImode, tmp);
13643 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13644 dest = change_address (destmem, QImode, tmp);
13645 emit_move_insn (dest, src);
13646 emit_label (label);
13647 LABEL_NUSES (label) = 1;
13648 }
13649 }
13650 }
13651
13652 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13653 static void
13654 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13655 rtx count, int max_size)
13656 {
13657 count =
13658 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13659 count, 1, OPTAB_DIRECT);
13660 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13661 gen_lowpart (QImode, value), count, QImode,
13662 1, max_size / 2);
13663 }
13664
13665 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13666 static void
13667 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13668 {
13669 rtx dest;
13670
13671 if (CONST_INT_P (count))
13672 {
13673 HOST_WIDE_INT countval = INTVAL (count);
13674 int offset = 0;
13675
13676 if ((countval & 0x10) && max_size > 16)
13677 {
13678 if (TARGET_64BIT)
13679 {
13680 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13681 emit_insn (gen_strset (destptr, dest, value));
13682 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13683 emit_insn (gen_strset (destptr, dest, value));
13684 }
13685 else
13686 gcc_unreachable ();
13687 offset += 16;
13688 }
13689 if ((countval & 0x08) && max_size > 8)
13690 {
13691 if (TARGET_64BIT)
13692 {
13693 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13694 emit_insn (gen_strset (destptr, dest, value));
13695 }
13696 else
13697 {
13698 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13699 emit_insn (gen_strset (destptr, dest, value));
13700 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13701 emit_insn (gen_strset (destptr, dest, value));
13702 }
13703 offset += 8;
13704 }
13705 if ((countval & 0x04) && max_size > 4)
13706 {
13707 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13708 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13709 offset += 4;
13710 }
13711 if ((countval & 0x02) && max_size > 2)
13712 {
13713 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13714 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13715 offset += 2;
13716 }
13717 if ((countval & 0x01) && max_size > 1)
13718 {
13719 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13720 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13721 offset += 1;
13722 }
13723 return;
13724 }
13725 if (max_size > 32)
13726 {
13727 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13728 return;
13729 }
13730 if (max_size > 16)
13731 {
13732 rtx label = ix86_expand_aligntest (count, 16, true);
13733 if (TARGET_64BIT)
13734 {
13735 dest = change_address (destmem, DImode, destptr);
13736 emit_insn (gen_strset (destptr, dest, value));
13737 emit_insn (gen_strset (destptr, dest, value));
13738 }
13739 else
13740 {
13741 dest = change_address (destmem, SImode, destptr);
13742 emit_insn (gen_strset (destptr, dest, value));
13743 emit_insn (gen_strset (destptr, dest, value));
13744 emit_insn (gen_strset (destptr, dest, value));
13745 emit_insn (gen_strset (destptr, dest, value));
13746 }
13747 emit_label (label);
13748 LABEL_NUSES (label) = 1;
13749 }
13750 if (max_size > 8)
13751 {
13752 rtx label = ix86_expand_aligntest (count, 8, true);
13753 if (TARGET_64BIT)
13754 {
13755 dest = change_address (destmem, DImode, destptr);
13756 emit_insn (gen_strset (destptr, dest, value));
13757 }
13758 else
13759 {
13760 dest = change_address (destmem, SImode, destptr);
13761 emit_insn (gen_strset (destptr, dest, value));
13762 emit_insn (gen_strset (destptr, dest, value));
13763 }
13764 emit_label (label);
13765 LABEL_NUSES (label) = 1;
13766 }
13767 if (max_size > 4)
13768 {
13769 rtx label = ix86_expand_aligntest (count, 4, true);
13770 dest = change_address (destmem, SImode, destptr);
13771 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13772 emit_label (label);
13773 LABEL_NUSES (label) = 1;
13774 }
13775 if (max_size > 2)
13776 {
13777 rtx label = ix86_expand_aligntest (count, 2, true);
13778 dest = change_address (destmem, HImode, destptr);
13779 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13780 emit_label (label);
13781 LABEL_NUSES (label) = 1;
13782 }
13783 if (max_size > 1)
13784 {
13785 rtx label = ix86_expand_aligntest (count, 1, true);
13786 dest = change_address (destmem, QImode, destptr);
13787 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13788 emit_label (label);
13789 LABEL_NUSES (label) = 1;
13790 }
13791 }
13792
13793 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by ALIGN,
13794 to DESIRED_ALIGNMENT. */
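/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 4 this emits a
   QImode copy guarded by a test of bit 0 of DESTPTR and a HImode copy
   guarded by a test of bit 1, adjusting COUNT after each copy, so that
   DEST ends up 4-byte aligned.  */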
13795 static void
13796 expand_movmem_prologue (rtx destmem, rtx srcmem,
13797 rtx destptr, rtx srcptr, rtx count,
13798 int align, int desired_alignment)
13799 {
13800 if (align <= 1 && desired_alignment > 1)
13801 {
13802 rtx label = ix86_expand_aligntest (destptr, 1, false);
13803 srcmem = change_address (srcmem, QImode, srcptr);
13804 destmem = change_address (destmem, QImode, destptr);
13805 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13806 ix86_adjust_counter (count, 1);
13807 emit_label (label);
13808 LABEL_NUSES (label) = 1;
13809 }
13810 if (align <= 2 && desired_alignment > 2)
13811 {
13812 rtx label = ix86_expand_aligntest (destptr, 2, false);
13813 srcmem = change_address (srcmem, HImode, srcptr);
13814 destmem = change_address (destmem, HImode, destptr);
13815 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13816 ix86_adjust_counter (count, 2);
13817 emit_label (label);
13818 LABEL_NUSES (label) = 1;
13819 }
13820 if (align <= 4 && desired_alignment > 4)
13821 {
13822 rtx label = ix86_expand_aligntest (destptr, 4, false);
13823 srcmem = change_address (srcmem, SImode, srcptr);
13824 destmem = change_address (destmem, SImode, destptr);
13825 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13826 ix86_adjust_counter (count, 4);
13827 emit_label (label);
13828 LABEL_NUSES (label) = 1;
13829 }
13830 gcc_assert (desired_alignment <= 8);
13831 }
13832
13833 /* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
13834 to DESIRED_ALIGNMENT. */
13835 static void
13836 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13837 int align, int desired_alignment)
13838 {
13839 if (align <= 1 && desired_alignment > 1)
13840 {
13841 rtx label = ix86_expand_aligntest (destptr, 1, false);
13842 destmem = change_address (destmem, QImode, destptr);
13843 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13844 ix86_adjust_counter (count, 1);
13845 emit_label (label);
13846 LABEL_NUSES (label) = 1;
13847 }
13848 if (align <= 2 && desired_alignment > 2)
13849 {
13850 rtx label = ix86_expand_aligntest (destptr, 2, false);
13851 destmem = change_address (destmem, HImode, destptr);
13852 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13853 ix86_adjust_counter (count, 2);
13854 emit_label (label);
13855 LABEL_NUSES (label) = 1;
13856 }
13857 if (align <= 4 && desired_alignment > 4)
13858 {
13859 rtx label = ix86_expand_aligntest (destptr, 4, false);
13860 destmem = change_address (destmem, SImode, destptr);
13861 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13862 ix86_adjust_counter (count, 4);
13863 emit_label (label);
13864 LABEL_NUSES (label) = 1;
13865 }
13866 gcc_assert (desired_alignment <= 8);
13867 }
13868
13869 /* Given COUNT and EXPECTED_SIZE, decide on code generation for the string operation. */
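/* For instance, with -Os an unknown or not 4-byte-divisible COUNT picks
   rep_prefix_1_byte and a COUNT divisible by 4 picks rep_prefix_4_byte;
   otherwise the per-CPU stringop_algs table supplies the first algorithm
   whose max size covers EXPECTED_SIZE.  */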
13870 static enum stringop_alg
13871 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13872 int *dynamic_check)
13873 {
13874 const struct stringop_algs * algs;
13875
13876 *dynamic_check = -1;
13877 if (memset)
13878 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13879 else
13880 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13881 if (stringop_alg != no_stringop)
13882 return stringop_alg;
13883 /* rep; movq or rep; movl is the smallest variant. */
13884 else if (optimize_size)
13885 {
13886 if (!count || (count & 3))
13887 return rep_prefix_1_byte;
13888 else
13889 return rep_prefix_4_byte;
13890 }
13891 /* Very tiny blocks are best handled via the loop; REP is expensive
13892 to set up. */
13893 else if (expected_size != -1 && expected_size < 4)
13894 return loop_1_byte;
13895 else if (expected_size != -1)
13896 {
13897 unsigned int i;
13898 enum stringop_alg alg = libcall;
13899 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13900 {
13901 gcc_assert (algs->size[i].max);
13902 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13903 {
13904 if (algs->size[i].alg != libcall)
13905 alg = algs->size[i].alg;
13906 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13907 last non-libcall inline algorithm. */
13908 if (TARGET_INLINE_ALL_STRINGOPS)
13909 {
13910 /* When the current size is best copied by a libcall,
13911 but we are still forced to inline, run the heuristic below
13912 that will pick code for medium-sized blocks. */
13913 if (alg != libcall)
13914 return alg;
13915 break;
13916 }
13917 else
13918 return algs->size[i].alg;
13919 }
13920 }
13921 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13922 }
13923 /* When asked to inline the call anyway, try to pick a meaningful choice.
13924 We look for the maximal size of block that is faster to copy by hand and
13925 take blocks of at most that size, guessing that the average size will
13926 be roughly half of the block.
13927
13928 If this turns out to be bad, we might simply specify the preferred
13929 choice in ix86_costs. */
13930 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13931 && algs->unknown_size == libcall)
13932 {
13933 int max = -1;
13934 enum stringop_alg alg;
13935 int i;
13936
13937 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13938 if (algs->size[i].alg != libcall && algs->size[i].alg)
13939 max = algs->size[i].max;
13940 if (max == -1)
13941 max = 4096;
13942 alg = decide_alg (count, max / 2, memset, dynamic_check);
13943 gcc_assert (*dynamic_check == -1);
13944 gcc_assert (alg != libcall);
13945 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13946 *dynamic_check = max;
13947 return alg;
13948 }
13949 return algs->unknown_size;
13950 }
13951
13952 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13953 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13954 static int
13955 decide_alignment (int align,
13956 enum stringop_alg alg,
13957 int expected_size)
13958 {
13959 int desired_align = 0;
13960 switch (alg)
13961 {
13962 case no_stringop:
13963 gcc_unreachable ();
13964 case loop:
13965 case unrolled_loop:
13966 desired_align = GET_MODE_SIZE (Pmode);
13967 break;
13968 case rep_prefix_8_byte:
13969 desired_align = 8;
13970 break;
13971 case rep_prefix_4_byte:
13972 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13973 copying a whole cache line at once. */
13974 if (TARGET_PENTIUMPRO)
13975 desired_align = 8;
13976 else
13977 desired_align = 4;
13978 break;
13979 case rep_prefix_1_byte:
13980 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13981 copying a whole cache line at once. */
13982 if (TARGET_PENTIUMPRO)
13983 desired_align = 8;
13984 else
13985 desired_align = 1;
13986 break;
13987 case loop_1_byte:
13988 desired_align = 1;
13989 break;
13990 case libcall:
13991 return 0;
13992 }
13993
13994 if (optimize_size)
13995 desired_align = 1;
13996 if (desired_align < align)
13997 desired_align = align;
13998 if (expected_size != -1 && expected_size < 4)
13999 desired_align = align;
14000 return desired_align;
14001 }
14002
14003 /* Return the smallest power of 2 greater than VAL. */
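/* E.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (5) == 8.  */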
14004 static int
14005 smallest_pow2_greater_than (int val)
14006 {
14007 int ret = 1;
14008 while (ret <= val)
14009 ret <<= 1;
14010 return ret;
14011 }
14012
14013 /* Expand string move (memcpy) operation. Use i386 string operations when
14014 profitable. ix86_expand_setmem contains similar code. The code depends upon
14015 architecture, block size and alignment, but always has the same
14016 overall structure:
14017
14018 1) Prologue guard: a conditional that jumps to the epilogue for small
14019 blocks that can be handled by the epilogue alone. This is faster, but
14020 also needed for correctness, since the prologue assumes the block is larger
14021 than the desired alignment.
14022
14023 Optional dynamic check for size and libcall for large
14024 blocks is emitted here too, with -minline-stringops-dynamically.
14025
14026 2) Prologue: copy first few bytes in order to get destination aligned
14027 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14028 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14029 We emit either a jump tree on power of two sized blocks, or a byte loop.
14030
14031 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14032 with specified algorithm.
14033
14034 4) Epilogue: code copying tail of the block that is too small to be
14035 handled by main body (or up to size guarded by prologue guard). */
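/* Schematically, for a variable count the generated code looks roughly
   like

	if (count < epilogue_size_needed) goto epilogue;
	<align destination to desired_align, decreasing count>
	<main loop or rep prefix copying size_needed byte chunks>
     epilogue:
	<copy the remaining count & (epilogue_size_needed - 1) bytes>

   with the optional dynamic check and libcall emitted before the body
   when -minline-stringops-dynamically is in use.  */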
14036
14037 int
14038 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14039 rtx expected_align_exp, rtx expected_size_exp)
14040 {
14041 rtx destreg;
14042 rtx srcreg;
14043 rtx label = NULL;
14044 rtx tmp;
14045 rtx jump_around_label = NULL;
14046 HOST_WIDE_INT align = 1;
14047 unsigned HOST_WIDE_INT count = 0;
14048 HOST_WIDE_INT expected_size = -1;
14049 int size_needed = 0, epilogue_size_needed;
14050 int desired_align = 0;
14051 enum stringop_alg alg;
14052 int dynamic_check;
14053
14054 if (CONST_INT_P (align_exp))
14055 align = INTVAL (align_exp);
14056 /* i386 can do misaligned access at reasonably increased cost. */
14057 if (CONST_INT_P (expected_align_exp)
14058 && INTVAL (expected_align_exp) > align)
14059 align = INTVAL (expected_align_exp);
14060 if (CONST_INT_P (count_exp))
14061 count = expected_size = INTVAL (count_exp);
14062 if (CONST_INT_P (expected_size_exp) && count == 0)
14063 expected_size = INTVAL (expected_size_exp);
14064
14065 /* Step 0: Decide on preferred algorithm, desired alignment and
14066 size of chunks to be copied by main loop. */
14067
14068 alg = decide_alg (count, expected_size, false, &dynamic_check);
14069 desired_align = decide_alignment (align, alg, expected_size);
14070
14071 if (!TARGET_ALIGN_STRINGOPS)
14072 align = desired_align;
14073
14074 if (alg == libcall)
14075 return 0;
14076 gcc_assert (alg != no_stringop);
14077 if (!count)
14078 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14079 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14080 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14081 switch (alg)
14082 {
14083 case libcall:
14084 case no_stringop:
14085 gcc_unreachable ();
14086 case loop:
14087 size_needed = GET_MODE_SIZE (Pmode);
14088 break;
14089 case unrolled_loop:
14090 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14091 break;
14092 case rep_prefix_8_byte:
14093 size_needed = 8;
14094 break;
14095 case rep_prefix_4_byte:
14096 size_needed = 4;
14097 break;
14098 case rep_prefix_1_byte:
14099 case loop_1_byte:
14100 size_needed = 1;
14101 break;
14102 }
14103
14104 epilogue_size_needed = size_needed;
14105
14106 /* Step 1: Prologue guard. */
14107
14108 /* Alignment code needs count to be in a register. */
14109 if (CONST_INT_P (count_exp) && desired_align > align)
14110 {
14111 enum machine_mode mode = SImode;
14112 if (TARGET_64BIT && (count & ~0xffffffff))
14113 mode = DImode;
14114 count_exp = force_reg (mode, count_exp);
14115 }
14116 gcc_assert (desired_align >= 1 && align >= 1);
14117
14118 /* Ensure that alignment prologue won't copy past end of block. */
14119 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14120 && !count)
14121 {
14122 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14123
14124 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14125 Make sure it is power of 2. */
14126 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14127
14128 label = gen_label_rtx ();
14129 emit_cmp_and_jump_insns (count_exp,
14130 GEN_INT (epilogue_size_needed),
14131 LTU, 0, GET_MODE (count_exp), 1, label);
14132 if (expected_size == -1 || expected_size < epilogue_size_needed)
14133 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14134 else
14135 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14136 }
14137 /* Emit code to decide at runtime whether a library call or inline code should
14138 be used. */
14139 if (dynamic_check != -1)
14140 {
14141 rtx hot_label = gen_label_rtx ();
14142 jump_around_label = gen_label_rtx ();
14143 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14144 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14145 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14146 emit_block_move_via_libcall (dst, src, count_exp, false);
14147 emit_jump (jump_around_label);
14148 emit_label (hot_label);
14149 }
14150
14151 /* Step 2: Alignment prologue. */
14152
14153 if (desired_align > align)
14154 {
14155 /* Except for the first move in the epilogue, we no longer know
14156 the constant offset in the aliasing info. It doesn't seem worth
14157 the pain to maintain it for the first move, so throw away
14158 the info early. */
14159 src = change_address (src, BLKmode, srcreg);
14160 dst = change_address (dst, BLKmode, destreg);
14161 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14162 desired_align);
14163 }
14164 if (label && size_needed == 1)
14165 {
14166 emit_label (label);
14167 LABEL_NUSES (label) = 1;
14168 label = NULL;
14169 }
14170
14171 /* Step 3: Main loop. */
14172
14173 switch (alg)
14174 {
14175 case libcall:
14176 case no_stringop:
14177 gcc_unreachable ();
14178 case loop_1_byte:
14179 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14180 count_exp, QImode, 1, expected_size);
14181 break;
14182 case loop:
14183 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14184 count_exp, Pmode, 1, expected_size);
14185 break;
14186 case unrolled_loop:
14187 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14188 registers for 4 temporaries anyway. */
14189 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14190 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14191 expected_size);
14192 break;
14193 case rep_prefix_8_byte:
14194 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14195 DImode);
14196 break;
14197 case rep_prefix_4_byte:
14198 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14199 SImode);
14200 break;
14201 case rep_prefix_1_byte:
14202 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14203 QImode);
14204 break;
14205 }
14206 /* Adjust properly the offset of src and dest memory for aliasing. */
14207 if (CONST_INT_P (count_exp))
14208 {
14209 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14210 (count / size_needed) * size_needed);
14211 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14212 (count / size_needed) * size_needed);
14213 }
14214 else
14215 {
14216 src = change_address (src, BLKmode, srcreg);
14217 dst = change_address (dst, BLKmode, destreg);
14218 }
14219
14220 /* Step 4: Epilogue to copy the remaining bytes. */
14221
14222 if (label)
14223 {
14224 /* When the main loop is done, COUNT_EXP might hold the original count,
14225 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14226 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14227 bytes. Compensate if needed. */
14228
14229 if (size_needed < epilogue_size_needed)
14230 {
14231 tmp =
14232 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14233 GEN_INT (size_needed - 1), count_exp, 1,
14234 OPTAB_DIRECT);
14235 if (tmp != count_exp)
14236 emit_move_insn (count_exp, tmp);
14237 }
14238 emit_label (label);
14239 LABEL_NUSES (label) = 1;
14240 }
14241
14242 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14243 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14244 epilogue_size_needed);
14245 if (jump_around_label)
14246 emit_label (jump_around_label);
14247 return 1;
14248 }
14249
14250 /* Helper function for memset. For the QImode value 0xXY produce
14251 0xXYXYXYXY of the width specified by MODE. This is essentially
14252 a * 0x01010101, but we can do slightly better than
14253 synth_mult by unwinding the sequence by hand on CPUs with
14254 slow multiply. */
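/* For example, a constant QImode value 0xab becomes 0xabababab in
   SImode or 0xabababababababab in DImode; a non-constant value is
   built up roughly as

	v |= v << 8;  v |= v << 16;  [v |= v << 32 for DImode]

   or by one multiply, whichever the cost tables prefer.  */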
14255 static rtx
14256 promote_duplicated_reg (enum machine_mode mode, rtx val)
14257 {
14258 enum machine_mode valmode = GET_MODE (val);
14259 rtx tmp;
14260 int nops = mode == DImode ? 3 : 2;
14261
14262 gcc_assert (mode == SImode || mode == DImode);
14263 if (val == const0_rtx)
14264 return copy_to_mode_reg (mode, const0_rtx);
14265 if (CONST_INT_P (val))
14266 {
14267 HOST_WIDE_INT v = INTVAL (val) & 255;
14268
14269 v |= v << 8;
14270 v |= v << 16;
14271 if (mode == DImode)
14272 v |= (v << 16) << 16;
14273 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14274 }
14275
14276 if (valmode == VOIDmode)
14277 valmode = QImode;
14278 if (valmode != QImode)
14279 val = gen_lowpart (QImode, val);
14280 if (mode == QImode)
14281 return val;
14282 if (!TARGET_PARTIAL_REG_STALL)
14283 nops--;
14284 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14285 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14286 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14287 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14288 {
14289 rtx reg = convert_modes (mode, QImode, val, true);
14290 tmp = promote_duplicated_reg (mode, const1_rtx);
14291 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14292 OPTAB_DIRECT);
14293 }
14294 else
14295 {
14296 rtx reg = convert_modes (mode, QImode, val, true);
14297
14298 if (!TARGET_PARTIAL_REG_STALL)
14299 if (mode == SImode)
14300 emit_insn (gen_movsi_insv_1 (reg, reg));
14301 else
14302 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14303 else
14304 {
14305 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14306 NULL, 1, OPTAB_DIRECT);
14307 reg =
14308 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14309 }
14310 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14311 NULL, 1, OPTAB_DIRECT);
14312 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14313 if (mode == SImode)
14314 return reg;
14315 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14316 NULL, 1, OPTAB_DIRECT);
14317 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14318 return reg;
14319 }
14320 }
14321
14322 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14323 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14324 alignment from ALIGN to DESIRED_ALIGN. */
14325 static rtx
14326 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14327 {
14328 rtx promoted_val;
14329
14330 if (TARGET_64BIT
14331 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14332 promoted_val = promote_duplicated_reg (DImode, val);
14333 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14334 promoted_val = promote_duplicated_reg (SImode, val);
14335 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14336 promoted_val = promote_duplicated_reg (HImode, val);
14337 else
14338 promoted_val = val;
14339
14340 return promoted_val;
14341 }
14342
14343 /* Expand string set operation (memset). Use i386 string operations when
14344 profitable. See the ix86_expand_movmem comment for explanation of the
14345 individual steps performed. */
14346 int
14347 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14348 rtx expected_align_exp, rtx expected_size_exp)
14349 {
14350 rtx destreg;
14351 rtx label = NULL;
14352 rtx tmp;
14353 rtx jump_around_label = NULL;
14354 HOST_WIDE_INT align = 1;
14355 unsigned HOST_WIDE_INT count = 0;
14356 HOST_WIDE_INT expected_size = -1;
14357 int size_needed = 0, epilogue_size_needed;
14358 int desired_align = 0;
14359 enum stringop_alg alg;
14360 rtx promoted_val = NULL;
14361 bool force_loopy_epilogue = false;
14362 int dynamic_check;
14363
14364 if (CONST_INT_P (align_exp))
14365 align = INTVAL (align_exp);
14366 /* i386 can do misaligned access at reasonably increased cost. */
14367 if (CONST_INT_P (expected_align_exp)
14368 && INTVAL (expected_align_exp) > align)
14369 align = INTVAL (expected_align_exp);
14370 if (CONST_INT_P (count_exp))
14371 count = expected_size = INTVAL (count_exp);
14372 if (CONST_INT_P (expected_size_exp) && count == 0)
14373 expected_size = INTVAL (expected_size_exp);
14374
14375 /* Step 0: Decide on preferred algorithm, desired alignment and
14376 size of chunks to be copied by main loop. */
14377
14378 alg = decide_alg (count, expected_size, true, &dynamic_check);
14379 desired_align = decide_alignment (align, alg, expected_size);
14380
14381 if (!TARGET_ALIGN_STRINGOPS)
14382 align = desired_align;
14383
14384 if (alg == libcall)
14385 return 0;
14386 gcc_assert (alg != no_stringop);
14387 if (!count)
14388 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14389 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14390 switch (alg)
14391 {
14392 case libcall:
14393 case no_stringop:
14394 gcc_unreachable ();
14395 case loop:
14396 size_needed = GET_MODE_SIZE (Pmode);
14397 break;
14398 case unrolled_loop:
14399 size_needed = GET_MODE_SIZE (Pmode) * 4;
14400 break;
14401 case rep_prefix_8_byte:
14402 size_needed = 8;
14403 break;
14404 case rep_prefix_4_byte:
14405 size_needed = 4;
14406 break;
14407 case rep_prefix_1_byte:
14408 case loop_1_byte:
14409 size_needed = 1;
14410 break;
14411 }
14412 epilogue_size_needed = size_needed;
14413
14414 /* Step 1: Prologue guard. */
14415
14416 /* Alignment code needs count to be in register. */
14417 if (CONST_INT_P (count_exp) && desired_align > align)
14418 {
14419 enum machine_mode mode = SImode;
14420 if (TARGET_64BIT && (count & ~0xffffffff))
14421 mode = DImode;
14422 count_exp = force_reg (mode, count_exp);
14423 }
14424 /* Do the cheap promotion to allow better CSE across the
14425 main loop and epilogue (i.e. one load of the big constant in
14426 front of all the code). */
14427 if (CONST_INT_P (val_exp))
14428 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14429 desired_align, align);
14430 /* Ensure that alignment prologue won't copy past end of block. */
14431 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14432 && !count)
14433 {
14434 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14435
14436 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14437 Make sure it is power of 2. */
14438 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14439
14440 /* To improve performance for small blocks, we jump around the code
14441 that promotes VAL. This means that if the promoted VAL is not a
14442 constant, we might not use it in the epilogue and have to fall back
14443 to the byte loop variant. */
14444 if (epilogue_size_needed > 2 && !promoted_val)
14445 force_loopy_epilogue = true;
14446 label = gen_label_rtx ();
14447 emit_cmp_and_jump_insns (count_exp,
14448 GEN_INT (epilogue_size_needed),
14449 LTU, 0, GET_MODE (count_exp), 1, label);
14450 if (expected_size == -1 || expected_size <= epilogue_size_needed)
14451 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14452 else
14453 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14454 }
14455 if (dynamic_check != -1)
14456 {
14457 rtx hot_label = gen_label_rtx ();
14458 jump_around_label = gen_label_rtx ();
14459 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14460 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14461 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14462 set_storage_via_libcall (dst, count_exp, val_exp, false);
14463 emit_jump (jump_around_label);
14464 emit_label (hot_label);
14465 }
14466
14467 /* Step 2: Alignment prologue. */
14468
14469 /* Do the expensive promotion once we branched off the small blocks. */
14470 if (!promoted_val)
14471 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14472 desired_align, align);
14473 gcc_assert (desired_align >= 1 && align >= 1);
14474
14475 if (desired_align > align)
14476 {
14477 /* Except for the first move in the epilogue, we no longer know
14478 the constant offset in the aliasing info. It does not seem worth
14479 the pain to maintain it for the first move, so throw away
14480 the info early. */
14481 dst = change_address (dst, BLKmode, destreg);
14482 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14483 desired_align);
14484 }
14485 if (label && size_needed == 1)
14486 {
14487 emit_label (label);
14488 LABEL_NUSES (label) = 1;
14489 label = NULL;
14490 }
14491
14492 /* Step 3: Main loop. */
14493
14494 switch (alg)
14495 {
14496 case libcall:
14497 case no_stringop:
14498 gcc_unreachable ();
14499 case loop_1_byte:
14500 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14501 count_exp, QImode, 1, expected_size);
14502 break;
14503 case loop:
14504 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14505 count_exp, Pmode, 1, expected_size);
14506 break;
14507 case unrolled_loop:
14508 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14509 count_exp, Pmode, 4, expected_size);
14510 break;
14511 case rep_prefix_8_byte:
14512 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14513 DImode);
14514 break;
14515 case rep_prefix_4_byte:
14516 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14517 SImode);
14518 break;
14519 case rep_prefix_1_byte:
14520 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14521 QImode);
14522 break;
14523 }
14524 /* Properly adjust the offset of the destination memory for aliasing. */
14525 if (CONST_INT_P (count_exp))
14526 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14527 (count / size_needed) * size_needed);
14528 else
14529 dst = change_address (dst, BLKmode, destreg);
14530
14531 /* Step 4: Epilogue to copy the remaining bytes. */
14532
14533 if (label)
14534 {
14535 /* When the main loop is done, COUNT_EXP might hold the original count,
14536 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14537 The epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14538 bytes. Compensate if needed. */
14539
14540 if (size_needed < desired_align - align)
14541 {
14542 tmp =
14543 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14544 GEN_INT (size_needed - 1), count_exp, 1,
14545 OPTAB_DIRECT);
14546 size_needed = desired_align - align + 1;
14547 if (tmp != count_exp)
14548 emit_move_insn (count_exp, tmp);
14549 }
14550 emit_label (label);
14551 LABEL_NUSES (label) = 1;
14552 }
14553 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14554 {
14555 if (force_loopy_epilogue)
14556 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14557 size_needed);
14558 else
14559 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14560 size_needed);
14561 }
14562 if (jump_around_label)
14563 emit_label (jump_around_label);
14564 return 1;
14565 }
14566
14567 /* Expand strlen. */
14568 int
14569 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14570 {
14571 rtx addr, scratch1, scratch2, scratch3, scratch4;
14572
14573 /* The generic case of the strlen expander is long. Avoid expanding
14574 it unless TARGET_INLINE_ALL_STRINGOPS. */
14575
14576 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14577 && !TARGET_INLINE_ALL_STRINGOPS
14578 && !optimize_size
14579 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14580 return 0;
14581
14582 addr = force_reg (Pmode, XEXP (src, 0));
14583 scratch1 = gen_reg_rtx (Pmode);
14584
14585 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14586 && !optimize_size)
14587 {
14588 /* Well, it seems that some optimizer does not combine a call like
14589 foo(strlen(bar), strlen(bar));
14590 when the move and the subtraction are done here. It does calculate
14591 the length just once when these instructions are done inside of
14592 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
14593 often used and I use one fewer register for the lifetime of
14594 output_strlen_unroll(), this is better. */
14595
14596 emit_move_insn (out, addr);
14597
14598 ix86_expand_strlensi_unroll_1 (out, src, align);
14599
14600 /* strlensi_unroll_1 returns the address of the zero at the end of
14601 the string, like memchr(), so compute the length by subtracting
14602 the start address. */
14603 if (TARGET_64BIT)
14604 emit_insn (gen_subdi3 (out, out, addr));
14605 else
14606 emit_insn (gen_subsi3 (out, out, addr));
14607 }
14608 else
14609 {
14610 rtx unspec;
14611 scratch2 = gen_reg_rtx (Pmode);
14612 scratch3 = gen_reg_rtx (Pmode);
14613 scratch4 = force_reg (Pmode, constm1_rtx);
14614
14615 emit_move_insn (scratch3, addr);
14616 eoschar = force_reg (QImode, eoschar);
14617
14618 src = replace_equiv_address_nv (src, scratch3);
14619
14620 /* If .md starts supporting :P, this can be done in .md. */
14621 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14622 scratch4), UNSPEC_SCAS);
14623 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
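/* Illustrating the arithmetic that follows: repnz scasb starts with the
count register at -1 (scratch4) and decrements it once per byte scanned,
including the terminating zero, so the final count in scratch1 is
-(strlen + 2). NOT of that gives strlen + 1, and adding -1 yields
strlen. */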
14624 if (TARGET_64BIT)
14625 {
14626 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14627 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14628 }
14629 else
14630 {
14631 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14632 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14633 }
14634 }
14635 return 1;
14636 }
14637
14638 /* Expand the appropriate insns for doing strlen if not just doing
14639 repnz; scasb
14640
14641 out = result, initialized with the start address
14642 align_rtx = alignment of the address.
14643 scratch = scratch register, initialized with the start address when
14644 not aligned, otherwise undefined
14645
14646 This is just the body. It needs the initializations mentioned above and
14647 some address computing at the end. These things are done in i386.md. */
14648
14649 static void
14650 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14651 {
14652 int align;
14653 rtx tmp;
14654 rtx align_2_label = NULL_RTX;
14655 rtx align_3_label = NULL_RTX;
14656 rtx align_4_label = gen_label_rtx ();
14657 rtx end_0_label = gen_label_rtx ();
14658 rtx mem;
14659 rtx tmpreg = gen_reg_rtx (SImode);
14660 rtx scratch = gen_reg_rtx (SImode);
14661 rtx cmp;
14662
14663 align = 0;
14664 if (CONST_INT_P (align_rtx))
14665 align = INTVAL (align_rtx);
14666
14667 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14668
14669 /* Is there a known alignment and is it less than 4? */
14670 if (align < 4)
14671 {
14672 rtx scratch1 = gen_reg_rtx (Pmode);
14673 emit_move_insn (scratch1, out);
14674 /* Is there a known alignment and is it not 2? */
14675 if (align != 2)
14676 {
14677 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14678 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14679
14680 /* Leave just the 3 lower bits. */
14681 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14682 NULL_RTX, 0, OPTAB_WIDEN);
14683
14684 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14685 Pmode, 1, align_4_label);
14686 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14687 Pmode, 1, align_2_label);
14688 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14689 Pmode, 1, align_3_label);
14690 }
14691 else
14692 {
14693 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14694 check whether the address is aligned to a 4-byte boundary. */
14695
14696 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14697 NULL_RTX, 0, OPTAB_WIDEN);
14698
14699 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14700 Pmode, 1, align_4_label);
14701 }
14702
14703 mem = change_address (src, QImode, out);
14704
14705 /* Now compare the bytes. */
14706
14707 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14708 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14709 QImode, 1, end_0_label);
14710
14711 /* Increment the address. */
14712 if (TARGET_64BIT)
14713 emit_insn (gen_adddi3 (out, out, const1_rtx));
14714 else
14715 emit_insn (gen_addsi3 (out, out, const1_rtx));
14716
14717 /* Not needed with an alignment of 2 */
14718 if (align != 2)
14719 {
14720 emit_label (align_2_label);
14721
14722 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14723 end_0_label);
14724
14725 if (TARGET_64BIT)
14726 emit_insn (gen_adddi3 (out, out, const1_rtx));
14727 else
14728 emit_insn (gen_addsi3 (out, out, const1_rtx));
14729
14730 emit_label (align_3_label);
14731 }
14732
14733 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14734 end_0_label);
14735
14736 if (TARGET_64BIT)
14737 emit_insn (gen_adddi3 (out, out, const1_rtx));
14738 else
14739 emit_insn (gen_addsi3 (out, out, const1_rtx));
14740 }
14741
14742 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14743 align this loop; doing so only makes the program bigger and does not
14744 speed it up. */
14745 emit_label (align_4_label);
14746
14747 mem = change_address (src, SImode, out);
14748 emit_move_insn (scratch, mem);
14749 if (TARGET_64BIT)
14750 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14751 else
14752 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14753
14754 /* This formula yields a nonzero result iff one of the bytes is zero.
14755 This saves three branches inside the loop and many cycles. */
14756
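/* Concretely, for a word X the insns below compute
(X - 0x01010101) & ~X & 0x80808080. A zero byte of X becomes 0xff after
the subtraction (bit 7 set) while ~X also has bit 7 set at that position,
so the corresponding 0x80 mask bit survives; for example X = 0x41004242
gives 0x00800000, flagging the zero byte. */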
14757 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14758 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14759 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14760 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14761 gen_int_mode (0x80808080, SImode)));
14762 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14763 align_4_label);
14764
14765 if (TARGET_CMOVE)
14766 {
14767 rtx reg = gen_reg_rtx (SImode);
14768 rtx reg2 = gen_reg_rtx (Pmode);
14769 emit_move_insn (reg, tmpreg);
14770 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14771
14772 /* If zero is not in the first two bytes, move two bytes forward. */
14773 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14774 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14775 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14776 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14777 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14778 reg,
14779 tmpreg)));
14780 /* Emit lea manually to avoid clobbering of flags. */
14781 emit_insn (gen_rtx_SET (SImode, reg2,
14782 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14783
14784 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14785 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14786 emit_insn (gen_rtx_SET (VOIDmode, out,
14787 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14788 reg2,
14789 out)));
14790
14791 }
14792 else
14793 {
14794 rtx end_2_label = gen_label_rtx ();
14795 /* Is zero in the first two bytes? */
14796
14797 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14798 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14799 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14800 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14801 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14802 pc_rtx);
14803 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14804 JUMP_LABEL (tmp) = end_2_label;
14805
14806 /* Not in the first two. Move two bytes forward. */
14807 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14808 if (TARGET_64BIT)
14809 emit_insn (gen_adddi3 (out, out, const2_rtx));
14810 else
14811 emit_insn (gen_addsi3 (out, out, const2_rtx));
14812
14813 emit_label (end_2_label);
14814
14815 }
14816
14817 /* Avoid branch in fixing the byte. */
14818 tmpreg = gen_lowpart (QImode, tmpreg);
14819 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14820 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14821 if (TARGET_64BIT)
14822 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14823 else
14824 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14825
14826 emit_label (end_0_label);
14827 }
14828
14829 void
14830 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14831 rtx callarg2 ATTRIBUTE_UNUSED,
14832 rtx pop, int sibcall)
14833 {
14834 rtx use = NULL, call;
14835
14836 if (pop == const0_rtx)
14837 pop = NULL;
14838 gcc_assert (!TARGET_64BIT || !pop);
14839
14840 if (TARGET_MACHO && !TARGET_64BIT)
14841 {
14842 #if TARGET_MACHO
14843 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14844 fnaddr = machopic_indirect_call_target (fnaddr);
14845 #endif
14846 }
14847 else
14848 {
14849 /* Static functions and indirect calls don't need the pic register. */
14850 if (! TARGET_64BIT && flag_pic
14851 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14852 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14853 use_reg (&use, pic_offset_table_rtx);
14854 }
14855
14856 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14857 {
14858 rtx al = gen_rtx_REG (QImode, 0);
14859 emit_move_insn (al, callarg2);
14860 use_reg (&use, al);
14861 }
14862
14863 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14864 {
14865 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14866 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14867 }
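/* R11 is usable below because, in the x86-64 ABI, it is a call-clobbered
register that is not used for passing arguments, so a sibling call may
clobber it while the outgoing argument registers stay live. */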
14868 if (sibcall && TARGET_64BIT
14869 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14870 {
14871 rtx addr;
14872 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14873 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14874 emit_move_insn (fnaddr, addr);
14875 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14876 }
14877
14878 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14879 if (retval)
14880 call = gen_rtx_SET (VOIDmode, retval, call);
14881 if (pop)
14882 {
14883 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14884 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14885 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14886 }
14887
14888 call = emit_call_insn (call);
14889 if (use)
14890 CALL_INSN_FUNCTION_USAGE (call) = use;
14891 }
14892
14893 \f
14894 /* Clear stack slot assignments remembered from previous functions.
14895 This is called from INIT_EXPANDERS once before RTL is emitted for each
14896 function. */
14897
14898 static struct machine_function *
14899 ix86_init_machine_status (void)
14900 {
14901 struct machine_function *f;
14902
14903 f = ggc_alloc_cleared (sizeof (struct machine_function));
14904 f->use_fast_prologue_epilogue_nregs = -1;
14905 f->tls_descriptor_call_expanded_p = 0;
14906
14907 return f;
14908 }
14909
14910 /* Return a MEM corresponding to a stack slot with mode MODE.
14911 Allocate a new slot if necessary.
14912
14913 The RTL for a function can have several slots available: N is
14914 which slot to use. */
14915
14916 rtx
14917 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14918 {
14919 struct stack_local_entry *s;
14920
14921 gcc_assert (n < MAX_386_STACK_LOCALS);
14922
14923 for (s = ix86_stack_locals; s; s = s->next)
14924 if (s->mode == mode && s->n == n)
14925 return copy_rtx (s->rtl);
14926
14927 s = (struct stack_local_entry *)
14928 ggc_alloc (sizeof (struct stack_local_entry));
14929 s->n = n;
14930 s->mode = mode;
14931 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14932
14933 s->next = ix86_stack_locals;
14934 ix86_stack_locals = s;
14935 return s->rtl;
14936 }
14937
14938 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14939
14940 static GTY(()) rtx ix86_tls_symbol;
14941 rtx
14942 ix86_tls_get_addr (void)
14943 {
14944
14945 if (!ix86_tls_symbol)
14946 {
14947 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14948 (TARGET_ANY_GNU_TLS
14949 && !TARGET_64BIT)
14950 ? "___tls_get_addr"
14951 : "__tls_get_addr");
14952 }
14953
14954 return ix86_tls_symbol;
14955 }
14956
14957 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14958
14959 static GTY(()) rtx ix86_tls_module_base_symbol;
14960 rtx
14961 ix86_tls_module_base (void)
14962 {
14963
14964 if (!ix86_tls_module_base_symbol)
14965 {
14966 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14967 "_TLS_MODULE_BASE_");
14968 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14969 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14970 }
14971
14972 return ix86_tls_module_base_symbol;
14973 }
14974 \f
14975 /* Calculate the length of the memory address in the instruction
14976 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14977
14978 int
14979 memory_address_length (rtx addr)
14980 {
14981 struct ix86_address parts;
14982 rtx base, index, disp;
14983 int len;
14984 int ok;
14985
14986 if (GET_CODE (addr) == PRE_DEC
14987 || GET_CODE (addr) == POST_INC
14988 || GET_CODE (addr) == PRE_MODIFY
14989 || GET_CODE (addr) == POST_MODIFY)
14990 return 0;
14991
14992 ok = ix86_decompose_address (addr, &parts);
14993 gcc_assert (ok);
14994
14995 if (parts.base && GET_CODE (parts.base) == SUBREG)
14996 parts.base = SUBREG_REG (parts.base);
14997 if (parts.index && GET_CODE (parts.index) == SUBREG)
14998 parts.index = SUBREG_REG (parts.index);
14999
15000 base = parts.base;
15001 index = parts.index;
15002 disp = parts.disp;
15003 len = 0;
15004
15005 /* Rule of thumb:
15006 - esp as the base always wants an index,
15007 - ebp as the base always wants a displacement. */
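/* For example, counting only the bytes handled here (beyond the modrm
byte itself): (%ebx) needs nothing extra (0), (%esp) needs a SIB byte
(1), (%ebp) needs a disp8 (1), an absolute address needs a disp32 (4),
and 8(%ebx,%esi) needs a SIB byte plus a disp8 (2). */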
15008
15009 /* Register Indirect. */
15010 if (base && !index && !disp)
15011 {
15012 /* esp (for its index) and ebp (for its displacement) need
15013 the two-byte modrm form. */
15014 if (addr == stack_pointer_rtx
15015 || addr == arg_pointer_rtx
15016 || addr == frame_pointer_rtx
15017 || addr == hard_frame_pointer_rtx)
15018 len = 1;
15019 }
15020
15021 /* Direct Addressing. */
15022 else if (disp && !base && !index)
15023 len = 4;
15024
15025 else
15026 {
15027 /* Find the length of the displacement constant. */
15028 if (disp)
15029 {
15030 if (base && satisfies_constraint_K (disp))
15031 len = 1;
15032 else
15033 len = 4;
15034 }
15035 /* ebp always wants a displacement. */
15036 else if (base == hard_frame_pointer_rtx)
15037 len = 1;
15038
15039 /* An index requires the two-byte modrm form.... */
15040 if (index
15041 /* ...like esp, which always wants an index. */
15042 || base == stack_pointer_rtx
15043 || base == arg_pointer_rtx
15044 || base == frame_pointer_rtx)
15045 len += 1;
15046 }
15047
15048 return len;
15049 }
15050
15051 /* Compute the default value for the "length_immediate" attribute. When
15052 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15053 int
15054 ix86_attr_length_immediate_default (rtx insn, int shortform)
15055 {
15056 int len = 0;
15057 int i;
15058 extract_insn_cached (insn);
15059 for (i = recog_data.n_operands - 1; i >= 0; --i)
15060 if (CONSTANT_P (recog_data.operand[i]))
15061 {
15062 gcc_assert (!len);
15063 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15064 len = 1;
15065 else
15066 {
15067 switch (get_attr_mode (insn))
15068 {
15069 case MODE_QI:
15070 len+=1;
15071 break;
15072 case MODE_HI:
15073 len+=2;
15074 break;
15075 case MODE_SI:
15076 len+=4;
15077 break;
15078 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15079 case MODE_DI:
15080 len+=4;
15081 break;
15082 default:
15083 fatal_insn ("unknown insn mode", insn);
15084 }
15085 }
15086 }
15087 return len;
15088 }
15089 /* Compute default value for "length_address" attribute. */
15090 int
15091 ix86_attr_length_address_default (rtx insn)
15092 {
15093 int i;
15094
15095 if (get_attr_type (insn) == TYPE_LEA)
15096 {
15097 rtx set = PATTERN (insn);
15098
15099 if (GET_CODE (set) == PARALLEL)
15100 set = XVECEXP (set, 0, 0);
15101
15102 gcc_assert (GET_CODE (set) == SET);
15103
15104 return memory_address_length (SET_SRC (set));
15105 }
15106
15107 extract_insn_cached (insn);
15108 for (i = recog_data.n_operands - 1; i >= 0; --i)
15109 if (MEM_P (recog_data.operand[i]))
15110 {
15111 return memory_address_length (XEXP (recog_data.operand[i], 0));
15112 break;
15113 }
15114 return 0;
15115 }
15116 \f
15117 /* Return the maximum number of instructions a cpu can issue. */
15118
15119 static int
15120 ix86_issue_rate (void)
15121 {
15122 switch (ix86_tune)
15123 {
15124 case PROCESSOR_PENTIUM:
15125 case PROCESSOR_K6:
15126 return 2;
15127
15128 case PROCESSOR_PENTIUMPRO:
15129 case PROCESSOR_PENTIUM4:
15130 case PROCESSOR_ATHLON:
15131 case PROCESSOR_K8:
15132 case PROCESSOR_AMDFAM10:
15133 case PROCESSOR_NOCONA:
15134 case PROCESSOR_GENERIC32:
15135 case PROCESSOR_GENERIC64:
15136 return 3;
15137
15138 case PROCESSOR_CORE2:
15139 return 4;
15140
15141 default:
15142 return 1;
15143 }
15144 }
15145
15146 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
15147 set by DEP_INSN and nothing else that DEP_INSN sets. */
15148
15149 static int
15150 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15151 {
15152 rtx set, set2;
15153
15154 /* Simplify the test for uninteresting insns. */
15155 if (insn_type != TYPE_SETCC
15156 && insn_type != TYPE_ICMOV
15157 && insn_type != TYPE_FCMOV
15158 && insn_type != TYPE_IBR)
15159 return 0;
15160
15161 if ((set = single_set (dep_insn)) != 0)
15162 {
15163 set = SET_DEST (set);
15164 set2 = NULL_RTX;
15165 }
15166 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15167 && XVECLEN (PATTERN (dep_insn), 0) == 2
15168 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15169 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15170 {
15171 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15172 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15173 }
15174 else
15175 return 0;
15176
15177 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15178 return 0;
15179
15180 /* This test is true if the dependent insn reads the flags but
15181 not any other potentially set register. */
15182 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15183 return 0;
15184
15185 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15186 return 0;
15187
15188 return 1;
15189 }
15190
15191 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15192 address with operands set by DEP_INSN. */
15193
15194 static int
15195 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15196 {
15197 rtx addr;
15198
15199 if (insn_type == TYPE_LEA
15200 && TARGET_PENTIUM)
15201 {
15202 addr = PATTERN (insn);
15203
15204 if (GET_CODE (addr) == PARALLEL)
15205 addr = XVECEXP (addr, 0, 0);
15206
15207 gcc_assert (GET_CODE (addr) == SET);
15208
15209 addr = SET_SRC (addr);
15210 }
15211 else
15212 {
15213 int i;
15214 extract_insn_cached (insn);
15215 for (i = recog_data.n_operands - 1; i >= 0; --i)
15216 if (MEM_P (recog_data.operand[i]))
15217 {
15218 addr = XEXP (recog_data.operand[i], 0);
15219 goto found;
15220 }
15221 return 0;
15222 found:;
15223 }
15224
15225 return modified_in_p (addr, dep_insn);
15226 }
15227
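/* Adjust COST, the scheduler's estimate of the latency of the dependence
LINK from DEP_INSN to INSN, for the pipeline quirks of the processor
selected by ix86_tune; this implements the TARGET_SCHED_ADJUST_COST
hook. */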
15228 static int
15229 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15230 {
15231 enum attr_type insn_type, dep_insn_type;
15232 enum attr_memory memory;
15233 rtx set, set2;
15234 int dep_insn_code_number;
15235
15236 /* Anti and output dependencies have zero cost on all CPUs. */
15237 if (REG_NOTE_KIND (link) != 0)
15238 return 0;
15239
15240 dep_insn_code_number = recog_memoized (dep_insn);
15241
15242 /* If we can't recognize the insns, we can't really do anything. */
15243 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15244 return cost;
15245
15246 insn_type = get_attr_type (insn);
15247 dep_insn_type = get_attr_type (dep_insn);
15248
15249 switch (ix86_tune)
15250 {
15251 case PROCESSOR_PENTIUM:
15252 /* Address Generation Interlock adds a cycle of latency. */
15253 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15254 cost += 1;
15255
15256 /* ??? Compares pair with jump/setcc. */
15257 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15258 cost = 0;
15259
15260 /* Floating point stores require value to be ready one cycle earlier. */
15261 if (insn_type == TYPE_FMOV
15262 && get_attr_memory (insn) == MEMORY_STORE
15263 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15264 cost += 1;
15265 break;
15266
15267 case PROCESSOR_PENTIUMPRO:
15268 memory = get_attr_memory (insn);
15269
15270 /* INT->FP conversion is expensive. */
15271 if (get_attr_fp_int_src (dep_insn))
15272 cost += 5;
15273
15274 /* There is one cycle extra latency between an FP op and a store. */
15275 if (insn_type == TYPE_FMOV
15276 && (set = single_set (dep_insn)) != NULL_RTX
15277 && (set2 = single_set (insn)) != NULL_RTX
15278 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15279 && MEM_P (SET_DEST (set2)))
15280 cost += 1;
15281
15282 /* Show the ability of the reorder buffer to hide the latency of a load
15283 by executing it in parallel with the previous instruction when the
15284 previous instruction is not needed to compute the address. */
15285 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15286 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15287 {
15288 /* Claim moves to take one cycle, as the core can issue one load
15289 at a time and the next load can start a cycle later. */
15290 if (dep_insn_type == TYPE_IMOV
15291 || dep_insn_type == TYPE_FMOV)
15292 cost = 1;
15293 else if (cost > 1)
15294 cost--;
15295 }
15296 break;
15297
15298 case PROCESSOR_K6:
15299 memory = get_attr_memory (insn);
15300
15301 /* The esp dependency is resolved before the instruction is really
15302 finished. */
15303 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15304 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15305 return 1;
15306
15307 /* INT->FP conversion is expensive. */
15308 if (get_attr_fp_int_src (dep_insn))
15309 cost += 5;
15310
15311 /* Show the ability of the reorder buffer to hide the latency of a load
15312 by executing it in parallel with the previous instruction when the
15313 previous instruction is not needed to compute the address. */
15314 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15315 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15316 {
15317 /* Claim moves to take one cycle, as the core can issue one load
15318 at a time and the next load can start a cycle later. */
15319 if (dep_insn_type == TYPE_IMOV
15320 || dep_insn_type == TYPE_FMOV)
15321 cost = 1;
15322 else if (cost > 2)
15323 cost -= 2;
15324 else
15325 cost = 1;
15326 }
15327 break;
15328
15329 case PROCESSOR_ATHLON:
15330 case PROCESSOR_K8:
15331 case PROCESSOR_AMDFAM10:
15332 case PROCESSOR_GENERIC32:
15333 case PROCESSOR_GENERIC64:
15334 memory = get_attr_memory (insn);
15335
15336 /* Show the ability of the reorder buffer to hide the latency of a load
15337 by executing it in parallel with the previous instruction when the
15338 previous instruction is not needed to compute the address. */
15339 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15340 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15341 {
15342 enum attr_unit unit = get_attr_unit (insn);
15343 int loadcost = 3;
15344
15345 /* Because of the difference between the length of integer and
15346 floating unit pipeline preparation stages, the memory operands
15347 for floating point are cheaper.
15348
15349 ??? For Athlon the difference is most probably 2. */
15350 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15351 loadcost = 3;
15352 else
15353 loadcost = TARGET_ATHLON ? 2 : 0;
15354
15355 if (cost >= loadcost)
15356 cost -= loadcost;
15357 else
15358 cost = 0;
15359 }
15360
15361 default:
15362 break;
15363 }
15364
15365 return cost;
15366 }
15367
15368 /* How many alternative schedules to try. This should be as wide as the
15369 scheduling freedom in the DFA, but no wider. Making this value too
15370 large results in extra work for the scheduler. */
15371
15372 static int
15373 ia32_multipass_dfa_lookahead (void)
15374 {
15375 if (ix86_tune == PROCESSOR_PENTIUM)
15376 return 2;
15377
15378 if (ix86_tune == PROCESSOR_PENTIUMPRO
15379 || ix86_tune == PROCESSOR_K6)
15380 return 1;
15381
15382 else
15383 return 0;
15384 }
15385
15386 \f
15387 /* Compute the alignment given to a constant that is being placed in memory.
15388 EXP is the constant and ALIGN is the alignment that the object would
15389 ordinarily have.
15390 The value of this function is used instead of that alignment to align
15391 the object. */
15392
15393 int
15394 ix86_constant_alignment (tree exp, int align)
15395 {
15396 if (TREE_CODE (exp) == REAL_CST)
15397 {
15398 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15399 return 64;
15400 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15401 return 128;
15402 }
15403 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15404 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15405 return BITS_PER_WORD;
15406
15407 return align;
15408 }
15409
15410 /* Compute the alignment for a static variable.
15411 TYPE is the data type, and ALIGN is the alignment that
15412 the object would ordinarily have. The value of this function is used
15413 instead of that alignment to align the object. */
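/* For example (illustrative): with the rules below, a global
"double d[64]" -- 512 bytes -- has its alignment raised to 256 bits
when not optimizing for size, while a plain "double" keeps 64-bit
alignment. */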
15414
15415 int
15416 ix86_data_alignment (tree type, int align)
15417 {
15418 int max_align = optimize_size ? BITS_PER_WORD : 256;
15419
15420 if (AGGREGATE_TYPE_P (type)
15421 && TYPE_SIZE (type)
15422 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15423 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15424 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15425 && align < max_align)
15426 align = max_align;
15427
15428 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15429 to a 16-byte boundary. */
15430 if (TARGET_64BIT)
15431 {
15432 if (AGGREGATE_TYPE_P (type)
15433 && TYPE_SIZE (type)
15434 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15435 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15436 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15437 return 128;
15438 }
15439
15440 if (TREE_CODE (type) == ARRAY_TYPE)
15441 {
15442 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15443 return 64;
15444 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15445 return 128;
15446 }
15447 else if (TREE_CODE (type) == COMPLEX_TYPE)
15448 {
15449
15450 if (TYPE_MODE (type) == DCmode && align < 64)
15451 return 64;
15452 if (TYPE_MODE (type) == XCmode && align < 128)
15453 return 128;
15454 }
15455 else if ((TREE_CODE (type) == RECORD_TYPE
15456 || TREE_CODE (type) == UNION_TYPE
15457 || TREE_CODE (type) == QUAL_UNION_TYPE)
15458 && TYPE_FIELDS (type))
15459 {
15460 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15461 return 64;
15462 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15463 return 128;
15464 }
15465 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15466 || TREE_CODE (type) == INTEGER_TYPE)
15467 {
15468 if (TYPE_MODE (type) == DFmode && align < 64)
15469 return 64;
15470 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15471 return 128;
15472 }
15473
15474 return align;
15475 }
15476
15477 /* Compute the alignment for a local variable.
15478 TYPE is the data type, and ALIGN is the alignment that
15479 the object would ordinarily have. The value of this macro is used
15480 instead of that alignment to align the object. */
15481
15482 int
15483 ix86_local_alignment (tree type, int align)
15484 {
15485 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15486 to a 16-byte boundary. */
15487 if (TARGET_64BIT)
15488 {
15489 if (AGGREGATE_TYPE_P (type)
15490 && TYPE_SIZE (type)
15491 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15492 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15493 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15494 return 128;
15495 }
15496 if (TREE_CODE (type) == ARRAY_TYPE)
15497 {
15498 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15499 return 64;
15500 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15501 return 128;
15502 }
15503 else if (TREE_CODE (type) == COMPLEX_TYPE)
15504 {
15505 if (TYPE_MODE (type) == DCmode && align < 64)
15506 return 64;
15507 if (TYPE_MODE (type) == XCmode && align < 128)
15508 return 128;
15509 }
15510 else if ((TREE_CODE (type) == RECORD_TYPE
15511 || TREE_CODE (type) == UNION_TYPE
15512 || TREE_CODE (type) == QUAL_UNION_TYPE)
15513 && TYPE_FIELDS (type))
15514 {
15515 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15516 return 64;
15517 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15518 return 128;
15519 }
15520 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15521 || TREE_CODE (type) == INTEGER_TYPE)
15522 {
15523
15524 if (TYPE_MODE (type) == DFmode && align < 64)
15525 return 64;
15526 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15527 return 128;
15528 }
15529 return align;
15530 }
15531 \f
15532 /* Emit RTL insns to initialize the variable parts of a trampoline.
15533 FNADDR is an RTX for the address of the function's pure code.
15534 CXT is an RTX for the static chain value for the function. */
15535 void
15536 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15537 {
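/* For reference, the bytes stored below assemble to (illustrative, not
normative):
32-bit: b9 <cxt:4> movl $cxt, %ecx
e9 <disp:4> jmp fnaddr (rel32 taken from tramp+10)
64-bit: 41 bb <fnaddr:4> movl $fnaddr, %r11d (or 49 bb <fnaddr:8> movabs)
49 ba <cxt:8> movabs $cxt, %r10
49 ff e3 jmp *%r11 */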
15538 if (!TARGET_64BIT)
15539 {
15540 /* Compute offset from the end of the jmp to the target function. */
15541 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15542 plus_constant (tramp, 10),
15543 NULL_RTX, 1, OPTAB_DIRECT);
15544 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15545 gen_int_mode (0xb9, QImode));
15546 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15547 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15548 gen_int_mode (0xe9, QImode));
15549 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15550 }
15551 else
15552 {
15553 int offset = 0;
15554 /* Try to load the address using the shorter movl instead of movabs.
15555 We may want to support movq for kernel mode, but the kernel does not
15556 use trampolines at the moment. */
15557 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15558 {
15559 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15560 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15561 gen_int_mode (0xbb41, HImode));
15562 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15563 gen_lowpart (SImode, fnaddr));
15564 offset += 6;
15565 }
15566 else
15567 {
15568 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15569 gen_int_mode (0xbb49, HImode));
15570 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15571 fnaddr);
15572 offset += 10;
15573 }
15574 /* Load static chain using movabs to r10. */
15575 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15576 gen_int_mode (0xba49, HImode));
15577 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15578 cxt);
15579 offset += 10;
15580 /* Jump to r11. */
15581 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15582 gen_int_mode (0xff49, HImode));
15583 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15584 gen_int_mode (0xe3, QImode));
15585 offset += 3;
15586 gcc_assert (offset <= TRAMPOLINE_SIZE);
15587 }
15588
15589 #ifdef ENABLE_EXECUTE_STACK
15590 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15591 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15592 #endif
15593 }
15594 \f
15595 /* Codes for all the SSE/MMX builtins. */
15596 enum ix86_builtins
15597 {
15598 IX86_BUILTIN_ADDPS,
15599 IX86_BUILTIN_ADDSS,
15600 IX86_BUILTIN_DIVPS,
15601 IX86_BUILTIN_DIVSS,
15602 IX86_BUILTIN_MULPS,
15603 IX86_BUILTIN_MULSS,
15604 IX86_BUILTIN_SUBPS,
15605 IX86_BUILTIN_SUBSS,
15606
15607 IX86_BUILTIN_CMPEQPS,
15608 IX86_BUILTIN_CMPLTPS,
15609 IX86_BUILTIN_CMPLEPS,
15610 IX86_BUILTIN_CMPGTPS,
15611 IX86_BUILTIN_CMPGEPS,
15612 IX86_BUILTIN_CMPNEQPS,
15613 IX86_BUILTIN_CMPNLTPS,
15614 IX86_BUILTIN_CMPNLEPS,
15615 IX86_BUILTIN_CMPNGTPS,
15616 IX86_BUILTIN_CMPNGEPS,
15617 IX86_BUILTIN_CMPORDPS,
15618 IX86_BUILTIN_CMPUNORDPS,
15619 IX86_BUILTIN_CMPEQSS,
15620 IX86_BUILTIN_CMPLTSS,
15621 IX86_BUILTIN_CMPLESS,
15622 IX86_BUILTIN_CMPNEQSS,
15623 IX86_BUILTIN_CMPNLTSS,
15624 IX86_BUILTIN_CMPNLESS,
15625 IX86_BUILTIN_CMPNGTSS,
15626 IX86_BUILTIN_CMPNGESS,
15627 IX86_BUILTIN_CMPORDSS,
15628 IX86_BUILTIN_CMPUNORDSS,
15629
15630 IX86_BUILTIN_COMIEQSS,
15631 IX86_BUILTIN_COMILTSS,
15632 IX86_BUILTIN_COMILESS,
15633 IX86_BUILTIN_COMIGTSS,
15634 IX86_BUILTIN_COMIGESS,
15635 IX86_BUILTIN_COMINEQSS,
15636 IX86_BUILTIN_UCOMIEQSS,
15637 IX86_BUILTIN_UCOMILTSS,
15638 IX86_BUILTIN_UCOMILESS,
15639 IX86_BUILTIN_UCOMIGTSS,
15640 IX86_BUILTIN_UCOMIGESS,
15641 IX86_BUILTIN_UCOMINEQSS,
15642
15643 IX86_BUILTIN_CVTPI2PS,
15644 IX86_BUILTIN_CVTPS2PI,
15645 IX86_BUILTIN_CVTSI2SS,
15646 IX86_BUILTIN_CVTSI642SS,
15647 IX86_BUILTIN_CVTSS2SI,
15648 IX86_BUILTIN_CVTSS2SI64,
15649 IX86_BUILTIN_CVTTPS2PI,
15650 IX86_BUILTIN_CVTTSS2SI,
15651 IX86_BUILTIN_CVTTSS2SI64,
15652
15653 IX86_BUILTIN_MAXPS,
15654 IX86_BUILTIN_MAXSS,
15655 IX86_BUILTIN_MINPS,
15656 IX86_BUILTIN_MINSS,
15657
15658 IX86_BUILTIN_LOADUPS,
15659 IX86_BUILTIN_STOREUPS,
15660 IX86_BUILTIN_MOVSS,
15661
15662 IX86_BUILTIN_MOVHLPS,
15663 IX86_BUILTIN_MOVLHPS,
15664 IX86_BUILTIN_LOADHPS,
15665 IX86_BUILTIN_LOADLPS,
15666 IX86_BUILTIN_STOREHPS,
15667 IX86_BUILTIN_STORELPS,
15668
15669 IX86_BUILTIN_MASKMOVQ,
15670 IX86_BUILTIN_MOVMSKPS,
15671 IX86_BUILTIN_PMOVMSKB,
15672
15673 IX86_BUILTIN_MOVNTPS,
15674 IX86_BUILTIN_MOVNTQ,
15675
15676 IX86_BUILTIN_LOADDQU,
15677 IX86_BUILTIN_STOREDQU,
15678
15679 IX86_BUILTIN_PACKSSWB,
15680 IX86_BUILTIN_PACKSSDW,
15681 IX86_BUILTIN_PACKUSWB,
15682
15683 IX86_BUILTIN_PADDB,
15684 IX86_BUILTIN_PADDW,
15685 IX86_BUILTIN_PADDD,
15686 IX86_BUILTIN_PADDQ,
15687 IX86_BUILTIN_PADDSB,
15688 IX86_BUILTIN_PADDSW,
15689 IX86_BUILTIN_PADDUSB,
15690 IX86_BUILTIN_PADDUSW,
15691 IX86_BUILTIN_PSUBB,
15692 IX86_BUILTIN_PSUBW,
15693 IX86_BUILTIN_PSUBD,
15694 IX86_BUILTIN_PSUBQ,
15695 IX86_BUILTIN_PSUBSB,
15696 IX86_BUILTIN_PSUBSW,
15697 IX86_BUILTIN_PSUBUSB,
15698 IX86_BUILTIN_PSUBUSW,
15699
15700 IX86_BUILTIN_PAND,
15701 IX86_BUILTIN_PANDN,
15702 IX86_BUILTIN_POR,
15703 IX86_BUILTIN_PXOR,
15704
15705 IX86_BUILTIN_PAVGB,
15706 IX86_BUILTIN_PAVGW,
15707
15708 IX86_BUILTIN_PCMPEQB,
15709 IX86_BUILTIN_PCMPEQW,
15710 IX86_BUILTIN_PCMPEQD,
15711 IX86_BUILTIN_PCMPGTB,
15712 IX86_BUILTIN_PCMPGTW,
15713 IX86_BUILTIN_PCMPGTD,
15714
15715 IX86_BUILTIN_PMADDWD,
15716
15717 IX86_BUILTIN_PMAXSW,
15718 IX86_BUILTIN_PMAXUB,
15719 IX86_BUILTIN_PMINSW,
15720 IX86_BUILTIN_PMINUB,
15721
15722 IX86_BUILTIN_PMULHUW,
15723 IX86_BUILTIN_PMULHW,
15724 IX86_BUILTIN_PMULLW,
15725
15726 IX86_BUILTIN_PSADBW,
15727 IX86_BUILTIN_PSHUFW,
15728
15729 IX86_BUILTIN_PSLLW,
15730 IX86_BUILTIN_PSLLD,
15731 IX86_BUILTIN_PSLLQ,
15732 IX86_BUILTIN_PSRAW,
15733 IX86_BUILTIN_PSRAD,
15734 IX86_BUILTIN_PSRLW,
15735 IX86_BUILTIN_PSRLD,
15736 IX86_BUILTIN_PSRLQ,
15737 IX86_BUILTIN_PSLLWI,
15738 IX86_BUILTIN_PSLLDI,
15739 IX86_BUILTIN_PSLLQI,
15740 IX86_BUILTIN_PSRAWI,
15741 IX86_BUILTIN_PSRADI,
15742 IX86_BUILTIN_PSRLWI,
15743 IX86_BUILTIN_PSRLDI,
15744 IX86_BUILTIN_PSRLQI,
15745
15746 IX86_BUILTIN_PUNPCKHBW,
15747 IX86_BUILTIN_PUNPCKHWD,
15748 IX86_BUILTIN_PUNPCKHDQ,
15749 IX86_BUILTIN_PUNPCKLBW,
15750 IX86_BUILTIN_PUNPCKLWD,
15751 IX86_BUILTIN_PUNPCKLDQ,
15752
15753 IX86_BUILTIN_SHUFPS,
15754
15755 IX86_BUILTIN_RCPPS,
15756 IX86_BUILTIN_RCPSS,
15757 IX86_BUILTIN_RSQRTPS,
15758 IX86_BUILTIN_RSQRTSS,
15759 IX86_BUILTIN_SQRTPS,
15760 IX86_BUILTIN_SQRTSS,
15761
15762 IX86_BUILTIN_UNPCKHPS,
15763 IX86_BUILTIN_UNPCKLPS,
15764
15765 IX86_BUILTIN_ANDPS,
15766 IX86_BUILTIN_ANDNPS,
15767 IX86_BUILTIN_ORPS,
15768 IX86_BUILTIN_XORPS,
15769
15770 IX86_BUILTIN_EMMS,
15771 IX86_BUILTIN_LDMXCSR,
15772 IX86_BUILTIN_STMXCSR,
15773 IX86_BUILTIN_SFENCE,
15774
15775 /* 3DNow! Original */
15776 IX86_BUILTIN_FEMMS,
15777 IX86_BUILTIN_PAVGUSB,
15778 IX86_BUILTIN_PF2ID,
15779 IX86_BUILTIN_PFACC,
15780 IX86_BUILTIN_PFADD,
15781 IX86_BUILTIN_PFCMPEQ,
15782 IX86_BUILTIN_PFCMPGE,
15783 IX86_BUILTIN_PFCMPGT,
15784 IX86_BUILTIN_PFMAX,
15785 IX86_BUILTIN_PFMIN,
15786 IX86_BUILTIN_PFMUL,
15787 IX86_BUILTIN_PFRCP,
15788 IX86_BUILTIN_PFRCPIT1,
15789 IX86_BUILTIN_PFRCPIT2,
15790 IX86_BUILTIN_PFRSQIT1,
15791 IX86_BUILTIN_PFRSQRT,
15792 IX86_BUILTIN_PFSUB,
15793 IX86_BUILTIN_PFSUBR,
15794 IX86_BUILTIN_PI2FD,
15795 IX86_BUILTIN_PMULHRW,
15796
15797 /* 3DNow! Athlon Extensions */
15798 IX86_BUILTIN_PF2IW,
15799 IX86_BUILTIN_PFNACC,
15800 IX86_BUILTIN_PFPNACC,
15801 IX86_BUILTIN_PI2FW,
15802 IX86_BUILTIN_PSWAPDSI,
15803 IX86_BUILTIN_PSWAPDSF,
15804
15805 /* SSE2 */
15806 IX86_BUILTIN_ADDPD,
15807 IX86_BUILTIN_ADDSD,
15808 IX86_BUILTIN_DIVPD,
15809 IX86_BUILTIN_DIVSD,
15810 IX86_BUILTIN_MULPD,
15811 IX86_BUILTIN_MULSD,
15812 IX86_BUILTIN_SUBPD,
15813 IX86_BUILTIN_SUBSD,
15814
15815 IX86_BUILTIN_CMPEQPD,
15816 IX86_BUILTIN_CMPLTPD,
15817 IX86_BUILTIN_CMPLEPD,
15818 IX86_BUILTIN_CMPGTPD,
15819 IX86_BUILTIN_CMPGEPD,
15820 IX86_BUILTIN_CMPNEQPD,
15821 IX86_BUILTIN_CMPNLTPD,
15822 IX86_BUILTIN_CMPNLEPD,
15823 IX86_BUILTIN_CMPNGTPD,
15824 IX86_BUILTIN_CMPNGEPD,
15825 IX86_BUILTIN_CMPORDPD,
15826 IX86_BUILTIN_CMPUNORDPD,
15827 IX86_BUILTIN_CMPNEPD,
15828 IX86_BUILTIN_CMPEQSD,
15829 IX86_BUILTIN_CMPLTSD,
15830 IX86_BUILTIN_CMPLESD,
15831 IX86_BUILTIN_CMPNEQSD,
15832 IX86_BUILTIN_CMPNLTSD,
15833 IX86_BUILTIN_CMPNLESD,
15834 IX86_BUILTIN_CMPORDSD,
15835 IX86_BUILTIN_CMPUNORDSD,
15836 IX86_BUILTIN_CMPNESD,
15837
15838 IX86_BUILTIN_COMIEQSD,
15839 IX86_BUILTIN_COMILTSD,
15840 IX86_BUILTIN_COMILESD,
15841 IX86_BUILTIN_COMIGTSD,
15842 IX86_BUILTIN_COMIGESD,
15843 IX86_BUILTIN_COMINEQSD,
15844 IX86_BUILTIN_UCOMIEQSD,
15845 IX86_BUILTIN_UCOMILTSD,
15846 IX86_BUILTIN_UCOMILESD,
15847 IX86_BUILTIN_UCOMIGTSD,
15848 IX86_BUILTIN_UCOMIGESD,
15849 IX86_BUILTIN_UCOMINEQSD,
15850
15851 IX86_BUILTIN_MAXPD,
15852 IX86_BUILTIN_MAXSD,
15853 IX86_BUILTIN_MINPD,
15854 IX86_BUILTIN_MINSD,
15855
15856 IX86_BUILTIN_ANDPD,
15857 IX86_BUILTIN_ANDNPD,
15858 IX86_BUILTIN_ORPD,
15859 IX86_BUILTIN_XORPD,
15860
15861 IX86_BUILTIN_SQRTPD,
15862 IX86_BUILTIN_SQRTSD,
15863
15864 IX86_BUILTIN_UNPCKHPD,
15865 IX86_BUILTIN_UNPCKLPD,
15866
15867 IX86_BUILTIN_SHUFPD,
15868
15869 IX86_BUILTIN_LOADUPD,
15870 IX86_BUILTIN_STOREUPD,
15871 IX86_BUILTIN_MOVSD,
15872
15873 IX86_BUILTIN_LOADHPD,
15874 IX86_BUILTIN_LOADLPD,
15875
15876 IX86_BUILTIN_CVTDQ2PD,
15877 IX86_BUILTIN_CVTDQ2PS,
15878
15879 IX86_BUILTIN_CVTPD2DQ,
15880 IX86_BUILTIN_CVTPD2PI,
15881 IX86_BUILTIN_CVTPD2PS,
15882 IX86_BUILTIN_CVTTPD2DQ,
15883 IX86_BUILTIN_CVTTPD2PI,
15884
15885 IX86_BUILTIN_CVTPI2PD,
15886 IX86_BUILTIN_CVTSI2SD,
15887 IX86_BUILTIN_CVTSI642SD,
15888
15889 IX86_BUILTIN_CVTSD2SI,
15890 IX86_BUILTIN_CVTSD2SI64,
15891 IX86_BUILTIN_CVTSD2SS,
15892 IX86_BUILTIN_CVTSS2SD,
15893 IX86_BUILTIN_CVTTSD2SI,
15894 IX86_BUILTIN_CVTTSD2SI64,
15895
15896 IX86_BUILTIN_CVTPS2DQ,
15897 IX86_BUILTIN_CVTPS2PD,
15898 IX86_BUILTIN_CVTTPS2DQ,
15899
15900 IX86_BUILTIN_MOVNTI,
15901 IX86_BUILTIN_MOVNTPD,
15902 IX86_BUILTIN_MOVNTDQ,
15903
15904 /* SSE2 MMX */
15905 IX86_BUILTIN_MASKMOVDQU,
15906 IX86_BUILTIN_MOVMSKPD,
15907 IX86_BUILTIN_PMOVMSKB128,
15908
15909 IX86_BUILTIN_PACKSSWB128,
15910 IX86_BUILTIN_PACKSSDW128,
15911 IX86_BUILTIN_PACKUSWB128,
15912
15913 IX86_BUILTIN_PADDB128,
15914 IX86_BUILTIN_PADDW128,
15915 IX86_BUILTIN_PADDD128,
15916 IX86_BUILTIN_PADDQ128,
15917 IX86_BUILTIN_PADDSB128,
15918 IX86_BUILTIN_PADDSW128,
15919 IX86_BUILTIN_PADDUSB128,
15920 IX86_BUILTIN_PADDUSW128,
15921 IX86_BUILTIN_PSUBB128,
15922 IX86_BUILTIN_PSUBW128,
15923 IX86_BUILTIN_PSUBD128,
15924 IX86_BUILTIN_PSUBQ128,
15925 IX86_BUILTIN_PSUBSB128,
15926 IX86_BUILTIN_PSUBSW128,
15927 IX86_BUILTIN_PSUBUSB128,
15928 IX86_BUILTIN_PSUBUSW128,
15929
15930 IX86_BUILTIN_PAND128,
15931 IX86_BUILTIN_PANDN128,
15932 IX86_BUILTIN_POR128,
15933 IX86_BUILTIN_PXOR128,
15934
15935 IX86_BUILTIN_PAVGB128,
15936 IX86_BUILTIN_PAVGW128,
15937
15938 IX86_BUILTIN_PCMPEQB128,
15939 IX86_BUILTIN_PCMPEQW128,
15940 IX86_BUILTIN_PCMPEQD128,
15941 IX86_BUILTIN_PCMPGTB128,
15942 IX86_BUILTIN_PCMPGTW128,
15943 IX86_BUILTIN_PCMPGTD128,
15944
15945 IX86_BUILTIN_PMADDWD128,
15946
15947 IX86_BUILTIN_PMAXSW128,
15948 IX86_BUILTIN_PMAXUB128,
15949 IX86_BUILTIN_PMINSW128,
15950 IX86_BUILTIN_PMINUB128,
15951
15952 IX86_BUILTIN_PMULUDQ,
15953 IX86_BUILTIN_PMULUDQ128,
15954 IX86_BUILTIN_PMULHUW128,
15955 IX86_BUILTIN_PMULHW128,
15956 IX86_BUILTIN_PMULLW128,
15957
15958 IX86_BUILTIN_PSADBW128,
15959 IX86_BUILTIN_PSHUFHW,
15960 IX86_BUILTIN_PSHUFLW,
15961 IX86_BUILTIN_PSHUFD,
15962
15963 IX86_BUILTIN_PSLLW128,
15964 IX86_BUILTIN_PSLLD128,
15965 IX86_BUILTIN_PSLLQ128,
15966 IX86_BUILTIN_PSRAW128,
15967 IX86_BUILTIN_PSRAD128,
15968 IX86_BUILTIN_PSRLW128,
15969 IX86_BUILTIN_PSRLD128,
15970 IX86_BUILTIN_PSRLQ128,
15971 IX86_BUILTIN_PSLLDQI128,
15972 IX86_BUILTIN_PSLLWI128,
15973 IX86_BUILTIN_PSLLDI128,
15974 IX86_BUILTIN_PSLLQI128,
15975 IX86_BUILTIN_PSRAWI128,
15976 IX86_BUILTIN_PSRADI128,
15977 IX86_BUILTIN_PSRLDQI128,
15978 IX86_BUILTIN_PSRLWI128,
15979 IX86_BUILTIN_PSRLDI128,
15980 IX86_BUILTIN_PSRLQI128,
15981
15982 IX86_BUILTIN_PUNPCKHBW128,
15983 IX86_BUILTIN_PUNPCKHWD128,
15984 IX86_BUILTIN_PUNPCKHDQ128,
15985 IX86_BUILTIN_PUNPCKHQDQ128,
15986 IX86_BUILTIN_PUNPCKLBW128,
15987 IX86_BUILTIN_PUNPCKLWD128,
15988 IX86_BUILTIN_PUNPCKLDQ128,
15989 IX86_BUILTIN_PUNPCKLQDQ128,
15990
15991 IX86_BUILTIN_CLFLUSH,
15992 IX86_BUILTIN_MFENCE,
15993 IX86_BUILTIN_LFENCE,
15994
15995 /* Prescott New Instructions. */
15996 IX86_BUILTIN_ADDSUBPS,
15997 IX86_BUILTIN_HADDPS,
15998 IX86_BUILTIN_HSUBPS,
15999 IX86_BUILTIN_MOVSHDUP,
16000 IX86_BUILTIN_MOVSLDUP,
16001 IX86_BUILTIN_ADDSUBPD,
16002 IX86_BUILTIN_HADDPD,
16003 IX86_BUILTIN_HSUBPD,
16004 IX86_BUILTIN_LDDQU,
16005
16006 IX86_BUILTIN_MONITOR,
16007 IX86_BUILTIN_MWAIT,
16008
16009 /* SSSE3. */
16010 IX86_BUILTIN_PHADDW,
16011 IX86_BUILTIN_PHADDD,
16012 IX86_BUILTIN_PHADDSW,
16013 IX86_BUILTIN_PHSUBW,
16014 IX86_BUILTIN_PHSUBD,
16015 IX86_BUILTIN_PHSUBSW,
16016 IX86_BUILTIN_PMADDUBSW,
16017 IX86_BUILTIN_PMULHRSW,
16018 IX86_BUILTIN_PSHUFB,
16019 IX86_BUILTIN_PSIGNB,
16020 IX86_BUILTIN_PSIGNW,
16021 IX86_BUILTIN_PSIGND,
16022 IX86_BUILTIN_PALIGNR,
16023 IX86_BUILTIN_PABSB,
16024 IX86_BUILTIN_PABSW,
16025 IX86_BUILTIN_PABSD,
16026
16027 IX86_BUILTIN_PHADDW128,
16028 IX86_BUILTIN_PHADDD128,
16029 IX86_BUILTIN_PHADDSW128,
16030 IX86_BUILTIN_PHSUBW128,
16031 IX86_BUILTIN_PHSUBD128,
16032 IX86_BUILTIN_PHSUBSW128,
16033 IX86_BUILTIN_PMADDUBSW128,
16034 IX86_BUILTIN_PMULHRSW128,
16035 IX86_BUILTIN_PSHUFB128,
16036 IX86_BUILTIN_PSIGNB128,
16037 IX86_BUILTIN_PSIGNW128,
16038 IX86_BUILTIN_PSIGND128,
16039 IX86_BUILTIN_PALIGNR128,
16040 IX86_BUILTIN_PABSB128,
16041 IX86_BUILTIN_PABSW128,
16042 IX86_BUILTIN_PABSD128,
16043
16044 /* AMDFAM10 - SSE4A New Instructions. */
16045 IX86_BUILTIN_MOVNTSD,
16046 IX86_BUILTIN_MOVNTSS,
16047 IX86_BUILTIN_EXTRQI,
16048 IX86_BUILTIN_EXTRQ,
16049 IX86_BUILTIN_INSERTQI,
16050 IX86_BUILTIN_INSERTQ,
16051
16052 IX86_BUILTIN_VEC_INIT_V2SI,
16053 IX86_BUILTIN_VEC_INIT_V4HI,
16054 IX86_BUILTIN_VEC_INIT_V8QI,
16055 IX86_BUILTIN_VEC_EXT_V2DF,
16056 IX86_BUILTIN_VEC_EXT_V2DI,
16057 IX86_BUILTIN_VEC_EXT_V4SF,
16058 IX86_BUILTIN_VEC_EXT_V4SI,
16059 IX86_BUILTIN_VEC_EXT_V8HI,
16060 IX86_BUILTIN_VEC_EXT_V2SI,
16061 IX86_BUILTIN_VEC_EXT_V4HI,
16062 IX86_BUILTIN_VEC_SET_V8HI,
16063 IX86_BUILTIN_VEC_SET_V4HI,
16064
16065 IX86_BUILTIN_MAX
16066 };
16067
16068 /* Table for the ix86 builtin decls. */
16069 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16070
16071 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16072 * only if the target_flags include one of MASK. Stores the function decl
16073 * in the ix86_builtins array.
16074 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16075
16076 static inline tree
16077 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16078 {
16079 tree decl = NULL_TREE;
16080
16081 if (mask & target_flags
16082 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16083 {
16084 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16085 NULL, NULL_TREE);
16086 ix86_builtins[(int) code] = decl;
16087 }
16088
16089 return decl;
16090 }
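/* A typical use later in this file looks like (illustrative only; the
v4sf_ftype_v4sf_v4sf type node is built in the builtin-init code):

def_builtin (MASK_SSE, "__builtin_ia32_addps",
v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

so the builtin is registered only when -msse is in target_flags and its
decl is recorded under IX86_BUILTIN_ADDPS. */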
16091
16092 /* Like def_builtin, but also marks the function decl "const". */
16093
16094 static inline tree
16095 def_builtin_const (int mask, const char *name, tree type,
16096 enum ix86_builtins code)
16097 {
16098 tree decl = def_builtin (mask, name, type, code);
16099 if (decl)
16100 TREE_READONLY (decl) = 1;
16101 return decl;
16102 }
16103
16104 /* Bits for builtin_description.flag. */
16105
16106 /* Set when we don't support the comparison natively, and should
16107 swap the comparison operands in order to support it. */
16108 #define BUILTIN_DESC_SWAP_OPERANDS 1
16109
16110 struct builtin_description
16111 {
16112 const unsigned int mask;
16113 const enum insn_code icode;
16114 const char *const name;
16115 const enum ix86_builtins code;
16116 const enum rtx_code comparison;
16117 const unsigned int flag;
16118 };
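/* Reading an entry: mask gives the -m flags that must be enabled, icode
is the insn pattern used for expansion, name is the __builtin_ia32_*
identifier exposed to users, code is the IX86_BUILTIN_* key, and
comparison/flag control how comparison builtins are expanded -- e.g. the
first bdesc_comi entry below expands __builtin_ia32_comieq through the
sse_comi pattern with an UNEQ comparison. */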
16119
16120 static const struct builtin_description bdesc_comi[] =
16121 {
16122 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16123 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16124 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16125 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16126 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16127 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16128 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16129 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16130 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16131 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16132 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16133 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16134 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16135 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16136 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16137 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16138 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16139 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16140 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16141 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16142 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16143 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16144 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16145 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16146 };
16147
16148 static const struct builtin_description bdesc_2arg[] =
16149 {
16150 /* SSE */
16151 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16152 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16153 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16154 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16155 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16156 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16157 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16158 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16159
16160 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16161 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16162 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16163 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16164 BUILTIN_DESC_SWAP_OPERANDS },
16165 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16166 BUILTIN_DESC_SWAP_OPERANDS },
16167 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16168 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16169 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16170 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16171 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16172 BUILTIN_DESC_SWAP_OPERANDS },
16173 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16174 BUILTIN_DESC_SWAP_OPERANDS },
16175 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16176 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16177 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16178 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16179 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16180 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16181 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16182 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16183 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16184 BUILTIN_DESC_SWAP_OPERANDS },
16185 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16186 BUILTIN_DESC_SWAP_OPERANDS },
16187 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16188
16189 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16190 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16191 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16192 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16193
16194 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16195 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16196 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16197 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16198
16199 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16200 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16201 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16202 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16203 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16204
16205 /* MMX */
16206 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16207 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16208 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16209 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16210 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16211 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16212 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16213 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16214
16215 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16216 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16217 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16218 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16219 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16220 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16221 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16222 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16223
16224 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16225 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16226 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16227
16228 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16229 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16230 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16231 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16232
16233 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16234 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16235
16236 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16237 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16238 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16239 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16240 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16241 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16242
16243 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16244 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16245 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16246 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16247
16248 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16249 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16250 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16251 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16252 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16253 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16254
16255 /* Special. */
16256 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16257 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16258 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16259
16260 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16261 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16262 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16263
16264 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16265 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16266 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16267 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16268 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16269 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16270
16271 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16272 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16273 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16274 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16275 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16276 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16277
16278 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16279 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16280 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16281 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16282
16283 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16284 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16285
16286 /* SSE2 */
16287 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16288 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16289 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16290 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16291 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16292 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16293 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16294 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16295
16296 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16297 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16298 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16299 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16300 BUILTIN_DESC_SWAP_OPERANDS },
16301 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16302 BUILTIN_DESC_SWAP_OPERANDS },
16303 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16304 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16305 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16306 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16307 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16308 BUILTIN_DESC_SWAP_OPERANDS },
16309 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16310 BUILTIN_DESC_SWAP_OPERANDS },
16311 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16312 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16313 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16314 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16315 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16316 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16317 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16318 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16319 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16320
16321 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16322 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16323 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16324 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16325
16326 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16327 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16328 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16329 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16330
16331 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16332 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16333 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16334
16335 /* SSE2 MMX */
16336 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16337 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16338 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16339 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16340 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16341 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16342 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16343 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16344
16345 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16346 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16347 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16348 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16349 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16350 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16351 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16352 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16353
16354 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16355 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16356
16357 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16358 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16359 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16360 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16361
16362 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16363 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16364
16365 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16366 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16367 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16368 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16371
16372 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16373 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16374 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16375 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16376
16377 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16378 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16379 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16380 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16381 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16382 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16384 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16385
16386 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16387 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16388 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16389
16390 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16391 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16392
16393 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16394 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16395
16396 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16397 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16398 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16399
16400 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16401 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16402 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16403
16404 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16405 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16406
16407 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16408
16409 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16410 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16411 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16412 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16413
16414 /* SSE3 */
16415 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16416 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16417 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16418 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16419 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16420 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16421
16422 /* SSSE3 */
16423 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16424 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16425 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16426 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16427 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16428 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16429 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16430 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16431 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16432 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16433 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16434 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16435 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16436 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16437 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16438 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16439 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16440 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16441 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16442 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16443 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16444 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16445 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16446 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16447 };
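/* Table entries above with a null name are skipped by the generic
   two-operand registration loop in ix86_init_mmx_sse_builtins; those
   builtins are instead given hand-written prototypes by the explicit
   def_builtin calls further down (psllw, cvtsi2ss, psadbw128 and
   friends), while the table entry still supplies the insn code used
   later when the builtin is expanded.  */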
16448
16449 static const struct builtin_description bdesc_1arg[] =
16450 {
16451 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16452 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16453
16454 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16455 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16456 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16457
16458 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16459 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16460 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16461 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16462 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16463 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16464
16465 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16466 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16467
16468 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16469
16470 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16471 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16472
16473 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16474 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16475 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16476 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16477 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16478
16479 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16480
16481 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16482 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16483 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16484 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16485
16486 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16487 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16488 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16489
16490 /* SSE3 */
16491 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16492 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16493
16494 /* SSSE3 */
16495 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16496 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16497 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16498 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16499 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16500 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16501 };
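/* Illustrative example (not part of the compiler): once the tables
   above have been registered, code compiled with -msse can call these
   builtins directly; the <xmmintrin.h> and <emmintrin.h> wrappers are
   thin layers over exactly these __builtin_ia32_* entry points.
   For instance:

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       add4 (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_addps (a, b);
       }
*/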
16502
16503 static void
16504 ix86_init_builtins (void)
16505 {
16506 if (TARGET_MMX)
16507 ix86_init_mmx_sse_builtins ();
16508 }
16509
16510 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16511 is zero.  Otherwise, if TARGET_SSE is not set, only define the MMX
16512 builtins.  */
16513 static void
16514 ix86_init_mmx_sse_builtins (void)
16515 {
16516 const struct builtin_description * d;
16517 size_t i;
16518
16519 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16520 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16521 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16522 tree V2DI_type_node
16523 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16524 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16525 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16526 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16527 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16528 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16529 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16530
16531 tree pchar_type_node = build_pointer_type (char_type_node);
16532 tree pcchar_type_node = build_pointer_type (
16533 build_type_variant (char_type_node, 1, 0));
16534 tree pfloat_type_node = build_pointer_type (float_type_node);
16535 tree pcfloat_type_node = build_pointer_type (
16536 build_type_variant (float_type_node, 1, 0));
16537 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16538 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16539 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16540
16541 /* Comparisons. */
16542 tree int_ftype_v4sf_v4sf
16543 = build_function_type_list (integer_type_node,
16544 V4SF_type_node, V4SF_type_node, NULL_TREE);
16545 tree v4si_ftype_v4sf_v4sf
16546 = build_function_type_list (V4SI_type_node,
16547 V4SF_type_node, V4SF_type_node, NULL_TREE);
16548 /* MMX/SSE/integer conversions. */
16549 tree int_ftype_v4sf
16550 = build_function_type_list (integer_type_node,
16551 V4SF_type_node, NULL_TREE);
16552 tree int64_ftype_v4sf
16553 = build_function_type_list (long_long_integer_type_node,
16554 V4SF_type_node, NULL_TREE);
16555 tree int_ftype_v8qi
16556 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16557 tree v4sf_ftype_v4sf_int
16558 = build_function_type_list (V4SF_type_node,
16559 V4SF_type_node, integer_type_node, NULL_TREE);
16560 tree v4sf_ftype_v4sf_int64
16561 = build_function_type_list (V4SF_type_node,
16562 V4SF_type_node, long_long_integer_type_node,
16563 NULL_TREE);
16564 tree v4sf_ftype_v4sf_v2si
16565 = build_function_type_list (V4SF_type_node,
16566 V4SF_type_node, V2SI_type_node, NULL_TREE);
16567
16568 /* Miscellaneous. */
16569 tree v8qi_ftype_v4hi_v4hi
16570 = build_function_type_list (V8QI_type_node,
16571 V4HI_type_node, V4HI_type_node, NULL_TREE);
16572 tree v4hi_ftype_v2si_v2si
16573 = build_function_type_list (V4HI_type_node,
16574 V2SI_type_node, V2SI_type_node, NULL_TREE);
16575 tree v4sf_ftype_v4sf_v4sf_int
16576 = build_function_type_list (V4SF_type_node,
16577 V4SF_type_node, V4SF_type_node,
16578 integer_type_node, NULL_TREE);
16579 tree v2si_ftype_v4hi_v4hi
16580 = build_function_type_list (V2SI_type_node,
16581 V4HI_type_node, V4HI_type_node, NULL_TREE);
16582 tree v4hi_ftype_v4hi_int
16583 = build_function_type_list (V4HI_type_node,
16584 V4HI_type_node, integer_type_node, NULL_TREE);
16585 tree v4hi_ftype_v4hi_di
16586 = build_function_type_list (V4HI_type_node,
16587 V4HI_type_node, long_long_unsigned_type_node,
16588 NULL_TREE);
16589 tree v2si_ftype_v2si_di
16590 = build_function_type_list (V2SI_type_node,
16591 V2SI_type_node, long_long_unsigned_type_node,
16592 NULL_TREE);
16593 tree void_ftype_void
16594 = build_function_type (void_type_node, void_list_node);
16595 tree void_ftype_unsigned
16596 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16597 tree void_ftype_unsigned_unsigned
16598 = build_function_type_list (void_type_node, unsigned_type_node,
16599 unsigned_type_node, NULL_TREE);
16600 tree void_ftype_pcvoid_unsigned_unsigned
16601 = build_function_type_list (void_type_node, const_ptr_type_node,
16602 unsigned_type_node, unsigned_type_node,
16603 NULL_TREE);
16604 tree unsigned_ftype_void
16605 = build_function_type (unsigned_type_node, void_list_node);
16606 tree v2si_ftype_v4sf
16607 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16608 /* Loads/stores. */
16609 tree void_ftype_v8qi_v8qi_pchar
16610 = build_function_type_list (void_type_node,
16611 V8QI_type_node, V8QI_type_node,
16612 pchar_type_node, NULL_TREE);
16613 tree v4sf_ftype_pcfloat
16614 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16615 /* @@@ the type is bogus */
16616 tree v4sf_ftype_v4sf_pv2si
16617 = build_function_type_list (V4SF_type_node,
16618 V4SF_type_node, pv2si_type_node, NULL_TREE);
16619 tree void_ftype_pv2si_v4sf
16620 = build_function_type_list (void_type_node,
16621 pv2si_type_node, V4SF_type_node, NULL_TREE);
16622 tree void_ftype_pfloat_v4sf
16623 = build_function_type_list (void_type_node,
16624 pfloat_type_node, V4SF_type_node, NULL_TREE);
16625 tree void_ftype_pdi_di
16626 = build_function_type_list (void_type_node,
16627 pdi_type_node, long_long_unsigned_type_node,
16628 NULL_TREE);
16629 tree void_ftype_pv2di_v2di
16630 = build_function_type_list (void_type_node,
16631 pv2di_type_node, V2DI_type_node, NULL_TREE);
16632 /* Normal vector unops. */
16633 tree v4sf_ftype_v4sf
16634 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16635 tree v16qi_ftype_v16qi
16636 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16637 tree v8hi_ftype_v8hi
16638 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16639 tree v4si_ftype_v4si
16640 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16641 tree v8qi_ftype_v8qi
16642 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16643 tree v4hi_ftype_v4hi
16644 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16645
16646 /* Normal vector binops. */
16647 tree v4sf_ftype_v4sf_v4sf
16648 = build_function_type_list (V4SF_type_node,
16649 V4SF_type_node, V4SF_type_node, NULL_TREE);
16650 tree v8qi_ftype_v8qi_v8qi
16651 = build_function_type_list (V8QI_type_node,
16652 V8QI_type_node, V8QI_type_node, NULL_TREE);
16653 tree v4hi_ftype_v4hi_v4hi
16654 = build_function_type_list (V4HI_type_node,
16655 V4HI_type_node, V4HI_type_node, NULL_TREE);
16656 tree v2si_ftype_v2si_v2si
16657 = build_function_type_list (V2SI_type_node,
16658 V2SI_type_node, V2SI_type_node, NULL_TREE);
16659 tree di_ftype_di_di
16660 = build_function_type_list (long_long_unsigned_type_node,
16661 long_long_unsigned_type_node,
16662 long_long_unsigned_type_node, NULL_TREE);
16663
16664 tree di_ftype_di_di_int
16665 = build_function_type_list (long_long_unsigned_type_node,
16666 long_long_unsigned_type_node,
16667 long_long_unsigned_type_node,
16668 integer_type_node, NULL_TREE);
16669
16670 tree v2si_ftype_v2sf
16671 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16672 tree v2sf_ftype_v2si
16673 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16674 tree v2si_ftype_v2si
16675 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16676 tree v2sf_ftype_v2sf
16677 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16678 tree v2sf_ftype_v2sf_v2sf
16679 = build_function_type_list (V2SF_type_node,
16680 V2SF_type_node, V2SF_type_node, NULL_TREE);
16681 tree v2si_ftype_v2sf_v2sf
16682 = build_function_type_list (V2SI_type_node,
16683 V2SF_type_node, V2SF_type_node, NULL_TREE);
16684 tree pint_type_node = build_pointer_type (integer_type_node);
16685 tree pdouble_type_node = build_pointer_type (double_type_node);
16686 tree pcdouble_type_node = build_pointer_type (
16687 build_type_variant (double_type_node, 1, 0));
16688 tree int_ftype_v2df_v2df
16689 = build_function_type_list (integer_type_node,
16690 V2DF_type_node, V2DF_type_node, NULL_TREE);
16691
16692 tree void_ftype_pcvoid
16693 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16694 tree v4sf_ftype_v4si
16695 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16696 tree v4si_ftype_v4sf
16697 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16698 tree v2df_ftype_v4si
16699 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16700 tree v4si_ftype_v2df
16701 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16702 tree v2si_ftype_v2df
16703 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16704 tree v4sf_ftype_v2df
16705 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16706 tree v2df_ftype_v2si
16707 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16708 tree v2df_ftype_v4sf
16709 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16710 tree int_ftype_v2df
16711 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16712 tree int64_ftype_v2df
16713 = build_function_type_list (long_long_integer_type_node,
16714 V2DF_type_node, NULL_TREE);
16715 tree v2df_ftype_v2df_int
16716 = build_function_type_list (V2DF_type_node,
16717 V2DF_type_node, integer_type_node, NULL_TREE);
16718 tree v2df_ftype_v2df_int64
16719 = build_function_type_list (V2DF_type_node,
16720 V2DF_type_node, long_long_integer_type_node,
16721 NULL_TREE);
16722 tree v4sf_ftype_v4sf_v2df
16723 = build_function_type_list (V4SF_type_node,
16724 V4SF_type_node, V2DF_type_node, NULL_TREE);
16725 tree v2df_ftype_v2df_v4sf
16726 = build_function_type_list (V2DF_type_node,
16727 V2DF_type_node, V4SF_type_node, NULL_TREE);
16728 tree v2df_ftype_v2df_v2df_int
16729 = build_function_type_list (V2DF_type_node,
16730 V2DF_type_node, V2DF_type_node,
16731 integer_type_node,
16732 NULL_TREE);
16733 tree v2df_ftype_v2df_pcdouble
16734 = build_function_type_list (V2DF_type_node,
16735 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16736 tree void_ftype_pdouble_v2df
16737 = build_function_type_list (void_type_node,
16738 pdouble_type_node, V2DF_type_node, NULL_TREE);
16739 tree void_ftype_pint_int
16740 = build_function_type_list (void_type_node,
16741 pint_type_node, integer_type_node, NULL_TREE);
16742 tree void_ftype_v16qi_v16qi_pchar
16743 = build_function_type_list (void_type_node,
16744 V16QI_type_node, V16QI_type_node,
16745 pchar_type_node, NULL_TREE);
16746 tree v2df_ftype_pcdouble
16747 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16748 tree v2df_ftype_v2df_v2df
16749 = build_function_type_list (V2DF_type_node,
16750 V2DF_type_node, V2DF_type_node, NULL_TREE);
16751 tree v16qi_ftype_v16qi_v16qi
16752 = build_function_type_list (V16QI_type_node,
16753 V16QI_type_node, V16QI_type_node, NULL_TREE);
16754 tree v8hi_ftype_v8hi_v8hi
16755 = build_function_type_list (V8HI_type_node,
16756 V8HI_type_node, V8HI_type_node, NULL_TREE);
16757 tree v4si_ftype_v4si_v4si
16758 = build_function_type_list (V4SI_type_node,
16759 V4SI_type_node, V4SI_type_node, NULL_TREE);
16760 tree v2di_ftype_v2di_v2di
16761 = build_function_type_list (V2DI_type_node,
16762 V2DI_type_node, V2DI_type_node, NULL_TREE);
16763 tree v2di_ftype_v2df_v2df
16764 = build_function_type_list (V2DI_type_node,
16765 V2DF_type_node, V2DF_type_node, NULL_TREE);
16766 tree v2df_ftype_v2df
16767 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16768 tree v2di_ftype_v2di_int
16769 = build_function_type_list (V2DI_type_node,
16770 V2DI_type_node, integer_type_node, NULL_TREE);
16771 tree v2di_ftype_v2di_v2di_int
16772 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16773 V2DI_type_node, integer_type_node, NULL_TREE);
16774 tree v4si_ftype_v4si_int
16775 = build_function_type_list (V4SI_type_node,
16776 V4SI_type_node, integer_type_node, NULL_TREE);
16777 tree v8hi_ftype_v8hi_int
16778 = build_function_type_list (V8HI_type_node,
16779 V8HI_type_node, integer_type_node, NULL_TREE);
16780 tree v8hi_ftype_v8hi_v2di
16781 = build_function_type_list (V8HI_type_node,
16782 V8HI_type_node, V2DI_type_node, NULL_TREE);
16783 tree v4si_ftype_v4si_v2di
16784 = build_function_type_list (V4SI_type_node,
16785 V4SI_type_node, V2DI_type_node, NULL_TREE);
16786 tree v4si_ftype_v8hi_v8hi
16787 = build_function_type_list (V4SI_type_node,
16788 V8HI_type_node, V8HI_type_node, NULL_TREE);
16789 tree di_ftype_v8qi_v8qi
16790 = build_function_type_list (long_long_unsigned_type_node,
16791 V8QI_type_node, V8QI_type_node, NULL_TREE);
16792 tree di_ftype_v2si_v2si
16793 = build_function_type_list (long_long_unsigned_type_node,
16794 V2SI_type_node, V2SI_type_node, NULL_TREE);
16795 tree v2di_ftype_v16qi_v16qi
16796 = build_function_type_list (V2DI_type_node,
16797 V16QI_type_node, V16QI_type_node, NULL_TREE);
16798 tree v2di_ftype_v4si_v4si
16799 = build_function_type_list (V2DI_type_node,
16800 V4SI_type_node, V4SI_type_node, NULL_TREE);
16801 tree int_ftype_v16qi
16802 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16803 tree v16qi_ftype_pcchar
16804 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16805 tree void_ftype_pchar_v16qi
16806 = build_function_type_list (void_type_node,
16807 pchar_type_node, V16QI_type_node, NULL_TREE);
16808
16809 tree v2di_ftype_v2di_unsigned_unsigned
16810 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16811 unsigned_type_node, unsigned_type_node,
16812 NULL_TREE);
16813 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16814 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16815 unsigned_type_node, unsigned_type_node,
16816 NULL_TREE);
16817 tree v2di_ftype_v2di_v16qi
16818 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16819 NULL_TREE);
16820
16821 tree float80_type;
16822 tree float128_type;
16823 tree ftype;
16824
16825 /* The __float80 type. */
16826 if (TYPE_MODE (long_double_type_node) == XFmode)
16827 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16828 "__float80");
16829 else
16830 {
16831 /* The __float80 type. */
16832 float80_type = make_node (REAL_TYPE);
16833 TYPE_PRECISION (float80_type) = 80;
16834 layout_type (float80_type);
16835 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16836 }
16837
16838 if (TARGET_64BIT)
16839 {
16840 float128_type = make_node (REAL_TYPE);
16841 TYPE_PRECISION (float128_type) = 128;
16842 layout_type (float128_type);
16843 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16844 }
16845
16846 /* Add all builtins that are more or less simple operations on two
16847 operands. */
16848 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16849 {
16850 /* Use one of the operands; the target can have a different mode for
16851 mask-generating compares. */
16852 enum machine_mode mode;
16853 tree type;
16854
16855 if (d->name == 0)
16856 continue;
16857 mode = insn_data[d->icode].operand[1].mode;
16858
16859 switch (mode)
16860 {
16861 case V16QImode:
16862 type = v16qi_ftype_v16qi_v16qi;
16863 break;
16864 case V8HImode:
16865 type = v8hi_ftype_v8hi_v8hi;
16866 break;
16867 case V4SImode:
16868 type = v4si_ftype_v4si_v4si;
16869 break;
16870 case V2DImode:
16871 type = v2di_ftype_v2di_v2di;
16872 break;
16873 case V2DFmode:
16874 type = v2df_ftype_v2df_v2df;
16875 break;
16876 case V4SFmode:
16877 type = v4sf_ftype_v4sf_v4sf;
16878 break;
16879 case V8QImode:
16880 type = v8qi_ftype_v8qi_v8qi;
16881 break;
16882 case V4HImode:
16883 type = v4hi_ftype_v4hi_v4hi;
16884 break;
16885 case V2SImode:
16886 type = v2si_ftype_v2si_v2si;
16887 break;
16888 case DImode:
16889 type = di_ftype_di_di;
16890 break;
16891
16892 default:
16893 gcc_unreachable ();
16894 }
16895
16896 /* Override for comparisons. */
16897 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16898 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16899 type = v4si_ftype_v4sf_v4sf;
16900
16901 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16902 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16903 type = v2di_ftype_v2df_v2df;
16904
16905 def_builtin (d->mask, d->name, type, d->code);
16906 }
16907
16908 /* Add all builtins that are more or less simple operations on 1 operand. */
16909 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16910 {
16911 enum machine_mode mode;
16912 tree type;
16913
16914 if (d->name == 0)
16915 continue;
16916 mode = insn_data[d->icode].operand[1].mode;
16917
16918 switch (mode)
16919 {
16920 case V16QImode:
16921 type = v16qi_ftype_v16qi;
16922 break;
16923 case V8HImode:
16924 type = v8hi_ftype_v8hi;
16925 break;
16926 case V4SImode:
16927 type = v4si_ftype_v4si;
16928 break;
16929 case V2DFmode:
16930 type = v2df_ftype_v2df;
16931 break;
16932 case V4SFmode:
16933 type = v4sf_ftype_v4sf;
16934 break;
16935 case V8QImode:
16936 type = v8qi_ftype_v8qi;
16937 break;
16938 case V4HImode:
16939 type = v4hi_ftype_v4hi;
16940 break;
16941 case V2SImode:
16942 type = v2si_ftype_v2si;
16943 break;
16944
16945 default:
16946 gcc_unreachable ();
16947 }
16948
16949 def_builtin (d->mask, d->name, type, d->code);
16950 }
16951
16952 /* Add the remaining MMX insns with somewhat more complicated types. */
16953 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16954 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16955 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16956 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16957
16958 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16959 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16960 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16961
16962 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16963 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16964
16965 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16966 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16967
16968 /* comi/ucomi insns. */
16969 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16970 if (d->mask == MASK_SSE2)
16971 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16972 else
16973 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16974
16975 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16976 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16977 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16978
16979 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16980 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16981 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16982 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16983 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16984 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16985 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16986 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16987 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16988 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16989 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16990
16991 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16992
16993 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16994 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16995
16996 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16997 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16998 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16999 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17000
17001 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17002 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17003 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17004 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17005
17006 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17007
17008 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17009
17010 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17011 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17012 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17013 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17014 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17015 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17016
17017 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17018
17019 /* Original 3DNow! */
17020 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17021 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17022 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17023 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17024 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17025 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17026 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17027 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17028 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17029 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17030 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17031 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17032 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17033 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17034 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17035 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17036 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17037 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17038 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17039 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17040
17041 /* 3DNow! extension as used in the Athlon CPU. */
17042 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17043 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17044 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17045 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17046 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17047 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17048
17049 /* SSE2 */
17050 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17051
17052 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17053 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17054
17055 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17056 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17057
17058 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17059 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17060 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17061 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17062 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17063
17064 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17065 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17066 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17067 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17068
17069 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17070 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17071
17072 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17073
17074 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17075 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17076
17077 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17078 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17079 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17080 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17081 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17082
17083 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17084
17085 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17086 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17087 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17088 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17089
17090 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17091 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17092 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17093
17094 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17095 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17096 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17097 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17098
17099 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17100 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17101 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17102
17103 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17104 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17105
17106 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17107 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17108
17109 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17110 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17111 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17112
17113 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17114 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17115 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17116
17117 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17118 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17119
17120 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17121 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17122 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17123 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17124
17125 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17126 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17127 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17128 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17129
17130 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17131 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17132
17133 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17134
17135 /* Prescott New Instructions. */
17136 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17137 void_ftype_pcvoid_unsigned_unsigned,
17138 IX86_BUILTIN_MONITOR);
17139 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17140 void_ftype_unsigned_unsigned,
17141 IX86_BUILTIN_MWAIT);
17142 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
17143 v4sf_ftype_v4sf,
17144 IX86_BUILTIN_MOVSHDUP);
17145 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
17146 v4sf_ftype_v4sf,
17147 IX86_BUILTIN_MOVSLDUP);
17148 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17149 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17150
17151 /* SSSE3. */
17152 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17153 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17154 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17155 IX86_BUILTIN_PALIGNR);
17156
17157 /* AMDFAM10 SSE4A new built-ins.  */
17158 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17159 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17160 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17161 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17162 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17163 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17164 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17165 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17166 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17167 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17168 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17169 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
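/* As with the other vector builtins, these are normally reached through
the SSE4A intrinsics header (ammintrin.h); for example, _mm_stream_sd is
expected to wrap __builtin_ia32_movntsd. That is the usual wrapping
convention, not something enforced here. */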
17170
17171 /* Access to the vec_init patterns. */
17172 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17173 integer_type_node, NULL_TREE);
17174 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17175 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17176
17177 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17178 short_integer_type_node,
17179 short_integer_type_node,
17180 short_integer_type_node, NULL_TREE);
17181 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17182 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17183
17184 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17185 char_type_node, char_type_node,
17186 char_type_node, char_type_node,
17187 char_type_node, char_type_node,
17188 char_type_node, NULL_TREE);
17189 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17190 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17191
17192 /* Access to the vec_extract patterns. */
17193 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17194 integer_type_node, NULL_TREE);
17195 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17196 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17197
17198 ftype = build_function_type_list (long_long_integer_type_node,
17199 V2DI_type_node, integer_type_node,
17200 NULL_TREE);
17201 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17202 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17203
17204 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17205 integer_type_node, NULL_TREE);
17206 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17207 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17208
17209 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17210 integer_type_node, NULL_TREE);
17211 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17212 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17213
17214 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17215 integer_type_node, NULL_TREE);
17216 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17217 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17218
17219 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17220 integer_type_node, NULL_TREE);
17221 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17222 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17223
17224 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17225 integer_type_node, NULL_TREE);
17226 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17227 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17228
17229 /* Access to the vec_set patterns. */
17230 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17231 intHI_type_node,
17232 integer_type_node, NULL_TREE);
17233 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17234 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17235
17236 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17237 intHI_type_node,
17238 integer_type_node, NULL_TREE);
17239 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17240 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17241 }
17242
17243 /* Errors in the source file can cause expand_expr to return const0_rtx
17244 where we expect a vector. To avoid crashing, use one of the vector
17245 clear instructions. */
17246 static rtx
17247 safe_vector_operand (rtx x, enum machine_mode mode)
17248 {
17249 if (x == const0_rtx)
17250 x = CONST0_RTX (mode);
17251 return x;
17252 }
17253
17254 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17255
17256 static rtx
17257 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17258 {
17259 rtx pat, xops[3];
17260 tree arg0 = CALL_EXPR_ARG (exp, 0);
17261 tree arg1 = CALL_EXPR_ARG (exp, 1);
17262 rtx op0 = expand_normal (arg0);
17263 rtx op1 = expand_normal (arg1);
17264 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17265 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17266 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17267
17268 if (VECTOR_MODE_P (mode0))
17269 op0 = safe_vector_operand (op0, mode0);
17270 if (VECTOR_MODE_P (mode1))
17271 op1 = safe_vector_operand (op1, mode1);
17272
17273 if (optimize || !target
17274 || GET_MODE (target) != tmode
17275 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17276 target = gen_reg_rtx (tmode);
17277
17278 if (GET_MODE (op1) == SImode && mode1 == TImode)
17279 {
17280 rtx x = gen_reg_rtx (V4SImode);
17281 emit_insn (gen_sse2_loadd (x, op1));
17282 op1 = gen_lowpart (TImode, x);
17283 }
17284
17285 /* The insn must want input operands in the same modes as the
17286 result. */
17287 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17288 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17289
17290 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17291 op0 = copy_to_mode_reg (mode0, op0);
17292 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17293 op1 = copy_to_mode_reg (mode1, op1);
17294
17295 /* ??? Using ix86_fixup_binary_operands is problematic when
17296 we've got mismatched modes. Fake it. */
17297
17298 xops[0] = target;
17299 xops[1] = op0;
17300 xops[2] = op1;
17301
17302 if (tmode == mode0 && tmode == mode1)
17303 {
17304 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17305 op0 = xops[1];
17306 op1 = xops[2];
17307 }
17308 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17309 {
17310 op0 = force_reg (mode0, op0);
17311 op1 = force_reg (mode1, op1);
17312 target = gen_reg_rtx (tmode);
17313 }
17314
17315 pat = GEN_FCN (icode) (target, op0, op1);
17316 if (! pat)
17317 return 0;
17318 emit_insn (pat);
17319 return target;
17320 }
17321
17322 /* Subroutine of ix86_expand_builtin to take care of stores. */
17323
17324 static rtx
17325 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17326 {
17327 rtx pat;
17328 tree arg0 = CALL_EXPR_ARG (exp, 0);
17329 tree arg1 = CALL_EXPR_ARG (exp, 1);
17330 rtx op0 = expand_normal (arg0);
17331 rtx op1 = expand_normal (arg1);
17332 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17333 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17334
17335 if (VECTOR_MODE_P (mode1))
17336 op1 = safe_vector_operand (op1, mode1);
17337
17338 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17339 op1 = copy_to_mode_reg (mode1, op1);
17340
17341 pat = GEN_FCN (icode) (op0, op1);
17342 if (pat)
17343 emit_insn (pat);
17344 return 0;
17345 }
17346
17347 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17348
17349 static rtx
17350 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17351 rtx target, int do_load)
17352 {
17353 rtx pat;
17354 tree arg0 = CALL_EXPR_ARG (exp, 0);
17355 rtx op0 = expand_normal (arg0);
17356 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17357 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17358
17359 if (optimize || !target
17360 || GET_MODE (target) != tmode
17361 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17362 target = gen_reg_rtx (tmode);
17363 if (do_load)
17364 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17365 else
17366 {
17367 if (VECTOR_MODE_P (mode0))
17368 op0 = safe_vector_operand (op0, mode0);
17369
17370 if ((optimize && !register_operand (op0, mode0))
17371 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17372 op0 = copy_to_mode_reg (mode0, op0);
17373 }
17374
17375 pat = GEN_FCN (icode) (target, op0);
17376 if (! pat)
17377 return 0;
17378 emit_insn (pat);
17379 return target;
17380 }
17381
17382 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17383 sqrtss, rsqrtss, rcpss. */
17384
17385 static rtx
17386 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17387 {
17388 rtx pat;
17389 tree arg0 = CALL_EXPR_ARG (exp, 0);
17390 rtx op1, op0 = expand_normal (arg0);
17391 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17392 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17393
17394 if (optimize || !target
17395 || GET_MODE (target) != tmode
17396 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17397 target = gen_reg_rtx (tmode);
17398
17399 if (VECTOR_MODE_P (mode0))
17400 op0 = safe_vector_operand (op0, mode0);
17401
17402 if ((optimize && !register_operand (op0, mode0))
17403 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17404 op0 = copy_to_mode_reg (mode0, op0);
17405
17406 op1 = op0;
17407 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17408 op1 = copy_to_mode_reg (mode0, op1);
17409
17410 pat = GEN_FCN (icode) (target, op0, op1);
17411 if (! pat)
17412 return 0;
17413 emit_insn (pat);
17414 return target;
17415 }
17416
17417 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17418
17419 static rtx
17420 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17421 rtx target)
17422 {
17423 rtx pat;
17424 tree arg0 = CALL_EXPR_ARG (exp, 0);
17425 tree arg1 = CALL_EXPR_ARG (exp, 1);
17426 rtx op0 = expand_normal (arg0);
17427 rtx op1 = expand_normal (arg1);
17428 rtx op2;
17429 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17430 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17431 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17432 enum rtx_code comparison = d->comparison;
17433
17434 if (VECTOR_MODE_P (mode0))
17435 op0 = safe_vector_operand (op0, mode0);
17436 if (VECTOR_MODE_P (mode1))
17437 op1 = safe_vector_operand (op1, mode1);
17438
17439 /* Swap operands if we have a comparison that isn't available in
17440 hardware. */
17441 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17442 {
17443 rtx tmp = gen_reg_rtx (mode1);
17444 emit_move_insn (tmp, op1);
17445 op1 = op0;
17446 op0 = tmp;
17447 }
17448
17449 if (optimize || !target
17450 || GET_MODE (target) != tmode
17451 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17452 target = gen_reg_rtx (tmode);
17453
17454 if ((optimize && !register_operand (op0, mode0))
17455 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17456 op0 = copy_to_mode_reg (mode0, op0);
17457 if ((optimize && !register_operand (op1, mode1))
17458 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17459 op1 = copy_to_mode_reg (mode1, op1);
17460
17461 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17462 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17463 if (! pat)
17464 return 0;
17465 emit_insn (pat);
17466 return target;
17467 }
17468
17469 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17470
17471 static rtx
17472 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17473 rtx target)
17474 {
17475 rtx pat;
17476 tree arg0 = CALL_EXPR_ARG (exp, 0);
17477 tree arg1 = CALL_EXPR_ARG (exp, 1);
17478 rtx op0 = expand_normal (arg0);
17479 rtx op1 = expand_normal (arg1);
17480 rtx op2;
17481 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17482 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17483 enum rtx_code comparison = d->comparison;
17484
17485 if (VECTOR_MODE_P (mode0))
17486 op0 = safe_vector_operand (op0, mode0);
17487 if (VECTOR_MODE_P (mode1))
17488 op1 = safe_vector_operand (op1, mode1);
17489
17490 /* Swap operands if we have a comparison that isn't available in
17491 hardware. */
17492 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17493 {
17494 rtx tmp = op1;
17495 op1 = op0;
17496 op0 = tmp;
17497 }
17498
17499 target = gen_reg_rtx (SImode);
17500 emit_move_insn (target, const0_rtx);
17501 target = gen_rtx_SUBREG (QImode, target, 0);
17502
17503 if ((optimize && !register_operand (op0, mode0))
17504 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17505 op0 = copy_to_mode_reg (mode0, op0);
17506 if ((optimize && !register_operand (op1, mode1))
17507 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17508 op1 = copy_to_mode_reg (mode1, op1);
17509
17510 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17511 pat = GEN_FCN (d->icode) (op0, op1);
17512 if (! pat)
17513 return 0;
17514 emit_insn (pat);
17515 emit_insn (gen_rtx_SET (VOIDmode,
17516 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17517 gen_rtx_fmt_ee (comparison, QImode,
17518 SET_DEST (pat),
17519 const0_rtx)));
17520
17521 return SUBREG_REG (target);
17522 }
17523
17524 /* Return the integer constant in ARG. Constrain it to be in the range
17525 of the subparts of VEC_TYPE; issue an error if not. */
17526
17527 static int
17528 get_element_number (tree vec_type, tree arg)
17529 {
17530 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17531
17532 if (!host_integerp (arg, 1)
17533 || (elt = tree_low_cst (arg, 1), elt > max))
17534 {
17535 error ("selector must be an integer constant in the range 0..%wi", max);
17536 return 0;
17537 }
17538
17539 return elt;
17540 }
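/* For example, __builtin_ia32_vec_ext_v4sf (x, 4) is diagnosed here:
V4SF has four subparts, so the selector must be a constant in 0..3. */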
17541
17542 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17543 ix86_expand_vector_init. We DO have language-level syntax for this, in
17544 the form of (type){ init-list }. Except that since we can't place emms
17545 instructions from inside the compiler, we can't allow the use of MMX
17546 registers unless the user explicitly asks for it. So we do *not* define
17547 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17548 we have builtins invoked by mmintrin.h that give us license to emit
17549 these sorts of instructions. */
17550
17551 static rtx
17552 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17553 {
17554 enum machine_mode tmode = TYPE_MODE (type);
17555 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17556 int i, n_elt = GET_MODE_NUNITS (tmode);
17557 rtvec v = rtvec_alloc (n_elt);
17558
17559 gcc_assert (VECTOR_MODE_P (tmode));
17560 gcc_assert (call_expr_nargs (exp) == n_elt);
17561
17562 for (i = 0; i < n_elt; ++i)
17563 {
17564 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17565 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17566 }
17567
17568 if (!target || !register_operand (target, tmode))
17569 target = gen_reg_rtx (tmode);
17570
17571 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17572 return target;
17573 }
17574
17575 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17576 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17577 had a language-level syntax for referencing vector elements. */
17578
17579 static rtx
17580 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17581 {
17582 enum machine_mode tmode, mode0;
17583 tree arg0, arg1;
17584 int elt;
17585 rtx op0;
17586
17587 arg0 = CALL_EXPR_ARG (exp, 0);
17588 arg1 = CALL_EXPR_ARG (exp, 1);
17589
17590 op0 = expand_normal (arg0);
17591 elt = get_element_number (TREE_TYPE (arg0), arg1);
17592
17593 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17594 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17595 gcc_assert (VECTOR_MODE_P (mode0));
17596
17597 op0 = force_reg (mode0, op0);
17598
17599 if (optimize || !target || !register_operand (target, tmode))
17600 target = gen_reg_rtx (tmode);
17601
17602 ix86_expand_vector_extract (true, target, op0, elt);
17603
17604 return target;
17605 }
17606
17607 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17608 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17609 a language-level syntax for referencing vector elements. */
17610
17611 static rtx
17612 ix86_expand_vec_set_builtin (tree exp)
17613 {
17614 enum machine_mode tmode, mode1;
17615 tree arg0, arg1, arg2;
17616 int elt;
17617 rtx op0, op1;
17618
17619 arg0 = CALL_EXPR_ARG (exp, 0);
17620 arg1 = CALL_EXPR_ARG (exp, 1);
17621 arg2 = CALL_EXPR_ARG (exp, 2);
17622
17623 tmode = TYPE_MODE (TREE_TYPE (arg0));
17624 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17625 gcc_assert (VECTOR_MODE_P (tmode));
17626
17627 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17628 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17629 elt = get_element_number (TREE_TYPE (arg0), arg2);
17630
17631 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17632 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17633
17634 op0 = force_reg (tmode, op0);
17635 op1 = force_reg (mode1, op1);
17636
17637 ix86_expand_vector_set (true, op0, op1, elt);
17638
17639 return op0;
17640 }
17641
17642 /* Expand an expression EXP that calls a built-in function,
17643 with result going to TARGET if that's convenient
17644 (and in mode MODE if that's convenient).
17645 SUBTARGET may be used as the target for computing one of EXP's operands.
17646 IGNORE is nonzero if the value is to be ignored. */
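/* The switch below handles builtins that need bespoke expansion; any
builtin code not matched there falls through to the generic bdesc_2arg,
bdesc_1arg and bdesc_comi tables at the end of the function. */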
17647
17648 static rtx
17649 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17650 enum machine_mode mode ATTRIBUTE_UNUSED,
17651 int ignore ATTRIBUTE_UNUSED)
17652 {
17653 const struct builtin_description *d;
17654 size_t i;
17655 enum insn_code icode;
17656 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17657 tree arg0, arg1, arg2, arg3;
17658 rtx op0, op1, op2, op3, pat;
17659 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17660 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17661
17662 switch (fcode)
17663 {
17664 case IX86_BUILTIN_EMMS:
17665 emit_insn (gen_mmx_emms ());
17666 return 0;
17667
17668 case IX86_BUILTIN_SFENCE:
17669 emit_insn (gen_sse_sfence ());
17670 return 0;
17671
17672 case IX86_BUILTIN_MASKMOVQ:
17673 case IX86_BUILTIN_MASKMOVDQU:
17674 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17675 ? CODE_FOR_mmx_maskmovq
17676 : CODE_FOR_sse2_maskmovdqu);
17677 /* Note the arg order is different from the operand order. */
17678 arg1 = CALL_EXPR_ARG (exp, 0);
17679 arg2 = CALL_EXPR_ARG (exp, 1);
17680 arg0 = CALL_EXPR_ARG (exp, 2);
17681 op0 = expand_normal (arg0);
17682 op1 = expand_normal (arg1);
17683 op2 = expand_normal (arg2);
17684 mode0 = insn_data[icode].operand[0].mode;
17685 mode1 = insn_data[icode].operand[1].mode;
17686 mode2 = insn_data[icode].operand[2].mode;
17687
17688 op0 = force_reg (Pmode, op0);
17689 op0 = gen_rtx_MEM (mode1, op0);
17690
17691 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17692 op0 = copy_to_mode_reg (mode0, op0);
17693 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17694 op1 = copy_to_mode_reg (mode1, op1);
17695 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17696 op2 = copy_to_mode_reg (mode2, op2);
17697 pat = GEN_FCN (icode) (op0, op1, op2);
17698 if (! pat)
17699 return 0;
17700 emit_insn (pat);
17701 return 0;
17702
17703 case IX86_BUILTIN_SQRTSS:
17704 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17705 case IX86_BUILTIN_RSQRTSS:
17706 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17707 case IX86_BUILTIN_RCPSS:
17708 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17709
17710 case IX86_BUILTIN_LOADUPS:
17711 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17712
17713 case IX86_BUILTIN_STOREUPS:
17714 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17715
17716 case IX86_BUILTIN_LOADHPS:
17717 case IX86_BUILTIN_LOADLPS:
17718 case IX86_BUILTIN_LOADHPD:
17719 case IX86_BUILTIN_LOADLPD:
17720 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17721 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17722 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17723 : CODE_FOR_sse2_loadlpd);
17724 arg0 = CALL_EXPR_ARG (exp, 0);
17725 arg1 = CALL_EXPR_ARG (exp, 1);
17726 op0 = expand_normal (arg0);
17727 op1 = expand_normal (arg1);
17728 tmode = insn_data[icode].operand[0].mode;
17729 mode0 = insn_data[icode].operand[1].mode;
17730 mode1 = insn_data[icode].operand[2].mode;
17731
17732 op0 = force_reg (mode0, op0);
17733 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17734 if (optimize || target == 0
17735 || GET_MODE (target) != tmode
17736 || !register_operand (target, tmode))
17737 target = gen_reg_rtx (tmode);
17738 pat = GEN_FCN (icode) (target, op0, op1);
17739 if (! pat)
17740 return 0;
17741 emit_insn (pat);
17742 return target;
17743
17744 case IX86_BUILTIN_STOREHPS:
17745 case IX86_BUILTIN_STORELPS:
17746 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17747 : CODE_FOR_sse_storelps);
17748 arg0 = CALL_EXPR_ARG (exp, 0);
17749 arg1 = CALL_EXPR_ARG (exp, 1);
17750 op0 = expand_normal (arg0);
17751 op1 = expand_normal (arg1);
17752 mode0 = insn_data[icode].operand[0].mode;
17753 mode1 = insn_data[icode].operand[1].mode;
17754
17755 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17756 op1 = force_reg (mode1, op1);
17757
17758 pat = GEN_FCN (icode) (op0, op1);
17759 if (! pat)
17760 return 0;
17761 emit_insn (pat);
17762 return const0_rtx;
17763
17764 case IX86_BUILTIN_MOVNTPS:
17765 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17766 case IX86_BUILTIN_MOVNTQ:
17767 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17768
17769 case IX86_BUILTIN_LDMXCSR:
17770 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17771 target = assign_386_stack_local (SImode, SLOT_TEMP);
17772 emit_move_insn (target, op0);
17773 emit_insn (gen_sse_ldmxcsr (target));
17774 return 0;
17775
17776 case IX86_BUILTIN_STMXCSR:
17777 target = assign_386_stack_local (SImode, SLOT_TEMP);
17778 emit_insn (gen_sse_stmxcsr (target));
17779 return copy_to_mode_reg (SImode, target);
17780
17781 case IX86_BUILTIN_SHUFPS:
17782 case IX86_BUILTIN_SHUFPD:
17783 icode = (fcode == IX86_BUILTIN_SHUFPS
17784 ? CODE_FOR_sse_shufps
17785 : CODE_FOR_sse2_shufpd);
17786 arg0 = CALL_EXPR_ARG (exp, 0);
17787 arg1 = CALL_EXPR_ARG (exp, 1);
17788 arg2 = CALL_EXPR_ARG (exp, 2);
17789 op0 = expand_normal (arg0);
17790 op1 = expand_normal (arg1);
17791 op2 = expand_normal (arg2);
17792 tmode = insn_data[icode].operand[0].mode;
17793 mode0 = insn_data[icode].operand[1].mode;
17794 mode1 = insn_data[icode].operand[2].mode;
17795 mode2 = insn_data[icode].operand[3].mode;
17796
17797 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17798 op0 = copy_to_mode_reg (mode0, op0);
17799 if ((optimize && !register_operand (op1, mode1))
17800 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17801 op1 = copy_to_mode_reg (mode1, op1);
17802 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17803 {
17804 /* @@@ better error message */
17805 error ("mask must be an immediate");
17806 return gen_reg_rtx (tmode);
17807 }
17808 if (optimize || target == 0
17809 || GET_MODE (target) != tmode
17810 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17811 target = gen_reg_rtx (tmode);
17812 pat = GEN_FCN (icode) (target, op0, op1, op2);
17813 if (! pat)
17814 return 0;
17815 emit_insn (pat);
17816 return target;
17817
17818 case IX86_BUILTIN_PSHUFW:
17819 case IX86_BUILTIN_PSHUFD:
17820 case IX86_BUILTIN_PSHUFHW:
17821 case IX86_BUILTIN_PSHUFLW:
17822 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17823 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17824 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17825 : CODE_FOR_mmx_pshufw);
17826 arg0 = CALL_EXPR_ARG (exp, 0);
17827 arg1 = CALL_EXPR_ARG (exp, 1);
17828 op0 = expand_normal (arg0);
17829 op1 = expand_normal (arg1);
17830 tmode = insn_data[icode].operand[0].mode;
17831 mode1 = insn_data[icode].operand[1].mode;
17832 mode2 = insn_data[icode].operand[2].mode;
17833
17834 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17835 op0 = copy_to_mode_reg (mode1, op0);
17836 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17837 {
17838 /* @@@ better error message */
17839 error ("mask must be an immediate");
17840 return const0_rtx;
17841 }
17842 if (target == 0
17843 || GET_MODE (target) != tmode
17844 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17845 target = gen_reg_rtx (tmode);
17846 pat = GEN_FCN (icode) (target, op0, op1);
17847 if (! pat)
17848 return 0;
17849 emit_insn (pat);
17850 return target;
17851
17852 case IX86_BUILTIN_PSLLDQI128:
17853 case IX86_BUILTIN_PSRLDQI128:
17854 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17855 : CODE_FOR_sse2_lshrti3);
17856 arg0 = CALL_EXPR_ARG (exp, 0);
17857 arg1 = CALL_EXPR_ARG (exp, 1);
17858 op0 = expand_normal (arg0);
17859 op1 = expand_normal (arg1);
17860 tmode = insn_data[icode].operand[0].mode;
17861 mode1 = insn_data[icode].operand[1].mode;
17862 mode2 = insn_data[icode].operand[2].mode;
17863
17864 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17865 {
17866 op0 = copy_to_reg (op0);
17867 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17868 }
17869 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17870 {
17871 error ("shift must be an immediate");
17872 return const0_rtx;
17873 }
17874 target = gen_reg_rtx (V2DImode);
17875 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17876 if (! pat)
17877 return 0;
17878 emit_insn (pat);
17879 return target;
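/* Note that sse2_ashlti3/sse2_lshrti3 interpret the count in bits. The
byte-granular _mm_slli_si128/_mm_srli_si128 intrinsics are therefore
expected to scale their argument by eight before it reaches these
builtins -- an observation about the intended usage, not checked here. */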
17880
17881 case IX86_BUILTIN_FEMMS:
17882 emit_insn (gen_mmx_femms ());
17883 return NULL_RTX;
17884
17885 case IX86_BUILTIN_PAVGUSB:
17886 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
17887
17888 case IX86_BUILTIN_PF2ID:
17889 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
17890
17891 case IX86_BUILTIN_PFACC:
17892 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
17893
17894 case IX86_BUILTIN_PFADD:
17895 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
17896
17897 case IX86_BUILTIN_PFCMPEQ:
17898 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
17899
17900 case IX86_BUILTIN_PFCMPGE:
17901 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
17902
17903 case IX86_BUILTIN_PFCMPGT:
17904 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
17905
17906 case IX86_BUILTIN_PFMAX:
17907 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
17908
17909 case IX86_BUILTIN_PFMIN:
17910 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
17911
17912 case IX86_BUILTIN_PFMUL:
17913 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
17914
17915 case IX86_BUILTIN_PFRCP:
17916 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
17917
17918 case IX86_BUILTIN_PFRCPIT1:
17919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
17920
17921 case IX86_BUILTIN_PFRCPIT2:
17922 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
17923
17924 case IX86_BUILTIN_PFRSQIT1:
17925 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
17926
17927 case IX86_BUILTIN_PFRSQRT:
17928 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
17929
17930 case IX86_BUILTIN_PFSUB:
17931 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
17932
17933 case IX86_BUILTIN_PFSUBR:
17934 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
17935
17936 case IX86_BUILTIN_PI2FD:
17937 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
17938
17939 case IX86_BUILTIN_PMULHRW:
17940 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
17941
17942 case IX86_BUILTIN_PF2IW:
17943 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
17944
17945 case IX86_BUILTIN_PFNACC:
17946 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
17947
17948 case IX86_BUILTIN_PFPNACC:
17949 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
17950
17951 case IX86_BUILTIN_PI2FW:
17952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
17953
17954 case IX86_BUILTIN_PSWAPDSI:
17955 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
17956
17957 case IX86_BUILTIN_PSWAPDSF:
17958 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
17959
17960 case IX86_BUILTIN_SQRTSD:
17961 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
17962 case IX86_BUILTIN_LOADUPD:
17963 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
17964 case IX86_BUILTIN_STOREUPD:
17965 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
17966
17967 case IX86_BUILTIN_MFENCE:
17968 emit_insn (gen_sse2_mfence ());
17969 return 0;
17970 case IX86_BUILTIN_LFENCE:
17971 emit_insn (gen_sse2_lfence ());
17972 return 0;
17973
17974 case IX86_BUILTIN_CLFLUSH:
17975 arg0 = CALL_EXPR_ARG (exp, 0);
17976 op0 = expand_normal (arg0);
17977 icode = CODE_FOR_sse2_clflush;
17978 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17979 op0 = copy_to_mode_reg (Pmode, op0);
17980
17981 emit_insn (gen_sse2_clflush (op0));
17982 return 0;
17983
17984 case IX86_BUILTIN_MOVNTPD:
17985 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
17986 case IX86_BUILTIN_MOVNTDQ:
17987 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
17988 case IX86_BUILTIN_MOVNTI:
17989 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
17990
17991 case IX86_BUILTIN_LOADDQU:
17992 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
17993 case IX86_BUILTIN_STOREDQU:
17994 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
17995
17996 case IX86_BUILTIN_MONITOR:
17997 arg0 = CALL_EXPR_ARG (exp, 0);
17998 arg1 = CALL_EXPR_ARG (exp, 1);
17999 arg2 = CALL_EXPR_ARG (exp, 2);
18000 op0 = expand_normal (arg0);
18001 op1 = expand_normal (arg1);
18002 op2 = expand_normal (arg2);
18003 if (!REG_P (op0))
18004 op0 = copy_to_mode_reg (Pmode, op0);
18005 if (!REG_P (op1))
18006 op1 = copy_to_mode_reg (SImode, op1);
18007 if (!REG_P (op2))
18008 op2 = copy_to_mode_reg (SImode, op2);
18009 if (!TARGET_64BIT)
18010 emit_insn (gen_sse3_monitor (op0, op1, op2));
18011 else
18012 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18013 return 0;
18014
18015 case IX86_BUILTIN_MWAIT:
18016 arg0 = CALL_EXPR_ARG (exp, 0);
18017 arg1 = CALL_EXPR_ARG (exp, 1);
18018 op0 = expand_normal (arg0);
18019 op1 = expand_normal (arg1);
18020 if (!REG_P (op0))
18021 op0 = copy_to_mode_reg (SImode, op0);
18022 if (!REG_P (op1))
18023 op1 = copy_to_mode_reg (SImode, op1);
18024 emit_insn (gen_sse3_mwait (op0, op1));
18025 return 0;
18026
18027 case IX86_BUILTIN_LDDQU:
18028 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18029 target, 1);
18030
18031 case IX86_BUILTIN_PALIGNR:
18032 case IX86_BUILTIN_PALIGNR128:
18033 if (fcode == IX86_BUILTIN_PALIGNR)
18034 {
18035 icode = CODE_FOR_ssse3_palignrdi;
18036 mode = DImode;
18037 }
18038 else
18039 {
18040 icode = CODE_FOR_ssse3_palignrti;
18041 mode = V2DImode;
18042 }
18043 arg0 = CALL_EXPR_ARG (exp, 0);
18044 arg1 = CALL_EXPR_ARG (exp, 1);
18045 arg2 = CALL_EXPR_ARG (exp, 2);
18046 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18047 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18048 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18049 tmode = insn_data[icode].operand[0].mode;
18050 mode1 = insn_data[icode].operand[1].mode;
18051 mode2 = insn_data[icode].operand[2].mode;
18052 mode3 = insn_data[icode].operand[3].mode;
18053
18054 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18055 {
18056 op0 = copy_to_reg (op0);
18057 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18058 }
18059 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18060 {
18061 op1 = copy_to_reg (op1);
18062 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18063 }
18064 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18065 {
18066 error ("shift must be an immediate");
18067 return const0_rtx;
18068 }
18069 target = gen_reg_rtx (mode);
18070 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18071 op0, op1, op2);
18072 if (! pat)
18073 return 0;
18074 emit_insn (pat);
18075 return target;
18076
18077 case IX86_BUILTIN_MOVNTSD:
18078 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18079
18080 case IX86_BUILTIN_MOVNTSS:
18081 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18082
18083 case IX86_BUILTIN_INSERTQ:
18084 case IX86_BUILTIN_EXTRQ:
18085 icode = (fcode == IX86_BUILTIN_EXTRQ
18086 ? CODE_FOR_sse4a_extrq
18087 : CODE_FOR_sse4a_insertq);
18088 arg0 = CALL_EXPR_ARG (exp, 0);
18089 arg1 = CALL_EXPR_ARG (exp, 1);
18090 op0 = expand_normal (arg0);
18091 op1 = expand_normal (arg1);
18092 tmode = insn_data[icode].operand[0].mode;
18093 mode1 = insn_data[icode].operand[1].mode;
18094 mode2 = insn_data[icode].operand[2].mode;
18095 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18096 op0 = copy_to_mode_reg (mode1, op0);
18097 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18098 op1 = copy_to_mode_reg (mode2, op1);
18099 if (optimize || target == 0
18100 || GET_MODE (target) != tmode
18101 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18102 target = gen_reg_rtx (tmode);
18103 pat = GEN_FCN (icode) (target, op0, op1);
18104 if (! pat)
18105 return NULL_RTX;
18106 emit_insn (pat);
18107 return target;
18108
18109 case IX86_BUILTIN_EXTRQI:
18110 icode = CODE_FOR_sse4a_extrqi;
18111 arg0 = CALL_EXPR_ARG (exp, 0);
18112 arg1 = CALL_EXPR_ARG (exp, 1);
18113 arg2 = CALL_EXPR_ARG (exp, 2);
18114 op0 = expand_normal (arg0);
18115 op1 = expand_normal (arg1);
18116 op2 = expand_normal (arg2);
18117 tmode = insn_data[icode].operand[0].mode;
18118 mode1 = insn_data[icode].operand[1].mode;
18119 mode2 = insn_data[icode].operand[2].mode;
18120 mode3 = insn_data[icode].operand[3].mode;
18121 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18122 op0 = copy_to_mode_reg (mode1, op0);
18123 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18124 {
18125 error ("index mask must be an immediate");
18126 return gen_reg_rtx (tmode);
18127 }
18128 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18129 {
18130 error ("length mask must be an immediate");
18131 return gen_reg_rtx (tmode);
18132 }
18133 if (optimize || target == 0
18134 || GET_MODE (target) != tmode
18135 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18136 target = gen_reg_rtx (tmode);
18137 pat = GEN_FCN (icode) (target, op0, op1, op2);
18138 if (! pat)
18139 return NULL_RTX;
18140 emit_insn (pat);
18141 return target;
18142
18143 case IX86_BUILTIN_INSERTQI:
18144 icode = CODE_FOR_sse4a_insertqi;
18145 arg0 = CALL_EXPR_ARG (exp, 0);
18146 arg1 = CALL_EXPR_ARG (exp, 1);
18147 arg2 = CALL_EXPR_ARG (exp, 2);
18148 arg3 = CALL_EXPR_ARG (exp, 3);
18149 op0 = expand_normal (arg0);
18150 op1 = expand_normal (arg1);
18151 op2 = expand_normal (arg2);
18152 op3 = expand_normal (arg3);
18153 tmode = insn_data[icode].operand[0].mode;
18154 mode1 = insn_data[icode].operand[1].mode;
18155 mode2 = insn_data[icode].operand[2].mode;
18156 mode3 = insn_data[icode].operand[3].mode;
18157 mode4 = insn_data[icode].operand[4].mode;
18158
18159 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18160 op0 = copy_to_mode_reg (mode1, op0);
18161
18162 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18163 op1 = copy_to_mode_reg (mode2, op1);
18164
18165 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18166 {
18167 error ("index mask must be an immediate");
18168 return gen_reg_rtx (tmode);
18169 }
18170 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18171 {
18172 error ("length mask must be an immediate");
18173 return gen_reg_rtx (tmode);
18174 }
18175 if (optimize || target == 0
18176 || GET_MODE (target) != tmode
18177 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18178 target = gen_reg_rtx (tmode);
18179 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18180 if (! pat)
18181 return NULL_RTX;
18182 emit_insn (pat);
18183 return target;
18184
18185 case IX86_BUILTIN_VEC_INIT_V2SI:
18186 case IX86_BUILTIN_VEC_INIT_V4HI:
18187 case IX86_BUILTIN_VEC_INIT_V8QI:
18188 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18189
18190 case IX86_BUILTIN_VEC_EXT_V2DF:
18191 case IX86_BUILTIN_VEC_EXT_V2DI:
18192 case IX86_BUILTIN_VEC_EXT_V4SF:
18193 case IX86_BUILTIN_VEC_EXT_V4SI:
18194 case IX86_BUILTIN_VEC_EXT_V8HI:
18195 case IX86_BUILTIN_VEC_EXT_V2SI:
18196 case IX86_BUILTIN_VEC_EXT_V4HI:
18197 return ix86_expand_vec_ext_builtin (exp, target);
18198
18199 case IX86_BUILTIN_VEC_SET_V8HI:
18200 case IX86_BUILTIN_VEC_SET_V4HI:
18201 return ix86_expand_vec_set_builtin (exp);
18202
18203 default:
18204 break;
18205 }
18206
18207 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18208 if (d->code == fcode)
18209 {
18210 /* Compares are treated specially. */
18211 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18212 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18213 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18214 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18215 return ix86_expand_sse_compare (d, exp, target);
18216
18217 return ix86_expand_binop_builtin (d->icode, exp, target);
18218 }
18219
18220 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18221 if (d->code == fcode)
18222 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18223
18224 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18225 if (d->code == fcode)
18226 return ix86_expand_sse_comi (d, exp, target);
18227
18228 gcc_unreachable ();
18229 }
18230
18231 /* Returns a function decl for a vectorized version of the builtin function
18232 with builtin function code FN, result vector type TYPE_OUT and argument
18233 vector type TYPE_IN, or NULL_TREE if it is not available. */
18234
18235 static tree
18236 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18237 tree type_in)
18238 {
18239 enum machine_mode in_mode, out_mode;
18240 int in_n, out_n;
18241
18242 if (TREE_CODE (type_out) != VECTOR_TYPE
18243 || TREE_CODE (type_in) != VECTOR_TYPE)
18244 return NULL_TREE;
18245
18246 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18247 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18248 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18249 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18250
18251 switch (fn)
18252 {
18253 case BUILT_IN_SQRT:
18254 if (out_mode == DFmode && out_n == 2
18255 && in_mode == DFmode && in_n == 2)
18256 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18257 return NULL_TREE;
18258
18259 case BUILT_IN_SQRTF:
18260 if (out_mode == SFmode && out_n == 4
18261 && in_mode == SFmode && in_n == 4)
18262 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18263 return NULL_TREE;
18264
18265 case BUILT_IN_LRINTF:
18266 if (out_mode == SImode && out_n == 4
18267 && in_mode == SFmode && in_n == 4)
18268 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18269 return NULL_TREE;
18270
18271 default:
18272 ;
18273 }
18274
18275 return NULL_TREE;
18276 }
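/* Example of the mapping above: when the vectorizer sees an elementwise
sqrt over a V2DFmode loop, it asks for BUILT_IN_SQRT with
type_out == type_in == V2DF and is handed the sqrtpd builtin. */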
18277
18278 /* Returns a decl of a function that implements conversion of the
18279 input vector of type TYPE, or NULL_TREE if it is not available. */
18280
18281 static tree
18282 ix86_builtin_conversion (enum tree_code code, tree type)
18283 {
18284 if (TREE_CODE (type) != VECTOR_TYPE)
18285 return NULL_TREE;
18286
18287 switch (code)
18288 {
18289 case FLOAT_EXPR:
18290 switch (TYPE_MODE (type))
18291 {
18292 case V4SImode:
18293 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18294 default:
18295 return NULL_TREE;
18296 }
18297
18298 case FIX_TRUNC_EXPR:
18299 switch (TYPE_MODE (type))
18300 {
18301 case V4SFmode:
18302 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18303 default:
18304 return NULL_TREE;
18305 }
18306 default:
18307 return NULL_TREE;
18308
18309 }
18310 }
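/* This is what the auto-vectorizer queries when it needs a whole-vector
int<->float conversion; e.g. a FLOAT_EXPR on V4SImode data is expanded
through the cvtdq2ps builtin returned above. */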
18311
18312 /* Store OPERAND to memory after reload has completed. This means
18313 that we can't easily use assign_stack_local. */
18314 rtx
18315 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18316 {
18317 rtx result;
18318
18319 gcc_assert (reload_completed);
18320 if (TARGET_RED_ZONE)
18321 {
18322 result = gen_rtx_MEM (mode,
18323 gen_rtx_PLUS (Pmode,
18324 stack_pointer_rtx,
18325 GEN_INT (-RED_ZONE_SIZE)));
18326 emit_move_insn (result, operand);
18327 }
18328 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18329 {
18330 switch (mode)
18331 {
18332 case HImode:
18333 case SImode:
18334 operand = gen_lowpart (DImode, operand);
18335 /* FALLTHRU */
18336 case DImode:
18337 emit_insn (
18338 gen_rtx_SET (VOIDmode,
18339 gen_rtx_MEM (DImode,
18340 gen_rtx_PRE_DEC (DImode,
18341 stack_pointer_rtx)),
18342 operand));
18343 break;
18344 default:
18345 gcc_unreachable ();
18346 }
18347 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18348 }
18349 else
18350 {
18351 switch (mode)
18352 {
18353 case DImode:
18354 {
18355 rtx operands[2];
18356 split_di (&operand, 1, operands, operands + 1);
18357 emit_insn (
18358 gen_rtx_SET (VOIDmode,
18359 gen_rtx_MEM (SImode,
18360 gen_rtx_PRE_DEC (Pmode,
18361 stack_pointer_rtx)),
18362 operands[1]));
18363 emit_insn (
18364 gen_rtx_SET (VOIDmode,
18365 gen_rtx_MEM (SImode,
18366 gen_rtx_PRE_DEC (Pmode,
18367 stack_pointer_rtx)),
18368 operands[0]));
18369 }
18370 break;
18371 case HImode:
18372 /* Store HImodes as SImodes. */
18373 operand = gen_lowpart (SImode, operand);
18374 /* FALLTHRU */
18375 case SImode:
18376 emit_insn (
18377 gen_rtx_SET (VOIDmode,
18378 gen_rtx_MEM (GET_MODE (operand),
18379 gen_rtx_PRE_DEC (SImode,
18380 stack_pointer_rtx)),
18381 operand));
18382 break;
18383 default:
18384 gcc_unreachable ();
18385 }
18386 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18387 }
18388 return result;
18389 }
18390
18391 /* Free the operand from memory. */
18392 void
18393 ix86_free_from_memory (enum machine_mode mode)
18394 {
18395 if (!TARGET_RED_ZONE)
18396 {
18397 int size;
18398
18399 if (mode == DImode || TARGET_64BIT)
18400 size = 8;
18401 else
18402 size = 4;
18403 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18404 to a pop or add instruction if registers are available. */
18405 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18406 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18407 GEN_INT (size))));
18408 }
18409 }
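/* ix86_force_to_memory and ix86_free_from_memory are intended to be used
as a pair: spill an operand to a scratch stack slot around an insn that
needs it in memory after reload, then release the slot. This describes
the intended pairing inferred from the helpers themselves, not any
particular caller. */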
18410
18411 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18412 QImode must go into class Q_REGS.
18413 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18414 movdf to do mem-to-mem moves through integer regs. */
18415 enum reg_class
18416 ix86_preferred_reload_class (rtx x, enum reg_class class)
18417 {
18418 enum machine_mode mode = GET_MODE (x);
18419
18420 /* We're only allowed to return a subclass of CLASS. Many of the
18421 following checks fail for NO_REGS, so eliminate that early. */
18422 if (class == NO_REGS)
18423 return NO_REGS;
18424
18425 /* All classes can load zeros. */
18426 if (x == CONST0_RTX (mode))
18427 return class;
18428
18429 /* Force constants into memory if we are loading a (nonzero) constant into
18430 an MMX or SSE register. This is because there are no MMX/SSE instructions
18431 to load from a constant. */
18432 if (CONSTANT_P (x)
18433 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18434 return NO_REGS;
18435
18436 /* Prefer SSE regs only, if we can use them for math. */
18437 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18438 return SSE_CLASS_P (class) ? class : NO_REGS;
18439
18440 /* Floating-point constants need more complex checks. */
18441 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18442 {
18443 /* General regs can load everything. */
18444 if (reg_class_subset_p (class, GENERAL_REGS))
18445 return class;
18446
18447 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18448 zero above. We only want to wind up preferring 80387 registers if
18449 we plan on doing computation with them. */
18450 if (TARGET_80387
18451 && standard_80387_constant_p (x))
18452 {
18453 /* Limit class to non-sse. */
18454 if (class == FLOAT_SSE_REGS)
18455 return FLOAT_REGS;
18456 if (class == FP_TOP_SSE_REGS)
18457 return FP_TOP_REG;
18458 if (class == FP_SECOND_SSE_REGS)
18459 return FP_SECOND_REG;
18460 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18461 return class;
18462 }
18463
18464 return NO_REGS;
18465 }
18466
18467 /* Generally when we see PLUS here, it's the function invariant
18468 (plus soft-fp const_int), which can only be computed into general
18469 regs. */
18470 if (GET_CODE (x) == PLUS)
18471 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18472
18473 /* QImode constants are easy to load, but non-constant QImode data
18474 must go into Q_REGS. */
18475 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18476 {
18477 if (reg_class_subset_p (class, Q_REGS))
18478 return class;
18479 if (reg_class_subset_p (Q_REGS, class))
18480 return Q_REGS;
18481 return NO_REGS;
18482 }
18483
18484 return class;
18485 }
18486
18487 /* Discourage putting floating-point values in SSE registers unless
18488 SSE math is being used, and likewise for the 387 registers. */
18489 enum reg_class
18490 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18491 {
18492 enum machine_mode mode = GET_MODE (x);
18493
18494 /* Restrict the output reload class to the register bank that we are doing
18495 math on. If we would like not to return a subset of CLASS, reject this
18496 alternative: if reload cannot do this, it will still use its choice. */
18498 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18499 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18500
18501 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18502 {
18503 if (class == FP_TOP_SSE_REGS)
18504 return FP_TOP_REG;
18505 else if (class == FP_SECOND_SSE_REGS)
18506 return FP_SECOND_REG;
18507 else
18508 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18509 }
18510
18511 return class;
18512 }
18513
18514 /* If we are copying between general and FP registers, we need a memory
18515 location. The same is true for SSE and MMX registers.
18516
18517 The macro can't work reliably when one of the CLASSES is a class containing
18518 registers from multiple units (SSE, MMX, integer). We avoid this by never
18519 combining those units in a single alternative in the machine description.
18520 Ensure that this constraint holds to avoid unexpected surprises.
18521
18522 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18523 enforce these sanity checks. */
18524
18525 int
18526 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18527 enum machine_mode mode, int strict)
18528 {
18529 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18530 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18531 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18532 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18533 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18534 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18535 {
18536 gcc_assert (!strict);
18537 return true;
18538 }
18539
18540 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18541 return true;
18542
18543 /* ??? This is a lie. We do have moves between mmx/general and between
18544 mmx/sse2. But by saying we need secondary memory we discourage the
18545 register allocator from using the mmx registers unless needed. */
18546 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18547 return true;
18548
18549 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18550 {
18551 /* SSE1 doesn't have any direct moves from other classes. */
18552 if (!TARGET_SSE2)
18553 return true;
18554
18555 /* If the target says that inter-unit moves are more expensive
18556 than moving through memory, then don't generate them. */
18557 if (!TARGET_INTER_UNIT_MOVES)
18558 return true;
18559
18560 /* Between SSE and general, we have moves no larger than word size. */
18561 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18562 return true;
18563 }
18564
18565 return false;
18566 }
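/* For instance, on 32-bit targets a DImode move between SSE_REGS and
GENERAL_REGS needs secondary memory even with SSE2 and inter-unit moves
enabled, because GET_MODE_SIZE (DImode) exceeds UNITS_PER_WORD. */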
18567
18568 /* Return true if the registers in CLASS cannot represent the change from
18569 modes FROM to TO. */
18570
18571 bool
18572 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18573 enum reg_class class)
18574 {
18575 if (from == to)
18576 return false;
18577
18578 /* x87 registers can't do subreg at all, as all values are reformatted
18579 to extended precision. */
18580 if (MAYBE_FLOAT_CLASS_P (class))
18581 return true;
18582
18583 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18584 {
18585 /* Vector registers do not support QI or HImode loads. If we don't
18586 disallow a change to these modes, reload will assume it's ok to
18587 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18588 the vec_dupv4hi pattern. */
18589 if (GET_MODE_SIZE (from) < 4)
18590 return true;
18591
18592 /* Vector registers do not support subreg with nonzero offsets, which
18593 are otherwise valid for integer registers. Since we can't see
18594 whether we have a nonzero offset from here, prohibit all
18595 nonparadoxical subregs changing size. */
18596 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18597 return true;
18598 }
18599
18600 return false;
18601 }
18602
18603 /* Return the cost of moving data from a register in class CLASS1 to
18604 one in class CLASS2.
18605
18606 It is not required that the cost always equal 2 when FROM is the same as TO;
18607 on some machines it is expensive to move between registers if they are not
18608 general registers. */
18609
18610 int
18611 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18612 enum reg_class class2)
18613 {
18614 /* In case we require secondary memory, compute the cost of the store
18615 followed by the load. In order to avoid bad register allocation choices,
18616 this needs to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18617
18618 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18619 {
18620 int cost = 1;
18621
18622 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18623 MEMORY_MOVE_COST (mode, class1, 1));
18624 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18625 MEMORY_MOVE_COST (mode, class2, 1));
18626
18627 /* When copying from a general-purpose register we may emit multiple
18628 stores followed by a single load, causing a memory size mismatch stall.
18629 Count this as an arbitrarily high cost of 20. */
18630 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18631 cost += 20;
18632
18633 /* In the case of FP/MMX moves, the registers actually overlap, and we
18634 have to switch modes in order to treat them differently. */
18635 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18636 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18637 cost += 20;
18638
18639 return cost;
18640 }
18641
18642 /* Moves between SSE/MMX and integer unit are expensive. */
18643 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18644 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18645 return ix86_cost->mmxsse_to_integer;
18646 if (MAYBE_FLOAT_CLASS_P (class1))
18647 return ix86_cost->fp_move;
18648 if (MAYBE_SSE_CLASS_P (class1))
18649 return ix86_cost->sse_move;
18650 if (MAYBE_MMX_CLASS_P (class1))
18651 return ix86_cost->mmx_move;
18652 return 2;
18653 }
18654
18655 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18656
18657 bool
18658 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18659 {
18660 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
18661 if (CC_REGNO_P (regno))
18662 return GET_MODE_CLASS (mode) == MODE_CC;
18663 if (GET_MODE_CLASS (mode) == MODE_CC
18664 || GET_MODE_CLASS (mode) == MODE_RANDOM
18665 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18666 return 0;
18667 if (FP_REGNO_P (regno))
18668 return VALID_FP_MODE_P (mode);
18669 if (SSE_REGNO_P (regno))
18670 {
18671 /* We implement the move patterns for all vector modes into and
18672 out of SSE registers, even when no operation instructions
18673 are available. */
18674 return (VALID_SSE_REG_MODE (mode)
18675 || VALID_SSE2_REG_MODE (mode)
18676 || VALID_MMX_REG_MODE (mode)
18677 || VALID_MMX_REG_MODE_3DNOW (mode));
18678 }
18679 if (MMX_REGNO_P (regno))
18680 {
18681 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18682 so if the register is available at all, then we can move data of
18683 the given mode into or out of it. */
18684 return (VALID_MMX_REG_MODE (mode)
18685 || VALID_MMX_REG_MODE_3DNOW (mode));
18686 }
18687
18688 if (mode == QImode)
18689 {
18690 /* Take care for QImode values - they can be in non-QI regs,
18691 but then they do cause partial register stalls. */
18692 if (regno < 4 || TARGET_64BIT)
18693 return 1;
18694 if (!TARGET_PARTIAL_REG_STALL)
18695 return 1;
18696 return reload_in_progress || reload_completed;
18697 }
18698 /* We handle both integers and floats in the general purpose registers. */
18699 else if (VALID_INT_MODE_P (mode))
18700 return 1;
18701 else if (VALID_FP_MODE_P (mode))
18702 return 1;
18703 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18704 on to use that value in smaller contexts, this can easily force a
18705 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18706 supporting DImode, allow it. */
18707 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18708 return 1;
18709
18710 return 0;
18711 }
18712
18713 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18714 tieable integer mode. */
18715
18716 static bool
18717 ix86_tieable_integer_mode_p (enum machine_mode mode)
18718 {
18719 switch (mode)
18720 {
18721 case HImode:
18722 case SImode:
18723 return true;
18724
18725 case QImode:
18726 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18727
18728 case DImode:
18729 return TARGET_64BIT;
18730
18731 default:
18732 return false;
18733 }
18734 }
18735
18736 /* Return true if MODE1 is accessible in a register that can hold MODE2
18737 without copying. That is, all register classes that can hold MODE2
18738 can also hold MODE1. */
18739
18740 bool
18741 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18742 {
18743 if (mode1 == mode2)
18744 return true;
18745
18746 if (ix86_tieable_integer_mode_p (mode1)
18747 && ix86_tieable_integer_mode_p (mode2))
18748 return true;
18749
18750 /* MODE2 being XFmode implies fp stack or general regs, which means we
18751 can tie any smaller floating point modes to it. Note that we do not
18752 tie this with TFmode. */
18753 if (mode2 == XFmode)
18754 return mode1 == SFmode || mode1 == DFmode;
18755
18756 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18757 that we can tie it with SFmode. */
18758 if (mode2 == DFmode)
18759 return mode1 == SFmode;
18760
18761 /* If MODE2 is only appropriate for an SSE register, then tie with
18762 any other mode acceptable to SSE registers. */
18763 if (GET_MODE_SIZE (mode2) >= 8
18764 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18765 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18766
18767 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18768 with any other mode acceptable to MMX registers. */
18769 if (GET_MODE_SIZE (mode2) == 8
18770 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18771 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18772
18773 return false;
18774 }
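/* Note the asymmetry this encodes: ix86_modes_tieable_p (SFmode, DFmode)
is true (everything that can hold DFmode can also hold SFmode), while
ix86_modes_tieable_p (DFmode, SFmode) is false. */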
18775
18776 /* Return the cost of moving data of mode M between a
18777 register and memory. A value of 2 is the default; this cost is
18778 relative to those in `REGISTER_MOVE_COST'.
18779
18780 If moving between registers and memory is more expensive than
18781 between two registers, you should define this macro to express the
18782 relative cost.
18783
18784 Also model the increased cost of moving QImode registers in
18785 non-Q_REGS classes.
18786 */
18787 int
18788 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18789 {
18790 if (FLOAT_CLASS_P (class))
18791 {
18792 int index;
18793 switch (mode)
18794 {
18795 case SFmode:
18796 index = 0;
18797 break;
18798 case DFmode:
18799 index = 1;
18800 break;
18801 case XFmode:
18802 index = 2;
18803 break;
18804 default:
18805 return 100;
18806 }
18807 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18808 }
18809 if (SSE_CLASS_P (class))
18810 {
18811 int index;
18812 switch (GET_MODE_SIZE (mode))
18813 {
18814 case 4:
18815 index = 0;
18816 break;
18817 case 8:
18818 index = 1;
18819 break;
18820 case 16:
18821 index = 2;
18822 break;
18823 default:
18824 return 100;
18825 }
18826 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18827 }
18828 if (MMX_CLASS_P (class))
18829 {
18830 int index;
18831 switch (GET_MODE_SIZE (mode))
18832 {
18833 case 4:
18834 index = 0;
18835 break;
18836 case 8:
18837 index = 1;
18838 break;
18839 default:
18840 return 100;
18841 }
18842 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18843 }
18844 switch (GET_MODE_SIZE (mode))
18845 {
18846 case 1:
18847 if (in)
18848 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18849 : ix86_cost->movzbl_load);
18850 else
18851 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18852 : ix86_cost->int_store[0] + 4);
18853 break;
18854 case 2:
18855 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18856 default:
18857 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18858 if (mode == TFmode)
18859 mode = XFmode;
18860 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18861 * (((int) GET_MODE_SIZE (mode)
18862 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18863 }
18864 }
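/* Illustrative sketch (not from the GCC sources): the default branch above
   charges one SImode move per machine word, rounding the mode size up to a
   whole number of words.  A standalone model of that arithmetic, with
   hypothetical cost values, is guarded out below.  */
#if 0
static int
example_int_memory_move_cost (int mode_size_bytes, int units_per_word,
                              int simode_move_cost)
{
  /* Number of word-sized moves needed, rounded up.  */
  int n_words = (mode_size_bytes + units_per_word - 1) / units_per_word;
  return simode_move_cost * n_words;   /* e.g. (16, 4, 6) -> 24 */
}
#endif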
18865
18866 /* Compute a (partial) cost for rtx X. Return true if the complete
18867 cost has been computed, and false if subexpressions should be
18868 scanned. In either case, *TOTAL contains the cost result. */
18869
18870 static bool
18871 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18872 {
18873 enum machine_mode mode = GET_MODE (x);
18874
18875 switch (code)
18876 {
18877 case CONST_INT:
18878 case CONST:
18879 case LABEL_REF:
18880 case SYMBOL_REF:
18881 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18882 *total = 3;
18883 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18884 *total = 2;
18885 else if (flag_pic && SYMBOLIC_CONST (x)
18886 && (!TARGET_64BIT
18887 || (GET_CODE (x) != LABEL_REF
18888 && (GET_CODE (x) != SYMBOL_REF
18889 || !SYMBOL_REF_LOCAL_P (x)))))
18890 *total = 1;
18891 else
18892 *total = 0;
18893 return true;
18894
18895 case CONST_DOUBLE:
18896 if (mode == VOIDmode)
18897 *total = 0;
18898 else
18899 switch (standard_80387_constant_p (x))
18900 {
18901 case 1: /* 0.0 */
18902 *total = 1;
18903 break;
18904 default: /* Other constants */
18905 *total = 2;
18906 break;
18907 case 0:
18908 case -1:
18909 /* Start with (MEM (SYMBOL_REF)), since that's where
18910 it'll probably end up. Add a penalty for size. */
18911 *total = (COSTS_N_INSNS (1)
18912 + (flag_pic != 0 && !TARGET_64BIT)
18913 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18914 break;
18915 }
18916 return true;
18917
18918 case ZERO_EXTEND:
18919 /* The zero extension is often completely free on x86_64, so make
18920 it as cheap as possible. */
18921 if (TARGET_64BIT && mode == DImode
18922 && GET_MODE (XEXP (x, 0)) == SImode)
18923 *total = 1;
18924 else if (TARGET_ZERO_EXTEND_WITH_AND)
18925 *total = ix86_cost->add;
18926 else
18927 *total = ix86_cost->movzx;
18928 return false;
18929
18930 case SIGN_EXTEND:
18931 *total = ix86_cost->movsx;
18932 return false;
18933
18934 case ASHIFT:
18935 if (CONST_INT_P (XEXP (x, 1))
18936 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18937 {
18938 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18939 if (value == 1)
18940 {
18941 *total = ix86_cost->add;
18942 return false;
18943 }
18944 if ((value == 2 || value == 3)
18945 && ix86_cost->lea <= ix86_cost->shift_const)
18946 {
18947 *total = ix86_cost->lea;
18948 return false;
18949 }
18950 }
18951 /* FALLTHRU */
18952
18953 case ROTATE:
18954 case ASHIFTRT:
18955 case LSHIFTRT:
18956 case ROTATERT:
18957 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18958 {
18959 if (CONST_INT_P (XEXP (x, 1)))
18960 {
18961 if (INTVAL (XEXP (x, 1)) > 32)
18962 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18963 else
18964 *total = ix86_cost->shift_const * 2;
18965 }
18966 else
18967 {
18968 if (GET_CODE (XEXP (x, 1)) == AND)
18969 *total = ix86_cost->shift_var * 2;
18970 else
18971 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18972 }
18973 }
18974 else
18975 {
18976 if (CONST_INT_P (XEXP (x, 1)))
18977 *total = ix86_cost->shift_const;
18978 else
18979 *total = ix86_cost->shift_var;
18980 }
18981 return false;
18982
18983 case MULT:
18984 if (FLOAT_MODE_P (mode))
18985 {
18986 *total = ix86_cost->fmul;
18987 return false;
18988 }
18989 else
18990 {
18991 rtx op0 = XEXP (x, 0);
18992 rtx op1 = XEXP (x, 1);
18993 int nbits;
18994 if (CONST_INT_P (XEXP (x, 1)))
18995 {
18996 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18997 for (nbits = 0; value != 0; value &= value - 1)
18998 nbits++;
18999 }
19000 else
19001 /* This is arbitrary. */
19002 nbits = 7;
19003
19004 /* Compute costs correctly for widening multiplication. */
19005 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19006 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19007 == GET_MODE_SIZE (mode))
19008 {
19009 int is_mulwiden = 0;
19010 enum machine_mode inner_mode = GET_MODE (op0);
19011
19012 if (GET_CODE (op0) == GET_CODE (op1))
19013 is_mulwiden = 1, op1 = XEXP (op1, 0);
19014 else if (CONST_INT_P (op1))
19015 {
19016 if (GET_CODE (op0) == SIGN_EXTEND)
19017 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19018 == INTVAL (op1);
19019 else
19020 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19021 }
19022
19023 if (is_mulwiden)
19024 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19025 }
19026
19027 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19028 + nbits * ix86_cost->mult_bit
19029 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19030
19031 return true;
19032 }
19033
19034 case DIV:
19035 case UDIV:
19036 case MOD:
19037 case UMOD:
19038 if (FLOAT_MODE_P (mode))
19039 *total = ix86_cost->fdiv;
19040 else
19041 *total = ix86_cost->divide[MODE_INDEX (mode)];
19042 return false;
19043
19044 case PLUS:
19045 if (FLOAT_MODE_P (mode))
19046 *total = ix86_cost->fadd;
19047 else if (GET_MODE_CLASS (mode) == MODE_INT
19048 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19049 {
19050 if (GET_CODE (XEXP (x, 0)) == PLUS
19051 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19052 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19053 && CONSTANT_P (XEXP (x, 1)))
19054 {
19055 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19056 if (val == 2 || val == 4 || val == 8)
19057 {
19058 *total = ix86_cost->lea;
19059 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19060 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19061 outer_code);
19062 *total += rtx_cost (XEXP (x, 1), outer_code);
19063 return true;
19064 }
19065 }
19066 else if (GET_CODE (XEXP (x, 0)) == MULT
19067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19068 {
19069 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19070 if (val == 2 || val == 4 || val == 8)
19071 {
19072 *total = ix86_cost->lea;
19073 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19074 *total += rtx_cost (XEXP (x, 1), outer_code);
19075 return true;
19076 }
19077 }
19078 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19079 {
19080 *total = ix86_cost->lea;
19081 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19082 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19083 *total += rtx_cost (XEXP (x, 1), outer_code);
19084 return true;
19085 }
19086 }
19087 /* FALLTHRU */
19088
19089 case MINUS:
19090 if (FLOAT_MODE_P (mode))
19091 {
19092 *total = ix86_cost->fadd;
19093 return false;
19094 }
19095 /* FALLTHRU */
19096
19097 case AND:
19098 case IOR:
19099 case XOR:
19100 if (!TARGET_64BIT && mode == DImode)
19101 {
19102 *total = (ix86_cost->add * 2
19103 + (rtx_cost (XEXP (x, 0), outer_code)
19104 << (GET_MODE (XEXP (x, 0)) != DImode))
19105 + (rtx_cost (XEXP (x, 1), outer_code)
19106 << (GET_MODE (XEXP (x, 1)) != DImode)));
19107 return true;
19108 }
19109 /* FALLTHRU */
19110
19111 case NEG:
19112 if (FLOAT_MODE_P (mode))
19113 {
19114 *total = ix86_cost->fchs;
19115 return false;
19116 }
19117 /* FALLTHRU */
19118
19119 case NOT:
19120 if (!TARGET_64BIT && mode == DImode)
19121 *total = ix86_cost->add * 2;
19122 else
19123 *total = ix86_cost->add;
19124 return false;
19125
19126 case COMPARE:
19127 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19128 && XEXP (XEXP (x, 0), 1) == const1_rtx
19129 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19130 && XEXP (x, 1) == const0_rtx)
19131 {
19132 /* This kind of construct is implemented using test[bwl].
19133 Treat it as if we had an AND. */
19134 *total = (ix86_cost->add
19135 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19136 + rtx_cost (const1_rtx, outer_code));
19137 return true;
19138 }
19139 return false;
19140
19141 case FLOAT_EXTEND:
19142 if (!TARGET_SSE_MATH
19143 || mode == XFmode
19144 || (mode == DFmode && !TARGET_SSE2))
19145 *total = 0;
19146 return false;
19147
19148 case ABS:
19149 if (FLOAT_MODE_P (mode))
19150 *total = ix86_cost->fabs;
19151 return false;
19152
19153 case SQRT:
19154 if (FLOAT_MODE_P (mode))
19155 *total = ix86_cost->fsqrt;
19156 return false;
19157
19158 case UNSPEC:
19159 if (XINT (x, 1) == UNSPEC_TP)
19160 *total = 0;
19161 return false;
19162
19163 default:
19164 return false;
19165 }
19166 }
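/* Illustrative sketch (not from the GCC sources): the MULT case above
   estimates the cost of multiplying by a constant from the number of set
   bits in that constant, using the "clear the lowest set bit" loop.  A
   minimal standalone version of that bit count follows.  */
#if 0
static int
example_count_set_bits (unsigned long value)
{
  int nbits;
  for (nbits = 0; value != 0; value &= value - 1)
    nbits++;
  return nbits;   /* e.g. example_count_set_bits (0x15) == 3 */
}
#endif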
19167
19168 #if TARGET_MACHO
19169
19170 static int current_machopic_label_num;
19171
19172 /* Given a symbol name and its associated stub, write out the
19173 definition of the stub. */
19174
19175 void
19176 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19177 {
19178 unsigned int length;
19179 char *binder_name, *symbol_name, lazy_ptr_name[32];
19180 int label = ++current_machopic_label_num;
19181
19182 /* For 64-bit we shouldn't get here. */
19183 gcc_assert (!TARGET_64BIT);
19184
19185 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19186 symb = (*targetm.strip_name_encoding) (symb);
19187
19188 length = strlen (stub);
19189 binder_name = alloca (length + 32);
19190 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19191
19192 length = strlen (symb);
19193 symbol_name = alloca (length + 32);
19194 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19195
19196 sprintf (lazy_ptr_name, "L%d$lz", label);
19197
19198 if (MACHOPIC_PURE)
19199 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19200 else
19201 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19202
19203 fprintf (file, "%s:\n", stub);
19204 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19205
19206 if (MACHOPIC_PURE)
19207 {
19208 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19209 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19210 fprintf (file, "\tjmp\t*%%edx\n");
19211 }
19212 else
19213 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19214
19215 fprintf (file, "%s:\n", binder_name);
19216
19217 if (MACHOPIC_PURE)
19218 {
19219 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19220 fprintf (file, "\tpushl\t%%eax\n");
19221 }
19222 else
19223 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19224
19225 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19226
19227 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19228 fprintf (file, "%s:\n", lazy_ptr_name);
19229 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19230 fprintf (file, "\t.long %s\n", binder_name);
19231 }
19232
19233 void
19234 darwin_x86_file_end (void)
19235 {
19236 darwin_file_end ();
19237 ix86_file_end ();
19238 }
19239 #endif /* TARGET_MACHO */
19240
19241 /* Order the registers for register allocator. */
19242
19243 void
19244 x86_order_regs_for_local_alloc (void)
19245 {
19246 int pos = 0;
19247 int i;
19248
19249 /* First allocate the local general purpose registers. */
19250 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19251 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19252 reg_alloc_order [pos++] = i;
19253
19254 /* Global general purpose registers. */
19255 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19256 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19257 reg_alloc_order [pos++] = i;
19258
19259 /* x87 registers come first in case we are doing FP math
19260 using them. */
19261 if (!TARGET_SSE_MATH)
19262 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19263 reg_alloc_order [pos++] = i;
19264
19265 /* SSE registers. */
19266 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19267 reg_alloc_order [pos++] = i;
19268 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19269 reg_alloc_order [pos++] = i;
19270
19271 /* x87 registers. */
19272 if (TARGET_SSE_MATH)
19273 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19274 reg_alloc_order [pos++] = i;
19275
19276 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19277 reg_alloc_order [pos++] = i;
19278
19279 /* Initialize the rest of the array, as we do not allocate some
19280 registers at all. */
19281 while (pos < FIRST_PSEUDO_REGISTER)
19282 reg_alloc_order [pos++] = 0;
19283 }
19284
19285 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19286 struct attribute_spec.handler. */
19287 static tree
19288 ix86_handle_struct_attribute (tree *node, tree name,
19289 tree args ATTRIBUTE_UNUSED,
19290 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19291 {
19292 tree *type = NULL;
19293 if (DECL_P (*node))
19294 {
19295 if (TREE_CODE (*node) == TYPE_DECL)
19296 type = &TREE_TYPE (*node);
19297 }
19298 else
19299 type = node;
19300
19301 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19302 || TREE_CODE (*type) == UNION_TYPE)))
19303 {
19304 warning (OPT_Wattributes, "%qs attribute ignored",
19305 IDENTIFIER_POINTER (name));
19306 *no_add_attrs = true;
19307 }
19308
19309 else if ((is_attribute_p ("ms_struct", name)
19310 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19311 || ((is_attribute_p ("gcc_struct", name)
19312 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19313 {
19314 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19315 IDENTIFIER_POINTER (name));
19316 *no_add_attrs = true;
19317 }
19318
19319 return NULL_TREE;
19320 }
19321
19322 static bool
19323 ix86_ms_bitfield_layout_p (tree record_type)
19324 {
19325 return (TARGET_MS_BITFIELD_LAYOUT &&
19326 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19327 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19328 }
19329
19330 /* Returns an expression indicating where the this parameter is
19331 located on entry to the FUNCTION. */
19332
19333 static rtx
19334 x86_this_parameter (tree function)
19335 {
19336 tree type = TREE_TYPE (function);
19337
19338 if (TARGET_64BIT)
19339 {
19340 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19341 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19342 }
19343
19344 if (ix86_function_regparm (type, function) > 0)
19345 {
19346 tree parm;
19347
19348 parm = TYPE_ARG_TYPES (type);
19349 /* Figure out whether or not the function has a variable number of
19350 arguments. */
19351 for (; parm; parm = TREE_CHAIN (parm))
19352 if (TREE_VALUE (parm) == void_type_node)
19353 break;
19354 /* If not, the this parameter is in the first argument. */
19355 if (parm)
19356 {
19357 int regno = 0;
19358 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19359 regno = 2;
19360 return gen_rtx_REG (SImode, regno);
19361 }
19362 }
19363
19364 if (aggregate_value_p (TREE_TYPE (type), type))
19365 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19366 else
19367 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19368 }
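/* Illustrative sketch (not from the GCC sources): where the 32-bit path
   above expects the `this' pointer when it is passed on the stack - just
   above the return address, and one slot higher when the return value is an
   aggregate returned via a hidden pointer.  The helper name is made up.  */
#if 0
static int
example_this_stack_offset (int returns_aggregate)
{
  /* Byte offset from the stack pointer at function entry.  */
  return returns_aggregate ? 8 : 4;
}
#endif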
19369
19370 /* Determine whether x86_output_mi_thunk can succeed. */
19371
19372 static bool
19373 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19374 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19375 HOST_WIDE_INT vcall_offset, tree function)
19376 {
19377 /* 64-bit can handle anything. */
19378 if (TARGET_64BIT)
19379 return true;
19380
19381 /* For 32-bit, everything's fine if we have one free register. */
19382 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19383 return true;
19384
19385 /* Need a free register for vcall_offset. */
19386 if (vcall_offset)
19387 return false;
19388
19389 /* Need a free register for GOT references. */
19390 if (flag_pic && !(*targetm.binds_local_p) (function))
19391 return false;
19392
19393 /* Otherwise ok. */
19394 return true;
19395 }
19396
19397 /* Output the assembler code for a thunk function. THUNK_DECL is the
19398 declaration for the thunk function itself, FUNCTION is the decl for
19399 the target function. DELTA is an immediate constant offset to be
19400 added to THIS. If VCALL_OFFSET is nonzero, the word at
19401 *(*this + vcall_offset) should be added to THIS. */
19402
19403 static void
19404 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19405 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19406 HOST_WIDE_INT vcall_offset, tree function)
19407 {
19408 rtx xops[3];
19409 rtx this = x86_this_parameter (function);
19410 rtx this_reg, tmp;
19411
19412 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19413 pull it in now and let DELTA benefit. */
19414 if (REG_P (this))
19415 this_reg = this;
19416 else if (vcall_offset)
19417 {
19418 /* Put the this parameter into %eax. */
19419 xops[0] = this;
19420 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19421 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19422 }
19423 else
19424 this_reg = NULL_RTX;
19425
19426 /* Adjust the this parameter by a fixed constant. */
19427 if (delta)
19428 {
19429 xops[0] = GEN_INT (delta);
19430 xops[1] = this_reg ? this_reg : this;
19431 if (TARGET_64BIT)
19432 {
19433 if (!x86_64_general_operand (xops[0], DImode))
19434 {
19435 tmp = gen_rtx_REG (DImode, R10_REG);
19436 xops[1] = tmp;
19437 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19438 xops[0] = tmp;
19439 xops[1] = this;
19440 }
19441 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19442 }
19443 else
19444 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19445 }
19446
19447 /* Adjust the this parameter by a value stored in the vtable. */
19448 if (vcall_offset)
19449 {
19450 if (TARGET_64BIT)
19451 tmp = gen_rtx_REG (DImode, R10_REG);
19452 else
19453 {
19454 int tmp_regno = 2 /* ECX */;
19455 if (lookup_attribute ("fastcall",
19456 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19457 tmp_regno = 0 /* EAX */;
19458 tmp = gen_rtx_REG (SImode, tmp_regno);
19459 }
19460
19461 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19462 xops[1] = tmp;
19463 if (TARGET_64BIT)
19464 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19465 else
19466 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19467
19468 /* Adjust the this parameter. */
19469 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19470 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19471 {
19472 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19473 xops[0] = GEN_INT (vcall_offset);
19474 xops[1] = tmp2;
19475 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19476 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19477 }
19478 xops[1] = this_reg;
19479 if (TARGET_64BIT)
19480 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19481 else
19482 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19483 }
19484
19485 /* If necessary, drop THIS back to its stack slot. */
19486 if (this_reg && this_reg != this)
19487 {
19488 xops[0] = this_reg;
19489 xops[1] = this;
19490 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19491 }
19492
19493 xops[0] = XEXP (DECL_RTL (function), 0);
19494 if (TARGET_64BIT)
19495 {
19496 if (!flag_pic || (*targetm.binds_local_p) (function))
19497 output_asm_insn ("jmp\t%P0", xops);
19498 else
19499 {
19500 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19501 tmp = gen_rtx_CONST (Pmode, tmp);
19502 tmp = gen_rtx_MEM (QImode, tmp);
19503 xops[0] = tmp;
19504 output_asm_insn ("jmp\t%A0", xops);
19505 }
19506 }
19507 else
19508 {
19509 if (!flag_pic || (*targetm.binds_local_p) (function))
19510 output_asm_insn ("jmp\t%P0", xops);
19511 else
19512 #if TARGET_MACHO
19513 if (TARGET_MACHO)
19514 {
19515 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19516 tmp = (gen_rtx_SYMBOL_REF
19517 (Pmode,
19518 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19519 tmp = gen_rtx_MEM (QImode, tmp);
19520 xops[0] = tmp;
19521 output_asm_insn ("jmp\t%0", xops);
19522 }
19523 else
19524 #endif /* TARGET_MACHO */
19525 {
19526 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19527 output_set_got (tmp, NULL_RTX);
19528
19529 xops[1] = tmp;
19530 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19531 output_asm_insn ("jmp\t{*}%1", xops);
19532 }
19533 }
19534 }
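/* Illustrative sketch (not from the GCC sources): what the thunk emitted
   above does with the `this' pointer before tail-calling FUNCTION, written
   as plain C.  The helper name is made up.  */
#if 0
static void *
example_thunk_adjust (void *this_ptr, long delta, long vcall_offset)
{
  /* Adjust by the fixed DELTA first.  */
  char *p = (char *) this_ptr + delta;

  /* Then, if requested, add the offset stored at *(*this + vcall_offset),
     i.e. a value fetched through the object's vtable pointer.  */
  if (vcall_offset != 0)
    {
      char *vtbl = *(char **) p;
      p += *(long *) (vtbl + vcall_offset);
    }
  return p;   /* the real thunk then jumps to FUNCTION with this == p */
}
#endif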
19535
19536 static void
19537 x86_file_start (void)
19538 {
19539 default_file_start ();
19540 #if TARGET_MACHO
19541 darwin_file_start ();
19542 #endif
19543 if (X86_FILE_START_VERSION_DIRECTIVE)
19544 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19545 if (X86_FILE_START_FLTUSED)
19546 fputs ("\t.global\t__fltused\n", asm_out_file);
19547 if (ix86_asm_dialect == ASM_INTEL)
19548 fputs ("\t.intel_syntax\n", asm_out_file);
19549 }
19550
19551 int
19552 x86_field_alignment (tree field, int computed)
19553 {
19554 enum machine_mode mode;
19555 tree type = TREE_TYPE (field);
19556
19557 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19558 return computed;
19559 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19560 ? get_inner_array_type (type) : type);
19561 if (mode == DFmode || mode == DCmode
19562 || GET_MODE_CLASS (mode) == MODE_INT
19563 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19564 return MIN (32, computed);
19565 return computed;
19566 }
19567
19568 /* Output assembler code to FILE to increment profiler label # LABELNO
19569 for profiling a function entry. */
19570 void
19571 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19572 {
19573 if (TARGET_64BIT)
19574 if (flag_pic)
19575 {
19576 #ifndef NO_PROFILE_COUNTERS
19577 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19578 #endif
19579 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19580 }
19581 else
19582 {
19583 #ifndef NO_PROFILE_COUNTERS
19584 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19585 #endif
19586 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19587 }
19588 else if (flag_pic)
19589 {
19590 #ifndef NO_PROFILE_COUNTERS
19591 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19592 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19593 #endif
19594 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19595 }
19596 else
19597 {
19598 #ifndef NO_PROFILE_COUNTERS
19599 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19600 PROFILE_COUNT_REGISTER);
19601 #endif
19602 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19603 }
19604 }
19605
19606 /* We don't have exact information about the insn sizes, but we may assume
19607 quite safely that we are informed about all 1 byte insns and memory
19608 address sizes. This is enough to eliminate unnecessary padding in
19609 99% of cases. */
19610
19611 static int
19612 min_insn_size (rtx insn)
19613 {
19614 int l = 0;
19615
19616 if (!INSN_P (insn) || !active_insn_p (insn))
19617 return 0;
19618
19619 /* Discard the alignment insns we have emitted, and jump instructions. */
19620 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19621 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19622 return 0;
19623 if (JUMP_P (insn)
19624 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19625 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19626 return 0;
19627
19628 /* Important case - calls are always 5 bytes.
19629 It is common to have many calls in a row. */
19630 if (CALL_P (insn)
19631 && symbolic_reference_mentioned_p (PATTERN (insn))
19632 && !SIBLING_CALL_P (insn))
19633 return 5;
19634 if (get_attr_length (insn) <= 1)
19635 return 1;
19636
19637 /* For normal instructions we may rely on the sizes of addresses
19638 and the presence of a symbol to require 4 bytes of encoding.
19639 This is not the case for jumps, where references are PC relative. */
19640 if (!JUMP_P (insn))
19641 {
19642 l = get_attr_length_address (insn);
19643 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19644 l = 4;
19645 }
19646 if (l)
19647 return 1+l;
19648 else
19649 return 2;
19650 }
19651
19652 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19653 window. */
19654
19655 static void
19656 ix86_avoid_jump_misspredicts (void)
19657 {
19658 rtx insn, start = get_insns ();
19659 int nbytes = 0, njumps = 0;
19660 int isjump = 0;
19661
19662 /* Look for all minimal intervals of instructions containing 4 jumps.
19663 The intervals are bounded by START and INSN. NBYTES is the total
19664 size of instructions in the interval including INSN and not including
19665 START. When NBYTES is smaller than 16 bytes, it is possible
19666 that the ends of START and INSN fall in the same 16-byte window.
19667
19668 The smallest offset in the window at which INSN can start is when START
19669 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19670 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
19671 */
19672 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19673 {
19674
19675 nbytes += min_insn_size (insn);
19676 if (dump_file)
19677 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19678 INSN_UID (insn), min_insn_size (insn));
19679 if ((JUMP_P (insn)
19680 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19681 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19682 || CALL_P (insn))
19683 njumps++;
19684 else
19685 continue;
19686
19687 while (njumps > 3)
19688 {
19689 start = NEXT_INSN (start);
19690 if ((JUMP_P (start)
19691 && GET_CODE (PATTERN (start)) != ADDR_VEC
19692 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19693 || CALL_P (start))
19694 njumps--, isjump = 1;
19695 else
19696 isjump = 0;
19697 nbytes -= min_insn_size (start);
19698 }
19699 gcc_assert (njumps >= 0);
19700 if (dump_file)
19701 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19702 INSN_UID (start), INSN_UID (insn), nbytes);
19703
19704 if (njumps == 3 && isjump && nbytes < 16)
19705 {
19706 int padsize = 15 - nbytes + min_insn_size (insn);
19707
19708 if (dump_file)
19709 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19710 INSN_UID (insn), padsize);
19711 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19712 }
19713 }
19714 }
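/* Illustrative sketch (not from the GCC sources): when a fourth jump would
   fall into the same 16-byte window, the pass above pads so the current insn
   starts in the next window.  A standalone model of the pad computation,
   using the same size estimates, follows.  NBYTES includes the current
   insn, exactly as in the loop above.  */
#if 0
static int
example_jump_window_padsize (int nbytes, int cur_insn_size)
{
  return 15 - nbytes + cur_insn_size;
}
#endif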
19715
19716 /* The AMD Athlon works faster
19717 when RET is not the destination of a conditional jump and is not directly
19718 preceded by another jump instruction. We avoid the penalty by inserting a
19719 NOP just before the RET instruction in such cases. */
19720 static void
19721 ix86_pad_returns (void)
19722 {
19723 edge e;
19724 edge_iterator ei;
19725
19726 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19727 {
19728 basic_block bb = e->src;
19729 rtx ret = BB_END (bb);
19730 rtx prev;
19731 bool replace = false;
19732
19733 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19734 || !maybe_hot_bb_p (bb))
19735 continue;
19736 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19737 if (active_insn_p (prev) || LABEL_P (prev))
19738 break;
19739 if (prev && LABEL_P (prev))
19740 {
19741 edge e;
19742 edge_iterator ei;
19743
19744 FOR_EACH_EDGE (e, ei, bb->preds)
19745 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19746 && !(e->flags & EDGE_FALLTHRU))
19747 replace = true;
19748 }
19749 if (!replace)
19750 {
19751 prev = prev_active_insn (ret);
19752 if (prev
19753 && ((JUMP_P (prev) && any_condjump_p (prev))
19754 || CALL_P (prev)))
19755 replace = true;
19756 /* Empty functions get a branch mispredict even when the jump destination
19757 is not visible to us. */
19758 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19759 replace = true;
19760 }
19761 if (replace)
19762 {
19763 emit_insn_before (gen_return_internal_long (), ret);
19764 delete_insn (ret);
19765 }
19766 }
19767 }
19768
19769 /* Implement machine specific optimizations. We implement padding of returns
19770 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19771 static void
19772 ix86_reorg (void)
19773 {
19774 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19775 ix86_pad_returns ();
19776 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19777 ix86_avoid_jump_misspredicts ();
19778 }
19779
19780 /* Return nonzero when a QImode register that must be represented via a REX
19781 prefix is used. */
19782 bool
19783 x86_extended_QIreg_mentioned_p (rtx insn)
19784 {
19785 int i;
19786 extract_insn_cached (insn);
19787 for (i = 0; i < recog_data.n_operands; i++)
19788 if (REG_P (recog_data.operand[i])
19789 && REGNO (recog_data.operand[i]) >= 4)
19790 return true;
19791 return false;
19792 }
19793
19794 /* Return nonzero when P points to a register encoded via a REX prefix.
19795 Called via for_each_rtx. */
19796 static int
19797 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19798 {
19799 unsigned int regno;
19800 if (!REG_P (*p))
19801 return 0;
19802 regno = REGNO (*p);
19803 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19804 }
19805
19806 /* Return true when INSN mentions a register that must be encoded using a
19807 REX prefix. */
19808 bool
19809 x86_extended_reg_mentioned_p (rtx insn)
19810 {
19811 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19812 }
19813
19814 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19815 optabs would emit if we didn't have TFmode patterns. */
19816
19817 void
19818 x86_emit_floatuns (rtx operands[2])
19819 {
19820 rtx neglab, donelab, i0, i1, f0, in, out;
19821 enum machine_mode mode, inmode;
19822
19823 inmode = GET_MODE (operands[1]);
19824 gcc_assert (inmode == SImode || inmode == DImode);
19825
19826 out = operands[0];
19827 in = force_reg (inmode, operands[1]);
19828 mode = GET_MODE (out);
19829 neglab = gen_label_rtx ();
19830 donelab = gen_label_rtx ();
19831 f0 = gen_reg_rtx (mode);
19832
19833 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19834
19835 expand_float (out, in, 0);
19836
19837 emit_jump_insn (gen_jump (donelab));
19838 emit_barrier ();
19839
19840 emit_label (neglab);
19841
19842 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19843 1, OPTAB_DIRECT);
19844 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19845 1, OPTAB_DIRECT);
19846 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19847
19848 expand_float (f0, i0, 0);
19849
19850 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19851
19852 emit_label (donelab);
19853 }
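/* Illustrative sketch (not from the GCC sources): the unsigned-to-float
   expansion above, written as plain C for a 64-bit input.  When the sign bit
   is set, the value is halved with the low bit folded back in (so rounding
   is preserved), converted as a signed number, and then doubled.  */
#if 0
#include <stdint.h>

static double
example_u64_to_double (uint64_t u)
{
  if ((int64_t) u >= 0)                  /* fits a signed conversion */
    return (double) (int64_t) u;

  uint64_t half = (u >> 1) | (u & 1);    /* shift right, keep a sticky bit */
  double d = (double) (int64_t) half;
  return d + d;                          /* scale back up */
}
#endif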
19854 \f
19855 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19856 with all elements equal to VAR. Return true if successful. */
19857
19858 static bool
19859 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19860 rtx target, rtx val)
19861 {
19862 enum machine_mode smode, wsmode, wvmode;
19863 rtx x;
19864
19865 switch (mode)
19866 {
19867 case V2SImode:
19868 case V2SFmode:
19869 if (!mmx_ok)
19870 return false;
19871 /* FALLTHRU */
19872
19873 case V2DFmode:
19874 case V2DImode:
19875 case V4SFmode:
19876 case V4SImode:
19877 val = force_reg (GET_MODE_INNER (mode), val);
19878 x = gen_rtx_VEC_DUPLICATE (mode, val);
19879 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19880 return true;
19881
19882 case V4HImode:
19883 if (!mmx_ok)
19884 return false;
19885 if (TARGET_SSE || TARGET_3DNOW_A)
19886 {
19887 val = gen_lowpart (SImode, val);
19888 x = gen_rtx_TRUNCATE (HImode, val);
19889 x = gen_rtx_VEC_DUPLICATE (mode, x);
19890 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19891 return true;
19892 }
19893 else
19894 {
19895 smode = HImode;
19896 wsmode = SImode;
19897 wvmode = V2SImode;
19898 goto widen;
19899 }
19900
19901 case V8QImode:
19902 if (!mmx_ok)
19903 return false;
19904 smode = QImode;
19905 wsmode = HImode;
19906 wvmode = V4HImode;
19907 goto widen;
19908 case V8HImode:
19909 if (TARGET_SSE2)
19910 {
19911 rtx tmp1, tmp2;
19912 /* Extend HImode to SImode using a paradoxical SUBREG. */
19913 tmp1 = gen_reg_rtx (SImode);
19914 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19915 /* Insert the SImode value as low element of V4SImode vector. */
19916 tmp2 = gen_reg_rtx (V4SImode);
19917 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19918 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19919 CONST0_RTX (V4SImode),
19920 const1_rtx);
19921 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19922 /* Cast the V4SImode vector back to a V8HImode vector. */
19923 tmp1 = gen_reg_rtx (V8HImode);
19924 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19925 /* Duplicate the low short through the whole low SImode word. */
19926 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19927 /* Cast the V8HImode vector back to a V4SImode vector. */
19928 tmp2 = gen_reg_rtx (V4SImode);
19929 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19930 /* Replicate the low element of the V4SImode vector. */
19931 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19932 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19933 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19934 return true;
19935 }
19936 smode = HImode;
19937 wsmode = SImode;
19938 wvmode = V4SImode;
19939 goto widen;
19940 case V16QImode:
19941 if (TARGET_SSE2)
19942 {
19943 rtx tmp1, tmp2;
19944 /* Extend QImode to SImode using a paradoxical SUBREG. */
19945 tmp1 = gen_reg_rtx (SImode);
19946 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19947 /* Insert the SImode value as low element of V4SImode vector. */
19948 tmp2 = gen_reg_rtx (V4SImode);
19949 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19950 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19951 CONST0_RTX (V4SImode),
19952 const1_rtx);
19953 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19954 /* Cast the V4SImode vector back to a V16QImode vector. */
19955 tmp1 = gen_reg_rtx (V16QImode);
19956 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19957 /* Duplicate the low byte through the whole low SImode word. */
19958 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19959 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19960 /* Cast the V16QImode vector back to a V4SImode vector. */
19961 tmp2 = gen_reg_rtx (V4SImode);
19962 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19963 /* Replicate the low element of the V4SImode vector. */
19964 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19965 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19966 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19967 return true;
19968 }
19969 smode = QImode;
19970 wsmode = HImode;
19971 wvmode = V8HImode;
19972 goto widen;
19973 widen:
19974 /* Replicate the value once into the next wider mode and recurse. */
19975 val = convert_modes (wsmode, smode, val, true);
19976 x = expand_simple_binop (wsmode, ASHIFT, val,
19977 GEN_INT (GET_MODE_BITSIZE (smode)),
19978 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19979 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19980
19981 x = gen_reg_rtx (wvmode);
19982 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19983 gcc_unreachable ();
19984 emit_move_insn (target, gen_lowpart (mode, x));
19985 return true;
19986
19987 default:
19988 return false;
19989 }
19990 }
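/* Illustrative sketch (not from the GCC sources): the "widen" path above
   replicates a narrow scalar into the next wider scalar mode by shifting
   and ORing, then recurses with the wider vector mode.  The same idea for
   broadcasting a byte through a 32-bit word:  */
#if 0
#include <stdint.h>

static uint32_t
example_broadcast_byte (uint8_t b)
{
  uint32_t x = b;
  x |= x << 8;    /* QImode -> HImode replicate */
  x |= x << 16;   /* HImode -> SImode replicate */
  return x;       /* e.g. 0xab -> 0xabababab */
}
#endif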
19991
19992 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19993 whose ONE_VAR element is VAR, and other elements are zero. Return true
19994 if successful. */
19995
19996 static bool
19997 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
19998 rtx target, rtx var, int one_var)
19999 {
20000 enum machine_mode vsimode;
20001 rtx new_target;
20002 rtx x, tmp;
20003
20004 switch (mode)
20005 {
20006 case V2SFmode:
20007 case V2SImode:
20008 if (!mmx_ok)
20009 return false;
20010 /* FALLTHRU */
20011
20012 case V2DFmode:
20013 case V2DImode:
20014 if (one_var != 0)
20015 return false;
20016 var = force_reg (GET_MODE_INNER (mode), var);
20017 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20018 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20019 return true;
20020
20021 case V4SFmode:
20022 case V4SImode:
20023 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20024 new_target = gen_reg_rtx (mode);
20025 else
20026 new_target = target;
20027 var = force_reg (GET_MODE_INNER (mode), var);
20028 x = gen_rtx_VEC_DUPLICATE (mode, var);
20029 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20030 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20031 if (one_var != 0)
20032 {
20033 /* We need to shuffle the value to the correct position, so
20034 create a new pseudo to store the intermediate result. */
20035
20036 /* With SSE2, we can use the integer shuffle insns. */
20037 if (mode != V4SFmode && TARGET_SSE2)
20038 {
20039 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20040 GEN_INT (1),
20041 GEN_INT (one_var == 1 ? 0 : 1),
20042 GEN_INT (one_var == 2 ? 0 : 1),
20043 GEN_INT (one_var == 3 ? 0 : 1)));
20044 if (target != new_target)
20045 emit_move_insn (target, new_target);
20046 return true;
20047 }
20048
20049 /* Otherwise convert the intermediate result to V4SFmode and
20050 use the SSE1 shuffle instructions. */
20051 if (mode != V4SFmode)
20052 {
20053 tmp = gen_reg_rtx (V4SFmode);
20054 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20055 }
20056 else
20057 tmp = new_target;
20058
20059 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20060 GEN_INT (1),
20061 GEN_INT (one_var == 1 ? 0 : 1),
20062 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20063 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20064
20065 if (mode != V4SFmode)
20066 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20067 else if (tmp != target)
20068 emit_move_insn (target, tmp);
20069 }
20070 else if (target != new_target)
20071 emit_move_insn (target, new_target);
20072 return true;
20073
20074 case V8HImode:
20075 case V16QImode:
20076 vsimode = V4SImode;
20077 goto widen;
20078 case V4HImode:
20079 case V8QImode:
20080 if (!mmx_ok)
20081 return false;
20082 vsimode = V2SImode;
20083 goto widen;
20084 widen:
20085 if (one_var != 0)
20086 return false;
20087
20088 /* Zero extend the variable element to SImode and recurse. */
20089 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20090
20091 x = gen_reg_rtx (vsimode);
20092 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20093 var, one_var))
20094 gcc_unreachable ();
20095
20096 emit_move_insn (target, gen_lowpart (mode, x));
20097 return true;
20098
20099 default:
20100 return false;
20101 }
20102 }
20103
20104 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20105 consisting of the values in VALS. It is known that all elements
20106 except ONE_VAR are constants. Return true if successful. */
20107
20108 static bool
20109 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20110 rtx target, rtx vals, int one_var)
20111 {
20112 rtx var = XVECEXP (vals, 0, one_var);
20113 enum machine_mode wmode;
20114 rtx const_vec, x;
20115
20116 const_vec = copy_rtx (vals);
20117 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20118 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20119
20120 switch (mode)
20121 {
20122 case V2DFmode:
20123 case V2DImode:
20124 case V2SFmode:
20125 case V2SImode:
20126 /* For the two element vectors, it's just as easy to use
20127 the general case. */
20128 return false;
20129
20130 case V4SFmode:
20131 case V4SImode:
20132 case V8HImode:
20133 case V4HImode:
20134 break;
20135
20136 case V16QImode:
20137 wmode = V8HImode;
20138 goto widen;
20139 case V8QImode:
20140 wmode = V4HImode;
20141 goto widen;
20142 widen:
20143 /* There's no way to set one QImode entry easily. Combine
20144 the variable value with its adjacent constant value, and
20145 promote to an HImode set. */
20146 x = XVECEXP (vals, 0, one_var ^ 1);
20147 if (one_var & 1)
20148 {
20149 var = convert_modes (HImode, QImode, var, true);
20150 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20151 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20152 x = GEN_INT (INTVAL (x) & 0xff);
20153 }
20154 else
20155 {
20156 var = convert_modes (HImode, QImode, var, true);
20157 x = gen_int_mode (INTVAL (x) << 8, HImode);
20158 }
20159 if (x != const0_rtx)
20160 var = expand_simple_binop (HImode, IOR, var, x, var,
20161 1, OPTAB_LIB_WIDEN);
20162
20163 x = gen_reg_rtx (wmode);
20164 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20165 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20166
20167 emit_move_insn (target, gen_lowpart (mode, x));
20168 return true;
20169
20170 default:
20171 return false;
20172 }
20173
20174 emit_move_insn (target, const_vec);
20175 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20176 return true;
20177 }
20178
20179 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20180 all values variable, and none identical. */
20181
20182 static void
20183 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20184 rtx target, rtx vals)
20185 {
20186 enum machine_mode half_mode = GET_MODE_INNER (mode);
20187 rtx op0 = NULL, op1 = NULL;
20188 bool use_vec_concat = false;
20189
20190 switch (mode)
20191 {
20192 case V2SFmode:
20193 case V2SImode:
20194 if (!mmx_ok && !TARGET_SSE)
20195 break;
20196 /* FALLTHRU */
20197
20198 case V2DFmode:
20199 case V2DImode:
20200 /* For the two element vectors, we always implement VEC_CONCAT. */
20201 op0 = XVECEXP (vals, 0, 0);
20202 op1 = XVECEXP (vals, 0, 1);
20203 use_vec_concat = true;
20204 break;
20205
20206 case V4SFmode:
20207 half_mode = V2SFmode;
20208 goto half;
20209 case V4SImode:
20210 half_mode = V2SImode;
20211 goto half;
20212 half:
20213 {
20214 rtvec v;
20215
20216 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20217 Recurse to load the two halves. */
20218
20219 op0 = gen_reg_rtx (half_mode);
20220 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20221 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20222
20223 op1 = gen_reg_rtx (half_mode);
20224 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20225 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20226
20227 use_vec_concat = true;
20228 }
20229 break;
20230
20231 case V8HImode:
20232 case V16QImode:
20233 case V4HImode:
20234 case V8QImode:
20235 break;
20236
20237 default:
20238 gcc_unreachable ();
20239 }
20240
20241 if (use_vec_concat)
20242 {
20243 if (!register_operand (op0, half_mode))
20244 op0 = force_reg (half_mode, op0);
20245 if (!register_operand (op1, half_mode))
20246 op1 = force_reg (half_mode, op1);
20247
20248 emit_insn (gen_rtx_SET (VOIDmode, target,
20249 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20250 }
20251 else
20252 {
20253 int i, j, n_elts, n_words, n_elt_per_word;
20254 enum machine_mode inner_mode;
20255 rtx words[4], shift;
20256
20257 inner_mode = GET_MODE_INNER (mode);
20258 n_elts = GET_MODE_NUNITS (mode);
20259 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20260 n_elt_per_word = n_elts / n_words;
20261 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20262
20263 for (i = 0; i < n_words; ++i)
20264 {
20265 rtx word = NULL_RTX;
20266
20267 for (j = 0; j < n_elt_per_word; ++j)
20268 {
20269 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20270 elt = convert_modes (word_mode, inner_mode, elt, true);
20271
20272 if (j == 0)
20273 word = elt;
20274 else
20275 {
20276 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20277 word, 1, OPTAB_LIB_WIDEN);
20278 word = expand_simple_binop (word_mode, IOR, word, elt,
20279 word, 1, OPTAB_LIB_WIDEN);
20280 }
20281 }
20282
20283 words[i] = word;
20284 }
20285
20286 if (n_words == 1)
20287 emit_move_insn (target, gen_lowpart (mode, words[0]));
20288 else if (n_words == 2)
20289 {
20290 rtx tmp = gen_reg_rtx (mode);
20291 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20292 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20293 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20294 emit_move_insn (target, tmp);
20295 }
20296 else if (n_words == 4)
20297 {
20298 rtx tmp = gen_reg_rtx (V4SImode);
20299 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20300 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20301 emit_move_insn (target, gen_lowpart (mode, tmp));
20302 }
20303 else
20304 gcc_unreachable ();
20305 }
20306 }
20307
20308 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20309 instructions unless MMX_OK is true. */
20310
20311 void
20312 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20313 {
20314 enum machine_mode mode = GET_MODE (target);
20315 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20316 int n_elts = GET_MODE_NUNITS (mode);
20317 int n_var = 0, one_var = -1;
20318 bool all_same = true, all_const_zero = true;
20319 int i;
20320 rtx x;
20321
20322 for (i = 0; i < n_elts; ++i)
20323 {
20324 x = XVECEXP (vals, 0, i);
20325 if (!CONSTANT_P (x))
20326 n_var++, one_var = i;
20327 else if (x != CONST0_RTX (inner_mode))
20328 all_const_zero = false;
20329 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20330 all_same = false;
20331 }
20332
20333 /* Constants are best loaded from the constant pool. */
20334 if (n_var == 0)
20335 {
20336 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20337 return;
20338 }
20339
20340 /* If all values are identical, broadcast the value. */
20341 if (all_same
20342 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20343 XVECEXP (vals, 0, 0)))
20344 return;
20345
20346 /* Values where only one field is non-constant are best loaded from
20347 the pool and overwritten via move later. */
20348 if (n_var == 1)
20349 {
20350 if (all_const_zero
20351 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20352 XVECEXP (vals, 0, one_var),
20353 one_var))
20354 return;
20355
20356 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20357 return;
20358 }
20359
20360 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20361 }
20362
20363 void
20364 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20365 {
20366 enum machine_mode mode = GET_MODE (target);
20367 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20368 bool use_vec_merge = false;
20369 rtx tmp;
20370
20371 switch (mode)
20372 {
20373 case V2SFmode:
20374 case V2SImode:
20375 if (mmx_ok)
20376 {
20377 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20378 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20379 if (elt == 0)
20380 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20381 else
20382 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20383 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20384 return;
20385 }
20386 break;
20387
20388 case V2DFmode:
20389 case V2DImode:
20390 {
20391 rtx op0, op1;
20392
20393 /* For the two element vectors, we implement a VEC_CONCAT with
20394 the extraction of the other element. */
20395
20396 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20397 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20398
20399 if (elt == 0)
20400 op0 = val, op1 = tmp;
20401 else
20402 op0 = tmp, op1 = val;
20403
20404 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20405 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20406 }
20407 return;
20408
20409 case V4SFmode:
20410 switch (elt)
20411 {
20412 case 0:
20413 use_vec_merge = true;
20414 break;
20415
20416 case 1:
20417 /* tmp = target = A B C D */
20418 tmp = copy_to_reg (target);
20419 /* target = A A B B */
20420 emit_insn (gen_sse_unpcklps (target, target, target));
20421 /* target = X A B B */
20422 ix86_expand_vector_set (false, target, val, 0);
20423 /* target = A X C D */
20424 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20425 GEN_INT (1), GEN_INT (0),
20426 GEN_INT (2+4), GEN_INT (3+4)));
20427 return;
20428
20429 case 2:
20430 /* tmp = target = A B C D */
20431 tmp = copy_to_reg (target);
20432 /* tmp = X B C D */
20433 ix86_expand_vector_set (false, tmp, val, 0);
20434 /* target = A B X D */
20435 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20436 GEN_INT (0), GEN_INT (1),
20437 GEN_INT (0+4), GEN_INT (3+4)));
20438 return;
20439
20440 case 3:
20441 /* tmp = target = A B C D */
20442 tmp = copy_to_reg (target);
20443 /* tmp = X B C D */
20444 ix86_expand_vector_set (false, tmp, val, 0);
20445 /* target = A B X D */
20446 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20447 GEN_INT (0), GEN_INT (1),
20448 GEN_INT (2+4), GEN_INT (0+4)));
20449 return;
20450
20451 default:
20452 gcc_unreachable ();
20453 }
20454 break;
20455
20456 case V4SImode:
20457 /* Element 0 handled by vec_merge below. */
20458 if (elt == 0)
20459 {
20460 use_vec_merge = true;
20461 break;
20462 }
20463
20464 if (TARGET_SSE2)
20465 {
20466 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20467 store into element 0, then shuffle them back. */
20468
20469 rtx order[4];
20470
20471 order[0] = GEN_INT (elt);
20472 order[1] = const1_rtx;
20473 order[2] = const2_rtx;
20474 order[3] = GEN_INT (3);
20475 order[elt] = const0_rtx;
20476
20477 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20478 order[1], order[2], order[3]));
20479
20480 ix86_expand_vector_set (false, target, val, 0);
20481
20482 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20483 order[1], order[2], order[3]));
20484 }
20485 else
20486 {
20487 /* For SSE1, we have to reuse the V4SF code. */
20488 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20489 gen_lowpart (SFmode, val), elt);
20490 }
20491 return;
20492
20493 case V8HImode:
20494 use_vec_merge = TARGET_SSE2;
20495 break;
20496 case V4HImode:
20497 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20498 break;
20499
20500 case V16QImode:
20501 case V8QImode:
20502 default:
20503 break;
20504 }
20505
20506 if (use_vec_merge)
20507 {
20508 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20509 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20510 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20511 }
20512 else
20513 {
20514 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20515
20516 emit_move_insn (mem, target);
20517
20518 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20519 emit_move_insn (tmp, val);
20520
20521 emit_move_insn (target, mem);
20522 }
20523 }
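/* Illustrative sketch (not from the GCC sources): the vec_merge form used
   above picks, per element, between a broadcast of VAL and the previous
   contents of TARGET according to the mask (1 << elt).  Scalar equivalent
   for a four-element vector:  */
#if 0
static void
example_vec_merge_set (float target[4], float val, int elt)
{
  int mask = 1 << elt;
  int i;

  for (i = 0; i < 4; i++)
    if (mask & (1 << i))
      target[i] = val;    /* selected lane takes the duplicated VAL */
    /* other lanes keep their old value */
}
#endif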
20524
20525 void
20526 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20527 {
20528 enum machine_mode mode = GET_MODE (vec);
20529 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20530 bool use_vec_extr = false;
20531 rtx tmp;
20532
20533 switch (mode)
20534 {
20535 case V2SImode:
20536 case V2SFmode:
20537 if (!mmx_ok)
20538 break;
20539 /* FALLTHRU */
20540
20541 case V2DFmode:
20542 case V2DImode:
20543 use_vec_extr = true;
20544 break;
20545
20546 case V4SFmode:
20547 switch (elt)
20548 {
20549 case 0:
20550 tmp = vec;
20551 break;
20552
20553 case 1:
20554 case 3:
20555 tmp = gen_reg_rtx (mode);
20556 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20557 GEN_INT (elt), GEN_INT (elt),
20558 GEN_INT (elt+4), GEN_INT (elt+4)));
20559 break;
20560
20561 case 2:
20562 tmp = gen_reg_rtx (mode);
20563 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20564 break;
20565
20566 default:
20567 gcc_unreachable ();
20568 }
20569 vec = tmp;
20570 use_vec_extr = true;
20571 elt = 0;
20572 break;
20573
20574 case V4SImode:
20575 if (TARGET_SSE2)
20576 {
20577 switch (elt)
20578 {
20579 case 0:
20580 tmp = vec;
20581 break;
20582
20583 case 1:
20584 case 3:
20585 tmp = gen_reg_rtx (mode);
20586 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20587 GEN_INT (elt), GEN_INT (elt),
20588 GEN_INT (elt), GEN_INT (elt)));
20589 break;
20590
20591 case 2:
20592 tmp = gen_reg_rtx (mode);
20593 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20594 break;
20595
20596 default:
20597 gcc_unreachable ();
20598 }
20599 vec = tmp;
20600 use_vec_extr = true;
20601 elt = 0;
20602 }
20603 else
20604 {
20605 /* For SSE1, we have to reuse the V4SF code. */
20606 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20607 gen_lowpart (V4SFmode, vec), elt);
20608 return;
20609 }
20610 break;
20611
20612 case V8HImode:
20613 use_vec_extr = TARGET_SSE2;
20614 break;
20615 case V4HImode:
20616 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20617 break;
20618
20619 case V16QImode:
20620 case V8QImode:
20621 /* ??? Could extract the appropriate HImode element and shift. */
20622 default:
20623 break;
20624 }
20625
20626 if (use_vec_extr)
20627 {
20628 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20629 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20630
20631 /* Let the rtl optimizers know about the zero extension performed. */
20632 if (inner_mode == HImode)
20633 {
20634 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20635 target = gen_lowpart (SImode, target);
20636 }
20637
20638 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20639 }
20640 else
20641 {
20642 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20643
20644 emit_move_insn (mem, vec);
20645
20646 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20647 emit_move_insn (target, tmp);
20648 }
20649 }
20650
20651 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20652 pattern to reduce; DEST is the destination; IN is the input vector. */
20653
20654 void
20655 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20656 {
20657 rtx tmp1, tmp2, tmp3;
20658
20659 tmp1 = gen_reg_rtx (V4SFmode);
20660 tmp2 = gen_reg_rtx (V4SFmode);
20661 tmp3 = gen_reg_rtx (V4SFmode);
20662
20663 emit_insn (gen_sse_movhlps (tmp1, in, in));
20664 emit_insn (fn (tmp2, tmp1, in));
20665
20666 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20667 GEN_INT (1), GEN_INT (1),
20668 GEN_INT (1+4), GEN_INT (1+4)));
20669 emit_insn (fn (dest, tmp2, tmp3));
20670 }
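/* Illustrative sketch (not from the GCC sources): the reduction above
   combines lanes pairwise - movhlps folds the high half onto the low half,
   the shuffle then exposes lane 1 - so lane 0 of DEST ends up as
   fn (fn (in2, in0), fn (in3, in1)).  Scalar equivalent with addition
   standing in for FN:  */
#if 0
static float
example_reduce_v4sf_add (const float in[4])
{
  float t0 = in[2] + in[0];   /* movhlps + fn */
  float t1 = in[3] + in[1];
  return t0 + t1;             /* shufps + fn */
}
#endif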
20671 \f
20672 /* Target hook for scalar_mode_supported_p. */
20673 static bool
20674 ix86_scalar_mode_supported_p (enum machine_mode mode)
20675 {
20676 if (DECIMAL_FLOAT_MODE_P (mode))
20677 return true;
20678 else
20679 return default_scalar_mode_supported_p (mode);
20680 }
20681
20682 /* Implements target hook vector_mode_supported_p. */
20683 static bool
20684 ix86_vector_mode_supported_p (enum machine_mode mode)
20685 {
20686 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20687 return true;
20688 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20689 return true;
20690 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20691 return true;
20692 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20693 return true;
20694 return false;
20695 }
20696
20697 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20698
20699 We do this in the new i386 backend to maintain source compatibility
20700 with the old cc0-based compiler. */
20701
20702 static tree
20703 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20704 tree inputs ATTRIBUTE_UNUSED,
20705 tree clobbers)
20706 {
20707 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20708 clobbers);
20709 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20710 clobbers);
20711 return clobbers;
20712 }
20713
20714 /* Return true if this goes in large data/bss. */
20715
20716 static bool
20717 ix86_in_large_data_p (tree exp)
20718 {
20719 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20720 return false;
20721
20722 /* Functions are never large data. */
20723 if (TREE_CODE (exp) == FUNCTION_DECL)
20724 return false;
20725
20726 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20727 {
20728 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20729 if (strcmp (section, ".ldata") == 0
20730 || strcmp (section, ".lbss") == 0)
20731 return true;
20732 return false;
20733 }
20734 else
20735 {
20736 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20737
20738 /* If this is an incomplete type with size 0, then we can't put it
20739 in data because it might be too big when completed. */
20740 if (!size || size > ix86_section_threshold)
20741 return true;
20742 }
20743
20744 return false;
20745 }
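
/* Example (illustrative) for ix86_in_large_data_p: when compiling
   with -mcmodel=medium, an object larger than the threshold checked
   above, e.g.

     static char big_buffer[1 << 20];

   is treated as large data and placed in .ldata/.lbss, while smaller
   objects stay in the ordinary .data/.bss sections.  The threshold is
   the -mlarge-data-threshold= value (assumption: that option is what
   sets ix86_section_threshold).  */
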
20746 static void
20747 ix86_encode_section_info (tree decl, rtx rtl, int first)
20748 {
20749 default_encode_section_info (decl, rtl, first);
20750
20751 if (TREE_CODE (decl) == VAR_DECL
20752 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20753 && ix86_in_large_data_p (decl))
20754 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20755 }
20756
20757 /* Worker function for REVERSE_CONDITION. */
20758
20759 enum rtx_code
20760 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20761 {
20762 return (mode != CCFPmode && mode != CCFPUmode
20763 ? reverse_condition (code)
20764 : reverse_condition_maybe_unordered (code));
20765 }
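
/* Example: reversing GT for an integer condition code yields LE, but
   for a floating-point condition code (CCFPmode/CCFPUmode) GT is
   reversed to UNLE, so NaN operands still take the reversed branch.  */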
20766
20767 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20768 to OPERANDS[0]. */
20769
20770 const char *
20771 output_387_reg_move (rtx insn, rtx *operands)
20772 {
20773 if (REG_P (operands[1])
20774 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20775 {
20776 if (REGNO (operands[0]) == FIRST_STACK_REG)
20777 return output_387_ffreep (operands, 0);
20778 return "fstp\t%y0";
20779 }
20780 if (STACK_TOP_P (operands[0]))
20781 return "fld%z1\t%y1";
20782 return "fst\t%y0";
20783 }
20784
20785 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20786 FP status register is set. */
20787
20788 void
20789 ix86_emit_fp_unordered_jump (rtx label)
20790 {
20791 rtx reg = gen_reg_rtx (HImode);
20792 rtx temp;
20793
20794 emit_insn (gen_x86_fnstsw_1 (reg));
20795
20796 if (TARGET_USE_SAHF)
20797 {
20798 emit_insn (gen_x86_sahf_1 (reg));
20799
20800 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20801 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20802 }
20803 else
20804 {
20805 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20806
20807 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20808 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20809 }
20810
20811 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20812 gen_rtx_LABEL_REF (VOIDmode, label),
20813 pc_rtx);
20814 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20815 emit_jump_insn (temp);
20816 }
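
/* The emitted sequence is roughly (sketch; the exact templates come
   from the patterns used above):

     fnstsw  %ax
     sahf                    # with TARGET_USE_SAHF
     jp      label           # C2 was copied into PF
   or
     fnstsw  %ax
     testb   $0x04, %ah      # C2 is bit 2 of the upper status byte
     jne     label
  */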
20817
20818 /* Output code to perform a log1p XFmode calculation. */
20819
20820 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20821 {
20822 rtx label1 = gen_label_rtx ();
20823 rtx label2 = gen_label_rtx ();
20824
20825 rtx tmp = gen_reg_rtx (XFmode);
20826 rtx tmp2 = gen_reg_rtx (XFmode);
20827
20828 emit_insn (gen_absxf2 (tmp, op1));
20829 emit_insn (gen_cmpxf (tmp,
20830 CONST_DOUBLE_FROM_REAL_VALUE (
20831 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20832 XFmode)));
20833 emit_jump_insn (gen_bge (label1));
20834
20835 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20836 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20837 emit_jump (label2);
20838
20839 emit_label (label1);
20840 emit_move_insn (tmp, CONST1_RTX (XFmode));
20841 emit_insn (gen_addxf3 (tmp, op1, tmp));
20842 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20843 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20844
20845 emit_label (label2);
20846 }
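
/* Sketch of the math: log1p (x) is computed as ln (2) * log2 (1 + x).
   The threshold 0.29289...  is 1 - sqrt (2) / 2; for |x| below it the
   fyl2xp1 instruction (y * log2 (x + 1)) is accurate and is used
   directly, otherwise fyl2x is applied to 1 + x.  In both cases the
   y operand is the fldln2 constant, giving the natural logarithm.  */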
20847
20848 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20849
20850 static void
20851 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20852 tree decl)
20853 {
20854 /* With Binutils 2.15, the "@unwind" marker must be specified on
20855 every occurrence of the ".eh_frame" section, not just the first
20856 one. */
20857 if (TARGET_64BIT
20858 && strcmp (name, ".eh_frame") == 0)
20859 {
20860 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20861 flags & SECTION_WRITE ? "aw" : "a");
20862 return;
20863 }
20864 default_elf_asm_named_section (name, flags, decl);
20865 }
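
/* For example, a 64-bit Solaris compilation emits

     .section .eh_frame,"a",@unwind

   (or "aw" when the section is writable) instead of the default
   .section directive.  */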
20866
20867 /* Return the mangling of TYPE if it is an extended fundamental type. */
20868
20869 static const char *
20870 ix86_mangle_fundamental_type (tree type)
20871 {
20872 switch (TYPE_MODE (type))
20873 {
20874 case TFmode:
20875 /* __float128 is "g". */
20876 return "g";
20877 case XFmode:
20878 /* "long double" or __float80 is "e". */
20879 return "e";
20880 default:
20881 return NULL;
20882 }
20883 }
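
/* Example (Itanium C++ ABI mangling) using the codes above:

     void f (__float128);    mangles to _Z1fg
     void f (long double);   mangles to _Z1fe
  */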
20884
20885 /* For 32-bit code we can save PIC register setup by using the
20886 __stack_chk_fail_local hidden function instead of calling
20887 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20888 register, so it is better to call __stack_chk_fail directly. */
20889
20890 static tree
20891 ix86_stack_protect_fail (void)
20892 {
20893 return TARGET_64BIT
20894 ? default_external_stack_protect_fail ()
20895 : default_hidden_stack_protect_fail ();
20896 }
20897
20898 /* Select a format to encode pointers in exception handling data. CODE
20899 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20900 true if the symbol may be affected by dynamic relocations.
20901
20902 ??? All x86 object file formats are capable of representing this.
20903 After all, the relocation needed is the same as for the call insn.
20904 Whether a particular assembler allows us to emit such a relocation
20905 remains to be seen. */
20906 int
20907 asm_preferred_eh_data_format (int code, int global)
20908 {
20909 if (flag_pic)
20910 {
20911 int type = DW_EH_PE_sdata8;
20912 if (!TARGET_64BIT
20913 || ix86_cmodel == CM_SMALL_PIC
20914 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20915 type = DW_EH_PE_sdata4;
20916 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20917 }
20918 if (ix86_cmodel == CM_SMALL
20919 || (ix86_cmodel == CM_MEDIUM && code))
20920 return DW_EH_PE_udata4;
20921 return DW_EH_PE_absptr;
20922 }
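
/* Examples of the encodings selected above:
   - 32-bit PIC data reference:  DW_EH_PE_pcrel | DW_EH_PE_sdata4,
     plus DW_EH_PE_indirect when GLOBAL is set;
   - 64-bit small-PIC code label:  DW_EH_PE_pcrel | DW_EH_PE_sdata4;
   - non-PIC small model (or medium-model code labels):  DW_EH_PE_udata4;
   - everything else:  DW_EH_PE_absptr.  */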
20923 \f
20924 /* Copy the sign of SIGN onto the positive value ABS_VALUE and store
20925 the result in RESULT. If MASK is non-null, it is the mask that
20926 clears the sign bit (as produced by ix86_expand_sse_fabs). */
20927 static void
20928 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20929 {
20930 enum machine_mode mode = GET_MODE (sign);
20931 rtx sgn = gen_reg_rtx (mode);
20932 if (mask == NULL_RTX)
20933 {
20934 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20935 if (!VECTOR_MODE_P (mode))
20936 {
20937 /* We need to generate a scalar mode mask in this case. */
20938 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20939 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20940 mask = gen_reg_rtx (mode);
20941 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20942 }
20943 }
20944 else
20945 mask = gen_rtx_NOT (mode, mask);
20946 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20947 gen_rtx_AND (mode, mask, sign)));
20948 emit_insn (gen_rtx_SET (VOIDmode, result,
20949 gen_rtx_IOR (mode, abs_value, sgn)));
20950 }
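
/* Bit-level sketch of what is emitted above (SIGN_MASK has only the
   sign bit set):

     sgn    = sign & SIGN_MASK;        andps/andpd
     result = abs_value | sgn;         orps/orpd
  */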
20951
20952 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20953 mask for masking out the sign-bit is stored in *SMASK, if that is
20954 non-null. */
20955 static rtx
20956 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20957 {
20958 enum machine_mode mode = GET_MODE (op0);
20959 rtx xa, mask;
20960
20961 xa = gen_reg_rtx (mode);
20962 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20963 if (!VECTOR_MODE_P (mode))
20964 {
20965 /* We need to generate a scalar mode mask in this case. */
20966 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20967 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20968 mask = gen_reg_rtx (mode);
20969 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20970 }
20971 emit_insn (gen_rtx_SET (VOIDmode, xa,
20972 gen_rtx_AND (mode, op0, mask)));
20973
20974 if (smask)
20975 *smask = mask;
20976
20977 return xa;
20978 }
20979
20980 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20981 swapping the operands if SWAP_OPERANDS is true. The expanded
20982 code is a forward jump to a newly created label in case the
20983 comparison is true. The generated label rtx is returned. */
20984 static rtx
20985 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20986 bool swap_operands)
20987 {
20988 rtx label, tmp;
20989
20990 if (swap_operands)
20991 {
20992 tmp = op0;
20993 op0 = op1;
20994 op1 = tmp;
20995 }
20996
20997 label = gen_label_rtx ();
20998 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
20999 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21000 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21001 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21002 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21003 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21004 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21005 JUMP_LABEL (tmp) = label;
21006
21007 return label;
21008 }
21009
21010 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21011 using comparison code CODE. Operands are swapped for the comparison if
21012 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21013 static rtx
21014 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21015 bool swap_operands)
21016 {
21017 enum machine_mode mode = GET_MODE (op0);
21018 rtx mask = gen_reg_rtx (mode);
21019
21020 if (swap_operands)
21021 {
21022 rtx tmp = op0;
21023 op0 = op1;
21024 op1 = tmp;
21025 }
21026
21027 if (mode == DFmode)
21028 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21029 gen_rtx_fmt_ee (code, mode, op0, op1)));
21030 else
21031 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21032 gen_rtx_fmt_ee (code, mode, op0, op1)));
21033
21034 return mask;
21035 }
21036
21037 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21038 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21039 static rtx
21040 ix86_gen_TWO52 (enum machine_mode mode)
21041 {
21042 REAL_VALUE_TYPE TWO52r;
21043 rtx TWO52;
21044
21045 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21046 TWO52 = const_double_from_real_value (TWO52r, mode);
21047 TWO52 = force_reg (mode, TWO52);
21048
21049 return TWO52;
21050 }
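
/* The constant is 2^52 = 4503599627370496.0 for DFmode and
   2^23 = 8388608.0 for SFmode; at and above that magnitude the format
   has no fraction bits, which is what the rounding sequences below
   rely on.  */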
21051
21052 /* Expand SSE sequence for computing lround from OP1 storing
21053 into OP0. */
21054 void
21055 ix86_expand_lround (rtx op0, rtx op1)
21056 {
21057 /* C code for the stuff we're doing below:
21058 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21059 return (long)tmp;
21060 */
21061 enum machine_mode mode = GET_MODE (op1);
21062 const struct real_format *fmt;
21063 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21064 rtx adj;
21065
21066 /* load nextafter (0.5, 0.0) */
21067 fmt = REAL_MODE_FORMAT (mode);
21068 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21069 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21070
21071 /* adj = copysign (0.5, op1) */
21072 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21073 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21074
21075 /* adj = op1 + adj */
21076 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21077
21078 /* op0 = (imode)adj */
21079 expand_fix (op0, adj, 0);
21080 }
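
/* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative): with
   pred_half an input just below one half is not dragged up by the
   addition, e.g. for doubles

     x = 0x1.fffffffffffffp-2;                   nextafter (0.5, 0.0)
     x + pred_half == 0x1.fffffffffffffp-1 < 1.0   ->  lround (x) == 0

   whereas x + 0.5 would round up to 1.0 and give the wrong answer.
   Exact halves such as 2.5 still round away from zero.  */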
21081
21082 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
21083 storing into OPERAND0. */
21084 void
21085 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21086 {
21087 /* C code for the stuff we're doing below (for do_floor):
21088 xi = (long)op1;
21089 xi -= (double)xi > op1 ? 1 : 0;
21090 return xi;
21091 */
21092 enum machine_mode fmode = GET_MODE (op1);
21093 enum machine_mode imode = GET_MODE (op0);
21094 rtx ireg, freg, label, tmp;
21095
21096 /* reg = (long)op1 */
21097 ireg = gen_reg_rtx (imode);
21098 expand_fix (ireg, op1, 0);
21099
21100 /* freg = (double)reg */
21101 freg = gen_reg_rtx (fmode);
21102 expand_float (freg, ireg, 0);
21103
21104 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21105 label = ix86_expand_sse_compare_and_jump (UNLE,
21106 freg, op1, !do_floor);
21107 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21108 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21109 emit_move_insn (ireg, tmp);
21110
21111 emit_label (label);
21112 LABEL_NUSES (label) = 1;
21113
21114 emit_move_insn (op0, ireg);
21115 }
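
/* Worked example of the compensation above, for do_floor and
   op1 = -1.5:

     ireg = (long) -1.5    ->  -1             (truncation)
     (double) -1 > -1.5    ->  true, so the UNLE branch is not taken
     ireg = -1 - 1         ->  -2  ==  floor (-1.5)
  */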
21116
21117 /* Expand rint (round to integral value in the current rounding mode),
21118 rounding OPERAND1 and storing the result in OPERAND0. */
21119 void
21120 ix86_expand_rint (rtx operand0, rtx operand1)
21121 {
21122 /* C code for the stuff we're doing below:
21123 xa = fabs (operand1);
21124 if (!isless (xa, 2**52))
21125 return operand1;
21126 xa = xa + 2**52 - 2**52;
21127 return copysign (xa, operand1);
21128 */
21129 enum machine_mode mode = GET_MODE (operand0);
21130 rtx res, xa, label, TWO52, mask;
21131
21132 res = gen_reg_rtx (mode);
21133 emit_move_insn (res, operand1);
21134
21135 /* xa = abs (operand1) */
21136 xa = ix86_expand_sse_fabs (res, &mask);
21137
21138 /* if (!isless (xa, TWO52)) goto label; */
21139 TWO52 = ix86_gen_TWO52 (mode);
21140 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21141
21142 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21143 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21144
21145 ix86_sse_copysign_to_positive (res, xa, res, mask);
21146
21147 emit_label (label);
21148 LABEL_NUSES (label) = 1;
21149
21150 emit_move_insn (operand0, res);
21151 }
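
/* The TWO52 trick used above (sketch): for |x| < 2^52, computing
   (x + 2^52) - 2^52 in DFmode leaves x rounded to an integer in the
   current rounding mode, because values of magnitude 2^52 or more
   have no fraction bits.  Inputs with |x| >= 2^52 are already
   integral and are returned unchanged via the branch.  */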
21152
21153 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21154 into OPERAND0, avoiding DImode truncation, which needs a 64-bit target. */
21155 void
21156 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21157 {
21158 /* C code for the stuff we expand below.
21159 double xa = fabs (x), x2;
21160 if (!isless (xa, TWO52))
21161 return x;
21162 xa = xa + TWO52 - TWO52;
21163 x2 = copysign (xa, x);
21164 Compensate. Floor:
21165 if (x2 > x)
21166 x2 -= 1;
21167 Compensate. Ceil:
21168 if (x2 < x)
21169 x2 -= -1;
21170 return x2;
21171 */
21172 enum machine_mode mode = GET_MODE (operand0);
21173 rtx xa, TWO52, tmp, label, one, res, mask;
21174
21175 TWO52 = ix86_gen_TWO52 (mode);
21176
21177 /* Temporary for holding the result, initialized to the input
21178 operand to ease control flow. */
21179 res = gen_reg_rtx (mode);
21180 emit_move_insn (res, operand1);
21181
21182 /* xa = abs (operand1) */
21183 xa = ix86_expand_sse_fabs (res, &mask);
21184
21185 /* if (!isless (xa, TWO52)) goto label; */
21186 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21187
21188 /* xa = xa + TWO52 - TWO52; */
21189 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21190 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21191
21192 /* xa = copysign (xa, operand1) */
21193 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21194
21195 /* generate 1.0 or -1.0 */
21196 one = force_reg (mode,
21197 const_double_from_real_value (do_floor
21198 ? dconst1 : dconstm1, mode));
21199
21200 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21201 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21202 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21203 gen_rtx_AND (mode, one, tmp)));
21204 /* We always need to subtract here to preserve signed zero. */
21205 tmp = expand_simple_binop (mode, MINUS,
21206 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21207 emit_move_insn (res, tmp);
21208
21209 emit_label (label);
21210 LABEL_NUSES (label) = 1;
21211
21212 emit_move_insn (operand0, res);
21213 }
21214
21215 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21216 into OPERAND0. */
21217 void
21218 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21219 {
21220 /* C code for the stuff we expand below.
21221 double xa = fabs (x), x2;
21222 if (!isless (xa, TWO52))
21223 return x;
21224 x2 = (double)(long)x;
21225 Compensate. Floor:
21226 if (x2 > x)
21227 x2 -= 1;
21228 Compensate. Ceil:
21229 if (x2 < x)
21230 x2 += 1;
21231 if (HONOR_SIGNED_ZEROS (mode))
21232 return copysign (x2, x);
21233 return x2;
21234 */
21235 enum machine_mode mode = GET_MODE (operand0);
21236 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21237
21238 TWO52 = ix86_gen_TWO52 (mode);
21239
21240 /* Temporary for holding the result, initialized to the input
21241 operand to ease control flow. */
21242 res = gen_reg_rtx (mode);
21243 emit_move_insn (res, operand1);
21244
21245 /* xa = abs (operand1) */
21246 xa = ix86_expand_sse_fabs (res, &mask);
21247
21248 /* if (!isless (xa, TWO52)) goto label; */
21249 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21250
21251 /* xa = (double)(long)x */
21252 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21253 expand_fix (xi, res, 0);
21254 expand_float (xa, xi, 0);
21255
21256 /* generate 1.0 */
21257 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21258
21259 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21260 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21261 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21262 gen_rtx_AND (mode, one, tmp)));
21263 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21264 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21265 emit_move_insn (res, tmp);
21266
21267 if (HONOR_SIGNED_ZEROS (mode))
21268 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21269
21270 emit_label (label);
21271 LABEL_NUSES (label) = 1;
21272
21273 emit_move_insn (operand0, res);
21274 }
21275
21276 /* Expand SSE sequence for computing round from OPERAND1 storing
21277 into OPERAND0. This sequence works without relying on DImode truncation
21278 via cvttsd2siq, which is only available on 64-bit targets. */
21279 void
21280 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21281 {
21282 /* C code for the stuff we expand below.
21283 double xa = fabs (x), xa2, x2;
21284 if (!isless (xa, TWO52))
21285 return x;
21286 Using the absolute value and copying back sign makes
21287 -0.0 -> -0.0 correct.
21288 xa2 = xa + TWO52 - TWO52;
21289 Compensate.
21290 dxa = xa2 - xa;
21291 if (dxa <= -0.5)
21292 xa2 += 1;
21293 else if (dxa > 0.5)
21294 xa2 -= 1;
21295 x2 = copysign (xa2, x);
21296 return x2;
21297 */
21298 enum machine_mode mode = GET_MODE (operand0);
21299 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21300
21301 TWO52 = ix86_gen_TWO52 (mode);
21302
21303 /* Temporary for holding the result, initialized to the input
21304 operand to ease control flow. */
21305 res = gen_reg_rtx (mode);
21306 emit_move_insn (res, operand1);
21307
21308 /* xa = abs (operand1) */
21309 xa = ix86_expand_sse_fabs (res, &mask);
21310
21311 /* if (!isless (xa, TWO52)) goto label; */
21312 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21313
21314 /* xa2 = xa + TWO52 - TWO52; */
21315 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21316 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21317
21318 /* dxa = xa2 - xa; */
21319 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21320
21321 /* generate 0.5, 1.0 and -0.5 */
21322 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21323 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21324 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21325 0, OPTAB_DIRECT);
21326
21327 /* Compensate. */
21328 tmp = gen_reg_rtx (mode);
21329 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21330 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21331 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21332 gen_rtx_AND (mode, one, tmp)));
21333 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21334 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21335 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21336 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21337 gen_rtx_AND (mode, one, tmp)));
21338 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21339
21340 /* res = copysign (xa2, operand1) */
21341 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21342
21343 emit_label (label);
21344 LABEL_NUSES (label) = 1;
21345
21346 emit_move_insn (operand0, res);
21347 }
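
/* Worked example of the compensation above, with the default
   round-to-nearest-even mode and operand1 = 2.5:

     xa  = 2.5
     xa2 = (2.5 + 2^52) - 2^52  ->  2.0       (tie rounds to even)
     dxa = 2.0 - 2.5            ->  -0.5
     dxa <= -0.5                ->  xa2 += 1  ->  3.0  ==  round (2.5)
  */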
21348
21349 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21350 into OPERAND0. */
21351 void
21352 ix86_expand_trunc (rtx operand0, rtx operand1)
21353 {
21354 /* C code for SSE variant we expand below.
21355 double xa = fabs (x), x2;
21356 if (!isless (xa, TWO52))
21357 return x;
21358 x2 = (double)(long)x;
21359 if (HONOR_SIGNED_ZEROS (mode))
21360 return copysign (x2, x);
21361 return x2;
21362 */
21363 enum machine_mode mode = GET_MODE (operand0);
21364 rtx xa, xi, TWO52, label, res, mask;
21365
21366 TWO52 = ix86_gen_TWO52 (mode);
21367
21368 /* Temporary for holding the result, initialized to the input
21369 operand to ease control flow. */
21370 res = gen_reg_rtx (mode);
21371 emit_move_insn (res, operand1);
21372
21373 /* xa = abs (operand1) */
21374 xa = ix86_expand_sse_fabs (res, &mask);
21375
21376 /* if (!isless (xa, TWO52)) goto label; */
21377 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21378
21379 /* x = (double)(long)x */
21380 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21381 expand_fix (xi, res, 0);
21382 expand_float (res, xi, 0);
21383
21384 if (HONOR_SIGNED_ZEROS (mode))
21385 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21386
21387 emit_label (label);
21388 LABEL_NUSES (label) = 1;
21389
21390 emit_move_insn (operand0, res);
21391 }
21392
21393 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
21394 OPERAND0, avoiding DImode truncation, which needs a 64-bit target. */
21395 void
21396 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21397 {
21398 enum machine_mode mode = GET_MODE (operand0);
21399 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21400
21401 /* C code for SSE variant we expand below.
21402 double xa = fabs (x), x2;
21403 if (!isless (xa, TWO52))
21404 return x;
21405 xa2 = xa + TWO52 - TWO52;
21406 Compensate:
21407 if (xa2 > xa)
21408 xa2 -= 1.0;
21409 x2 = copysign (xa2, x);
21410 return x2;
21411 */
21412
21413 TWO52 = ix86_gen_TWO52 (mode);
21414
21415 /* Temporary for holding the result, initialized to the input
21416 operand to ease control flow. */
21417 res = gen_reg_rtx (mode);
21418 emit_move_insn (res, operand1);
21419
21420 /* xa = abs (operand1) */
21421 xa = ix86_expand_sse_fabs (res, &smask);
21422
21423 /* if (!isless (xa, TWO52)) goto label; */
21424 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21425
21426 /* res = xa + TWO52 - TWO52; */
21427 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21428 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21429 emit_move_insn (res, tmp);
21430
21431 /* generate 1.0 */
21432 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21433
21434 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21435 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21436 emit_insn (gen_rtx_SET (VOIDmode, mask,
21437 gen_rtx_AND (mode, mask, one)));
21438 tmp = expand_simple_binop (mode, MINUS,
21439 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21440 emit_move_insn (res, tmp);
21441
21442 /* res = copysign (res, operand1) */
21443 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21444
21445 emit_label (label);
21446 LABEL_NUSES (label) = 1;
21447
21448 emit_move_insn (operand0, res);
21449 }
21450
21451 /* Expand SSE sequence for computing round from OPERAND1 storing
21452 into OPERAND0. */
21453 void
21454 ix86_expand_round (rtx operand0, rtx operand1)
21455 {
21456 /* C code for the stuff we're doing below:
21457 double xa = fabs (x);
21458 if (!isless (xa, TWO52))
21459 return x;
21460 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21461 return copysign (xa, x);
21462 */
21463 enum machine_mode mode = GET_MODE (operand0);
21464 rtx res, TWO52, xa, label, xi, half, mask;
21465 const struct real_format *fmt;
21466 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21467
21468 /* Temporary for holding the result, initialized to the input
21469 operand to ease control flow. */
21470 res = gen_reg_rtx (mode);
21471 emit_move_insn (res, operand1);
21472
21473 TWO52 = ix86_gen_TWO52 (mode);
21474 xa = ix86_expand_sse_fabs (res, &mask);
21475 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21476
21477 /* load nextafter (0.5, 0.0) */
21478 fmt = REAL_MODE_FORMAT (mode);
21479 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21480 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21481
21482 /* xa = xa + 0.5 */
21483 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21484 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21485
21486 /* xa = (double)(int64_t)xa */
21487 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21488 expand_fix (xi, xa, 0);
21489 expand_float (xa, xi, 0);
21490
21491 /* res = copysign (xa, operand1) */
21492 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21493
21494 emit_label (label);
21495 LABEL_NUSES (label) = 1;
21496
21497 emit_move_insn (operand0, res);
21498 }
21499
21500 #include "gt-i386.h"