i386.h (x86_use_xchgb): New.
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
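/* A quick illustration (added as a reading aid, not part of the original
   source): MODE_INDEX selects the row of the per-mode cost arrays in the
   processor_costs tables below, e.g.

     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- SImode multiply start cost
     ix86_cost->divide[MODE_INDEX (DImode)]      -- DImode divide/mod cost

   with any other mode falling through to the trailing "other" entry at
   index 4.  The field names mult_init/divide are taken from the comments in
   the cost tables and should be treated as a sketch, not a definitive
   reference to the struct layout in i386.h.  */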
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
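/* Reading aid: with COSTS_N_INSNS (N) assumed to be (N) * 4 as noted above,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so a 2-byte add is priced the
   same whether costs are counted in bytes (optimizing for size) or insns.  */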
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
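/* Reading aid for the string-operation tables that close each cost structure
   below (the precise field names live in struct stringop_algs in i386.h; this
   is an informal sketch, not a definitive reference):

     { alg_for_unknown_size, {{max_size_1, alg_1}, ..., {-1, fallback_alg}} }

   The first algorithm is used when the block size is not known at compile
   time; the {size, algorithm} pairs choose an algorithm for known sizes up to
   each limit, with -1 terminating the list.  Each processor provides one such
   pair of entries for memcpy and one for memset, with separate 32-bit and
   64-bit variants, and DUMMY_STRINGOP_ALGS simply falls back to a libcall.  */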
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353   /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354      the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
355      blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a more
356      expensive startup time in the CPU, but after 4K the difference is down in the noise.
357   */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532   /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533      than K8 does.  Alignment becomes important after 8 bytes for memcpy and
534      128 bytes for memset.  */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584   /* New AMD processors never drop prefetches; if they cannot be performed
585      immediately, they are queued.  We set the number of simultaneous prefetches
586      to a large constant to reflect this (it is probably not a good idea to leave
587      the number of prefetches completely unlimited, as their execution also takes
588      some time).  */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597   /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598      blocks it is better to use a loop.  For large blocks, a libcall can do
599      nontemporal accesses and beat inline code considerably.  */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648   /* On K8:
649        MOVD reg64, xmmreg   Double   FSTORE 4
650        MOVD reg32, xmmreg   Double   FSTORE 4
651      On AMDFAM10:
652        MOVD reg64, xmmreg   Double   FADD 3
653                                      1/1   1/1
654        MOVD reg32, xmmreg   Double   FADD 3
655                                      1/1   1/1 */
656 64, /* size of prefetch block */
657   /* New AMD processors never drop prefetches; if they cannot be performed
658      immediately, they are queued.  We set the number of simultaneous prefetches
659      to a large constant to reflect this (it is probably not a good idea to leave
660      the number of prefetches completely unlimited, as their execution also takes
661      some time).  */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671   /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672      very small blocks it is better to use a loop.  For large blocks, a libcall can
673      do nontemporal accesses and beat inline code considerably.  */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828   {4, 4, 4},				/* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862   /* On all chips taken into consideration, lea takes 2 cycles or more.  With
863      this cost, however, our current implementation of synth_mult results in the
864      use of unnecessary temporary registers, causing regressions on several
865      SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908      is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_GEODE (1<<PROCESSOR_GEODE)
988 #define m_K6_GEODE (m_K6 | m_GEODE)
989 #define m_K6 (1<<PROCESSOR_K6)
990 #define m_ATHLON (1<<PROCESSOR_ATHLON)
991 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
992 #define m_K8 (1<<PROCESSOR_K8)
993 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
994 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
995 #define m_NOCONA (1<<PROCESSOR_NOCONA)
996 #define m_CORE2 (1<<PROCESSOR_CORE2)
997 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
998 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
999 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1000 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1001
1002 /* Generic instruction choice should be the common subset of supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1004
1005 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
1006 Generic64 seems like a good code size tradeoff.  We can't enable it for 32bit
1007 generic because it does not work well with PPro based chips. */
1008 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
1009 | m_GENERIC64;
1010 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1011 | m_NOCONA | m_CORE2 | m_GENERIC;
1012 const int x86_zero_extend_with_and = m_486 | m_PENT;
1013 /* Enable zero extension of integer registers to avoid partial dependencies.  */
1014 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1015 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
1016 const int x86_double_with_add = ~m_386;
1017 const int x86_use_bit_test = m_386;
1018 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
1019 | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1021 | m_NOCONA;
1022 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
1023 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
1024 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1025 /* Branch hints were put in P4 based on simulation results.  But
1026 after P4 was made, no performance benefit was observed with
1027 branch hints; they also increase code size.  As a result,
1028 icc never generates branch hints. */
1029 const int x86_branch_hints = 0;
1030 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
1032 /* We probably ought to watch for partial register stalls on the Generic32
1033 compilation setting as well.  However, in the current implementation
1034 partial register stalls are not eliminated very well - they can
1035 be introduced via subregs synthesized by combine and can happen
1036 in caller/callee saving sequences.
1037 Because this option pays back little on PPro based chips and conflicts
1038 with the partial register dependencies used by Athlon/P4 based chips, it is
1039 better to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall = m_PPRO;
1041 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
1042 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
1043 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
1044 | m_CORE2 | m_GENERIC);
1045 const int x86_use_mov0 = m_K6;
1046 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
1047 /* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1048 const int x86_use_xchgb = m_PENT4;
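/* The ChangeLog fragment at the top notes that i386.h gains a matching macro
   for this new flag.  A minimal sketch of how such tuning flags are typically
   consumed there (TUNEMASK standing for the (1 << ix86_tune) bit; the exact
   macro and mask names are assumptions, not quoted from i386.h):

     #define TARGET_USE_XCHGB (x86_use_xchgb & TUNEMASK)
*/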
1049 const int x86_read_modify_write = ~m_PENT;
1050 const int x86_read_modify = ~(m_PENT | m_PPRO);
1051 const int x86_split_long_moves = m_PPRO;
1052 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
1053 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1054 /* m_PENT4 ? */
1055 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
1056 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
1057 const int x86_qimode_math = ~(0);
1058 const int x86_promote_qi_regs = 0;
1059 /* On PPro this flag is meant to avoid partial register stalls.  Just like
1060 x86_partial_reg_stall, this option might be considered for Generic32
1061 if our scheme for avoiding partial stalls were more effective. */
1062 const int x86_himode_math = ~(m_PPRO);
1063 const int x86_promote_hi_regs = m_PPRO;
1064 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1065 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1066 | m_CORE2 | m_GENERIC;
1067 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1068 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1069 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
1070 | m_CORE2 | m_GENERIC;
1071 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1072 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1073 /* Enable if integer moves are preferred for DFmode copies */
1074 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1075 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
1076 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1077 | m_CORE2 | m_GENERIC;
1078 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1079 | m_CORE2 | m_GENERIC;
1080 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1081 for outgoing arguments will be computed and placed into the variable
1082 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1083 for each call; instead, the function prologue should increase the stack frame
1084 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1085 not proper. */
1086 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
1087 | m_NOCONA | m_PPRO | m_CORE2
1088 | m_GENERIC;
1089 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1090 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1091 const int x86_shift1 = ~m_486;
1092 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
1093 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1094 | m_NOCONA | m_CORE2 | m_GENERIC;
1095 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
1096 that treat 128bit SSE registers as single units and K8 based chips that
1097 divide SSE registers into two 64bit halves.
1098 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
1099 to allow register renaming on 128bit SSE units, but usually results in one
1100 extra microop on 64bit SSE units.  Experimental results show that disabling
1101 this option on P4 brings an over 20% SPECfp regression, while enabling it on
1102 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
1103 of moves. */
1104 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1105 | m_GENERIC | m_AMDFAM10;
1106 /* Set for machines where the type and dependencies are resolved on SSE
1107 register parts instead of whole registers, so we may maintain just the
1108 lower part of scalar values in the proper format, leaving the upper part
1109 undefined. */
1110 const int x86_sse_split_regs = m_ATHLON_K8;
1111 /* Code generation for scalar reg-reg moves of single and double precision data:
1112 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1113 movaps reg, reg
1114 else
1115 movss reg, reg
1116 if (x86_sse_partial_reg_dependency == true)
1117 movapd reg, reg
1118 else
1119 movsd reg, reg
1120
1121 Code generation for scalar loads of double precision data:
1122 if (x86_sse_split_regs == true)
1123 movlpd mem, reg (gas syntax)
1124 else
1125 movsd mem, reg
1126
1127 Code generation for unaligned packed loads of single precision data
1128 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1129 if (x86_sse_unaligned_move_optimal)
1130 movups mem, reg
1131
1132 if (x86_sse_partial_reg_dependency == true)
1133 {
1134 xorps reg, reg
1135 movlps mem, reg
1136 movhps mem+8, reg
1137 }
1138 else
1139 {
1140 movlps mem, reg
1141 movhps mem+8, reg
1142 }
1143
1144 Code generation for unaligned packed loads of double precision data
1145 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1146 if (x86_sse_unaligned_move_optimal)
1147 movupd mem, reg
1148
1149 if (x86_sse_split_regs == true)
1150 {
1151 movlpd mem, reg
1152 movhpd mem+8, reg
1153 }
1154 else
1155 {
1156 movsd mem, reg
1157 movhpd mem+8, reg
1158 }
1159 */
1160 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1161 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1162 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1163 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1164 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1165
1166 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
1167
1168 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1169 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1170 /* Some CPU cores are not able to predict more than 4 branch instructions in
1171 the 16 byte window. */
1172 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1173 | m_NOCONA | m_CORE2 | m_GENERIC;
1174 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1175 | m_CORE2 | m_GENERIC;
1176 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1177 /* Compare and exchange was added for 80486. */
1178 const int x86_cmpxchg = ~m_386;
1179 /* Compare and exchange 8 bytes was added for the Pentium. */
1180 const int x86_cmpxchg8b = ~(m_386 | m_486);
1181 /* Exchange and add was added for 80486. */
1182 const int x86_xadd = ~m_386;
1183 /* Byteswap was added for 80486. */
1184 const int x86_bswap = ~m_386;
1185 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1186
1187 static enum stringop_alg stringop_alg = no_stringop;
1188
1189 /* If the average insn count for a single function invocation is
1190 lower than this constant, emit fast (but longer) prologue and
1191 epilogue code. */
1192 #define FAST_PROLOGUE_INSN_COUNT 20
1193
1194 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1195 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1196 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1197 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1198
1199 /* Array of the smallest class containing reg number REGNO, indexed by
1200 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1201
1202 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1203 {
1204 /* ax, dx, cx, bx */
1205 AREG, DREG, CREG, BREG,
1206 /* si, di, bp, sp */
1207 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1208 /* FP registers */
1209 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1210 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1211 /* arg pointer */
1212 NON_Q_REGS,
1213 /* flags, fpsr, fpcr, frame */
1214 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1215 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1216 SSE_REGS, SSE_REGS,
1217 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1218 MMX_REGS, MMX_REGS,
1219 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1220 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1221 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1222 SSE_REGS, SSE_REGS,
1223 };
1224
1225 /* The "default" register map used in 32bit mode. */
1226
1227 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1228 {
1229 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1230 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1231 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1232 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1233 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1234 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1235 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1236 };
1237
1238 static int const x86_64_int_parameter_registers[6] =
1239 {
1240 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1241 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1242 };
1243
1244 static int const x86_64_int_return_registers[4] =
1245 {
1246 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1247 };
1248
1249 /* The "default" register map used in 64bit mode. */
1250 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1251 {
1252 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1253 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1255 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1256 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1257 8,9,10,11,12,13,14,15, /* extended integer registers */
1258 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1259 };
1260
1261 /* Define the register numbers to be used in Dwarf debugging information.
1262 The SVR4 reference port C compiler uses the following register numbers
1263 in its Dwarf output code:
1264 0 for %eax (gcc regno = 0)
1265 1 for %ecx (gcc regno = 2)
1266 2 for %edx (gcc regno = 1)
1267 3 for %ebx (gcc regno = 3)
1268 4 for %esp (gcc regno = 7)
1269 5 for %ebp (gcc regno = 6)
1270 6 for %esi (gcc regno = 4)
1271 7 for %edi (gcc regno = 5)
1272 The following three DWARF register numbers are never generated by
1273 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1274 believes these numbers have these meanings.
1275 8 for %eip (no gcc equivalent)
1276 9 for %eflags (gcc regno = 17)
1277 10 for %trapno (no gcc equivalent)
1278 It is not at all clear how we should number the FP stack registers
1279 for the x86 architecture. If the version of SDB on x86/svr4 were
1280 a bit less brain dead with respect to floating-point then we would
1281 have a precedent to follow with respect to DWARF register numbers
1282 for x86 FP registers, but the SDB on x86/svr4 is so completely
1283 broken with respect to FP registers that it is hardly worth thinking
1284 of it as something to strive for compatibility with.
1285 The version of x86/svr4 SDB I have at the moment does (partially)
1286 seem to believe that DWARF register number 11 is associated with
1287 the x86 register %st(0), but that's about all. Higher DWARF
1288 register numbers don't seem to be associated with anything in
1289 particular, and even for DWARF regno 11, SDB only seems to under-
1290 stand that it should say that a variable lives in %st(0) (when
1291 asked via an `=' command) if we said it was in DWARF regno 11,
1292 but SDB still prints garbage when asked for the value of the
1293 variable in question (via a `/' command).
1294 (Also note that the labels SDB prints for various FP stack regs
1295 when doing an `x' command are all wrong.)
1296 Note that these problems generally don't affect the native SVR4
1297 C compiler because it doesn't allow the use of -O with -g and
1298 because when it is *not* optimizing, it allocates a memory
1299 location for each floating-point variable, and the memory
1300 location is what gets described in the DWARF AT_location
1301 attribute for the variable in question.
1302 Regardless of the severe mental illness of the x86/svr4 SDB, we
1303 do something sensible here and we use the following DWARF
1304 register numbers. Note that these are all stack-top-relative
1305 numbers.
1306 11 for %st(0) (gcc regno = 8)
1307 12 for %st(1) (gcc regno = 9)
1308 13 for %st(2) (gcc regno = 10)
1309 14 for %st(3) (gcc regno = 11)
1310 15 for %st(4) (gcc regno = 12)
1311 16 for %st(5) (gcc regno = 13)
1312 17 for %st(6) (gcc regno = 14)
1313 18 for %st(7) (gcc regno = 15)
1314 */
1315 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1316 {
1317 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1318 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1319 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1320 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1321 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1322 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1323 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1324 };
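/* Illustrative reading of the map above (a sketch only, not used by the
   compiler): the table is indexed by gcc's own register numbers, so
   svr4_dbx_register_map[7] == 4 says that gcc regno 7 (%esp) is DWARF
   register 4, and svr4_dbx_register_map[1] == 2 says that gcc regno 1
   (%edx) is DWARF register 2, matching the numbering described above.  */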
1325
1326 /* Test and compare insns in i386.md store the information needed to
1327 generate branch and scc insns here. */
1328
1329 rtx ix86_compare_op0 = NULL_RTX;
1330 rtx ix86_compare_op1 = NULL_RTX;
1331 rtx ix86_compare_emitted = NULL_RTX;
1332
1333 /* Size of the register save area. */
1334 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
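/* For reference, a worked value (assuming the usual x86-64 parameters of
   REGPARM_MAX == 6, UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8):
   X86_64_VARARGS_SIZE = 6*8 + 8*16 = 176 bytes, i.e. the six integer
   argument registers plus the eight SSE argument registers saved by a
   varargs prologue.  */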
1335
1336 /* Define the structure for the machine field in struct function. */
1337
1338 struct stack_local_entry GTY(())
1339 {
1340 unsigned short mode;			/* Mode of the stack slot.  */
1341 unsigned short n;			/* Slot identifier within this mode.  */
1342 rtx rtl;				/* The MEM rtx for the slot.  */
1343 struct stack_local_entry *next;	/* Next slot in the chain.  */
1344 };
1345
1346 /* Structure describing stack frame layout.
1347 Stack grows downward:
1348
1349 [arguments]
1350 <- ARG_POINTER
1351 saved pc
1352
1353 saved frame pointer if frame_pointer_needed
1354 <- HARD_FRAME_POINTER
1355 [saved regs]
1356
1357 [padding1] \
1358 )
1359 [va_arg registers] (
1360 > to_allocate <- FRAME_POINTER
1361 [frame] (
1362 )
1363 [padding2] /
1364 */
1365 struct ix86_frame
1366 {
1367 int nregs;
1368 int padding1;
1369 int va_arg_size;
1370 HOST_WIDE_INT frame;
1371 int padding2;
1372 int outgoing_arguments_size;
1373 int red_zone_size;
1374
1375 HOST_WIDE_INT to_allocate;
1376 /* The offsets relative to ARG_POINTER. */
1377 HOST_WIDE_INT frame_pointer_offset;
1378 HOST_WIDE_INT hard_frame_pointer_offset;
1379 HOST_WIDE_INT stack_pointer_offset;
1380
1381 /* When save_regs_using_mov is set, emit prologue using
1382 move instead of push instructions. */
1383 bool save_regs_using_mov;
1384 };
1385
1386 /* Code model option. */
1387 enum cmodel ix86_cmodel;
1388 /* Asm dialect. */
1389 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1390 /* TLS dialects. */
1391 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1392
1393 /* Which unit we are generating floating point math for. */
1394 enum fpmath_unit ix86_fpmath;
1395
1396 /* Which cpu are we scheduling for. */
1397 enum processor_type ix86_tune;
1398 /* Which instruction set architecture to use. */
1399 enum processor_type ix86_arch;
1400
1401 /* True if the SSE prefetch instruction is not a NOP.  */
1402 int x86_prefetch_sse;
1403
1404 /* true if cmpxchg16b is supported. */
1405 int x86_cmpxchg16b;
1406
1407 /* ix86_regparm_string as a number */
1408 static int ix86_regparm;
1409
1410 /* -mstackrealign option */
1411 extern int ix86_force_align_arg_pointer;
1412 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1413
1414 /* Preferred alignment for stack boundary in bits. */
1415 unsigned int ix86_preferred_stack_boundary;
1416
1417 /* Values 1-5: see jump.c */
1418 int ix86_branch_cost;
1419
1420 /* Variables of this size or smaller are put in the data/bss sections;
1421 larger variables go into the ldata/lbss sections (x86-64 medium model).  */
1422
1423 int ix86_section_threshold = 65536;
1424
1425 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1426 char internal_label_prefix[16];
1427 int internal_label_prefix_len;
1428 \f
1429 static bool ix86_handle_option (size_t, const char *, int);
1430 static void output_pic_addr_const (FILE *, rtx, int);
1431 static void put_condition_code (enum rtx_code, enum machine_mode,
1432 int, int, FILE *);
1433 static const char *get_some_local_dynamic_name (void);
1434 static int get_some_local_dynamic_name_1 (rtx *, void *);
1435 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1436 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1437 rtx *);
1438 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1439 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1440 enum machine_mode);
1441 static rtx get_thread_pointer (int);
1442 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1443 static void get_pc_thunk_name (char [32], unsigned int);
1444 static rtx gen_push (rtx);
1445 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1446 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1447 static struct machine_function * ix86_init_machine_status (void);
1448 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1449 static int ix86_nsaved_regs (void);
1450 static void ix86_emit_save_regs (void);
1451 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1452 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1453 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1454 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1455 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1456 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1457 static int ix86_issue_rate (void);
1458 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1459 static int ia32_multipass_dfa_lookahead (void);
1460 static void ix86_init_mmx_sse_builtins (void);
1461 static rtx x86_this_parameter (tree);
1462 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1463 HOST_WIDE_INT, tree);
1464 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1465 static void x86_file_start (void);
1466 static void ix86_reorg (void);
1467 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1468 static tree ix86_build_builtin_va_list (void);
1469 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1470 tree, int *, int);
1471 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1472 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1473 static bool ix86_vector_mode_supported_p (enum machine_mode);
1474
1475 static int ix86_address_cost (rtx);
1476 static bool ix86_cannot_force_const_mem (rtx);
1477 static rtx ix86_delegitimize_address (rtx);
1478
1479 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1480
1481 struct builtin_description;
1482 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1483 tree, rtx);
1484 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1485 tree, rtx);
1486 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1487 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1488 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1489 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1490 static rtx safe_vector_operand (rtx, enum machine_mode);
1491 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1492 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1493 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1494 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1495 static int ix86_fp_comparison_cost (enum rtx_code code);
1496 static unsigned int ix86_select_alt_pic_regnum (void);
1497 static int ix86_save_reg (unsigned int, int);
1498 static void ix86_compute_frame_layout (struct ix86_frame *);
1499 static int ix86_comp_type_attributes (tree, tree);
1500 static int ix86_function_regparm (tree, tree);
1501 const struct attribute_spec ix86_attribute_table[];
1502 static bool ix86_function_ok_for_sibcall (tree, tree);
1503 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1504 static int ix86_value_regno (enum machine_mode, tree, tree);
1505 static bool contains_128bit_aligned_vector_p (tree);
1506 static rtx ix86_struct_value_rtx (tree, int);
1507 static bool ix86_ms_bitfield_layout_p (tree);
1508 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1509 static int extended_reg_mentioned_1 (rtx *, void *);
1510 static bool ix86_rtx_costs (rtx, int, int, int *);
1511 static int min_insn_size (rtx);
1512 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1513 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1514 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1515 tree, bool);
1516 static void ix86_init_builtins (void);
1517 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1518 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1519 static tree ix86_builtin_conversion (enum tree_code, tree);
1520 static const char *ix86_mangle_fundamental_type (tree);
1521 static tree ix86_stack_protect_fail (void);
1522 static rtx ix86_internal_arg_pointer (void);
1523 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1524 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1525 rtx, rtx, int);
1526
1527 /* This function is only used on Solaris. */
1528 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1529 ATTRIBUTE_UNUSED;
1530
1531 /* Register class used for passing a given 64-bit part of the argument.
1532 These represent classes as documented by the PS ABI, with the exception
1533 of the SSESF and SSEDF classes, which are basically the SSE class; GCC
1534 just uses SFmode or DFmode moves instead of DImode to avoid reformatting
1535 penalties.
1536 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1537 whenever possible (the upper half is just padding).
1538 */
1539 enum x86_64_reg_class
1540 {
1541 X86_64_NO_CLASS,
1542 X86_64_INTEGER_CLASS,
1543 X86_64_INTEGERSI_CLASS,
1544 X86_64_SSE_CLASS,
1545 X86_64_SSESF_CLASS,
1546 X86_64_SSEDF_CLASS,
1547 X86_64_SSEUP_CLASS,
1548 X86_64_X87_CLASS,
1549 X86_64_X87UP_CLASS,
1550 X86_64_COMPLEX_X87_CLASS,
1551 X86_64_MEMORY_CLASS
1552 };
1553 static const char * const x86_64_reg_class_name[] = {
1554 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1555 "sseup", "x87", "x87up", "cplx87", "no"
1556 };
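/* Rough classification examples under this scheme (an illustrative sketch
   of the usual psABI results): a 32-bit int argument gets
   X86_64_INTEGERSI_CLASS (only the low half of the eightbyte is
   meaningful), a double gets X86_64_SSEDF_CLASS, a long double (XFmode)
   occupies two eightbytes classified X86_64_X87_CLASS and
   X86_64_X87UP_CLASS, and anything too large or too irregular falls back
   to X86_64_MEMORY_CLASS and is passed on the stack.  */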
1557
1558 #define MAX_CLASSES 4
1559
1560 /* Table of constants used by fldpi, fldln2, etc.... */
1561 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1562 static bool ext_80387_constants_init = 0;
1563 static void init_ext_80387_constants (void);
1564 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1565 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1566 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1567 static section *x86_64_elf_select_section (tree decl, int reloc,
1568 unsigned HOST_WIDE_INT align)
1569 ATTRIBUTE_UNUSED;
1570 \f
1571 /* Initialize the GCC target structure. */
1572 #undef TARGET_ATTRIBUTE_TABLE
1573 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1574 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1575 # undef TARGET_MERGE_DECL_ATTRIBUTES
1576 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1577 #endif
1578
1579 #undef TARGET_COMP_TYPE_ATTRIBUTES
1580 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1581
1582 #undef TARGET_INIT_BUILTINS
1583 #define TARGET_INIT_BUILTINS ix86_init_builtins
1584 #undef TARGET_EXPAND_BUILTIN
1585 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1586
1587 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1588 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1589 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1590 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1591
1592 #undef TARGET_ASM_FUNCTION_EPILOGUE
1593 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1594
1595 #undef TARGET_ENCODE_SECTION_INFO
1596 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1597 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1598 #else
1599 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1600 #endif
1601
1602 #undef TARGET_ASM_OPEN_PAREN
1603 #define TARGET_ASM_OPEN_PAREN ""
1604 #undef TARGET_ASM_CLOSE_PAREN
1605 #define TARGET_ASM_CLOSE_PAREN ""
1606
1607 #undef TARGET_ASM_ALIGNED_HI_OP
1608 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1609 #undef TARGET_ASM_ALIGNED_SI_OP
1610 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1611 #ifdef ASM_QUAD
1612 #undef TARGET_ASM_ALIGNED_DI_OP
1613 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1614 #endif
1615
1616 #undef TARGET_ASM_UNALIGNED_HI_OP
1617 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1618 #undef TARGET_ASM_UNALIGNED_SI_OP
1619 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1620 #undef TARGET_ASM_UNALIGNED_DI_OP
1621 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1622
1623 #undef TARGET_SCHED_ADJUST_COST
1624 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1625 #undef TARGET_SCHED_ISSUE_RATE
1626 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1627 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1628 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1629 ia32_multipass_dfa_lookahead
1630
1631 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1632 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1633
1634 #ifdef HAVE_AS_TLS
1635 #undef TARGET_HAVE_TLS
1636 #define TARGET_HAVE_TLS true
1637 #endif
1638 #undef TARGET_CANNOT_FORCE_CONST_MEM
1639 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1640 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1641 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1642
1643 #undef TARGET_DELEGITIMIZE_ADDRESS
1644 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1645
1646 #undef TARGET_MS_BITFIELD_LAYOUT_P
1647 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1648
1649 #if TARGET_MACHO
1650 #undef TARGET_BINDS_LOCAL_P
1651 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1652 #endif
1653
1654 #undef TARGET_ASM_OUTPUT_MI_THUNK
1655 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1656 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1657 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1658
1659 #undef TARGET_ASM_FILE_START
1660 #define TARGET_ASM_FILE_START x86_file_start
1661
1662 #undef TARGET_DEFAULT_TARGET_FLAGS
1663 #define TARGET_DEFAULT_TARGET_FLAGS \
1664 (TARGET_DEFAULT \
1665 | TARGET_64BIT_DEFAULT \
1666 | TARGET_SUBTARGET_DEFAULT \
1667 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1668
1669 #undef TARGET_HANDLE_OPTION
1670 #define TARGET_HANDLE_OPTION ix86_handle_option
1671
1672 #undef TARGET_RTX_COSTS
1673 #define TARGET_RTX_COSTS ix86_rtx_costs
1674 #undef TARGET_ADDRESS_COST
1675 #define TARGET_ADDRESS_COST ix86_address_cost
1676
1677 #undef TARGET_FIXED_CONDITION_CODE_REGS
1678 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1679 #undef TARGET_CC_MODES_COMPATIBLE
1680 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1681
1682 #undef TARGET_MACHINE_DEPENDENT_REORG
1683 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1684
1685 #undef TARGET_BUILD_BUILTIN_VA_LIST
1686 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1687
1688 #undef TARGET_MD_ASM_CLOBBERS
1689 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1690
1691 #undef TARGET_PROMOTE_PROTOTYPES
1692 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1693 #undef TARGET_STRUCT_VALUE_RTX
1694 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1695 #undef TARGET_SETUP_INCOMING_VARARGS
1696 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1697 #undef TARGET_MUST_PASS_IN_STACK
1698 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1699 #undef TARGET_PASS_BY_REFERENCE
1700 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1701 #undef TARGET_INTERNAL_ARG_POINTER
1702 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1703 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1704 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1705
1706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1707 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1708
1709 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1710 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1711
1712 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1713 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1714
1715 #ifdef HAVE_AS_TLS
1716 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1717 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1718 #endif
1719
1720 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1721 #undef TARGET_INSERT_ATTRIBUTES
1722 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1723 #endif
1724
1725 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1726 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1727
1728 #undef TARGET_STACK_PROTECT_FAIL
1729 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1730
1731 #undef TARGET_FUNCTION_VALUE
1732 #define TARGET_FUNCTION_VALUE ix86_function_value
1733
1734 struct gcc_target targetm = TARGET_INITIALIZER;
1735
1736 \f
1737 /* The svr4 ABI for the i386 says that records and unions are returned
1738 in memory. */
1739 #ifndef DEFAULT_PCC_STRUCT_RETURN
1740 #define DEFAULT_PCC_STRUCT_RETURN 1
1741 #endif
1742
1743 /* Implement TARGET_HANDLE_OPTION. */
1744
1745 static bool
1746 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1747 {
1748 switch (code)
1749 {
1750 case OPT_m3dnow:
1751 if (!value)
1752 {
1753 target_flags &= ~MASK_3DNOW_A;
1754 target_flags_explicit |= MASK_3DNOW_A;
1755 }
1756 return true;
1757
1758 case OPT_mmmx:
1759 if (!value)
1760 {
1761 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1762 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1763 }
1764 return true;
1765
1766 case OPT_msse:
1767 if (!value)
1768 {
1769 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1770 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1771 }
1772 return true;
1773
1774 case OPT_msse2:
1775 if (!value)
1776 {
1777 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1778 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1779 }
1780 return true;
1781
1782 case OPT_msse3:
1783 if (!value)
1784 {
1785 target_flags &= ~MASK_SSE4A;
1786 target_flags_explicit |= MASK_SSE4A;
1787 }
1788 return true;
1789
1790 default:
1791 return true;
1792 }
1793 }
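/* As an illustration of the handler above: -mno-sse also clears MASK_SSE2,
   MASK_SSE3 and MASK_SSE4A (MASK_SSE itself is cleared by the generic
   option machinery), and -mno-mmx likewise drops the 3DNow! flags, so a
   command line such as

     gcc -msse3 -mno-sse2 file.c

   ends up with neither SSE2 nor SSE3 enabled, since disabling SSE2 also
   disables everything that depends on it.  */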
1794
1795 /* Sometimes certain combinations of command options do not make
1796 sense on a particular target machine. You can define a macro
1797 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1798 defined, is executed once just after all the command options have
1799 been parsed.
1800
1801 Don't use this macro to turn on various extra optimizations for
1802 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1803
1804 void
1805 override_options (void)
1806 {
1807 int i;
1808 int ix86_tune_defaulted = 0;
1809
1810 /* Comes from final.c -- no real reason to change it. */
1811 #define MAX_CODE_ALIGN 16
1812
1813 static struct ptt
1814 {
1815 const struct processor_costs *cost; /* Processor costs */
1816 const int target_enable; /* Target flags to enable. */
1817 const int target_disable; /* Target flags to disable. */
1818 const int align_loop; /* Default alignments. */
1819 const int align_loop_max_skip;
1820 const int align_jump;
1821 const int align_jump_max_skip;
1822 const int align_func;
1823 }
1824 const processor_target_table[PROCESSOR_max] =
1825 {
1826 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1827 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1828 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1829 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1830 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1831 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1832 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1833 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1834 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1835 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1836 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1837 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1838 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1839 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1840 };
1841
1842 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1843 static struct pta
1844 {
1845 const char *const name; /* processor name or nickname. */
1846 const enum processor_type processor;
1847 const enum pta_flags
1848 {
1849 PTA_SSE = 1,
1850 PTA_SSE2 = 2,
1851 PTA_SSE3 = 4,
1852 PTA_MMX = 8,
1853 PTA_PREFETCH_SSE = 16,
1854 PTA_3DNOW = 32,
1855 PTA_3DNOW_A = 64,
1856 PTA_64BIT = 128,
1857 PTA_SSSE3 = 256,
1858 PTA_CX16 = 512,
1859 PTA_POPCNT = 1024,
1860 PTA_ABM = 2048,
1861 PTA_SSE4A = 4096
1862 } flags;
1863 }
1864 const processor_alias_table[] =
1865 {
1866 {"i386", PROCESSOR_I386, 0},
1867 {"i486", PROCESSOR_I486, 0},
1868 {"i586", PROCESSOR_PENTIUM, 0},
1869 {"pentium", PROCESSOR_PENTIUM, 0},
1870 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1871 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1872 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1873 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1874 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1875 {"i686", PROCESSOR_PENTIUMPRO, 0},
1876 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1877 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1878 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1879 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1880 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1881 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1882 | PTA_MMX | PTA_PREFETCH_SSE},
1883 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1884 | PTA_MMX | PTA_PREFETCH_SSE},
1885 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1886 | PTA_MMX | PTA_PREFETCH_SSE},
1887 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1888 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1889 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1890 | PTA_64BIT | PTA_MMX
1891 | PTA_PREFETCH_SSE | PTA_CX16},
1892 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1893 | PTA_3DNOW_A},
1894 {"k6", PROCESSOR_K6, PTA_MMX},
1895 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1896 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1897 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1898 | PTA_3DNOW_A},
1899 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1900 | PTA_3DNOW | PTA_3DNOW_A},
1901 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1902 | PTA_3DNOW_A | PTA_SSE},
1903 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1904 | PTA_3DNOW_A | PTA_SSE},
1905 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1906 | PTA_3DNOW_A | PTA_SSE},
1907 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1908 | PTA_SSE | PTA_SSE2 },
1909 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1910 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1911 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1912 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1913 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1914 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1915 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1916 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1917 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1918 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1919 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1920 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1921 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1922 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1923 };
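/* Reading the table above: -march=k8, for instance, enables MMX, 3DNow!,
   3DNow!-A, SSE and SSE2 and permits 64-bit code generation, and (below)
   also serves as the tuning target when no explicit -mtune is given.  The
   "generic32"/"generic64" entries carry no ISA flags because they are only
   valid with -mtune.  */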
1924
1925 int const pta_size = ARRAY_SIZE (processor_alias_table);
1926
1927 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1928 SUBTARGET_OVERRIDE_OPTIONS;
1929 #endif
1930
1931 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1932 SUBSUBTARGET_OVERRIDE_OPTIONS;
1933 #endif
1934
1935 /* -fPIC is the default for x86_64. */
1936 if (TARGET_MACHO && TARGET_64BIT)
1937 flag_pic = 2;
1938
1939 /* Set the default values for switches whose default depends on TARGET_64BIT
1940 in case they weren't overridden by command line options.  */
1941 if (TARGET_64BIT)
1942 {
1943 /* Mach-O doesn't support omitting the frame pointer for now. */
1944 if (flag_omit_frame_pointer == 2)
1945 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1946 if (flag_asynchronous_unwind_tables == 2)
1947 flag_asynchronous_unwind_tables = 1;
1948 if (flag_pcc_struct_return == 2)
1949 flag_pcc_struct_return = 0;
1950 }
1951 else
1952 {
1953 if (flag_omit_frame_pointer == 2)
1954 flag_omit_frame_pointer = 0;
1955 if (flag_asynchronous_unwind_tables == 2)
1956 flag_asynchronous_unwind_tables = 0;
1957 if (flag_pcc_struct_return == 2)
1958 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1959 }
1960
1961 /* Need to check -mtune=generic first. */
1962 if (ix86_tune_string)
1963 {
1964 if (!strcmp (ix86_tune_string, "generic")
1965 || !strcmp (ix86_tune_string, "i686")
1966 /* As special support for cross compilers we read -mtune=native
1967 as -mtune=generic. With native compilers we won't see the
1968 -mtune=native, as it was changed by the driver. */
1969 || !strcmp (ix86_tune_string, "native"))
1970 {
1971 if (TARGET_64BIT)
1972 ix86_tune_string = "generic64";
1973 else
1974 ix86_tune_string = "generic32";
1975 }
1976 else if (!strncmp (ix86_tune_string, "generic", 7))
1977 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1978 }
1979 else
1980 {
1981 if (ix86_arch_string)
1982 ix86_tune_string = ix86_arch_string;
1983 if (!ix86_tune_string)
1984 {
1985 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1986 ix86_tune_defaulted = 1;
1987 }
1988
1989 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1990 need to use a sensible tune option. */
1991 if (!strcmp (ix86_tune_string, "generic")
1992 || !strcmp (ix86_tune_string, "x86-64")
1993 || !strcmp (ix86_tune_string, "i686"))
1994 {
1995 if (TARGET_64BIT)
1996 ix86_tune_string = "generic64";
1997 else
1998 ix86_tune_string = "generic32";
1999 }
2000 }
2001 if (ix86_stringop_string)
2002 {
2003 if (!strcmp (ix86_stringop_string, "rep_byte"))
2004 stringop_alg = rep_prefix_1_byte;
2005 else if (!strcmp (ix86_stringop_string, "libcall"))
2006 stringop_alg = libcall;
2007 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2008 stringop_alg = rep_prefix_4_byte;
2009 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2010 stringop_alg = rep_prefix_8_byte;
2011 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2012 stringop_alg = loop_1_byte;
2013 else if (!strcmp (ix86_stringop_string, "loop"))
2014 stringop_alg = loop;
2015 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2016 stringop_alg = unrolled_loop;
2017 else
2018 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2019 }
2020 if (!strcmp (ix86_tune_string, "x86-64"))
2021 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2022 "-mtune=generic instead as appropriate.");
2023
2024 if (!ix86_arch_string)
2025 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2026 if (!strcmp (ix86_arch_string, "generic"))
2027 error ("generic CPU can be used only for -mtune= switch");
2028 if (!strncmp (ix86_arch_string, "generic", 7))
2029 error ("bad value (%s) for -march= switch", ix86_arch_string);
2030
2031 if (ix86_cmodel_string != 0)
2032 {
2033 if (!strcmp (ix86_cmodel_string, "small"))
2034 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2035 else if (!strcmp (ix86_cmodel_string, "medium"))
2036 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2037 else if (flag_pic)
2038 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2039 else if (!strcmp (ix86_cmodel_string, "32"))
2040 ix86_cmodel = CM_32;
2041 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2042 ix86_cmodel = CM_KERNEL;
2043 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2044 ix86_cmodel = CM_LARGE;
2045 else
2046 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2047 }
2048 else
2049 {
2050 ix86_cmodel = CM_32;
2051 if (TARGET_64BIT)
2052 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2053 }
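/* Summarizing the defaults chosen above when no -mcmodel= is given:
   32-bit compilation always uses CM_32, while 64-bit compilation uses
   CM_SMALL, or CM_SMALL_PIC when -fpic/-fPIC is in effect.  */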
2054 if (ix86_asm_string != 0)
2055 {
2056 if (! TARGET_MACHO
2057 && !strcmp (ix86_asm_string, "intel"))
2058 ix86_asm_dialect = ASM_INTEL;
2059 else if (!strcmp (ix86_asm_string, "att"))
2060 ix86_asm_dialect = ASM_ATT;
2061 else
2062 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2063 }
2064 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2065 error ("code model %qs not supported in the %s bit mode",
2066 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2067 if (ix86_cmodel == CM_LARGE)
2068 sorry ("code model %<large%> not supported yet");
2069 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2070 sorry ("%i-bit mode not compiled in",
2071 (target_flags & MASK_64BIT) ? 64 : 32);
2072
2073 for (i = 0; i < pta_size; i++)
2074 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2075 {
2076 ix86_arch = processor_alias_table[i].processor;
2077 /* Default cpu tuning to the architecture. */
2078 ix86_tune = ix86_arch;
2079 if (processor_alias_table[i].flags & PTA_MMX
2080 && !(target_flags_explicit & MASK_MMX))
2081 target_flags |= MASK_MMX;
2082 if (processor_alias_table[i].flags & PTA_3DNOW
2083 && !(target_flags_explicit & MASK_3DNOW))
2084 target_flags |= MASK_3DNOW;
2085 if (processor_alias_table[i].flags & PTA_3DNOW_A
2086 && !(target_flags_explicit & MASK_3DNOW_A))
2087 target_flags |= MASK_3DNOW_A;
2088 if (processor_alias_table[i].flags & PTA_SSE
2089 && !(target_flags_explicit & MASK_SSE))
2090 target_flags |= MASK_SSE;
2091 if (processor_alias_table[i].flags & PTA_SSE2
2092 && !(target_flags_explicit & MASK_SSE2))
2093 target_flags |= MASK_SSE2;
2094 if (processor_alias_table[i].flags & PTA_SSE3
2095 && !(target_flags_explicit & MASK_SSE3))
2096 target_flags |= MASK_SSE3;
2097 if (processor_alias_table[i].flags & PTA_SSSE3
2098 && !(target_flags_explicit & MASK_SSSE3))
2099 target_flags |= MASK_SSSE3;
2100 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2101 x86_prefetch_sse = true;
2102 if (processor_alias_table[i].flags & PTA_CX16)
2103 x86_cmpxchg16b = true;
2104 if (processor_alias_table[i].flags & PTA_POPCNT
2105 && !(target_flags_explicit & MASK_POPCNT))
2106 target_flags |= MASK_POPCNT;
2107 if (processor_alias_table[i].flags & PTA_ABM
2108 && !(target_flags_explicit & MASK_ABM))
2109 target_flags |= MASK_ABM;
2110 if (processor_alias_table[i].flags & PTA_SSE4A
2111 && !(target_flags_explicit & MASK_SSE4A))
2112 target_flags |= MASK_SSE4A;
2113 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2114 error ("CPU you selected does not support x86-64 "
2115 "instruction set");
2116 break;
2117 }
2118
2119 if (i == pta_size)
2120 error ("bad value (%s) for -march= switch", ix86_arch_string);
2121
2122 for (i = 0; i < pta_size; i++)
2123 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2124 {
2125 ix86_tune = processor_alias_table[i].processor;
2126 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2127 {
2128 if (ix86_tune_defaulted)
2129 {
2130 ix86_tune_string = "x86-64";
2131 for (i = 0; i < pta_size; i++)
2132 if (! strcmp (ix86_tune_string,
2133 processor_alias_table[i].name))
2134 break;
2135 ix86_tune = processor_alias_table[i].processor;
2136 }
2137 else
2138 error ("CPU you selected does not support x86-64 "
2139 "instruction set");
2140 }
2141 /* Intel CPUs have always interpreted SSE prefetch instructions as
2142 NOPs; so, we can enable SSE prefetch instructions even when only
2143 -mtune (rather than -march) points us to a processor that has them.
2144 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2145 higher processors. */
2146 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2147 x86_prefetch_sse = true;
2148 break;
2149 }
2150 if (i == pta_size)
2151 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2152
2153 if (optimize_size)
2154 ix86_cost = &size_cost;
2155 else
2156 ix86_cost = processor_target_table[ix86_tune].cost;
2157 target_flags |= processor_target_table[ix86_tune].target_enable;
2158 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2159
2160 /* Arrange to set up i386_stack_locals for all functions. */
2161 init_machine_status = ix86_init_machine_status;
2162
2163 /* Validate -mregparm= value. */
2164 if (ix86_regparm_string)
2165 {
2166 i = atoi (ix86_regparm_string);
2167 if (i < 0 || i > REGPARM_MAX)
2168 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2169 else
2170 ix86_regparm = i;
2171 }
2172 else
2173 if (TARGET_64BIT)
2174 ix86_regparm = REGPARM_MAX;
2175
2176 /* If the user has provided any of the -malign-* options,
2177 warn and use that value only if -falign-* is not set.
2178 Remove this code in GCC 3.2 or later. */
2179 if (ix86_align_loops_string)
2180 {
2181 warning (0, "-malign-loops is obsolete, use -falign-loops");
2182 if (align_loops == 0)
2183 {
2184 i = atoi (ix86_align_loops_string);
2185 if (i < 0 || i > MAX_CODE_ALIGN)
2186 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2187 else
2188 align_loops = 1 << i;
2189 }
2190 }
2191
2192 if (ix86_align_jumps_string)
2193 {
2194 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2195 if (align_jumps == 0)
2196 {
2197 i = atoi (ix86_align_jumps_string);
2198 if (i < 0 || i > MAX_CODE_ALIGN)
2199 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2200 else
2201 align_jumps = 1 << i;
2202 }
2203 }
2204
2205 if (ix86_align_funcs_string)
2206 {
2207 warning (0, "-malign-functions is obsolete, use -falign-functions");
2208 if (align_functions == 0)
2209 {
2210 i = atoi (ix86_align_funcs_string);
2211 if (i < 0 || i > MAX_CODE_ALIGN)
2212 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2213 else
2214 align_functions = 1 << i;
2215 }
2216 }
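/* A worked example of the obsolete options above: -malign-loops=4 sets
   align_loops to 1 << 4 == 16 bytes; the argument is an exponent, capped
   at MAX_CODE_ALIGN, whereas the newer -falign-* options take the byte
   count directly.  */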
2217
2218 /* Default align_* from the processor table. */
2219 if (align_loops == 0)
2220 {
2221 align_loops = processor_target_table[ix86_tune].align_loop;
2222 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2223 }
2224 if (align_jumps == 0)
2225 {
2226 align_jumps = processor_target_table[ix86_tune].align_jump;
2227 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2228 }
2229 if (align_functions == 0)
2230 {
2231 align_functions = processor_target_table[ix86_tune].align_func;
2232 }
2233
2234 /* Validate -mbranch-cost= value, or provide default. */
2235 ix86_branch_cost = ix86_cost->branch_cost;
2236 if (ix86_branch_cost_string)
2237 {
2238 i = atoi (ix86_branch_cost_string);
2239 if (i < 0 || i > 5)
2240 error ("-mbranch-cost=%d is not between 0 and 5", i);
2241 else
2242 ix86_branch_cost = i;
2243 }
2244 if (ix86_section_threshold_string)
2245 {
2246 i = atoi (ix86_section_threshold_string);
2247 if (i < 0)
2248 error ("-mlarge-data-threshold=%d is negative", i);
2249 else
2250 ix86_section_threshold = i;
2251 }
2252
2253 if (ix86_tls_dialect_string)
2254 {
2255 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2256 ix86_tls_dialect = TLS_DIALECT_GNU;
2257 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2258 ix86_tls_dialect = TLS_DIALECT_GNU2;
2259 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2260 ix86_tls_dialect = TLS_DIALECT_SUN;
2261 else
2262 error ("bad value (%s) for -mtls-dialect= switch",
2263 ix86_tls_dialect_string);
2264 }
2265
2266 /* Keep nonleaf frame pointers. */
2267 if (flag_omit_frame_pointer)
2268 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2269 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2270 flag_omit_frame_pointer = 1;
2271
2272 /* If we're doing fast math, we don't care about comparison order
2273 wrt NaNs. This lets us use a shorter comparison sequence. */
2274 if (flag_finite_math_only)
2275 target_flags &= ~MASK_IEEE_FP;
2276
2277 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2278 since the insns won't need emulation. */
2279 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2280 target_flags &= ~MASK_NO_FANCY_MATH_387;
2281
2282 /* Likewise, if the target doesn't have a 387, or we've specified
2283 software floating point, don't use 387 inline intrinsics. */
2284 if (!TARGET_80387)
2285 target_flags |= MASK_NO_FANCY_MATH_387;
2286
2287 /* Turn on SSE3 builtins for -mssse3. */
2288 if (TARGET_SSSE3)
2289 target_flags |= MASK_SSE3;
2290
2291 /* Turn on SSE3 builtins for -msse4a. */
2292 if (TARGET_SSE4A)
2293 target_flags |= MASK_SSE3;
2294
2295 /* Turn on SSE2 builtins for -msse3. */
2296 if (TARGET_SSE3)
2297 target_flags |= MASK_SSE2;
2298
2299 /* Turn on SSE builtins for -msse2. */
2300 if (TARGET_SSE2)
2301 target_flags |= MASK_SSE;
2302
2303 /* Turn on MMX builtins for -msse. */
2304 if (TARGET_SSE)
2305 {
2306 target_flags |= MASK_MMX & ~target_flags_explicit;
2307 x86_prefetch_sse = true;
2308 }
2309
2310 /* Turn on MMX builtins for 3Dnow. */
2311 if (TARGET_3DNOW)
2312 target_flags |= MASK_MMX;
2313
2314 /* Turn on POPCNT builtins for -mabm. */
2315 if (TARGET_ABM)
2316 target_flags |= MASK_POPCNT;
2317
2318 if (TARGET_64BIT)
2319 {
2320 if (TARGET_ALIGN_DOUBLE)
2321 error ("-malign-double makes no sense in the 64bit mode");
2322 if (TARGET_RTD)
2323 error ("-mrtd calling convention not supported in the 64bit mode");
2324
2325 /* Enable by default the SSE and MMX builtins. Do allow the user to
2326 explicitly disable any of these. In particular, disabling SSE and
2327 MMX for kernel code is extremely useful. */
2328 target_flags
2329 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2330 & ~target_flags_explicit);
2331 }
2332 else
2333 {
2334 /* The i386 ABI does not specify a red zone.  It still makes sense to use
2335 one when the programmer takes care to keep the stack from being clobbered.  */
2336 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2337 target_flags |= MASK_NO_RED_ZONE;
2338 }
2339
2340 /* Validate -mpreferred-stack-boundary= value, or provide default.
2341 The default of 128 bits is for Pentium III's SSE __m128.  We can't
2342 lower it even for optimize_size; otherwise we could not mix object
2343 files compiled with -Os and -On.  */
2344 ix86_preferred_stack_boundary = 128;
2345 if (ix86_preferred_stack_boundary_string)
2346 {
2347 i = atoi (ix86_preferred_stack_boundary_string);
2348 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2349 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2350 TARGET_64BIT ? 4 : 2);
2351 else
2352 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2353 }
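/* Worked example for the check above: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT == 128 bits, i.e. the default 16-byte
   alignment; the 64-bit minimum of 4 reflects the 16-byte stack alignment
   mandated by the x86-64 ABI.  */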
2354
2355 /* Accept -msseregparm only if at least SSE support is enabled. */
2356 if (TARGET_SSEREGPARM
2357 && ! TARGET_SSE)
2358 error ("-msseregparm used without SSE enabled");
2359
2360 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2361
2362 if (ix86_fpmath_string != 0)
2363 {
2364 if (! strcmp (ix86_fpmath_string, "387"))
2365 ix86_fpmath = FPMATH_387;
2366 else if (! strcmp (ix86_fpmath_string, "sse"))
2367 {
2368 if (!TARGET_SSE)
2369 {
2370 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2371 ix86_fpmath = FPMATH_387;
2372 }
2373 else
2374 ix86_fpmath = FPMATH_SSE;
2375 }
2376 else if (! strcmp (ix86_fpmath_string, "387,sse")
2377 || ! strcmp (ix86_fpmath_string, "sse,387"))
2378 {
2379 if (!TARGET_SSE)
2380 {
2381 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2382 ix86_fpmath = FPMATH_387;
2383 }
2384 else if (!TARGET_80387)
2385 {
2386 warning (0, "387 instruction set disabled, using SSE arithmetics");
2387 ix86_fpmath = FPMATH_SSE;
2388 }
2389 else
2390 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2391 }
2392 else
2393 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2394 }
2395
2396 /* If the i387 is disabled, then do not return values in it. */
2397 if (!TARGET_80387)
2398 target_flags &= ~MASK_FLOAT_RETURNS;
2399
2400 if ((x86_accumulate_outgoing_args & TUNEMASK)
2401 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2402 && !optimize_size)
2403 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2404
2405 /* ??? Unwind info is not correct around the CFG unless either a frame
2406 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2407 unwind info generation to be aware of the CFG and propagating states
2408 around edges. */
2409 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2410 || flag_exceptions || flag_non_call_exceptions)
2411 && flag_omit_frame_pointer
2412 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2413 {
2414 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2415 warning (0, "unwind tables currently require either a frame pointer "
2416 "or -maccumulate-outgoing-args for correctness");
2417 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2418 }
2419
2420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2421 {
2422 char *p;
2423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2424 p = strchr (internal_label_prefix, 'X');
2425 internal_label_prefix_len = p - internal_label_prefix;
2426 *p = '\0';
2427 }
2428
2429 /* When the scheduling description is not available, disable the scheduler
2430 pass so it won't slow down compilation and make x87 code slower.  */
2431 if (!TARGET_SCHEDULE)
2432 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2433
2434 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2435 set_param_value ("simultaneous-prefetches",
2436 ix86_cost->simultaneous_prefetches);
2437 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2438 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2439 }
2440 \f
2441 /* Switch to the appropriate section for output of DECL.
2442 DECL is either a `VAR_DECL' node or a constant of some sort.
2443 RELOC indicates whether forming the initial value of DECL requires
2444 link-time relocations. */
2445
2446 static section *
2447 x86_64_elf_select_section (tree decl, int reloc,
2448 unsigned HOST_WIDE_INT align)
2449 {
2450 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2451 && ix86_in_large_data_p (decl))
2452 {
2453 const char *sname = NULL;
2454 unsigned int flags = SECTION_WRITE;
2455 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2456 {
2457 case SECCAT_DATA:
2458 sname = ".ldata";
2459 break;
2460 case SECCAT_DATA_REL:
2461 sname = ".ldata.rel";
2462 break;
2463 case SECCAT_DATA_REL_LOCAL:
2464 sname = ".ldata.rel.local";
2465 break;
2466 case SECCAT_DATA_REL_RO:
2467 sname = ".ldata.rel.ro";
2468 break;
2469 case SECCAT_DATA_REL_RO_LOCAL:
2470 sname = ".ldata.rel.ro.local";
2471 break;
2472 case SECCAT_BSS:
2473 sname = ".lbss";
2474 flags |= SECTION_BSS;
2475 break;
2476 case SECCAT_RODATA:
2477 case SECCAT_RODATA_MERGE_STR:
2478 case SECCAT_RODATA_MERGE_STR_INIT:
2479 case SECCAT_RODATA_MERGE_CONST:
2480 sname = ".lrodata";
2481 flags = 0;
2482 break;
2483 case SECCAT_SRODATA:
2484 case SECCAT_SDATA:
2485 case SECCAT_SBSS:
2486 gcc_unreachable ();
2487 case SECCAT_TEXT:
2488 case SECCAT_TDATA:
2489 case SECCAT_TBSS:
2490 /* We don't split these for the medium model.  Place them into
2491 default sections and hope for the best.  */
2492 break;
2493 }
2494 if (sname)
2495 {
2496 /* We might get called with string constants, but get_named_section
2497 doesn't like them as they are not DECLs. Also, we need to set
2498 flags in that case. */
2499 if (!DECL_P (decl))
2500 return get_section (sname, flags, NULL);
2501 return get_named_section (decl, sname, reloc);
2502 }
2503 }
2504 return default_elf_select_section (decl, reloc, align);
2505 }
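/* Illustrative effect of the hook above (hypothetical declaration): with
   -mcmodel=medium and the default -mlarge-data-threshold of 65536, a
   definition such as

     static char big_buffer[1 << 20];

   is placed in .lbss rather than .bss, while small objects keep using the
   normal sections.  */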
2506
2507 /* Build up a unique section name, expressed as a
2508 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2509 RELOC indicates whether the initial value of EXP requires
2510 link-time relocations. */
2511
2512 static void
2513 x86_64_elf_unique_section (tree decl, int reloc)
2514 {
2515 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2516 && ix86_in_large_data_p (decl))
2517 {
2518 const char *prefix = NULL;
2519 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2520 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2521
2522 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2523 {
2524 case SECCAT_DATA:
2525 case SECCAT_DATA_REL:
2526 case SECCAT_DATA_REL_LOCAL:
2527 case SECCAT_DATA_REL_RO:
2528 case SECCAT_DATA_REL_RO_LOCAL:
2529 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2530 break;
2531 case SECCAT_BSS:
2532 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2533 break;
2534 case SECCAT_RODATA:
2535 case SECCAT_RODATA_MERGE_STR:
2536 case SECCAT_RODATA_MERGE_STR_INIT:
2537 case SECCAT_RODATA_MERGE_CONST:
2538 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2539 break;
2540 case SECCAT_SRODATA:
2541 case SECCAT_SDATA:
2542 case SECCAT_SBSS:
2543 gcc_unreachable ();
2544 case SECCAT_TEXT:
2545 case SECCAT_TDATA:
2546 case SECCAT_TBSS:
2547 /* We don't split these for the medium model.  Place them into
2548 default sections and hope for the best.  */
2549 break;
2550 }
2551 if (prefix)
2552 {
2553 const char *name;
2554 size_t nlen, plen;
2555 char *string;
2556 plen = strlen (prefix);
2557
2558 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2559 name = targetm.strip_name_encoding (name);
2560 nlen = strlen (name);
2561
2562 string = alloca (nlen + plen + 1);
2563 memcpy (string, prefix, plen);
2564 memcpy (string + plen, name, nlen + 1);
2565
2566 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2567 return;
2568 }
2569 }
2570 default_unique_section (decl, reloc);
2571 }
2572
2573 #ifdef COMMON_ASM_OP
2574 /* This says how to output assembler code to declare an
2575 uninitialized external linkage data object.
2576
2577 For medium model x86-64 we need to use the .largecomm directive for
2578 large objects. */
2579 void
2580 x86_elf_aligned_common (FILE *file,
2581 const char *name, unsigned HOST_WIDE_INT size,
2582 int align)
2583 {
2584 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2585 && size > (unsigned int)ix86_section_threshold)
2586 fprintf (file, ".largecomm\t");
2587 else
2588 fprintf (file, "%s", COMMON_ASM_OP);
2589 assemble_name (file, name);
2590 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2591 size, align / BITS_PER_UNIT);
2592 }
2593 #endif
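/* Example of the output produced above (hypothetical symbol): a one-MiB
   common object with 32-byte alignment under -mcmodel=medium is emitted
   roughly as

     .largecomm	big_table,1048576,32

   whereas smaller objects keep using the ordinary COMMON_ASM_OP (.comm)
   form.  */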
2594 /* Utility function for targets to use in implementing
2595 ASM_OUTPUT_ALIGNED_BSS. */
2596
2597 void
2598 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2599 const char *name, unsigned HOST_WIDE_INT size,
2600 int align)
2601 {
2602 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2603 && size > (unsigned int)ix86_section_threshold)
2604 switch_to_section (get_named_section (decl, ".lbss", 0));
2605 else
2606 switch_to_section (bss_section);
2607 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2608 #ifdef ASM_DECLARE_OBJECT_NAME
2609 last_assemble_variable_decl = decl;
2610 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2611 #else
2612 /* Standard thing is just output label for the object. */
2613 ASM_OUTPUT_LABEL (file, name);
2614 #endif /* ASM_DECLARE_OBJECT_NAME */
2615 ASM_OUTPUT_SKIP (file, size ? size : 1);
2616 }
2617 \f
2618 void
2619 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2620 {
2621 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2622 make the problem with not enough registers even worse. */
2623 #ifdef INSN_SCHEDULING
2624 if (level > 1)
2625 flag_schedule_insns = 0;
2626 #endif
2627
2628 if (TARGET_MACHO)
2629 /* The Darwin libraries never set errno, so we might as well
2630 avoid calling them when that's the only reason we would. */
2631 flag_errno_math = 0;
2632
2633 /* The default values of these switches depend on TARGET_64BIT, which is
2634 not known at this moment.  Mark these values with 2 and let the user
2635 override them.  If there is no command line option specifying them,
2636 we will set the defaults in override_options.  */
2637 if (optimize >= 1)
2638 flag_omit_frame_pointer = 2;
2639 flag_pcc_struct_return = 2;
2640 flag_asynchronous_unwind_tables = 2;
2641 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2642 SUBTARGET_OPTIMIZATION_OPTIONS;
2643 #endif
2644 }
2645 \f
2646 /* Table of valid machine attributes. */
2647 const struct attribute_spec ix86_attribute_table[] =
2648 {
2649 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2650 /* Stdcall attribute says callee is responsible for popping arguments
2651 if they are not variable. */
2652 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2653 /* Fastcall attribute says callee is responsible for popping arguments
2654 if they are not variable. */
2655 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2656 /* Cdecl attribute says the callee is a normal C declaration */
2657 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2658 /* Regparm attribute specifies how many integer arguments are to be
2659 passed in registers. */
2660 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2661 /* Sseregparm attribute says we are using x86_64 calling conventions
2662 for FP arguments. */
2663 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2664 /* force_align_arg_pointer says this function realigns the stack at entry. */
2665 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2666 false, true, true, ix86_handle_cconv_attribute },
2667 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2668 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2669 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2670 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2671 #endif
2672 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2673 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2674 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2675 SUBTARGET_ATTRIBUTE_TABLE,
2676 #endif
2677 { NULL, 0, 0, false, false, false, NULL }
2678 };
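/* Typical source-level uses of the attributes in the table above
   (illustrative declarations only):

     int __attribute__((regparm(3))) f (int a, int b, int c);
     int __attribute__((fastcall)) g (int a, int b);
     int __attribute__((stdcall)) h (int a);

   With regparm(3) the first three integer arguments arrive in %eax, %edx
   and %ecx; with fastcall the first two arrive in %ecx and %edx and the
   callee pops any stack arguments; with stdcall all arguments stay on the
   stack and the callee pops them.  */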
2679
2680 /* Decide whether we can make a sibling call to a function. DECL is the
2681 declaration of the function being targeted by the call and EXP is the
2682 CALL_EXPR representing the call. */
2683
2684 static bool
2685 ix86_function_ok_for_sibcall (tree decl, tree exp)
2686 {
2687 tree func;
2688 rtx a, b;
2689
2690 /* If we are generating position-independent code, we cannot sibcall
2691 optimize any indirect call, or a direct call to a global function,
2692 as the PLT requires %ebx be live. */
2693 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2694 return false;
2695
2696 if (decl)
2697 func = decl;
2698 else
2699 {
2700 func = TREE_TYPE (CALL_EXPR_FN (exp));
2701 if (POINTER_TYPE_P (func))
2702 func = TREE_TYPE (func);
2703 }
2704
2705 /* Check that the return value locations are the same.  For example,
2706 if we are returning floats on the 80387 register stack, we cannot
2707 make a sibcall from a function that doesn't return a float to a
2708 function that does or, conversely, from a function that does return
2709 a float to a function that doesn't; the necessary stack adjustment
2710 would not be executed. This is also the place we notice
2711 differences in the return value ABI. Note that it is ok for one
2712 of the functions to have void return type as long as the return
2713 value of the other is passed in a register. */
2714 a = ix86_function_value (TREE_TYPE (exp), func, false);
2715 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2716 cfun->decl, false);
2717 if (STACK_REG_P (a) || STACK_REG_P (b))
2718 {
2719 if (!rtx_equal_p (a, b))
2720 return false;
2721 }
2722 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2723 ;
2724 else if (!rtx_equal_p (a, b))
2725 return false;
2726
2727 /* If this call is indirect, we'll need to be able to use a call-clobbered
2728 register for the address of the target function. Make sure that all
2729 such registers are not used for passing parameters. */
2730 if (!decl && !TARGET_64BIT)
2731 {
2732 tree type;
2733
2734 /* We're looking at the CALL_EXPR, we need the type of the function. */
2735 type = CALL_EXPR_FN (exp); /* pointer expression */
2736 type = TREE_TYPE (type); /* pointer type */
2737 type = TREE_TYPE (type); /* function type */
2738
2739 if (ix86_function_regparm (type, NULL) >= 3)
2740 {
2741 /* ??? Need to count the actual number of registers to be used,
2742 not the possible number of registers. Fix later. */
2743 return false;
2744 }
2745 }
2746
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 /* Dllimport'd functions are also called indirectly. */
2749 if (decl && DECL_DLLIMPORT_P (decl)
2750 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2751 return false;
2752 #endif
2753
2754 /* If we force-aligned the stack, then sibcalling would unalign the
2755 stack, which may break the called function. */
2756 if (cfun->machine->force_align_arg_pointer)
2757 return false;
2758
2759 /* Otherwise okay. That also includes certain types of indirect calls. */
2760 return true;
2761 }
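/* A concrete case of the indirect-call restriction above (hypothetical
   types): in 32-bit code, a call through

     int (__attribute__((regparm(3))) *fp) (int, int, int);

   cannot be turned into a sibcall, because %eax, %edx and %ecx would all
   carry arguments and no call-clobbered register would remain to hold the
   target address.  */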
2762
2763 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2764 calling convention attributes;
2765 arguments as in struct attribute_spec.handler. */
2766
2767 static tree
2768 ix86_handle_cconv_attribute (tree *node, tree name,
2769 tree args,
2770 int flags ATTRIBUTE_UNUSED,
2771 bool *no_add_attrs)
2772 {
2773 if (TREE_CODE (*node) != FUNCTION_TYPE
2774 && TREE_CODE (*node) != METHOD_TYPE
2775 && TREE_CODE (*node) != FIELD_DECL
2776 && TREE_CODE (*node) != TYPE_DECL)
2777 {
2778 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2779 IDENTIFIER_POINTER (name));
2780 *no_add_attrs = true;
2781 return NULL_TREE;
2782 }
2783
2784 /* Can combine regparm with all attributes but fastcall. */
2785 if (is_attribute_p ("regparm", name))
2786 {
2787 tree cst;
2788
2789 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2790 {
2791 error ("fastcall and regparm attributes are not compatible");
2792 }
2793
2794 cst = TREE_VALUE (args);
2795 if (TREE_CODE (cst) != INTEGER_CST)
2796 {
2797 warning (OPT_Wattributes,
2798 "%qs attribute requires an integer constant argument",
2799 IDENTIFIER_POINTER (name));
2800 *no_add_attrs = true;
2801 }
2802 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2803 {
2804 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2805 IDENTIFIER_POINTER (name), REGPARM_MAX);
2806 *no_add_attrs = true;
2807 }
2808
2809 if (!TARGET_64BIT
2810 && lookup_attribute (ix86_force_align_arg_pointer_string,
2811 TYPE_ATTRIBUTES (*node))
2812 && compare_tree_int (cst, REGPARM_MAX-1))
2813 {
2814 error ("%s functions limited to %d register parameters",
2815 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2816 }
2817
2818 return NULL_TREE;
2819 }
2820
2821 if (TARGET_64BIT)
2822 {
2823 warning (OPT_Wattributes, "%qs attribute ignored",
2824 IDENTIFIER_POINTER (name));
2825 *no_add_attrs = true;
2826 return NULL_TREE;
2827 }
2828
2829 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2830 if (is_attribute_p ("fastcall", name))
2831 {
2832 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2833 {
2834 error ("fastcall and cdecl attributes are not compatible");
2835 }
2836 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2837 {
2838 error ("fastcall and stdcall attributes are not compatible");
2839 }
2840 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2841 {
2842 error ("fastcall and regparm attributes are not compatible");
2843 }
2844 }
2845
2846 /* Can combine stdcall with fastcall (redundant), regparm and
2847 sseregparm. */
2848 else if (is_attribute_p ("stdcall", name))
2849 {
2850 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2851 {
2852 error ("stdcall and cdecl attributes are not compatible");
2853 }
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2855 {
2856 error ("stdcall and fastcall attributes are not compatible");
2857 }
2858 }
2859
2860 /* Can combine cdecl with regparm and sseregparm. */
2861 else if (is_attribute_p ("cdecl", name))
2862 {
2863 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2864 {
2865 error ("stdcall and cdecl attributes are not compatible");
2866 }
2867 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2868 {
2869 error ("fastcall and cdecl attributes are not compatible");
2870 }
2871 }
2872
2873 /* Can combine sseregparm with all attributes. */
2874
2875 return NULL_TREE;
2876 }
2877
2878 /* Return 0 if the attributes for two types are incompatible, 1 if they
2879 are compatible, and 2 if they are nearly compatible (which causes a
2880 warning to be generated). */
2881
2882 static int
2883 ix86_comp_type_attributes (tree type1, tree type2)
2884 {
2885 /* Check for mismatch of non-default calling convention. */
2886 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2887
2888 if (TREE_CODE (type1) != FUNCTION_TYPE)
2889 return 1;
2890
2891 /* Check for mismatched fastcall/regparm types. */
2892 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2893 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2894 || (ix86_function_regparm (type1, NULL)
2895 != ix86_function_regparm (type2, NULL)))
2896 return 0;
2897
2898 /* Check for mismatched sseregparm types. */
2899 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2900 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2901 return 0;
2902
2903 /* Check for mismatched return types (cdecl vs stdcall). */
2904 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2905 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2906 return 0;
2907
2908 return 1;
2909 }
2910 \f
2911 /* Return the regparm value for a function with the indicated TYPE and DECL.
2912 DECL may be NULL when calling function indirectly
2913 or considering a libcall. */
2914
2915 static int
2916 ix86_function_regparm (tree type, tree decl)
2917 {
2918 tree attr;
2919 int regparm = ix86_regparm;
2920 bool user_convention = false;
2921
2922 if (!TARGET_64BIT)
2923 {
2924 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2925 if (attr)
2926 {
2927 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2928 user_convention = true;
2929 }
2930
2931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2932 {
2933 regparm = 2;
2934 user_convention = true;
2935 }
2936
2937 /* Use register calling convention for local functions when possible. */
2938 if (!TARGET_64BIT && !user_convention && decl
2939 && flag_unit_at_a_time && !profile_flag)
2940 {
2941 struct cgraph_local_info *i = cgraph_local_info (decl);
2942 if (i && i->local)
2943 {
2944 int local_regparm, globals = 0, regno;
2945
2946 /* Make sure no regparm register is taken by a global register
2947 variable. */
2948 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2949 if (global_regs[local_regparm])
2950 break;
2951 /* We can't use regparm(3) for nested functions, as these use the
2952 static chain pointer in the third argument. */
2953 if (local_regparm == 3
2954 && decl_function_context (decl)
2955 && !DECL_NO_STATIC_CHAIN (decl))
2956 local_regparm = 2;
2957 /* If the function realigns its stack pointer, the
2958 prologue will clobber %ecx. If we've already
2959 generated code for the callee, the callee
2960 DECL_STRUCT_FUNCTION is gone, so we fall back to
2961 scanning the attributes for the self-realigning
2962 property. */
2963 if ((DECL_STRUCT_FUNCTION (decl)
2964 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2965 || (!DECL_STRUCT_FUNCTION (decl)
2966 && lookup_attribute (ix86_force_align_arg_pointer_string,
2967 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2968 local_regparm = 2;
2969 /* Each global register variable increases register pressure,
2970 so the more global register variables there are, the less useful
2971 the regparm optimization is, unless requested by the user explicitly. */
2972 for (regno = 0; regno < 6; regno++)
2973 if (global_regs[regno])
2974 globals++;
2975 local_regparm
2976 = globals < local_regparm ? local_regparm - globals : 0;
2977
2978 if (local_regparm > regparm)
2979 regparm = local_regparm;
2980 }
2981 }
2982 }
2983 return regparm;
2984 }
2985
2986 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2987 DFmode (2) arguments in SSE registers for a function with the
2988 indicated TYPE and DECL. DECL may be NULL when calling function
2989 indirectly or considering a libcall. Otherwise return 0. */
2990
2991 static int
2992 ix86_function_sseregparm (tree type, tree decl)
2993 {
2994 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2995 by the sseregparm attribute. */
2996 if (TARGET_SSEREGPARM
2997 || (type
2998 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2999 {
3000 if (!TARGET_SSE)
3001 {
3002 if (decl)
3003 error ("Calling %qD with attribute sseregparm without "
3004 "SSE/SSE2 enabled", decl);
3005 else
3006 error ("Calling %qT with attribute sseregparm without "
3007 "SSE/SSE2 enabled", type);
3008 return 0;
3009 }
3010
3011 return 2;
3012 }
3013
3014 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3015 (and DFmode for SSE2) arguments in SSE registers,
3016 even for 32-bit targets. */
3017 if (!TARGET_64BIT && decl
3018 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3019 {
3020 struct cgraph_local_info *i = cgraph_local_info (decl);
3021 if (i && i->local)
3022 return TARGET_SSE2 ? 2 : 1;
3023 }
3024
3025 return 0;
3026 }
3027
3028 /* Return true if EAX is live at the start of the function. Used by
3029 ix86_expand_prologue to determine if we need special help before
3030 calling allocate_stack_worker. */
3031
3032 static bool
3033 ix86_eax_live_at_start_p (void)
3034 {
3035 /* Cheat. Don't bother working forward from ix86_function_regparm
3036 to the function type to whether an actual argument is located in
3037 eax. Instead just look at cfg info, which is still close enough
3038 to correct at this point. This gives false positives for broken
3039 functions that might use uninitialized data that happens to be
3040 allocated in eax, but who cares? */
3041 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3042 }
3043
3044 /* Value is the number of bytes of arguments automatically
3045 popped when returning from a subroutine call.
3046 FUNDECL is the declaration node of the function (as a tree),
3047 FUNTYPE is the data type of the function (as a tree),
3048 or for a library call it is an identifier node for the subroutine name.
3049 SIZE is the number of bytes of arguments passed on the stack.
3050
3051 On the 80386, the RTD insn may be used to pop them if the number
3052 of args is fixed, but if the number is variable then the caller
3053 must pop them all. RTD can't be used for library calls now
3054 because the library is compiled with the Unix compiler.
3055 Use of RTD is a selectable option, since it is incompatible with
3056 standard Unix calling sequences. If the option is not selected,
3057 the caller must always pop the args.
3058
3059 The attribute stdcall is equivalent to RTD on a per module basis. */
3060
3061 int
3062 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3063 {
3064 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3065
3066 /* Cdecl functions override -mrtd, and never pop the stack. */
3067 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3068
3069 /* Stdcall and fastcall functions will pop the stack if not
3070 variable args. */
3071 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3072 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3073 rtd = 1;
3074
3075 if (rtd
3076 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3077 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3078 == void_type_node)))
3079 return size;
3080 }
3081
3082 /* Lose any fake structure return argument if it is passed on the stack. */
3083 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3084 && !TARGET_64BIT
3085 && !KEEP_AGGREGATE_RETURN_POINTER)
3086 {
3087 int nregs = ix86_function_regparm (funtype, fundecl);
3088
3089 if (!nregs)
3090 return GET_MODE_SIZE (Pmode);
3091 }
3092
3093 return 0;
3094 }
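/* For illustration: a stdcall function taking two ints returns with
   "ret $8" (the callee pops its 8 bytes of stack arguments), whereas a
   cdecl or variadic function returns with a plain "ret" and leaves the
   argument cleanup to the caller.  */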
3095 \f
3096 /* Argument support functions. */
3097
3098 /* Return true when register may be used to pass function parameters. */
3099 bool
3100 ix86_function_arg_regno_p (int regno)
3101 {
3102 int i;
3103 if (!TARGET_64BIT)
3104 {
3105 if (TARGET_MACHO)
3106 return (regno < REGPARM_MAX
3107 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3108 else
3109 return (regno < REGPARM_MAX
3110 || (TARGET_MMX && MMX_REGNO_P (regno)
3111 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3112 || (TARGET_SSE && SSE_REGNO_P (regno)
3113 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3114 }
3115
3116 if (TARGET_MACHO)
3117 {
3118 if (SSE_REGNO_P (regno) && TARGET_SSE)
3119 return true;
3120 }
3121 else
3122 {
3123 if (TARGET_SSE && SSE_REGNO_P (regno)
3124 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3125 return true;
3126 }
3127 /* RAX is used as hidden argument to va_arg functions. */
3128 if (!regno)
3129 return true;
3130 for (i = 0; i < REGPARM_MAX; i++)
3131 if (regno == x86_64_int_parameter_registers[i])
3132 return true;
3133 return false;
3134 }
3135
3136 /* Return if we do not know how to pass TYPE solely in registers. */
3137
3138 static bool
3139 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3140 {
3141 if (must_pass_in_stack_var_size_or_pad (mode, type))
3142 return true;
3143
3144 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3145 The layout_type routine is crafty and tries to trick us into passing
3146 currently unsupported vector types on the stack by using TImode. */
3147 return (!TARGET_64BIT && mode == TImode
3148 && type && TREE_CODE (type) != VECTOR_TYPE);
3149 }
3150
3151 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3152 for a call to a function whose data type is FNTYPE.
3153 For a library call, FNTYPE is 0. */
3154
3155 void
3156 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3157 tree fntype, /* tree ptr for function decl */
3158 rtx libname, /* SYMBOL_REF of library name or 0 */
3159 tree fndecl)
3160 {
3161 static CUMULATIVE_ARGS zero_cum;
3162 tree param, next_param;
3163
3164 if (TARGET_DEBUG_ARG)
3165 {
3166 fprintf (stderr, "\ninit_cumulative_args (");
3167 if (fntype)
3168 fprintf (stderr, "fntype code = %s, ret code = %s",
3169 tree_code_name[(int) TREE_CODE (fntype)],
3170 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3171 else
3172 fprintf (stderr, "no fntype");
3173
3174 if (libname)
3175 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3176 }
3177
3178 *cum = zero_cum;
3179
3180 /* Set up the number of registers to use for passing arguments. */
3181 cum->nregs = ix86_regparm;
3182 if (TARGET_SSE)
3183 cum->sse_nregs = SSE_REGPARM_MAX;
3184 if (TARGET_MMX)
3185 cum->mmx_nregs = MMX_REGPARM_MAX;
3186 cum->warn_sse = true;
3187 cum->warn_mmx = true;
3188 cum->maybe_vaarg = false;
3189
3190 /* Use ecx and edx registers if function has fastcall attribute,
3191 else look for regparm information. */
3192 if (fntype && !TARGET_64BIT)
3193 {
3194 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3195 {
3196 cum->nregs = 2;
3197 cum->fastcall = 1;
3198 }
3199 else
3200 cum->nregs = ix86_function_regparm (fntype, fndecl);
3201 }
3202
3203 /* Set up the number of SSE registers used for passing SFmode
3204 and DFmode arguments. Warn for mismatching ABI. */
3205 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3206
3207 /* Determine if this function has variable arguments. This is
3208 indicated by the last argument being 'void_type_node' if there
3209 are no variable arguments. If there are variable arguments, then
3210 we won't pass anything in registers in 32-bit mode. */
3211
3212 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3213 {
3214 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3215 param != 0; param = next_param)
3216 {
3217 next_param = TREE_CHAIN (param);
3218 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3219 {
3220 if (!TARGET_64BIT)
3221 {
3222 cum->nregs = 0;
3223 cum->sse_nregs = 0;
3224 cum->mmx_nregs = 0;
3225 cum->warn_sse = 0;
3226 cum->warn_mmx = 0;
3227 cum->fastcall = 0;
3228 cum->float_in_sse = 0;
3229 }
3230 cum->maybe_vaarg = true;
3231 }
3232 }
3233 }
3234 if ((!fntype && !libname)
3235 || (fntype && !TYPE_ARG_TYPES (fntype)))
3236 cum->maybe_vaarg = true;
3237
3238 if (TARGET_DEBUG_ARG)
3239 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3240
3241 return;
3242 }
3243
3244 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3245 But in the case of vector types, it is some vector mode.
3246
3247 When we have only some of our vector ISA extensions enabled, then there
3248 are some modes for which vector_mode_supported_p is false. For these
3249 modes, the generic vector support in gcc will choose some non-vector mode
3250 in order to implement the type. By computing the natural mode, we'll
3251 select the proper ABI location for the operand and not depend on whatever
3252 the middle-end decides to do with these vector types. */
3253
3254 static enum machine_mode
3255 type_natural_mode (tree type)
3256 {
3257 enum machine_mode mode = TYPE_MODE (type);
3258
3259 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3260 {
3261 HOST_WIDE_INT size = int_size_in_bytes (type);
3262 if ((size == 8 || size == 16)
3263 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3264 && TYPE_VECTOR_SUBPARTS (type) > 1)
3265 {
3266 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3267
3268 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3269 mode = MIN_MODE_VECTOR_FLOAT;
3270 else
3271 mode = MIN_MODE_VECTOR_INT;
3272
3273 /* Get the mode which has this inner mode and number of units. */
3274 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3275 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3276 && GET_MODE_INNER (mode) == innermode)
3277 return mode;
3278
3279 gcc_unreachable ();
3280 }
3281 }
3282
3283 return mode;
3284 }
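/* For illustration: with SSE disabled, a generic vector type such as

       typedef float v4sf __attribute__ ((vector_size (16)));

   may be laid out by the middle-end in a non-vector mode, but its natural
   mode as computed above is still V4SFmode, so the ABI location chosen for
   such an argument does not depend on which ISA extensions are enabled.  */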
3285
3286 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3287 this may not agree with the mode that the type system has chosen for the
3288 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3289 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3290
3291 static rtx
3292 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3293 unsigned int regno)
3294 {
3295 rtx tmp;
3296
3297 if (orig_mode != BLKmode)
3298 tmp = gen_rtx_REG (orig_mode, regno);
3299 else
3300 {
3301 tmp = gen_rtx_REG (mode, regno);
3302 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3303 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3304 }
3305
3306 return tmp;
3307 }
3308
3309 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3310 The goal of this code is to classify each eightbyte (8-byte chunk) of an
3311 incoming argument by register class and assign registers accordingly. */
3312
3313 /* Return the union class of CLASS1 and CLASS2.
3314 See the x86-64 PS ABI for details. */
3315
3316 static enum x86_64_reg_class
3317 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3318 {
3319 /* Rule #1: If both classes are equal, this is the resulting class. */
3320 if (class1 == class2)
3321 return class1;
3322
3323 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3324 the other class. */
3325 if (class1 == X86_64_NO_CLASS)
3326 return class2;
3327 if (class2 == X86_64_NO_CLASS)
3328 return class1;
3329
3330 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3331 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3332 return X86_64_MEMORY_CLASS;
3333
3334 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3335 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3336 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3337 return X86_64_INTEGERSI_CLASS;
3338 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3339 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3340 return X86_64_INTEGER_CLASS;
3341
3342 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3343 MEMORY is used. */
3344 if (class1 == X86_64_X87_CLASS
3345 || class1 == X86_64_X87UP_CLASS
3346 || class1 == X86_64_COMPLEX_X87_CLASS
3347 || class2 == X86_64_X87_CLASS
3348 || class2 == X86_64_X87UP_CLASS
3349 || class2 == X86_64_COMPLEX_X87_CLASS)
3350 return X86_64_MEMORY_CLASS;
3351
3352 /* Rule #6: Otherwise class SSE is used. */
3353 return X86_64_SSE_CLASS;
3354 }
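/* For illustration: in "union { int i; float f; }" both members share one
   eightbyte; the int yields X86_64_INTEGERSI_CLASS and the float
   X86_64_SSESF_CLASS, and rule #4 above merges them into
   X86_64_INTEGERSI_CLASS, so the union is passed in a general-purpose
   register.  */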
3355
3356 /* Classify the argument of type TYPE and mode MODE.
3357 CLASSES will be filled by the register class used to pass each word
3358 of the operand. The number of words is returned. In case the parameter
3359 should be passed in memory, 0 is returned. As a special case for zero
3360 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3361
3362 BIT_OFFSET is used internally for handling records; it specifies the
3363 offset in bits, modulo 256, to avoid overflow cases.
3364
3365 See the x86-64 PS ABI for details.
3366 */
3367
3368 static int
3369 classify_argument (enum machine_mode mode, tree type,
3370 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3371 {
3372 HOST_WIDE_INT bytes =
3373 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3374 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3375
3376 /* Variable sized entities are always passed/returned in memory. */
3377 if (bytes < 0)
3378 return 0;
3379
3380 if (mode != VOIDmode
3381 && targetm.calls.must_pass_in_stack (mode, type))
3382 return 0;
3383
3384 if (type && AGGREGATE_TYPE_P (type))
3385 {
3386 int i;
3387 tree field;
3388 enum x86_64_reg_class subclasses[MAX_CLASSES];
3389
3390 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3391 if (bytes > 16)
3392 return 0;
3393
3394 for (i = 0; i < words; i++)
3395 classes[i] = X86_64_NO_CLASS;
3396
3397 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3398 signal the memory class, so handle this as a special case. */
3399 if (!words)
3400 {
3401 classes[0] = X86_64_NO_CLASS;
3402 return 1;
3403 }
3404
3405 /* Classify each field of record and merge classes. */
3406 switch (TREE_CODE (type))
3407 {
3408 case RECORD_TYPE:
3409 /* And now merge the fields of structure. */
3410 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3411 {
3412 if (TREE_CODE (field) == FIELD_DECL)
3413 {
3414 int num;
3415
3416 if (TREE_TYPE (field) == error_mark_node)
3417 continue;
3418
3419 /* Bitfields are always classified as integer. Handle them
3420 early, since later code would consider them to be
3421 misaligned integers. */
3422 if (DECL_BIT_FIELD (field))
3423 {
3424 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3425 i < ((int_bit_position (field) + (bit_offset % 64))
3426 + tree_low_cst (DECL_SIZE (field), 0)
3427 + 63) / 8 / 8; i++)
3428 classes[i] =
3429 merge_classes (X86_64_INTEGER_CLASS,
3430 classes[i]);
3431 }
3432 else
3433 {
3434 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3435 TREE_TYPE (field), subclasses,
3436 (int_bit_position (field)
3437 + bit_offset) % 256);
3438 if (!num)
3439 return 0;
3440 for (i = 0; i < num; i++)
3441 {
3442 int pos =
3443 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3444 classes[i + pos] =
3445 merge_classes (subclasses[i], classes[i + pos]);
3446 }
3447 }
3448 }
3449 }
3450 break;
3451
3452 case ARRAY_TYPE:
3453 /* Arrays are handled as small records. */
3454 {
3455 int num;
3456 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3457 TREE_TYPE (type), subclasses, bit_offset);
3458 if (!num)
3459 return 0;
3460
3461 /* The partial classes are now full classes. */
3462 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3463 subclasses[0] = X86_64_SSE_CLASS;
3464 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3465 subclasses[0] = X86_64_INTEGER_CLASS;
3466
3467 for (i = 0; i < words; i++)
3468 classes[i] = subclasses[i % num];
3469
3470 break;
3471 }
3472 case UNION_TYPE:
3473 case QUAL_UNION_TYPE:
3474 /* Unions are similar to RECORD_TYPE but the offset is
3475 always 0. */
3476 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3477 {
3478 if (TREE_CODE (field) == FIELD_DECL)
3479 {
3480 int num;
3481
3482 if (TREE_TYPE (field) == error_mark_node)
3483 continue;
3484
3485 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3486 TREE_TYPE (field), subclasses,
3487 bit_offset);
3488 if (!num)
3489 return 0;
3490 for (i = 0; i < num; i++)
3491 classes[i] = merge_classes (subclasses[i], classes[i]);
3492 }
3493 }
3494 break;
3495
3496 default:
3497 gcc_unreachable ();
3498 }
3499
3500 /* Final merger cleanup. */
3501 for (i = 0; i < words; i++)
3502 {
3503 /* If one class is MEMORY, everything should be passed in
3504 memory. */
3505 if (classes[i] == X86_64_MEMORY_CLASS)
3506 return 0;
3507
3508 /* The X86_64_SSEUP_CLASS should be always preceded by
3509 X86_64_SSE_CLASS. */
3510 if (classes[i] == X86_64_SSEUP_CLASS
3511 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3512 classes[i] = X86_64_SSE_CLASS;
3513
3514 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3515 if (classes[i] == X86_64_X87UP_CLASS
3516 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3517 classes[i] = X86_64_SSE_CLASS;
3518 }
3519 return words;
3520 }
3521
3522 /* Compute the alignment needed. We align all types to their natural
3523 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
3524 if (mode != VOIDmode && mode != BLKmode)
3525 {
3526 int mode_alignment = GET_MODE_BITSIZE (mode);
3527
3528 if (mode == XFmode)
3529 mode_alignment = 128;
3530 else if (mode == XCmode)
3531 mode_alignment = 256;
3532 if (COMPLEX_MODE_P (mode))
3533 mode_alignment /= 2;
3534 /* Misaligned fields are always returned in memory. */
3535 if (bit_offset % mode_alignment)
3536 return 0;
3537 }
3538
3539 /* for V1xx modes, just use the base mode */
3540 if (VECTOR_MODE_P (mode)
3541 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3542 mode = GET_MODE_INNER (mode);
3543
3544 /* Classification of atomic types. */
3545 switch (mode)
3546 {
3547 case SDmode:
3548 case DDmode:
3549 classes[0] = X86_64_SSE_CLASS;
3550 return 1;
3551 case TDmode:
3552 classes[0] = X86_64_SSE_CLASS;
3553 classes[1] = X86_64_SSEUP_CLASS;
3554 return 2;
3555 case DImode:
3556 case SImode:
3557 case HImode:
3558 case QImode:
3559 case CSImode:
3560 case CHImode:
3561 case CQImode:
3562 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3563 classes[0] = X86_64_INTEGERSI_CLASS;
3564 else
3565 classes[0] = X86_64_INTEGER_CLASS;
3566 return 1;
3567 case CDImode:
3568 case TImode:
3569 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3570 return 2;
3571 case CTImode:
3572 return 0;
3573 case SFmode:
3574 if (!(bit_offset % 64))
3575 classes[0] = X86_64_SSESF_CLASS;
3576 else
3577 classes[0] = X86_64_SSE_CLASS;
3578 return 1;
3579 case DFmode:
3580 classes[0] = X86_64_SSEDF_CLASS;
3581 return 1;
3582 case XFmode:
3583 classes[0] = X86_64_X87_CLASS;
3584 classes[1] = X86_64_X87UP_CLASS;
3585 return 2;
3586 case TFmode:
3587 classes[0] = X86_64_SSE_CLASS;
3588 classes[1] = X86_64_SSEUP_CLASS;
3589 return 2;
3590 case SCmode:
3591 classes[0] = X86_64_SSE_CLASS;
3592 return 1;
3593 case DCmode:
3594 classes[0] = X86_64_SSEDF_CLASS;
3595 classes[1] = X86_64_SSEDF_CLASS;
3596 return 2;
3597 case XCmode:
3598 classes[0] = X86_64_COMPLEX_X87_CLASS;
3599 return 1;
3600 case TCmode:
3601 /* This mode is larger than 16 bytes. */
3602 return 0;
3603 case V4SFmode:
3604 case V4SImode:
3605 case V16QImode:
3606 case V8HImode:
3607 case V2DFmode:
3608 case V2DImode:
3609 classes[0] = X86_64_SSE_CLASS;
3610 classes[1] = X86_64_SSEUP_CLASS;
3611 return 2;
3612 case V2SFmode:
3613 case V2SImode:
3614 case V4HImode:
3615 case V8QImode:
3616 classes[0] = X86_64_SSE_CLASS;
3617 return 1;
3618 case BLKmode:
3619 case VOIDmode:
3620 return 0;
3621 default:
3622 gcc_assert (VECTOR_MODE_P (mode));
3623
3624 if (bytes > 16)
3625 return 0;
3626
3627 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3628
3629 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3630 classes[0] = X86_64_INTEGERSI_CLASS;
3631 else
3632 classes[0] = X86_64_INTEGER_CLASS;
3633 classes[1] = X86_64_INTEGER_CLASS;
3634 return 1 + (bytes > 8);
3635 }
3636 }
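/* For illustration: "struct { double d; int i; }" occupies two eightbytes.
   The double classifies the first one as X86_64_SSEDF_CLASS and the int
   classifies the second one as X86_64_INTEGER_CLASS, so classify_argument
   returns 2 with classes = { SSEDF, INTEGER }: one SSE register plus one
   general-purpose register.  */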
3637
3638 /* Examine the argument and set the number of registers required in each
3639 class. Return 0 iff the parameter should be passed in memory. */
3640 static int
3641 examine_argument (enum machine_mode mode, tree type, int in_return,
3642 int *int_nregs, int *sse_nregs)
3643 {
3644 enum x86_64_reg_class class[MAX_CLASSES];
3645 int n = classify_argument (mode, type, class, 0);
3646
3647 *int_nregs = 0;
3648 *sse_nregs = 0;
3649 if (!n)
3650 return 0;
3651 for (n--; n >= 0; n--)
3652 switch (class[n])
3653 {
3654 case X86_64_INTEGER_CLASS:
3655 case X86_64_INTEGERSI_CLASS:
3656 (*int_nregs)++;
3657 break;
3658 case X86_64_SSE_CLASS:
3659 case X86_64_SSESF_CLASS:
3660 case X86_64_SSEDF_CLASS:
3661 (*sse_nregs)++;
3662 break;
3663 case X86_64_NO_CLASS:
3664 case X86_64_SSEUP_CLASS:
3665 break;
3666 case X86_64_X87_CLASS:
3667 case X86_64_X87UP_CLASS:
3668 if (!in_return)
3669 return 0;
3670 break;
3671 case X86_64_COMPLEX_X87_CLASS:
3672 return in_return ? 2 : 0;
3673 case X86_64_MEMORY_CLASS:
3674 gcc_unreachable ();
3675 }
3676 return 1;
3677 }
3678
3679 /* Construct container for the argument used by GCC interface. See
3680 FUNCTION_ARG for the detailed description. */
3681
3682 static rtx
3683 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3684 tree type, int in_return, int nintregs, int nsseregs,
3685 const int *intreg, int sse_regno)
3686 {
3687 /* The following variables hold the static issued_error state. */
3688 static bool issued_sse_arg_error;
3689 static bool issued_sse_ret_error;
3690 static bool issued_x87_ret_error;
3691
3692 enum machine_mode tmpmode;
3693 int bytes =
3694 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3695 enum x86_64_reg_class class[MAX_CLASSES];
3696 int n;
3697 int i;
3698 int nexps = 0;
3699 int needed_sseregs, needed_intregs;
3700 rtx exp[MAX_CLASSES];
3701 rtx ret;
3702
3703 n = classify_argument (mode, type, class, 0);
3704 if (TARGET_DEBUG_ARG)
3705 {
3706 if (!n)
3707 fprintf (stderr, "Memory class\n");
3708 else
3709 {
3710 fprintf (stderr, "Classes:");
3711 for (i = 0; i < n; i++)
3712 {
3713 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3714 }
3715 fprintf (stderr, "\n");
3716 }
3717 }
3718 if (!n)
3719 return NULL;
3720 if (!examine_argument (mode, type, in_return, &needed_intregs,
3721 &needed_sseregs))
3722 return NULL;
3723 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3724 return NULL;
3725
3726 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3727 some less clueful developer tries to use floating-point anyway. */
3728 if (needed_sseregs && !TARGET_SSE)
3729 {
3730 if (in_return)
3731 {
3732 if (!issued_sse_ret_error)
3733 {
3734 error ("SSE register return with SSE disabled");
3735 issued_sse_ret_error = true;
3736 }
3737 }
3738 else if (!issued_sse_arg_error)
3739 {
3740 error ("SSE register argument with SSE disabled");
3741 issued_sse_arg_error = true;
3742 }
3743 return NULL;
3744 }
3745
3746 /* Likewise, error if the ABI requires us to return values in the
3747 x87 registers and the user specified -mno-80387. */
3748 if (!TARGET_80387 && in_return)
3749 for (i = 0; i < n; i++)
3750 if (class[i] == X86_64_X87_CLASS
3751 || class[i] == X86_64_X87UP_CLASS
3752 || class[i] == X86_64_COMPLEX_X87_CLASS)
3753 {
3754 if (!issued_x87_ret_error)
3755 {
3756 error ("x87 register return with x87 disabled");
3757 issued_x87_ret_error = true;
3758 }
3759 return NULL;
3760 }
3761
3762 /* First construct simple cases. Avoid SCmode, since we want to use
3763 a single register to pass this type. */
3764 if (n == 1 && mode != SCmode)
3765 switch (class[0])
3766 {
3767 case X86_64_INTEGER_CLASS:
3768 case X86_64_INTEGERSI_CLASS:
3769 return gen_rtx_REG (mode, intreg[0]);
3770 case X86_64_SSE_CLASS:
3771 case X86_64_SSESF_CLASS:
3772 case X86_64_SSEDF_CLASS:
3773 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3774 case X86_64_X87_CLASS:
3775 case X86_64_COMPLEX_X87_CLASS:
3776 return gen_rtx_REG (mode, FIRST_STACK_REG);
3777 case X86_64_NO_CLASS:
3778 /* Zero sized array, struct or class. */
3779 return NULL;
3780 default:
3781 gcc_unreachable ();
3782 }
3783 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3784 && mode != BLKmode)
3785 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3786 if (n == 2
3787 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3788 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3789 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3790 && class[1] == X86_64_INTEGER_CLASS
3791 && (mode == CDImode || mode == TImode || mode == TFmode)
3792 && intreg[0] + 1 == intreg[1])
3793 return gen_rtx_REG (mode, intreg[0]);
3794
3795 /* Otherwise figure out the entries of the PARALLEL. */
3796 for (i = 0; i < n; i++)
3797 {
3798 switch (class[i])
3799 {
3800 case X86_64_NO_CLASS:
3801 break;
3802 case X86_64_INTEGER_CLASS:
3803 case X86_64_INTEGERSI_CLASS:
3804 /* Merge TImodes on aligned occasions here too. */
3805 if (i * 8 + 8 > bytes)
3806 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3807 else if (class[i] == X86_64_INTEGERSI_CLASS)
3808 tmpmode = SImode;
3809 else
3810 tmpmode = DImode;
3811 /* We've requested a size (e.g. 24 bits) that no integer mode provides. Use DImode. */
3812 if (tmpmode == BLKmode)
3813 tmpmode = DImode;
3814 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3815 gen_rtx_REG (tmpmode, *intreg),
3816 GEN_INT (i*8));
3817 intreg++;
3818 break;
3819 case X86_64_SSESF_CLASS:
3820 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3821 gen_rtx_REG (SFmode,
3822 SSE_REGNO (sse_regno)),
3823 GEN_INT (i*8));
3824 sse_regno++;
3825 break;
3826 case X86_64_SSEDF_CLASS:
3827 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3828 gen_rtx_REG (DFmode,
3829 SSE_REGNO (sse_regno)),
3830 GEN_INT (i*8));
3831 sse_regno++;
3832 break;
3833 case X86_64_SSE_CLASS:
3834 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3835 tmpmode = TImode;
3836 else
3837 tmpmode = DImode;
3838 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3839 gen_rtx_REG (tmpmode,
3840 SSE_REGNO (sse_regno)),
3841 GEN_INT (i*8));
3842 if (tmpmode == TImode)
3843 i++;
3844 sse_regno++;
3845 break;
3846 default:
3847 gcc_unreachable ();
3848 }
3849 }
3850
3851 /* Empty aligned struct, union or class. */
3852 if (nexps == 0)
3853 return NULL;
3854
3855 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3856 for (i = 0; i < nexps; i++)
3857 XVECEXP (ret, 0, i) = exp [i];
3858 return ret;
3859 }
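/* For illustration: for the "struct { double d; int i; }" example above,
   passed as the first argument of a function, the PARALLEL built here is
   roughly

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte travels in %xmm0 and the second in %rdi.  */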
3860
3861 /* Update the data in CUM to advance over an argument
3862 of mode MODE and data type TYPE.
3863 (TYPE is null for libcalls where that information may not be available.) */
3864
3865 void
3866 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3867 tree type, int named)
3868 {
3869 int bytes =
3870 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3871 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3872
3873 if (type)
3874 mode = type_natural_mode (type);
3875
3876 if (TARGET_DEBUG_ARG)
3877 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3878 "mode=%s, named=%d)\n\n",
3879 words, cum->words, cum->nregs, cum->sse_nregs,
3880 GET_MODE_NAME (mode), named);
3881
3882 if (TARGET_64BIT)
3883 {
3884 int int_nregs, sse_nregs;
3885 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3886 cum->words += words;
3887 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3888 {
3889 cum->nregs -= int_nregs;
3890 cum->sse_nregs -= sse_nregs;
3891 cum->regno += int_nregs;
3892 cum->sse_regno += sse_nregs;
3893 }
3894 else
3895 cum->words += words;
3896 }
3897 else
3898 {
3899 switch (mode)
3900 {
3901 default:
3902 break;
3903
3904 case BLKmode:
3905 if (bytes < 0)
3906 break;
3907 /* FALLTHRU */
3908
3909 case DImode:
3910 case SImode:
3911 case HImode:
3912 case QImode:
3913 cum->words += words;
3914 cum->nregs -= words;
3915 cum->regno += words;
3916
3917 if (cum->nregs <= 0)
3918 {
3919 cum->nregs = 0;
3920 cum->regno = 0;
3921 }
3922 break;
3923
3924 case DFmode:
3925 if (cum->float_in_sse < 2)
3926 break;
3927 case SFmode:
3928 if (cum->float_in_sse < 1)
3929 break;
3930 /* FALLTHRU */
3931
3932 case TImode:
3933 case V16QImode:
3934 case V8HImode:
3935 case V4SImode:
3936 case V2DImode:
3937 case V4SFmode:
3938 case V2DFmode:
3939 if (!type || !AGGREGATE_TYPE_P (type))
3940 {
3941 cum->sse_words += words;
3942 cum->sse_nregs -= 1;
3943 cum->sse_regno += 1;
3944 if (cum->sse_nregs <= 0)
3945 {
3946 cum->sse_nregs = 0;
3947 cum->sse_regno = 0;
3948 }
3949 }
3950 break;
3951
3952 case V8QImode:
3953 case V4HImode:
3954 case V2SImode:
3955 case V2SFmode:
3956 if (!type || !AGGREGATE_TYPE_P (type))
3957 {
3958 cum->mmx_words += words;
3959 cum->mmx_nregs -= 1;
3960 cum->mmx_regno += 1;
3961 if (cum->mmx_nregs <= 0)
3962 {
3963 cum->mmx_nregs = 0;
3964 cum->mmx_regno = 0;
3965 }
3966 }
3967 break;
3968 }
3969 }
3970 }
3971
3972 /* Define where to put the arguments to a function.
3973 Value is zero to push the argument on the stack,
3974 or a hard register in which to store the argument.
3975
3976 MODE is the argument's machine mode.
3977 TYPE is the data type of the argument (as a tree).
3978 This is null for libcalls where that information may
3979 not be available.
3980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3981 the preceding args and about the function being called.
3982 NAMED is nonzero if this argument is a named parameter
3983 (otherwise it is an extra parameter matching an ellipsis). */
3984
3985 rtx
3986 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3987 tree type, int named)
3988 {
3989 enum machine_mode mode = orig_mode;
3990 rtx ret = NULL_RTX;
3991 int bytes =
3992 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3993 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3994 static bool warnedsse, warnedmmx;
3995
3996 /* To simplify the code below, represent vector types with a vector mode
3997 even if MMX/SSE are not active. */
3998 if (type && TREE_CODE (type) == VECTOR_TYPE)
3999 mode = type_natural_mode (type);
4000
4001 /* Handle a hidden AL argument containing number of registers for varargs
4002 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4003 any AL settings. */
4004 if (mode == VOIDmode)
4005 {
4006 if (TARGET_64BIT)
4007 return GEN_INT (cum->maybe_vaarg
4008 ? (cum->sse_nregs < 0
4009 ? SSE_REGPARM_MAX
4010 : cum->sse_regno)
4011 : -1);
4012 else
4013 return constm1_rtx;
4014 }
4015 if (TARGET_64BIT)
4016 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4017 cum->sse_nregs,
4018 &x86_64_int_parameter_registers [cum->regno],
4019 cum->sse_regno);
4020 else
4021 switch (mode)
4022 {
4023 /* For now, pass fp/complex values on the stack. */
4024 default:
4025 break;
4026
4027 case BLKmode:
4028 if (bytes < 0)
4029 break;
4030 /* FALLTHRU */
4031 case DImode:
4032 case SImode:
4033 case HImode:
4034 case QImode:
4035 if (words <= cum->nregs)
4036 {
4037 int regno = cum->regno;
4038
4039 /* Fastcall allocates the first two DWORD (SImode) or
4040 smaller arguments to ECX and EDX. */
4041 if (cum->fastcall)
4042 {
4043 if (mode == BLKmode || mode == DImode)
4044 break;
4045
4046 /* ECX not EAX is the first allocated register. */
4047 if (regno == 0)
4048 regno = 2;
4049 }
4050 ret = gen_rtx_REG (mode, regno);
4051 }
4052 break;
4053 case DFmode:
4054 if (cum->float_in_sse < 2)
4055 break;
4056 case SFmode:
4057 if (cum->float_in_sse < 1)
4058 break;
4059 /* FALLTHRU */
4060 case TImode:
4061 case V16QImode:
4062 case V8HImode:
4063 case V4SImode:
4064 case V2DImode:
4065 case V4SFmode:
4066 case V2DFmode:
4067 if (!type || !AGGREGATE_TYPE_P (type))
4068 {
4069 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4070 {
4071 warnedsse = true;
4072 warning (0, "SSE vector argument without SSE enabled "
4073 "changes the ABI");
4074 }
4075 if (cum->sse_nregs)
4076 ret = gen_reg_or_parallel (mode, orig_mode,
4077 cum->sse_regno + FIRST_SSE_REG);
4078 }
4079 break;
4080 case V8QImode:
4081 case V4HImode:
4082 case V2SImode:
4083 case V2SFmode:
4084 if (!type || !AGGREGATE_TYPE_P (type))
4085 {
4086 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4087 {
4088 warnedmmx = true;
4089 warning (0, "MMX vector argument without MMX enabled "
4090 "changes the ABI");
4091 }
4092 if (cum->mmx_nregs)
4093 ret = gen_reg_or_parallel (mode, orig_mode,
4094 cum->mmx_regno + FIRST_MMX_REG);
4095 }
4096 break;
4097 }
4098
4099 if (TARGET_DEBUG_ARG)
4100 {
4101 fprintf (stderr,
4102 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4103 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4104
4105 if (ret)
4106 print_simple_rtl (stderr, ret);
4107 else
4108 fprintf (stderr, ", stack");
4109
4110 fprintf (stderr, " )\n");
4111 }
4112
4113 return ret;
4114 }
4115
4116 /* A C expression that indicates when an argument must be passed by
4117 reference. If nonzero for an argument, a copy of that argument is
4118 made in memory and a pointer to the argument is passed instead of
4119 the argument itself. The pointer is passed in whatever way is
4120 appropriate for passing a pointer to that type. */
4121
4122 static bool
4123 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4124 enum machine_mode mode ATTRIBUTE_UNUSED,
4125 tree type, bool named ATTRIBUTE_UNUSED)
4126 {
4127 if (!TARGET_64BIT)
4128 return 0;
4129
4130 if (type && int_size_in_bytes (type) == -1)
4131 {
4132 if (TARGET_DEBUG_ARG)
4133 fprintf (stderr, "function_arg_pass_by_reference\n");
4134 return 1;
4135 }
4136
4137 return 0;
4138 }
4139
4140 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4141 ABI. Only called if TARGET_SSE. */
4142 static bool
4143 contains_128bit_aligned_vector_p (tree type)
4144 {
4145 enum machine_mode mode = TYPE_MODE (type);
4146 if (SSE_REG_MODE_P (mode)
4147 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4148 return true;
4149 if (TYPE_ALIGN (type) < 128)
4150 return false;
4151
4152 if (AGGREGATE_TYPE_P (type))
4153 {
4154 /* Walk the aggregates recursively. */
4155 switch (TREE_CODE (type))
4156 {
4157 case RECORD_TYPE:
4158 case UNION_TYPE:
4159 case QUAL_UNION_TYPE:
4160 {
4161 tree field;
4162
4163 /* Walk all the structure fields. */
4164 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4165 {
4166 if (TREE_CODE (field) == FIELD_DECL
4167 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4168 return true;
4169 }
4170 break;
4171 }
4172
4173 case ARRAY_TYPE:
4174 /* Just in case some language passes arrays by value. */
4175 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4176 return true;
4177 break;
4178
4179 default:
4180 gcc_unreachable ();
4181 }
4182 }
4183 return false;
4184 }
4185
4186 /* Gives the alignment boundary, in bits, of an argument with the
4187 specified mode and type. */
4188
4189 int
4190 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4191 {
4192 int align;
4193 if (type)
4194 align = TYPE_ALIGN (type);
4195 else
4196 align = GET_MODE_ALIGNMENT (mode);
4197 if (align < PARM_BOUNDARY)
4198 align = PARM_BOUNDARY;
4199 if (!TARGET_64BIT)
4200 {
4201 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4202 make an exception for SSE modes since these require 128bit
4203 alignment.
4204
4205 The handling here differs from field_alignment. ICC aligns MMX
4206 arguments to 4 byte boundaries, while structure fields are aligned
4207 to 8 byte boundaries. */
4208 if (!TARGET_SSE)
4209 align = PARM_BOUNDARY;
4210 else if (!type)
4211 {
4212 if (!SSE_REG_MODE_P (mode))
4213 align = PARM_BOUNDARY;
4214 }
4215 else
4216 {
4217 if (!contains_128bit_aligned_vector_p (type))
4218 align = PARM_BOUNDARY;
4219 }
4220 }
4221 if (align > 128)
4222 align = 128;
4223 return align;
4224 }
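/* For illustration: on ia32 an ordinary "int" argument is aligned to
   PARM_BOUNDARY (32 bits), while an "__m128" argument, or an aggregate
   containing one, is aligned to 128 bits when SSE is enabled.  */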
4225
4226 /* Return true if N is a possible register number of function value. */
4227 bool
4228 ix86_function_value_regno_p (int regno)
4229 {
4230 if (TARGET_MACHO)
4231 {
4232 if (!TARGET_64BIT)
4233 {
4234 return ((regno) == 0
4235 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4236 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4237 }
4238 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4239 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4240 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4241 }
4242 else
4243 {
4244 if (regno == 0
4245 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4246 || (regno == FIRST_SSE_REG && TARGET_SSE))
4247 return true;
4248
4249 if (!TARGET_64BIT
4250 && (regno == FIRST_MMX_REG && TARGET_MMX))
4251 return true;
4252
4253 return false;
4254 }
4255 }
4256
4257 /* Define how to find the value returned by a function.
4258 VALTYPE is the data type of the value (as a tree).
4259 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4260 otherwise, FUNC is 0. */
4261 rtx
4262 ix86_function_value (tree valtype, tree fntype_or_decl,
4263 bool outgoing ATTRIBUTE_UNUSED)
4264 {
4265 enum machine_mode natmode = type_natural_mode (valtype);
4266
4267 if (TARGET_64BIT)
4268 {
4269 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4270 1, REGPARM_MAX, SSE_REGPARM_MAX,
4271 x86_64_int_return_registers, 0);
4272 /* For zero-sized structures, construct_container returns NULL, but we
4273 need to keep the rest of the compiler happy by returning a meaningful value. */
4274 if (!ret)
4275 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4276 return ret;
4277 }
4278 else
4279 {
4280 tree fn = NULL_TREE, fntype;
4281 if (fntype_or_decl
4282 && DECL_P (fntype_or_decl))
4283 fn = fntype_or_decl;
4284 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4285 return gen_rtx_REG (TYPE_MODE (valtype),
4286 ix86_value_regno (natmode, fn, fntype));
4287 }
4288 }
4289
4290 /* Return true iff type is returned in memory. */
4291 int
4292 ix86_return_in_memory (tree type)
4293 {
4294 int needed_intregs, needed_sseregs, size;
4295 enum machine_mode mode = type_natural_mode (type);
4296
4297 if (TARGET_64BIT)
4298 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4299
4300 if (mode == BLKmode)
4301 return 1;
4302
4303 size = int_size_in_bytes (type);
4304
4305 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4306 return 0;
4307
4308 if (VECTOR_MODE_P (mode) || mode == TImode)
4309 {
4310 /* User-created vectors small enough to fit in EAX. */
4311 if (size < 8)
4312 return 0;
4313
4314 /* MMX/3dNow values are returned in MM0,
4315 except when it doesn't exist. */
4316 if (size == 8)
4317 return (TARGET_MMX ? 0 : 1);
4318
4319 /* SSE values are returned in XMM0, except when it doesn't exist. */
4320 if (size == 16)
4321 return (TARGET_SSE ? 0 : 1);
4322 }
4323
4324 if (mode == XFmode)
4325 return 0;
4326
4327 if (mode == TDmode)
4328 return 1;
4329
4330 if (size > 12)
4331 return 1;
4332 return 0;
4333 }
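/* For illustration: on ia32 a 16-byte BLKmode struct is returned via a
   hidden pointer, while a 16-byte "__m128" value is returned in %xmm0
   when SSE is enabled, and an 8-byte MMX vector in %mm0 when MMX is
   enabled.  */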
4334
4335 /* When returning SSE vector types, we have a choice of either
4336 (1) being abi incompatible with a -march switch, or
4337 (2) generating an error.
4338 Given no good solution, I think the safest thing is one warning.
4339 The user won't be able to use -Werror, but....
4340
4341 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4342 called in response to actually generating a caller or callee that
4343 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4344 via aggregate_value_p for general type probing from tree-ssa. */
4345
4346 static rtx
4347 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4348 {
4349 static bool warnedsse, warnedmmx;
4350
4351 if (type)
4352 {
4353 /* Look at the return type of the function, not the function type. */
4354 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4355
4356 if (!TARGET_SSE && !warnedsse)
4357 {
4358 if (mode == TImode
4359 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4360 {
4361 warnedsse = true;
4362 warning (0, "SSE vector return without SSE enabled "
4363 "changes the ABI");
4364 }
4365 }
4366
4367 if (!TARGET_MMX && !warnedmmx)
4368 {
4369 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4370 {
4371 warnedmmx = true;
4372 warning (0, "MMX vector return without MMX enabled "
4373 "changes the ABI");
4374 }
4375 }
4376 }
4377
4378 return NULL;
4379 }
4380
4381 /* Define how to find the value returned by a library function
4382 assuming the value has mode MODE. */
4383 rtx
4384 ix86_libcall_value (enum machine_mode mode)
4385 {
4386 if (TARGET_64BIT)
4387 {
4388 switch (mode)
4389 {
4390 case SFmode:
4391 case SCmode:
4392 case DFmode:
4393 case DCmode:
4394 case TFmode:
4395 case SDmode:
4396 case DDmode:
4397 case TDmode:
4398 return gen_rtx_REG (mode, FIRST_SSE_REG);
4399 case XFmode:
4400 case XCmode:
4401 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4402 case TCmode:
4403 return NULL;
4404 default:
4405 return gen_rtx_REG (mode, 0);
4406 }
4407 }
4408 else
4409 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4410 }
4411
4412 /* Given a mode, return the register to use for a return value. */
4413
4414 static int
4415 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4416 {
4417 gcc_assert (!TARGET_64BIT);
4418
4419 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4420 we normally prevent this case when mmx is not available. However
4421 some ABIs may require the result to be returned like DImode. */
4422 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4423 return TARGET_MMX ? FIRST_MMX_REG : 0;
4424
4425 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4426 we prevent this case when sse is not available. However some ABIs
4427 may require the result to be returned like integer TImode. */
4428 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4429 return TARGET_SSE ? FIRST_SSE_REG : 0;
4430
4431 /* Decimal floating point values can go in %eax, unlike other float modes. */
4432 if (DECIMAL_FLOAT_MODE_P (mode))
4433 return 0;
4434
4435 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4436 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4437 return 0;
4438
4439 /* Floating point return values in %st(0), except for local functions when
4440 SSE math is enabled or for functions with sseregparm attribute. */
4441 if ((func || fntype)
4442 && (mode == SFmode || mode == DFmode))
4443 {
4444 int sse_level = ix86_function_sseregparm (fntype, func);
4445 if ((sse_level >= 1 && mode == SFmode)
4446 || (sse_level == 2 && mode == DFmode))
4447 return FIRST_SSE_REG;
4448 }
4449
4450 return FIRST_FLOAT_REG;
4451 }
4452 \f
4453 /* Create the va_list data type. */
4454
4455 static tree
4456 ix86_build_builtin_va_list (void)
4457 {
4458 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4459
4460 /* For i386 we use a plain pointer to the argument area. */
4461 if (!TARGET_64BIT)
4462 return build_pointer_type (char_type_node);
4463
4464 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4465 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4466
4467 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4468 unsigned_type_node);
4469 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4470 unsigned_type_node);
4471 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4472 ptr_type_node);
4473 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4474 ptr_type_node);
4475
4476 va_list_gpr_counter_field = f_gpr;
4477 va_list_fpr_counter_field = f_fpr;
4478
4479 DECL_FIELD_CONTEXT (f_gpr) = record;
4480 DECL_FIELD_CONTEXT (f_fpr) = record;
4481 DECL_FIELD_CONTEXT (f_ovf) = record;
4482 DECL_FIELD_CONTEXT (f_sav) = record;
4483
4484 TREE_CHAIN (record) = type_decl;
4485 TYPE_NAME (record) = type_decl;
4486 TYPE_FIELDS (record) = f_gpr;
4487 TREE_CHAIN (f_gpr) = f_fpr;
4488 TREE_CHAIN (f_fpr) = f_ovf;
4489 TREE_CHAIN (f_ovf) = f_sav;
4490
4491 layout_type (record);
4492
4493 /* The correct type is an array type of one element. */
4494 return build_array_type (record, build_index_type (size_zero_node));
4495 }
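/* For illustration: the record built above corresponds to the x86-64 psABI
   declaration

       typedef struct __va_list_tag {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];  */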
4496
4497 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4498
4499 static void
4500 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4501 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4502 int no_rtl)
4503 {
4504 CUMULATIVE_ARGS next_cum;
4505 rtx save_area = NULL_RTX, mem;
4506 rtx label;
4507 rtx label_ref;
4508 rtx tmp_reg;
4509 rtx nsse_reg;
4510 int set;
4511 tree fntype;
4512 int stdarg_p;
4513 int i;
4514
4515 if (!TARGET_64BIT)
4516 return;
4517
4518 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4519 return;
4520
4521 /* Indicate that we need to allocate stack space for the varargs save area. */
4522 ix86_save_varrargs_registers = 1;
4523
4524 cfun->stack_alignment_needed = 128;
4525
4526 fntype = TREE_TYPE (current_function_decl);
4527 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4528 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4529 != void_type_node));
4530
4531 /* For varargs, we do not want to skip the dummy va_dcl argument.
4532 For stdargs, we do want to skip the last named argument. */
4533 next_cum = *cum;
4534 if (stdarg_p)
4535 function_arg_advance (&next_cum, mode, type, 1);
4536
4537 if (!no_rtl)
4538 save_area = frame_pointer_rtx;
4539
4540 set = get_varargs_alias_set ();
4541
4542 for (i = next_cum.regno;
4543 i < ix86_regparm
4544 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4545 i++)
4546 {
4547 mem = gen_rtx_MEM (Pmode,
4548 plus_constant (save_area, i * UNITS_PER_WORD));
4549 MEM_NOTRAP_P (mem) = 1;
4550 set_mem_alias_set (mem, set);
4551 emit_move_insn (mem, gen_rtx_REG (Pmode,
4552 x86_64_int_parameter_registers[i]));
4553 }
4554
4555 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4556 {
4557 /* Now emit code to save the SSE registers. The AX parameter contains the
4558 number of SSE parameter registers used to call this function. We use
4559 the sse_prologue_save insn template, which produces a computed jump across
4560 the SSE saves. We need some preparation work to get this working. */
4561
4562 label = gen_label_rtx ();
4563 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4564
4565 /* Compute address to jump to :
4566 label - 5*eax + nnamed_sse_arguments*5 */
4567 tmp_reg = gen_reg_rtx (Pmode);
4568 nsse_reg = gen_reg_rtx (Pmode);
4569 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4570 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4571 gen_rtx_MULT (Pmode, nsse_reg,
4572 GEN_INT (4))));
4573 if (next_cum.sse_regno)
4574 emit_move_insn
4575 (nsse_reg,
4576 gen_rtx_CONST (DImode,
4577 gen_rtx_PLUS (DImode,
4578 label_ref,
4579 GEN_INT (next_cum.sse_regno * 4))));
4580 else
4581 emit_move_insn (nsse_reg, label_ref);
4582 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4583
4584 /* Compute the address of the memory block we save into. We always use a
4585 pointer pointing 127 bytes after the first byte to store - this is needed
4586 to keep each save instruction's size limited to 4 bytes. */
4587 tmp_reg = gen_reg_rtx (Pmode);
4588 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4589 plus_constant (save_area,
4590 8 * REGPARM_MAX + 127)));
4591 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4592 MEM_NOTRAP_P (mem) = 1;
4593 set_mem_alias_set (mem, set);
4594 set_mem_align (mem, BITS_PER_WORD);
4595
4596 /* And finally do the dirty job! */
4597 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4598 GEN_INT (next_cum.sse_regno), label));
4599 }
4600
4601 }
4602
4603 /* Implement va_start. */
4604
4605 void
4606 ix86_va_start (tree valist, rtx nextarg)
4607 {
4608 HOST_WIDE_INT words, n_gpr, n_fpr;
4609 tree f_gpr, f_fpr, f_ovf, f_sav;
4610 tree gpr, fpr, ovf, sav, t;
4611 tree type;
4612
4613 /* Only 64bit target needs something special. */
4614 if (!TARGET_64BIT)
4615 {
4616 std_expand_builtin_va_start (valist, nextarg);
4617 return;
4618 }
4619
4620 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4621 f_fpr = TREE_CHAIN (f_gpr);
4622 f_ovf = TREE_CHAIN (f_fpr);
4623 f_sav = TREE_CHAIN (f_ovf);
4624
4625 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4626 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4627 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4628 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4629 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4630
4631 /* Count number of gp and fp argument registers used. */
4632 words = current_function_args_info.words;
4633 n_gpr = current_function_args_info.regno;
4634 n_fpr = current_function_args_info.sse_regno;
4635
4636 if (TARGET_DEBUG_ARG)
4637 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4638 (int) words, (int) n_gpr, (int) n_fpr);
4639
4640 if (cfun->va_list_gpr_size)
4641 {
4642 type = TREE_TYPE (gpr);
4643 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4644 build_int_cst (type, n_gpr * 8));
4645 TREE_SIDE_EFFECTS (t) = 1;
4646 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4647 }
4648
4649 if (cfun->va_list_fpr_size)
4650 {
4651 type = TREE_TYPE (fpr);
4652 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4653 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4654 TREE_SIDE_EFFECTS (t) = 1;
4655 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4656 }
4657
4658 /* Find the overflow area. */
4659 type = TREE_TYPE (ovf);
4660 t = make_tree (type, virtual_incoming_args_rtx);
4661 if (words != 0)
4662 t = build2 (PLUS_EXPR, type, t,
4663 build_int_cst (type, words * UNITS_PER_WORD));
4664 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4665 TREE_SIDE_EFFECTS (t) = 1;
4666 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4667
4668 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4669 {
4670 /* Find the register save area.
4671 The function prologue saves it right above the stack frame. */
4672 type = TREE_TYPE (sav);
4673 t = make_tree (type, frame_pointer_rtx);
4674 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4675 TREE_SIDE_EFFECTS (t) = 1;
4676 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4677 }
4678 }
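/* For illustration: after va_start in a function whose named arguments
   consumed n_gpr integer registers and n_fpr SSE registers, the fields
   initialized above hold

       gp_offset         = n_gpr * 8         (0 .. 48)
       fp_offset         = 48 + n_fpr * 16   (48 .. 176)
       overflow_arg_area = first stack-passed argument
       reg_save_area     = start of the register save block

   matching the layout prepared by ix86_setup_incoming_varargs.  */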
4679
4680 /* Implement va_arg. */
4681
4682 tree
4683 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4684 {
4685 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4686 tree f_gpr, f_fpr, f_ovf, f_sav;
4687 tree gpr, fpr, ovf, sav, t;
4688 int size, rsize;
4689 tree lab_false, lab_over = NULL_TREE;
4690 tree addr, t2;
4691 rtx container;
4692 int indirect_p = 0;
4693 tree ptrtype;
4694 enum machine_mode nat_mode;
4695
4696 /* Only 64bit target needs something special. */
4697 if (!TARGET_64BIT)
4698 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4699
4700 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4701 f_fpr = TREE_CHAIN (f_gpr);
4702 f_ovf = TREE_CHAIN (f_fpr);
4703 f_sav = TREE_CHAIN (f_ovf);
4704
4705 valist = build_va_arg_indirect_ref (valist);
4706 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4707 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4708 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4709 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4710
4711 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4712 if (indirect_p)
4713 type = build_pointer_type (type);
4714 size = int_size_in_bytes (type);
4715 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4716
4717 nat_mode = type_natural_mode (type);
4718 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4719 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4720
4721 /* Pull the value out of the saved registers. */
4722
4723 addr = create_tmp_var (ptr_type_node, "addr");
4724 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4725
4726 if (container)
4727 {
4728 int needed_intregs, needed_sseregs;
4729 bool need_temp;
4730 tree int_addr, sse_addr;
4731
4732 lab_false = create_artificial_label ();
4733 lab_over = create_artificial_label ();
4734
4735 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4736
4737 need_temp = (!REG_P (container)
4738 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4739 || TYPE_ALIGN (type) > 128));
4740
4741 /* In case we are passing a structure, verify that it is a consecutive block
4742 in the register save area. If not, we need to do moves. */
4743 if (!need_temp && !REG_P (container))
4744 {
4745 /* Verify that all registers are strictly consecutive */
4746 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4747 {
4748 int i;
4749
4750 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4751 {
4752 rtx slot = XVECEXP (container, 0, i);
4753 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4754 || INTVAL (XEXP (slot, 1)) != i * 16)
4755 need_temp = 1;
4756 }
4757 }
4758 else
4759 {
4760 int i;
4761
4762 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4763 {
4764 rtx slot = XVECEXP (container, 0, i);
4765 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4766 || INTVAL (XEXP (slot, 1)) != i * 8)
4767 need_temp = 1;
4768 }
4769 }
4770 }
4771 if (!need_temp)
4772 {
4773 int_addr = addr;
4774 sse_addr = addr;
4775 }
4776 else
4777 {
4778 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4779 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4780 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4781 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4782 }
4783
4784 /* First ensure that we fit completely in registers. */
4785 if (needed_intregs)
4786 {
4787 t = build_int_cst (TREE_TYPE (gpr),
4788 (REGPARM_MAX - needed_intregs + 1) * 8);
4789 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4790 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4791 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4792 gimplify_and_add (t, pre_p);
4793 }
4794 if (needed_sseregs)
4795 {
4796 t = build_int_cst (TREE_TYPE (fpr),
4797 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4798 + REGPARM_MAX * 8);
4799 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4800 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4801 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4802 gimplify_and_add (t, pre_p);
4803 }
4804
4805 /* Compute index to start of area used for integer regs. */
4806 if (needed_intregs)
4807 {
4808 /* int_addr = gpr + sav; */
4809 t = fold_convert (ptr_type_node, gpr);
4810 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4811 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4812 gimplify_and_add (t, pre_p);
4813 }
4814 if (needed_sseregs)
4815 {
4816 /* sse_addr = fpr + sav; */
4817 t = fold_convert (ptr_type_node, fpr);
4818 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4819 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4820 gimplify_and_add (t, pre_p);
4821 }
4822 if (need_temp)
4823 {
4824 int i;
4825 tree temp = create_tmp_var (type, "va_arg_tmp");
4826
4827 /* addr = &temp; */
4828 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4829 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4830 gimplify_and_add (t, pre_p);
4831
4832 for (i = 0; i < XVECLEN (container, 0); i++)
4833 {
4834 rtx slot = XVECEXP (container, 0, i);
4835 rtx reg = XEXP (slot, 0);
4836 enum machine_mode mode = GET_MODE (reg);
4837 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4838 tree addr_type = build_pointer_type (piece_type);
4839 tree src_addr, src;
4840 int src_offset;
4841 tree dest_addr, dest;
4842
4843 if (SSE_REGNO_P (REGNO (reg)))
4844 {
4845 src_addr = sse_addr;
4846 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4847 }
4848 else
4849 {
4850 src_addr = int_addr;
4851 src_offset = REGNO (reg) * 8;
4852 }
4853 src_addr = fold_convert (addr_type, src_addr);
4854 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4855 size_int (src_offset)));
4856 src = build_va_arg_indirect_ref (src_addr);
4857
4858 dest_addr = fold_convert (addr_type, addr);
4859 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4860 size_int (INTVAL (XEXP (slot, 1)))));
4861 dest = build_va_arg_indirect_ref (dest_addr);
4862
4863 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4864 gimplify_and_add (t, pre_p);
4865 }
4866 }
4867
4868 if (needed_intregs)
4869 {
4870 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4871 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4872 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4873 gimplify_and_add (t, pre_p);
4874 }
4875 if (needed_sseregs)
4876 {
4877 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4878 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4879 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4880 gimplify_and_add (t, pre_p);
4881 }
4882
4883 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4884 gimplify_and_add (t, pre_p);
4885
4886 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4887 append_to_statement_list (t, pre_p);
4888 }
4889
4890 /* ... otherwise out of the overflow area. */
4891
4892 /* Care for on-stack alignment if needed. */
4893 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4894 || integer_zerop (TYPE_SIZE (type)))
4895 t = ovf;
4896 else
4897 {
4898 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4899 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4900 build_int_cst (TREE_TYPE (ovf), align - 1));
4901 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4902 build_int_cst (TREE_TYPE (t), -align));
4903 }
4904 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4905
4906 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4907 gimplify_and_add (t2, pre_p);
4908
4909 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4910 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4911 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4912 gimplify_and_add (t, pre_p);
4913
4914 if (container)
4915 {
4916 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4917 append_to_statement_list (t, pre_p);
4918 }
4919
4920 ptrtype = build_pointer_type (type);
4921 addr = fold_convert (ptrtype, addr);
4922
4923 if (indirect_p)
4924 addr = build_va_arg_indirect_ref (addr);
4925 return build_va_arg_indirect_ref (addr);
4926 }
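
/* Illustrative sketch (not part of the original sources): for a plain
   "int" argument on x86-64 the function above gimplifies va_arg roughly
   into the following pseudo-C, where gp_offset, reg_save_area and
   overflow_arg_area name the usual x86-64 va_list fields and 48 is
   REGPARM_MAX * 8:

       if (ap->gp_offset >= 48)
         goto overflow;
       addr = ap->reg_save_area + ap->gp_offset;
       ap->gp_offset += 8;
       goto done;
     overflow:
       addr = ap->overflow_arg_area;
       ap->overflow_arg_area += 8;
     done:
       result = *(int *) addr;
*/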
4927 \f
4928 /* Return nonzero if OPNUM's MEM should be matched
4929 in movabs* patterns. */
4930
4931 int
4932 ix86_check_movabs (rtx insn, int opnum)
4933 {
4934 rtx set, mem;
4935
4936 set = PATTERN (insn);
4937 if (GET_CODE (set) == PARALLEL)
4938 set = XVECEXP (set, 0, 0);
4939 gcc_assert (GET_CODE (set) == SET);
4940 mem = XEXP (set, opnum);
4941 while (GET_CODE (mem) == SUBREG)
4942 mem = SUBREG_REG (mem);
4943 gcc_assert (MEM_P (mem));
4944 return (volatile_ok || !MEM_VOLATILE_P (mem));
4945 }
4946 \f
4947 /* Initialize the table of extra 80387 mathematical constants. */
4948
4949 static void
4950 init_ext_80387_constants (void)
4951 {
4952 static const char * cst[5] =
4953 {
4954 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4955 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4956 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4957 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4958 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4959 };
4960 int i;
4961
4962 for (i = 0; i < 5; i++)
4963 {
4964 real_from_string (&ext_80387_constants_table[i], cst[i]);
4965 /* Ensure each constant is rounded to XFmode precision. */
4966 real_convert (&ext_80387_constants_table[i],
4967 XFmode, &ext_80387_constants_table[i]);
4968 }
4969
4970 ext_80387_constants_init = 1;
4971 }
4972
4973 /* Return true if the constant is something that can be loaded with
4974 a special instruction. */
4975
4976 int
4977 standard_80387_constant_p (rtx x)
4978 {
4979 REAL_VALUE_TYPE r;
4980
4981 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4982 return -1;
4983
4984 if (x == CONST0_RTX (GET_MODE (x)))
4985 return 1;
4986 if (x == CONST1_RTX (GET_MODE (x)))
4987 return 2;
4988
4989 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4990
4991 /* For XFmode constants, try to find a special 80387 instruction when
4992 optimizing for size or on those CPUs that benefit from them. */
4993 if (GET_MODE (x) == XFmode
4994 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4995 {
4996 int i;
4997
4998 if (! ext_80387_constants_init)
4999 init_ext_80387_constants ();
5000
5001 for (i = 0; i < 5; i++)
5002 if (real_identical (&r, &ext_80387_constants_table[i]))
5003 return i + 3;
5004 }
5005
5006   /* A load of the constant -0.0 or -1.0 will be split into an
5007      fldz;fchs or fld1;fchs sequence.  */
5008 if (real_isnegzero (&r))
5009 return 8;
5010 if (real_identical (&r, &dconstm1))
5011 return 9;
5012
5013 return 0;
5014 }
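
/* Illustrative examples of the classification above: +0.0 yields 1 (fldz),
   1.0 yields 2 (fld1), pi in XFmode yields 7 (fldpi) when the extended
   constants are enabled, -0.0 yields 8 and -1.0 yields 9 (split into
   fldz/fld1 followed by fchs), and e.g. 2.0 yields 0, meaning the constant
   must be loaded from memory.  */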
5015
5016 /* Return the opcode of the special instruction to be used to load
5017 the constant X. */
5018
5019 const char *
5020 standard_80387_constant_opcode (rtx x)
5021 {
5022 switch (standard_80387_constant_p (x))
5023 {
5024 case 1:
5025 return "fldz";
5026 case 2:
5027 return "fld1";
5028 case 3:
5029 return "fldlg2";
5030 case 4:
5031 return "fldln2";
5032 case 5:
5033 return "fldl2e";
5034 case 6:
5035 return "fldl2t";
5036 case 7:
5037 return "fldpi";
5038 case 8:
5039 case 9:
5040 return "#";
5041 default:
5042 gcc_unreachable ();
5043 }
5044 }
5045
5046 /* Return the CONST_DOUBLE representing the 80387 constant that is
5047 loaded by the specified special instruction. The argument IDX
5048 matches the return value from standard_80387_constant_p. */
5049
5050 rtx
5051 standard_80387_constant_rtx (int idx)
5052 {
5053 int i;
5054
5055 if (! ext_80387_constants_init)
5056 init_ext_80387_constants ();
5057
5058 switch (idx)
5059 {
5060 case 3:
5061 case 4:
5062 case 5:
5063 case 6:
5064 case 7:
5065 i = idx - 3;
5066 break;
5067
5068 default:
5069 gcc_unreachable ();
5070 }
5071
5072 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5073 XFmode);
5074 }
5075
5076 /* Return 1 if MODE is a valid mode for SSE.  */
5077 static int
5078 standard_sse_mode_p (enum machine_mode mode)
5079 {
5080 switch (mode)
5081 {
5082 case V16QImode:
5083 case V8HImode:
5084 case V4SImode:
5085 case V2DImode:
5086 case V4SFmode:
5087 case V2DFmode:
5088 return 1;
5089
5090 default:
5091 return 0;
5092 }
5093 }
5094
5095 /* Return 1 if X is an FP constant that we can load into an SSE register
5096    without using memory.  */
5097 int
5098 standard_sse_constant_p (rtx x)
5099 {
5100 enum machine_mode mode = GET_MODE (x);
5101
5102 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5103 return 1;
5104 if (vector_all_ones_operand (x, mode)
5105 && standard_sse_mode_p (mode))
5106 return TARGET_SSE2 ? 2 : -1;
5107
5108 return 0;
5109 }
5110
5111 /* Return the opcode of the special instruction to be used to load
5112 the constant X. */
5113
5114 const char *
5115 standard_sse_constant_opcode (rtx insn, rtx x)
5116 {
5117 switch (standard_sse_constant_p (x))
5118 {
5119 case 1:
5120 if (get_attr_mode (insn) == MODE_V4SF)
5121 return "xorps\t%0, %0";
5122 else if (get_attr_mode (insn) == MODE_V2DF)
5123 return "xorpd\t%0, %0";
5124 else
5125 return "pxor\t%0, %0";
5126 case 2:
5127 return "pcmpeqd\t%0, %0";
5128 }
5129 gcc_unreachable ();
5130 }
5131
5132 /* Return 1 if OP contains a symbol reference.  */
5133
5134 int
5135 symbolic_reference_mentioned_p (rtx op)
5136 {
5137 const char *fmt;
5138 int i;
5139
5140 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5141 return 1;
5142
5143 fmt = GET_RTX_FORMAT (GET_CODE (op));
5144 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5145 {
5146 if (fmt[i] == 'E')
5147 {
5148 int j;
5149
5150 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5151 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5152 return 1;
5153 }
5154
5155 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5156 return 1;
5157 }
5158
5159 return 0;
5160 }
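
/* For example (illustrative), symbolic_reference_mentioned_p returns 1 for
   (const (plus (symbol_ref "foo") (const_int 4))) because the recursion
   above reaches the SYMBOL_REF, and 0 for (plus (reg) (const_int 4)).  */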
5161
5162 /* Return 1 if it is appropriate to emit `ret' instructions in the
5163 body of a function. Do this only if the epilogue is simple, needing a
5164 couple of insns. Prior to reloading, we can't tell how many registers
5165 must be saved, so return 0 then. Return 0 if there is no frame
5166 marker to de-allocate. */
5167
5168 int
5169 ix86_can_use_return_insn_p (void)
5170 {
5171 struct ix86_frame frame;
5172
5173 if (! reload_completed || frame_pointer_needed)
5174 return 0;
5175
5176   /* Don't allow more than 32K bytes of popped arguments, since that's all
5177      we can do with one instruction.  */
5178 if (current_function_pops_args
5179 && current_function_args_size >= 32768)
5180 return 0;
5181
5182 ix86_compute_frame_layout (&frame);
5183 return frame.to_allocate == 0 && frame.nregs == 0;
5184 }
5185 \f
5186 /* Value should be nonzero if functions must have frame pointers.
5187 Zero means the frame pointer need not be set up (and parms may
5188 be accessed via the stack pointer) in functions that seem suitable. */
5189
5190 int
5191 ix86_frame_pointer_required (void)
5192 {
5193 /* If we accessed previous frames, then the generated code expects
5194 to be able to access the saved ebp value in our frame. */
5195 if (cfun->machine->accesses_prev_frame)
5196 return 1;
5197
5198   /* Several x86 OSes need a frame pointer for other reasons,
5199      usually pertaining to setjmp.  */
5200 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5201 return 1;
5202
5203 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5204 the frame pointer by default. Turn it back on now if we've not
5205 got a leaf function. */
5206 if (TARGET_OMIT_LEAF_FRAME_POINTER
5207 && (!current_function_is_leaf
5208 || ix86_current_function_calls_tls_descriptor))
5209 return 1;
5210
5211 if (current_function_profile)
5212 return 1;
5213
5214 return 0;
5215 }
5216
5217 /* Record that the current function accesses previous call frames. */
5218
5219 void
5220 ix86_setup_frame_addresses (void)
5221 {
5222 cfun->machine->accesses_prev_frame = 1;
5223 }
5224 \f
5225 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5226 # define USE_HIDDEN_LINKONCE 1
5227 #else
5228 # define USE_HIDDEN_LINKONCE 0
5229 #endif
5230
5231 static int pic_labels_used;
5232
5233 /* Fills in the label name that should be used for a pc thunk for
5234 the given register. */
5235
5236 static void
5237 get_pc_thunk_name (char name[32], unsigned int regno)
5238 {
5239 gcc_assert (!TARGET_64BIT);
5240
5241 if (USE_HIDDEN_LINKONCE)
5242 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5243 else
5244 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5245 }
5246
5247
5248 /* Emit the per-register PC thunks used by -fpic code.  Each thunk loads
5249    its register with the return address of the caller and then returns.  */
5250
5251 void
5252 ix86_file_end (void)
5253 {
5254 rtx xops[2];
5255 int regno;
5256
5257 for (regno = 0; regno < 8; ++regno)
5258 {
5259 char name[32];
5260
5261 if (! ((pic_labels_used >> regno) & 1))
5262 continue;
5263
5264 get_pc_thunk_name (name, regno);
5265
5266 #if TARGET_MACHO
5267 if (TARGET_MACHO)
5268 {
5269 switch_to_section (darwin_sections[text_coal_section]);
5270 fputs ("\t.weak_definition\t", asm_out_file);
5271 assemble_name (asm_out_file, name);
5272 fputs ("\n\t.private_extern\t", asm_out_file);
5273 assemble_name (asm_out_file, name);
5274 fputs ("\n", asm_out_file);
5275 ASM_OUTPUT_LABEL (asm_out_file, name);
5276 }
5277 else
5278 #endif
5279 if (USE_HIDDEN_LINKONCE)
5280 {
5281 tree decl;
5282
5283 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5284 error_mark_node);
5285 TREE_PUBLIC (decl) = 1;
5286 TREE_STATIC (decl) = 1;
5287 DECL_ONE_ONLY (decl) = 1;
5288
5289 (*targetm.asm_out.unique_section) (decl, 0);
5290 switch_to_section (get_named_section (decl, NULL, 0));
5291
5292 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5293 fputs ("\t.hidden\t", asm_out_file);
5294 assemble_name (asm_out_file, name);
5295 fputc ('\n', asm_out_file);
5296 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5297 }
5298 else
5299 {
5300 switch_to_section (text_section);
5301 ASM_OUTPUT_LABEL (asm_out_file, name);
5302 }
5303
5304 xops[0] = gen_rtx_REG (SImode, regno);
5305 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5306 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5307 output_asm_insn ("ret", xops);
5308 }
5309
5310 if (NEED_INDICATE_EXEC_STACK)
5311 file_end_indicate_exec_stack ();
5312 }
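
/* Illustrative output of the loop above for the %ebx thunk (modulo the
   exact section and visibility directives chosen):

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. each thunk simply copies its own return address - the caller's
   program counter - into the register and returns.  */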
5313
5314 /* Emit code for the SET_GOT patterns. */
5315
5316 const char *
5317 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5318 {
5319 rtx xops[3];
5320
5321 xops[0] = dest;
5322 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5323
5324 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5325 {
5326 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5327
5328 if (!flag_pic)
5329 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5330 else
5331 output_asm_insn ("call\t%a2", xops);
5332
5333 #if TARGET_MACHO
5334 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5335 is what will be referenced by the Mach-O PIC subsystem. */
5336 if (!label)
5337 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5338 #endif
5339
5340 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5341 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5342
5343 if (flag_pic)
5344 output_asm_insn ("pop{l}\t%0", xops);
5345 }
5346 else
5347 {
5348 char name[32];
5349 get_pc_thunk_name (name, REGNO (dest));
5350 pic_labels_used |= 1 << REGNO (dest);
5351
5352 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5353 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5354 output_asm_insn ("call\t%X2", xops);
5355 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5356 is what will be referenced by the Mach-O PIC subsystem. */
5357 #if TARGET_MACHO
5358 if (!label)
5359 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5360 else
5361 targetm.asm_out.internal_label (asm_out_file, "L",
5362 CODE_LABEL_NUMBER (label));
5363 #endif
5364 }
5365
5366 if (TARGET_MACHO)
5367 return "";
5368
5369 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5370 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5371 else
5372 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5373
5374 return "";
5375 }
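
/* Illustrative assembly for loading %ebx with the GOT pointer.  With
   TARGET_DEEP_BRANCH_PREDICTION the thunk-based sequence is roughly:

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   and without it:

       call    .L2
   .L2: popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   (the label name is made up here; the real label is generated
   internally).  */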
5376
5377 /* Generate an "push" pattern for input ARG. */
5378
5379 static rtx
5380 gen_push (rtx arg)
5381 {
5382 return gen_rtx_SET (VOIDmode,
5383 gen_rtx_MEM (Pmode,
5384 gen_rtx_PRE_DEC (Pmode,
5385 stack_pointer_rtx)),
5386 arg);
5387 }
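
/* For example (illustrative), gen_push (hard_frame_pointer_rtx) on a
   32-bit target yields RTL of the form

       (set (mem:SI (pre_dec:SI (reg:SI sp)))
            (reg:SI bp))

   which the push patterns in i386.md match; on 64-bit targets the
   memory reference uses DImode instead.  */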
5388
5389 /* Return >= 0 if there is an unused call-clobbered register available
5390 for the entire function. */
5391
5392 static unsigned int
5393 ix86_select_alt_pic_regnum (void)
5394 {
5395 if (current_function_is_leaf && !current_function_profile
5396 && !ix86_current_function_calls_tls_descriptor)
5397 {
5398 int i;
5399 for (i = 2; i >= 0; --i)
5400 if (!regs_ever_live[i])
5401 return i;
5402 }
5403
5404 return INVALID_REGNUM;
5405 }
5406
5407 /* Return 1 if we need to save REGNO. */
5408 static int
5409 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5410 {
5411 if (pic_offset_table_rtx
5412 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5413 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5414 || current_function_profile
5415 || current_function_calls_eh_return
5416 || current_function_uses_const_pool))
5417 {
5418 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5419 return 0;
5420 return 1;
5421 }
5422
5423 if (current_function_calls_eh_return && maybe_eh_return)
5424 {
5425 unsigned i;
5426 for (i = 0; ; i++)
5427 {
5428 unsigned test = EH_RETURN_DATA_REGNO (i);
5429 if (test == INVALID_REGNUM)
5430 break;
5431 if (test == regno)
5432 return 1;
5433 }
5434 }
5435
5436 if (cfun->machine->force_align_arg_pointer
5437 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5438 return 1;
5439
5440 return (regs_ever_live[regno]
5441 && !call_used_regs[regno]
5442 && !fixed_regs[regno]
5443 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5444 }
5445
5446 /* Return number of registers to be saved on the stack. */
5447
5448 static int
5449 ix86_nsaved_regs (void)
5450 {
5451 int nregs = 0;
5452 int regno;
5453
5454 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5455 if (ix86_save_reg (regno, true))
5456 nregs++;
5457 return nregs;
5458 }
5459
5460 /* Return the offset between two registers, one to be eliminated, and the other
5461 its replacement, at the start of a routine. */
5462
5463 HOST_WIDE_INT
5464 ix86_initial_elimination_offset (int from, int to)
5465 {
5466 struct ix86_frame frame;
5467 ix86_compute_frame_layout (&frame);
5468
5469 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5470 return frame.hard_frame_pointer_offset;
5471 else if (from == FRAME_POINTER_REGNUM
5472 && to == HARD_FRAME_POINTER_REGNUM)
5473 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5474 else
5475 {
5476 gcc_assert (to == STACK_POINTER_REGNUM);
5477
5478 if (from == ARG_POINTER_REGNUM)
5479 return frame.stack_pointer_offset;
5480
5481 gcc_assert (from == FRAME_POINTER_REGNUM);
5482 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5483 }
5484 }
5485
5486 /* Fill structure ix86_frame about frame of currently computed function. */
5487
5488 static void
5489 ix86_compute_frame_layout (struct ix86_frame *frame)
5490 {
5491 HOST_WIDE_INT total_size;
5492 unsigned int stack_alignment_needed;
5493 HOST_WIDE_INT offset;
5494 unsigned int preferred_alignment;
5495 HOST_WIDE_INT size = get_frame_size ();
5496
5497 frame->nregs = ix86_nsaved_regs ();
5498 total_size = size;
5499
5500 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5501 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5502
5503   /* During reload iteration the number of registers saved can change.
5504      Recompute the value as needed.  Do not recompute when the number of
5505      registers didn't change, as reload makes multiple calls to this function
5506      and does not expect the decision to change within a single iteration.  */
5507 if (!optimize_size
5508 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5509 {
5510 int count = frame->nregs;
5511
5512 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5513     /* The fast prologue uses move instead of push to save registers.  This
5514        is significantly longer, but also executes faster, as modern hardware
5515        can execute the moves in parallel but cannot do so for push/pop.
5516
5517        Be careful about choosing which prologue to emit: when the function
5518        takes many instructions to execute, we may as well use the slow
5519        version, as we may when the function is known to be outside a hot spot
5520        (the latter is known only with profile feedback).  Weight the size of
5521        the function by the number of registers to save, as it is cheap to use
5522        one or two push instructions but very slow to use many of them.  */
5523 if (count)
5524 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5525 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5526 || (flag_branch_probabilities
5527 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5528 cfun->machine->use_fast_prologue_epilogue = false;
5529 else
5530 cfun->machine->use_fast_prologue_epilogue
5531 = !expensive_function_p (count);
5532 }
5533 if (TARGET_PROLOGUE_USING_MOVE
5534 && cfun->machine->use_fast_prologue_epilogue)
5535 frame->save_regs_using_mov = true;
5536 else
5537 frame->save_regs_using_mov = false;
5538
5539
5540 /* Skip return address and saved base pointer. */
5541 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5542
5543 frame->hard_frame_pointer_offset = offset;
5544
5545   /* Do some sanity checking of stack_alignment_needed and
5546      preferred_alignment, since the i386 port is the only one using these
5547      features, and they may break easily.  */
5548
5549 gcc_assert (!size || stack_alignment_needed);
5550 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5551 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5552 gcc_assert (stack_alignment_needed
5553 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5554
5555 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5556 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5557
5558 /* Register save area */
5559 offset += frame->nregs * UNITS_PER_WORD;
5560
5561 /* Va-arg area */
5562 if (ix86_save_varrargs_registers)
5563 {
5564 offset += X86_64_VARARGS_SIZE;
5565 frame->va_arg_size = X86_64_VARARGS_SIZE;
5566 }
5567 else
5568 frame->va_arg_size = 0;
5569
5570 /* Align start of frame for local function. */
5571 frame->padding1 = ((offset + stack_alignment_needed - 1)
5572 & -stack_alignment_needed) - offset;
5573
5574 offset += frame->padding1;
5575
5576 /* Frame pointer points here. */
5577 frame->frame_pointer_offset = offset;
5578
5579 offset += size;
5580
5581   /* Add the outgoing arguments area.  It can be skipped if we eliminated
5582      all the function calls as dead code.
5583      Skipping is, however, impossible when the function calls alloca: the
5584      alloca expander assumes that the last current_function_outgoing_args_size
5585      bytes of the stack frame are unused.  */
5586 if (ACCUMULATE_OUTGOING_ARGS
5587 && (!current_function_is_leaf || current_function_calls_alloca
5588 || ix86_current_function_calls_tls_descriptor))
5589 {
5590 offset += current_function_outgoing_args_size;
5591 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5592 }
5593 else
5594 frame->outgoing_arguments_size = 0;
5595
5596 /* Align stack boundary. Only needed if we're calling another function
5597 or using alloca. */
5598 if (!current_function_is_leaf || current_function_calls_alloca
5599 || ix86_current_function_calls_tls_descriptor)
5600 frame->padding2 = ((offset + preferred_alignment - 1)
5601 & -preferred_alignment) - offset;
5602 else
5603 frame->padding2 = 0;
5604
5605 offset += frame->padding2;
5606
5607 /* We've reached end of stack frame. */
5608 frame->stack_pointer_offset = offset;
5609
5610 /* Size prologue needs to allocate. */
5611 frame->to_allocate =
5612 (size + frame->padding1 + frame->padding2
5613 + frame->outgoing_arguments_size + frame->va_arg_size);
5614
5615 if ((!frame->to_allocate && frame->nregs <= 1)
5616 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5617 frame->save_regs_using_mov = false;
5618
5619 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5620 && current_function_is_leaf
5621 && !ix86_current_function_calls_tls_descriptor)
5622 {
5623 frame->red_zone_size = frame->to_allocate;
5624 if (frame->save_regs_using_mov)
5625 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5626 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5627 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5628 }
5629 else
5630 frame->red_zone_size = 0;
5631 frame->to_allocate -= frame->red_zone_size;
5632 frame->stack_pointer_offset -= frame->red_zone_size;
5633 #if 0
5634 fprintf (stderr, "\n");
5635 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5636 fprintf (stderr, "size: %ld\n", (long)size);
5637 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5638 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5639 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5640 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5641 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5642 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5643 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5644 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5645 (long)frame->hard_frame_pointer_offset);
5646 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5647 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5648 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5649 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5650 #endif
5651 }
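
/* A sketch of the frame layout computed above, from higher to lower
   addresses (illustrative, not exhaustive):

       return address
       saved frame pointer             (only if frame_pointer_needed)
       saved registers                 nregs * UNITS_PER_WORD
       va_arg register save area       va_arg_size
       padding1                        aligns to stack_alignment_needed
       local variables                 size
       outgoing arguments              outgoing_arguments_size
       padding2                        aligns to preferred_alignment

   hard_frame_pointer_offset, frame_pointer_offset and stack_pointer_offset
   are the distances from the frame top down to the respective pointers,
   and to_allocate is the amount the prologue subtracts from the stack
   pointer (less the red zone, when one is used).  */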
5652
5653 /* Emit code to save registers in the prologue. */
5654
5655 static void
5656 ix86_emit_save_regs (void)
5657 {
5658 unsigned int regno;
5659 rtx insn;
5660
5661 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5662 if (ix86_save_reg (regno, true))
5663 {
5664 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5665 RTX_FRAME_RELATED_P (insn) = 1;
5666 }
5667 }
5668
5669 /* Emit code to save registers using MOV insns.  The first register
5670    is saved at POINTER + OFFSET.  */
5671 static void
5672 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5673 {
5674 unsigned int regno;
5675 rtx insn;
5676
5677 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5678 if (ix86_save_reg (regno, true))
5679 {
5680 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5681 Pmode, offset),
5682 gen_rtx_REG (Pmode, regno));
5683 RTX_FRAME_RELATED_P (insn) = 1;
5684 offset += UNITS_PER_WORD;
5685 }
5686 }
5687
5688 /* Expand a prologue or epilogue stack adjustment.
5689    The pattern exists to put a dependency on all ebp-based memory accesses.
5690    STYLE should be negative if instructions should be marked as frame related,
5691    zero if the %r11 register is live and cannot be freely used, and positive
5692    otherwise.  */
5693
5694 static void
5695 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5696 {
5697 rtx insn;
5698
5699 if (! TARGET_64BIT)
5700 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5701 else if (x86_64_immediate_operand (offset, DImode))
5702 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5703 else
5704 {
5705 rtx r11;
5706 /* r11 is used by indirect sibcall return as well, set before the
5707 epilogue and used after the epilogue. ATM indirect sibcall
5708 shouldn't be used together with huge frame sizes in one
5709 function because of the frame_size check in sibcall.c. */
5710 gcc_assert (style);
5711 r11 = gen_rtx_REG (DImode, R11_REG);
5712 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5713 if (style < 0)
5714 RTX_FRAME_RELATED_P (insn) = 1;
5715 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5716 offset));
5717 }
5718 if (style < 0)
5719 RTX_FRAME_RELATED_P (insn) = 1;
5720 }
5721
5722 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5723
5724 static rtx
5725 ix86_internal_arg_pointer (void)
5726 {
5727 bool has_force_align_arg_pointer =
5728 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5729 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5730 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5731 && DECL_NAME (current_function_decl)
5732 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5733 && DECL_FILE_SCOPE_P (current_function_decl))
5734 || ix86_force_align_arg_pointer
5735 || has_force_align_arg_pointer)
5736 {
5737 /* Nested functions can't realign the stack due to a register
5738 conflict. */
5739 if (DECL_CONTEXT (current_function_decl)
5740 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5741 {
5742 if (ix86_force_align_arg_pointer)
5743 warning (0, "-mstackrealign ignored for nested functions");
5744 if (has_force_align_arg_pointer)
5745 error ("%s not supported for nested functions",
5746 ix86_force_align_arg_pointer_string);
5747 return virtual_incoming_args_rtx;
5748 }
5749 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5750 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5751 }
5752 else
5753 return virtual_incoming_args_rtx;
5754 }
5755
5756 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5757 This is called from dwarf2out.c to emit call frame instructions
5758 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5759 static void
5760 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5761 {
5762 rtx unspec = SET_SRC (pattern);
5763 gcc_assert (GET_CODE (unspec) == UNSPEC);
5764
5765 switch (index)
5766 {
5767 case UNSPEC_REG_SAVE:
5768 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5769 SET_DEST (pattern));
5770 break;
5771 case UNSPEC_DEF_CFA:
5772 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5773 INTVAL (XVECEXP (unspec, 0, 0)));
5774 break;
5775 default:
5776 gcc_unreachable ();
5777 }
5778 }
5779
5780 /* Expand the prologue into a bunch of separate insns. */
5781
5782 void
5783 ix86_expand_prologue (void)
5784 {
5785 rtx insn;
5786 bool pic_reg_used;
5787 struct ix86_frame frame;
5788 HOST_WIDE_INT allocate;
5789
5790 ix86_compute_frame_layout (&frame);
5791
5792 if (cfun->machine->force_align_arg_pointer)
5793 {
5794 rtx x, y;
5795
5796 /* Grab the argument pointer. */
5797 x = plus_constant (stack_pointer_rtx, 4);
5798 y = cfun->machine->force_align_arg_pointer;
5799 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5800 RTX_FRAME_RELATED_P (insn) = 1;
5801
5802       /* The unwind info consists of two parts: install the fafp as the cfa,
5803 	 and record the fafp as the "save register" of the stack pointer.
5804 	 The latter is there so that the unwinder can see where it should
5805 	 restore the stack pointer across the AND insn below.  */
5806 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5807 x = gen_rtx_SET (VOIDmode, y, x);
5808 RTX_FRAME_RELATED_P (x) = 1;
5809 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5810 UNSPEC_REG_SAVE);
5811 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5812 RTX_FRAME_RELATED_P (y) = 1;
5813 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5814 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5815 REG_NOTES (insn) = x;
5816
5817 /* Align the stack. */
5818 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5819 GEN_INT (-16)));
5820
5821 /* And here we cheat like madmen with the unwind info. We force the
5822 cfa register back to sp+4, which is exactly what it was at the
5823 start of the function. Re-pushing the return address results in
5824 the return at the same spot relative to the cfa, and thus is
5825 correct wrt the unwind info. */
5826 x = cfun->machine->force_align_arg_pointer;
5827 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5828 insn = emit_insn (gen_push (x));
5829 RTX_FRAME_RELATED_P (insn) = 1;
5830
5831 x = GEN_INT (4);
5832 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5833 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5834 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5835 REG_NOTES (insn) = x;
5836 }
5837
5838 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5839 slower on all targets. Also sdb doesn't like it. */
5840
5841 if (frame_pointer_needed)
5842 {
5843 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5844 RTX_FRAME_RELATED_P (insn) = 1;
5845
5846 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5847 RTX_FRAME_RELATED_P (insn) = 1;
5848 }
5849
5850 allocate = frame.to_allocate;
5851
5852 if (!frame.save_regs_using_mov)
5853 ix86_emit_save_regs ();
5854 else
5855 allocate += frame.nregs * UNITS_PER_WORD;
5856
5857   /* When using the red zone, we may start saving registers before
5858      allocating the stack frame, saving one cycle of the prologue.  */
5859 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5860 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5861 : stack_pointer_rtx,
5862 -frame.nregs * UNITS_PER_WORD);
5863
5864 if (allocate == 0)
5865 ;
5866 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5867 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5868 GEN_INT (-allocate), -1);
5869 else
5870 {
5871 /* Only valid for Win32. */
5872 rtx eax = gen_rtx_REG (SImode, 0);
5873 bool eax_live = ix86_eax_live_at_start_p ();
5874 rtx t;
5875
5876 gcc_assert (!TARGET_64BIT);
5877
5878 if (eax_live)
5879 {
5880 emit_insn (gen_push (eax));
5881 allocate -= 4;
5882 }
5883
5884 emit_move_insn (eax, GEN_INT (allocate));
5885
5886 insn = emit_insn (gen_allocate_stack_worker (eax));
5887 RTX_FRAME_RELATED_P (insn) = 1;
5888 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5889 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5890 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5891 t, REG_NOTES (insn));
5892
5893 if (eax_live)
5894 {
5895 if (frame_pointer_needed)
5896 t = plus_constant (hard_frame_pointer_rtx,
5897 allocate
5898 - frame.to_allocate
5899 - frame.nregs * UNITS_PER_WORD);
5900 else
5901 t = plus_constant (stack_pointer_rtx, allocate);
5902 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5903 }
5904 }
5905
5906 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5907 {
5908 if (!frame_pointer_needed || !frame.to_allocate)
5909 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5910 else
5911 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5912 -frame.nregs * UNITS_PER_WORD);
5913 }
5914
5915 pic_reg_used = false;
5916 if (pic_offset_table_rtx
5917 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5918 || current_function_profile))
5919 {
5920 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5921
5922 if (alt_pic_reg_used != INVALID_REGNUM)
5923 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5924
5925 pic_reg_used = true;
5926 }
5927
5928 if (pic_reg_used)
5929 {
5930 if (TARGET_64BIT)
5931 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5932 else
5933 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5934
5935 /* Even with accurate pre-reload life analysis, we can wind up
5936 deleting all references to the pic register after reload.
5937 Consider if cross-jumping unifies two sides of a branch
5938 controlled by a comparison vs the only read from a global.
5939 In which case, allow the set_got to be deleted, though we're
5940 too late to do anything about the ebx save in the prologue. */
5941 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5942 }
5943
5944   /* Prevent function calls from being scheduled before the call to mcount.
5945      In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
5946 if (current_function_profile)
5947 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5948 }
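
/* As an illustration, a typical 32-bit prologue produced by the code above
   for a function with a frame pointer, two call-saved registers and some
   locals looks like:

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %edi
       pushl   %esi
       subl    $N, %esp

   where N stands for frame.to_allocate; with the 64-bit red zone or the
   move-based prologue the ordering of register saves and the stack
   adjustment can differ, as handled above.  */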
5949
5950 /* Emit code to restore saved registers using MOV insns. First register
5951 is restored from POINTER + OFFSET. */
5952 static void
5953 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5954 int maybe_eh_return)
5955 {
5956 int regno;
5957 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5958
5959 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5960 if (ix86_save_reg (regno, maybe_eh_return))
5961 {
5962 /* Ensure that adjust_address won't be forced to produce pointer
5963 out of range allowed by x86-64 instruction set. */
5964 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5965 {
5966 rtx r11;
5967
5968 r11 = gen_rtx_REG (DImode, R11_REG);
5969 emit_move_insn (r11, GEN_INT (offset));
5970 emit_insn (gen_adddi3 (r11, r11, pointer));
5971 base_address = gen_rtx_MEM (Pmode, r11);
5972 offset = 0;
5973 }
5974 emit_move_insn (gen_rtx_REG (Pmode, regno),
5975 adjust_address (base_address, Pmode, offset));
5976 offset += UNITS_PER_WORD;
5977 }
5978 }
5979
5980 /* Restore function stack, frame, and registers. */
5981
5982 void
5983 ix86_expand_epilogue (int style)
5984 {
5985 int regno;
5986 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5987 struct ix86_frame frame;
5988 HOST_WIDE_INT offset;
5989
5990 ix86_compute_frame_layout (&frame);
5991
5992 /* Calculate start of saved registers relative to ebp. Special care
5993 must be taken for the normal return case of a function using
5994 eh_return: the eax and edx registers are marked as saved, but not
5995 restored along this path. */
5996 offset = frame.nregs;
5997 if (current_function_calls_eh_return && style != 2)
5998 offset -= 2;
5999 offset *= -UNITS_PER_WORD;
6000
6001   /* If we're only restoring one register and sp is not valid, then
6002      restore the register using a move instruction, since that is less work
6003      than reloading sp and popping the register.
6004
6005      The default code results in a stack adjustment using an add/lea
6006      instruction, while this code results in a LEAVE instruction (or its
6007      discrete equivalent), so it is profitable in some other cases as well,
6008      especially when there are no registers to restore.  We also use this
6009      code when TARGET_USE_LEAVE is set and there is exactly one register to
6010      pop.  This heuristic may need some tuning in the future.  */
6011 if ((!sp_valid && frame.nregs <= 1)
6012 || (TARGET_EPILOGUE_USING_MOVE
6013 && cfun->machine->use_fast_prologue_epilogue
6014 && (frame.nregs > 1 || frame.to_allocate))
6015 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6016 || (frame_pointer_needed && TARGET_USE_LEAVE
6017 && cfun->machine->use_fast_prologue_epilogue
6018 && frame.nregs == 1)
6019 || current_function_calls_eh_return)
6020 {
6021       /* Restore registers.  We can use ebp or esp to address the memory
6022 	 locations.  If both are available, default to ebp, since offsets
6023 	 are known to be small.  The only exception is esp pointing directly
6024 	 to the end of the block of saved registers, where we may simplify
6025 	 the addressing mode.  */
6026
6027 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6028 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6029 frame.to_allocate, style == 2);
6030 else
6031 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6032 offset, style == 2);
6033
6034 /* eh_return epilogues need %ecx added to the stack pointer. */
6035 if (style == 2)
6036 {
6037 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6038
6039 if (frame_pointer_needed)
6040 {
6041 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6042 tmp = plus_constant (tmp, UNITS_PER_WORD);
6043 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6044
6045 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6046 emit_move_insn (hard_frame_pointer_rtx, tmp);
6047
6048 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6049 const0_rtx, style);
6050 }
6051 else
6052 {
6053 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6054 tmp = plus_constant (tmp, (frame.to_allocate
6055 + frame.nregs * UNITS_PER_WORD));
6056 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6057 }
6058 }
6059 else if (!frame_pointer_needed)
6060 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6061 GEN_INT (frame.to_allocate
6062 + frame.nregs * UNITS_PER_WORD),
6063 style);
6064 /* If not an i386, mov & pop is faster than "leave". */
6065 else if (TARGET_USE_LEAVE || optimize_size
6066 || !cfun->machine->use_fast_prologue_epilogue)
6067 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6068 else
6069 {
6070 pro_epilogue_adjust_stack (stack_pointer_rtx,
6071 hard_frame_pointer_rtx,
6072 const0_rtx, style);
6073 if (TARGET_64BIT)
6074 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6075 else
6076 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6077 }
6078 }
6079 else
6080 {
6081 /* First step is to deallocate the stack frame so that we can
6082 pop the registers. */
6083 if (!sp_valid)
6084 {
6085 gcc_assert (frame_pointer_needed);
6086 pro_epilogue_adjust_stack (stack_pointer_rtx,
6087 hard_frame_pointer_rtx,
6088 GEN_INT (offset), style);
6089 }
6090 else if (frame.to_allocate)
6091 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6092 GEN_INT (frame.to_allocate), style);
6093
6094 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6095 if (ix86_save_reg (regno, false))
6096 {
6097 if (TARGET_64BIT)
6098 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6099 else
6100 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6101 }
6102 if (frame_pointer_needed)
6103 {
6104 /* Leave results in shorter dependency chains on CPUs that are
6105 able to grok it fast. */
6106 if (TARGET_USE_LEAVE)
6107 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6108 else if (TARGET_64BIT)
6109 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6110 else
6111 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6112 }
6113 }
6114
6115 if (cfun->machine->force_align_arg_pointer)
6116 {
6117 emit_insn (gen_addsi3 (stack_pointer_rtx,
6118 cfun->machine->force_align_arg_pointer,
6119 GEN_INT (-4)));
6120 }
6121
6122 /* Sibcall epilogues don't want a return instruction. */
6123 if (style == 0)
6124 return;
6125
6126 if (current_function_pops_args && current_function_args_size)
6127 {
6128 rtx popc = GEN_INT (current_function_pops_args);
6129
6130       /* The i386 can only pop 64K bytes with one instruction.  If asked to
6131 	 pop more, pop the return address, do an explicit add, and jump
6132 	 indirectly to the caller.  */
6133
6134 if (current_function_pops_args >= 65536)
6135 {
6136 rtx ecx = gen_rtx_REG (SImode, 2);
6137
6138 /* There is no "pascal" calling convention in 64bit ABI. */
6139 gcc_assert (!TARGET_64BIT);
6140
6141 emit_insn (gen_popsi1 (ecx));
6142 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6143 emit_jump_insn (gen_return_indirect_internal (ecx));
6144 }
6145 else
6146 emit_jump_insn (gen_return_pop_internal (popc));
6147 }
6148 else
6149 emit_jump_insn (gen_return_internal ());
6150 }
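
/* Illustrative epilogues produced by the code above: with a frame pointer
   and TARGET_USE_LEAVE the common case is simply

       leave
       ret

   while without a frame pointer it is an explicit adjustment followed by
   pops of the saved registers:

       addl    $N, %esp
       popl    %esi
       popl    %edi
       ret

   where N stands for frame.to_allocate; "ret $imm" is used instead when
   the function pops its own arguments.  */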
6151
6152 /* Reset from the function's potential modifications. */
6153
6154 static void
6155 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6156 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6157 {
6158 if (pic_offset_table_rtx)
6159 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6160 #if TARGET_MACHO
6161 /* Mach-O doesn't support labels at the end of objects, so if
6162 it looks like we might want one, insert a NOP. */
6163 {
6164 rtx insn = get_last_insn ();
6165 while (insn
6166 && NOTE_P (insn)
6167 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6168 insn = PREV_INSN (insn);
6169 if (insn
6170 && (LABEL_P (insn)
6171 || (NOTE_P (insn)
6172 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6173 fputs ("\tnop\n", file);
6174 }
6175 #endif
6176
6177 }
6178 \f
6179 /* Extract the parts of an RTL expression that is a valid memory address
6180    for an instruction.  Return 0 if the structure of the address is
6181    grossly off.  Return -1 if the address contains ASHIFT, so it is not
6182    strictly valid but is still used for computing the length of a lea instruction.  */
6183
6184 int
6185 ix86_decompose_address (rtx addr, struct ix86_address *out)
6186 {
6187 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6188 rtx base_reg, index_reg;
6189 HOST_WIDE_INT scale = 1;
6190 rtx scale_rtx = NULL_RTX;
6191 int retval = 1;
6192 enum ix86_address_seg seg = SEG_DEFAULT;
6193
6194 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6195 base = addr;
6196 else if (GET_CODE (addr) == PLUS)
6197 {
6198 rtx addends[4], op;
6199 int n = 0, i;
6200
6201 op = addr;
6202 do
6203 {
6204 if (n >= 4)
6205 return 0;
6206 addends[n++] = XEXP (op, 1);
6207 op = XEXP (op, 0);
6208 }
6209 while (GET_CODE (op) == PLUS);
6210 if (n >= 4)
6211 return 0;
6212 addends[n] = op;
6213
6214 for (i = n; i >= 0; --i)
6215 {
6216 op = addends[i];
6217 switch (GET_CODE (op))
6218 {
6219 case MULT:
6220 if (index)
6221 return 0;
6222 index = XEXP (op, 0);
6223 scale_rtx = XEXP (op, 1);
6224 break;
6225
6226 case UNSPEC:
6227 if (XINT (op, 1) == UNSPEC_TP
6228 && TARGET_TLS_DIRECT_SEG_REFS
6229 && seg == SEG_DEFAULT)
6230 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6231 else
6232 return 0;
6233 break;
6234
6235 case REG:
6236 case SUBREG:
6237 if (!base)
6238 base = op;
6239 else if (!index)
6240 index = op;
6241 else
6242 return 0;
6243 break;
6244
6245 case CONST:
6246 case CONST_INT:
6247 case SYMBOL_REF:
6248 case LABEL_REF:
6249 if (disp)
6250 return 0;
6251 disp = op;
6252 break;
6253
6254 default:
6255 return 0;
6256 }
6257 }
6258 }
6259 else if (GET_CODE (addr) == MULT)
6260 {
6261 index = XEXP (addr, 0); /* index*scale */
6262 scale_rtx = XEXP (addr, 1);
6263 }
6264 else if (GET_CODE (addr) == ASHIFT)
6265 {
6266 rtx tmp;
6267
6268 /* We're called for lea too, which implements ashift on occasion. */
6269 index = XEXP (addr, 0);
6270 tmp = XEXP (addr, 1);
6271 if (!CONST_INT_P (tmp))
6272 return 0;
6273 scale = INTVAL (tmp);
6274 if ((unsigned HOST_WIDE_INT) scale > 3)
6275 return 0;
6276 scale = 1 << scale;
6277 retval = -1;
6278 }
6279 else
6280 disp = addr; /* displacement */
6281
6282 /* Extract the integral value of scale. */
6283 if (scale_rtx)
6284 {
6285 if (!CONST_INT_P (scale_rtx))
6286 return 0;
6287 scale = INTVAL (scale_rtx);
6288 }
6289
6290 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6291 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6292
6293   /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
6294 if (base_reg && index_reg && scale == 1
6295 && (index_reg == arg_pointer_rtx
6296 || index_reg == frame_pointer_rtx
6297 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6298 {
6299 rtx tmp;
6300 tmp = base, base = index, index = tmp;
6301 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6302 }
6303
6304 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6305 if ((base_reg == hard_frame_pointer_rtx
6306 || base_reg == frame_pointer_rtx
6307 || base_reg == arg_pointer_rtx) && !disp)
6308 disp = const0_rtx;
6309
6310 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6311 Avoid this by transforming to [%esi+0]. */
6312 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6313 && base_reg && !index_reg && !disp
6314 && REG_P (base_reg)
6315 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6316 disp = const0_rtx;
6317
6318 /* Special case: encode reg+reg instead of reg*2. */
6319 if (!base && index && scale && scale == 2)
6320 base = index, base_reg = index_reg, scale = 1;
6321
6322 /* Special case: scaling cannot be encoded without base or displacement. */
6323 if (!base && !disp && index && scale != 1)
6324 disp = const0_rtx;
6325
6326 out->base = base;
6327 out->index = index;
6328 out->disp = disp;
6329 out->scale = scale;
6330 out->seg = seg;
6331
6332 return retval;
6333 }
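
/* For example (illustrative), the address

       (plus:SI (plus:SI (reg:SI bx)
                         (mult:SI (reg:SI si) (const_int 4)))
                (const_int 12))

   decomposes into base = %ebx, index = %esi, scale = 4 and disp = 12,
   i.e. the memory operand of a "movl 12(%ebx,%esi,4), ..." style
   instruction, and the function returns 1.  */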
6334 \f
6335 /* Return the cost of the memory address X.
6336    For the i386, it is better to use a complex address than to let gcc copy
6337    the address into a register and make a new pseudo.  But not if the address
6338    requires two registers - that would mean more pseudos with longer
6339    lifetimes.  */
6340 static int
6341 ix86_address_cost (rtx x)
6342 {
6343 struct ix86_address parts;
6344 int cost = 1;
6345 int ok = ix86_decompose_address (x, &parts);
6346
6347 gcc_assert (ok);
6348
6349 if (parts.base && GET_CODE (parts.base) == SUBREG)
6350 parts.base = SUBREG_REG (parts.base);
6351 if (parts.index && GET_CODE (parts.index) == SUBREG)
6352 parts.index = SUBREG_REG (parts.index);
6353
6354 /* More complex memory references are better. */
6355 if (parts.disp && parts.disp != const0_rtx)
6356 cost--;
6357 if (parts.seg != SEG_DEFAULT)
6358 cost--;
6359
6360 /* Attempt to minimize number of registers in the address. */
6361 if ((parts.base
6362 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6363 || (parts.index
6364 && (!REG_P (parts.index)
6365 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6366 cost++;
6367
6368 if (parts.base
6369 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6370 && parts.index
6371 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6372 && parts.base != parts.index)
6373 cost++;
6374
6375   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to
6376      00_xxx_100b, since its predecode logic can't detect the length of such
6377      instructions and decoding degenerates to vector decoding.  Increase the
6378      cost of such addresses here.  The penalty is at least 2 cycles.  It may
6379      be worthwhile to split such addresses or even to refuse them entirely.
6380
6381      The following addressing modes are affected:
6382        [base+scale*index]
6383        [scale*index+disp]
6384        [base+index]
6385
6386      The first and last cases may be avoidable by explicitly coding a zero
6387      displacement into the memory address, but I don't have an AMD-K6 machine
6388      handy to check this theory.  */
6389
6390 if (TARGET_K6
6391 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6392 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6393 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6394 cost += 10;
6395
6396 return cost;
6397 }
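
/* Illustrative costs computed above (ignoring the K6 penalty): a plain
   hard-register address such as (%ebx) costs 1; 8(%ebx) costs 0, because
   the displacement makes the reference "more complex" and therefore
   preferable; the same addresses built from not-yet-allocated pseudos cost
   one more per distinct pseudo involved, discouraging addresses that tie
   up two registers.  */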
6398 \f
6399 /* If X is a machine specific address (i.e. a symbol or label being
6400 referenced as a displacement from the GOT implemented using an
6401 UNSPEC), then return the base term. Otherwise return X. */
6402
6403 rtx
6404 ix86_find_base_term (rtx x)
6405 {
6406 rtx term;
6407
6408 if (TARGET_64BIT)
6409 {
6410 if (GET_CODE (x) != CONST)
6411 return x;
6412 term = XEXP (x, 0);
6413 if (GET_CODE (term) == PLUS
6414 && (CONST_INT_P (XEXP (term, 1))
6415 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6416 term = XEXP (term, 0);
6417 if (GET_CODE (term) != UNSPEC
6418 || XINT (term, 1) != UNSPEC_GOTPCREL)
6419 return x;
6420
6421 term = XVECEXP (term, 0, 0);
6422
6423 if (GET_CODE (term) != SYMBOL_REF
6424 && GET_CODE (term) != LABEL_REF)
6425 return x;
6426
6427 return term;
6428 }
6429
6430 term = ix86_delegitimize_address (x);
6431
6432 if (GET_CODE (term) != SYMBOL_REF
6433 && GET_CODE (term) != LABEL_REF)
6434 return x;
6435
6436 return term;
6437 }
6438
6439 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6440    this is used to form addresses to local data when -fPIC is in
6441    use.  */
6442
6443 static bool
6444 darwin_local_data_pic (rtx disp)
6445 {
6446 if (GET_CODE (disp) == MINUS)
6447 {
6448 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6449 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6450 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6451 {
6452 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6453 if (! strcmp (sym_name, "<pic base>"))
6454 return true;
6455 }
6456 }
6457
6458 return false;
6459 }
6460 \f
6461 /* Determine if a given RTX is a valid constant. We already know this
6462 satisfies CONSTANT_P. */
6463
6464 bool
6465 legitimate_constant_p (rtx x)
6466 {
6467 switch (GET_CODE (x))
6468 {
6469 case CONST:
6470 x = XEXP (x, 0);
6471
6472 if (GET_CODE (x) == PLUS)
6473 {
6474 if (!CONST_INT_P (XEXP (x, 1)))
6475 return false;
6476 x = XEXP (x, 0);
6477 }
6478
6479 if (TARGET_MACHO && darwin_local_data_pic (x))
6480 return true;
6481
6482 /* Only some unspecs are valid as "constants". */
6483 if (GET_CODE (x) == UNSPEC)
6484 switch (XINT (x, 1))
6485 {
6486 case UNSPEC_GOTOFF:
6487 return TARGET_64BIT;
6488 case UNSPEC_TPOFF:
6489 case UNSPEC_NTPOFF:
6490 x = XVECEXP (x, 0, 0);
6491 return (GET_CODE (x) == SYMBOL_REF
6492 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6493 case UNSPEC_DTPOFF:
6494 x = XVECEXP (x, 0, 0);
6495 return (GET_CODE (x) == SYMBOL_REF
6496 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6497 default:
6498 return false;
6499 }
6500
6501 /* We must have drilled down to a symbol. */
6502 if (GET_CODE (x) == LABEL_REF)
6503 return true;
6504 if (GET_CODE (x) != SYMBOL_REF)
6505 return false;
6506 /* FALLTHRU */
6507
6508 case SYMBOL_REF:
6509 /* TLS symbols are never valid. */
6510 if (SYMBOL_REF_TLS_MODEL (x))
6511 return false;
6512 break;
6513
6514 case CONST_DOUBLE:
6515 if (GET_MODE (x) == TImode
6516 && x != CONST0_RTX (TImode)
6517 && !TARGET_64BIT)
6518 return false;
6519 break;
6520
6521 case CONST_VECTOR:
6522 if (x == CONST0_RTX (GET_MODE (x)))
6523 return true;
6524 return false;
6525
6526 default:
6527 break;
6528 }
6529
6530 /* Otherwise we handle everything else in the move patterns. */
6531 return true;
6532 }
6533
6534 /* Determine if it's legal to put X into the constant pool. This
6535 is not possible for the address of thread-local symbols, which
6536 is checked above. */
6537
6538 static bool
6539 ix86_cannot_force_const_mem (rtx x)
6540 {
6541 /* We can always put integral constants and vectors in memory. */
6542 switch (GET_CODE (x))
6543 {
6544 case CONST_INT:
6545 case CONST_DOUBLE:
6546 case CONST_VECTOR:
6547 return false;
6548
6549 default:
6550 break;
6551 }
6552 return !legitimate_constant_p (x);
6553 }
6554
6555 /* Determine if a given RTX is a valid constant address. */
6556
6557 bool
6558 constant_address_p (rtx x)
6559 {
6560 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6561 }
6562
6563 /* Nonzero if the constant value X is a legitimate general operand
6564 when generating PIC code. It is given that flag_pic is on and
6565 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6566
6567 bool
6568 legitimate_pic_operand_p (rtx x)
6569 {
6570 rtx inner;
6571
6572 switch (GET_CODE (x))
6573 {
6574 case CONST:
6575 inner = XEXP (x, 0);
6576 if (GET_CODE (inner) == PLUS
6577 && CONST_INT_P (XEXP (inner, 1)))
6578 inner = XEXP (inner, 0);
6579
6580 /* Only some unspecs are valid as "constants". */
6581 if (GET_CODE (inner) == UNSPEC)
6582 switch (XINT (inner, 1))
6583 {
6584 case UNSPEC_GOTOFF:
6585 return TARGET_64BIT;
6586 case UNSPEC_TPOFF:
6587 x = XVECEXP (inner, 0, 0);
6588 return (GET_CODE (x) == SYMBOL_REF
6589 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6590 default:
6591 return false;
6592 }
6593 /* FALLTHRU */
6594
6595 case SYMBOL_REF:
6596 case LABEL_REF:
6597 return legitimate_pic_address_disp_p (x);
6598
6599 default:
6600 return true;
6601 }
6602 }
6603
6604 /* Determine if a given CONST RTX is a valid memory displacement
6605 in PIC mode. */
6606
6607 int
6608 legitimate_pic_address_disp_p (rtx disp)
6609 {
6610 bool saw_plus;
6611
6612 /* In 64bit mode we can allow direct addresses of symbols and labels
6613 when they are not dynamic symbols. */
6614 if (TARGET_64BIT)
6615 {
6616 rtx op0 = disp, op1;
6617
6618 switch (GET_CODE (disp))
6619 {
6620 case LABEL_REF:
6621 return true;
6622
6623 case CONST:
6624 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6625 break;
6626 op0 = XEXP (XEXP (disp, 0), 0);
6627 op1 = XEXP (XEXP (disp, 0), 1);
6628 if (!CONST_INT_P (op1)
6629 || INTVAL (op1) >= 16*1024*1024
6630 || INTVAL (op1) < -16*1024*1024)
6631 break;
6632 if (GET_CODE (op0) == LABEL_REF)
6633 return true;
6634 if (GET_CODE (op0) != SYMBOL_REF)
6635 break;
6636 /* FALLTHRU */
6637
6638 case SYMBOL_REF:
6639 /* TLS references should always be enclosed in UNSPEC. */
6640 if (SYMBOL_REF_TLS_MODEL (op0))
6641 return false;
6642 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6643 return true;
6644 break;
6645
6646 default:
6647 break;
6648 }
6649 }
6650 if (GET_CODE (disp) != CONST)
6651 return 0;
6652 disp = XEXP (disp, 0);
6653
6654 if (TARGET_64BIT)
6655 {
6656 /* It is unsafe to allow PLUS expressions here; this restriction limits
6657 the allowed distance into the GOT tables. We should not need them anyway. */
6658 if (GET_CODE (disp) != UNSPEC
6659 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6660 && XINT (disp, 1) != UNSPEC_GOTOFF))
6661 return 0;
6662
6663 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6664 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6665 return 0;
6666 return 1;
6667 }
6668
6669 saw_plus = false;
6670 if (GET_CODE (disp) == PLUS)
6671 {
6672 if (!CONST_INT_P (XEXP (disp, 1)))
6673 return 0;
6674 disp = XEXP (disp, 0);
6675 saw_plus = true;
6676 }
6677
6678 if (TARGET_MACHO && darwin_local_data_pic (disp))
6679 return 1;
6680
6681 if (GET_CODE (disp) != UNSPEC)
6682 return 0;
6683
6684 switch (XINT (disp, 1))
6685 {
6686 case UNSPEC_GOT:
6687 if (saw_plus)
6688 return false;
6689 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6690 case UNSPEC_GOTOFF:
6691 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6692 While the ABI also specifies a 32bit relocation, we don't produce
6693 it in the small PIC model at all. */
6694 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6695 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6696 && !TARGET_64BIT)
6697 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6698 return false;
6699 case UNSPEC_GOTTPOFF:
6700 case UNSPEC_GOTNTPOFF:
6701 case UNSPEC_INDNTPOFF:
6702 if (saw_plus)
6703 return false;
6704 disp = XVECEXP (disp, 0, 0);
6705 return (GET_CODE (disp) == SYMBOL_REF
6706 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6707 case UNSPEC_NTPOFF:
6708 disp = XVECEXP (disp, 0, 0);
6709 return (GET_CODE (disp) == SYMBOL_REF
6710 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6711 case UNSPEC_DTPOFF:
6712 disp = XVECEXP (disp, 0, 0);
6713 return (GET_CODE (disp) == SYMBOL_REF
6714 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6715 }
6716
6717 return 0;
6718 }
6719
6720 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6721 memory address for an instruction. The MODE argument is the machine mode
6722 for the MEM expression that wants to use this address.
6723
6724 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6725 convert common non-canonical forms to canonical form so that they will
6726 be recognized. */
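/* Note (added): a canonical x86 address has the general shape

       base + index*scale + displacement

   with scale restricted to 1, 2, 4 or 8.  ix86_decompose_address splits
   ADDR into those parts and the checks below validate each one.  */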
6727
6728 int
6729 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6730 {
6731 struct ix86_address parts;
6732 rtx base, index, disp;
6733 HOST_WIDE_INT scale;
6734 const char *reason = NULL;
6735 rtx reason_rtx = NULL_RTX;
6736
6737 if (TARGET_DEBUG_ADDR)
6738 {
6739 fprintf (stderr,
6740 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6741 GET_MODE_NAME (mode), strict);
6742 debug_rtx (addr);
6743 }
6744
6745 if (ix86_decompose_address (addr, &parts) <= 0)
6746 {
6747 reason = "decomposition failed";
6748 goto report_error;
6749 }
6750
6751 base = parts.base;
6752 index = parts.index;
6753 disp = parts.disp;
6754 scale = parts.scale;
6755
6756 /* Validate base register.
6757
6758 Don't allow SUBREGs that span more than a word here; they can lead to spill
6759 failures when the base is one word out of a two-word structure, which is
6760 represented internally as a DImode int. */
6761
6762 if (base)
6763 {
6764 rtx reg;
6765 reason_rtx = base;
6766
6767 if (REG_P (base))
6768 reg = base;
6769 else if (GET_CODE (base) == SUBREG
6770 && REG_P (SUBREG_REG (base))
6771 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6772 <= UNITS_PER_WORD)
6773 reg = SUBREG_REG (base);
6774 else
6775 {
6776 reason = "base is not a register";
6777 goto report_error;
6778 }
6779
6780 if (GET_MODE (base) != Pmode)
6781 {
6782 reason = "base is not in Pmode";
6783 goto report_error;
6784 }
6785
6786 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6787 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6788 {
6789 reason = "base is not valid";
6790 goto report_error;
6791 }
6792 }
6793
6794 /* Validate index register.
6795
6796 Don't allow SUBREGs that span more than a word here -- same as above. */
6797
6798 if (index)
6799 {
6800 rtx reg;
6801 reason_rtx = index;
6802
6803 if (REG_P (index))
6804 reg = index;
6805 else if (GET_CODE (index) == SUBREG
6806 && REG_P (SUBREG_REG (index))
6807 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6808 <= UNITS_PER_WORD)
6809 reg = SUBREG_REG (index);
6810 else
6811 {
6812 reason = "index is not a register";
6813 goto report_error;
6814 }
6815
6816 if (GET_MODE (index) != Pmode)
6817 {
6818 reason = "index is not in Pmode";
6819 goto report_error;
6820 }
6821
6822 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6823 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6824 {
6825 reason = "index is not valid";
6826 goto report_error;
6827 }
6828 }
6829
6830 /* Validate scale factor. */
6831 if (scale != 1)
6832 {
6833 reason_rtx = GEN_INT (scale);
6834 if (!index)
6835 {
6836 reason = "scale without index";
6837 goto report_error;
6838 }
6839
6840 if (scale != 2 && scale != 4 && scale != 8)
6841 {
6842 reason = "scale is not a valid multiplier";
6843 goto report_error;
6844 }
6845 }
6846
6847 /* Validate displacement. */
6848 if (disp)
6849 {
6850 reason_rtx = disp;
6851
6852 if (GET_CODE (disp) == CONST
6853 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6854 switch (XINT (XEXP (disp, 0), 1))
6855 {
6856 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
6857 used. While the ABI also specifies 32bit relocations, we don't produce
6858 them at all and use IP-relative addressing instead. */
6859 case UNSPEC_GOT:
6860 case UNSPEC_GOTOFF:
6861 gcc_assert (flag_pic);
6862 if (!TARGET_64BIT)
6863 goto is_legitimate_pic;
6864 reason = "64bit address unspec";
6865 goto report_error;
6866
6867 case UNSPEC_GOTPCREL:
6868 gcc_assert (flag_pic);
6869 goto is_legitimate_pic;
6870
6871 case UNSPEC_GOTTPOFF:
6872 case UNSPEC_GOTNTPOFF:
6873 case UNSPEC_INDNTPOFF:
6874 case UNSPEC_NTPOFF:
6875 case UNSPEC_DTPOFF:
6876 break;
6877
6878 default:
6879 reason = "invalid address unspec";
6880 goto report_error;
6881 }
6882
6883 else if (SYMBOLIC_CONST (disp)
6884 && (flag_pic
6885 || (TARGET_MACHO
6886 #if TARGET_MACHO
6887 && MACHOPIC_INDIRECT
6888 && !machopic_operand_p (disp)
6889 #endif
6890 )))
6891 {
6892
6893 is_legitimate_pic:
6894 if (TARGET_64BIT && (index || base))
6895 {
6896 /* foo@dtpoff(%rX) is ok. */
6897 if (GET_CODE (disp) != CONST
6898 || GET_CODE (XEXP (disp, 0)) != PLUS
6899 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6900 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6901 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6902 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6903 {
6904 reason = "non-constant pic memory reference";
6905 goto report_error;
6906 }
6907 }
6908 else if (! legitimate_pic_address_disp_p (disp))
6909 {
6910 reason = "displacement is an invalid pic construct";
6911 goto report_error;
6912 }
6913
6914 /* This code used to verify that a symbolic pic displacement
6915 includes the pic_offset_table_rtx register.
6916
6917 While this is a good idea, unfortunately these constructs may
6918 be created by the "adds using lea" optimization for incorrect
6919 code like:
6920
6921 int a;
6922 int foo(int i)
6923 {
6924 return *(&a+i);
6925 }
6926
6927 This code is nonsensical, but results in addressing the
6928 GOT table with a pic_offset_table_rtx base. We can't
6929 easily refuse it, since it gets matched by the
6930 "addsi3" pattern, which later gets split to lea when the
6931 output register differs from the input. While this
6932 could be handled by a separate addsi pattern for this case
6933 that never results in lea, disabling the test seems to be
6934 the simpler and correct fix for the crash. */
6935 }
6936 else if (GET_CODE (disp) != LABEL_REF
6937 && !CONST_INT_P (disp)
6938 && (GET_CODE (disp) != CONST
6939 || !legitimate_constant_p (disp))
6940 && (GET_CODE (disp) != SYMBOL_REF
6941 || !legitimate_constant_p (disp)))
6942 {
6943 reason = "displacement is not constant";
6944 goto report_error;
6945 }
6946 else if (TARGET_64BIT
6947 && !x86_64_immediate_operand (disp, VOIDmode))
6948 {
6949 reason = "displacement is out of range";
6950 goto report_error;
6951 }
6952 }
6953
6954 /* Everything looks valid. */
6955 if (TARGET_DEBUG_ADDR)
6956 fprintf (stderr, "Success.\n");
6957 return TRUE;
6958
6959 report_error:
6960 if (TARGET_DEBUG_ADDR)
6961 {
6962 fprintf (stderr, "Error: %s\n", reason);
6963 debug_rtx (reason_rtx);
6964 }
6965 return FALSE;
6966 }
6967 \f
6968 /* Return a unique alias set for the GOT. */
6969
6970 static HOST_WIDE_INT
6971 ix86_GOT_alias_set (void)
6972 {
6973 static HOST_WIDE_INT set = -1;
6974 if (set == -1)
6975 set = new_alias_set ();
6976 return set;
6977 }
6978
6979 /* Return a legitimate reference for ORIG (an address) using the
6980 register REG. If REG is 0, a new pseudo is generated.
6981
6982 There are two types of references that must be handled:
6983
6984 1. Global data references must load the address from the GOT, via
6985 the PIC reg. An insn is emitted to do this load, and the reg is
6986 returned.
6987
6988 2. Static data references, constant pool addresses, and code labels
6989 compute the address as an offset from the GOT, whose base is in
6990 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6991 differentiate them from global data objects. The returned
6992 address is the PIC reg + an unspec constant.
6993
6994 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6995 reg also appears in the address. */
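/* Rough sketch of the results (added, assuming ia32 -fpic): a local
   symbol becomes a GOT-relative displacement,

       (plus pic_offset_table_rtx
             (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   while a global symbol is loaded through the GOT,

       (mem (plus pic_offset_table_rtx
                  (const (unspec [(symbol_ref "sym")] UNSPEC_GOT))))  */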
6996
6997 static rtx
6998 legitimize_pic_address (rtx orig, rtx reg)
6999 {
7000 rtx addr = orig;
7001 rtx new = orig;
7002 rtx base;
7003
7004 #if TARGET_MACHO
7005 if (TARGET_MACHO && !TARGET_64BIT)
7006 {
7007 if (reg == 0)
7008 reg = gen_reg_rtx (Pmode);
7009 /* Use the generic Mach-O PIC machinery. */
7010 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7011 }
7012 #endif
7013
7014 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7015 new = addr;
7016 else if (TARGET_64BIT
7017 && ix86_cmodel != CM_SMALL_PIC
7018 && local_symbolic_operand (addr, Pmode))
7019 {
7020 rtx tmpreg;
7021 /* This symbol may be referenced via a displacement from the PIC
7022 base address (@GOTOFF). */
7023
7024 if (reload_in_progress)
7025 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7026 if (GET_CODE (addr) == CONST)
7027 addr = XEXP (addr, 0);
7028 if (GET_CODE (addr) == PLUS)
7029 {
7030 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7031 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7032 }
7033 else
7034 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7035 new = gen_rtx_CONST (Pmode, new);
7036 if (!reg)
7037 tmpreg = gen_reg_rtx (Pmode);
7038 else
7039 tmpreg = reg;
7040 emit_move_insn (tmpreg, new);
7041
7042 if (reg != 0)
7043 {
7044 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7045 tmpreg, 1, OPTAB_DIRECT);
7046 new = reg;
7047 }
7048 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7049 }
7050 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7051 {
7052 /* This symbol may be referenced via a displacement from the PIC
7053 base address (@GOTOFF). */
7054
7055 if (reload_in_progress)
7056 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7057 if (GET_CODE (addr) == CONST)
7058 addr = XEXP (addr, 0);
7059 if (GET_CODE (addr) == PLUS)
7060 {
7061 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7062 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7063 }
7064 else
7065 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7066 new = gen_rtx_CONST (Pmode, new);
7067 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7068
7069 if (reg != 0)
7070 {
7071 emit_move_insn (reg, new);
7072 new = reg;
7073 }
7074 }
7075 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7076 {
7077 if (TARGET_64BIT)
7078 {
7079 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7080 new = gen_rtx_CONST (Pmode, new);
7081 new = gen_const_mem (Pmode, new);
7082 set_mem_alias_set (new, ix86_GOT_alias_set ());
7083
7084 if (reg == 0)
7085 reg = gen_reg_rtx (Pmode);
7086 /* Use gen_movsi directly; otherwise the address is loaded
7087 into a register for CSE. We don't want to CSE these addresses;
7088 instead we CSE addresses from the GOT table, so skip this. */
7089 emit_insn (gen_movsi (reg, new));
7090 new = reg;
7091 }
7092 else
7093 {
7094 /* This symbol must be referenced via a load from the
7095 Global Offset Table (@GOT). */
7096
7097 if (reload_in_progress)
7098 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7099 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7100 new = gen_rtx_CONST (Pmode, new);
7101 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7102 new = gen_const_mem (Pmode, new);
7103 set_mem_alias_set (new, ix86_GOT_alias_set ());
7104
7105 if (reg == 0)
7106 reg = gen_reg_rtx (Pmode);
7107 emit_move_insn (reg, new);
7108 new = reg;
7109 }
7110 }
7111 else
7112 {
7113 if (CONST_INT_P (addr)
7114 && !x86_64_immediate_operand (addr, VOIDmode))
7115 {
7116 if (reg)
7117 {
7118 emit_move_insn (reg, addr);
7119 new = reg;
7120 }
7121 else
7122 new = force_reg (Pmode, addr);
7123 }
7124 else if (GET_CODE (addr) == CONST)
7125 {
7126 addr = XEXP (addr, 0);
7127
7128 /* We must match what we generated earlier. Assume the only
7129 unspecs that can get here are ours. Not that we could do
7130 anything with them anyway.... */
7131 if (GET_CODE (addr) == UNSPEC
7132 || (GET_CODE (addr) == PLUS
7133 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7134 return orig;
7135 gcc_assert (GET_CODE (addr) == PLUS);
7136 }
7137 if (GET_CODE (addr) == PLUS)
7138 {
7139 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7140
7141 /* Check first to see if this is a constant offset from a @GOTOFF
7142 symbol reference. */
7143 if (local_symbolic_operand (op0, Pmode)
7144 && CONST_INT_P (op1))
7145 {
7146 if (!TARGET_64BIT)
7147 {
7148 if (reload_in_progress)
7149 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7150 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7151 UNSPEC_GOTOFF);
7152 new = gen_rtx_PLUS (Pmode, new, op1);
7153 new = gen_rtx_CONST (Pmode, new);
7154 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7155
7156 if (reg != 0)
7157 {
7158 emit_move_insn (reg, new);
7159 new = reg;
7160 }
7161 }
7162 else
7163 {
7164 if (INTVAL (op1) < -16*1024*1024
7165 || INTVAL (op1) >= 16*1024*1024)
7166 {
7167 if (!x86_64_immediate_operand (op1, Pmode))
7168 op1 = force_reg (Pmode, op1);
7169 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7170 }
7171 }
7172 }
7173 else
7174 {
7175 base = legitimize_pic_address (XEXP (addr, 0), reg);
7176 new = legitimize_pic_address (XEXP (addr, 1),
7177 base == reg ? NULL_RTX : reg);
7178
7179 if (CONST_INT_P (new))
7180 new = plus_constant (base, INTVAL (new));
7181 else
7182 {
7183 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7184 {
7185 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7186 new = XEXP (new, 1);
7187 }
7188 new = gen_rtx_PLUS (Pmode, base, new);
7189 }
7190 }
7191 }
7192 }
7193 return new;
7194 }
7195 \f
7196 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7197
7198 static rtx
7199 get_thread_pointer (int to_reg)
7200 {
7201 rtx tp, reg, insn;
7202
7203 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7204 if (!to_reg)
7205 return tp;
7206
7207 reg = gen_reg_rtx (Pmode);
7208 insn = gen_rtx_SET (VOIDmode, reg, tp);
7209 insn = emit_insn (insn);
7210
7211 return reg;
7212 }
7213
7214 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7215 false if we expect this to be used for a memory address and true if
7216 we expect to load the address into a register. */
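/* Background note (added): the sequences built below follow the usual
   ELF TLS access models.  For instance, local-exec with GNU TLS is
   roughly

       movl %gs:0, %reg
       ... sym@NTPOFF(%reg) ...

   whereas global-dynamic goes through a call to the tls_get_addr
   helper.  The exact form depends on TARGET_64BIT and TARGET_GNU2_TLS.  */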
7217
7218 static rtx
7219 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7220 {
7221 rtx dest, base, off, pic, tp;
7222 int type;
7223
7224 switch (model)
7225 {
7226 case TLS_MODEL_GLOBAL_DYNAMIC:
7227 dest = gen_reg_rtx (Pmode);
7228 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7229
7230 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7231 {
7232 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7233
7234 start_sequence ();
7235 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7236 insns = get_insns ();
7237 end_sequence ();
7238
7239 emit_libcall_block (insns, dest, rax, x);
7240 }
7241 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7242 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7243 else
7244 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7245
7246 if (TARGET_GNU2_TLS)
7247 {
7248 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7249
7250 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7251 }
7252 break;
7253
7254 case TLS_MODEL_LOCAL_DYNAMIC:
7255 base = gen_reg_rtx (Pmode);
7256 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7257
7258 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7259 {
7260 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7261
7262 start_sequence ();
7263 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7264 insns = get_insns ();
7265 end_sequence ();
7266
7267 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7268 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7269 emit_libcall_block (insns, base, rax, note);
7270 }
7271 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7272 emit_insn (gen_tls_local_dynamic_base_64 (base));
7273 else
7274 emit_insn (gen_tls_local_dynamic_base_32 (base));
7275
7276 if (TARGET_GNU2_TLS)
7277 {
7278 rtx x = ix86_tls_module_base ();
7279
7280 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7281 gen_rtx_MINUS (Pmode, x, tp));
7282 }
7283
7284 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7285 off = gen_rtx_CONST (Pmode, off);
7286
7287 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7288
7289 if (TARGET_GNU2_TLS)
7290 {
7291 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7292
7293 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7294 }
7295
7296 break;
7297
7298 case TLS_MODEL_INITIAL_EXEC:
7299 if (TARGET_64BIT)
7300 {
7301 pic = NULL;
7302 type = UNSPEC_GOTNTPOFF;
7303 }
7304 else if (flag_pic)
7305 {
7306 if (reload_in_progress)
7307 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7308 pic = pic_offset_table_rtx;
7309 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7310 }
7311 else if (!TARGET_ANY_GNU_TLS)
7312 {
7313 pic = gen_reg_rtx (Pmode);
7314 emit_insn (gen_set_got (pic));
7315 type = UNSPEC_GOTTPOFF;
7316 }
7317 else
7318 {
7319 pic = NULL;
7320 type = UNSPEC_INDNTPOFF;
7321 }
7322
7323 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7324 off = gen_rtx_CONST (Pmode, off);
7325 if (pic)
7326 off = gen_rtx_PLUS (Pmode, pic, off);
7327 off = gen_const_mem (Pmode, off);
7328 set_mem_alias_set (off, ix86_GOT_alias_set ());
7329
7330 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7331 {
7332 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7333 off = force_reg (Pmode, off);
7334 return gen_rtx_PLUS (Pmode, base, off);
7335 }
7336 else
7337 {
7338 base = get_thread_pointer (true);
7339 dest = gen_reg_rtx (Pmode);
7340 emit_insn (gen_subsi3 (dest, base, off));
7341 }
7342 break;
7343
7344 case TLS_MODEL_LOCAL_EXEC:
7345 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7346 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7347 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7348 off = gen_rtx_CONST (Pmode, off);
7349
7350 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7351 {
7352 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7353 return gen_rtx_PLUS (Pmode, base, off);
7354 }
7355 else
7356 {
7357 base = get_thread_pointer (true);
7358 dest = gen_reg_rtx (Pmode);
7359 emit_insn (gen_subsi3 (dest, base, off));
7360 }
7361 break;
7362
7363 default:
7364 gcc_unreachable ();
7365 }
7366
7367 return dest;
7368 }
7369
7370 /* Try machine-dependent ways of modifying an illegitimate address
7371 to be legitimate. If we find one, return the new, valid address.
7372 This macro is used in only one place: `memory_address' in explow.c.
7373
7374 OLDX is the address as it was before break_out_memory_refs was called.
7375 In some cases it is useful to look at this to decide what needs to be done.
7376
7377 MODE and WIN are passed so that this macro can use
7378 GO_IF_LEGITIMATE_ADDRESS.
7379
7380 It is always safe for this macro to do nothing. It exists to recognize
7381 opportunities to optimize the output.
7382
7383 For the 80386, we handle X+REG by loading X into a register R and
7384 using R+REG. R will go in a general reg and indexing will be used.
7385 However, if REG is a broken-out memory address or multiplication,
7386 nothing needs to be done because REG can certainly go in a general reg.
7387
7388 When -fpic is used, special handling is needed for symbolic references.
7389 See comments by legitimize_pic_address in i386.c for details. */
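/* Example of one transformation done below (added): a shift used as an
   index is rewritten into the canonical scaled form,

       (plus (ashift (reg A) (const_int 2)) (reg B))
    -> (plus (mult (reg A) (const_int 4)) (reg B))

   so that the address matches base + index*scale addressing.  */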
7390
7391 rtx
7392 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7393 {
7394 int changed = 0;
7395 unsigned log;
7396
7397 if (TARGET_DEBUG_ADDR)
7398 {
7399 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7400 GET_MODE_NAME (mode));
7401 debug_rtx (x);
7402 }
7403
7404 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7405 if (log)
7406 return legitimize_tls_address (x, log, false);
7407 if (GET_CODE (x) == CONST
7408 && GET_CODE (XEXP (x, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7410 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7411 {
7412 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7413 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7414 }
7415
7416 if (flag_pic && SYMBOLIC_CONST (x))
7417 return legitimize_pic_address (x, 0);
7418
7419 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7420 if (GET_CODE (x) == ASHIFT
7421 && CONST_INT_P (XEXP (x, 1))
7422 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7423 {
7424 changed = 1;
7425 log = INTVAL (XEXP (x, 1));
7426 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7427 GEN_INT (1 << log));
7428 }
7429
7430 if (GET_CODE (x) == PLUS)
7431 {
7432 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7433
7434 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7435 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7436 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7437 {
7438 changed = 1;
7439 log = INTVAL (XEXP (XEXP (x, 0), 1));
7440 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7441 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7442 GEN_INT (1 << log));
7443 }
7444
7445 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7446 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7447 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7448 {
7449 changed = 1;
7450 log = INTVAL (XEXP (XEXP (x, 1), 1));
7451 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7452 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7453 GEN_INT (1 << log));
7454 }
7455
7456 /* Put multiply first if it isn't already. */
7457 if (GET_CODE (XEXP (x, 1)) == MULT)
7458 {
7459 rtx tmp = XEXP (x, 0);
7460 XEXP (x, 0) = XEXP (x, 1);
7461 XEXP (x, 1) = tmp;
7462 changed = 1;
7463 }
7464
7465 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7466 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7467 created by virtual register instantiation, register elimination, and
7468 similar optimizations. */
7469 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7470 {
7471 changed = 1;
7472 x = gen_rtx_PLUS (Pmode,
7473 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7474 XEXP (XEXP (x, 1), 0)),
7475 XEXP (XEXP (x, 1), 1));
7476 }
7477
7478 /* Canonicalize
7479 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7480 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7481 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7482 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7483 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7484 && CONSTANT_P (XEXP (x, 1)))
7485 {
7486 rtx constant;
7487 rtx other = NULL_RTX;
7488
7489 if (CONST_INT_P (XEXP (x, 1)))
7490 {
7491 constant = XEXP (x, 1);
7492 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7493 }
7494 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7495 {
7496 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7497 other = XEXP (x, 1);
7498 }
7499 else
7500 constant = 0;
7501
7502 if (constant)
7503 {
7504 changed = 1;
7505 x = gen_rtx_PLUS (Pmode,
7506 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7507 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7508 plus_constant (other, INTVAL (constant)));
7509 }
7510 }
7511
7512 if (changed && legitimate_address_p (mode, x, FALSE))
7513 return x;
7514
7515 if (GET_CODE (XEXP (x, 0)) == MULT)
7516 {
7517 changed = 1;
7518 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7519 }
7520
7521 if (GET_CODE (XEXP (x, 1)) == MULT)
7522 {
7523 changed = 1;
7524 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7525 }
7526
7527 if (changed
7528 && REG_P (XEXP (x, 1))
7529 && REG_P (XEXP (x, 0)))
7530 return x;
7531
7532 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7533 {
7534 changed = 1;
7535 x = legitimize_pic_address (x, 0);
7536 }
7537
7538 if (changed && legitimate_address_p (mode, x, FALSE))
7539 return x;
7540
7541 if (REG_P (XEXP (x, 0)))
7542 {
7543 rtx temp = gen_reg_rtx (Pmode);
7544 rtx val = force_operand (XEXP (x, 1), temp);
7545 if (val != temp)
7546 emit_move_insn (temp, val);
7547
7548 XEXP (x, 1) = temp;
7549 return x;
7550 }
7551
7552 else if (REG_P (XEXP (x, 1)))
7553 {
7554 rtx temp = gen_reg_rtx (Pmode);
7555 rtx val = force_operand (XEXP (x, 0), temp);
7556 if (val != temp)
7557 emit_move_insn (temp, val);
7558
7559 XEXP (x, 0) = temp;
7560 return x;
7561 }
7562 }
7563
7564 return x;
7565 }
7566 \f
7567 /* Print an integer constant expression in assembler syntax. Addition
7568 and subtraction are the only arithmetic that may appear in these
7569 expressions. FILE is the stdio stream to write to, X is the rtx, and
7570 CODE is the operand print code from the output string. */
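/* For example (added): an UNSPEC_GOTOFF operand is printed as
   "sym@GOTOFF" and an UNSPEC_GOTPCREL operand as "sym@GOTPCREL(%rip)",
   per the UNSPEC cases at the end of this function.  */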
7571
7572 static void
7573 output_pic_addr_const (FILE *file, rtx x, int code)
7574 {
7575 char buf[256];
7576
7577 switch (GET_CODE (x))
7578 {
7579 case PC:
7580 gcc_assert (flag_pic);
7581 putc ('.', file);
7582 break;
7583
7584 case SYMBOL_REF:
7585 output_addr_const (file, x);
7586 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7587 fputs ("@PLT", file);
7588 break;
7589
7590 case LABEL_REF:
7591 x = XEXP (x, 0);
7592 /* FALLTHRU */
7593 case CODE_LABEL:
7594 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7595 assemble_name (asm_out_file, buf);
7596 break;
7597
7598 case CONST_INT:
7599 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7600 break;
7601
7602 case CONST:
7603 /* This used to output parentheses around the expression,
7604 but that does not work on the 386 (either ATT or BSD assembler). */
7605 output_pic_addr_const (file, XEXP (x, 0), code);
7606 break;
7607
7608 case CONST_DOUBLE:
7609 if (GET_MODE (x) == VOIDmode)
7610 {
7611 /* We can use %d if the number is <32 bits and positive. */
7612 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7613 fprintf (file, "0x%lx%08lx",
7614 (unsigned long) CONST_DOUBLE_HIGH (x),
7615 (unsigned long) CONST_DOUBLE_LOW (x));
7616 else
7617 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7618 }
7619 else
7620 /* We can't handle floating point constants;
7621 PRINT_OPERAND must handle them. */
7622 output_operand_lossage ("floating constant misused");
7623 break;
7624
7625 case PLUS:
7626 /* Some assemblers need integer constants to appear first. */
7627 if (CONST_INT_P (XEXP (x, 0)))
7628 {
7629 output_pic_addr_const (file, XEXP (x, 0), code);
7630 putc ('+', file);
7631 output_pic_addr_const (file, XEXP (x, 1), code);
7632 }
7633 else
7634 {
7635 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7636 output_pic_addr_const (file, XEXP (x, 1), code);
7637 putc ('+', file);
7638 output_pic_addr_const (file, XEXP (x, 0), code);
7639 }
7640 break;
7641
7642 case MINUS:
7643 if (!TARGET_MACHO)
7644 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7645 output_pic_addr_const (file, XEXP (x, 0), code);
7646 putc ('-', file);
7647 output_pic_addr_const (file, XEXP (x, 1), code);
7648 if (!TARGET_MACHO)
7649 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7650 break;
7651
7652 case UNSPEC:
7653 gcc_assert (XVECLEN (x, 0) == 1);
7654 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7655 switch (XINT (x, 1))
7656 {
7657 case UNSPEC_GOT:
7658 fputs ("@GOT", file);
7659 break;
7660 case UNSPEC_GOTOFF:
7661 fputs ("@GOTOFF", file);
7662 break;
7663 case UNSPEC_GOTPCREL:
7664 fputs ("@GOTPCREL(%rip)", file);
7665 break;
7666 case UNSPEC_GOTTPOFF:
7667 /* FIXME: This might be @TPOFF in Sun ld too. */
7668 fputs ("@GOTTPOFF", file);
7669 break;
7670 case UNSPEC_TPOFF:
7671 fputs ("@TPOFF", file);
7672 break;
7673 case UNSPEC_NTPOFF:
7674 if (TARGET_64BIT)
7675 fputs ("@TPOFF", file);
7676 else
7677 fputs ("@NTPOFF", file);
7678 break;
7679 case UNSPEC_DTPOFF:
7680 fputs ("@DTPOFF", file);
7681 break;
7682 case UNSPEC_GOTNTPOFF:
7683 if (TARGET_64BIT)
7684 fputs ("@GOTTPOFF(%rip)", file);
7685 else
7686 fputs ("@GOTNTPOFF", file);
7687 break;
7688 case UNSPEC_INDNTPOFF:
7689 fputs ("@INDNTPOFF", file);
7690 break;
7691 default:
7692 output_operand_lossage ("invalid UNSPEC as operand");
7693 break;
7694 }
7695 break;
7696
7697 default:
7698 output_operand_lossage ("invalid expression as operand");
7699 }
7700 }
7701
7702 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7703 We need to emit DTP-relative relocations. */
7704
7705 static void
7706 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7707 {
7708 fputs (ASM_LONG, file);
7709 output_addr_const (file, x);
7710 fputs ("@DTPOFF", file);
7711 switch (size)
7712 {
7713 case 4:
7714 break;
7715 case 8:
7716 fputs (", 0", file);
7717 break;
7718 default:
7719 gcc_unreachable ();
7720 }
7721 }
7722
7723 /* In the name of slightly smaller debug output, and to cater to
7724 general assembler lossage, recognize PIC+GOTOFF and turn it back
7725 into a direct symbol reference.
7726
7727 On Darwin, this is necessary to avoid a crash, because Darwin
7728 has a different PIC label for each routine but the DWARF debugging
7729 information is not associated with any particular routine, so it's
7730 necessary to remove references to the PIC label from RTL stored by
7731 the DWARF output code. */
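/* Sketch (added): on ia32 this turns, e.g.,

       (plus (reg ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))

   back into (symbol_ref "x"), re-attaching any register or constant
   addend that was part of the original address.  */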
7732
7733 static rtx
7734 ix86_delegitimize_address (rtx orig_x)
7735 {
7736 rtx x = orig_x;
7737 /* reg_addend is NULL or a multiple of some register. */
7738 rtx reg_addend = NULL_RTX;
7739 /* const_addend is NULL or a const_int. */
7740 rtx const_addend = NULL_RTX;
7741 /* This is the result, or NULL. */
7742 rtx result = NULL_RTX;
7743
7744 if (MEM_P (x))
7745 x = XEXP (x, 0);
7746
7747 if (TARGET_64BIT)
7748 {
7749 if (GET_CODE (x) != CONST
7750 || GET_CODE (XEXP (x, 0)) != UNSPEC
7751 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7752 || !MEM_P (orig_x))
7753 return orig_x;
7754 return XVECEXP (XEXP (x, 0), 0, 0);
7755 }
7756
7757 if (GET_CODE (x) != PLUS
7758 || GET_CODE (XEXP (x, 1)) != CONST)
7759 return orig_x;
7760
7761 if (REG_P (XEXP (x, 0))
7762 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7763 /* %ebx + GOT/GOTOFF */
7764 ;
7765 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7766 {
7767 /* %ebx + %reg * scale + GOT/GOTOFF */
7768 reg_addend = XEXP (x, 0);
7769 if (REG_P (XEXP (reg_addend, 0))
7770 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7771 reg_addend = XEXP (reg_addend, 1);
7772 else if (REG_P (XEXP (reg_addend, 1))
7773 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7774 reg_addend = XEXP (reg_addend, 0);
7775 else
7776 return orig_x;
7777 if (!REG_P (reg_addend)
7778 && GET_CODE (reg_addend) != MULT
7779 && GET_CODE (reg_addend) != ASHIFT)
7780 return orig_x;
7781 }
7782 else
7783 return orig_x;
7784
7785 x = XEXP (XEXP (x, 1), 0);
7786 if (GET_CODE (x) == PLUS
7787 && CONST_INT_P (XEXP (x, 1)))
7788 {
7789 const_addend = XEXP (x, 1);
7790 x = XEXP (x, 0);
7791 }
7792
7793 if (GET_CODE (x) == UNSPEC
7794 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7795 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7796 result = XVECEXP (x, 0, 0);
7797
7798 if (TARGET_MACHO && darwin_local_data_pic (x)
7799 && !MEM_P (orig_x))
7800 result = XEXP (x, 0);
7801
7802 if (! result)
7803 return orig_x;
7804
7805 if (const_addend)
7806 result = gen_rtx_PLUS (Pmode, result, const_addend);
7807 if (reg_addend)
7808 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7809 return result;
7810 }
7811 \f
7812 static void
7813 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7814 int fp, FILE *file)
7815 {
7816 const char *suffix;
7817
7818 if (mode == CCFPmode || mode == CCFPUmode)
7819 {
7820 enum rtx_code second_code, bypass_code;
7821 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7822 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7823 code = ix86_fp_compare_code_to_integer (code);
7824 mode = CCmode;
7825 }
7826 if (reverse)
7827 code = reverse_condition (code);
7828
7829 switch (code)
7830 {
7831 case EQ:
7832 suffix = "e";
7833 break;
7834 case NE:
7835 suffix = "ne";
7836 break;
7837 case GT:
7838 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7839 suffix = "g";
7840 break;
7841 case GTU:
7842 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7843 Those same assemblers have the same but opposite lossage on cmov. */
7844 gcc_assert (mode == CCmode);
7845 suffix = fp ? "nbe" : "a";
7846 break;
7847 case LT:
7848 switch (mode)
7849 {
7850 case CCNOmode:
7851 case CCGOCmode:
7852 suffix = "s";
7853 break;
7854
7855 case CCmode:
7856 case CCGCmode:
7857 suffix = "l";
7858 break;
7859
7860 default:
7861 gcc_unreachable ();
7862 }
7863 break;
7864 case LTU:
7865 gcc_assert (mode == CCmode);
7866 suffix = "b";
7867 break;
7868 case GE:
7869 switch (mode)
7870 {
7871 case CCNOmode:
7872 case CCGOCmode:
7873 suffix = "ns";
7874 break;
7875
7876 case CCmode:
7877 case CCGCmode:
7878 suffix = "ge";
7879 break;
7880
7881 default:
7882 gcc_unreachable ();
7883 }
7884 break;
7885 case GEU:
7886 /* ??? As above. */
7887 gcc_assert (mode == CCmode);
7888 suffix = fp ? "nb" : "ae";
7889 break;
7890 case LE:
7891 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7892 suffix = "le";
7893 break;
7894 case LEU:
7895 gcc_assert (mode == CCmode);
7896 suffix = "be";
7897 break;
7898 case UNORDERED:
7899 suffix = fp ? "u" : "p";
7900 break;
7901 case ORDERED:
7902 suffix = fp ? "nu" : "np";
7903 break;
7904 default:
7905 gcc_unreachable ();
7906 }
7907 fputs (suffix, file);
7908 }
7909
7910 /* Print the name of register X to FILE based on its machine mode and number.
7911 If CODE is 'w', pretend the mode is HImode.
7912 If CODE is 'b', pretend the mode is QImode.
7913 If CODE is 'k', pretend the mode is SImode.
7914 If CODE is 'q', pretend the mode is DImode.
7915 If CODE is 'h', pretend the reg is the 'high' byte register.
7916 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
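/* For example (added): hard register 0 prints as "al" with CODE 'b',
   "ax" with 'w', "eax" with 'k' and "rax" with 'q'; the REX registers
   print as r8..r15 with a "b", "w" or "d" suffix for the narrower
   modes.  */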
7917
7918 void
7919 print_reg (rtx x, int code, FILE *file)
7920 {
7921 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7922 && REGNO (x) != FRAME_POINTER_REGNUM
7923 && REGNO (x) != FLAGS_REG
7924 && REGNO (x) != FPSR_REG
7925 && REGNO (x) != FPCR_REG);
7926
7927 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7928 putc ('%', file);
7929
7930 if (code == 'w' || MMX_REG_P (x))
7931 code = 2;
7932 else if (code == 'b')
7933 code = 1;
7934 else if (code == 'k')
7935 code = 4;
7936 else if (code == 'q')
7937 code = 8;
7938 else if (code == 'y')
7939 code = 3;
7940 else if (code == 'h')
7941 code = 0;
7942 else
7943 code = GET_MODE_SIZE (GET_MODE (x));
7944
7945 /* Irritatingly, AMD extended registers use a different naming convention
7946 from the normal registers. */
7947 if (REX_INT_REG_P (x))
7948 {
7949 gcc_assert (TARGET_64BIT);
7950 switch (code)
7951 {
7952 case 0:
7953 error ("extended registers have no high halves");
7954 break;
7955 case 1:
7956 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7957 break;
7958 case 2:
7959 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7960 break;
7961 case 4:
7962 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7963 break;
7964 case 8:
7965 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7966 break;
7967 default:
7968 error ("unsupported operand size for extended register");
7969 break;
7970 }
7971 return;
7972 }
7973 switch (code)
7974 {
7975 case 3:
7976 if (STACK_TOP_P (x))
7977 {
7978 fputs ("st(0)", file);
7979 break;
7980 }
7981 /* FALLTHRU */
7982 case 8:
7983 case 4:
7984 case 12:
7985 if (! ANY_FP_REG_P (x))
7986 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7987 /* FALLTHRU */
7988 case 16:
7989 case 2:
7990 normal:
7991 fputs (hi_reg_name[REGNO (x)], file);
7992 break;
7993 case 1:
7994 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7995 goto normal;
7996 fputs (qi_reg_name[REGNO (x)], file);
7997 break;
7998 case 0:
7999 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8000 goto normal;
8001 fputs (qi_high_reg_name[REGNO (x)], file);
8002 break;
8003 default:
8004 gcc_unreachable ();
8005 }
8006 }
8007
8008 /* Locate some local-dynamic symbol still in use by this function
8009 so that we can print its name in some tls_local_dynamic_base
8010 pattern. */
8011
8012 static const char *
8013 get_some_local_dynamic_name (void)
8014 {
8015 rtx insn;
8016
8017 if (cfun->machine->some_ld_name)
8018 return cfun->machine->some_ld_name;
8019
8020 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8021 if (INSN_P (insn)
8022 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8023 return cfun->machine->some_ld_name;
8024
8025 gcc_unreachable ();
8026 }
8027
8028 static int
8029 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8030 {
8031 rtx x = *px;
8032
8033 if (GET_CODE (x) == SYMBOL_REF
8034 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8035 {
8036 cfun->machine->some_ld_name = XSTR (x, 0);
8037 return 1;
8038 }
8039
8040 return 0;
8041 }
8042
8043 /* Meaning of CODE:
8044 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8045 C -- print opcode suffix for set/cmov insn.
8046 c -- like C, but print reversed condition
8047 F,f -- likewise, but for floating-point.
8048 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8049 otherwise nothing
8050 R -- print the prefix for register names.
8051 z -- print the opcode suffix for the size of the current operand.
8052 * -- print a star (in certain assembler syntax)
8053 A -- print an absolute memory reference.
8054 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8055 s -- print a shift double count, followed by the assembler's argument
8056 delimiter.
8057 b -- print the QImode name of the register for the indicated operand.
8058 %b0 would print %al if operands[0] is reg 0.
8059 w -- likewise, print the HImode name of the register.
8060 k -- likewise, print the SImode name of the register.
8061 q -- likewise, print the DImode name of the register.
8062 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8063 y -- print "st(0)" instead of "st" as a register.
8064 D -- print condition for SSE cmp instruction.
8065 P -- if PIC, print an @PLT suffix.
8066 X -- don't print any sort of PIC '@' suffix for a symbol.
8067 & -- print some in-use local-dynamic symbol name.
8068 H -- print a memory address offset by 8; used for sse high-parts
8069 */
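/* For example (added): in an insn template "%k1" prints operand 1 using
   its SImode register name (e.g. "%eax") and "%b0" prints operand 0
   using its QImode name (e.g. "%al").  */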
8070
8071 void
8072 print_operand (FILE *file, rtx x, int code)
8073 {
8074 if (code)
8075 {
8076 switch (code)
8077 {
8078 case '*':
8079 if (ASSEMBLER_DIALECT == ASM_ATT)
8080 putc ('*', file);
8081 return;
8082
8083 case '&':
8084 assemble_name (file, get_some_local_dynamic_name ());
8085 return;
8086
8087 case 'A':
8088 switch (ASSEMBLER_DIALECT)
8089 {
8090 case ASM_ATT:
8091 putc ('*', file);
8092 break;
8093
8094 case ASM_INTEL:
8095 /* Intel syntax. For absolute addresses, registers should not
8096 be surrounded by brackets. */
8097 if (!REG_P (x))
8098 {
8099 putc ('[', file);
8100 PRINT_OPERAND (file, x, 0);
8101 putc (']', file);
8102 return;
8103 }
8104 break;
8105
8106 default:
8107 gcc_unreachable ();
8108 }
8109
8110 PRINT_OPERAND (file, x, 0);
8111 return;
8112
8113
8114 case 'L':
8115 if (ASSEMBLER_DIALECT == ASM_ATT)
8116 putc ('l', file);
8117 return;
8118
8119 case 'W':
8120 if (ASSEMBLER_DIALECT == ASM_ATT)
8121 putc ('w', file);
8122 return;
8123
8124 case 'B':
8125 if (ASSEMBLER_DIALECT == ASM_ATT)
8126 putc ('b', file);
8127 return;
8128
8129 case 'Q':
8130 if (ASSEMBLER_DIALECT == ASM_ATT)
8131 putc ('l', file);
8132 return;
8133
8134 case 'S':
8135 if (ASSEMBLER_DIALECT == ASM_ATT)
8136 putc ('s', file);
8137 return;
8138
8139 case 'T':
8140 if (ASSEMBLER_DIALECT == ASM_ATT)
8141 putc ('t', file);
8142 return;
8143
8144 case 'z':
8145 /* 387 opcodes don't get size suffixes if the operands are
8146 registers. */
8147 if (STACK_REG_P (x))
8148 return;
8149
8150 /* Likewise if using Intel opcodes. */
8151 if (ASSEMBLER_DIALECT == ASM_INTEL)
8152 return;
8153
8154 /* This is the size of op from size of operand. */
8155 switch (GET_MODE_SIZE (GET_MODE (x)))
8156 {
8157 case 1:
8158 putc ('b', file);
8159 return;
8160
8161 case 2:
8162 #ifdef HAVE_GAS_FILDS_FISTS
8163 putc ('s', file);
8164 #endif
8165 return;
8166
8167 case 4:
8168 if (GET_MODE (x) == SFmode)
8169 {
8170 putc ('s', file);
8171 return;
8172 }
8173 else
8174 putc ('l', file);
8175 return;
8176
8177 case 12:
8178 case 16:
8179 putc ('t', file);
8180 return;
8181
8182 case 8:
8183 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8184 {
8185 #ifdef GAS_MNEMONICS
8186 putc ('q', file);
8187 #else
8188 putc ('l', file);
8189 putc ('l', file);
8190 #endif
8191 }
8192 else
8193 putc ('l', file);
8194 return;
8195
8196 default:
8197 gcc_unreachable ();
8198 }
8199
8200 case 'b':
8201 case 'w':
8202 case 'k':
8203 case 'q':
8204 case 'h':
8205 case 'y':
8206 case 'X':
8207 case 'P':
8208 break;
8209
8210 case 's':
8211 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8212 {
8213 PRINT_OPERAND (file, x, 0);
8214 putc (',', file);
8215 }
8216 return;
8217
8218 case 'D':
8219 /* A little bit of brain damage here. The SSE compare instructions
8220 use completely different names for the comparisons than the
8221 fp conditional moves do. */
8222 switch (GET_CODE (x))
8223 {
8224 case EQ:
8225 case UNEQ:
8226 fputs ("eq", file);
8227 break;
8228 case LT:
8229 case UNLT:
8230 fputs ("lt", file);
8231 break;
8232 case LE:
8233 case UNLE:
8234 fputs ("le", file);
8235 break;
8236 case UNORDERED:
8237 fputs ("unord", file);
8238 break;
8239 case NE:
8240 case LTGT:
8241 fputs ("neq", file);
8242 break;
8243 case UNGE:
8244 case GE:
8245 fputs ("nlt", file);
8246 break;
8247 case UNGT:
8248 case GT:
8249 fputs ("nle", file);
8250 break;
8251 case ORDERED:
8252 fputs ("ord", file);
8253 break;
8254 default:
8255 gcc_unreachable ();
8256 }
8257 return;
8258 case 'O':
8259 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8260 if (ASSEMBLER_DIALECT == ASM_ATT)
8261 {
8262 switch (GET_MODE (x))
8263 {
8264 case HImode: putc ('w', file); break;
8265 case SImode:
8266 case SFmode: putc ('l', file); break;
8267 case DImode:
8268 case DFmode: putc ('q', file); break;
8269 default: gcc_unreachable ();
8270 }
8271 putc ('.', file);
8272 }
8273 #endif
8274 return;
8275 case 'C':
8276 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8277 return;
8278 case 'F':
8279 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8280 if (ASSEMBLER_DIALECT == ASM_ATT)
8281 putc ('.', file);
8282 #endif
8283 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8284 return;
8285
8286 /* Like above, but reverse condition */
8287 case 'c':
8288 /* Check to see if argument to %c is really a constant
8289 and not a condition code which needs to be reversed. */
8290 if (!COMPARISON_P (x))
8291 {
8292 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8293 return;
8294 }
8295 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8296 return;
8297 case 'f':
8298 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8299 if (ASSEMBLER_DIALECT == ASM_ATT)
8300 putc ('.', file);
8301 #endif
8302 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8303 return;
8304
8305 case 'H':
8306 /* It doesn't actually matter what mode we use here, as we're
8307 only going to use this for printing. */
8308 x = adjust_address_nv (x, DImode, 8);
8309 break;
8310
8311 case '+':
8312 {
8313 rtx x;
8314
8315 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8316 return;
8317
8318 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8319 if (x)
8320 {
8321 int pred_val = INTVAL (XEXP (x, 0));
8322
8323 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8324 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8325 {
8326 int taken = pred_val > REG_BR_PROB_BASE / 2;
8327 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8328
8329 /* Emit hints only when the default branch prediction
8330 heuristics would fail. */
8331 if (taken != cputaken)
8332 {
8333 /* We use 3e (DS) prefix for taken branches and
8334 2e (CS) prefix for not taken branches. */
8335 if (taken)
8336 fputs ("ds ; ", file);
8337 else
8338 fputs ("cs ; ", file);
8339 }
8340 }
8341 }
8342 return;
8343 }
8344 default:
8345 output_operand_lossage ("invalid operand code '%c'", code);
8346 }
8347 }
8348
8349 if (REG_P (x))
8350 print_reg (x, code, file);
8351
8352 else if (MEM_P (x))
8353 {
8354 /* No `byte ptr' prefix for call instructions. */
8355 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8356 {
8357 const char * size;
8358 switch (GET_MODE_SIZE (GET_MODE (x)))
8359 {
8360 case 1: size = "BYTE"; break;
8361 case 2: size = "WORD"; break;
8362 case 4: size = "DWORD"; break;
8363 case 8: size = "QWORD"; break;
8364 case 12: size = "XWORD"; break;
8365 case 16: size = "XMMWORD"; break;
8366 default:
8367 gcc_unreachable ();
8368 }
8369
8370 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8371 if (code == 'b')
8372 size = "BYTE";
8373 else if (code == 'w')
8374 size = "WORD";
8375 else if (code == 'k')
8376 size = "DWORD";
8377
8378 fputs (size, file);
8379 fputs (" PTR ", file);
8380 }
8381
8382 x = XEXP (x, 0);
8383 /* Avoid (%rip) for call operands. */
8384 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8385 && !CONST_INT_P (x))
8386 output_addr_const (file, x);
8387 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8388 output_operand_lossage ("invalid constraints for operand");
8389 else
8390 output_address (x);
8391 }
8392
8393 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8394 {
8395 REAL_VALUE_TYPE r;
8396 long l;
8397
8398 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8399 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8400
8401 if (ASSEMBLER_DIALECT == ASM_ATT)
8402 putc ('$', file);
8403 fprintf (file, "0x%08lx", l);
8404 }
8405
8406 /* These float cases don't actually occur as immediate operands. */
8407 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8408 {
8409 char dstr[30];
8410
8411 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8412 fprintf (file, "%s", dstr);
8413 }
8414
8415 else if (GET_CODE (x) == CONST_DOUBLE
8416 && GET_MODE (x) == XFmode)
8417 {
8418 char dstr[30];
8419
8420 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8421 fprintf (file, "%s", dstr);
8422 }
8423
8424 else
8425 {
8426 /* We have patterns that allow zero sets of memory, for instance.
8427 In 64-bit mode, we should probably support all 8-byte vectors,
8428 since we can in fact encode that into an immediate. */
8429 if (GET_CODE (x) == CONST_VECTOR)
8430 {
8431 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8432 x = const0_rtx;
8433 }
8434
8435 if (code != 'P')
8436 {
8437 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8438 {
8439 if (ASSEMBLER_DIALECT == ASM_ATT)
8440 putc ('$', file);
8441 }
8442 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8443 || GET_CODE (x) == LABEL_REF)
8444 {
8445 if (ASSEMBLER_DIALECT == ASM_ATT)
8446 putc ('$', file);
8447 else
8448 fputs ("OFFSET FLAT:", file);
8449 }
8450 }
8451 if (CONST_INT_P (x))
8452 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8453 else if (flag_pic)
8454 output_pic_addr_const (file, x, code);
8455 else
8456 output_addr_const (file, x);
8457 }
8458 }
8459 \f
8460 /* Print a memory operand whose address is ADDR. */
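/* For example (added): the same decomposed address is printed as
   "12(%ebx,%ecx,4)" in AT&T syntax and as "[ebx+ecx*4+12]" in Intel
   syntax; ASSEMBLER_DIALECT selects between the two paths below.  */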
8461
8462 void
8463 print_operand_address (FILE *file, rtx addr)
8464 {
8465 struct ix86_address parts;
8466 rtx base, index, disp;
8467 int scale;
8468 int ok = ix86_decompose_address (addr, &parts);
8469
8470 gcc_assert (ok);
8471
8472 base = parts.base;
8473 index = parts.index;
8474 disp = parts.disp;
8475 scale = parts.scale;
8476
8477 switch (parts.seg)
8478 {
8479 case SEG_DEFAULT:
8480 break;
8481 case SEG_FS:
8482 case SEG_GS:
8483 if (USER_LABEL_PREFIX[0] == 0)
8484 putc ('%', file);
8485 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8486 break;
8487 default:
8488 gcc_unreachable ();
8489 }
8490
8491 if (!base && !index)
8492 {
8493 /* Displacement-only addresses require special attention. */
8494
8495 if (CONST_INT_P (disp))
8496 {
8497 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8498 {
8499 if (USER_LABEL_PREFIX[0] == 0)
8500 putc ('%', file);
8501 fputs ("ds:", file);
8502 }
8503 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8504 }
8505 else if (flag_pic)
8506 output_pic_addr_const (file, disp, 0);
8507 else
8508 output_addr_const (file, disp);
8509
8510 /* Use the one byte shorter RIP-relative addressing for 64bit mode. */
8511 if (TARGET_64BIT)
8512 {
8513 if (GET_CODE (disp) == CONST
8514 && GET_CODE (XEXP (disp, 0)) == PLUS
8515 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8516 disp = XEXP (XEXP (disp, 0), 0);
8517 if (GET_CODE (disp) == LABEL_REF
8518 || (GET_CODE (disp) == SYMBOL_REF
8519 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8520 fputs ("(%rip)", file);
8521 }
8522 }
8523 else
8524 {
8525 if (ASSEMBLER_DIALECT == ASM_ATT)
8526 {
8527 if (disp)
8528 {
8529 if (flag_pic)
8530 output_pic_addr_const (file, disp, 0);
8531 else if (GET_CODE (disp) == LABEL_REF)
8532 output_asm_label (disp);
8533 else
8534 output_addr_const (file, disp);
8535 }
8536
8537 putc ('(', file);
8538 if (base)
8539 print_reg (base, 0, file);
8540 if (index)
8541 {
8542 putc (',', file);
8543 print_reg (index, 0, file);
8544 if (scale != 1)
8545 fprintf (file, ",%d", scale);
8546 }
8547 putc (')', file);
8548 }
8549 else
8550 {
8551 rtx offset = NULL_RTX;
8552
8553 if (disp)
8554 {
8555 /* Pull out the offset of a symbol; print any symbol itself. */
8556 if (GET_CODE (disp) == CONST
8557 && GET_CODE (XEXP (disp, 0)) == PLUS
8558 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8559 {
8560 offset = XEXP (XEXP (disp, 0), 1);
8561 disp = gen_rtx_CONST (VOIDmode,
8562 XEXP (XEXP (disp, 0), 0));
8563 }
8564
8565 if (flag_pic)
8566 output_pic_addr_const (file, disp, 0);
8567 else if (GET_CODE (disp) == LABEL_REF)
8568 output_asm_label (disp);
8569 else if (CONST_INT_P (disp))
8570 offset = disp;
8571 else
8572 output_addr_const (file, disp);
8573 }
8574
8575 putc ('[', file);
8576 if (base)
8577 {
8578 print_reg (base, 0, file);
8579 if (offset)
8580 {
8581 if (INTVAL (offset) >= 0)
8582 putc ('+', file);
8583 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8584 }
8585 }
8586 else if (offset)
8587 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8588 else
8589 putc ('0', file);
8590
8591 if (index)
8592 {
8593 putc ('+', file);
8594 print_reg (index, 0, file);
8595 if (scale != 1)
8596 fprintf (file, "*%d", scale);
8597 }
8598 putc (']', file);
8599 }
8600 }
8601 }
8602
8603 bool
8604 output_addr_const_extra (FILE *file, rtx x)
8605 {
8606 rtx op;
8607
8608 if (GET_CODE (x) != UNSPEC)
8609 return false;
8610
8611 op = XVECEXP (x, 0, 0);
8612 switch (XINT (x, 1))
8613 {
8614 case UNSPEC_GOTTPOFF:
8615 output_addr_const (file, op);
8616 /* FIXME: This might be @TPOFF in Sun ld. */
8617 fputs ("@GOTTPOFF", file);
8618 break;
8619 case UNSPEC_TPOFF:
8620 output_addr_const (file, op);
8621 fputs ("@TPOFF", file);
8622 break;
8623 case UNSPEC_NTPOFF:
8624 output_addr_const (file, op);
8625 if (TARGET_64BIT)
8626 fputs ("@TPOFF", file);
8627 else
8628 fputs ("@NTPOFF", file);
8629 break;
8630 case UNSPEC_DTPOFF:
8631 output_addr_const (file, op);
8632 fputs ("@DTPOFF", file);
8633 break;
8634 case UNSPEC_GOTNTPOFF:
8635 output_addr_const (file, op);
8636 if (TARGET_64BIT)
8637 fputs ("@GOTTPOFF(%rip)", file);
8638 else
8639 fputs ("@GOTNTPOFF", file);
8640 break;
8641 case UNSPEC_INDNTPOFF:
8642 output_addr_const (file, op);
8643 fputs ("@INDNTPOFF", file);
8644 break;
8645
8646 default:
8647 return false;
8648 }
8649
8650 return true;
8651 }
8652 \f
8653 /* Split one or more DImode RTL references into pairs of SImode
8654 references. The RTL can be REG, offsettable MEM, integer constant, or
8655 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8656 split and "num" is its length. lo_half and hi_half are output arrays
8657 that parallel "operands". */
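/* For example (added): the DImode constant 0x300000001 splits into
   lo_half = (const_int 1) and hi_half = (const_int 3), and a DImode MEM
   splits into two SImode MEMs at offsets 0 and 4.  */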
8658
8659 void
8660 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8661 {
8662 while (num--)
8663 {
8664 rtx op = operands[num];
8665
8666 /* simplify_subreg refuses to split volatile memory addresses,
8667 but we still have to handle them. */
8668 if (MEM_P (op))
8669 {
8670 lo_half[num] = adjust_address (op, SImode, 0);
8671 hi_half[num] = adjust_address (op, SImode, 4);
8672 }
8673 else
8674 {
8675 lo_half[num] = simplify_gen_subreg (SImode, op,
8676 GET_MODE (op) == VOIDmode
8677 ? DImode : GET_MODE (op), 0);
8678 hi_half[num] = simplify_gen_subreg (SImode, op,
8679 GET_MODE (op) == VOIDmode
8680 ? DImode : GET_MODE (op), 4);
8681 }
8682 }
8683 }
8684 /* Split one or more TImode RTL references into pairs of DImode
8685 references. The RTL can be REG, offsettable MEM, integer constant, or
8686 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8687 split and "num" is its length. lo_half and hi_half are output arrays
8688 that parallel "operands". */
8689
8690 void
8691 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8692 {
8693 while (num--)
8694 {
8695 rtx op = operands[num];
8696
8697 /* simplify_subreg refuses to split volatile memory addresses, but we
8698 still have to handle them. */
8699 if (MEM_P (op))
8700 {
8701 lo_half[num] = adjust_address (op, DImode, 0);
8702 hi_half[num] = adjust_address (op, DImode, 8);
8703 }
8704 else
8705 {
8706 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8707 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8708 }
8709 }
8710 }
8711 \f
8712 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8713 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8714 is the expression of the binary operation. The output may either be
8715 emitted here, or returned to the caller, like all output_* functions.
8716
8717 There is no guarantee that the operands are the same mode, as they
8718 might be within FLOAT or FLOAT_EXTEND expressions. */
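/* For example (added): an SFmode SSE addition yields the template
   "addss\t{%2, %0|%0, %2}", while a 387 addition with both operands in
   stack registers yields one of the "fadd" forms chosen below.  */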
8719
8720 #ifndef SYSV386_COMPAT
8721 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8722 wants to fix the assemblers because that causes incompatibility
8723 with gcc. No-one wants to fix gcc because that causes
8724 incompatibility with assemblers... You can use the option
8725 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8726 #define SYSV386_COMPAT 1
8727 #endif
8728
8729 const char *
8730 output_387_binary_op (rtx insn, rtx *operands)
8731 {
8732 static char buf[30];
8733 const char *p;
8734 const char *ssep;
8735 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8736
8737 #ifdef ENABLE_CHECKING
8738 /* Even if we do not want to check the inputs, this documents the input
8739 constraints, which helps in understanding the following code. */
8740 if (STACK_REG_P (operands[0])
8741 && ((REG_P (operands[1])
8742 && REGNO (operands[0]) == REGNO (operands[1])
8743 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8744 || (REG_P (operands[2])
8745 && REGNO (operands[0]) == REGNO (operands[2])
8746 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8747 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8748 ; /* ok */
8749 else
8750 gcc_assert (is_sse);
8751 #endif
8752
8753 switch (GET_CODE (operands[3]))
8754 {
8755 case PLUS:
8756 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8757 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8758 p = "fiadd";
8759 else
8760 p = "fadd";
8761 ssep = "add";
8762 break;
8763
8764 case MINUS:
8765 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8766 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8767 p = "fisub";
8768 else
8769 p = "fsub";
8770 ssep = "sub";
8771 break;
8772
8773 case MULT:
8774 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8775 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8776 p = "fimul";
8777 else
8778 p = "fmul";
8779 ssep = "mul";
8780 break;
8781
8782 case DIV:
8783 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8784 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8785 p = "fidiv";
8786 else
8787 p = "fdiv";
8788 ssep = "div";
8789 break;
8790
8791 default:
8792 gcc_unreachable ();
8793 }
8794
8795 if (is_sse)
8796 {
8797 strcpy (buf, ssep);
8798 if (GET_MODE (operands[0]) == SFmode)
8799 strcat (buf, "ss\t{%2, %0|%0, %2}");
8800 else
8801 strcat (buf, "sd\t{%2, %0|%0, %2}");
8802 return buf;
8803 }
8804 strcpy (buf, p);
8805
8806 switch (GET_CODE (operands[3]))
8807 {
8808 case MULT:
8809 case PLUS:
8810 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8811 {
8812 rtx temp = operands[2];
8813 operands[2] = operands[1];
8814 operands[1] = temp;
8815 }
8816
8817 /* We know operands[0] == operands[1].  */
8818
8819 if (MEM_P (operands[2]))
8820 {
8821 p = "%z2\t%2";
8822 break;
8823 }
8824
8825 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8826 {
8827 if (STACK_TOP_P (operands[0]))
8828 /* How is it that we are storing to a dead operand[2]?
8829 Well, presumably operands[1] is dead too. We can't
8830 store the result to st(0) as st(0) gets popped on this
8831 instruction. Instead store to operands[2] (which I
8832 think has to be st(1)). st(1) will be popped later.
8833 gcc <= 2.8.1 didn't have this check and generated
8834 assembly code that the Unixware assembler rejected. */
8835 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8836 else
8837 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8838 break;
8839 }
8840
8841 if (STACK_TOP_P (operands[0]))
8842 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8843 else
8844 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8845 break;
8846
8847 case MINUS:
8848 case DIV:
8849 if (MEM_P (operands[1]))
8850 {
8851 p = "r%z1\t%1";
8852 break;
8853 }
8854
8855 if (MEM_P (operands[2]))
8856 {
8857 p = "%z2\t%2";
8858 break;
8859 }
8860
8861 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8862 {
8863 #if SYSV386_COMPAT
8864 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8865 derived assemblers, confusingly reverse the direction of
8866 the operation for fsub{r} and fdiv{r} when the
8867 destination register is not st(0). The Intel assembler
8868 doesn't have this brain damage. Read !SYSV386_COMPAT to
8869 figure out what the hardware really does. */
8870 if (STACK_TOP_P (operands[0]))
8871 p = "{p\t%0, %2|rp\t%2, %0}";
8872 else
8873 p = "{rp\t%2, %0|p\t%0, %2}";
8874 #else
8875 if (STACK_TOP_P (operands[0]))
8876 /* As above for fmul/fadd, we can't store to st(0). */
8877 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8878 else
8879 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8880 #endif
8881 break;
8882 }
8883
8884 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8885 {
8886 #if SYSV386_COMPAT
8887 if (STACK_TOP_P (operands[0]))
8888 p = "{rp\t%0, %1|p\t%1, %0}";
8889 else
8890 p = "{p\t%1, %0|rp\t%0, %1}";
8891 #else
8892 if (STACK_TOP_P (operands[0]))
8893 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8894 else
8895 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8896 #endif
8897 break;
8898 }
8899
8900 if (STACK_TOP_P (operands[0]))
8901 {
8902 if (STACK_TOP_P (operands[1]))
8903 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8904 else
8905 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8906 break;
8907 }
8908 else if (STACK_TOP_P (operands[1]))
8909 {
8910 #if SYSV386_COMPAT
8911 p = "{\t%1, %0|r\t%0, %1}";
8912 #else
8913 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8914 #endif
8915 }
8916 else
8917 {
8918 #if SYSV386_COMPAT
8919 p = "{r\t%2, %0|\t%0, %2}";
8920 #else
8921 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8922 #endif
8923 }
8924 break;
8925
8926 default:
8927 gcc_unreachable ();
8928 }
8929
8930 strcat (buf, p);
8931 return buf;
8932 }
8933
8934 /* Return needed mode for entity in optimize_mode_switching pass. */
8935
8936 int
8937 ix86_mode_needed (int entity, rtx insn)
8938 {
8939 enum attr_i387_cw mode;
8940
8941 /* The mode UNINITIALIZED is used to store the control word after a
8942 function call or ASM pattern.  The mode ANY specifies that the function
8943 has no requirements on the control word and makes no changes to the
8944 bits we are interested in.  */
8945
8946 if (CALL_P (insn)
8947 || (NONJUMP_INSN_P (insn)
8948 && (asm_noperands (PATTERN (insn)) >= 0
8949 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8950 return I387_CW_UNINITIALIZED;
8951
8952 if (recog_memoized (insn) < 0)
8953 return I387_CW_ANY;
8954
8955 mode = get_attr_i387_cw (insn);
8956
8957 switch (entity)
8958 {
8959 case I387_TRUNC:
8960 if (mode == I387_CW_TRUNC)
8961 return mode;
8962 break;
8963
8964 case I387_FLOOR:
8965 if (mode == I387_CW_FLOOR)
8966 return mode;
8967 break;
8968
8969 case I387_CEIL:
8970 if (mode == I387_CW_CEIL)
8971 return mode;
8972 break;
8973
8974 case I387_MASK_PM:
8975 if (mode == I387_CW_MASK_PM)
8976 return mode;
8977 break;
8978
8979 default:
8980 gcc_unreachable ();
8981 }
8982
8983 return I387_CW_ANY;
8984 }
8985
8986 /* Output code to initialize control word copies used by trunc?f?i and
8987 rounding patterns.  CURRENT_MODE is set to the current control word,
8988 while NEW_MODE is set to the new control word.  */
8989
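/* For reference, the constants below manipulate the rounding-control
   field of the x87 control word (bits 11:10): 0x0400 selects round
   toward -inf, 0x0800 round toward +inf, and 0x0c00 round toward zero,
   while bit 5 (0x0020) masks the precision exception for nearbyint.  */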
8990 void
8991 emit_i387_cw_initialization (int mode)
8992 {
8993 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8994 rtx new_mode;
8995
8996 int slot;
8997
8998 rtx reg = gen_reg_rtx (HImode);
8999
9000 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9001 emit_move_insn (reg, copy_rtx (stored_mode));
9002
9003 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9004 {
9005 switch (mode)
9006 {
9007 case I387_CW_TRUNC:
9008 /* round toward zero (truncate) */
9009 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9010 slot = SLOT_CW_TRUNC;
9011 break;
9012
9013 case I387_CW_FLOOR:
9014 /* round down toward -oo */
9015 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9016 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9017 slot = SLOT_CW_FLOOR;
9018 break;
9019
9020 case I387_CW_CEIL:
9021 /* round up toward +oo */
9022 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9023 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9024 slot = SLOT_CW_CEIL;
9025 break;
9026
9027 case I387_CW_MASK_PM:
9028 /* mask precision exception for nearbyint() */
9029 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9030 slot = SLOT_CW_MASK_PM;
9031 break;
9032
9033 default:
9034 gcc_unreachable ();
9035 }
9036 }
9037 else
9038 {
9039 switch (mode)
9040 {
9041 case I387_CW_TRUNC:
9042 /* round toward zero (truncate) */
9043 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9044 slot = SLOT_CW_TRUNC;
9045 break;
9046
9047 case I387_CW_FLOOR:
9048 /* round down toward -oo */
9049 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9050 slot = SLOT_CW_FLOOR;
9051 break;
9052
9053 case I387_CW_CEIL:
9054 /* round up toward +oo */
9055 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9056 slot = SLOT_CW_CEIL;
9057 break;
9058
9059 case I387_CW_MASK_PM:
9060 /* mask precision exception for nearbyint() */
9061 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9062 slot = SLOT_CW_MASK_PM;
9063 break;
9064
9065 default:
9066 gcc_unreachable ();
9067 }
9068 }
9069
9070 gcc_assert (slot < MAX_386_STACK_LOCALS);
9071
9072 new_mode = assign_386_stack_local (HImode, slot);
9073 emit_move_insn (new_mode, reg);
9074 }
9075
9076 /* Output code for INSN to convert a float to a signed int. OPERANDS
9077 are the insn operands. The output may be [HSD]Imode and the input
9078 operand may be [SDX]Fmode. */
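/* With a runtime-rounding (non-fisttp) DImode conversion the emitted
   sequence is roughly

	fld	%st(0)		; only if the stack top does not die
	fldcw	%3		; switch to the truncating control word
	fistpll	%0
	fldcw	%2		; restore the original control word

   so the control word is changed only around the store itself.  */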
9079
9080 const char *
9081 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9082 {
9083 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9084 int dimode_p = GET_MODE (operands[0]) == DImode;
9085 int round_mode = get_attr_i387_cw (insn);
9086
9087 /* Jump through a hoop or two for DImode, since the hardware has no
9088 non-popping instruction. We used to do this a different way, but
9089 that was somewhat fragile and broke with post-reload splitters. */
9090 if ((dimode_p || fisttp) && !stack_top_dies)
9091 output_asm_insn ("fld\t%y1", operands);
9092
9093 gcc_assert (STACK_TOP_P (operands[1]));
9094 gcc_assert (MEM_P (operands[0]));
9095
9096 if (fisttp)
9097 output_asm_insn ("fisttp%z0\t%0", operands);
9098 else
9099 {
9100 if (round_mode != I387_CW_ANY)
9101 output_asm_insn ("fldcw\t%3", operands);
9102 if (stack_top_dies || dimode_p)
9103 output_asm_insn ("fistp%z0\t%0", operands);
9104 else
9105 output_asm_insn ("fist%z0\t%0", operands);
9106 if (round_mode != I387_CW_ANY)
9107 output_asm_insn ("fldcw\t%2", operands);
9108 }
9109
9110 return "";
9111 }
9112
9113 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9114 have the values zero or one, indicates the ffreep insn's operand
9115 from the OPERANDS array. */
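/* When the assembler does not understand the mnemonic, ffreep %st(N)
   is emitted below as its raw encoding, the two-byte opcode 0xdf
   0xc0+N; ".word" stores the 16-bit value little-endian, which puts
   the 0xdf byte first as required.  */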
9116
9117 static const char *
9118 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9119 {
9120 if (TARGET_USE_FFREEP)
9121 #if HAVE_AS_IX86_FFREEP
9122 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9123 #else
9124 {
9125 static char retval[] = ".word\t0xc_df";
9126 int regno = REGNO (operands[opno]);
9127
9128 gcc_assert (FP_REGNO_P (regno));
9129
9130 retval[9] = '0' + (regno - FIRST_STACK_REG);
9131 return retval;
9132 }
9133 #endif
9134
9135 return opno ? "fstp\t%y1" : "fstp\t%y0";
9136 }
9137
9138
9139 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9140 should be used. UNORDERED_P is true when fucom should be used. */
9141
9142 const char *
9143 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9144 {
9145 int stack_top_dies;
9146 rtx cmp_op0, cmp_op1;
9147 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9148
9149 if (eflags_p)
9150 {
9151 cmp_op0 = operands[0];
9152 cmp_op1 = operands[1];
9153 }
9154 else
9155 {
9156 cmp_op0 = operands[1];
9157 cmp_op1 = operands[2];
9158 }
9159
9160 if (is_sse)
9161 {
9162 if (GET_MODE (operands[0]) == SFmode)
9163 if (unordered_p)
9164 return "ucomiss\t{%1, %0|%0, %1}";
9165 else
9166 return "comiss\t{%1, %0|%0, %1}";
9167 else
9168 if (unordered_p)
9169 return "ucomisd\t{%1, %0|%0, %1}";
9170 else
9171 return "comisd\t{%1, %0|%0, %1}";
9172 }
9173
9174 gcc_assert (STACK_TOP_P (cmp_op0));
9175
9176 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9177
9178 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9179 {
9180 if (stack_top_dies)
9181 {
9182 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9183 return output_387_ffreep (operands, 1);
9184 }
9185 else
9186 return "ftst\n\tfnstsw\t%0";
9187 }
9188
9189 if (STACK_REG_P (cmp_op1)
9190 && stack_top_dies
9191 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9192 && REGNO (cmp_op1) != FIRST_STACK_REG)
9193 {
9194 /* If the top of the 387 stack dies, and the other operand
9195 is also a stack register that dies, then this must be a
9196 `fcompp' float compare.  */
9197
9198 if (eflags_p)
9199 {
9200 /* There is no double popping fcomi variant. Fortunately,
9201 eflags is immune from the fstp's cc clobbering. */
9202 if (unordered_p)
9203 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9204 else
9205 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9206 return output_387_ffreep (operands, 0);
9207 }
9208 else
9209 {
9210 if (unordered_p)
9211 return "fucompp\n\tfnstsw\t%0";
9212 else
9213 return "fcompp\n\tfnstsw\t%0";
9214 }
9215 }
9216 else
9217 {
9218 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9219
9220 static const char * const alt[16] =
9221 {
9222 "fcom%z2\t%y2\n\tfnstsw\t%0",
9223 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9224 "fucom%z2\t%y2\n\tfnstsw\t%0",
9225 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9226
9227 "ficom%z2\t%y2\n\tfnstsw\t%0",
9228 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9229 NULL,
9230 NULL,
9231
9232 "fcomi\t{%y1, %0|%0, %y1}",
9233 "fcomip\t{%y1, %0|%0, %y1}",
9234 "fucomi\t{%y1, %0|%0, %y1}",
9235 "fucomip\t{%y1, %0|%0, %y1}",
9236
9237 NULL,
9238 NULL,
9239 NULL,
9240 NULL
9241 };
9242
9243 int mask;
9244 const char *ret;
9245
9246 mask = eflags_p << 3;
9247 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9248 mask |= unordered_p << 1;
9249 mask |= stack_top_dies;
9250
9251 gcc_assert (mask < 16);
9252 ret = alt[mask];
9253 gcc_assert (ret);
9254
9255 return ret;
9256 }
9257 }
9258
9259 void
9260 ix86_output_addr_vec_elt (FILE *file, int value)
9261 {
9262 const char *directive = ASM_LONG;
9263
9264 #ifdef ASM_QUAD
9265 if (TARGET_64BIT)
9266 directive = ASM_QUAD;
9267 #else
9268 gcc_assert (!TARGET_64BIT);
9269 #endif
9270
9271 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9272 }
9273
9274 void
9275 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9276 {
9277 if (TARGET_64BIT)
9278 fprintf (file, "%s%s%d-%s%d\n",
9279 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9280 else if (HAVE_AS_GOTOFF_IN_DATA)
9281 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9282 #if TARGET_MACHO
9283 else if (TARGET_MACHO)
9284 {
9285 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9286 machopic_output_function_base_name (file);
9287 fprintf(file, "\n");
9288 }
9289 #endif
9290 else
9291 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9292 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9293 }
9294 \f
9295 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9296 for the target. */
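/* E.g. for an SImode destination this produces the 2-byte
   "xorl %eax, %eax" (which clobbers the flags) rather than the 5-byte
   "movl $0, %eax" whenever TARGET_USE_MOV0 is off or we optimize for
   size.  */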
9297
9298 void
9299 ix86_expand_clear (rtx dest)
9300 {
9301 rtx tmp;
9302
9303 /* We play register width games, which are only valid after reload. */
9304 gcc_assert (reload_completed);
9305
9306 /* Avoid HImode and its attendant prefix byte. */
9307 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9308 dest = gen_rtx_REG (SImode, REGNO (dest));
9309
9310 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9311
9312 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9313 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9314 {
9315 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9316 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9317 }
9318
9319 emit_insn (tmp);
9320 }
9321
9322 /* X is an unchanging MEM. If it is a constant pool reference, return
9323 the constant pool rtx, else NULL. */
9324
9325 rtx
9326 maybe_get_pool_constant (rtx x)
9327 {
9328 x = ix86_delegitimize_address (XEXP (x, 0));
9329
9330 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9331 return get_pool_constant (x);
9332
9333 return NULL_RTX;
9334 }
9335
9336 void
9337 ix86_expand_move (enum machine_mode mode, rtx operands[])
9338 {
9339 int strict = (reload_in_progress || reload_completed);
9340 rtx op0, op1;
9341 enum tls_model model;
9342
9343 op0 = operands[0];
9344 op1 = operands[1];
9345
9346 if (GET_CODE (op1) == SYMBOL_REF)
9347 {
9348 model = SYMBOL_REF_TLS_MODEL (op1);
9349 if (model)
9350 {
9351 op1 = legitimize_tls_address (op1, model, true);
9352 op1 = force_operand (op1, op0);
9353 if (op1 == op0)
9354 return;
9355 }
9356 }
9357 else if (GET_CODE (op1) == CONST
9358 && GET_CODE (XEXP (op1, 0)) == PLUS
9359 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9360 {
9361 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9362 if (model)
9363 {
9364 rtx addend = XEXP (XEXP (op1, 0), 1);
9365 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9366 op1 = force_operand (op1, NULL);
9367 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9368 op0, 1, OPTAB_DIRECT);
9369 if (op1 == op0)
9370 return;
9371 }
9372 }
9373
9374 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9375 {
9376 if (TARGET_MACHO && !TARGET_64BIT)
9377 {
9378 #if TARGET_MACHO
9379 if (MACHOPIC_PURE)
9380 {
9381 rtx temp = ((reload_in_progress
9382 || ((op0 && REG_P (op0))
9383 && mode == Pmode))
9384 ? op0 : gen_reg_rtx (Pmode));
9385 op1 = machopic_indirect_data_reference (op1, temp);
9386 op1 = machopic_legitimize_pic_address (op1, mode,
9387 temp == op1 ? 0 : temp);
9388 }
9389 else if (MACHOPIC_INDIRECT)
9390 op1 = machopic_indirect_data_reference (op1, 0);
9391 if (op0 == op1)
9392 return;
9393 #endif
9394 }
9395 else
9396 {
9397 if (MEM_P (op0))
9398 op1 = force_reg (Pmode, op1);
9399 else
9400 op1 = legitimize_address (op1, op1, Pmode);
9401 }
9402 }
9403 else
9404 {
9405 if (MEM_P (op0)
9406 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9407 || !push_operand (op0, mode))
9408 && MEM_P (op1))
9409 op1 = force_reg (mode, op1);
9410
9411 if (push_operand (op0, mode)
9412 && ! general_no_elim_operand (op1, mode))
9413 op1 = copy_to_mode_reg (mode, op1);
9414
9415 /* Force large constants in 64-bit compilation into a register
9416 to get them CSEed.  */
9417 if (TARGET_64BIT && mode == DImode
9418 && immediate_operand (op1, mode)
9419 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9420 && !register_operand (op0, mode)
9421 && optimize && !reload_completed && !reload_in_progress)
9422 op1 = copy_to_mode_reg (mode, op1);
9423
9424 if (FLOAT_MODE_P (mode))
9425 {
9426 /* If we are loading a floating point constant to a register,
9427 force the value to memory now, since we'll get better code
9428 out the back end. */
9429
9430 if (strict)
9431 ;
9432 else if (GET_CODE (op1) == CONST_DOUBLE)
9433 {
9434 op1 = validize_mem (force_const_mem (mode, op1));
9435 if (!register_operand (op0, mode))
9436 {
9437 rtx temp = gen_reg_rtx (mode);
9438 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9439 emit_move_insn (op0, temp);
9440 return;
9441 }
9442 }
9443 }
9444 }
9445
9446 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9447 }
9448
9449 void
9450 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9451 {
9452 rtx op0 = operands[0], op1 = operands[1];
9453
9454 /* Force constants other than zero into memory.  We do not know how
9455 the instructions used to build constants modify the upper 64 bits
9456 of the register; once we have that information we may be able
9457 to handle some of them more efficiently.  */
9458 if ((reload_in_progress | reload_completed) == 0
9459 && register_operand (op0, mode)
9460 && CONSTANT_P (op1)
9461 && standard_sse_constant_p (op1) <= 0)
9462 op1 = validize_mem (force_const_mem (mode, op1));
9463
9464 /* Make operand1 a register if it isn't already. */
9465 if (!no_new_pseudos
9466 && !register_operand (op0, mode)
9467 && !register_operand (op1, mode))
9468 {
9469 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9470 return;
9471 }
9472
9473 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9474 }
9475
9476 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9477 straight to ix86_expand_vector_move. */
9478
9479 void
9480 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9481 {
9482 rtx op0, op1, m;
9483
9484 op0 = operands[0];
9485 op1 = operands[1];
9486
9487 if (MEM_P (op1))
9488 {
9489 /* If we're optimizing for size, movups is the smallest. */
9490 if (optimize_size)
9491 {
9492 op0 = gen_lowpart (V4SFmode, op0);
9493 op1 = gen_lowpart (V4SFmode, op1);
9494 emit_insn (gen_sse_movups (op0, op1));
9495 return;
9496 }
9497
9498 /* ??? If we have typed data, then it would appear that using
9499 movdqu is the only way to get unaligned data loaded with
9500 integer type. */
9501 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9502 {
9503 op0 = gen_lowpart (V16QImode, op0);
9504 op1 = gen_lowpart (V16QImode, op1);
9505 emit_insn (gen_sse2_movdqu (op0, op1));
9506 return;
9507 }
9508
9509 if (TARGET_SSE2 && mode == V2DFmode)
9510 {
9511 rtx zero;
9512
9513 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9514 {
9515 op0 = gen_lowpart (V2DFmode, op0);
9516 op1 = gen_lowpart (V2DFmode, op1);
9517 emit_insn (gen_sse2_movupd (op0, op1));
9518 return;
9519 }
9520
9521 /* When SSE registers are split into halves, we can avoid
9522 writing to the top half twice. */
9523 if (TARGET_SSE_SPLIT_REGS)
9524 {
9525 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9526 zero = op0;
9527 }
9528 else
9529 {
9530 /* ??? Not sure about the best option for the Intel chips.
9531 The following would seem to satisfy; the register is
9532 entirely cleared, breaking the dependency chain. We
9533 then store to the upper half, with a dependency depth
9534 of one. A rumor has it that Intel recommends two movsd
9535 followed by an unpacklpd, but this is unconfirmed. And
9536 given that the dependency depth of the unpacklpd would
9537 still be one, I'm not sure why this would be better. */
9538 zero = CONST0_RTX (V2DFmode);
9539 }
9540
9541 m = adjust_address (op1, DFmode, 0);
9542 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9543 m = adjust_address (op1, DFmode, 8);
9544 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9545 }
9546 else
9547 {
9548 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9549 {
9550 op0 = gen_lowpart (V4SFmode, op0);
9551 op1 = gen_lowpart (V4SFmode, op1);
9552 emit_insn (gen_sse_movups (op0, op1));
9553 return;
9554 }
9555
9556 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9557 emit_move_insn (op0, CONST0_RTX (mode));
9558 else
9559 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9560
9561 if (mode != V4SFmode)
9562 op0 = gen_lowpart (V4SFmode, op0);
9563 m = adjust_address (op1, V2SFmode, 0);
9564 emit_insn (gen_sse_loadlps (op0, op0, m));
9565 m = adjust_address (op1, V2SFmode, 8);
9566 emit_insn (gen_sse_loadhps (op0, op0, m));
9567 }
9568 }
9569 else if (MEM_P (op0))
9570 {
9571 /* If we're optimizing for size, movups is the smallest. */
9572 if (optimize_size)
9573 {
9574 op0 = gen_lowpart (V4SFmode, op0);
9575 op1 = gen_lowpart (V4SFmode, op1);
9576 emit_insn (gen_sse_movups (op0, op1));
9577 return;
9578 }
9579
9580 /* ??? Similar to above, only less clear because of quote
9581 typeless stores unquote. */
9582 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9583 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9584 {
9585 op0 = gen_lowpart (V16QImode, op0);
9586 op1 = gen_lowpart (V16QImode, op1);
9587 emit_insn (gen_sse2_movdqu (op0, op1));
9588 return;
9589 }
9590
9591 if (TARGET_SSE2 && mode == V2DFmode)
9592 {
9593 m = adjust_address (op0, DFmode, 0);
9594 emit_insn (gen_sse2_storelpd (m, op1));
9595 m = adjust_address (op0, DFmode, 8);
9596 emit_insn (gen_sse2_storehpd (m, op1));
9597 }
9598 else
9599 {
9600 if (mode != V4SFmode)
9601 op1 = gen_lowpart (V4SFmode, op1);
9602 m = adjust_address (op0, V2SFmode, 0);
9603 emit_insn (gen_sse_storelps (m, op1));
9604 m = adjust_address (op0, V2SFmode, 8);
9605 emit_insn (gen_sse_storehps (m, op1));
9606 }
9607 }
9608 else
9609 gcc_unreachable ();
9610 }
9611
9612 /* Expand a push in MODE. This is some mode for which we do not support
9613 proper push instructions, at least from the registers that we expect
9614 the value to live in. */
9615
9616 void
9617 ix86_expand_push (enum machine_mode mode, rtx x)
9618 {
9619 rtx tmp;
9620
9621 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9622 GEN_INT (-GET_MODE_SIZE (mode)),
9623 stack_pointer_rtx, 1, OPTAB_DIRECT);
9624 if (tmp != stack_pointer_rtx)
9625 emit_move_insn (stack_pointer_rtx, tmp);
9626
9627 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9628 emit_move_insn (tmp, x);
9629 }
9630
9631 /* Helper function of ix86_fixup_binary_operands to canonicalize
9632 operand order. Returns true if the operands should be swapped. */
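/* E.g. an addition written as "x = 4 + x" is reported as needing a
   swap, so that the destination matches the first source operand and
   the immediate ends up second, as the machine insns expect.  */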
9633
9634 static bool
9635 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9636 rtx operands[])
9637 {
9638 rtx dst = operands[0];
9639 rtx src1 = operands[1];
9640 rtx src2 = operands[2];
9641
9642 /* If the operation is not commutative, we can't do anything. */
9643 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9644 return false;
9645
9646 /* Highest priority is that src1 should match dst. */
9647 if (rtx_equal_p (dst, src1))
9648 return false;
9649 if (rtx_equal_p (dst, src2))
9650 return true;
9651
9652 /* Next highest priority is that immediate constants come second. */
9653 if (immediate_operand (src2, mode))
9654 return false;
9655 if (immediate_operand (src1, mode))
9656 return true;
9657
9658 /* Lowest priority is that memory references should come second. */
9659 if (MEM_P (src2))
9660 return false;
9661 if (MEM_P (src1))
9662 return true;
9663
9664 return false;
9665 }
9666
9667
9668 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9669 destination to use for the operation. If different from the true
9670 destination in operands[0], a copy operation will be required. */
9671
9672 rtx
9673 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9674 rtx operands[])
9675 {
9676 rtx dst = operands[0];
9677 rtx src1 = operands[1];
9678 rtx src2 = operands[2];
9679
9680 /* Canonicalize operand order. */
9681 if (ix86_swap_binary_operands_p (code, mode, operands))
9682 {
9683 rtx temp = src1;
9684 src1 = src2;
9685 src2 = temp;
9686 }
9687
9688 /* Both source operands cannot be in memory. */
9689 if (MEM_P (src1) && MEM_P (src2))
9690 {
9691 /* Optimization: Only read from memory once. */
9692 if (rtx_equal_p (src1, src2))
9693 {
9694 src2 = force_reg (mode, src2);
9695 src1 = src2;
9696 }
9697 else
9698 src2 = force_reg (mode, src2);
9699 }
9700
9701 /* If the destination is memory, and we do not have matching source
9702 operands, do things in registers. */
9703 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9704 dst = gen_reg_rtx (mode);
9705
9706 /* Source 1 cannot be a constant. */
9707 if (CONSTANT_P (src1))
9708 src1 = force_reg (mode, src1);
9709
9710 /* Source 1 cannot be a non-matching memory. */
9711 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9712 src1 = force_reg (mode, src1);
9713
9714 operands[1] = src1;
9715 operands[2] = src2;
9716 return dst;
9717 }
9718
9719 /* Similarly, but assume that the destination has already been
9720 set up properly. */
9721
9722 void
9723 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9724 enum machine_mode mode, rtx operands[])
9725 {
9726 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9727 gcc_assert (dst == operands[0]);
9728 }
9729
9730 /* Attempt to expand a binary operator.  Make the expansion closer to the
9731 actual machine than just general_operand, which would allow 3 separate
9732 memory references (one output, two input) in a single insn.  */
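/* Outside of reload the emitted insn is a two-element PARALLEL, e.g.
   roughly

	(parallel [(set (reg:SI 60) (plus:SI (reg:SI 60) (mem:SI ...)))
		   (clobber (reg:CC FLAGS_REG))])

   so that the flag-clobbering arithmetic patterns in i386.md match.  */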
9733
9734 void
9735 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9736 rtx operands[])
9737 {
9738 rtx src1, src2, dst, op, clob;
9739
9740 dst = ix86_fixup_binary_operands (code, mode, operands);
9741 src1 = operands[1];
9742 src2 = operands[2];
9743
9744 /* Emit the instruction. */
9745
9746 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9747 if (reload_in_progress)
9748 {
9749 /* Reload doesn't know about the flags register, and doesn't know that
9750 it doesn't want to clobber it. We can only do this with PLUS. */
9751 gcc_assert (code == PLUS);
9752 emit_insn (op);
9753 }
9754 else
9755 {
9756 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9757 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9758 }
9759
9760 /* Fix up the destination if needed. */
9761 if (dst != operands[0])
9762 emit_move_insn (operands[0], dst);
9763 }
9764
9765 /* Return TRUE or FALSE depending on whether the binary operator meets the
9766 appropriate constraints. */
9767
9768 int
9769 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9770 rtx operands[3])
9771 {
9772 rtx dst = operands[0];
9773 rtx src1 = operands[1];
9774 rtx src2 = operands[2];
9775
9776 /* Both source operands cannot be in memory. */
9777 if (MEM_P (src1) && MEM_P (src2))
9778 return 0;
9779
9780 /* Canonicalize operand order for commutative operators. */
9781 if (ix86_swap_binary_operands_p (code, mode, operands))
9782 {
9783 rtx temp = src1;
9784 src1 = src2;
9785 src2 = temp;
9786 }
9787
9788 /* If the destination is memory, we must have a matching source operand. */
9789 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9790 return 0;
9791
9792 /* Source 1 cannot be a constant. */
9793 if (CONSTANT_P (src1))
9794 return 0;
9795
9796 /* Source 1 cannot be a non-matching memory. */
9797 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9798 return 0;
9799
9800 return 1;
9801 }
9802
9803 /* Attempt to expand a unary operator.  Make the expansion closer to the
9804 actual machine than just general_operand, which would allow 2 separate
9805 memory references (one output, one input) in a single insn.  */
9806
9807 void
9808 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9809 rtx operands[])
9810 {
9811 int matching_memory;
9812 rtx src, dst, op, clob;
9813
9814 dst = operands[0];
9815 src = operands[1];
9816
9817 /* If the destination is memory, and we do not have matching source
9818 operands, do things in registers. */
9819 matching_memory = 0;
9820 if (MEM_P (dst))
9821 {
9822 if (rtx_equal_p (dst, src))
9823 matching_memory = 1;
9824 else
9825 dst = gen_reg_rtx (mode);
9826 }
9827
9828 /* When source operand is memory, destination must match. */
9829 if (MEM_P (src) && !matching_memory)
9830 src = force_reg (mode, src);
9831
9832 /* Emit the instruction. */
9833
9834 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9835 if (reload_in_progress || code == NOT)
9836 {
9837 /* Reload doesn't know about the flags register, and doesn't know that
9838 it doesn't want to clobber it. */
9839 gcc_assert (code == NOT);
9840 emit_insn (op);
9841 }
9842 else
9843 {
9844 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9846 }
9847
9848 /* Fix up the destination if needed. */
9849 if (dst != operands[0])
9850 emit_move_insn (operands[0], dst);
9851 }
9852
9853 /* Return TRUE or FALSE depending on whether the unary operator meets the
9854 appropriate constraints. */
9855
9856 int
9857 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9858 enum machine_mode mode ATTRIBUTE_UNUSED,
9859 rtx operands[2] ATTRIBUTE_UNUSED)
9860 {
9861 /* If one of the operands is memory, source and destination must match.  */
9862 if ((MEM_P (operands[0])
9863 || MEM_P (operands[1]))
9864 && ! rtx_equal_p (operands[0], operands[1]))
9865 return FALSE;
9866 return TRUE;
9867 }
9868
9869 /* Post-reload splitter for converting an SF or DFmode value in an
9870 SSE register into an unsigned SImode. */
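/* The idea: values below 2**31 convert directly, while for values of
   at least 2**31 we subtract 2**31 before the signed conversion and
   put the 0x80000000 bit back into the integer result with the final
   XOR.  E.g. 3e9 becomes 852516352.0 after the subtraction and
   3000000000 after the XOR.  */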
9871
9872 void
9873 ix86_split_convert_uns_si_sse (rtx operands[])
9874 {
9875 enum machine_mode vecmode;
9876 rtx value, large, zero_or_two31, input, two31, x;
9877
9878 large = operands[1];
9879 zero_or_two31 = operands[2];
9880 input = operands[3];
9881 two31 = operands[4];
9882 vecmode = GET_MODE (large);
9883 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9884
9885 /* Load up the value into the low element. We must ensure that the other
9886 elements are valid floats -- zero is the easiest such value. */
9887 if (MEM_P (input))
9888 {
9889 if (vecmode == V4SFmode)
9890 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9891 else
9892 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9893 }
9894 else
9895 {
9896 input = gen_rtx_REG (vecmode, REGNO (input));
9897 emit_move_insn (value, CONST0_RTX (vecmode));
9898 if (vecmode == V4SFmode)
9899 emit_insn (gen_sse_movss (value, value, input));
9900 else
9901 emit_insn (gen_sse2_movsd (value, value, input));
9902 }
9903
9904 emit_move_insn (large, two31);
9905 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9906
9907 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9908 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9909
9910 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9911 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9912
9913 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9914 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9915
9916 large = gen_rtx_REG (V4SImode, REGNO (large));
9917 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9918
9919 x = gen_rtx_REG (V4SImode, REGNO (value));
9920 if (vecmode == V4SFmode)
9921 emit_insn (gen_sse2_cvttps2dq (x, value));
9922 else
9923 emit_insn (gen_sse2_cvttpd2dq (x, value));
9924 value = x;
9925
9926 emit_insn (gen_xorv4si3 (value, value, large));
9927 }
9928
9929 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9930 Expects the 64-bit DImode to be supplied in a pair of integral
9931 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9932 -mfpmath=sse, !optimize_size only. */
9933
9934 void
9935 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9936 {
9937 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9938 rtx int_xmm, fp_xmm;
9939 rtx biases, exponents;
9940 rtx x;
9941
9942 int_xmm = gen_reg_rtx (V4SImode);
9943 if (TARGET_INTER_UNIT_MOVES)
9944 emit_insn (gen_movdi_to_sse (int_xmm, input));
9945 else if (TARGET_SSE_SPLIT_REGS)
9946 {
9947 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9948 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9949 }
9950 else
9951 {
9952 x = gen_reg_rtx (V2DImode);
9953 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9954 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9955 }
9956
9957 x = gen_rtx_CONST_VECTOR (V4SImode,
9958 gen_rtvec (4, GEN_INT (0x43300000UL),
9959 GEN_INT (0x45300000UL),
9960 const0_rtx, const0_rtx));
9961 exponents = validize_mem (force_const_mem (V4SImode, x));
9962
9963 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9964 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9965
9966 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9967 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9968 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9969 (0x1.0p84 + double(fp_value_hi_xmm)).
9970 Note these exponents differ by 32. */
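/* Worked example: for the input 0x100000005 (2**32 + 5) the low word
   forms 0x1.0p52 + 5 and the high word forms 0x1.0p84 + 1*2**32;
   after subtracting the biases this leaves 5.0 and 4294967296.0,
   which sum to the expected 4294967301.0.  */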
9971
9972 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9973
9974 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9975 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9976 real_ldexp (&bias_lo_rvt, &dconst1, 52);
9977 real_ldexp (&bias_hi_rvt, &dconst1, 84);
9978 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
9979 x = const_double_from_real_value (bias_hi_rvt, DFmode);
9980 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
9981 biases = validize_mem (force_const_mem (V2DFmode, biases));
9982 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
9983
9984 /* Add the upper and lower DFmode values together. */
9985 if (TARGET_SSE3)
9986 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
9987 else
9988 {
9989 x = copy_to_mode_reg (V2DFmode, fp_xmm);
9990 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
9991 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
9992 }
9993
9994 ix86_expand_vector_extract (false, target, fp_xmm, 0);
9995 }
9996
9997 /* Convert an unsigned SImode value into a DFmode. Only currently used
9998 for SSE, but applicable anywhere. */
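/* Adding -2**31 maps the unsigned range 0..2**32-1 onto the signed
   range -2**31..2**31-1, which the ordinary signed SImode->DFmode
   conversion handles, and adding 2**31.0 back afterwards is exact in
   DFmode.  E.g. 0xffffffff becomes 0x7fffffff, converts to
   2147483647.0 and ends up as 4294967295.0.  */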
9999
10000 void
10001 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10002 {
10003 REAL_VALUE_TYPE TWO31r;
10004 rtx x, fp;
10005
10006 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10007 NULL, 1, OPTAB_DIRECT);
10008
10009 fp = gen_reg_rtx (DFmode);
10010 emit_insn (gen_floatsidf2 (fp, x));
10011
10012 real_ldexp (&TWO31r, &dconst1, 31);
10013 x = const_double_from_real_value (TWO31r, DFmode);
10014
10015 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10016 if (x != target)
10017 emit_move_insn (target, x);
10018 }
10019
10020 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10021 32-bit mode; otherwise we have a direct convert instruction. */
10022
10023 void
10024 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10025 {
10026 REAL_VALUE_TYPE TWO32r;
10027 rtx fp_lo, fp_hi, x;
10028
10029 fp_lo = gen_reg_rtx (DFmode);
10030 fp_hi = gen_reg_rtx (DFmode);
10031
10032 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10033
10034 real_ldexp (&TWO32r, &dconst1, 32);
10035 x = const_double_from_real_value (TWO32r, DFmode);
10036 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10037
10038 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10039
10040 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10041 0, OPTAB_DIRECT);
10042 if (x != target)
10043 emit_move_insn (target, x);
10044 }
10045
10046 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10047 For x86_32, -mfpmath=sse, !optimize_size only. */
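/* Both 16-bit halves are non-negative and exactly representable in
   SFmode, and scaling the high half by 2**16 is exact as well, so the
   only rounding happens in the final addition.  E.g. 0xdeadbeef splits
   into 0xdead and 0xbeef and is computed as 57005.0 * 65536.0
   + 48879.0.  */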
10048 void
10049 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10050 {
10051 REAL_VALUE_TYPE ONE16r;
10052 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10053
10054 real_ldexp (&ONE16r, &dconst1, 16);
10055 x = const_double_from_real_value (ONE16r, SFmode);
10056 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10057 NULL, 0, OPTAB_DIRECT);
10058 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10059 NULL, 0, OPTAB_DIRECT);
10060 fp_hi = gen_reg_rtx (SFmode);
10061 fp_lo = gen_reg_rtx (SFmode);
10062 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10063 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10064 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10065 0, OPTAB_DIRECT);
10066 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10067 0, OPTAB_DIRECT);
10068 if (!rtx_equal_p (target, fp_hi))
10069 emit_move_insn (target, fp_hi);
10070 }
10071
10072 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10073 then replicate the value for all elements of the vector
10074 register. */
10075
10076 rtx
10077 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10078 {
10079 rtvec v;
10080 switch (mode)
10081 {
10082 case SFmode:
10083 if (vect)
10084 v = gen_rtvec (4, value, value, value, value);
10085 else
10086 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10087 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10088 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10089
10090 case DFmode:
10091 if (vect)
10092 v = gen_rtvec (2, value, value);
10093 else
10094 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10095 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10096
10097 default:
10098 gcc_unreachable ();
10099 }
10100 }
10101
10102 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10103 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10104 true, then replicate the mask for all elements of the vector register.
10105 If INVERT is true, then create a mask excluding the sign bit. */
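/* Concretely the low mask element is 0x80000000 for SFmode and
   0x8000000000000000 for DFmode (complemented when INVERT is true);
   XORing with the plain mask negates a value, while ANDing with the
   inverted mask takes its absolute value.  */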
10106
10107 rtx
10108 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10109 {
10110 enum machine_mode vec_mode;
10111 HOST_WIDE_INT hi, lo;
10112 int shift = 63;
10113 rtx v;
10114 rtx mask;
10115
10116 /* Find the sign bit, sign extended to 2*HWI. */
10117 if (mode == SFmode)
10118 lo = 0x80000000, hi = lo < 0;
10119 else if (HOST_BITS_PER_WIDE_INT >= 64)
10120 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10121 else
10122 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10123
10124 if (invert)
10125 lo = ~lo, hi = ~hi;
10126
10127 /* Force this value into the low part of a fp vector constant. */
10128 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10129 mask = gen_lowpart (mode, mask);
10130
10131 v = ix86_build_const_vector (mode, vect, mask);
10132 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10133 return force_reg (vec_mode, v);
10134 }
10135
10136 /* Generate code for floating point ABS or NEG. */
10137
10138 void
10139 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10140 rtx operands[])
10141 {
10142 rtx mask, set, use, clob, dst, src;
10143 bool matching_memory;
10144 bool use_sse = false;
10145 bool vector_mode = VECTOR_MODE_P (mode);
10146 enum machine_mode elt_mode = mode;
10147
10148 if (vector_mode)
10149 {
10150 elt_mode = GET_MODE_INNER (mode);
10151 use_sse = true;
10152 }
10153 else if (TARGET_SSE_MATH)
10154 use_sse = SSE_FLOAT_MODE_P (mode);
10155
10156 /* NEG and ABS performed with SSE use bitwise mask operations.
10157 Create the appropriate mask now. */
10158 if (use_sse)
10159 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10160 else
10161 mask = NULL_RTX;
10162
10163 dst = operands[0];
10164 src = operands[1];
10165
10166 /* If the destination is memory, and we don't have matching source
10167 operands or we're using the x87, do things in registers. */
10168 matching_memory = false;
10169 if (MEM_P (dst))
10170 {
10171 if (use_sse && rtx_equal_p (dst, src))
10172 matching_memory = true;
10173 else
10174 dst = gen_reg_rtx (mode);
10175 }
10176 if (MEM_P (src) && !matching_memory)
10177 src = force_reg (mode, src);
10178
10179 if (vector_mode)
10180 {
10181 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10182 set = gen_rtx_SET (VOIDmode, dst, set);
10183 emit_insn (set);
10184 }
10185 else
10186 {
10187 set = gen_rtx_fmt_e (code, mode, src);
10188 set = gen_rtx_SET (VOIDmode, dst, set);
10189 if (mask)
10190 {
10191 use = gen_rtx_USE (VOIDmode, mask);
10192 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10193 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10194 gen_rtvec (3, set, use, clob)));
10195 }
10196 else
10197 emit_insn (set);
10198 }
10199
10200 if (dst != operands[0])
10201 emit_move_insn (operands[0], dst);
10202 }
10203
10204 /* Expand a copysign operation. Special case operand 0 being a constant. */
10205
10206 void
10207 ix86_expand_copysign (rtx operands[])
10208 {
10209 enum machine_mode mode, vmode;
10210 rtx dest, op0, op1, mask, nmask;
10211
10212 dest = operands[0];
10213 op0 = operands[1];
10214 op1 = operands[2];
10215
10216 mode = GET_MODE (dest);
10217 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10218
10219 if (GET_CODE (op0) == CONST_DOUBLE)
10220 {
10221 rtvec v;
10222
10223 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10224 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10225
10226 if (op0 == CONST0_RTX (mode))
10227 op0 = CONST0_RTX (vmode);
10228 else
10229 {
10230 if (mode == SFmode)
10231 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10232 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10233 else
10234 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10235 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10236 }
10237
10238 mask = ix86_build_signbit_mask (mode, 0, 0);
10239
10240 if (mode == SFmode)
10241 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10242 else
10243 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10244 }
10245 else
10246 {
10247 nmask = ix86_build_signbit_mask (mode, 0, 1);
10248 mask = ix86_build_signbit_mask (mode, 0, 0);
10249
10250 if (mode == SFmode)
10251 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10252 else
10253 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10254 }
10255 }
10256
10257 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10258 be a constant, and so has already been expanded into a vector constant. */
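/* In bit terms, copysign with a constant magnitude c and variable y is
   just (|c| | (y & sign-bit mask)); the AND and IOR emitted below
   realize this.  */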
10259
10260 void
10261 ix86_split_copysign_const (rtx operands[])
10262 {
10263 enum machine_mode mode, vmode;
10264 rtx dest, op0, op1, mask, x;
10265
10266 dest = operands[0];
10267 op0 = operands[1];
10268 op1 = operands[2];
10269 mask = operands[3];
10270
10271 mode = GET_MODE (dest);
10272 vmode = GET_MODE (mask);
10273
10274 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10275 x = gen_rtx_AND (vmode, dest, mask);
10276 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10277
10278 if (op0 != CONST0_RTX (vmode))
10279 {
10280 x = gen_rtx_IOR (vmode, dest, op0);
10281 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10282 }
10283 }
10284
10285 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10286 so we have to do two masks. */
10287
10288 void
10289 ix86_split_copysign_var (rtx operands[])
10290 {
10291 enum machine_mode mode, vmode;
10292 rtx dest, scratch, op0, op1, mask, nmask, x;
10293
10294 dest = operands[0];
10295 scratch = operands[1];
10296 op0 = operands[2];
10297 op1 = operands[3];
10298 nmask = operands[4];
10299 mask = operands[5];
10300
10301 mode = GET_MODE (dest);
10302 vmode = GET_MODE (mask);
10303
10304 if (rtx_equal_p (op0, op1))
10305 {
10306 /* Shouldn't happen often (it's useless, obviously), but when it does
10307 we'd generate incorrect code if we continue below. */
10308 emit_move_insn (dest, op0);
10309 return;
10310 }
10311
10312 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10313 {
10314 gcc_assert (REGNO (op1) == REGNO (scratch));
10315
10316 x = gen_rtx_AND (vmode, scratch, mask);
10317 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10318
10319 dest = mask;
10320 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10321 x = gen_rtx_NOT (vmode, dest);
10322 x = gen_rtx_AND (vmode, x, op0);
10323 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10324 }
10325 else
10326 {
10327 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10328 {
10329 x = gen_rtx_AND (vmode, scratch, mask);
10330 }
10331 else /* alternative 2,4 */
10332 {
10333 gcc_assert (REGNO (mask) == REGNO (scratch));
10334 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10335 x = gen_rtx_AND (vmode, scratch, op1);
10336 }
10337 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10338
10339 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10340 {
10341 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10342 x = gen_rtx_AND (vmode, dest, nmask);
10343 }
10344 else /* alternative 3,4 */
10345 {
10346 gcc_assert (REGNO (nmask) == REGNO (dest));
10347 dest = nmask;
10348 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10349 x = gen_rtx_AND (vmode, dest, op0);
10350 }
10351 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10352 }
10353
10354 x = gen_rtx_IOR (vmode, dest, scratch);
10355 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10356 }
10357
10358 /* Return TRUE or FALSE depending on whether the first SET in INSN
10359 has source and destination with matching CC modes, and whether the
10360 CC mode is at least as constrained as REQ_MODE.  */
10361
10362 int
10363 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10364 {
10365 rtx set;
10366 enum machine_mode set_mode;
10367
10368 set = PATTERN (insn);
10369 if (GET_CODE (set) == PARALLEL)
10370 set = XVECEXP (set, 0, 0);
10371 gcc_assert (GET_CODE (set) == SET);
10372 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10373
10374 set_mode = GET_MODE (SET_DEST (set));
10375 switch (set_mode)
10376 {
10377 case CCNOmode:
10378 if (req_mode != CCNOmode
10379 && (req_mode != CCmode
10380 || XEXP (SET_SRC (set), 1) != const0_rtx))
10381 return 0;
10382 break;
10383 case CCmode:
10384 if (req_mode == CCGCmode)
10385 return 0;
10386 /* FALLTHRU */
10387 case CCGCmode:
10388 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10389 return 0;
10390 /* FALLTHRU */
10391 case CCGOCmode:
10392 if (req_mode == CCZmode)
10393 return 0;
10394 /* FALLTHRU */
10395 case CCZmode:
10396 break;
10397
10398 default:
10399 gcc_unreachable ();
10400 }
10401
10402 return (GET_MODE (SET_SRC (set)) == set_mode);
10403 }
10404
10405 /* Generate insn patterns to do an integer compare of OPERANDS. */
10406
10407 static rtx
10408 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10409 {
10410 enum machine_mode cmpmode;
10411 rtx tmp, flags;
10412
10413 cmpmode = SELECT_CC_MODE (code, op0, op1);
10414 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10415
10416 /* This is very simple, but making the interface the same as in the
10417 FP case makes the rest of the code easier. */
10418 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10419 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10420
10421 /* Return the test that should be put into the flags user, i.e.
10422 the bcc, scc, or cmov instruction. */
10423 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10424 }
10425
10426 /* Figure out whether to use ordered or unordered fp comparisons.
10427 Return the appropriate mode to use. */
10428
10429 enum machine_mode
10430 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10431 {
10432 /* ??? In order to make all comparisons reversible, we do all comparisons
10433 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10434 all forms of trapping and nontrapping comparisons, we can make inequality
10435 comparisons trapping again, since it results in better code when using
10436 FCOM based compares. */
10437 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10438 }
10439
10440 enum machine_mode
10441 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10442 {
10443 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10444 return ix86_fp_compare_mode (code);
10445 switch (code)
10446 {
10447 /* Only zero flag is needed. */
10448 case EQ: /* ZF=0 */
10449 case NE: /* ZF!=0 */
10450 return CCZmode;
10451 /* Codes needing carry flag. */
10452 case GEU: /* CF=0 */
10453 case GTU: /* CF=0 & ZF=0 */
10454 case LTU: /* CF=1 */
10455 case LEU: /* CF=1 | ZF=1 */
10456 return CCmode;
10457 /* Codes possibly doable only with sign flag when
10458 comparing against zero. */
10459 case GE: /* SF=OF or SF=0 */
10460 case LT: /* SF<>OF or SF=1 */
10461 if (op1 == const0_rtx)
10462 return CCGOCmode;
10463 else
10464 /* For other cases Carry flag is not required. */
10465 return CCGCmode;
10466 /* Codes doable only with the sign flag when comparing
10467 against zero, but for which we lack a jump instruction,
10468 so we need to use relational tests against overflow,
10469 which thus needs to be zero.  */
10470 case GT: /* ZF=0 & SF=OF */
10471 case LE: /* ZF=1 | SF<>OF */
10472 if (op1 == const0_rtx)
10473 return CCNOmode;
10474 else
10475 return CCGCmode;
10476 /* The strcmp pattern does (use flags), and combine may ask us for the
10477 proper mode.  */
10478 case USE:
10479 return CCmode;
10480 default:
10481 gcc_unreachable ();
10482 }
10483 }
10484
10485 /* Return the fixed registers used for condition codes. */
10486
10487 static bool
10488 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10489 {
10490 *p1 = FLAGS_REG;
10491 *p2 = FPSR_REG;
10492 return true;
10493 }
10494
10495 /* If two condition code modes are compatible, return a condition code
10496 mode which is compatible with both. Otherwise, return
10497 VOIDmode. */
10498
10499 static enum machine_mode
10500 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10501 {
10502 if (m1 == m2)
10503 return m1;
10504
10505 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10506 return VOIDmode;
10507
10508 if ((m1 == CCGCmode && m2 == CCGOCmode)
10509 || (m1 == CCGOCmode && m2 == CCGCmode))
10510 return CCGCmode;
10511
10512 switch (m1)
10513 {
10514 default:
10515 gcc_unreachable ();
10516
10517 case CCmode:
10518 case CCGCmode:
10519 case CCGOCmode:
10520 case CCNOmode:
10521 case CCZmode:
10522 switch (m2)
10523 {
10524 default:
10525 return VOIDmode;
10526
10527 case CCmode:
10528 case CCGCmode:
10529 case CCGOCmode:
10530 case CCNOmode:
10531 case CCZmode:
10532 return CCmode;
10533 }
10534
10535 case CCFPmode:
10536 case CCFPUmode:
10537 /* These are only compatible with themselves, which we already
10538 checked above. */
10539 return VOIDmode;
10540 }
10541 }
10542
10543 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10544
10545 int
10546 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10547 {
10548 enum rtx_code swapped_code = swap_condition (code);
10549 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10550 || (ix86_fp_comparison_cost (swapped_code)
10551 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10552 }
10553
10554 /* Swap, force into registers, or otherwise massage the two operands
10555 to a fp comparison. The operands are updated in place; the new
10556 comparison code is returned. */
10557
10558 static enum rtx_code
10559 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10560 {
10561 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10562 rtx op0 = *pop0, op1 = *pop1;
10563 enum machine_mode op_mode = GET_MODE (op0);
10564 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10565
10566 /* All of the unordered compare instructions only work on registers.
10567 The same is true of the fcomi compare instructions. The XFmode
10568 compare instructions require registers except when comparing
10569 against zero or when converting operand 1 from fixed point to
10570 floating point. */
10571
10572 if (!is_sse
10573 && (fpcmp_mode == CCFPUmode
10574 || (op_mode == XFmode
10575 && ! (standard_80387_constant_p (op0) == 1
10576 || standard_80387_constant_p (op1) == 1)
10577 && GET_CODE (op1) != FLOAT)
10578 || ix86_use_fcomi_compare (code)))
10579 {
10580 op0 = force_reg (op_mode, op0);
10581 op1 = force_reg (op_mode, op1);
10582 }
10583 else
10584 {
10585 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10586 things around if they appear profitable, otherwise force op0
10587 into a register. */
10588
10589 if (standard_80387_constant_p (op0) == 0
10590 || (MEM_P (op0)
10591 && ! (standard_80387_constant_p (op1) == 0
10592 || MEM_P (op1))))
10593 {
10594 rtx tmp;
10595 tmp = op0, op0 = op1, op1 = tmp;
10596 code = swap_condition (code);
10597 }
10598
10599 if (!REG_P (op0))
10600 op0 = force_reg (op_mode, op0);
10601
10602 if (CONSTANT_P (op1))
10603 {
10604 int tmp = standard_80387_constant_p (op1);
10605 if (tmp == 0)
10606 op1 = validize_mem (force_const_mem (op_mode, op1));
10607 else if (tmp == 1)
10608 {
10609 if (TARGET_CMOVE)
10610 op1 = force_reg (op_mode, op1);
10611 }
10612 else
10613 op1 = force_reg (op_mode, op1);
10614 }
10615 }
10616
10617 /* Try to rearrange the comparison to make it cheaper. */
10618 if (ix86_fp_comparison_cost (code)
10619 > ix86_fp_comparison_cost (swap_condition (code))
10620 && (REG_P (op1) || !no_new_pseudos))
10621 {
10622 rtx tmp;
10623 tmp = op0, op0 = op1, op1 = tmp;
10624 code = swap_condition (code);
10625 if (!REG_P (op0))
10626 op0 = force_reg (op_mode, op0);
10627 }
10628
10629 *pop0 = op0;
10630 *pop1 = op1;
10631 return code;
10632 }
10633
10634 /* Convert the comparison codes we use to represent FP comparisons to the
10635 integer code that will result in a proper branch.  Return UNKNOWN if no
10636 such code is available.  */
10637
10638 enum rtx_code
10639 ix86_fp_compare_code_to_integer (enum rtx_code code)
10640 {
10641 switch (code)
10642 {
10643 case GT:
10644 return GTU;
10645 case GE:
10646 return GEU;
10647 case ORDERED:
10648 case UNORDERED:
10649 return code;
10650 break;
10651 case UNEQ:
10652 return EQ;
10653 break;
10654 case UNLT:
10655 return LTU;
10656 break;
10657 case UNLE:
10658 return LEU;
10659 break;
10660 case LTGT:
10661 return NE;
10662 break;
10663 default:
10664 return UNKNOWN;
10665 }
10666 }
10667
10668 /* Split comparison code CODE into comparisons we can do using branch
10669 instructions.  BYPASS_CODE is the comparison code for a branch that will
10670 branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10671 is not required, its code is set to UNKNOWN.
10672 We never require more than two branches.  */
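/* For example, under TARGET_IEEE_FP an EQ test becomes UNEQ with an
   UNORDERED bypass (roughly a "jp" over a "je"), and NE becomes LTGT
   plus a second UNORDERED branch, since the plain flag tests would
   give the wrong answer for unordered (NaN) operands.  */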
10673
10674 void
10675 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10676 enum rtx_code *first_code,
10677 enum rtx_code *second_code)
10678 {
10679 *first_code = code;
10680 *bypass_code = UNKNOWN;
10681 *second_code = UNKNOWN;
10682
10683 /* The fcomi comparison sets flags as follows:
10684
10685 cmp ZF PF CF
10686 > 0 0 0
10687 < 0 0 1
10688 = 1 0 0
10689 un 1 1 1 */
10690
10691 switch (code)
10692 {
10693 case GT: /* GTU - CF=0 & ZF=0 */
10694 case GE: /* GEU - CF=0 */
10695 case ORDERED: /* PF=0 */
10696 case UNORDERED: /* PF=1 */
10697 case UNEQ: /* EQ - ZF=1 */
10698 case UNLT: /* LTU - CF=1 */
10699 case UNLE: /* LEU - CF=1 | ZF=1 */
10700 case LTGT: /* EQ - ZF=0 */
10701 break;
10702 case LT: /* LTU - CF=1 - fails on unordered */
10703 *first_code = UNLT;
10704 *bypass_code = UNORDERED;
10705 break;
10706 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10707 *first_code = UNLE;
10708 *bypass_code = UNORDERED;
10709 break;
10710 case EQ: /* EQ - ZF=1 - fails on unordered */
10711 *first_code = UNEQ;
10712 *bypass_code = UNORDERED;
10713 break;
10714 case NE: /* NE - ZF=0 - fails on unordered */
10715 *first_code = LTGT;
10716 *second_code = UNORDERED;
10717 break;
10718 case UNGE: /* GEU - CF=0 - fails on unordered */
10719 *first_code = GE;
10720 *second_code = UNORDERED;
10721 break;
10722 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10723 *first_code = GT;
10724 *second_code = UNORDERED;
10725 break;
10726 default:
10727 gcc_unreachable ();
10728 }
10729 if (!TARGET_IEEE_FP)
10730 {
10731 *second_code = UNKNOWN;
10732 *bypass_code = UNKNOWN;
10733 }
10734 }
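
/* Worked example (illustrative): with TARGET_IEEE_FP a plain LT cannot
   be tested with a single `jb', because an unordered result also sets
   CF.  The switch above therefore returns *bypass_code = UNORDERED and
   *first_code = UNLT, i.e. roughly

	jp	.Lskip		; bypass when unordered (PF=1)
	jb	.Ltarget	; UNLT: CF=1
     .Lskip:

   NE needs a second branch instead of a bypass: both LTGT (ZF=0) and
   UNORDERED (PF=1) jump to the target.  */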
10735
10736 /* Return cost of a comparison done using fcom + arithmetic operations on AX.
10737 All following functions use the number of instructions as a cost metric.
10738 In the future this should be tweaked to compute bytes for optimize_size and
10739 take into account the performance of various instructions on various CPUs. */
10740 static int
10741 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10742 {
10743 if (!TARGET_IEEE_FP)
10744 return 4;
10745 /* The cost of code output by ix86_expand_fp_compare. */
10746 switch (code)
10747 {
10748 case UNLE:
10749 case UNLT:
10750 case LTGT:
10751 case GT:
10752 case GE:
10753 case UNORDERED:
10754 case ORDERED:
10755 case UNEQ:
10756 return 4;
10757 break;
10758 case LT:
10759 case NE:
10760 case EQ:
10761 case UNGE:
10762 return 5;
10763 break;
10764 case LE:
10765 case UNGT:
10766 return 6;
10767 break;
10768 default:
10769 gcc_unreachable ();
10770 }
10771 }
10772
10773 /* Return cost of comparison done using fcomi operation.
10774 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10775 static int
10776 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10777 {
10778 enum rtx_code bypass_code, first_code, second_code;
10779 /* Return arbitrarily high cost when instruction is not supported - this
10780 prevents gcc from using it. */
10781 if (!TARGET_CMOVE)
10782 return 1024;
10783 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10784 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10785 }
10786
10787 /* Return cost of comparison done using sahf operation.
10788 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10789 static int
10790 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10791 {
10792 enum rtx_code bypass_code, first_code, second_code;
10793 /* Return arbitrarily high cost when the instruction is not preferred - this
10794 prevents gcc from using it. */
10795 if (!TARGET_USE_SAHF && !optimize_size)
10796 return 1024;
10797 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10798 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10799 }
10800
10801 /* Compute cost of the comparison done using any method.
10802 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10803 static int
10804 ix86_fp_comparison_cost (enum rtx_code code)
10805 {
10806 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10807 int min;
10808
10809 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10810 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10811
10812 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10813 if (min > sahf_cost)
10814 min = sahf_cost;
10815 if (min > fcomi_cost)
10816 min = fcomi_cost;
10817 return min;
10818 }
10819
10820 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10821
10822 static rtx
10823 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10824 rtx *second_test, rtx *bypass_test)
10825 {
10826 enum machine_mode fpcmp_mode, intcmp_mode;
10827 rtx tmp, tmp2;
10828 int cost = ix86_fp_comparison_cost (code);
10829 enum rtx_code bypass_code, first_code, second_code;
10830
10831 fpcmp_mode = ix86_fp_compare_mode (code);
10832 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10833
10834 if (second_test)
10835 *second_test = NULL_RTX;
10836 if (bypass_test)
10837 *bypass_test = NULL_RTX;
10838
10839 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10840
10841 /* Do fcomi/sahf based test when profitable. */
10842 if ((bypass_code == UNKNOWN || bypass_test)
10843 && (second_code == UNKNOWN || second_test)
10844 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10845 {
10846 if (TARGET_CMOVE)
10847 {
10848 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10849 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10850 tmp);
10851 emit_insn (tmp);
10852 }
10853 else
10854 {
10855 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10856 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10857 if (!scratch)
10858 scratch = gen_reg_rtx (HImode);
10859 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10860 emit_insn (gen_x86_sahf_1 (scratch));
10861 }
10862
10863 /* The FP codes work out to act like unsigned. */
10864 intcmp_mode = fpcmp_mode;
10865 code = first_code;
10866 if (bypass_code != UNKNOWN)
10867 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10868 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10869 const0_rtx);
10870 if (second_code != UNKNOWN)
10871 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10872 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10873 const0_rtx);
10874 }
10875 else
10876 {
10877 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10878 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10879 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10880 if (!scratch)
10881 scratch = gen_reg_rtx (HImode);
10882 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10883
10884 /* In the unordered case, we have to check C2 for NaN's, which
10885 doesn't happen to work out to anything nice combination-wise.
10886 So do some bit twiddling on the value we've got in AH to come
10887 up with an appropriate set of condition codes. */
10888
10889 intcmp_mode = CCNOmode;
10890 switch (code)
10891 {
10892 case GT:
10893 case UNGT:
10894 if (code == GT || !TARGET_IEEE_FP)
10895 {
10896 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10897 code = EQ;
10898 }
10899 else
10900 {
10901 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10902 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10903 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10904 intcmp_mode = CCmode;
10905 code = GEU;
10906 }
10907 break;
10908 case LT:
10909 case UNLT:
10910 if (code == LT && TARGET_IEEE_FP)
10911 {
10912 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10913 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10914 intcmp_mode = CCmode;
10915 code = EQ;
10916 }
10917 else
10918 {
10919 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10920 code = NE;
10921 }
10922 break;
10923 case GE:
10924 case UNGE:
10925 if (code == GE || !TARGET_IEEE_FP)
10926 {
10927 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10928 code = EQ;
10929 }
10930 else
10931 {
10932 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10933 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10934 GEN_INT (0x01)));
10935 code = NE;
10936 }
10937 break;
10938 case LE:
10939 case UNLE:
10940 if (code == LE && TARGET_IEEE_FP)
10941 {
10942 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10943 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10944 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10945 intcmp_mode = CCmode;
10946 code = LTU;
10947 }
10948 else
10949 {
10950 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10951 code = NE;
10952 }
10953 break;
10954 case EQ:
10955 case UNEQ:
10956 if (code == EQ && TARGET_IEEE_FP)
10957 {
10958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10959 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10960 intcmp_mode = CCmode;
10961 code = EQ;
10962 }
10963 else
10964 {
10965 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10966 code = NE;
10967 break;
10968 }
10969 break;
10970 case NE:
10971 case LTGT:
10972 if (code == NE && TARGET_IEEE_FP)
10973 {
10974 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10975 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10976 GEN_INT (0x40)));
10977 code = NE;
10978 }
10979 else
10980 {
10981 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10982 code = EQ;
10983 }
10984 break;
10985
10986 case UNORDERED:
10987 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10988 code = NE;
10989 break;
10990 case ORDERED:
10991 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10992 code = EQ;
10993 break;
10994
10995 default:
10996 gcc_unreachable ();
10997 }
10998 }
10999
11000 /* Return the test that should be put into the flags user, i.e.
11001 the bcc, scc, or cmov instruction. */
11002 return gen_rtx_fmt_ee (code, VOIDmode,
11003 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11004 const0_rtx);
11005 }
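
/* Note on the magic constants above (informational): after `fnstsw'
   the FPU condition bits land in AH as C0 = 0x01, C2 = 0x04 and
   C3 = 0x40, so 0x45 masks all three.  A compare leaves C0 set for
   "less", C3 set for "equal" and all three set for "unordered", which
   is why e.g. the UNORDERED case is just `test $0x04, %ah' / `jne'.  */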
11006
11007 rtx
11008 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11009 {
11010 rtx op0, op1, ret;
11011 op0 = ix86_compare_op0;
11012 op1 = ix86_compare_op1;
11013
11014 if (second_test)
11015 *second_test = NULL_RTX;
11016 if (bypass_test)
11017 *bypass_test = NULL_RTX;
11018
11019 if (ix86_compare_emitted)
11020 {
11021 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11022 ix86_compare_emitted = NULL_RTX;
11023 }
11024 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11025 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11026 second_test, bypass_test);
11027 else
11028 ret = ix86_expand_int_compare (code, op0, op1);
11029
11030 return ret;
11031 }
11032
11033 /* Return true if the CODE will result in a nontrivial jump sequence. */
11034 bool
11035 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11036 {
11037 enum rtx_code bypass_code, first_code, second_code;
11038 if (!TARGET_CMOVE)
11039 return true;
11040 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11041 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11042 }
11043
11044 void
11045 ix86_expand_branch (enum rtx_code code, rtx label)
11046 {
11047 rtx tmp;
11048
11049 /* If we have emitted a compare insn, go straight to simple.
11050 ix86_expand_compare won't emit anything if ix86_compare_emitted
11051 is non-NULL. */
11052 if (ix86_compare_emitted)
11053 goto simple;
11054
11055 switch (GET_MODE (ix86_compare_op0))
11056 {
11057 case QImode:
11058 case HImode:
11059 case SImode:
11060 simple:
11061 tmp = ix86_expand_compare (code, NULL, NULL);
11062 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11063 gen_rtx_LABEL_REF (VOIDmode, label),
11064 pc_rtx);
11065 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11066 return;
11067
11068 case SFmode:
11069 case DFmode:
11070 case XFmode:
11071 {
11072 rtvec vec;
11073 int use_fcomi;
11074 enum rtx_code bypass_code, first_code, second_code;
11075
11076 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11077 &ix86_compare_op1);
11078
11079 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11080
11081 /* Check whether we will use the natural sequence with one jump. If
11082 so, we can expand the jump early. Otherwise delay expansion by
11083 creating a compound insn so as not to confuse optimizers. */
11084 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11085 && TARGET_CMOVE)
11086 {
11087 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11088 gen_rtx_LABEL_REF (VOIDmode, label),
11089 pc_rtx, NULL_RTX, NULL_RTX);
11090 }
11091 else
11092 {
11093 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11094 ix86_compare_op0, ix86_compare_op1);
11095 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11096 gen_rtx_LABEL_REF (VOIDmode, label),
11097 pc_rtx);
11098 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11099
11100 use_fcomi = ix86_use_fcomi_compare (code);
11101 vec = rtvec_alloc (3 + !use_fcomi);
11102 RTVEC_ELT (vec, 0) = tmp;
11103 RTVEC_ELT (vec, 1)
11104 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11105 RTVEC_ELT (vec, 2)
11106 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11107 if (! use_fcomi)
11108 RTVEC_ELT (vec, 3)
11109 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11110
11111 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11112 }
11113 return;
11114 }
11115
11116 case DImode:
11117 if (TARGET_64BIT)
11118 goto simple;
11119 case TImode:
11120 /* Expand DImode branch into multiple compare+branch. */
11121 {
11122 rtx lo[2], hi[2], label2;
11123 enum rtx_code code1, code2, code3;
11124 enum machine_mode submode;
11125
11126 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11127 {
11128 tmp = ix86_compare_op0;
11129 ix86_compare_op0 = ix86_compare_op1;
11130 ix86_compare_op1 = tmp;
11131 code = swap_condition (code);
11132 }
11133 if (GET_MODE (ix86_compare_op0) == DImode)
11134 {
11135 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11136 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11137 submode = SImode;
11138 }
11139 else
11140 {
11141 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11142 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11143 submode = DImode;
11144 }
11145
11146 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11147 avoid two branches. This costs one extra insn, so disable when
11148 optimizing for size. */
11149
11150 if ((code == EQ || code == NE)
11151 && (!optimize_size
11152 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11153 {
11154 rtx xor0, xor1;
11155
11156 xor1 = hi[0];
11157 if (hi[1] != const0_rtx)
11158 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11159 NULL_RTX, 0, OPTAB_WIDEN);
11160
11161 xor0 = lo[0];
11162 if (lo[1] != const0_rtx)
11163 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11164 NULL_RTX, 0, OPTAB_WIDEN);
11165
11166 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11167 NULL_RTX, 0, OPTAB_WIDEN);
11168
11169 ix86_compare_op0 = tmp;
11170 ix86_compare_op1 = const0_rtx;
11171 ix86_expand_branch (code, label);
11172 return;
11173 }
11174
11175 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11176 op1 is a constant, and the low word is zero, then we can just
11177 examine the high word. */
11178
11179 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11180 switch (code)
11181 {
11182 case LT: case LTU: case GE: case GEU:
11183 ix86_compare_op0 = hi[0];
11184 ix86_compare_op1 = hi[1];
11185 ix86_expand_branch (code, label);
11186 return;
11187 default:
11188 break;
11189 }
11190
11191 /* Otherwise, we need two or three jumps. */
11192
11193 label2 = gen_label_rtx ();
11194
11195 code1 = code;
11196 code2 = swap_condition (code);
11197 code3 = unsigned_condition (code);
11198
11199 switch (code)
11200 {
11201 case LT: case GT: case LTU: case GTU:
11202 break;
11203
11204 case LE: code1 = LT; code2 = GT; break;
11205 case GE: code1 = GT; code2 = LT; break;
11206 case LEU: code1 = LTU; code2 = GTU; break;
11207 case GEU: code1 = GTU; code2 = LTU; break;
11208
11209 case EQ: code1 = UNKNOWN; code2 = NE; break;
11210 case NE: code2 = UNKNOWN; break;
11211
11212 default:
11213 gcc_unreachable ();
11214 }
11215
11216 /*
11217 * a < b =>
11218 * if (hi(a) < hi(b)) goto true;
11219 * if (hi(a) > hi(b)) goto false;
11220 * if (lo(a) < lo(b)) goto true;
11221 * false:
11222 */
11223
11224 ix86_compare_op0 = hi[0];
11225 ix86_compare_op1 = hi[1];
11226
11227 if (code1 != UNKNOWN)
11228 ix86_expand_branch (code1, label);
11229 if (code2 != UNKNOWN)
11230 ix86_expand_branch (code2, label2);
11231
11232 ix86_compare_op0 = lo[0];
11233 ix86_compare_op1 = lo[1];
11234 ix86_expand_branch (code3, label);
11235
11236 if (code2 != UNKNOWN)
11237 emit_label (label2);
11238 return;
11239 }
11240
11241 default:
11242 gcc_unreachable ();
11243 }
11244 }
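
/* Illustrative expansion of the equality trick above: on a 32-bit
   target `if (x == y)' for DImode x, y can branch with a single jump,
   roughly

	movl	hi(x), %eax
	movl	lo(x), %edx
	xorl	hi(y), %eax
	xorl	lo(y), %edx
	orl	%edx, %eax
	je	.Ltarget

   since (hi0 ^ hi1) | (lo0 ^ lo1) is zero exactly when both halves
   match.  Register choices are for illustration only; the real code
   comes from expand_binop.  */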
11245
11246 /* Split branch based on floating point condition. */
11247 void
11248 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11249 rtx target1, rtx target2, rtx tmp, rtx pushed)
11250 {
11251 rtx second, bypass;
11252 rtx label = NULL_RTX;
11253 rtx condition;
11254 int bypass_probability = -1, second_probability = -1, probability = -1;
11255 rtx i;
11256
11257 if (target2 != pc_rtx)
11258 {
11259 rtx tmp = target2;
11260 code = reverse_condition_maybe_unordered (code);
11261 target2 = target1;
11262 target1 = tmp;
11263 }
11264
11265 condition = ix86_expand_fp_compare (code, op1, op2,
11266 tmp, &second, &bypass);
11267
11268 /* Remove pushed operand from stack. */
11269 if (pushed)
11270 ix86_free_from_memory (GET_MODE (pushed));
11271
11272 if (split_branch_probability >= 0)
11273 {
11274 /* Distribute the probabilities across the jumps.
11275 Assume that BYPASS and SECOND always test
11276 for UNORDERED. */
11277 probability = split_branch_probability;
11278
11279 /* A value of 1 is low enough that the probability does not need
11280 to be updated. Later we may run some experiments and see
11281 if unordered values are more frequent in practice. */
11282 if (bypass)
11283 bypass_probability = 1;
11284 if (second)
11285 second_probability = 1;
11286 }
11287 if (bypass != NULL_RTX)
11288 {
11289 label = gen_label_rtx ();
11290 i = emit_jump_insn (gen_rtx_SET
11291 (VOIDmode, pc_rtx,
11292 gen_rtx_IF_THEN_ELSE (VOIDmode,
11293 bypass,
11294 gen_rtx_LABEL_REF (VOIDmode,
11295 label),
11296 pc_rtx)));
11297 if (bypass_probability >= 0)
11298 REG_NOTES (i)
11299 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11300 GEN_INT (bypass_probability),
11301 REG_NOTES (i));
11302 }
11303 i = emit_jump_insn (gen_rtx_SET
11304 (VOIDmode, pc_rtx,
11305 gen_rtx_IF_THEN_ELSE (VOIDmode,
11306 condition, target1, target2)));
11307 if (probability >= 0)
11308 REG_NOTES (i)
11309 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11310 GEN_INT (probability),
11311 REG_NOTES (i));
11312 if (second != NULL_RTX)
11313 {
11314 i = emit_jump_insn (gen_rtx_SET
11315 (VOIDmode, pc_rtx,
11316 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11317 target2)));
11318 if (second_probability >= 0)
11319 REG_NOTES (i)
11320 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11321 GEN_INT (second_probability),
11322 REG_NOTES (i));
11323 }
11324 if (label != NULL_RTX)
11325 emit_label (label);
11326 }
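
/* Shape of the emitted jumps (illustrative): when a bypass test is
   present the sequence above is roughly

	j<bypass>	.Llocal		; e.g. jp, skipping on unordered
	j<cond>		target1
	[j<second>	target1]	; only when a second test exists
     .Llocal:
	...falls through to target2...

   with the local label emitted last, after all the jump insns.  */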
11327
11328 int
11329 ix86_expand_setcc (enum rtx_code code, rtx dest)
11330 {
11331 rtx ret, tmp, tmpreg, equiv;
11332 rtx second_test, bypass_test;
11333
11334 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11335 return 0; /* FAIL */
11336
11337 gcc_assert (GET_MODE (dest) == QImode);
11338
11339 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11340 PUT_MODE (ret, QImode);
11341
11342 tmp = dest;
11343 tmpreg = dest;
11344
11345 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11346 if (bypass_test || second_test)
11347 {
11348 rtx test = second_test;
11349 int bypass = 0;
11350 rtx tmp2 = gen_reg_rtx (QImode);
11351 if (bypass_test)
11352 {
11353 gcc_assert (!second_test);
11354 test = bypass_test;
11355 bypass = 1;
11356 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11357 }
11358 PUT_MODE (test, QImode);
11359 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11360
11361 if (bypass)
11362 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11363 else
11364 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11365 }
11366
11367 /* Attach a REG_EQUAL note describing the comparison result. */
11368 if (ix86_compare_op0 && ix86_compare_op1)
11369 {
11370 equiv = simplify_gen_relational (code, QImode,
11371 GET_MODE (ix86_compare_op0),
11372 ix86_compare_op0, ix86_compare_op1);
11373 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11374 }
11375
11376 return 1; /* DONE */
11377 }
11378
11379 /* Expand a comparison setting or clearing the carry flag. Return true when
11380 successful and set *POP to the comparison operation. */
11381 static bool
11382 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11383 {
11384 enum machine_mode mode =
11385 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11386
11387 /* Do not handle double-word compares; they go through a special path. Also we can't
11388 deal with FP compares yet. This is possible to add. */
11389 if (mode == (TARGET_64BIT ? TImode : DImode))
11390 return false;
11391 if (FLOAT_MODE_P (mode))
11392 {
11393 rtx second_test = NULL, bypass_test = NULL;
11394 rtx compare_op, compare_seq;
11395
11396 /* Shortcut: the following common codes never translate into carry flag compares. */
11397 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11398 || code == ORDERED || code == UNORDERED)
11399 return false;
11400
11401 /* These comparisons require the zero flag; swap the operands so they won't. */
11402 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11403 && !TARGET_IEEE_FP)
11404 {
11405 rtx tmp = op0;
11406 op0 = op1;
11407 op1 = tmp;
11408 code = swap_condition (code);
11409 }
11410
11411 /* Try to expand the comparison and verify that we end up with a carry flag
11412 based comparison. This fails to be true only when we decide to expand the
11413 comparison using arithmetic, which is not a common scenario. */
11414 start_sequence ();
11415 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11416 &second_test, &bypass_test);
11417 compare_seq = get_insns ();
11418 end_sequence ();
11419
11420 if (second_test || bypass_test)
11421 return false;
11422 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11423 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11424 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11425 else
11426 code = GET_CODE (compare_op);
11427 if (code != LTU && code != GEU)
11428 return false;
11429 emit_insn (compare_seq);
11430 *pop = compare_op;
11431 return true;
11432 }
11433 if (!INTEGRAL_MODE_P (mode))
11434 return false;
11435 switch (code)
11436 {
11437 case LTU:
11438 case GEU:
11439 break;
11440
11441 /* Convert a==0 into (unsigned)a<1. */
11442 case EQ:
11443 case NE:
11444 if (op1 != const0_rtx)
11445 return false;
11446 op1 = const1_rtx;
11447 code = (code == EQ ? LTU : GEU);
11448 break;
11449
11450 /* Convert a>b into b<a or a>=b+1. */
11451 case GTU:
11452 case LEU:
11453 if (CONST_INT_P (op1))
11454 {
11455 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11456 /* Bail out on overflow. We could still swap the operands, but that
11457 would force loading the constant into a register. */
11458 if (op1 == const0_rtx
11459 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11460 return false;
11461 code = (code == GTU ? GEU : LTU);
11462 }
11463 else
11464 {
11465 rtx tmp = op1;
11466 op1 = op0;
11467 op0 = tmp;
11468 code = (code == GTU ? LTU : GEU);
11469 }
11470 break;
11471
11472 /* Convert a>=0 into (unsigned)a<0x80000000. */
11473 case LT:
11474 case GE:
11475 if (mode == DImode || op1 != const0_rtx)
11476 return false;
11477 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11478 code = (code == LT ? GEU : LTU);
11479 break;
11480 case LE:
11481 case GT:
11482 if (mode == DImode || op1 != constm1_rtx)
11483 return false;
11484 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11485 code = (code == LE ? GEU : LTU);
11486 break;
11487
11488 default:
11489 return false;
11490 }
11491 /* Swapping operands may cause a constant to appear as the first operand. */
11492 if (!nonimmediate_operand (op0, VOIDmode))
11493 {
11494 if (no_new_pseudos)
11495 return false;
11496 op0 = force_reg (mode, op0);
11497 }
11498 ix86_compare_op0 = op0;
11499 ix86_compare_op1 = op1;
11500 *pop = ix86_expand_compare (code, NULL, NULL);
11501 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11502 return true;
11503 }
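
/* Worked examples of the rewrites above (illustrative):

     a == 0              becomes   (unsigned) a < 1            (LTU: carry set iff a == 0)
     (unsigned) a > 7    becomes   (unsigned) a >= 8           (GEU, for constant op1)
     a >= 0              becomes   (unsigned) a < 0x80000000   (LTU, SImode sign-bit test)

   In every accepted case the final test is LTU or GEU, i.e. a bare
   carry-flag check that can feed sbb/adc based sequences directly.  */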
11504
11505 int
11506 ix86_expand_int_movcc (rtx operands[])
11507 {
11508 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11509 rtx compare_seq, compare_op;
11510 rtx second_test, bypass_test;
11511 enum machine_mode mode = GET_MODE (operands[0]);
11512 bool sign_bit_compare_p = false;
11513
11514 start_sequence ();
11515 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11516 compare_seq = get_insns ();
11517 end_sequence ();
11518
11519 compare_code = GET_CODE (compare_op);
11520
11521 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11522 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11523 sign_bit_compare_p = true;
11524
11525 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11526 HImode insns, we'd be swallowed in word prefix ops. */
11527
11528 if ((mode != HImode || TARGET_FAST_PREFIX)
11529 && (mode != (TARGET_64BIT ? TImode : DImode))
11530 && CONST_INT_P (operands[2])
11531 && CONST_INT_P (operands[3]))
11532 {
11533 rtx out = operands[0];
11534 HOST_WIDE_INT ct = INTVAL (operands[2]);
11535 HOST_WIDE_INT cf = INTVAL (operands[3]);
11536 HOST_WIDE_INT diff;
11537
11538 diff = ct - cf;
11539 /* Sign bit compares are better done using shifts than by using
11540 sbb. */
11541 if (sign_bit_compare_p
11542 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11543 ix86_compare_op1, &compare_op))
11544 {
11545 /* Detect overlap between destination and compare sources. */
11546 rtx tmp = out;
11547
11548 if (!sign_bit_compare_p)
11549 {
11550 bool fpcmp = false;
11551
11552 compare_code = GET_CODE (compare_op);
11553
11554 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11555 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11556 {
11557 fpcmp = true;
11558 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11559 }
11560
11561 /* To simplify rest of code, restrict to the GEU case. */
11562 if (compare_code == LTU)
11563 {
11564 HOST_WIDE_INT tmp = ct;
11565 ct = cf;
11566 cf = tmp;
11567 compare_code = reverse_condition (compare_code);
11568 code = reverse_condition (code);
11569 }
11570 else
11571 {
11572 if (fpcmp)
11573 PUT_CODE (compare_op,
11574 reverse_condition_maybe_unordered
11575 (GET_CODE (compare_op)));
11576 else
11577 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11578 }
11579 diff = ct - cf;
11580
11581 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11582 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11583 tmp = gen_reg_rtx (mode);
11584
11585 if (mode == DImode)
11586 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11587 else
11588 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11589 }
11590 else
11591 {
11592 if (code == GT || code == GE)
11593 code = reverse_condition (code);
11594 else
11595 {
11596 HOST_WIDE_INT tmp = ct;
11597 ct = cf;
11598 cf = tmp;
11599 diff = ct - cf;
11600 }
11601 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11602 ix86_compare_op1, VOIDmode, 0, -1);
11603 }
11604
11605 if (diff == 1)
11606 {
11607 /*
11608 * cmpl op0,op1
11609 * sbbl dest,dest
11610 * [addl dest, ct]
11611 *
11612 * Size 5 - 8.
11613 */
11614 if (ct)
11615 tmp = expand_simple_binop (mode, PLUS,
11616 tmp, GEN_INT (ct),
11617 copy_rtx (tmp), 1, OPTAB_DIRECT);
11618 }
11619 else if (cf == -1)
11620 {
11621 /*
11622 * cmpl op0,op1
11623 * sbbl dest,dest
11624 * orl $ct, dest
11625 *
11626 * Size 8.
11627 */
11628 tmp = expand_simple_binop (mode, IOR,
11629 tmp, GEN_INT (ct),
11630 copy_rtx (tmp), 1, OPTAB_DIRECT);
11631 }
11632 else if (diff == -1 && ct)
11633 {
11634 /*
11635 * cmpl op0,op1
11636 * sbbl dest,dest
11637 * notl dest
11638 * [addl dest, cf]
11639 *
11640 * Size 8 - 11.
11641 */
11642 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11643 if (cf)
11644 tmp = expand_simple_binop (mode, PLUS,
11645 copy_rtx (tmp), GEN_INT (cf),
11646 copy_rtx (tmp), 1, OPTAB_DIRECT);
11647 }
11648 else
11649 {
11650 /*
11651 * cmpl op0,op1
11652 * sbbl dest,dest
11653 * [notl dest]
11654 * andl cf - ct, dest
11655 * [addl dest, ct]
11656 *
11657 * Size 8 - 11.
11658 */
11659
11660 if (cf == 0)
11661 {
11662 cf = ct;
11663 ct = 0;
11664 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11665 }
11666
11667 tmp = expand_simple_binop (mode, AND,
11668 copy_rtx (tmp),
11669 gen_int_mode (cf - ct, mode),
11670 copy_rtx (tmp), 1, OPTAB_DIRECT);
11671 if (ct)
11672 tmp = expand_simple_binop (mode, PLUS,
11673 copy_rtx (tmp), GEN_INT (ct),
11674 copy_rtx (tmp), 1, OPTAB_DIRECT);
11675 }
11676
11677 if (!rtx_equal_p (tmp, out))
11678 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11679
11680 return 1; /* DONE */
11681 }
11682
11683 if (diff < 0)
11684 {
11685 HOST_WIDE_INT tmp;
11686 tmp = ct, ct = cf, cf = tmp;
11687 diff = -diff;
11688 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11689 {
11690 /* We may be reversing an unordered compare to a normal compare; that
11691 is not valid in general (we may convert a non-trapping condition
11692 into a trapping one), however on i386 we currently emit all
11693 comparisons unordered. */
11694 compare_code = reverse_condition_maybe_unordered (compare_code);
11695 code = reverse_condition_maybe_unordered (code);
11696 }
11697 else
11698 {
11699 compare_code = reverse_condition (compare_code);
11700 code = reverse_condition (code);
11701 }
11702 }
11703
11704 compare_code = UNKNOWN;
11705 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11706 && CONST_INT_P (ix86_compare_op1))
11707 {
11708 if (ix86_compare_op1 == const0_rtx
11709 && (code == LT || code == GE))
11710 compare_code = code;
11711 else if (ix86_compare_op1 == constm1_rtx)
11712 {
11713 if (code == LE)
11714 compare_code = LT;
11715 else if (code == GT)
11716 compare_code = GE;
11717 }
11718 }
11719
11720 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11721 if (compare_code != UNKNOWN
11722 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11723 && (cf == -1 || ct == -1))
11724 {
11725 /* If the lea code below could be used, only optimize
11726 if it results in a 2-insn sequence. */
11727
11728 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11729 || diff == 3 || diff == 5 || diff == 9)
11730 || (compare_code == LT && ct == -1)
11731 || (compare_code == GE && cf == -1))
11732 {
11733 /*
11734 * notl op1 (if necessary)
11735 * sarl $31, op1
11736 * orl cf, op1
11737 */
11738 if (ct != -1)
11739 {
11740 cf = ct;
11741 ct = -1;
11742 code = reverse_condition (code);
11743 }
11744
11745 out = emit_store_flag (out, code, ix86_compare_op0,
11746 ix86_compare_op1, VOIDmode, 0, -1);
11747
11748 out = expand_simple_binop (mode, IOR,
11749 out, GEN_INT (cf),
11750 out, 1, OPTAB_DIRECT);
11751 if (out != operands[0])
11752 emit_move_insn (operands[0], out);
11753
11754 return 1; /* DONE */
11755 }
11756 }
11757
11758
11759 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11760 || diff == 3 || diff == 5 || diff == 9)
11761 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11762 && (mode != DImode
11763 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11764 {
11765 /*
11766 * xorl dest,dest
11767 * cmpl op1,op2
11768 * setcc dest
11769 * lea cf(dest*(ct-cf)),dest
11770 *
11771 * Size 14.
11772 *
11773 * This also catches the degenerate setcc-only case.
11774 */
11775
11776 rtx tmp;
11777 int nops;
11778
11779 out = emit_store_flag (out, code, ix86_compare_op0,
11780 ix86_compare_op1, VOIDmode, 0, 1);
11781
11782 nops = 0;
11783 /* On x86_64 the lea instruction operates on Pmode, so we need
11784 to do the arithmetic in the proper mode to match. */
11785 if (diff == 1)
11786 tmp = copy_rtx (out);
11787 else
11788 {
11789 rtx out1;
11790 out1 = copy_rtx (out);
11791 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11792 nops++;
11793 if (diff & 1)
11794 {
11795 tmp = gen_rtx_PLUS (mode, tmp, out1);
11796 nops++;
11797 }
11798 }
11799 if (cf != 0)
11800 {
11801 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11802 nops++;
11803 }
11804 if (!rtx_equal_p (tmp, out))
11805 {
11806 if (nops == 1)
11807 out = force_operand (tmp, copy_rtx (out));
11808 else
11809 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11810 }
11811 if (!rtx_equal_p (out, operands[0]))
11812 emit_move_insn (operands[0], copy_rtx (out));
11813
11814 return 1; /* DONE */
11815 }
11816
11817 /*
11818 * General case: Jumpful:
11819 * xorl dest,dest cmpl op1, op2
11820 * cmpl op1, op2 movl ct, dest
11821 * setcc dest jcc 1f
11822 * decl dest movl cf, dest
11823 * andl (cf-ct),dest 1:
11824 * addl ct,dest
11825 *
11826 * Size 20. Size 14.
11827 *
11828 * This is reasonably steep, but branch mispredict costs are
11829 * high on modern cpus, so consider failing only if optimizing
11830 * for space.
11831 */
11832
11833 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11834 && BRANCH_COST >= 2)
11835 {
11836 if (cf == 0)
11837 {
11838 cf = ct;
11839 ct = 0;
11840 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11841 /* We may be reversing an unordered compare to a normal compare,
11842 which is not valid in general (we may convert a non-trapping
11843 condition into a trapping one), however on i386 we currently
11844 emit all comparisons unordered. */
11845 code = reverse_condition_maybe_unordered (code);
11846 else
11847 {
11848 code = reverse_condition (code);
11849 if (compare_code != UNKNOWN)
11850 compare_code = reverse_condition (compare_code);
11851 }
11852 }
11853
11854 if (compare_code != UNKNOWN)
11855 {
11856 /* notl op1 (if needed)
11857 sarl $31, op1
11858 andl (cf-ct), op1
11859 addl ct, op1
11860
11861 For x < 0 (resp. x <= -1) there will be no notl,
11862 so if possible swap the constants to get rid of the
11863 complement.
11864 True/false will be -1/0 while code below (store flag
11865 followed by decrement) is 0/-1, so the constants need
11866 to be exchanged once more. */
11867
11868 if (compare_code == GE || !cf)
11869 {
11870 code = reverse_condition (code);
11871 compare_code = LT;
11872 }
11873 else
11874 {
11875 HOST_WIDE_INT tmp = cf;
11876 cf = ct;
11877 ct = tmp;
11878 }
11879
11880 out = emit_store_flag (out, code, ix86_compare_op0,
11881 ix86_compare_op1, VOIDmode, 0, -1);
11882 }
11883 else
11884 {
11885 out = emit_store_flag (out, code, ix86_compare_op0,
11886 ix86_compare_op1, VOIDmode, 0, 1);
11887
11888 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11889 copy_rtx (out), 1, OPTAB_DIRECT);
11890 }
11891
11892 out = expand_simple_binop (mode, AND, copy_rtx (out),
11893 gen_int_mode (cf - ct, mode),
11894 copy_rtx (out), 1, OPTAB_DIRECT);
11895 if (ct)
11896 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11897 copy_rtx (out), 1, OPTAB_DIRECT);
11898 if (!rtx_equal_p (out, operands[0]))
11899 emit_move_insn (operands[0], copy_rtx (out));
11900
11901 return 1; /* DONE */
11902 }
11903 }
11904
11905 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11906 {
11907 /* Try a few more things with specific constants and a variable. */
11908
11909 optab op;
11910 rtx var, orig_out, out, tmp;
11911
11912 if (BRANCH_COST <= 2)
11913 return 0; /* FAIL */
11914
11915 /* If one of the two operands is an interesting constant, materialize a
11916 0/-1 mask via the code above (by recursion) and mask in the variable with a logical operation. */
11917
11918 if (CONST_INT_P (operands[2]))
11919 {
11920 var = operands[3];
11921 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11922 operands[3] = constm1_rtx, op = and_optab;
11923 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11924 operands[3] = const0_rtx, op = ior_optab;
11925 else
11926 return 0; /* FAIL */
11927 }
11928 else if (CONST_INT_P (operands[3]))
11929 {
11930 var = operands[2];
11931 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11932 operands[2] = constm1_rtx, op = and_optab;
11933 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11934 operands[2] = const0_rtx, op = ior_optab;
11935 else
11936 return 0; /* FAIL */
11937 }
11938 else
11939 return 0; /* FAIL */
11940
11941 orig_out = operands[0];
11942 tmp = gen_reg_rtx (mode);
11943 operands[0] = tmp;
11944
11945 /* Recurse to get the constant loaded. */
11946 if (ix86_expand_int_movcc (operands) == 0)
11947 return 0; /* FAIL */
11948
11949 /* Mask in the interesting variable. */
11950 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11951 OPTAB_WIDEN);
11952 if (!rtx_equal_p (out, orig_out))
11953 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11954
11955 return 1; /* DONE */
11956 }
11957
11958 /*
11959 * For comparison with above,
11960 *
11961 * movl cf,dest
11962 * movl ct,tmp
11963 * cmpl op1,op2
11964 * cmovcc tmp,dest
11965 *
11966 * Size 15.
11967 */
11968
11969 if (! nonimmediate_operand (operands[2], mode))
11970 operands[2] = force_reg (mode, operands[2]);
11971 if (! nonimmediate_operand (operands[3], mode))
11972 operands[3] = force_reg (mode, operands[3]);
11973
11974 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11975 {
11976 rtx tmp = gen_reg_rtx (mode);
11977 emit_move_insn (tmp, operands[3]);
11978 operands[3] = tmp;
11979 }
11980 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11981 {
11982 rtx tmp = gen_reg_rtx (mode);
11983 emit_move_insn (tmp, operands[2]);
11984 operands[2] = tmp;
11985 }
11986
11987 if (! register_operand (operands[2], VOIDmode)
11988 && (mode == QImode
11989 || ! register_operand (operands[3], VOIDmode)))
11990 operands[2] = force_reg (mode, operands[2]);
11991
11992 if (mode == QImode
11993 && ! register_operand (operands[3], VOIDmode))
11994 operands[3] = force_reg (mode, operands[3]);
11995
11996 emit_insn (compare_seq);
11997 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11998 gen_rtx_IF_THEN_ELSE (mode,
11999 compare_op, operands[2],
12000 operands[3])));
12001 if (bypass_test)
12002 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12003 gen_rtx_IF_THEN_ELSE (mode,
12004 bypass_test,
12005 copy_rtx (operands[3]),
12006 copy_rtx (operands[0]))));
12007 if (second_test)
12008 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12009 gen_rtx_IF_THEN_ELSE (mode,
12010 second_test,
12011 copy_rtx (operands[2]),
12012 copy_rtx (operands[0]))));
12013
12014 return 1; /* DONE */
12015 }
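
/* The sbb idiom used above, spelled out (illustrative): once a compare
   leaves the carry flag equal to the condition,

	sbbl	%eax, %eax		; %eax = condition ? -1 : 0

   and any `condition ? ct : cf' with constant ct/cf follows from

	cf + ((condition ? -1 : 0) & (ct - cf))

   The PLUS/IOR/AND/NOT cases above are just cheaper special forms of
   this identity for particular ct/cf values.  */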
12016
12017 /* Swap, force into registers, or otherwise massage the two operands
12018 to an sse comparison with a mask result. Thus we differ a bit from
12019 ix86_prepare_fp_compare_args which expects to produce a flags result.
12020
12021 The DEST operand exists to help determine whether to commute commutative
12022 operators. The POP0/POP1 operands are updated in place. The new
12023 comparison code is returned, or UNKNOWN if not implementable. */
12024
12025 static enum rtx_code
12026 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12027 rtx *pop0, rtx *pop1)
12028 {
12029 rtx tmp;
12030
12031 switch (code)
12032 {
12033 case LTGT:
12034 case UNEQ:
12035 /* We have no LTGT as an operator. We could implement it with
12036 NE & ORDERED, but this requires an extra temporary. It's
12037 not clear that it's worth it. */
12038 return UNKNOWN;
12039
12040 case LT:
12041 case LE:
12042 case UNGT:
12043 case UNGE:
12044 /* These are supported directly. */
12045 break;
12046
12047 case EQ:
12048 case NE:
12049 case UNORDERED:
12050 case ORDERED:
12051 /* For commutative operators, try to canonicalize the destination
12052 operand to be first in the comparison - this helps reload to
12053 avoid extra moves. */
12054 if (!dest || !rtx_equal_p (dest, *pop1))
12055 break;
12056 /* FALLTHRU */
12057
12058 case GE:
12059 case GT:
12060 case UNLE:
12061 case UNLT:
12062 /* These are not supported directly. Swap the comparison operands
12063 to transform into something that is supported. */
12064 tmp = *pop0;
12065 *pop0 = *pop1;
12066 *pop1 = tmp;
12067 code = swap_condition (code);
12068 break;
12069
12070 default:
12071 gcc_unreachable ();
12072 }
12073
12074 return code;
12075 }
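
/* Background for the swapping above (informational): the SSE cmpps/cmpss
   immediate only encodes eq, lt, le, unord, neq, nlt, nle and ord.  So
   e.g. `a > b' has no direct encoding and is rewritten as `b < a' by
   swapping the operands, while LTGT and UNEQ have no usable encoding at
   all and are rejected.  */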
12076
12077 /* Detect conditional moves that exactly match min/max operational
12078 semantics. Note that this is IEEE safe, as long as we don't
12079 interchange the operands.
12080
12081 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12082 and TRUE if the operation is successful and instructions are emitted. */
12083
12084 static bool
12085 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12086 rtx cmp_op1, rtx if_true, rtx if_false)
12087 {
12088 enum machine_mode mode;
12089 bool is_min;
12090 rtx tmp;
12091
12092 if (code == LT)
12093 ;
12094 else if (code == UNGE)
12095 {
12096 tmp = if_true;
12097 if_true = if_false;
12098 if_false = tmp;
12099 }
12100 else
12101 return false;
12102
12103 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12104 is_min = true;
12105 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12106 is_min = false;
12107 else
12108 return false;
12109
12110 mode = GET_MODE (dest);
12111
12112 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12113 but MODE may be a vector mode and thus not appropriate. */
12114 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12115 {
12116 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12117 rtvec v;
12118
12119 if_true = force_reg (mode, if_true);
12120 v = gen_rtvec (2, if_true, if_false);
12121 tmp = gen_rtx_UNSPEC (mode, v, u);
12122 }
12123 else
12124 {
12125 code = is_min ? SMIN : SMAX;
12126 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12127 }
12128
12129 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12130 return true;
12131 }
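
/* Why operand order matters above (informational): minss/maxss are not
   commutative under IEEE rules -- when either operand is a NaN, or when
   +0.0 is compared with -0.0, the hardware returns the second source
   operand.  That matches `a < b ? a : b' exactly, so the expansion is
   only valid as long as the operands are kept in their original order,
   as noted in the function comment.  */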
12132
12133 /* Expand an sse vector comparison. Return the register with the result. */
12134
12135 static rtx
12136 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12137 rtx op_true, rtx op_false)
12138 {
12139 enum machine_mode mode = GET_MODE (dest);
12140 rtx x;
12141
12142 cmp_op0 = force_reg (mode, cmp_op0);
12143 if (!nonimmediate_operand (cmp_op1, mode))
12144 cmp_op1 = force_reg (mode, cmp_op1);
12145
12146 if (optimize
12147 || reg_overlap_mentioned_p (dest, op_true)
12148 || reg_overlap_mentioned_p (dest, op_false))
12149 dest = gen_reg_rtx (mode);
12150
12151 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12152 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12153
12154 return dest;
12155 }
12156
12157 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12158 operations. This is used for both scalar and vector conditional moves. */
12159
12160 static void
12161 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12162 {
12163 enum machine_mode mode = GET_MODE (dest);
12164 rtx t2, t3, x;
12165
12166 if (op_false == CONST0_RTX (mode))
12167 {
12168 op_true = force_reg (mode, op_true);
12169 x = gen_rtx_AND (mode, cmp, op_true);
12170 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12171 }
12172 else if (op_true == CONST0_RTX (mode))
12173 {
12174 op_false = force_reg (mode, op_false);
12175 x = gen_rtx_NOT (mode, cmp);
12176 x = gen_rtx_AND (mode, x, op_false);
12177 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12178 }
12179 else
12180 {
12181 op_true = force_reg (mode, op_true);
12182 op_false = force_reg (mode, op_false);
12183
12184 t2 = gen_reg_rtx (mode);
12185 if (optimize)
12186 t3 = gen_reg_rtx (mode);
12187 else
12188 t3 = dest;
12189
12190 x = gen_rtx_AND (mode, op_true, cmp);
12191 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12192
12193 x = gen_rtx_NOT (mode, cmp);
12194 x = gen_rtx_AND (mode, x, op_false);
12195 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12196
12197 x = gen_rtx_IOR (mode, t3, t2);
12198 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12199 }
12200 }
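
/* What the general case above computes (illustrative): CMP is a
   per-element mask of all-ones or all-zeros, so

     dest = (op_true & cmp) | (op_false & ~cmp)

   selects op_true in the lanes where the comparison held and op_false
   elsewhere; the two special cases merely drop the unneeded half when
   one arm is already zero.  */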
12201
12202 /* Expand a floating-point conditional move. Return true if successful. */
12203
12204 int
12205 ix86_expand_fp_movcc (rtx operands[])
12206 {
12207 enum machine_mode mode = GET_MODE (operands[0]);
12208 enum rtx_code code = GET_CODE (operands[1]);
12209 rtx tmp, compare_op, second_test, bypass_test;
12210
12211 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12212 {
12213 enum machine_mode cmode;
12214
12215 /* Since we have no cmove for sse registers, don't force bad register
12216 allocation just to gain access to it. Deny movcc when the
12217 comparison mode doesn't match the move mode. */
12218 cmode = GET_MODE (ix86_compare_op0);
12219 if (cmode == VOIDmode)
12220 cmode = GET_MODE (ix86_compare_op1);
12221 if (cmode != mode)
12222 return 0;
12223
12224 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12225 &ix86_compare_op0,
12226 &ix86_compare_op1);
12227 if (code == UNKNOWN)
12228 return 0;
12229
12230 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12231 ix86_compare_op1, operands[2],
12232 operands[3]))
12233 return 1;
12234
12235 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12236 ix86_compare_op1, operands[2], operands[3]);
12237 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12238 return 1;
12239 }
12240
12241 /* The floating point conditional move instructions don't directly
12242 support conditions resulting from a signed integer comparison. */
12243
12244 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12245
12246 /* If fcmov cannot handle the comparison directly, materialize the
12247 result with setcc and test that value against zero instead. */
12248
12249 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12250 {
12251 gcc_assert (!second_test && !bypass_test);
12252 tmp = gen_reg_rtx (QImode);
12253 ix86_expand_setcc (code, tmp);
12254 code = NE;
12255 ix86_compare_op0 = tmp;
12256 ix86_compare_op1 = const0_rtx;
12257 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12258 }
12259 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12260 {
12261 tmp = gen_reg_rtx (mode);
12262 emit_move_insn (tmp, operands[3]);
12263 operands[3] = tmp;
12264 }
12265 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12266 {
12267 tmp = gen_reg_rtx (mode);
12268 emit_move_insn (tmp, operands[2]);
12269 operands[2] = tmp;
12270 }
12271
12272 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12273 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12274 operands[2], operands[3])));
12275 if (bypass_test)
12276 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12277 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12278 operands[3], operands[0])));
12279 if (second_test)
12280 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12281 gen_rtx_IF_THEN_ELSE (mode, second_test,
12282 operands[2], operands[0])));
12283
12284 return 1;
12285 }
12286
12287 /* Expand a floating-point vector conditional move; a vcond operation
12288 rather than a movcc operation. */
12289
12290 bool
12291 ix86_expand_fp_vcond (rtx operands[])
12292 {
12293 enum rtx_code code = GET_CODE (operands[3]);
12294 rtx cmp;
12295
12296 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12297 &operands[4], &operands[5]);
12298 if (code == UNKNOWN)
12299 return false;
12300
12301 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12302 operands[5], operands[1], operands[2]))
12303 return true;
12304
12305 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12306 operands[1], operands[2]);
12307 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12308 return true;
12309 }
12310
12311 /* Expand a signed integral vector conditional move. */
12312
12313 bool
12314 ix86_expand_int_vcond (rtx operands[])
12315 {
12316 enum machine_mode mode = GET_MODE (operands[0]);
12317 enum rtx_code code = GET_CODE (operands[3]);
12318 bool negate = false;
12319 rtx x, cop0, cop1;
12320
12321 cop0 = operands[4];
12322 cop1 = operands[5];
12323
12324 /* Canonicalize the comparison to EQ, GT, GTU. */
12325 switch (code)
12326 {
12327 case EQ:
12328 case GT:
12329 case GTU:
12330 break;
12331
12332 case NE:
12333 case LE:
12334 case LEU:
12335 code = reverse_condition (code);
12336 negate = true;
12337 break;
12338
12339 case GE:
12340 case GEU:
12341 code = reverse_condition (code);
12342 negate = true;
12343 /* FALLTHRU */
12344
12345 case LT:
12346 case LTU:
12347 code = swap_condition (code);
12348 x = cop0, cop0 = cop1, cop1 = x;
12349 break;
12350
12351 default:
12352 gcc_unreachable ();
12353 }
12354
12355 /* Unsigned parallel compare is not supported by the hardware. Play some
12356 tricks to turn this into a signed comparison against 0. */
12357 if (code == GTU)
12358 {
12359 cop0 = force_reg (mode, cop0);
12360
12361 switch (mode)
12362 {
12363 case V4SImode:
12364 {
12365 rtx t1, t2, mask;
12366
12367 /* Perform a parallel modulo subtraction. */
12368 t1 = gen_reg_rtx (mode);
12369 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12370
12371 /* Extract the original sign bit of op0. */
12372 mask = GEN_INT (-0x80000000);
12373 mask = gen_rtx_CONST_VECTOR (mode,
12374 gen_rtvec (4, mask, mask, mask, mask));
12375 mask = force_reg (mode, mask);
12376 t2 = gen_reg_rtx (mode);
12377 emit_insn (gen_andv4si3 (t2, cop0, mask));
12378
12379 /* XOR it back into the result of the subtraction. This results
12380 in the sign bit set iff we saw unsigned underflow. */
12381 x = gen_reg_rtx (mode);
12382 emit_insn (gen_xorv4si3 (x, t1, t2));
12383
12384 code = GT;
12385 }
12386 break;
12387
12388 case V16QImode:
12389 case V8HImode:
12390 /* Perform a parallel unsigned saturating subtraction. */
12391 x = gen_reg_rtx (mode);
12392 emit_insn (gen_rtx_SET (VOIDmode, x,
12393 gen_rtx_US_MINUS (mode, cop0, cop1)));
12394
12395 code = EQ;
12396 negate = !negate;
12397 break;
12398
12399 default:
12400 gcc_unreachable ();
12401 }
12402
12403 cop0 = x;
12404 cop1 = CONST0_RTX (mode);
12405 }
12406
12407 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12408 operands[1+negate], operands[2-negate]);
12409
12410 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12411 operands[2-negate]);
12412 return true;
12413 }
12414
12415 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12416 true if we should do zero extension, else sign extension. HIGH_P is
12417 true if we want the N/2 high elements, else the low elements. */
12418
12419 void
12420 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12421 {
12422 enum machine_mode imode = GET_MODE (operands[1]);
12423 rtx (*unpack)(rtx, rtx, rtx);
12424 rtx se, dest;
12425
12426 switch (imode)
12427 {
12428 case V16QImode:
12429 if (high_p)
12430 unpack = gen_vec_interleave_highv16qi;
12431 else
12432 unpack = gen_vec_interleave_lowv16qi;
12433 break;
12434 case V8HImode:
12435 if (high_p)
12436 unpack = gen_vec_interleave_highv8hi;
12437 else
12438 unpack = gen_vec_interleave_lowv8hi;
12439 break;
12440 case V4SImode:
12441 if (high_p)
12442 unpack = gen_vec_interleave_highv4si;
12443 else
12444 unpack = gen_vec_interleave_lowv4si;
12445 break;
12446 default:
12447 gcc_unreachable ();
12448 }
12449
12450 dest = gen_lowpart (imode, operands[0]);
12451
12452 if (unsigned_p)
12453 se = force_reg (imode, CONST0_RTX (imode));
12454 else
12455 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12456 operands[1], pc_rtx, pc_rtx);
12457
12458 emit_insn (unpack (dest, operands[1], se));
12459 }
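
/* Sketch of the signed case above: sign extension needs the new high
   half of every element to be a copy of its sign, and `0 > x' computed
   element-wise (a pcmpgt against the operand) yields exactly that --
   all-ones for negative elements, zero otherwise.  Interleaving the
   operand with that mask therefore widens each element with its sign;
   zero extension simply interleaves with a zero vector instead.  */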
12460
12461 /* Expand conditional increment or decrement using adc/sbb instructions.
12462 The default case using setcc followed by the conditional move can be
12463 done by generic code. */
12464 int
12465 ix86_expand_int_addcc (rtx operands[])
12466 {
12467 enum rtx_code code = GET_CODE (operands[1]);
12468 rtx compare_op;
12469 rtx val = const0_rtx;
12470 bool fpcmp = false;
12471 enum machine_mode mode = GET_MODE (operands[0]);
12472
12473 if (operands[3] != const1_rtx
12474 && operands[3] != constm1_rtx)
12475 return 0;
12476 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12477 ix86_compare_op1, &compare_op))
12478 return 0;
12479 code = GET_CODE (compare_op);
12480
12481 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12482 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12483 {
12484 fpcmp = true;
12485 code = ix86_fp_compare_code_to_integer (code);
12486 }
12487
12488 if (code != LTU)
12489 {
12490 val = constm1_rtx;
12491 if (fpcmp)
12492 PUT_CODE (compare_op,
12493 reverse_condition_maybe_unordered
12494 (GET_CODE (compare_op)));
12495 else
12496 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12497 }
12498 PUT_MODE (compare_op, mode);
12499
12500 /* Construct either adc or sbb insn. */
12501 if ((code == LTU) == (operands[3] == constm1_rtx))
12502 {
12503 switch (GET_MODE (operands[0]))
12504 {
12505 case QImode:
12506 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12507 break;
12508 case HImode:
12509 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12510 break;
12511 case SImode:
12512 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12513 break;
12514 case DImode:
12515 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12516 break;
12517 default:
12518 gcc_unreachable ();
12519 }
12520 }
12521 else
12522 {
12523 switch (GET_MODE (operands[0]))
12524 {
12525 case QImode:
12526 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12527 break;
12528 case HImode:
12529 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12530 break;
12531 case SImode:
12532 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12533 break;
12534 case DImode:
12535 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12536 break;
12537 default:
12538 gcc_unreachable ();
12539 }
12540 }
12541 return 1; /* DONE */
12542 }
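
/* Illustrative use of the above: with unsigned operands a statement
   like `if (a < b) x++;' can become

	cmpl	%ebx, %eax		; CF set iff a < b (a in %eax, b in %ebx)
	adcl	$0, %ecx		; x += carry

   and a decrement uses sbb; when the condition is the inverse of the
   carry flag the code above reverses it and uses -1 instead of 0.
   Register assignments are for illustration only.  */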
12543
12544
12545 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12546 works for floating point parameters and non-offsettable memories.
12547 For pushes, it returns just stack offsets; the values will be saved
12548 in the right order. At most three parts are generated. */
12549
12550 static int
12551 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12552 {
12553 int size;
12554
12555 if (!TARGET_64BIT)
12556 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12557 else
12558 size = (GET_MODE_SIZE (mode) + 4) / 8;
12559
12560 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12561 gcc_assert (size >= 2 && size <= 3);
12562
12563 /* Optimize constant pool references to immediates. This is used by fp
12564 moves, which force all constants to memory to allow combining. */
12565 if (MEM_P (operand) && MEM_READONLY_P (operand))
12566 {
12567 rtx tmp = maybe_get_pool_constant (operand);
12568 if (tmp)
12569 operand = tmp;
12570 }
12571
12572 if (MEM_P (operand) && !offsettable_memref_p (operand))
12573 {
12574 /* The only non-offsettable memories we handle are pushes. */
12575 int ok = push_operand (operand, VOIDmode);
12576
12577 gcc_assert (ok);
12578
12579 operand = copy_rtx (operand);
12580 PUT_MODE (operand, Pmode);
12581 parts[0] = parts[1] = parts[2] = operand;
12582 return size;
12583 }
12584
12585 if (GET_CODE (operand) == CONST_VECTOR)
12586 {
12587 enum machine_mode imode = int_mode_for_mode (mode);
12588 /* Caution: if we looked through a constant pool memory above,
12589 the operand may actually have a different mode now. That's
12590 ok, since we want to pun this all the way back to an integer. */
12591 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12592 gcc_assert (operand != NULL);
12593 mode = imode;
12594 }
12595
12596 if (!TARGET_64BIT)
12597 {
12598 if (mode == DImode)
12599 split_di (&operand, 1, &parts[0], &parts[1]);
12600 else
12601 {
12602 if (REG_P (operand))
12603 {
12604 gcc_assert (reload_completed);
12605 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12606 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12607 if (size == 3)
12608 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12609 }
12610 else if (offsettable_memref_p (operand))
12611 {
12612 operand = adjust_address (operand, SImode, 0);
12613 parts[0] = operand;
12614 parts[1] = adjust_address (operand, SImode, 4);
12615 if (size == 3)
12616 parts[2] = adjust_address (operand, SImode, 8);
12617 }
12618 else if (GET_CODE (operand) == CONST_DOUBLE)
12619 {
12620 REAL_VALUE_TYPE r;
12621 long l[4];
12622
12623 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12624 switch (mode)
12625 {
12626 case XFmode:
12627 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12628 parts[2] = gen_int_mode (l[2], SImode);
12629 break;
12630 case DFmode:
12631 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12632 break;
12633 default:
12634 gcc_unreachable ();
12635 }
12636 parts[1] = gen_int_mode (l[1], SImode);
12637 parts[0] = gen_int_mode (l[0], SImode);
12638 }
12639 else
12640 gcc_unreachable ();
12641 }
12642 }
12643 else
12644 {
12645 if (mode == TImode)
12646 split_ti (&operand, 1, &parts[0], &parts[1]);
12647 if (mode == XFmode || mode == TFmode)
12648 {
12649 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12650 if (REG_P (operand))
12651 {
12652 gcc_assert (reload_completed);
12653 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12654 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12655 }
12656 else if (offsettable_memref_p (operand))
12657 {
12658 operand = adjust_address (operand, DImode, 0);
12659 parts[0] = operand;
12660 parts[1] = adjust_address (operand, upper_mode, 8);
12661 }
12662 else if (GET_CODE (operand) == CONST_DOUBLE)
12663 {
12664 REAL_VALUE_TYPE r;
12665 long l[4];
12666
12667 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12668 real_to_target (l, &r, mode);
12669
12670 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12671 if (HOST_BITS_PER_WIDE_INT >= 64)
12672 parts[0]
12673 = gen_int_mode
12674 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12675 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12676 DImode);
12677 else
12678 parts[0] = immed_double_const (l[0], l[1], DImode);
12679
12680 if (upper_mode == SImode)
12681 parts[1] = gen_int_mode (l[2], SImode);
12682 else if (HOST_BITS_PER_WIDE_INT >= 64)
12683 parts[1]
12684 = gen_int_mode
12685 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12686 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12687 DImode);
12688 else
12689 parts[1] = immed_double_const (l[2], l[3], DImode);
12690 }
12691 else
12692 gcc_unreachable ();
12693 }
12694 }
12695
12696 return size;
12697 }
12698
12699 /* Emit insns to perform a move or push of DI, DF, and XF values.
12700 Operands 2-4 receive the destination parts and operands 5-7 the
12701 corresponding source parts, in the order in which the moves must
12702 be emitted. */
12703
12704 void
12705 ix86_split_long_move (rtx operands[])
12706 {
12707 rtx part[2][3];
12708 int nparts;
12709 int push = 0;
12710 int collisions = 0;
12711 enum machine_mode mode = GET_MODE (operands[0]);
12712
12713 /* The DFmode expanders may ask us to move double.
12714 For a 64-bit target this is a single move. By hiding that fact
12715 here we simplify the i386.md splitters. */
12716 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12717 {
12718 /* Optimize constant pool reference to immediates. This is used by
12719 fp moves, that force all constants to memory to allow combining. */
12720
12721 if (MEM_P (operands[1])
12722 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12723 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12724 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12725 if (push_operand (operands[0], VOIDmode))
12726 {
12727 operands[0] = copy_rtx (operands[0]);
12728 PUT_MODE (operands[0], Pmode);
12729 }
12730 else
12731 operands[0] = gen_lowpart (DImode, operands[0]);
12732 operands[1] = gen_lowpart (DImode, operands[1]);
12733 emit_move_insn (operands[0], operands[1]);
12734 return;
12735 }
12736
12737 /* The only non-offsettable memory we handle is push. */
12738 if (push_operand (operands[0], VOIDmode))
12739 push = 1;
12740 else
12741 gcc_assert (!MEM_P (operands[0])
12742 || offsettable_memref_p (operands[0]));
12743
12744 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12745 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12746
12747 /* When emitting push, take care for source operands on the stack. */
12748 if (push && MEM_P (operands[1])
12749 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12750 {
12751 if (nparts == 3)
12752 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12753 XEXP (part[1][2], 0));
12754 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12755 XEXP (part[1][1], 0));
12756 }
12757
12758 /* We need to do the copy in the right order in case an address register
12759 of the source overlaps the destination. */
12760 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12761 {
12762 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12763 collisions++;
12764 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12765 collisions++;
12766 if (nparts == 3
12767 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12768 collisions++;
12769
12770 /* Collision in the middle part can be handled by reordering. */
12771 if (collisions == 1 && nparts == 3
12772 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12773 {
12774 rtx tmp;
12775 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12776 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12777 }
12778
12779 /* If there are more collisions, we can't handle it by reordering.
12780 Do an lea to the last part and use only one colliding move. */
12781 else if (collisions > 1)
12782 {
12783 rtx base;
12784
12785 collisions = 1;
12786
12787 base = part[0][nparts - 1];
12788
12789 /* Handle the case when the last part isn't valid for lea.
12790 Happens in 64-bit mode storing the 12-byte XFmode. */
12791 if (GET_MODE (base) != Pmode)
12792 base = gen_rtx_REG (Pmode, REGNO (base));
12793
12794 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12795 part[1][0] = replace_equiv_address (part[1][0], base);
12796 part[1][1] = replace_equiv_address (part[1][1],
12797 plus_constant (base, UNITS_PER_WORD));
12798 if (nparts == 3)
12799 part[1][2] = replace_equiv_address (part[1][2],
12800 plus_constant (base, 8));
12801 }
12802 }
12803
12804 if (push)
12805 {
12806 if (!TARGET_64BIT)
12807 {
12808 if (nparts == 3)
12809 {
12810 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12811 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12812 emit_move_insn (part[0][2], part[1][2]);
12813 }
12814 }
12815 else
12816 {
12817 /* In 64-bit mode we don't have a 32-bit push available. If the operand
12818 is a register, that is fine - we just use the larger counterpart. We
12819 also retype the memory - this comes from an attempt to avoid the REX
12820 prefix when moving the second half of a TFmode value. */
12821 if (GET_MODE (part[1][1]) == SImode)
12822 {
12823 switch (GET_CODE (part[1][1]))
12824 {
12825 case MEM:
12826 part[1][1] = adjust_address (part[1][1], DImode, 0);
12827 break;
12828
12829 case REG:
12830 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12831 break;
12832
12833 default:
12834 gcc_unreachable ();
12835 }
12836
12837 if (GET_MODE (part[1][0]) == SImode)
12838 part[1][0] = part[1][1];
12839 }
12840 }
12841 emit_move_insn (part[0][1], part[1][1]);
12842 emit_move_insn (part[0][0], part[1][0]);
12843 return;
12844 }
12845
12846 /* Choose correct order to not overwrite the source before it is copied. */
12847 if ((REG_P (part[0][0])
12848 && REG_P (part[1][1])
12849 && (REGNO (part[0][0]) == REGNO (part[1][1])
12850 || (nparts == 3
12851 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12852 || (collisions > 0
12853 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12854 {
12855 if (nparts == 3)
12856 {
12857 operands[2] = part[0][2];
12858 operands[3] = part[0][1];
12859 operands[4] = part[0][0];
12860 operands[5] = part[1][2];
12861 operands[6] = part[1][1];
12862 operands[7] = part[1][0];
12863 }
12864 else
12865 {
12866 operands[2] = part[0][1];
12867 operands[3] = part[0][0];
12868 operands[5] = part[1][1];
12869 operands[6] = part[1][0];
12870 }
12871 }
12872 else
12873 {
12874 if (nparts == 3)
12875 {
12876 operands[2] = part[0][0];
12877 operands[3] = part[0][1];
12878 operands[4] = part[0][2];
12879 operands[5] = part[1][0];
12880 operands[6] = part[1][1];
12881 operands[7] = part[1][2];
12882 }
12883 else
12884 {
12885 operands[2] = part[0][0];
12886 operands[3] = part[0][1];
12887 operands[5] = part[1][0];
12888 operands[6] = part[1][1];
12889 }
12890 }
12891
12892 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12893 if (optimize_size)
12894 {
12895 if (CONST_INT_P (operands[5])
12896 && operands[5] != const0_rtx
12897 && REG_P (operands[2]))
12898 {
12899 if (CONST_INT_P (operands[6])
12900 && INTVAL (operands[6]) == INTVAL (operands[5]))
12901 operands[6] = operands[2];
12902
12903 if (nparts == 3
12904 && CONST_INT_P (operands[7])
12905 && INTVAL (operands[7]) == INTVAL (operands[5]))
12906 operands[7] = operands[2];
12907 }
12908
12909 if (nparts == 3
12910 && CONST_INT_P (operands[6])
12911 && operands[6] != const0_rtx
12912 && REG_P (operands[3])
12913 && CONST_INT_P (operands[7])
12914 && INTVAL (operands[7]) == INTVAL (operands[6]))
12915 operands[7] = operands[3];
12916 }
12917
12918 emit_move_insn (operands[2], operands[5]);
12919 emit_move_insn (operands[3], operands[6]);
12920 if (nparts == 3)
12921 emit_move_insn (operands[4], operands[7]);
12922
12923 return;
12924 }
12925
12926 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12927 left shift by a constant, either using a single shift or
12928 a sequence of add instructions. */
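/* Illustrative sketch (not part of the original code): on a 32-bit target,
   ix86_expand_ashl_const (op, 2, DImode) may emit, depending on the cost
   tables and -Os,

       op = op + op;      op << 1
       op = op + op;      op << 2

   while a larger COUNT falls through to a single ashlsi3 shift insn.  */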
12929
12930 static void
12931 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12932 {
12933 if (count == 1)
12934 {
12935 emit_insn ((mode == DImode
12936 ? gen_addsi3
12937 : gen_adddi3) (operand, operand, operand));
12938 }
12939 else if (!optimize_size
12940 && count * ix86_cost->add <= ix86_cost->shift_const)
12941 {
12942 int i;
12943 for (i = 0; i < count; i++)
12944 {
12945 emit_insn ((mode == DImode
12946 ? gen_addsi3
12947 : gen_adddi3) (operand, operand, operand));
12948 }
12949 }
12950 else
12951 emit_insn ((mode == DImode
12952 ? gen_ashlsi3
12953 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12954 }
12955
12956 void
12957 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12958 {
12959 rtx low[2], high[2];
12960 int count;
12961 const int single_width = mode == DImode ? 32 : 64;
12962
12963 if (CONST_INT_P (operands[2]))
12964 {
12965 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12966 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12967
12968 if (count >= single_width)
12969 {
12970 emit_move_insn (high[0], low[1]);
12971 emit_move_insn (low[0], const0_rtx);
12972
12973 if (count > single_width)
12974 ix86_expand_ashl_const (high[0], count - single_width, mode);
12975 }
12976 else
12977 {
12978 if (!rtx_equal_p (operands[0], operands[1]))
12979 emit_move_insn (operands[0], operands[1]);
12980 emit_insn ((mode == DImode
12981 ? gen_x86_shld_1
12982 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12983 ix86_expand_ashl_const (low[0], count, mode);
12984 }
12985 return;
12986 }
12987
12988 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12989
12990 if (operands[1] == const1_rtx)
12991 {
12992 /* Assuming we've chosen QImode-capable registers, 1 << N
12993 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12994 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12995 {
12996 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12997
12998 ix86_expand_clear (low[0]);
12999 ix86_expand_clear (high[0]);
13000 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13001
13002 d = gen_lowpart (QImode, low[0]);
13003 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13004 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13005 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13006
13007 d = gen_lowpart (QImode, high[0]);
13008 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13009 s = gen_rtx_NE (QImode, flags, const0_rtx);
13010 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13011 }
13012
13013 /* Otherwise, we can get the same results by manually performing
13014 a bit extract operation on bit 5/6, and then performing the two
13015 shifts. The two methods of getting 0/1 into low/high are exactly
13016 the same size. Avoiding the shift in the bit extract case helps
13017 pentium4 a bit; no one else seems to care much either way. */
13018 else
13019 {
13020 rtx x;
13021
13022 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13023 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13024 else
13025 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13026 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13027
13028 emit_insn ((mode == DImode
13029 ? gen_lshrsi3
13030 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13031 emit_insn ((mode == DImode
13032 ? gen_andsi3
13033 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13034 emit_move_insn (low[0], high[0]);
13035 emit_insn ((mode == DImode
13036 ? gen_xorsi3
13037 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13038 }
13039
13040 emit_insn ((mode == DImode
13041 ? gen_ashlsi3
13042 : gen_ashldi3) (low[0], low[0], operands[2]));
13043 emit_insn ((mode == DImode
13044 ? gen_ashlsi3
13045 : gen_ashldi3) (high[0], high[0], operands[2]));
13046 return;
13047 }
13048
13049 if (operands[1] == constm1_rtx)
13050 {
13051 /* For -1 << N, we can avoid the shld instruction, because we
13052 know that we're shifting 0...31/63 ones into a -1. */
13053 emit_move_insn (low[0], constm1_rtx);
13054 if (optimize_size)
13055 emit_move_insn (high[0], low[0]);
13056 else
13057 emit_move_insn (high[0], constm1_rtx);
13058 }
13059 else
13060 {
13061 if (!rtx_equal_p (operands[0], operands[1]))
13062 emit_move_insn (operands[0], operands[1]);
13063
13064 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13065 emit_insn ((mode == DImode
13066 ? gen_x86_shld_1
13067 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13068 }
13069
13070 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13071
13072 if (TARGET_CMOVE && scratch)
13073 {
13074 ix86_expand_clear (scratch);
13075 emit_insn ((mode == DImode
13076 ? gen_x86_shift_adj_1
13077 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13078 }
13079 else
13080 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13081 }
13082
13083 void
13084 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13085 {
13086 rtx low[2], high[2];
13087 int count;
13088 const int single_width = mode == DImode ? 32 : 64;
13089
13090 if (CONST_INT_P (operands[2]))
13091 {
13092 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13093 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13094
13095 if (count == single_width * 2 - 1)
13096 {
13097 emit_move_insn (high[0], high[1]);
13098 emit_insn ((mode == DImode
13099 ? gen_ashrsi3
13100 : gen_ashrdi3) (high[0], high[0],
13101 GEN_INT (single_width - 1)));
13102 emit_move_insn (low[0], high[0]);
13103
13104 }
13105 else if (count >= single_width)
13106 {
13107 emit_move_insn (low[0], high[1]);
13108 emit_move_insn (high[0], low[0]);
13109 emit_insn ((mode == DImode
13110 ? gen_ashrsi3
13111 : gen_ashrdi3) (high[0], high[0],
13112 GEN_INT (single_width - 1)));
13113 if (count > single_width)
13114 emit_insn ((mode == DImode
13115 ? gen_ashrsi3
13116 : gen_ashrdi3) (low[0], low[0],
13117 GEN_INT (count - single_width)));
13118 }
13119 else
13120 {
13121 if (!rtx_equal_p (operands[0], operands[1]))
13122 emit_move_insn (operands[0], operands[1]);
13123 emit_insn ((mode == DImode
13124 ? gen_x86_shrd_1
13125 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13126 emit_insn ((mode == DImode
13127 ? gen_ashrsi3
13128 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13129 }
13130 }
13131 else
13132 {
13133 if (!rtx_equal_p (operands[0], operands[1]))
13134 emit_move_insn (operands[0], operands[1]);
13135
13136 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13137
13138 emit_insn ((mode == DImode
13139 ? gen_x86_shrd_1
13140 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13141 emit_insn ((mode == DImode
13142 ? gen_ashrsi3
13143 : gen_ashrdi3) (high[0], high[0], operands[2]));
13144
13145 if (TARGET_CMOVE && scratch)
13146 {
13147 emit_move_insn (scratch, high[0]);
13148 emit_insn ((mode == DImode
13149 ? gen_ashrsi3
13150 : gen_ashrdi3) (scratch, scratch,
13151 GEN_INT (single_width - 1)));
13152 emit_insn ((mode == DImode
13153 ? gen_x86_shift_adj_1
13154 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13155 scratch));
13156 }
13157 else
13158 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13159 }
13160 }
13161
13162 void
13163 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13164 {
13165 rtx low[2], high[2];
13166 int count;
13167 const int single_width = mode == DImode ? 32 : 64;
13168
13169 if (CONST_INT_P (operands[2]))
13170 {
13171 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13172 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13173
13174 if (count >= single_width)
13175 {
13176 emit_move_insn (low[0], high[1]);
13177 ix86_expand_clear (high[0]);
13178
13179 if (count > single_width)
13180 emit_insn ((mode == DImode
13181 ? gen_lshrsi3
13182 : gen_lshrdi3) (low[0], low[0],
13183 GEN_INT (count - single_width)));
13184 }
13185 else
13186 {
13187 if (!rtx_equal_p (operands[0], operands[1]))
13188 emit_move_insn (operands[0], operands[1]);
13189 emit_insn ((mode == DImode
13190 ? gen_x86_shrd_1
13191 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13192 emit_insn ((mode == DImode
13193 ? gen_lshrsi3
13194 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13195 }
13196 }
13197 else
13198 {
13199 if (!rtx_equal_p (operands[0], operands[1]))
13200 emit_move_insn (operands[0], operands[1]);
13201
13202 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13203
13204 emit_insn ((mode == DImode
13205 ? gen_x86_shrd_1
13206 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13207 emit_insn ((mode == DImode
13208 ? gen_lshrsi3
13209 : gen_lshrdi3) (high[0], high[0], operands[2]));
13210
13211 /* Heh. By reversing the arguments, we can reuse this pattern. */
13212 if (TARGET_CMOVE && scratch)
13213 {
13214 ix86_expand_clear (scratch);
13215 emit_insn ((mode == DImode
13216 ? gen_x86_shift_adj_1
13217 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13218 scratch));
13219 }
13220 else
13221 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13222 }
13223 }
13224
13225 /* Predict just emitted jump instruction to be taken with probability PROB. */
13226 static void
13227 predict_jump (int prob)
13228 {
13229 rtx insn = get_last_insn ();
13230 gcc_assert (JUMP_P (insn));
13231 REG_NOTES (insn)
13232 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13233 GEN_INT (prob),
13234 REG_NOTES (insn));
13235 }
13236
13237 /* Helper function for the string operations below. Test VARIABLE against
13238 VALUE: if (VARIABLE & VALUE) is zero, jump to the returned label. */
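/* A rough sketch of what this helper emits (illustrative values only): for
   ix86_expand_aligntest (count, 4, true) the generated RTL behaves like

       tmp = count & 4;
       if (tmp == 0) goto label;

   so the code the caller emits between this call and emit_label (label)
   is skipped when that bit of COUNT is clear.  */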
13239 static rtx
13240 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13241 {
13242 rtx label = gen_label_rtx ();
13243 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13244 if (GET_MODE (variable) == DImode)
13245 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13246 else
13247 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13248 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13249 1, label);
13250 if (epilogue)
13251 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13252 else
13253 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13254 return label;
13255 }
13256
13257 /* Decrease COUNTREG by VALUE. */
13258 static void
13259 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13260 {
13261 if (GET_MODE (countreg) == DImode)
13262 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13263 else
13264 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13265 }
13266
13267 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
13268 rtx
13269 ix86_zero_extend_to_Pmode (rtx exp)
13270 {
13271 rtx r;
13272 if (GET_MODE (exp) == VOIDmode)
13273 return force_reg (Pmode, exp);
13274 if (GET_MODE (exp) == Pmode)
13275 return copy_to_mode_reg (Pmode, exp);
13276 r = gen_reg_rtx (Pmode);
13277 emit_insn (gen_zero_extendsidi2 (r, exp));
13278 return r;
13279 }
13280
13281 /* Divide COUNTREG by SCALE. */
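/* For example (illustrative): with SCALE == 4 a constant count of 37 is
   folded to 9, and a register count is shifted right by 2; the low two
   bits are left for the epilogue code to handle.  */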
13282 static rtx
13283 scale_counter (rtx countreg, int scale)
13284 {
13285 rtx sc;
13286 rtx piece_size_mask;
13287
13288 if (scale == 1)
13289 return countreg;
13290 if (CONST_INT_P (countreg))
13291 return GEN_INT (INTVAL (countreg) / scale);
13292 gcc_assert (REG_P (countreg));
13293
13294 piece_size_mask = GEN_INT (scale - 1);
13295 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13296 GEN_INT (exact_log2 (scale)),
13297 NULL, 1, OPTAB_DIRECT);
13298 return sc;
13299 }
13300
13301 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13302 to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13303 overall size is COUNT bytes. When SRCPTR is NULL, output the
13304 equivalent loop to set memory to VALUE (assumed to be in MODE).
13305
13306 The size is rounded down to a whole number of chunks moved at once.
13307 SRCMEM and DESTMEM provide the MEM rtxes used to supply proper aliasing info. */
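/* A sketch of the emitted structure, assuming a copy with UNROLL == 1 and
   MODE == SImode on a 32-bit target (illustrative pseudo code, not literal
   RTL):

       size = count & ~3;                  whole chunks only
       iter = 0;
     top:
       *(int *)(dest + iter) = *(int *)(src + iter);
       iter += 4;
       if (iter < size) goto top;
       dest += iter;  src += iter;
     out:
       ;                                   callers handle the tail bytes  */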
13308
13309
13310 static void
13311 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13312 rtx destptr, rtx srcptr, rtx value,
13313 rtx count, enum machine_mode mode, int unroll,
13314 int expected_size)
13315 {
13316 rtx out_label, top_label, iter, tmp;
13317 enum machine_mode iter_mode;
13318 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13319 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13320 rtx size;
13321 rtx x_addr;
13322 rtx y_addr;
13323 int i;
13324
13325 iter_mode = GET_MODE (count);
13326 if (iter_mode == VOIDmode)
13327 iter_mode = word_mode;
13328
13329 top_label = gen_label_rtx ();
13330 out_label = gen_label_rtx ();
13331 iter = gen_reg_rtx (iter_mode);
13332
13333 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13334 NULL, 1, OPTAB_DIRECT);
13335 /* Those two should combine. */
13336 if (piece_size == const1_rtx)
13337 {
13338 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13339 true, out_label);
13340 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13341 }
13342 emit_move_insn (iter, const0_rtx);
13343
13344 emit_label (top_label);
13345
13346 tmp = convert_modes (Pmode, iter_mode, iter, true);
13347 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13348 destmem = change_address (destmem, mode, x_addr);
13349
13350 if (srcmem)
13351 {
13352 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13353 srcmem = change_address (srcmem, mode, y_addr);
13354
13355 /* When unrolling for chips that reorder memory reads and writes,
13356 we can save registers by using a single temporary.
13357 Also, using 4 temporaries is overkill in 32-bit mode. */
13358 if (!TARGET_64BIT && 0)
13359 {
13360 for (i = 0; i < unroll; i++)
13361 {
13362 if (i)
13363 {
13364 destmem =
13365 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13366 srcmem =
13367 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13368 }
13369 emit_move_insn (destmem, srcmem);
13370 }
13371 }
13372 else
13373 {
13374 rtx tmpreg[4];
13375 gcc_assert (unroll <= 4);
13376 for (i = 0; i < unroll; i++)
13377 {
13378 tmpreg[i] = gen_reg_rtx (mode);
13379 if (i)
13380 {
13381 srcmem =
13382 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13383 }
13384 emit_move_insn (tmpreg[i], srcmem);
13385 }
13386 for (i = 0; i < unroll; i++)
13387 {
13388 if (i)
13389 {
13390 destmem =
13391 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13392 }
13393 emit_move_insn (destmem, tmpreg[i]);
13394 }
13395 }
13396 }
13397 else
13398 for (i = 0; i < unroll; i++)
13399 {
13400 if (i)
13401 destmem =
13402 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13403 emit_move_insn (destmem, value);
13404 }
13405
13406 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13407 true, OPTAB_LIB_WIDEN);
13408 if (tmp != iter)
13409 emit_move_insn (iter, tmp);
13410
13411 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13412 true, top_label);
13413 if (expected_size != -1)
13414 {
13415 expected_size /= GET_MODE_SIZE (mode) * unroll;
13416 if (expected_size == 0)
13417 predict_jump (0);
13418 else if (expected_size > REG_BR_PROB_BASE)
13419 predict_jump (REG_BR_PROB_BASE - 1);
13420 else
13421 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13422 }
13423 else
13424 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13425 iter = ix86_zero_extend_to_Pmode (iter);
13426 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13427 true, OPTAB_LIB_WIDEN);
13428 if (tmp != destptr)
13429 emit_move_insn (destptr, tmp);
13430 if (srcptr)
13431 {
13432 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13433 true, OPTAB_LIB_WIDEN);
13434 if (tmp != srcptr)
13435 emit_move_insn (srcptr, tmp);
13436 }
13437 emit_label (out_label);
13438 }
13439
13440 /* Output "rep; mov" instruction.
13441 Arguments have same meaning as for previous function */
13442 static void
13443 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13444 rtx destptr, rtx srcptr,
13445 rtx count,
13446 enum machine_mode mode)
13447 {
13448 rtx destexp;
13449 rtx srcexp;
13450 rtx countreg;
13451
13452 /* If the count is known and divisible by 4, rep movsl is shorter than rep movsb. */
13453 if (mode == QImode && CONST_INT_P (count)
13454 && !(INTVAL (count) & 3))
13455 mode = SImode;
13456
13457 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13458 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13459 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13460 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13461 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13462 if (mode != QImode)
13463 {
13464 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13465 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13466 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13467 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13468 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13469 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13470 }
13471 else
13472 {
13473 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13474 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13475 }
13476 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13477 destexp, srcexp));
13478 }
13479
13480 /* Output "rep; stos" instruction.
13481 Arguments have same meaning as for previous function */
13482 static void
13483 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13484 rtx count,
13485 enum machine_mode mode)
13486 {
13487 rtx destexp;
13488 rtx countreg;
13489
13490 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13491 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13492 value = force_reg (mode, gen_lowpart (mode, value));
13493 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13494 if (mode != QImode)
13495 {
13496 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13497 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13498 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13499 }
13500 else
13501 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13502 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13503 }
13504
13505 static void
13506 emit_strmov (rtx destmem, rtx srcmem,
13507 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13508 {
13509 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13510 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13511 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13512 }
13513
13514 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
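/* For instance (illustrative): with a known COUNT residue of 13 and
   max_size 16 on a 32-bit target, 13 = 8 + 4 + 1 is copied as two SImode
   moves, one more SImode move and one QImode move, selected by testing
   bits 3, 2, 1 and 0 of COUNT below.  */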
13515 static void
13516 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13517 rtx destptr, rtx srcptr, rtx count, int max_size)
13518 {
13519 rtx src, dest;
13520 if (CONST_INT_P (count))
13521 {
13522 HOST_WIDE_INT countval = INTVAL (count);
13523 int offset = 0;
13524
13525 if ((countval & 0x16) && max_size > 16)
13526 {
13527 if (TARGET_64BIT)
13528 {
13529 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13530 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13531 }
13532 else
13533 gcc_unreachable ();
13534 offset += 16;
13535 }
13536 if ((countval & 0x08) && max_size > 8)
13537 {
13538 if (TARGET_64BIT)
13539 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13540 else
13541 {
13542 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13543 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13544 }
13545 offset += 8;
13546 }
13547 if ((countval & 0x04) && max_size > 4)
13548 {
13549 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13550 offset += 4;
13551 }
13552 if ((countval & 0x02) && max_size > 2)
13553 {
13554 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13555 offset += 2;
13556 }
13557 if ((countval & 0x01) && max_size > 1)
13558 {
13559 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13560 offset += 1;
13561 }
13562 return;
13563 }
13564 if (max_size > 8)
13565 {
13566 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13567 count, 1, OPTAB_DIRECT);
13568 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13569 count, QImode, 1, 4);
13570 return;
13571 }
13572
13573 /* When single-instruction stringops are available, we can cheaply advance
13574 the dest and src pointers. Otherwise we save code size by maintaining an
13575 offset (zero is readily available from the preceding rep operation) and
13576 using x86 addressing modes. */
13577 if (TARGET_SINGLE_STRINGOP)
13578 {
13579 if (max_size > 4)
13580 {
13581 rtx label = ix86_expand_aligntest (count, 4, true);
13582 src = change_address (srcmem, SImode, srcptr);
13583 dest = change_address (destmem, SImode, destptr);
13584 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13585 emit_label (label);
13586 LABEL_NUSES (label) = 1;
13587 }
13588 if (max_size > 2)
13589 {
13590 rtx label = ix86_expand_aligntest (count, 2, true);
13591 src = change_address (srcmem, HImode, srcptr);
13592 dest = change_address (destmem, HImode, destptr);
13593 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13594 emit_label (label);
13595 LABEL_NUSES (label) = 1;
13596 }
13597 if (max_size > 1)
13598 {
13599 rtx label = ix86_expand_aligntest (count, 1, true);
13600 src = change_address (srcmem, QImode, srcptr);
13601 dest = change_address (destmem, QImode, destptr);
13602 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13603 emit_label (label);
13604 LABEL_NUSES (label) = 1;
13605 }
13606 }
13607 else
13608 {
13609 rtx offset = force_reg (Pmode, const0_rtx);
13610 rtx tmp;
13611
13612 if (max_size > 4)
13613 {
13614 rtx label = ix86_expand_aligntest (count, 4, true);
13615 src = change_address (srcmem, SImode, srcptr);
13616 dest = change_address (destmem, SImode, destptr);
13617 emit_move_insn (dest, src);
13618 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13619 true, OPTAB_LIB_WIDEN);
13620 if (tmp != offset)
13621 emit_move_insn (offset, tmp);
13622 emit_label (label);
13623 LABEL_NUSES (label) = 1;
13624 }
13625 if (max_size > 2)
13626 {
13627 rtx label = ix86_expand_aligntest (count, 2, true);
13628 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13629 src = change_address (srcmem, HImode, tmp);
13630 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13631 dest = change_address (destmem, HImode, tmp);
13632 emit_move_insn (dest, src);
13633 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13634 true, OPTAB_LIB_WIDEN);
13635 if (tmp != offset)
13636 emit_move_insn (offset, tmp);
13637 emit_label (label);
13638 LABEL_NUSES (label) = 1;
13639 }
13640 if (max_size > 1)
13641 {
13642 rtx label = ix86_expand_aligntest (count, 1, true);
13643 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13644 src = change_address (srcmem, QImode, tmp);
13645 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13646 dest = change_address (destmem, QImode, tmp);
13647 emit_move_insn (dest, src);
13648 emit_label (label);
13649 LABEL_NUSES (label) = 1;
13650 }
13651 }
13652 }
13653
13654 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13655 static void
13656 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13657 rtx count, int max_size)
13658 {
13659 count =
13660 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13661 count, 1, OPTAB_DIRECT);
13662 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13663 gen_lowpart (QImode, value), count, QImode,
13664 1, max_size / 2);
13665 }
13666
13667 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13668 static void
13669 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13670 {
13671 rtx dest;
13672
13673 if (CONST_INT_P (count))
13674 {
13675 HOST_WIDE_INT countval = INTVAL (count);
13676 int offset = 0;
13677
13678 if ((countval & 0x16) && max_size > 16)
13679 {
13680 if (TARGET_64BIT)
13681 {
13682 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13683 emit_insn (gen_strset (destptr, dest, value));
13684 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13685 emit_insn (gen_strset (destptr, dest, value));
13686 }
13687 else
13688 gcc_unreachable ();
13689 offset += 16;
13690 }
13691 if ((countval & 0x08) && max_size > 8)
13692 {
13693 if (TARGET_64BIT)
13694 {
13695 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13696 emit_insn (gen_strset (destptr, dest, value));
13697 }
13698 else
13699 {
13700 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13701 emit_insn (gen_strset (destptr, dest, value));
13702 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13703 emit_insn (gen_strset (destptr, dest, value));
13704 }
13705 offset += 8;
13706 }
13707 if ((countval & 0x04) && max_size > 4)
13708 {
13709 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13710 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13711 offset += 4;
13712 }
13713 if ((countval & 0x02) && max_size > 2)
13714 {
13715 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13716 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13717 offset += 2;
13718 }
13719 if ((countval & 0x01) && max_size > 1)
13720 {
13721 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13722 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13723 offset += 1;
13724 }
13725 return;
13726 }
13727 if (max_size > 32)
13728 {
13729 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13730 return;
13731 }
13732 if (max_size > 16)
13733 {
13734 rtx label = ix86_expand_aligntest (count, 16, true);
13735 if (TARGET_64BIT)
13736 {
13737 dest = change_address (destmem, DImode, destptr);
13738 emit_insn (gen_strset (destptr, dest, value));
13739 emit_insn (gen_strset (destptr, dest, value));
13740 }
13741 else
13742 {
13743 dest = change_address (destmem, SImode, destptr);
13744 emit_insn (gen_strset (destptr, dest, value));
13745 emit_insn (gen_strset (destptr, dest, value));
13746 emit_insn (gen_strset (destptr, dest, value));
13747 emit_insn (gen_strset (destptr, dest, value));
13748 }
13749 emit_label (label);
13750 LABEL_NUSES (label) = 1;
13751 }
13752 if (max_size > 8)
13753 {
13754 rtx label = ix86_expand_aligntest (count, 8, true);
13755 if (TARGET_64BIT)
13756 {
13757 dest = change_address (destmem, DImode, destptr);
13758 emit_insn (gen_strset (destptr, dest, value));
13759 }
13760 else
13761 {
13762 dest = change_address (destmem, SImode, destptr);
13763 emit_insn (gen_strset (destptr, dest, value));
13764 emit_insn (gen_strset (destptr, dest, value));
13765 }
13766 emit_label (label);
13767 LABEL_NUSES (label) = 1;
13768 }
13769 if (max_size > 4)
13770 {
13771 rtx label = ix86_expand_aligntest (count, 4, true);
13772 dest = change_address (destmem, SImode, destptr);
13773 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13774 emit_label (label);
13775 LABEL_NUSES (label) = 1;
13776 }
13777 if (max_size > 2)
13778 {
13779 rtx label = ix86_expand_aligntest (count, 2, true);
13780 dest = change_address (destmem, HImode, destptr);
13781 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13782 emit_label (label);
13783 LABEL_NUSES (label) = 1;
13784 }
13785 if (max_size > 1)
13786 {
13787 rtx label = ix86_expand_aligntest (count, 1, true);
13788 dest = change_address (destmem, QImode, destptr);
13789 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13790 emit_label (label);
13791 LABEL_NUSES (label) = 1;
13792 }
13793 }
13794
13795 /* Copy enough from SRC to DEST to align DEST, which is known to be aligned
13796 to ALIGN, up to DESIRED_ALIGNMENT. */
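/* Illustrative example: with ALIGN 1 and DESIRED_ALIGNMENT 8, up to
   1 + 2 + 4 = 7 bytes are copied, one conditional step per low bit of
   DESTPTR, and COUNT is decreased accordingly.  */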
13797 static void
13798 expand_movmem_prologue (rtx destmem, rtx srcmem,
13799 rtx destptr, rtx srcptr, rtx count,
13800 int align, int desired_alignment)
13801 {
13802 if (align <= 1 && desired_alignment > 1)
13803 {
13804 rtx label = ix86_expand_aligntest (destptr, 1, false);
13805 srcmem = change_address (srcmem, QImode, srcptr);
13806 destmem = change_address (destmem, QImode, destptr);
13807 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13808 ix86_adjust_counter (count, 1);
13809 emit_label (label);
13810 LABEL_NUSES (label) = 1;
13811 }
13812 if (align <= 2 && desired_alignment > 2)
13813 {
13814 rtx label = ix86_expand_aligntest (destptr, 2, false);
13815 srcmem = change_address (srcmem, HImode, srcptr);
13816 destmem = change_address (destmem, HImode, destptr);
13817 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13818 ix86_adjust_counter (count, 2);
13819 emit_label (label);
13820 LABEL_NUSES (label) = 1;
13821 }
13822 if (align <= 4 && desired_alignment > 4)
13823 {
13824 rtx label = ix86_expand_aligntest (destptr, 4, false);
13825 srcmem = change_address (srcmem, SImode, srcptr);
13826 destmem = change_address (destmem, SImode, destptr);
13827 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13828 ix86_adjust_counter (count, 4);
13829 emit_label (label);
13830 LABEL_NUSES (label) = 1;
13831 }
13832 gcc_assert (desired_alignment <= 8);
13833 }
13834
13835 /* Store enough of VALUE into DEST to align DEST, which is known to be
13836 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
13837 static void
13838 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13839 int align, int desired_alignment)
13840 {
13841 if (align <= 1 && desired_alignment > 1)
13842 {
13843 rtx label = ix86_expand_aligntest (destptr, 1, false);
13844 destmem = change_address (destmem, QImode, destptr);
13845 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13846 ix86_adjust_counter (count, 1);
13847 emit_label (label);
13848 LABEL_NUSES (label) = 1;
13849 }
13850 if (align <= 2 && desired_alignment > 2)
13851 {
13852 rtx label = ix86_expand_aligntest (destptr, 2, false);
13853 destmem = change_address (destmem, HImode, destptr);
13854 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13855 ix86_adjust_counter (count, 2);
13856 emit_label (label);
13857 LABEL_NUSES (label) = 1;
13858 }
13859 if (align <= 4 && desired_alignment > 4)
13860 {
13861 rtx label = ix86_expand_aligntest (destptr, 4, false);
13862 destmem = change_address (destmem, SImode, destptr);
13863 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13864 ix86_adjust_counter (count, 4);
13865 emit_label (label);
13866 LABEL_NUSES (label) = 1;
13867 }
13868 gcc_assert (desired_alignment <= 8);
13869 }
13870
13871 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13872 static enum stringop_alg
13873 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13874 int *dynamic_check)
13875 {
13876 const struct stringop_algs * algs;
13877
13878 *dynamic_check = -1;
13879 if (memset)
13880 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13881 else
13882 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13883 if (stringop_alg != no_stringop)
13884 return stringop_alg;
13885 /* rep; movq or rep; movl is the smallest variant. */
13886 else if (optimize_size)
13887 {
13888 if (!count || (count & 3))
13889 return rep_prefix_1_byte;
13890 else
13891 return rep_prefix_4_byte;
13892 }
13893 /* Very tiny blocks are best handled via the loop; REP is expensive to
13894 set up. */
13895 else if (expected_size != -1 && expected_size < 4)
13896 return loop_1_byte;
13897 else if (expected_size != -1)
13898 {
13899 unsigned int i;
13900 enum stringop_alg alg = libcall;
13901 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13902 {
13903 gcc_assert (algs->size[i].max);
13904 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13905 {
13906 if (algs->size[i].alg != libcall)
13907 alg = algs->size[i].alg;
13908 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13909 last non-libcall inline algorithm. */
13910 if (TARGET_INLINE_ALL_STRINGOPS)
13911 {
13912 /* When the current size is best copied by a libcall,
13913 but we are still forced to inline, run the heuristic below
13914 that picks code for medium-sized blocks. */
13915 if (alg != libcall)
13916 return alg;
13917 break;
13918 }
13919 else
13920 return algs->size[i].alg;
13921 }
13922 }
13923 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13924 }
13925 /* When asked to inline the call anyway, try to pick a meaningful choice.
13926 We look for the maximal size of a block that is faster to copy by hand and
13927 take blocks of at most that size, guessing that the average size will
13928 be roughly half of the block.
13929
13930 If this turns out to be bad, we might simply specify the preferred
13931 choice in ix86_costs. */
13932 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13933 && algs->unknown_size == libcall)
13934 {
13935 int max = -1;
13936 enum stringop_alg alg;
13937 int i;
13938
13939 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13940 if (algs->size[i].alg != libcall && algs->size[i].alg)
13941 max = algs->size[i].max;
13942 if (max == -1)
13943 max = 4096;
13944 alg = decide_alg (count, max / 2, memset, dynamic_check);
13945 gcc_assert (*dynamic_check == -1);
13946 gcc_assert (alg != libcall);
13947 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13948 *dynamic_check = max;
13949 return alg;
13950 }
13951 return algs->unknown_size;
13952 }
13953
13954 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13955 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13956 static int
13957 decide_alignment (int align,
13958 enum stringop_alg alg,
13959 int expected_size)
13960 {
13961 int desired_align = 0;
13962 switch (alg)
13963 {
13964 case no_stringop:
13965 gcc_unreachable ();
13966 case loop:
13967 case unrolled_loop:
13968 desired_align = GET_MODE_SIZE (Pmode);
13969 break;
13970 case rep_prefix_8_byte:
13971 desired_align = 8;
13972 break;
13973 case rep_prefix_4_byte:
13974 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
13975 copying a whole cache line at once. */
13976 if (TARGET_PENTIUMPRO)
13977 desired_align = 8;
13978 else
13979 desired_align = 4;
13980 break;
13981 case rep_prefix_1_byte:
13982 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
13983 copying a whole cache line at once. */
13984 if (TARGET_PENTIUMPRO)
13985 desired_align = 8;
13986 else
13987 desired_align = 1;
13988 break;
13989 case loop_1_byte:
13990 desired_align = 1;
13991 break;
13992 case libcall:
13993 return 0;
13994 }
13995
13996 if (optimize_size)
13997 desired_align = 1;
13998 if (desired_align < align)
13999 desired_align = align;
14000 if (expected_size != -1 && expected_size < 4)
14001 desired_align = align;
14002 return desired_align;
14003 }
14004
14005 /* Return the smallest power of 2 greater than VAL. */
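/* E.g. smallest_pow2_greater_than (5) == 8 and
   smallest_pow2_greater_than (8) == 16; the result is strictly greater
   than VAL.  */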
14006 static int
14007 smallest_pow2_greater_than (int val)
14008 {
14009 int ret = 1;
14010 while (ret <= val)
14011 ret <<= 1;
14012 return ret;
14013 }
14014
14015 /* Expand string move (memcpy) operation. Use i386 string operations when
14016 profitable. ix86_expand_setmem contains similar code. The code depends upon
14017 architecture, block size and alignment, but always has the same
14018 overall structure:
14019
14020 1) Prologue guard: Conditional that jumps ahead to the epilogue for small
14021 blocks that can be handled by the epilogue alone. This is faster but
14022 also needed for correctness, since the prologue assumes the block is larger
14023 than the desired alignment.
14024
14025 Optional dynamic check for size and libcall for large
14026 blocks is emitted here too, with -minline-stringops-dynamically.
14027
14028 2) Prologue: copy the first few bytes in order to get the destination
14029 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14030 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14031 We emit either a jump tree of power-of-two sized moves, or a byte loop.
14032
14033 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14034 with specified algorithm.
14035
14036 4) Epilogue: code copying tail of the block that is too small to be
14037 handled by main body (or up to size guarded by prologue guard). */
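/* Illustrative outline of the emitted code for a copy with unknown COUNT,
   rep_prefix_4_byte and DESIRED_ALIGN 4 (hypothetical parameters, pseudo
   code rather than RTL):

       if (count < 4) goto epilogue;                   1) prologue guard
       if (dest & 1) { copy 1 byte; count -= 1; }      2) alignment prologue
       if (dest & 2) { copy 2 bytes; count -= 2; }
       rep movsl with count >> 2 iterations            3) main body
     epilogue:
       copy the remaining count & 3 bytes              4) epilogue  */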
14038
14039 int
14040 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14041 rtx expected_align_exp, rtx expected_size_exp)
14042 {
14043 rtx destreg;
14044 rtx srcreg;
14045 rtx label = NULL;
14046 rtx tmp;
14047 rtx jump_around_label = NULL;
14048 HOST_WIDE_INT align = 1;
14049 unsigned HOST_WIDE_INT count = 0;
14050 HOST_WIDE_INT expected_size = -1;
14051 int size_needed = 0, epilogue_size_needed;
14052 int desired_align = 0;
14053 enum stringop_alg alg;
14054 int dynamic_check;
14055
14056 if (CONST_INT_P (align_exp))
14057 align = INTVAL (align_exp);
14058 /* i386 can do misaligned access at a reasonably increased cost. */
14059 if (CONST_INT_P (expected_align_exp)
14060 && INTVAL (expected_align_exp) > align)
14061 align = INTVAL (expected_align_exp);
14062 if (CONST_INT_P (count_exp))
14063 count = expected_size = INTVAL (count_exp);
14064 if (CONST_INT_P (expected_size_exp) && count == 0)
14065 expected_size = INTVAL (expected_size_exp);
14066
14067 /* Step 0: Decide on preferred algorithm, desired alignment and
14068 size of chunks to be copied by main loop. */
14069
14070 alg = decide_alg (count, expected_size, false, &dynamic_check);
14071 desired_align = decide_alignment (align, alg, expected_size);
14072
14073 if (!TARGET_ALIGN_STRINGOPS)
14074 align = desired_align;
14075
14076 if (alg == libcall)
14077 return 0;
14078 gcc_assert (alg != no_stringop);
14079 if (!count)
14080 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14081 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14082 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14083 switch (alg)
14084 {
14085 case libcall:
14086 case no_stringop:
14087 gcc_unreachable ();
14088 case loop:
14089 size_needed = GET_MODE_SIZE (Pmode);
14090 break;
14091 case unrolled_loop:
14092 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14093 break;
14094 case rep_prefix_8_byte:
14095 size_needed = 8;
14096 break;
14097 case rep_prefix_4_byte:
14098 size_needed = 4;
14099 break;
14100 case rep_prefix_1_byte:
14101 case loop_1_byte:
14102 size_needed = 1;
14103 break;
14104 }
14105
14106 epilogue_size_needed = size_needed;
14107
14108 /* Step 1: Prologue guard. */
14109
14110 /* Alignment code needs count to be in register. */
14111 if (CONST_INT_P (count_exp) && desired_align > align)
14112 {
14113 enum machine_mode mode = SImode;
14114 if (TARGET_64BIT && (count & ~0xffffffff))
14115 mode = DImode;
14116 count_exp = force_reg (mode, count_exp);
14117 }
14118 gcc_assert (desired_align >= 1 && align >= 1);
14119
14120 /* Ensure that alignment prologue won't copy past end of block. */
14121 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14122 && !count)
14123 {
14124 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14125
14126 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14127 Make sure it is a power of 2. */
14128 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14129
14130 label = gen_label_rtx ();
14131 emit_cmp_and_jump_insns (count_exp,
14132 GEN_INT (epilogue_size_needed),
14133 LTU, 0, GET_MODE (count_exp), 1, label);
14134 if (expected_size == -1 || expected_size < epilogue_size_needed)
14135 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14136 else
14137 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14138 }
14139 /* Emit code to decide on runtime whether library call or inline should be
14140 used. */
14141 if (dynamic_check != -1)
14142 {
14143 rtx hot_label = gen_label_rtx ();
14144 jump_around_label = gen_label_rtx ();
14145 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14146 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14147 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14148 emit_block_move_via_libcall (dst, src, count_exp, false);
14149 emit_jump (jump_around_label);
14150 emit_label (hot_label);
14151 }
14152
14153 /* Step 2: Alignment prologue. */
14154
14155 if (desired_align > align)
14156 {
14157 /* Except for the first move in the epilogue, we no longer know
14158 the constant offset in the aliasing info. It doesn't seem worth
14159 the pain to maintain it for the first move, so throw away
14160 the info early. */
14161 src = change_address (src, BLKmode, srcreg);
14162 dst = change_address (dst, BLKmode, destreg);
14163 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14164 desired_align);
14165 }
14166 if (label && size_needed == 1)
14167 {
14168 emit_label (label);
14169 LABEL_NUSES (label) = 1;
14170 label = NULL;
14171 }
14172
14173 /* Step 3: Main loop. */
14174
14175 switch (alg)
14176 {
14177 case libcall:
14178 case no_stringop:
14179 gcc_unreachable ();
14180 case loop_1_byte:
14181 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14182 count_exp, QImode, 1, expected_size);
14183 break;
14184 case loop:
14185 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14186 count_exp, Pmode, 1, expected_size);
14187 break;
14188 case unrolled_loop:
14189 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14190 registers for 4 temporaries anyway. */
14191 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14192 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14193 expected_size);
14194 break;
14195 case rep_prefix_8_byte:
14196 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14197 DImode);
14198 break;
14199 case rep_prefix_4_byte:
14200 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14201 SImode);
14202 break;
14203 case rep_prefix_1_byte:
14204 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14205 QImode);
14206 break;
14207 }
14208 /* Adjust properly the offset of src and dest memory for aliasing. */
14209 if (CONST_INT_P (count_exp))
14210 {
14211 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14212 (count / size_needed) * size_needed);
14213 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14214 (count / size_needed) * size_needed);
14215 }
14216 else
14217 {
14218 src = change_address (src, BLKmode, srcreg);
14219 dst = change_address (dst, BLKmode, destreg);
14220 }
14221
14222 /* Step 4: Epilogue to copy the remaining bytes. */
14223
14224 if (label)
14225 {
14226 /* When the main loop is done, COUNT_EXP might hold the original count,
14227 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14228 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14229 bytes. Compensate if needed. */
14230
14231 if (size_needed < epilogue_size_needed)
14232 {
14233 tmp =
14234 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14235 GEN_INT (size_needed - 1), count_exp, 1,
14236 OPTAB_DIRECT);
14237 if (tmp != count_exp)
14238 emit_move_insn (count_exp, tmp);
14239 }
14240 emit_label (label);
14241 LABEL_NUSES (label) = 1;
14242 }
14243
14244 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14245 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14246 epilogue_size_needed);
14247 if (jump_around_label)
14248 emit_label (jump_around_label);
14249 return 1;
14250 }
14251
14252 /* Helper function for memset expansion. For the QImode value 0xXY
14253 produce 0xXYXYXYXY of the width specified by MODE. This is
14254 essentially a multiplication by 0x01010101, but we can do slightly
14255 better than synth_mult by unwinding the sequence by hand on CPUs with
14256 slow multiply. */
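/* Worked example (illustrative): for a non-constant QImode value 0xAB
   promoted to SImode, the shift/or fallback below computes

       v              0x000000AB
       v |= v << 8    0x0000ABAB
       v |= v << 16   0xABABABAB

   which equals 0xAB * 0x01010101; the multiply path produces the same
   result when the cost tables say a multiply is cheaper.  */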
14257 static rtx
14258 promote_duplicated_reg (enum machine_mode mode, rtx val)
14259 {
14260 enum machine_mode valmode = GET_MODE (val);
14261 rtx tmp;
14262 int nops = mode == DImode ? 3 : 2;
14263
14264 gcc_assert (mode == SImode || mode == DImode);
14265 if (val == const0_rtx)
14266 return copy_to_mode_reg (mode, const0_rtx);
14267 if (CONST_INT_P (val))
14268 {
14269 HOST_WIDE_INT v = INTVAL (val) & 255;
14270
14271 v |= v << 8;
14272 v |= v << 16;
14273 if (mode == DImode)
14274 v |= (v << 16) << 16;
14275 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14276 }
14277
14278 if (valmode == VOIDmode)
14279 valmode = QImode;
14280 if (valmode != QImode)
14281 val = gen_lowpart (QImode, val);
14282 if (mode == QImode)
14283 return val;
14284 if (!TARGET_PARTIAL_REG_STALL)
14285 nops--;
14286 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14287 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14288 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14289 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14290 {
14291 rtx reg = convert_modes (mode, QImode, val, true);
14292 tmp = promote_duplicated_reg (mode, const1_rtx);
14293 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14294 OPTAB_DIRECT);
14295 }
14296 else
14297 {
14298 rtx reg = convert_modes (mode, QImode, val, true);
14299
14300 if (!TARGET_PARTIAL_REG_STALL)
14301 if (mode == SImode)
14302 emit_insn (gen_movsi_insv_1 (reg, reg));
14303 else
14304 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14305 else
14306 {
14307 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14308 NULL, 1, OPTAB_DIRECT);
14309 reg =
14310 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14311 }
14312 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14313 NULL, 1, OPTAB_DIRECT);
14314 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14315 if (mode == SImode)
14316 return reg;
14317 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14318 NULL, 1, OPTAB_DIRECT);
14319 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14320 return reg;
14321 }
14322 }
14323
14324 /* Duplicate VAL, using promote_duplicated_reg, into the maximal size that
14325 will be needed by the main loop copying SIZE_NEEDED chunks and by the
14326 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
14327 static rtx
14328 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14329 {
14330 rtx promoted_val;
14331
14332 if (TARGET_64BIT
14333 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14334 promoted_val = promote_duplicated_reg (DImode, val);
14335 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14336 promoted_val = promote_duplicated_reg (SImode, val);
14337 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14338 promoted_val = promote_duplicated_reg (HImode, val);
14339 else
14340 promoted_val = val;
14341
14342 return promoted_val;
14343 }
14344
14345 /* Expand string set operation (memset). Use i386 string operations when
14346 profitable. See the ix86_expand_movmem comment for an explanation of the
14347 individual steps performed. */
14348 int
14349 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14350 rtx expected_align_exp, rtx expected_size_exp)
14351 {
14352 rtx destreg;
14353 rtx label = NULL;
14354 rtx tmp;
14355 rtx jump_around_label = NULL;
14356 HOST_WIDE_INT align = 1;
14357 unsigned HOST_WIDE_INT count = 0;
14358 HOST_WIDE_INT expected_size = -1;
14359 int size_needed = 0, epilogue_size_needed;
14360 int desired_align = 0;
14361 enum stringop_alg alg;
14362 rtx promoted_val = NULL;
14363 bool force_loopy_epilogue = false;
14364 int dynamic_check;
14365
14366 if (CONST_INT_P (align_exp))
14367 align = INTVAL (align_exp);
14368 /* i386 can do misaligned access at a reasonably increased cost. */
14369 if (CONST_INT_P (expected_align_exp)
14370 && INTVAL (expected_align_exp) > align)
14371 align = INTVAL (expected_align_exp);
14372 if (CONST_INT_P (count_exp))
14373 count = expected_size = INTVAL (count_exp);
14374 if (CONST_INT_P (expected_size_exp) && count == 0)
14375 expected_size = INTVAL (expected_size_exp);
14376
14377 /* Step 0: Decide on preferred algorithm, desired alignment and
14378 size of chunks to be copied by main loop. */
14379
14380 alg = decide_alg (count, expected_size, true, &dynamic_check);
14381 desired_align = decide_alignment (align, alg, expected_size);
14382
14383 if (!TARGET_ALIGN_STRINGOPS)
14384 align = desired_align;
14385
14386 if (alg == libcall)
14387 return 0;
14388 gcc_assert (alg != no_stringop);
14389 if (!count)
14390 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14391 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14392 switch (alg)
14393 {
14394 case libcall:
14395 case no_stringop:
14396 gcc_unreachable ();
14397 case loop:
14398 size_needed = GET_MODE_SIZE (Pmode);
14399 break;
14400 case unrolled_loop:
14401 size_needed = GET_MODE_SIZE (Pmode) * 4;
14402 break;
14403 case rep_prefix_8_byte:
14404 size_needed = 8;
14405 break;
14406 case rep_prefix_4_byte:
14407 size_needed = 4;
14408 break;
14409 case rep_prefix_1_byte:
14410 case loop_1_byte:
14411 size_needed = 1;
14412 break;
14413 }
14414 epilogue_size_needed = size_needed;
14415
14416 /* Step 1: Prologue guard. */
14417
14418 /* Alignment code needs the count to be in a register. */
14419 if (CONST_INT_P (count_exp) && desired_align > align)
14420 {
14421 enum machine_mode mode = SImode;
14422 if (TARGET_64BIT && (count & ~0xffffffff))
14423 mode = DImode;
14424 count_exp = force_reg (mode, count_exp);
14425 }
14426 /* Do the cheap promotion to allow better CSE across the
14427 main loop and epilogue (i.e. one load of the big constant in
14428 front of all the code). */
14429 if (CONST_INT_P (val_exp))
14430 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14431 desired_align, align);
14432 /* Ensure that alignment prologue won't copy past end of block. */
14433 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14434 && !count)
14435 {
14436 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14437
14438 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14439 Make sure it is a power of 2. */
14440 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14441
14442 /* To improve performance of small blocks, we jump around the VAL
14443 promoting code. This means that if the promoted VAL is not constant,
14444 we might not use it in the epilogue and have to use the byte
14445 loop variant. */
14446 if (epilogue_size_needed > 2 && !promoted_val)
14447 force_loopy_epilogue = true;
14448 label = gen_label_rtx ();
14449 emit_cmp_and_jump_insns (count_exp,
14450 GEN_INT (epilogue_size_needed),
14451 LTU, 0, GET_MODE (count_exp), 1, label);
14452 if (expected_size == -1 || expected_size <= epilogue_size_needed)
14453 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14454 else
14455 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14456 }
14457 if (dynamic_check != -1)
14458 {
14459 rtx hot_label = gen_label_rtx ();
14460 jump_around_label = gen_label_rtx ();
14461 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14462 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14463 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14464 set_storage_via_libcall (dst, count_exp, val_exp, false);
14465 emit_jump (jump_around_label);
14466 emit_label (hot_label);
14467 }
14468
14469 /* Step 2: Alignment prologue. */
14470
14471 /* Do the expensive promotion once we branched off the small blocks. */
14472 if (!promoted_val)
14473 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14474 desired_align, align);
14475 gcc_assert (desired_align >= 1 && align >= 1);
14476
14477 if (desired_align > align)
14478 {
14479 /* Except for the first move in the epilogue, we no longer know
14480 the constant offset in aliasing info. It does not seem worth
14481 the pain to maintain it for the first move, so throw away
14482 the info early. */
14483 dst = change_address (dst, BLKmode, destreg);
14484 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14485 desired_align);
14486 }
14487 if (label && size_needed == 1)
14488 {
14489 emit_label (label);
14490 LABEL_NUSES (label) = 1;
14491 label = NULL;
14492 }
14493
14494 /* Step 3: Main loop. */
14495
14496 switch (alg)
14497 {
14498 case libcall:
14499 case no_stringop:
14500 gcc_unreachable ();
14501 case loop_1_byte:
14502 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14503 count_exp, QImode, 1, expected_size);
14504 break;
14505 case loop:
14506 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14507 count_exp, Pmode, 1, expected_size);
14508 break;
14509 case unrolled_loop:
14510 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14511 count_exp, Pmode, 4, expected_size);
14512 break;
14513 case rep_prefix_8_byte:
14514 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14515 DImode);
14516 break;
14517 case rep_prefix_4_byte:
14518 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14519 SImode);
14520 break;
14521 case rep_prefix_1_byte:
14522 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14523 QImode);
14524 break;
14525 }
14526 /* Properly adjust the offset of the destination memory for aliasing. */
14527 if (CONST_INT_P (count_exp))
14528 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14529 (count / size_needed) * size_needed);
14530 else
14531 dst = change_address (dst, BLKmode, destreg);
14532
14533 /* Step 4: Epilogue to copy the remaining bytes. */
14534
14535 if (label)
14536 {
14537 /* When the main loop is done, COUNT_EXP might hold original count,
14538 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14539 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14540 bytes. Compensate if needed. */
14541
14542 if (size_needed < desired_align - align)
14543 {
14544 tmp =
14545 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14546 GEN_INT (size_needed - 1), count_exp, 1,
14547 OPTAB_DIRECT);
14548 size_needed = desired_align - align + 1;
14549 if (tmp != count_exp)
14550 emit_move_insn (count_exp, tmp);
14551 }
14552 emit_label (label);
14553 LABEL_NUSES (label) = 1;
14554 }
14555 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14556 {
14557 if (force_loopy_epilogue)
14558 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14559 size_needed);
14560 else
14561 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14562 size_needed);
14563 }
14564 if (jump_around_label)
14565 emit_label (jump_around_label);
14566 return 1;
14567 }
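/* An illustrative walk-through, not a statement of what decide_alg will
   actually pick: for a memset of 100 bytes of unknown alignment on a 32-bit
   target using rep_prefix_4_byte, size_needed is 4, the prologue stores the
   promoted SImode value until the destination is 4-byte aligned, the main
   loop issues rep stosl over the aligned middle part, and the epilogue
   stores the remaining tail of at most 3 bytes. */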
14568
14569 /* Expand strlen. */
14570 int
14571 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14572 {
14573 rtx addr, scratch1, scratch2, scratch3, scratch4;
14574
14575 /* The generic case of the strlen expander is long. Avoid expanding
14576 it unless TARGET_INLINE_ALL_STRINGOPS. */
14577
14578 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14579 && !TARGET_INLINE_ALL_STRINGOPS
14580 && !optimize_size
14581 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14582 return 0;
14583
14584 addr = force_reg (Pmode, XEXP (src, 0));
14585 scratch1 = gen_reg_rtx (Pmode);
14586
14587 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14588 && !optimize_size)
14589 {
14590 /* Well, it seems that some optimizer does not combine a call like
14591 foo(strlen(bar), strlen(bar));
14592 when the move and the subtraction are done here. It does calculate
14593 the length just once when these instructions are done inside
14594 output_strlen_unroll(). But since &bar[strlen(bar)] is often used,
14595 and one fewer register is used for the lifetime of
14596 output_strlen_unroll(), this is better. */
14597
14598 emit_move_insn (out, addr);
14599
14600 ix86_expand_strlensi_unroll_1 (out, src, align);
14601
14602 /* strlensi_unroll_1 returns the address of the zero at the end of
14603 the string, like memchr(), so compute the length by subtracting
14604 the start address. */
14605 if (TARGET_64BIT)
14606 emit_insn (gen_subdi3 (out, out, addr));
14607 else
14608 emit_insn (gen_subsi3 (out, out, addr));
14609 }
14610 else
14611 {
14612 rtx unspec;
14613 scratch2 = gen_reg_rtx (Pmode);
14614 scratch3 = gen_reg_rtx (Pmode);
14615 scratch4 = force_reg (Pmode, constm1_rtx);
14616
14617 emit_move_insn (scratch3, addr);
14618 eoschar = force_reg (QImode, eoschar);
14619
14620 src = replace_equiv_address_nv (src, scratch3);
14621
14622 /* If .md starts supporting :P, this can be done in .md. */
14623 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14624 scratch4), UNSPEC_SCAS);
14625 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14626 if (TARGET_64BIT)
14627 {
14628 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14629 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14630 }
14631 else
14632 {
14633 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14634 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14635 }
14636 }
14637 return 1;
14638 }
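/* Worked example for the repnz scasb path above: the scan counter starts
   at -1 (scratch4) and is decremented once per byte scanned, including the
   terminating zero, and its final value lands in scratch1. For a
   5-character string, 6 bytes are scanned, so scratch1 == -7; the one's
   complement gives 6 and adding -1 yields the length 5, which is what the
   gen_one_cmpl*2 / gen_add*3 pair above computes. */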
14639
14640 /* Expand the appropriate insns for doing strlen if not just doing
14641 repnz; scasb
14642
14643 out = result, initialized with the start address
14644 align_rtx = alignment of the address.
14645 scratch = scratch register, initialized with the start address when
14646 not aligned, otherwise undefined
14647
14648 This is just the body. It needs the initializations mentioned above and
14649 some address computing at the end. These things are done in i386.md. */
14650
14651 static void
14652 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14653 {
14654 int align;
14655 rtx tmp;
14656 rtx align_2_label = NULL_RTX;
14657 rtx align_3_label = NULL_RTX;
14658 rtx align_4_label = gen_label_rtx ();
14659 rtx end_0_label = gen_label_rtx ();
14660 rtx mem;
14661 rtx tmpreg = gen_reg_rtx (SImode);
14662 rtx scratch = gen_reg_rtx (SImode);
14663 rtx cmp;
14664
14665 align = 0;
14666 if (CONST_INT_P (align_rtx))
14667 align = INTVAL (align_rtx);
14668
14669 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14670
14671 /* Is there a known alignment and is it less than 4? */
14672 if (align < 4)
14673 {
14674 rtx scratch1 = gen_reg_rtx (Pmode);
14675 emit_move_insn (scratch1, out);
14676 /* Is there a known alignment and is it not 2? */
14677 if (align != 2)
14678 {
14679 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14680 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14681
14682 /* Leave just the 3 lower bits. */
14683 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14684 NULL_RTX, 0, OPTAB_WIDEN);
14685
14686 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14687 Pmode, 1, align_4_label);
14688 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14689 Pmode, 1, align_2_label);
14690 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14691 Pmode, 1, align_3_label);
14692 }
14693 else
14694 {
14695 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14696 check whether it is aligned to a 4-byte boundary. */
14697
14698 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14699 NULL_RTX, 0, OPTAB_WIDEN);
14700
14701 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14702 Pmode, 1, align_4_label);
14703 }
14704
14705 mem = change_address (src, QImode, out);
14706
14707 /* Now compare the bytes. */
14708
14709 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14710 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14711 QImode, 1, end_0_label);
14712
14713 /* Increment the address. */
14714 if (TARGET_64BIT)
14715 emit_insn (gen_adddi3 (out, out, const1_rtx));
14716 else
14717 emit_insn (gen_addsi3 (out, out, const1_rtx));
14718
14719 /* Not needed with an alignment of 2. */
14720 if (align != 2)
14721 {
14722 emit_label (align_2_label);
14723
14724 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14725 end_0_label);
14726
14727 if (TARGET_64BIT)
14728 emit_insn (gen_adddi3 (out, out, const1_rtx));
14729 else
14730 emit_insn (gen_addsi3 (out, out, const1_rtx));
14731
14732 emit_label (align_3_label);
14733 }
14734
14735 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14736 end_0_label);
14737
14738 if (TARGET_64BIT)
14739 emit_insn (gen_adddi3 (out, out, const1_rtx));
14740 else
14741 emit_insn (gen_addsi3 (out, out, const1_rtx));
14742 }
14743
14744 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14745 align this loop: it only makes the program bigger and does not help
14746 to speed it up. */
14747 emit_label (align_4_label);
14748
14749 mem = change_address (src, SImode, out);
14750 emit_move_insn (scratch, mem);
14751 if (TARGET_64BIT)
14752 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14753 else
14754 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14755
14756 /* This formula yields a nonzero result iff one of the bytes is zero.
14757 This saves three branches inside the loop and many cycles. */
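/* For instance, with scratch == 0x41004242 (a zero byte present):
   tmpreg = scratch - 0x01010101 = 0x3FFF4141, ~scratch = 0xBEFFBDBD, and
   (tmpreg & ~scratch) & 0x80808080 == 0x00800000 != 0, so the loop exits.
   With no zero byte, e.g. scratch == 0x61626364, the same expression
   evaluates to 0 and the loop continues. */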
14758
14759 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14760 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14761 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14762 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14763 gen_int_mode (0x80808080, SImode)));
14764 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14765 align_4_label);
14766
14767 if (TARGET_CMOVE)
14768 {
14769 rtx reg = gen_reg_rtx (SImode);
14770 rtx reg2 = gen_reg_rtx (Pmode);
14771 emit_move_insn (reg, tmpreg);
14772 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14773
14774 /* If zero is not in the first two bytes, move two bytes forward. */
14775 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14776 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14777 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14778 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14779 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14780 reg,
14781 tmpreg)));
14782 /* Emit lea manually to avoid clobbering of flags. */
14783 emit_insn (gen_rtx_SET (SImode, reg2,
14784 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14785
14786 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14787 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14788 emit_insn (gen_rtx_SET (VOIDmode, out,
14789 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14790 reg2,
14791 out)));
14792
14793 }
14794 else
14795 {
14796 rtx end_2_label = gen_label_rtx ();
14797 /* Is zero in the first two bytes? */
14798
14799 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14800 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14801 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14802 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14803 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14804 pc_rtx);
14805 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14806 JUMP_LABEL (tmp) = end_2_label;
14807
14808 /* Not in the first two. Move two bytes forward. */
14809 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14810 if (TARGET_64BIT)
14811 emit_insn (gen_adddi3 (out, out, const2_rtx));
14812 else
14813 emit_insn (gen_addsi3 (out, out, const2_rtx));
14814
14815 emit_label (end_2_label);
14816
14817 }
14818
14819 /* Avoid branch in fixing the byte. */
14820 tmpreg = gen_lowpart (QImode, tmpreg);
14821 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14822 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14823 if (TARGET_64BIT)
14824 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14825 else
14826 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14827
14828 emit_label (end_0_label);
14829 }
14830
14831 void
14832 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14833 rtx callarg2 ATTRIBUTE_UNUSED,
14834 rtx pop, int sibcall)
14835 {
14836 rtx use = NULL, call;
14837
14838 if (pop == const0_rtx)
14839 pop = NULL;
14840 gcc_assert (!TARGET_64BIT || !pop);
14841
14842 if (TARGET_MACHO && !TARGET_64BIT)
14843 {
14844 #if TARGET_MACHO
14845 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14846 fnaddr = machopic_indirect_call_target (fnaddr);
14847 #endif
14848 }
14849 else
14850 {
14851 /* Static functions and indirect calls don't need the pic register. */
14852 if (! TARGET_64BIT && flag_pic
14853 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14854 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14855 use_reg (&use, pic_offset_table_rtx);
14856 }
14857
14858 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14859 {
14860 rtx al = gen_rtx_REG (QImode, 0);
14861 emit_move_insn (al, callarg2);
14862 use_reg (&use, al);
14863 }
14864
14865 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14866 {
14867 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14868 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14869 }
14870 if (sibcall && TARGET_64BIT
14871 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14872 {
14873 rtx addr;
14874 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14875 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14876 emit_move_insn (fnaddr, addr);
14877 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14878 }
14879
14880 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14881 if (retval)
14882 call = gen_rtx_SET (VOIDmode, retval, call);
14883 if (pop)
14884 {
14885 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14886 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14887 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14888 }
14889
14890 call = emit_call_insn (call);
14891 if (use)
14892 CALL_INSN_FUNCTION_USAGE (call) = use;
14893 }
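/* The %al store above follows the x86-64 ABI convention that calls which
   may take a variable argument list receive in %al an upper bound on the
   number of SSE registers used for argument passing. For instance, a call
   such as printf ("%f", x) is expanded with callarg2 == 1, emitting a move
   of 1 into %al before the call, while a negative callarg2 means no such
   hint is needed and the move is skipped. */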
14894
14895 \f
14896 /* Clear stack slot assignments remembered from previous functions.
14897 This is called from INIT_EXPANDERS once before RTL is emitted for each
14898 function. */
14899
14900 static struct machine_function *
14901 ix86_init_machine_status (void)
14902 {
14903 struct machine_function *f;
14904
14905 f = ggc_alloc_cleared (sizeof (struct machine_function));
14906 f->use_fast_prologue_epilogue_nregs = -1;
14907 f->tls_descriptor_call_expanded_p = 0;
14908
14909 return f;
14910 }
14911
14912 /* Return a MEM corresponding to a stack slot with mode MODE.
14913 Allocate a new slot if necessary.
14914
14915 The RTL for a function can have several slots available: N is
14916 which slot to use. */
14917
14918 rtx
14919 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14920 {
14921 struct stack_local_entry *s;
14922
14923 gcc_assert (n < MAX_386_STACK_LOCALS);
14924
14925 for (s = ix86_stack_locals; s; s = s->next)
14926 if (s->mode == mode && s->n == n)
14927 return copy_rtx (s->rtl);
14928
14929 s = (struct stack_local_entry *)
14930 ggc_alloc (sizeof (struct stack_local_entry));
14931 s->n = n;
14932 s->mode = mode;
14933 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14934
14935 s->next = ix86_stack_locals;
14936 ix86_stack_locals = s;
14937 return s->rtl;
14938 }
14939
14940 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14941
14942 static GTY(()) rtx ix86_tls_symbol;
14943 rtx
14944 ix86_tls_get_addr (void)
14945 {
14946
14947 if (!ix86_tls_symbol)
14948 {
14949 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14950 (TARGET_ANY_GNU_TLS
14951 && !TARGET_64BIT)
14952 ? "___tls_get_addr"
14953 : "__tls_get_addr");
14954 }
14955
14956 return ix86_tls_symbol;
14957 }
14958
14959 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14960
14961 static GTY(()) rtx ix86_tls_module_base_symbol;
14962 rtx
14963 ix86_tls_module_base (void)
14964 {
14965
14966 if (!ix86_tls_module_base_symbol)
14967 {
14968 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14969 "_TLS_MODULE_BASE_");
14970 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14971 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14972 }
14973
14974 return ix86_tls_module_base_symbol;
14975 }
14976 \f
14977 /* Calculate the length of the memory address in the instruction
14978 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14979
14980 int
14981 memory_address_length (rtx addr)
14982 {
14983 struct ix86_address parts;
14984 rtx base, index, disp;
14985 int len;
14986 int ok;
14987
14988 if (GET_CODE (addr) == PRE_DEC
14989 || GET_CODE (addr) == POST_INC
14990 || GET_CODE (addr) == PRE_MODIFY
14991 || GET_CODE (addr) == POST_MODIFY)
14992 return 0;
14993
14994 ok = ix86_decompose_address (addr, &parts);
14995 gcc_assert (ok);
14996
14997 if (parts.base && GET_CODE (parts.base) == SUBREG)
14998 parts.base = SUBREG_REG (parts.base);
14999 if (parts.index && GET_CODE (parts.index) == SUBREG)
15000 parts.index = SUBREG_REG (parts.index);
15001
15002 base = parts.base;
15003 index = parts.index;
15004 disp = parts.disp;
15005 len = 0;
15006
15007 /* Rule of thumb:
15008 - esp as the base always wants an index,
15009 - ebp as the base always wants a displacement. */
15010
15011 /* Register Indirect. */
15012 if (base && !index && !disp)
15013 {
15014 /* esp (for its index) and ebp (for its displacement) need
15015 the two-byte modrm form. */
15016 if (addr == stack_pointer_rtx
15017 || addr == arg_pointer_rtx
15018 || addr == frame_pointer_rtx
15019 || addr == hard_frame_pointer_rtx)
15020 len = 1;
15021 }
15022
15023 /* Direct Addressing. */
15024 else if (disp && !base && !index)
15025 len = 4;
15026
15027 else
15028 {
15029 /* Find the length of the displacement constant. */
15030 if (disp)
15031 {
15032 if (base && satisfies_constraint_K (disp))
15033 len = 1;
15034 else
15035 len = 4;
15036 }
15037 /* ebp always wants a displacement. */
15038 else if (base == hard_frame_pointer_rtx)
15039 len = 1;
15040
15041 /* An index requires the two-byte modrm form.... */
15042 if (index
15043 /* ...like esp, which always wants an index. */
15044 || base == stack_pointer_rtx
15045 || base == arg_pointer_rtx
15046 || base == frame_pointer_rtx)
15047 len += 1;
15048 }
15049
15050 return len;
15051 }
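/* Illustrative byte counts returned by the function above (modrm, opcode
   and prefixes excluded): a plain (%eax) address costs 0 extra bytes,
   8(%ebp) needs a disp8 (1), 4(%esp) needs a SIB byte plus disp8 (2), and
   a bare absolute address needs a full disp32 (4). */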
15052
15053 /* Compute the default value for the "length_immediate" attribute. When
15054 SHORTFORM is set, expect the insn to have an 8-bit immediate alternative. */
15055 int
15056 ix86_attr_length_immediate_default (rtx insn, int shortform)
15057 {
15058 int len = 0;
15059 int i;
15060 extract_insn_cached (insn);
15061 for (i = recog_data.n_operands - 1; i >= 0; --i)
15062 if (CONSTANT_P (recog_data.operand[i]))
15063 {
15064 gcc_assert (!len);
15065 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15066 len = 1;
15067 else
15068 {
15069 switch (get_attr_mode (insn))
15070 {
15071 case MODE_QI:
15072 len+=1;
15073 break;
15074 case MODE_HI:
15075 len+=2;
15076 break;
15077 case MODE_SI:
15078 len+=4;
15079 break;
15080 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15081 case MODE_DI:
15082 len+=4;
15083 break;
15084 default:
15085 fatal_insn ("unknown insn mode", insn);
15086 }
15087 }
15088 }
15089 return len;
15090 }
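/* For instance, "addl $100, %eax" in MODE_SI contributes 1 byte when
   SHORTFORM is set, because the constant satisfies the 'K' constraint and
   the imm8 form applies, while "addl $100000, %eax" contributes 4 bytes.
   A MODE_DI add of the same large constant still counts 4 bytes, since the
   immediate is encoded as a 32-bit sign-extended value. */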
15091 /* Compute default value for "length_address" attribute. */
15092 int
15093 ix86_attr_length_address_default (rtx insn)
15094 {
15095 int i;
15096
15097 if (get_attr_type (insn) == TYPE_LEA)
15098 {
15099 rtx set = PATTERN (insn);
15100
15101 if (GET_CODE (set) == PARALLEL)
15102 set = XVECEXP (set, 0, 0);
15103
15104 gcc_assert (GET_CODE (set) == SET);
15105
15106 return memory_address_length (SET_SRC (set));
15107 }
15108
15109 extract_insn_cached (insn);
15110 for (i = recog_data.n_operands - 1; i >= 0; --i)
15111 if (MEM_P (recog_data.operand[i]))
15112 {
15113 return memory_address_length (XEXP (recog_data.operand[i], 0));
15114 break;
15115 }
15116 return 0;
15117 }
15118 \f
15119 /* Return the maximum number of instructions a cpu can issue. */
15120
15121 static int
15122 ix86_issue_rate (void)
15123 {
15124 switch (ix86_tune)
15125 {
15126 case PROCESSOR_PENTIUM:
15127 case PROCESSOR_K6:
15128 return 2;
15129
15130 case PROCESSOR_PENTIUMPRO:
15131 case PROCESSOR_PENTIUM4:
15132 case PROCESSOR_ATHLON:
15133 case PROCESSOR_K8:
15134 case PROCESSOR_AMDFAM10:
15135 case PROCESSOR_NOCONA:
15136 case PROCESSOR_GENERIC32:
15137 case PROCESSOR_GENERIC64:
15138 return 3;
15139
15140 case PROCESSOR_CORE2:
15141 return 4;
15142
15143 default:
15144 return 1;
15145 }
15146 }
15147
15148 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15149 by DEP_INSN and nothing else set by DEP_INSN. */
15150
15151 static int
15152 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15153 {
15154 rtx set, set2;
15155
15156 /* Simplify the test for uninteresting insns. */
15157 if (insn_type != TYPE_SETCC
15158 && insn_type != TYPE_ICMOV
15159 && insn_type != TYPE_FCMOV
15160 && insn_type != TYPE_IBR)
15161 return 0;
15162
15163 if ((set = single_set (dep_insn)) != 0)
15164 {
15165 set = SET_DEST (set);
15166 set2 = NULL_RTX;
15167 }
15168 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15169 && XVECLEN (PATTERN (dep_insn), 0) == 2
15170 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15171 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15172 {
15173 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15174 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15175 }
15176 else
15177 return 0;
15178
15179 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15180 return 0;
15181
15182 /* This test is true if the dependent insn reads the flags but
15183 not any other potentially set register. */
15184 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15185 return 0;
15186
15187 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15188 return 0;
15189
15190 return 1;
15191 }
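/* Example: "cmpl %eax, %ebx" followed by "je label" satisfies this test,
   since the jump reads only the flags set by the compare. On Pentium,
   ix86_adjust_cost below uses this to treat the compare/branch pair as
   free (cost 0) so the two insns can pair. */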
15192
15193 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15194 address with operands set by DEP_INSN. */
15195
15196 static int
15197 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15198 {
15199 rtx addr;
15200
15201 if (insn_type == TYPE_LEA
15202 && TARGET_PENTIUM)
15203 {
15204 addr = PATTERN (insn);
15205
15206 if (GET_CODE (addr) == PARALLEL)
15207 addr = XVECEXP (addr, 0, 0);
15208
15209 gcc_assert (GET_CODE (addr) == SET);
15210
15211 addr = SET_SRC (addr);
15212 }
15213 else
15214 {
15215 int i;
15216 extract_insn_cached (insn);
15217 for (i = recog_data.n_operands - 1; i >= 0; --i)
15218 if (MEM_P (recog_data.operand[i]))
15219 {
15220 addr = XEXP (recog_data.operand[i], 0);
15221 goto found;
15222 }
15223 return 0;
15224 found:;
15225 }
15226
15227 return modified_in_p (addr, dep_insn);
15228 }
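/* Example of the address generation interlock this detects: on Pentium,
   "addl $4, %esi" immediately followed by "movl (%esi), %eax" stalls,
   because %esi is written by the dependence and then used to form the
   load address; ix86_adjust_cost below charges one extra cycle for it. */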
15229
15230 static int
15231 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15232 {
15233 enum attr_type insn_type, dep_insn_type;
15234 enum attr_memory memory;
15235 rtx set, set2;
15236 int dep_insn_code_number;
15237
15238 /* Anti and output dependencies have zero cost on all CPUs. */
15239 if (REG_NOTE_KIND (link) != 0)
15240 return 0;
15241
15242 dep_insn_code_number = recog_memoized (dep_insn);
15243
15244 /* If we can't recognize the insns, we can't really do anything. */
15245 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15246 return cost;
15247
15248 insn_type = get_attr_type (insn);
15249 dep_insn_type = get_attr_type (dep_insn);
15250
15251 switch (ix86_tune)
15252 {
15253 case PROCESSOR_PENTIUM:
15254 /* Address Generation Interlock adds a cycle of latency. */
15255 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15256 cost += 1;
15257
15258 /* ??? Compares pair with jump/setcc. */
15259 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15260 cost = 0;
15261
15262 /* Floating point stores require value to be ready one cycle earlier. */
15263 if (insn_type == TYPE_FMOV
15264 && get_attr_memory (insn) == MEMORY_STORE
15265 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15266 cost += 1;
15267 break;
15268
15269 case PROCESSOR_PENTIUMPRO:
15270 memory = get_attr_memory (insn);
15271
15272 /* INT->FP conversion is expensive. */
15273 if (get_attr_fp_int_src (dep_insn))
15274 cost += 5;
15275
15276 /* There is one cycle extra latency between an FP op and a store. */
15277 if (insn_type == TYPE_FMOV
15278 && (set = single_set (dep_insn)) != NULL_RTX
15279 && (set2 = single_set (insn)) != NULL_RTX
15280 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15281 && MEM_P (SET_DEST (set2)))
15282 cost += 1;
15283
15284 /* Show the ability of the reorder buffer to hide the latency of a load by
15285 executing it in parallel with the previous instruction when the
15286 previous instruction is not needed to compute the address. */
15287 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15288 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15289 {
15290 /* Claim moves to take one cycle, as the core can issue one load
15291 at a time and the next load can start a cycle later. */
15292 if (dep_insn_type == TYPE_IMOV
15293 || dep_insn_type == TYPE_FMOV)
15294 cost = 1;
15295 else if (cost > 1)
15296 cost--;
15297 }
15298 break;
15299
15300 case PROCESSOR_K6:
15301 memory = get_attr_memory (insn);
15302
15303 /* The esp dependency is resolved before the instruction is really
15304 finished. */
15305 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15306 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15307 return 1;
15308
15309 /* INT->FP conversion is expensive. */
15310 if (get_attr_fp_int_src (dep_insn))
15311 cost += 5;
15312
15313 /* Show the ability of the reorder buffer to hide the latency of a load by
15314 executing it in parallel with the previous instruction when the
15315 previous instruction is not needed to compute the address. */
15316 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15317 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15318 {
15319 /* Claim moves to take one cycle, as the core can issue one load
15320 at a time and the next load can start a cycle later. */
15321 if (dep_insn_type == TYPE_IMOV
15322 || dep_insn_type == TYPE_FMOV)
15323 cost = 1;
15324 else if (cost > 2)
15325 cost -= 2;
15326 else
15327 cost = 1;
15328 }
15329 break;
15330
15331 case PROCESSOR_ATHLON:
15332 case PROCESSOR_K8:
15333 case PROCESSOR_AMDFAM10:
15334 case PROCESSOR_GENERIC32:
15335 case PROCESSOR_GENERIC64:
15336 memory = get_attr_memory (insn);
15337
15338 /* Show the ability of the reorder buffer to hide the latency of a load by
15339 executing it in parallel with the previous instruction when the
15340 previous instruction is not needed to compute the address. */
15341 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15342 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15343 {
15344 enum attr_unit unit = get_attr_unit (insn);
15345 int loadcost = 3;
15346
15347 /* Because of the difference between the length of integer and
15348 floating unit pipeline preparation stages, the memory operands
15349 for floating point are cheaper.
15350
15351 ??? For Athlon the difference is most probably 2. */
15352 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15353 loadcost = 3;
15354 else
15355 loadcost = TARGET_ATHLON ? 2 : 0;
15356
15357 if (cost >= loadcost)
15358 cost -= loadcost;
15359 else
15360 cost = 0;
15361 }
15362
15363 default:
15364 break;
15365 }
15366
15367 return cost;
15368 }
15369
15370 /* How many alternative schedules to try. This should be as wide as the
15371 scheduling freedom in the DFA, but no wider. Making this value too
15372 large results in extra work for the scheduler. */
15373
15374 static int
15375 ia32_multipass_dfa_lookahead (void)
15376 {
15377 if (ix86_tune == PROCESSOR_PENTIUM)
15378 return 2;
15379
15380 if (ix86_tune == PROCESSOR_PENTIUMPRO
15381 || ix86_tune == PROCESSOR_K6)
15382 return 1;
15383
15384 else
15385 return 0;
15386 }
15387
15388 \f
15389 /* Compute the alignment given to a constant that is being placed in memory.
15390 EXP is the constant and ALIGN is the alignment that the object would
15391 ordinarily have.
15392 The value of this function is used instead of that alignment to align
15393 the object. */
15394
15395 int
15396 ix86_constant_alignment (tree exp, int align)
15397 {
15398 if (TREE_CODE (exp) == REAL_CST)
15399 {
15400 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15401 return 64;
15402 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15403 return 128;
15404 }
15405 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15406 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15407 return BITS_PER_WORD;
15408
15409 return align;
15410 }
15411
15412 /* Compute the alignment for a static variable.
15413 TYPE is the data type, and ALIGN is the alignment that
15414 the object would ordinarily have. The value of this function is used
15415 instead of that alignment to align the object. */
15416
15417 int
15418 ix86_data_alignment (tree type, int align)
15419 {
15420 int max_align = optimize_size ? BITS_PER_WORD : 256;
15421
15422 if (AGGREGATE_TYPE_P (type)
15423 && TYPE_SIZE (type)
15424 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15425 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15426 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15427 && align < max_align)
15428 align = max_align;
15429
15430 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15431 to a 16-byte boundary. */
15432 if (TARGET_64BIT)
15433 {
15434 if (AGGREGATE_TYPE_P (type)
15435 && TYPE_SIZE (type)
15436 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15437 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15438 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15439 return 128;
15440 }
15441
15442 if (TREE_CODE (type) == ARRAY_TYPE)
15443 {
15444 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15445 return 64;
15446 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15447 return 128;
15448 }
15449 else if (TREE_CODE (type) == COMPLEX_TYPE)
15450 {
15451
15452 if (TYPE_MODE (type) == DCmode && align < 64)
15453 return 64;
15454 if (TYPE_MODE (type) == XCmode && align < 128)
15455 return 128;
15456 }
15457 else if ((TREE_CODE (type) == RECORD_TYPE
15458 || TREE_CODE (type) == UNION_TYPE
15459 || TREE_CODE (type) == QUAL_UNION_TYPE)
15460 && TYPE_FIELDS (type))
15461 {
15462 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15463 return 64;
15464 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15465 return 128;
15466 }
15467 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15468 || TREE_CODE (type) == INTEGER_TYPE)
15469 {
15470 if (TYPE_MODE (type) == DFmode && align < 64)
15471 return 64;
15472 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15473 return 128;
15474 }
15475
15476 return align;
15477 }
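/* For instance, a static "char buf[300]" is raised to 256-bit alignment
   here when not optimizing for size, any aggregate of 16 bytes or more
   gets at least 128-bit alignment on x86-64 as the ABI comment above
   requires, and a lone double is given 64-bit alignment via the REAL_TYPE
   case. */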
15478
15479 /* Compute the alignment for a local variable.
15480 TYPE is the data type, and ALIGN is the alignment that
15481 the object would ordinarily have. The value of this macro is used
15482 instead of that alignment to align the object. */
15483
15484 int
15485 ix86_local_alignment (tree type, int align)
15486 {
15487 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15488 to a 16-byte boundary. */
15489 if (TARGET_64BIT)
15490 {
15491 if (AGGREGATE_TYPE_P (type)
15492 && TYPE_SIZE (type)
15493 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15494 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15495 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15496 return 128;
15497 }
15498 if (TREE_CODE (type) == ARRAY_TYPE)
15499 {
15500 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15501 return 64;
15502 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15503 return 128;
15504 }
15505 else if (TREE_CODE (type) == COMPLEX_TYPE)
15506 {
15507 if (TYPE_MODE (type) == DCmode && align < 64)
15508 return 64;
15509 if (TYPE_MODE (type) == XCmode && align < 128)
15510 return 128;
15511 }
15512 else if ((TREE_CODE (type) == RECORD_TYPE
15513 || TREE_CODE (type) == UNION_TYPE
15514 || TREE_CODE (type) == QUAL_UNION_TYPE)
15515 && TYPE_FIELDS (type))
15516 {
15517 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15518 return 64;
15519 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15520 return 128;
15521 }
15522 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15523 || TREE_CODE (type) == INTEGER_TYPE)
15524 {
15525
15526 if (TYPE_MODE (type) == DFmode && align < 64)
15527 return 64;
15528 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15529 return 128;
15530 }
15531 return align;
15532 }
15533 \f
15534 /* Emit RTL insns to initialize the variable parts of a trampoline.
15535 FNADDR is an RTX for the address of the function's pure code.
15536 CXT is an RTX for the static chain value for the function. */
15537 void
15538 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15539 {
15540 if (!TARGET_64BIT)
15541 {
15542 /* Compute offset from the end of the jmp to the target function. */
15543 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15544 plus_constant (tramp, 10),
15545 NULL_RTX, 1, OPTAB_DIRECT);
15546 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15547 gen_int_mode (0xb9, QImode));
15548 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15549 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15550 gen_int_mode (0xe9, QImode));
15551 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15552 }
15553 else
15554 {
15555 int offset = 0;
15556 /* Try to load the address using the shorter movl instead of movabs.
15557 We may want to support movq for kernel mode, but the kernel does not
15558 use trampolines at the moment. */
15559 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15560 {
15561 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15562 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15563 gen_int_mode (0xbb41, HImode));
15564 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15565 gen_lowpart (SImode, fnaddr));
15566 offset += 6;
15567 }
15568 else
15569 {
15570 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15571 gen_int_mode (0xbb49, HImode));
15572 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15573 fnaddr);
15574 offset += 10;
15575 }
15576 /* Load static chain using movabs to r10. */
15577 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15578 gen_int_mode (0xba49, HImode));
15579 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15580 cxt);
15581 offset += 10;
15582 /* Jump to r11. */
15583 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15584 gen_int_mode (0xff49, HImode));
15585 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15586 gen_int_mode (0xe3, QImode));
15587 offset += 3;
15588 gcc_assert (offset <= TRAMPOLINE_SIZE);
15589 }
15590
15591 #ifdef ENABLE_EXECUTE_STACK
15592 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15593 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15594 #endif
15595 }
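/* The bytes emitted above assemble to, for !TARGET_64BIT:

     b9 <cxt32>          movl  $cxt, %ecx
     e9 <disp32>         jmp   fnaddr        (disp relative to tramp + 10)

   and for TARGET_64BIT with a full 64-bit target address:

     49 bb <fnaddr64>    movabs $fnaddr, %r11
     49 ba <cxt64>       movabs $cxt, %r10
     49 ff e3            jmp    *%r11

   with the shorter "41 bb <fnaddr32>" movl form used when the address is a
   zero-extendable 32-bit immediate. */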
15596 \f
15597 /* Codes for all the SSE/MMX builtins. */
15598 enum ix86_builtins
15599 {
15600 IX86_BUILTIN_ADDPS,
15601 IX86_BUILTIN_ADDSS,
15602 IX86_BUILTIN_DIVPS,
15603 IX86_BUILTIN_DIVSS,
15604 IX86_BUILTIN_MULPS,
15605 IX86_BUILTIN_MULSS,
15606 IX86_BUILTIN_SUBPS,
15607 IX86_BUILTIN_SUBSS,
15608
15609 IX86_BUILTIN_CMPEQPS,
15610 IX86_BUILTIN_CMPLTPS,
15611 IX86_BUILTIN_CMPLEPS,
15612 IX86_BUILTIN_CMPGTPS,
15613 IX86_BUILTIN_CMPGEPS,
15614 IX86_BUILTIN_CMPNEQPS,
15615 IX86_BUILTIN_CMPNLTPS,
15616 IX86_BUILTIN_CMPNLEPS,
15617 IX86_BUILTIN_CMPNGTPS,
15618 IX86_BUILTIN_CMPNGEPS,
15619 IX86_BUILTIN_CMPORDPS,
15620 IX86_BUILTIN_CMPUNORDPS,
15621 IX86_BUILTIN_CMPEQSS,
15622 IX86_BUILTIN_CMPLTSS,
15623 IX86_BUILTIN_CMPLESS,
15624 IX86_BUILTIN_CMPNEQSS,
15625 IX86_BUILTIN_CMPNLTSS,
15626 IX86_BUILTIN_CMPNLESS,
15627 IX86_BUILTIN_CMPNGTSS,
15628 IX86_BUILTIN_CMPNGESS,
15629 IX86_BUILTIN_CMPORDSS,
15630 IX86_BUILTIN_CMPUNORDSS,
15631
15632 IX86_BUILTIN_COMIEQSS,
15633 IX86_BUILTIN_COMILTSS,
15634 IX86_BUILTIN_COMILESS,
15635 IX86_BUILTIN_COMIGTSS,
15636 IX86_BUILTIN_COMIGESS,
15637 IX86_BUILTIN_COMINEQSS,
15638 IX86_BUILTIN_UCOMIEQSS,
15639 IX86_BUILTIN_UCOMILTSS,
15640 IX86_BUILTIN_UCOMILESS,
15641 IX86_BUILTIN_UCOMIGTSS,
15642 IX86_BUILTIN_UCOMIGESS,
15643 IX86_BUILTIN_UCOMINEQSS,
15644
15645 IX86_BUILTIN_CVTPI2PS,
15646 IX86_BUILTIN_CVTPS2PI,
15647 IX86_BUILTIN_CVTSI2SS,
15648 IX86_BUILTIN_CVTSI642SS,
15649 IX86_BUILTIN_CVTSS2SI,
15650 IX86_BUILTIN_CVTSS2SI64,
15651 IX86_BUILTIN_CVTTPS2PI,
15652 IX86_BUILTIN_CVTTSS2SI,
15653 IX86_BUILTIN_CVTTSS2SI64,
15654
15655 IX86_BUILTIN_MAXPS,
15656 IX86_BUILTIN_MAXSS,
15657 IX86_BUILTIN_MINPS,
15658 IX86_BUILTIN_MINSS,
15659
15660 IX86_BUILTIN_LOADUPS,
15661 IX86_BUILTIN_STOREUPS,
15662 IX86_BUILTIN_MOVSS,
15663
15664 IX86_BUILTIN_MOVHLPS,
15665 IX86_BUILTIN_MOVLHPS,
15666 IX86_BUILTIN_LOADHPS,
15667 IX86_BUILTIN_LOADLPS,
15668 IX86_BUILTIN_STOREHPS,
15669 IX86_BUILTIN_STORELPS,
15670
15671 IX86_BUILTIN_MASKMOVQ,
15672 IX86_BUILTIN_MOVMSKPS,
15673 IX86_BUILTIN_PMOVMSKB,
15674
15675 IX86_BUILTIN_MOVNTPS,
15676 IX86_BUILTIN_MOVNTQ,
15677
15678 IX86_BUILTIN_LOADDQU,
15679 IX86_BUILTIN_STOREDQU,
15680
15681 IX86_BUILTIN_PACKSSWB,
15682 IX86_BUILTIN_PACKSSDW,
15683 IX86_BUILTIN_PACKUSWB,
15684
15685 IX86_BUILTIN_PADDB,
15686 IX86_BUILTIN_PADDW,
15687 IX86_BUILTIN_PADDD,
15688 IX86_BUILTIN_PADDQ,
15689 IX86_BUILTIN_PADDSB,
15690 IX86_BUILTIN_PADDSW,
15691 IX86_BUILTIN_PADDUSB,
15692 IX86_BUILTIN_PADDUSW,
15693 IX86_BUILTIN_PSUBB,
15694 IX86_BUILTIN_PSUBW,
15695 IX86_BUILTIN_PSUBD,
15696 IX86_BUILTIN_PSUBQ,
15697 IX86_BUILTIN_PSUBSB,
15698 IX86_BUILTIN_PSUBSW,
15699 IX86_BUILTIN_PSUBUSB,
15700 IX86_BUILTIN_PSUBUSW,
15701
15702 IX86_BUILTIN_PAND,
15703 IX86_BUILTIN_PANDN,
15704 IX86_BUILTIN_POR,
15705 IX86_BUILTIN_PXOR,
15706
15707 IX86_BUILTIN_PAVGB,
15708 IX86_BUILTIN_PAVGW,
15709
15710 IX86_BUILTIN_PCMPEQB,
15711 IX86_BUILTIN_PCMPEQW,
15712 IX86_BUILTIN_PCMPEQD,
15713 IX86_BUILTIN_PCMPGTB,
15714 IX86_BUILTIN_PCMPGTW,
15715 IX86_BUILTIN_PCMPGTD,
15716
15717 IX86_BUILTIN_PMADDWD,
15718
15719 IX86_BUILTIN_PMAXSW,
15720 IX86_BUILTIN_PMAXUB,
15721 IX86_BUILTIN_PMINSW,
15722 IX86_BUILTIN_PMINUB,
15723
15724 IX86_BUILTIN_PMULHUW,
15725 IX86_BUILTIN_PMULHW,
15726 IX86_BUILTIN_PMULLW,
15727
15728 IX86_BUILTIN_PSADBW,
15729 IX86_BUILTIN_PSHUFW,
15730
15731 IX86_BUILTIN_PSLLW,
15732 IX86_BUILTIN_PSLLD,
15733 IX86_BUILTIN_PSLLQ,
15734 IX86_BUILTIN_PSRAW,
15735 IX86_BUILTIN_PSRAD,
15736 IX86_BUILTIN_PSRLW,
15737 IX86_BUILTIN_PSRLD,
15738 IX86_BUILTIN_PSRLQ,
15739 IX86_BUILTIN_PSLLWI,
15740 IX86_BUILTIN_PSLLDI,
15741 IX86_BUILTIN_PSLLQI,
15742 IX86_BUILTIN_PSRAWI,
15743 IX86_BUILTIN_PSRADI,
15744 IX86_BUILTIN_PSRLWI,
15745 IX86_BUILTIN_PSRLDI,
15746 IX86_BUILTIN_PSRLQI,
15747
15748 IX86_BUILTIN_PUNPCKHBW,
15749 IX86_BUILTIN_PUNPCKHWD,
15750 IX86_BUILTIN_PUNPCKHDQ,
15751 IX86_BUILTIN_PUNPCKLBW,
15752 IX86_BUILTIN_PUNPCKLWD,
15753 IX86_BUILTIN_PUNPCKLDQ,
15754
15755 IX86_BUILTIN_SHUFPS,
15756
15757 IX86_BUILTIN_RCPPS,
15758 IX86_BUILTIN_RCPSS,
15759 IX86_BUILTIN_RSQRTPS,
15760 IX86_BUILTIN_RSQRTSS,
15761 IX86_BUILTIN_SQRTPS,
15762 IX86_BUILTIN_SQRTSS,
15763
15764 IX86_BUILTIN_UNPCKHPS,
15765 IX86_BUILTIN_UNPCKLPS,
15766
15767 IX86_BUILTIN_ANDPS,
15768 IX86_BUILTIN_ANDNPS,
15769 IX86_BUILTIN_ORPS,
15770 IX86_BUILTIN_XORPS,
15771
15772 IX86_BUILTIN_EMMS,
15773 IX86_BUILTIN_LDMXCSR,
15774 IX86_BUILTIN_STMXCSR,
15775 IX86_BUILTIN_SFENCE,
15776
15777 /* 3DNow! Original */
15778 IX86_BUILTIN_FEMMS,
15779 IX86_BUILTIN_PAVGUSB,
15780 IX86_BUILTIN_PF2ID,
15781 IX86_BUILTIN_PFACC,
15782 IX86_BUILTIN_PFADD,
15783 IX86_BUILTIN_PFCMPEQ,
15784 IX86_BUILTIN_PFCMPGE,
15785 IX86_BUILTIN_PFCMPGT,
15786 IX86_BUILTIN_PFMAX,
15787 IX86_BUILTIN_PFMIN,
15788 IX86_BUILTIN_PFMUL,
15789 IX86_BUILTIN_PFRCP,
15790 IX86_BUILTIN_PFRCPIT1,
15791 IX86_BUILTIN_PFRCPIT2,
15792 IX86_BUILTIN_PFRSQIT1,
15793 IX86_BUILTIN_PFRSQRT,
15794 IX86_BUILTIN_PFSUB,
15795 IX86_BUILTIN_PFSUBR,
15796 IX86_BUILTIN_PI2FD,
15797 IX86_BUILTIN_PMULHRW,
15798
15799 /* 3DNow! Athlon Extensions */
15800 IX86_BUILTIN_PF2IW,
15801 IX86_BUILTIN_PFNACC,
15802 IX86_BUILTIN_PFPNACC,
15803 IX86_BUILTIN_PI2FW,
15804 IX86_BUILTIN_PSWAPDSI,
15805 IX86_BUILTIN_PSWAPDSF,
15806
15807 /* SSE2 */
15808 IX86_BUILTIN_ADDPD,
15809 IX86_BUILTIN_ADDSD,
15810 IX86_BUILTIN_DIVPD,
15811 IX86_BUILTIN_DIVSD,
15812 IX86_BUILTIN_MULPD,
15813 IX86_BUILTIN_MULSD,
15814 IX86_BUILTIN_SUBPD,
15815 IX86_BUILTIN_SUBSD,
15816
15817 IX86_BUILTIN_CMPEQPD,
15818 IX86_BUILTIN_CMPLTPD,
15819 IX86_BUILTIN_CMPLEPD,
15820 IX86_BUILTIN_CMPGTPD,
15821 IX86_BUILTIN_CMPGEPD,
15822 IX86_BUILTIN_CMPNEQPD,
15823 IX86_BUILTIN_CMPNLTPD,
15824 IX86_BUILTIN_CMPNLEPD,
15825 IX86_BUILTIN_CMPNGTPD,
15826 IX86_BUILTIN_CMPNGEPD,
15827 IX86_BUILTIN_CMPORDPD,
15828 IX86_BUILTIN_CMPUNORDPD,
15829 IX86_BUILTIN_CMPNEPD,
15830 IX86_BUILTIN_CMPEQSD,
15831 IX86_BUILTIN_CMPLTSD,
15832 IX86_BUILTIN_CMPLESD,
15833 IX86_BUILTIN_CMPNEQSD,
15834 IX86_BUILTIN_CMPNLTSD,
15835 IX86_BUILTIN_CMPNLESD,
15836 IX86_BUILTIN_CMPORDSD,
15837 IX86_BUILTIN_CMPUNORDSD,
15838 IX86_BUILTIN_CMPNESD,
15839
15840 IX86_BUILTIN_COMIEQSD,
15841 IX86_BUILTIN_COMILTSD,
15842 IX86_BUILTIN_COMILESD,
15843 IX86_BUILTIN_COMIGTSD,
15844 IX86_BUILTIN_COMIGESD,
15845 IX86_BUILTIN_COMINEQSD,
15846 IX86_BUILTIN_UCOMIEQSD,
15847 IX86_BUILTIN_UCOMILTSD,
15848 IX86_BUILTIN_UCOMILESD,
15849 IX86_BUILTIN_UCOMIGTSD,
15850 IX86_BUILTIN_UCOMIGESD,
15851 IX86_BUILTIN_UCOMINEQSD,
15852
15853 IX86_BUILTIN_MAXPD,
15854 IX86_BUILTIN_MAXSD,
15855 IX86_BUILTIN_MINPD,
15856 IX86_BUILTIN_MINSD,
15857
15858 IX86_BUILTIN_ANDPD,
15859 IX86_BUILTIN_ANDNPD,
15860 IX86_BUILTIN_ORPD,
15861 IX86_BUILTIN_XORPD,
15862
15863 IX86_BUILTIN_SQRTPD,
15864 IX86_BUILTIN_SQRTSD,
15865
15866 IX86_BUILTIN_UNPCKHPD,
15867 IX86_BUILTIN_UNPCKLPD,
15868
15869 IX86_BUILTIN_SHUFPD,
15870
15871 IX86_BUILTIN_LOADUPD,
15872 IX86_BUILTIN_STOREUPD,
15873 IX86_BUILTIN_MOVSD,
15874
15875 IX86_BUILTIN_LOADHPD,
15876 IX86_BUILTIN_LOADLPD,
15877
15878 IX86_BUILTIN_CVTDQ2PD,
15879 IX86_BUILTIN_CVTDQ2PS,
15880
15881 IX86_BUILTIN_CVTPD2DQ,
15882 IX86_BUILTIN_CVTPD2PI,
15883 IX86_BUILTIN_CVTPD2PS,
15884 IX86_BUILTIN_CVTTPD2DQ,
15885 IX86_BUILTIN_CVTTPD2PI,
15886
15887 IX86_BUILTIN_CVTPI2PD,
15888 IX86_BUILTIN_CVTSI2SD,
15889 IX86_BUILTIN_CVTSI642SD,
15890
15891 IX86_BUILTIN_CVTSD2SI,
15892 IX86_BUILTIN_CVTSD2SI64,
15893 IX86_BUILTIN_CVTSD2SS,
15894 IX86_BUILTIN_CVTSS2SD,
15895 IX86_BUILTIN_CVTTSD2SI,
15896 IX86_BUILTIN_CVTTSD2SI64,
15897
15898 IX86_BUILTIN_CVTPS2DQ,
15899 IX86_BUILTIN_CVTPS2PD,
15900 IX86_BUILTIN_CVTTPS2DQ,
15901
15902 IX86_BUILTIN_MOVNTI,
15903 IX86_BUILTIN_MOVNTPD,
15904 IX86_BUILTIN_MOVNTDQ,
15905
15906 /* SSE2 MMX */
15907 IX86_BUILTIN_MASKMOVDQU,
15908 IX86_BUILTIN_MOVMSKPD,
15909 IX86_BUILTIN_PMOVMSKB128,
15910
15911 IX86_BUILTIN_PACKSSWB128,
15912 IX86_BUILTIN_PACKSSDW128,
15913 IX86_BUILTIN_PACKUSWB128,
15914
15915 IX86_BUILTIN_PADDB128,
15916 IX86_BUILTIN_PADDW128,
15917 IX86_BUILTIN_PADDD128,
15918 IX86_BUILTIN_PADDQ128,
15919 IX86_BUILTIN_PADDSB128,
15920 IX86_BUILTIN_PADDSW128,
15921 IX86_BUILTIN_PADDUSB128,
15922 IX86_BUILTIN_PADDUSW128,
15923 IX86_BUILTIN_PSUBB128,
15924 IX86_BUILTIN_PSUBW128,
15925 IX86_BUILTIN_PSUBD128,
15926 IX86_BUILTIN_PSUBQ128,
15927 IX86_BUILTIN_PSUBSB128,
15928 IX86_BUILTIN_PSUBSW128,
15929 IX86_BUILTIN_PSUBUSB128,
15930 IX86_BUILTIN_PSUBUSW128,
15931
15932 IX86_BUILTIN_PAND128,
15933 IX86_BUILTIN_PANDN128,
15934 IX86_BUILTIN_POR128,
15935 IX86_BUILTIN_PXOR128,
15936
15937 IX86_BUILTIN_PAVGB128,
15938 IX86_BUILTIN_PAVGW128,
15939
15940 IX86_BUILTIN_PCMPEQB128,
15941 IX86_BUILTIN_PCMPEQW128,
15942 IX86_BUILTIN_PCMPEQD128,
15943 IX86_BUILTIN_PCMPGTB128,
15944 IX86_BUILTIN_PCMPGTW128,
15945 IX86_BUILTIN_PCMPGTD128,
15946
15947 IX86_BUILTIN_PMADDWD128,
15948
15949 IX86_BUILTIN_PMAXSW128,
15950 IX86_BUILTIN_PMAXUB128,
15951 IX86_BUILTIN_PMINSW128,
15952 IX86_BUILTIN_PMINUB128,
15953
15954 IX86_BUILTIN_PMULUDQ,
15955 IX86_BUILTIN_PMULUDQ128,
15956 IX86_BUILTIN_PMULHUW128,
15957 IX86_BUILTIN_PMULHW128,
15958 IX86_BUILTIN_PMULLW128,
15959
15960 IX86_BUILTIN_PSADBW128,
15961 IX86_BUILTIN_PSHUFHW,
15962 IX86_BUILTIN_PSHUFLW,
15963 IX86_BUILTIN_PSHUFD,
15964
15965 IX86_BUILTIN_PSLLW128,
15966 IX86_BUILTIN_PSLLD128,
15967 IX86_BUILTIN_PSLLQ128,
15968 IX86_BUILTIN_PSRAW128,
15969 IX86_BUILTIN_PSRAD128,
15970 IX86_BUILTIN_PSRLW128,
15971 IX86_BUILTIN_PSRLD128,
15972 IX86_BUILTIN_PSRLQ128,
15973 IX86_BUILTIN_PSLLDQI128,
15974 IX86_BUILTIN_PSLLWI128,
15975 IX86_BUILTIN_PSLLDI128,
15976 IX86_BUILTIN_PSLLQI128,
15977 IX86_BUILTIN_PSRAWI128,
15978 IX86_BUILTIN_PSRADI128,
15979 IX86_BUILTIN_PSRLDQI128,
15980 IX86_BUILTIN_PSRLWI128,
15981 IX86_BUILTIN_PSRLDI128,
15982 IX86_BUILTIN_PSRLQI128,
15983
15984 IX86_BUILTIN_PUNPCKHBW128,
15985 IX86_BUILTIN_PUNPCKHWD128,
15986 IX86_BUILTIN_PUNPCKHDQ128,
15987 IX86_BUILTIN_PUNPCKHQDQ128,
15988 IX86_BUILTIN_PUNPCKLBW128,
15989 IX86_BUILTIN_PUNPCKLWD128,
15990 IX86_BUILTIN_PUNPCKLDQ128,
15991 IX86_BUILTIN_PUNPCKLQDQ128,
15992
15993 IX86_BUILTIN_CLFLUSH,
15994 IX86_BUILTIN_MFENCE,
15995 IX86_BUILTIN_LFENCE,
15996
15997 /* Prescott New Instructions. */
15998 IX86_BUILTIN_ADDSUBPS,
15999 IX86_BUILTIN_HADDPS,
16000 IX86_BUILTIN_HSUBPS,
16001 IX86_BUILTIN_MOVSHDUP,
16002 IX86_BUILTIN_MOVSLDUP,
16003 IX86_BUILTIN_ADDSUBPD,
16004 IX86_BUILTIN_HADDPD,
16005 IX86_BUILTIN_HSUBPD,
16006 IX86_BUILTIN_LDDQU,
16007
16008 IX86_BUILTIN_MONITOR,
16009 IX86_BUILTIN_MWAIT,
16010
16011 /* SSSE3. */
16012 IX86_BUILTIN_PHADDW,
16013 IX86_BUILTIN_PHADDD,
16014 IX86_BUILTIN_PHADDSW,
16015 IX86_BUILTIN_PHSUBW,
16016 IX86_BUILTIN_PHSUBD,
16017 IX86_BUILTIN_PHSUBSW,
16018 IX86_BUILTIN_PMADDUBSW,
16019 IX86_BUILTIN_PMULHRSW,
16020 IX86_BUILTIN_PSHUFB,
16021 IX86_BUILTIN_PSIGNB,
16022 IX86_BUILTIN_PSIGNW,
16023 IX86_BUILTIN_PSIGND,
16024 IX86_BUILTIN_PALIGNR,
16025 IX86_BUILTIN_PABSB,
16026 IX86_BUILTIN_PABSW,
16027 IX86_BUILTIN_PABSD,
16028
16029 IX86_BUILTIN_PHADDW128,
16030 IX86_BUILTIN_PHADDD128,
16031 IX86_BUILTIN_PHADDSW128,
16032 IX86_BUILTIN_PHSUBW128,
16033 IX86_BUILTIN_PHSUBD128,
16034 IX86_BUILTIN_PHSUBSW128,
16035 IX86_BUILTIN_PMADDUBSW128,
16036 IX86_BUILTIN_PMULHRSW128,
16037 IX86_BUILTIN_PSHUFB128,
16038 IX86_BUILTIN_PSIGNB128,
16039 IX86_BUILTIN_PSIGNW128,
16040 IX86_BUILTIN_PSIGND128,
16041 IX86_BUILTIN_PALIGNR128,
16042 IX86_BUILTIN_PABSB128,
16043 IX86_BUILTIN_PABSW128,
16044 IX86_BUILTIN_PABSD128,
16045
16046 /* AMDFAM10 - SSE4A New Instructions. */
16047 IX86_BUILTIN_MOVNTSD,
16048 IX86_BUILTIN_MOVNTSS,
16049 IX86_BUILTIN_EXTRQI,
16050 IX86_BUILTIN_EXTRQ,
16051 IX86_BUILTIN_INSERTQI,
16052 IX86_BUILTIN_INSERTQ,
16053
16054 IX86_BUILTIN_VEC_INIT_V2SI,
16055 IX86_BUILTIN_VEC_INIT_V4HI,
16056 IX86_BUILTIN_VEC_INIT_V8QI,
16057 IX86_BUILTIN_VEC_EXT_V2DF,
16058 IX86_BUILTIN_VEC_EXT_V2DI,
16059 IX86_BUILTIN_VEC_EXT_V4SF,
16060 IX86_BUILTIN_VEC_EXT_V4SI,
16061 IX86_BUILTIN_VEC_EXT_V8HI,
16062 IX86_BUILTIN_VEC_EXT_V2SI,
16063 IX86_BUILTIN_VEC_EXT_V4HI,
16064 IX86_BUILTIN_VEC_SET_V8HI,
16065 IX86_BUILTIN_VEC_SET_V4HI,
16066
16067 IX86_BUILTIN_MAX
16068 };
16069
16070 /* Table for the ix86 builtin decls. */
16071 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16072
16073 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16074 * only if the target_flags include one of MASK. Stores the function decl
16075 * in the ix86_builtins array.
16076 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16077
16078 static inline tree
16079 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16080 {
16081 tree decl = NULL_TREE;
16082
16083 if (mask & target_flags
16084 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16085 {
16086 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16087 NULL, NULL_TREE);
16088 ix86_builtins[(int) code] = decl;
16089 }
16090
16091 return decl;
16092 }
16093
16094 /* Like def_builtin, but also marks the function decl "const". */
16095
16096 static inline tree
16097 def_builtin_const (int mask, const char *name, tree type,
16098 enum ix86_builtins code)
16099 {
16100 tree decl = def_builtin (mask, name, type, code);
16101 if (decl)
16102 TREE_READONLY (decl) = 1;
16103 return decl;
16104 }
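/* A sketch of how these helpers are used by the builtin initialization
   code later in this file; the type-tree variable name here is only
   illustrative, built beforehand with build_function_type_list:

     def_builtin (MASK_SSE, "__builtin_ia32_loadups",
                  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);

   The decl is created only when the MASK_* bit is set in target_flags, so
   e.g. SSE2 builtins are simply absent when compiling without -msse2. */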
16105
16106 /* Bits for builtin_description.flag. */
16107
16108 /* Set when we don't support the comparison natively, and should
16109 swap_comparison in order to support it. */
16110 #define BUILTIN_DESC_SWAP_OPERANDS 1
16111
16112 struct builtin_description
16113 {
16114 const unsigned int mask;
16115 const enum insn_code icode;
16116 const char *const name;
16117 const enum ix86_builtins code;
16118 const enum rtx_code comparison;
16119 const unsigned int flag;
16120 };
16121
16122 static const struct builtin_description bdesc_comi[] =
16123 {
16124 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16125 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16126 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16127 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16128 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16129 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16130 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16131 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16132 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16133 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16134 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16135 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16136 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16137 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16138 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16139 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16140 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16141 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16142 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16143 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16144 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16145 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16146 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16147 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16148 };
16149
16150 static const struct builtin_description bdesc_2arg[] =
16151 {
16152 /* SSE */
16153 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16154 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16155 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16156 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16157 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16158 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16159 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16160 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16161
16162 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16163 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16164 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16165 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16166 BUILTIN_DESC_SWAP_OPERANDS },
16167 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16168 BUILTIN_DESC_SWAP_OPERANDS },
16169 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16170 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16171 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16172 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16173 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16174 BUILTIN_DESC_SWAP_OPERANDS },
16175 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16176 BUILTIN_DESC_SWAP_OPERANDS },
16177 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16178 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16179 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16180 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16181 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16182 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16183 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16184 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16185 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16186 BUILTIN_DESC_SWAP_OPERANDS },
16187 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16188 BUILTIN_DESC_SWAP_OPERANDS },
16189 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16190
16191 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16192 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16193 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16194 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16195
16196 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16197 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16198 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16199 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16200
16201 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16202 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16203 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16204 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16205 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16206
16207 /* MMX */
16208 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16209 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16210 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16211 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16212 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16213 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16214 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16215 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16216
16217 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16218 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16219 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16220 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16221 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16222 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16223 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16224 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16225
16226 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16227 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16228 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16229
16230 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16231 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16232 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16233 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16234
16235 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16236 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16237
16238 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16239 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16240 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16241 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16242 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16243 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16244
16245 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16246 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16247 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16248 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16249
16250 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16251 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16252 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16253 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16254 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16255 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16256
16257   /* Special.  These entries have a null name; they are registered by hand
16257      below with more precise argument types.  */
16258 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16259 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16260 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16261
16262 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16263 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16264 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16265
16266 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16267 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16268 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16269 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16270 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16271 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16272
16273 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16274 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16275 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16276 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16277 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16278 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16279
16280 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16281 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16282 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16283 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16284
16285 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16286 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16287
16288 /* SSE2 */
16289 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16290 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16291 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16292 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16293 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16294 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16295 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16296 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16297
16298 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16299 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16300 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16301 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16302 BUILTIN_DESC_SWAP_OPERANDS },
16303 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16304 BUILTIN_DESC_SWAP_OPERANDS },
16305 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16306 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16307 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16308 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16309 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16310 BUILTIN_DESC_SWAP_OPERANDS },
16311 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16312 BUILTIN_DESC_SWAP_OPERANDS },
16313 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16314 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16315 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16316 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16317 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16318 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16319 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16320 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16321 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16322
16323 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16324 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16325 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16326 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16327
16328 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16329 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16330 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16331 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16332
16333 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16334 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16335 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16336
16337 /* SSE2 MMX */
16338 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16339 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16340 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16341 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16342 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16343 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16344 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16345 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16346
16347   { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16348   { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16349   { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16350   { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16351   { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16352   { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16353   { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16354   { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16355
16356 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16357 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16358
16359 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16360 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16361 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16362 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16363
16364 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16365 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16366
16367 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16368 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16371 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16372 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16373
16374 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16375 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16376 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16377 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16378
16379 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16380 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16381 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16382 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16384 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16385 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16386 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16387
16388 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16389 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16390 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16391
16392 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16393 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16394
16395 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16396 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16397
16398 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16399 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16400 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16401
16402 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16403 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16404 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16405
16406 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16407 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16408
16409 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16410
16411 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16412 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16413 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16414 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16415
16416   /* SSE3 */
16417 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16418 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16419 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16420 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16421 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16422 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16423
16424 /* SSSE3 */
16425 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16426 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16427 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16428 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16429 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16430 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16431 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16432 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16433 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16434 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16435 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16436 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16437 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16438 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16439 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16440 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16441 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16442 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16443 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16444 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16445 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16446 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16447 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16448 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16449 };
16450
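/* Builtins that take a single vector operand.  */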
16451 static const struct builtin_description bdesc_1arg[] =
16452 {
16453 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16454 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16455
16456 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16457 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16458 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16459
16460 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16461 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16462 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16463 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16464 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16465 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16466
16467 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16468 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16469
16470 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16471
16472 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16473 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16474
16475 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16476 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16477 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16478 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16479 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16480
16481 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16482
16483 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16484 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16485 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16486 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16487
16488 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16489 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16490 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16491
16492 /* SSE3 */
16493 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16494 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16495
16496 /* SSSE3 */
16497 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16498 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16499 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16500 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16501 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16502 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16503 };
16504
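/* Set up all target-specific builtins.  The MMX/SSE builtins are only
   registered when MMX support is enabled.  */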
16505 static void
16506 ix86_init_builtins (void)
16507 {
16508 if (TARGET_MMX)
16509 ix86_init_mmx_sse_builtins ();
16510 }
16511
16512 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16513    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
16514    are defined.  */
16515 static void
16516 ix86_init_mmx_sse_builtins (void)
16517 {
16518 const struct builtin_description * d;
16519 size_t i;
16520
16521 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16522 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16523 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16524 tree V2DI_type_node
16525 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16526 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16527 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16528 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16529 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16530 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16531 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16532
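  /* Pointer types used by the load, store and streaming-store builtins.  */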
16533 tree pchar_type_node = build_pointer_type (char_type_node);
16534 tree pcchar_type_node = build_pointer_type (
16535 build_type_variant (char_type_node, 1, 0));
16536 tree pfloat_type_node = build_pointer_type (float_type_node);
16537 tree pcfloat_type_node = build_pointer_type (
16538 build_type_variant (float_type_node, 1, 0));
16539 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16540 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16541 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16542
16543 /* Comparisons. */
16544 tree int_ftype_v4sf_v4sf
16545 = build_function_type_list (integer_type_node,
16546 V4SF_type_node, V4SF_type_node, NULL_TREE);
16547 tree v4si_ftype_v4sf_v4sf
16548 = build_function_type_list (V4SI_type_node,
16549 V4SF_type_node, V4SF_type_node, NULL_TREE);
16550 /* MMX/SSE/integer conversions. */
16551 tree int_ftype_v4sf
16552 = build_function_type_list (integer_type_node,
16553 V4SF_type_node, NULL_TREE);
16554 tree int64_ftype_v4sf
16555 = build_function_type_list (long_long_integer_type_node,
16556 V4SF_type_node, NULL_TREE);
16557 tree int_ftype_v8qi
16558 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16559 tree v4sf_ftype_v4sf_int
16560 = build_function_type_list (V4SF_type_node,
16561 V4SF_type_node, integer_type_node, NULL_TREE);
16562 tree v4sf_ftype_v4sf_int64
16563 = build_function_type_list (V4SF_type_node,
16564 V4SF_type_node, long_long_integer_type_node,
16565 NULL_TREE);
16566 tree v4sf_ftype_v4sf_v2si
16567 = build_function_type_list (V4SF_type_node,
16568 V4SF_type_node, V2SI_type_node, NULL_TREE);
16569
16570 /* Miscellaneous. */
16571 tree v8qi_ftype_v4hi_v4hi
16572 = build_function_type_list (V8QI_type_node,
16573 V4HI_type_node, V4HI_type_node, NULL_TREE);
16574 tree v4hi_ftype_v2si_v2si
16575 = build_function_type_list (V4HI_type_node,
16576 V2SI_type_node, V2SI_type_node, NULL_TREE);
16577 tree v4sf_ftype_v4sf_v4sf_int
16578 = build_function_type_list (V4SF_type_node,
16579 V4SF_type_node, V4SF_type_node,
16580 integer_type_node, NULL_TREE);
16581 tree v2si_ftype_v4hi_v4hi
16582 = build_function_type_list (V2SI_type_node,
16583 V4HI_type_node, V4HI_type_node, NULL_TREE);
16584 tree v4hi_ftype_v4hi_int
16585 = build_function_type_list (V4HI_type_node,
16586 V4HI_type_node, integer_type_node, NULL_TREE);
16587 tree v4hi_ftype_v4hi_di
16588 = build_function_type_list (V4HI_type_node,
16589 V4HI_type_node, long_long_unsigned_type_node,
16590 NULL_TREE);
16591 tree v2si_ftype_v2si_di
16592 = build_function_type_list (V2SI_type_node,
16593 V2SI_type_node, long_long_unsigned_type_node,
16594 NULL_TREE);
16595 tree void_ftype_void
16596 = build_function_type (void_type_node, void_list_node);
16597 tree void_ftype_unsigned
16598 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16599 tree void_ftype_unsigned_unsigned
16600 = build_function_type_list (void_type_node, unsigned_type_node,
16601 unsigned_type_node, NULL_TREE);
16602 tree void_ftype_pcvoid_unsigned_unsigned
16603 = build_function_type_list (void_type_node, const_ptr_type_node,
16604 unsigned_type_node, unsigned_type_node,
16605 NULL_TREE);
16606 tree unsigned_ftype_void
16607 = build_function_type (unsigned_type_node, void_list_node);
16608 tree v2si_ftype_v4sf
16609 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16610 /* Loads/stores. */
16611 tree void_ftype_v8qi_v8qi_pchar
16612 = build_function_type_list (void_type_node,
16613 V8QI_type_node, V8QI_type_node,
16614 pchar_type_node, NULL_TREE);
16615 tree v4sf_ftype_pcfloat
16616 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16617 /* @@@ the type is bogus */
16618 tree v4sf_ftype_v4sf_pv2si
16619 = build_function_type_list (V4SF_type_node,
16620 V4SF_type_node, pv2si_type_node, NULL_TREE);
16621 tree void_ftype_pv2si_v4sf
16622 = build_function_type_list (void_type_node,
16623 pv2si_type_node, V4SF_type_node, NULL_TREE);
16624 tree void_ftype_pfloat_v4sf
16625 = build_function_type_list (void_type_node,
16626 pfloat_type_node, V4SF_type_node, NULL_TREE);
16627 tree void_ftype_pdi_di
16628 = build_function_type_list (void_type_node,
16629 pdi_type_node, long_long_unsigned_type_node,
16630 NULL_TREE);
16631 tree void_ftype_pv2di_v2di
16632 = build_function_type_list (void_type_node,
16633 pv2di_type_node, V2DI_type_node, NULL_TREE);
16634 /* Normal vector unops. */
16635 tree v4sf_ftype_v4sf
16636 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16637 tree v16qi_ftype_v16qi
16638 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16639 tree v8hi_ftype_v8hi
16640 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16641 tree v4si_ftype_v4si
16642 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16643 tree v8qi_ftype_v8qi
16644 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16645 tree v4hi_ftype_v4hi
16646 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16647
16648 /* Normal vector binops. */
16649 tree v4sf_ftype_v4sf_v4sf
16650 = build_function_type_list (V4SF_type_node,
16651 V4SF_type_node, V4SF_type_node, NULL_TREE);
16652 tree v8qi_ftype_v8qi_v8qi
16653 = build_function_type_list (V8QI_type_node,
16654 V8QI_type_node, V8QI_type_node, NULL_TREE);
16655 tree v4hi_ftype_v4hi_v4hi
16656 = build_function_type_list (V4HI_type_node,
16657 V4HI_type_node, V4HI_type_node, NULL_TREE);
16658 tree v2si_ftype_v2si_v2si
16659 = build_function_type_list (V2SI_type_node,
16660 V2SI_type_node, V2SI_type_node, NULL_TREE);
16661 tree di_ftype_di_di
16662 = build_function_type_list (long_long_unsigned_type_node,
16663 long_long_unsigned_type_node,
16664 long_long_unsigned_type_node, NULL_TREE);
16665
16666 tree di_ftype_di_di_int
16667 = build_function_type_list (long_long_unsigned_type_node,
16668 long_long_unsigned_type_node,
16669 long_long_unsigned_type_node,
16670 integer_type_node, NULL_TREE);
16671
16672 tree v2si_ftype_v2sf
16673 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16674 tree v2sf_ftype_v2si
16675 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16676 tree v2si_ftype_v2si
16677 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16678 tree v2sf_ftype_v2sf
16679 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16680 tree v2sf_ftype_v2sf_v2sf
16681 = build_function_type_list (V2SF_type_node,
16682 V2SF_type_node, V2SF_type_node, NULL_TREE);
16683 tree v2si_ftype_v2sf_v2sf
16684 = build_function_type_list (V2SI_type_node,
16685 V2SF_type_node, V2SF_type_node, NULL_TREE);
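  /* Types used mainly by the SSE2 (double precision) builtins.  */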
16686 tree pint_type_node = build_pointer_type (integer_type_node);
16687 tree pdouble_type_node = build_pointer_type (double_type_node);
16688 tree pcdouble_type_node = build_pointer_type (
16689 build_type_variant (double_type_node, 1, 0));
16690 tree int_ftype_v2df_v2df
16691 = build_function_type_list (integer_type_node,
16692 V2DF_type_node, V2DF_type_node, NULL_TREE);
16693
16694 tree void_ftype_pcvoid
16695 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16696 tree v4sf_ftype_v4si
16697 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16698 tree v4si_ftype_v4sf
16699 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16700 tree v2df_ftype_v4si
16701 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16702 tree v4si_ftype_v2df
16703 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16704 tree v2si_ftype_v2df
16705 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16706 tree v4sf_ftype_v2df
16707 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16708 tree v2df_ftype_v2si
16709 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16710 tree v2df_ftype_v4sf
16711 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16712 tree int_ftype_v2df
16713 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16714 tree int64_ftype_v2df
16715 = build_function_type_list (long_long_integer_type_node,
16716 V2DF_type_node, NULL_TREE);
16717 tree v2df_ftype_v2df_int
16718 = build_function_type_list (V2DF_type_node,
16719 V2DF_type_node, integer_type_node, NULL_TREE);
16720 tree v2df_ftype_v2df_int64
16721 = build_function_type_list (V2DF_type_node,
16722 V2DF_type_node, long_long_integer_type_node,
16723 NULL_TREE);
16724 tree v4sf_ftype_v4sf_v2df
16725 = build_function_type_list (V4SF_type_node,
16726 V4SF_type_node, V2DF_type_node, NULL_TREE);
16727 tree v2df_ftype_v2df_v4sf
16728 = build_function_type_list (V2DF_type_node,
16729 V2DF_type_node, V4SF_type_node, NULL_TREE);
16730 tree v2df_ftype_v2df_v2df_int
16731 = build_function_type_list (V2DF_type_node,
16732 V2DF_type_node, V2DF_type_node,
16733 integer_type_node,
16734 NULL_TREE);
16735 tree v2df_ftype_v2df_pcdouble
16736 = build_function_type_list (V2DF_type_node,
16737 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16738 tree void_ftype_pdouble_v2df
16739 = build_function_type_list (void_type_node,
16740 pdouble_type_node, V2DF_type_node, NULL_TREE);
16741 tree void_ftype_pint_int
16742 = build_function_type_list (void_type_node,
16743 pint_type_node, integer_type_node, NULL_TREE);
16744 tree void_ftype_v16qi_v16qi_pchar
16745 = build_function_type_list (void_type_node,
16746 V16QI_type_node, V16QI_type_node,
16747 pchar_type_node, NULL_TREE);
16748 tree v2df_ftype_pcdouble
16749 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16750 tree v2df_ftype_v2df_v2df
16751 = build_function_type_list (V2DF_type_node,
16752 V2DF_type_node, V2DF_type_node, NULL_TREE);
16753 tree v16qi_ftype_v16qi_v16qi
16754 = build_function_type_list (V16QI_type_node,
16755 V16QI_type_node, V16QI_type_node, NULL_TREE);
16756 tree v8hi_ftype_v8hi_v8hi
16757 = build_function_type_list (V8HI_type_node,
16758 V8HI_type_node, V8HI_type_node, NULL_TREE);
16759 tree v4si_ftype_v4si_v4si
16760 = build_function_type_list (V4SI_type_node,
16761 V4SI_type_node, V4SI_type_node, NULL_TREE);
16762 tree v2di_ftype_v2di_v2di
16763 = build_function_type_list (V2DI_type_node,
16764 V2DI_type_node, V2DI_type_node, NULL_TREE);
16765 tree v2di_ftype_v2df_v2df
16766 = build_function_type_list (V2DI_type_node,
16767 V2DF_type_node, V2DF_type_node, NULL_TREE);
16768 tree v2df_ftype_v2df
16769 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16770 tree v2di_ftype_v2di_int
16771 = build_function_type_list (V2DI_type_node,
16772 V2DI_type_node, integer_type_node, NULL_TREE);
16773 tree v2di_ftype_v2di_v2di_int
16774 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16775 V2DI_type_node, integer_type_node, NULL_TREE);
16776 tree v4si_ftype_v4si_int
16777 = build_function_type_list (V4SI_type_node,
16778 V4SI_type_node, integer_type_node, NULL_TREE);
16779 tree v8hi_ftype_v8hi_int
16780 = build_function_type_list (V8HI_type_node,
16781 V8HI_type_node, integer_type_node, NULL_TREE);
16782 tree v8hi_ftype_v8hi_v2di
16783 = build_function_type_list (V8HI_type_node,
16784 V8HI_type_node, V2DI_type_node, NULL_TREE);
16785 tree v4si_ftype_v4si_v2di
16786 = build_function_type_list (V4SI_type_node,
16787 V4SI_type_node, V2DI_type_node, NULL_TREE);
16788 tree v4si_ftype_v8hi_v8hi
16789 = build_function_type_list (V4SI_type_node,
16790 V8HI_type_node, V8HI_type_node, NULL_TREE);
16791 tree di_ftype_v8qi_v8qi
16792 = build_function_type_list (long_long_unsigned_type_node,
16793 V8QI_type_node, V8QI_type_node, NULL_TREE);
16794 tree di_ftype_v2si_v2si
16795 = build_function_type_list (long_long_unsigned_type_node,
16796 V2SI_type_node, V2SI_type_node, NULL_TREE);
16797 tree v2di_ftype_v16qi_v16qi
16798 = build_function_type_list (V2DI_type_node,
16799 V16QI_type_node, V16QI_type_node, NULL_TREE);
16800 tree v2di_ftype_v4si_v4si
16801 = build_function_type_list (V2DI_type_node,
16802 V4SI_type_node, V4SI_type_node, NULL_TREE);
16803 tree int_ftype_v16qi
16804 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16805 tree v16qi_ftype_pcchar
16806 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16807 tree void_ftype_pchar_v16qi
16808 = build_function_type_list (void_type_node,
16809 pchar_type_node, V16QI_type_node, NULL_TREE);
16810
16811 tree v2di_ftype_v2di_unsigned_unsigned
16812 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16813 unsigned_type_node, unsigned_type_node,
16814 NULL_TREE);
16815 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16816 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16817 unsigned_type_node, unsigned_type_node,
16818 NULL_TREE);
16819 tree v2di_ftype_v2di_v16qi
16820 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16821 NULL_TREE);
16822
16823 tree float80_type;
16824 tree float128_type;
16825 tree ftype;
16826
16827 /* The __float80 type. */
16828 if (TYPE_MODE (long_double_type_node) == XFmode)
16829 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16830 "__float80");
16831 else
16832 {
16833 /* The __float80 type. */
16834 float80_type = make_node (REAL_TYPE);
16835 TYPE_PRECISION (float80_type) = 80;
16836 layout_type (float80_type);
16837 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16838 }
16839
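  /* The __float128 type is provided only on 64-bit targets.  */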
16840 if (TARGET_64BIT)
16841 {
16842 float128_type = make_node (REAL_TYPE);
16843 TYPE_PRECISION (float128_type) = 128;
16844 layout_type (float128_type);
16845 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16846 }
16847
16848 /* Add all builtins that are more or less simple operations on two
16849 operands. */
16850 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16851 {
16852 /* Use one of the operands; the target can have a different mode for
16853 mask-generating compares. */
16854 enum machine_mode mode;
16855 tree type;
16856
16857 if (d->name == 0)
16858 continue;
16859 mode = insn_data[d->icode].operand[1].mode;
16860
16861 switch (mode)
16862 {
16863 case V16QImode:
16864 type = v16qi_ftype_v16qi_v16qi;
16865 break;
16866 case V8HImode:
16867 type = v8hi_ftype_v8hi_v8hi;
16868 break;
16869 case V4SImode:
16870 type = v4si_ftype_v4si_v4si;
16871 break;
16872 case V2DImode:
16873 type = v2di_ftype_v2di_v2di;
16874 break;
16875 case V2DFmode:
16876 type = v2df_ftype_v2df_v2df;
16877 break;
16878 case V4SFmode:
16879 type = v4sf_ftype_v4sf_v4sf;
16880 break;
16881 case V8QImode:
16882 type = v8qi_ftype_v8qi_v8qi;
16883 break;
16884 case V4HImode:
16885 type = v4hi_ftype_v4hi_v4hi;
16886 break;
16887 case V2SImode:
16888 type = v2si_ftype_v2si_v2si;
16889 break;
16890 case DImode:
16891 type = di_ftype_di_di;
16892 break;
16893
16894 default:
16895 gcc_unreachable ();
16896 }
16897
16898 /* Override for comparisons. */
16899 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16900 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16901 type = v4si_ftype_v4sf_v4sf;
16902
16903 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16904 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16905 type = v2di_ftype_v2df_v2df;
16906
16907 def_builtin (d->mask, d->name, type, d->code);
16908 }
16909
16910   /* Add all builtins that are more or less simple operations on one operand. */
16911 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16912 {
16913 enum machine_mode mode;
16914 tree type;
16915
16916 if (d->name == 0)
16917 continue;
16918 mode = insn_data[d->icode].operand[1].mode;
16919
16920 switch (mode)
16921 {
16922 case V16QImode:
16923 type = v16qi_ftype_v16qi;
16924 break;
16925 case V8HImode:
16926 type = v8hi_ftype_v8hi;
16927 break;
16928 case V4SImode:
16929 type = v4si_ftype_v4si;
16930 break;
16931 case V2DFmode:
16932 type = v2df_ftype_v2df;
16933 break;
16934 case V4SFmode:
16935 type = v4sf_ftype_v4sf;
16936 break;
16937 case V8QImode:
16938 type = v8qi_ftype_v8qi;
16939 break;
16940 case V4HImode:
16941 type = v4hi_ftype_v4hi;
16942 break;
16943 case V2SImode:
16944 type = v2si_ftype_v2si;
16945 break;
16946
16947 default:
16948 	  gcc_unreachable ();
16949 }
16950
16951 def_builtin (d->mask, d->name, type, d->code);
16952 }
16953
16954 /* Add the remaining MMX insns with somewhat more complicated types. */
16955 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16956 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16957 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16958 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16959
16960 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16961 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16962 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16963
16964 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16965 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16966
16967 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16968 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16969
16970 /* comi/ucomi insns. */
16971 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16972 if (d->mask == MASK_SSE2)
16973 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16974 else
16975 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16976
16977 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16978 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16979 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16980
16981 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16982 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16983 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16984 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16985 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16986 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16987 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16988 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16989 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16990 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16991 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16992
16993 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16994
16995 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16996 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16997
16998 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16999 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17000 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17001 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17002
17003 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17004 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17005 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17006 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17007
17008 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17009
17010 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17011
17012 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17013 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17014 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17015 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17016 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17017 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17018
17019 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17020
17021 /* Original 3DNow! */
17022 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17023 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17024 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17025 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17026 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17027 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17028 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17029 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17030 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17031 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17032 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17033 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17034 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17035 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17036 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17037 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17038 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17039 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17040 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17041 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17042
17043 /* 3DNow! extension as used in the Athlon CPU. */
17044 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17045 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17046 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17047 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17048 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17049 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17050
17051 /* SSE2 */
17052 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17053
17054 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17055 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17056
17057 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17058 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17059
17060 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17061 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17062 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17063 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17064 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17065
17066 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17067 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17068 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17069 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17070
17071 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17072 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17073
17074 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17075
17076 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17077 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17078
17079 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17080 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17081 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17082 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17083 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17084
17085 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17086
17087 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17088 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17089 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17090 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17091
17092 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17093 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17094 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17095
17096 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17097 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17098 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17099 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17100
17101 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17102 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17103 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17104
17105 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17106 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17107
17108 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17109 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17110
17111 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17112 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17113 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17114
17115 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17116 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17117 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17118
17119 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17120 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17121
17122 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17123 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17124 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17125 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17126
17127 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17128 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17129 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17130 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17131
17132 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17133 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17134
17135 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17136
17137   /* Prescott New Instructions (SSE3).  */
17138 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17139 void_ftype_pcvoid_unsigned_unsigned,
17140 IX86_BUILTIN_MONITOR);
17141 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17142 void_ftype_unsigned_unsigned,
17143 IX86_BUILTIN_MWAIT);
17144 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
17145 v4sf_ftype_v4sf,
17146 IX86_BUILTIN_MOVSHDUP);
17147 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
17148 v4sf_ftype_v4sf,
17149 IX86_BUILTIN_MOVSLDUP);
17150 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17151 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
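  /* Illustrative sketch, assuming the usual pmmintrin.h wrappers: user
     code normally reaches the SSE3 builtins above through the intrinsic
     header rather than by calling them directly, e.g.

         #include <pmmintrin.h>
         void
         spin_wait (void const *addr)
         {
           _mm_monitor (addr, 0, 0);
           _mm_mwait (0, 0);
         }

     where _mm_monitor and _mm_mwait expand to __builtin_ia32_monitor and
     __builtin_ia32_mwait, handled case by case in ix86_expand_builtin
     below.  */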
17152
17153 /* SSSE3. */
17154 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17155 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17156 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17157 IX86_BUILTIN_PALIGNR);
17158
17159 /* AMDFAM10 SSE4A new built-ins.  */
17160 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17161 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17162 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17163 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17164 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17165 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17166 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17167 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17168 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17169 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17170 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17171 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17172
17173 /* Access to the vec_init patterns. */
17174 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17175 integer_type_node, NULL_TREE);
17176 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17177 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17178
17179 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17180 short_integer_type_node,
17181 short_integer_type_node,
17182 short_integer_type_node, NULL_TREE);
17183 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17184 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17185
17186 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17187 char_type_node, char_type_node,
17188 char_type_node, char_type_node,
17189 char_type_node, char_type_node,
17190 char_type_node, NULL_TREE);
17191 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17192 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17193
17194 /* Access to the vec_extract patterns. */
17195 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17196 integer_type_node, NULL_TREE);
17197 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17198 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17199
17200 ftype = build_function_type_list (long_long_integer_type_node,
17201 V2DI_type_node, integer_type_node,
17202 NULL_TREE);
17203 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17204 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17205
17206 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17207 integer_type_node, NULL_TREE);
17208 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17209 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17210
17211 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17212 integer_type_node, NULL_TREE);
17213 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17214 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17215
17216 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17217 integer_type_node, NULL_TREE);
17218 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17219 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17220
17221 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17222 integer_type_node, NULL_TREE);
17223 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17224 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17225
17226 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17227 integer_type_node, NULL_TREE);
17228 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17229 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17230
17231 /* Access to the vec_set patterns. */
17232 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17233 intHI_type_node,
17234 integer_type_node, NULL_TREE);
17235 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17236 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17237
17238 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17239 intHI_type_node,
17240 integer_type_node, NULL_TREE);
17241 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17242 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17243 }
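/* Illustrative sketch, assuming the mmintrin.h/xmmintrin.h wrappers: the
   vec_init/vec_ext/vec_set builtins defined above are what the MMX intrinsic
   headers call instead of using vector syntax directly, e.g.

       __m64 v = _mm_set_pi16 (3, 2, 1, 0);

   is a thin wrapper around __builtin_ia32_vec_init_v4hi and is expanded by
   ix86_expand_vec_init_builtin below, so no MMX code is emitted unless the
   user explicitly includes the header and calls the intrinsic.  */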
17244
17245 /* Errors in the source file can cause expand_expr to return const0_rtx
17246 where we expect a vector. To avoid crashing, use one of the vector
17247 clear instructions. */
17248 static rtx
17249 safe_vector_operand (rtx x, enum machine_mode mode)
17250 {
17251 if (x == const0_rtx)
17252 x = CONST0_RTX (mode);
17253 return x;
17254 }
17255
17256 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17257
17258 static rtx
17259 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17260 {
17261 rtx pat, xops[3];
17262 tree arg0 = CALL_EXPR_ARG (exp, 0);
17263 tree arg1 = CALL_EXPR_ARG (exp, 1);
17264 rtx op0 = expand_normal (arg0);
17265 rtx op1 = expand_normal (arg1);
17266 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17267 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17268 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17269
17270 if (VECTOR_MODE_P (mode0))
17271 op0 = safe_vector_operand (op0, mode0);
17272 if (VECTOR_MODE_P (mode1))
17273 op1 = safe_vector_operand (op1, mode1);
17274
17275 if (optimize || !target
17276 || GET_MODE (target) != tmode
17277 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17278 target = gen_reg_rtx (tmode);
17279
17280 if (GET_MODE (op1) == SImode && mode1 == TImode)
17281 {
17282 rtx x = gen_reg_rtx (V4SImode);
17283 emit_insn (gen_sse2_loadd (x, op1));
17284 op1 = gen_lowpart (TImode, x);
17285 }
17286
17287 /* The insn must want input operands in the same modes as the
17288 result. */
17289 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17290 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17291
17292 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17293 op0 = copy_to_mode_reg (mode0, op0);
17294 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17295 op1 = copy_to_mode_reg (mode1, op1);
17296
17297 /* ??? Using ix86_fixup_binary_operands is problematic when
17298 we've got mismatched modes. Fake it. */
17299
17300 xops[0] = target;
17301 xops[1] = op0;
17302 xops[2] = op1;
17303
17304 if (tmode == mode0 && tmode == mode1)
17305 {
17306 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17307 op0 = xops[1];
17308 op1 = xops[2];
17309 }
17310 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17311 {
17312 op0 = force_reg (mode0, op0);
17313 op1 = force_reg (mode1, op1);
17314 target = gen_reg_rtx (tmode);
17315 }
17316
17317 pat = GEN_FCN (icode) (target, op0, op1);
17318 if (! pat)
17319 return 0;
17320 emit_insn (pat);
17321 return target;
17322 }
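/* Illustrative sketch of the expansion above for a typical entry in
   bdesc_2arg, say __builtin_ia32_addps: both arguments are expanded, copied
   into registers that satisfy the insn's operand predicates, and a single
   pattern of roughly the form

       (set (reg:V4SF target)
            (plus:V4SF (reg:V4SF op0) (reg:V4SF op1)))

   is emitted, with TARGET returned as the value of the call.  The
   SImode-to-TImode fixup handles callers that pass a 32-bit integer where
   the insn expects a full 128-bit operand.  */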
17323
17324 /* Subroutine of ix86_expand_builtin to take care of stores. */
17325
17326 static rtx
17327 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17328 {
17329 rtx pat;
17330 tree arg0 = CALL_EXPR_ARG (exp, 0);
17331 tree arg1 = CALL_EXPR_ARG (exp, 1);
17332 rtx op0 = expand_normal (arg0);
17333 rtx op1 = expand_normal (arg1);
17334 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17335 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17336
17337 if (VECTOR_MODE_P (mode1))
17338 op1 = safe_vector_operand (op1, mode1);
17339
17340 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17341 op1 = copy_to_mode_reg (mode1, op1);
17342
17343 pat = GEN_FCN (icode) (op0, op1);
17344 if (pat)
17345 emit_insn (pat);
17346 return 0;
17347 }
17348
17349 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17350
17351 static rtx
17352 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17353 rtx target, int do_load)
17354 {
17355 rtx pat;
17356 tree arg0 = CALL_EXPR_ARG (exp, 0);
17357 rtx op0 = expand_normal (arg0);
17358 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17359 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17360
17361 if (optimize || !target
17362 || GET_MODE (target) != tmode
17363 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17364 target = gen_reg_rtx (tmode);
17365 if (do_load)
17366 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17367 else
17368 {
17369 if (VECTOR_MODE_P (mode0))
17370 op0 = safe_vector_operand (op0, mode0);
17371
17372 if ((optimize && !register_operand (op0, mode0))
17373 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17374 op0 = copy_to_mode_reg (mode0, op0);
17375 }
17376
17377 pat = GEN_FCN (icode) (target, op0);
17378 if (! pat)
17379 return 0;
17380 emit_insn (pat);
17381 return target;
17382 }
17383
17384 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17385 sqrtss, rsqrtss, rcpss. */
17386
17387 static rtx
17388 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17389 {
17390 rtx pat;
17391 tree arg0 = CALL_EXPR_ARG (exp, 0);
17392 rtx op1, op0 = expand_normal (arg0);
17393 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17394 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17395
17396 if (optimize || !target
17397 || GET_MODE (target) != tmode
17398 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17399 target = gen_reg_rtx (tmode);
17400
17401 if (VECTOR_MODE_P (mode0))
17402 op0 = safe_vector_operand (op0, mode0);
17403
17404 if ((optimize && !register_operand (op0, mode0))
17405 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17406 op0 = copy_to_mode_reg (mode0, op0);
17407
17408 op1 = op0;
17409 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17410 op1 = copy_to_mode_reg (mode0, op1);
17411
17412 pat = GEN_FCN (icode) (target, op0, op1);
17413 if (! pat)
17414 return 0;
17415 emit_insn (pat);
17416 return target;
17417 }
17418
17419 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17420
17421 static rtx
17422 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17423 rtx target)
17424 {
17425 rtx pat;
17426 tree arg0 = CALL_EXPR_ARG (exp, 0);
17427 tree arg1 = CALL_EXPR_ARG (exp, 1);
17428 rtx op0 = expand_normal (arg0);
17429 rtx op1 = expand_normal (arg1);
17430 rtx op2;
17431 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17432 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17433 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17434 enum rtx_code comparison = d->comparison;
17435
17436 if (VECTOR_MODE_P (mode0))
17437 op0 = safe_vector_operand (op0, mode0);
17438 if (VECTOR_MODE_P (mode1))
17439 op1 = safe_vector_operand (op1, mode1);
17440
17441 /* Swap operands if we have a comparison that isn't available in
17442 hardware. */
17443 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17444 {
17445 rtx tmp = gen_reg_rtx (mode1);
17446 emit_move_insn (tmp, op1);
17447 op1 = op0;
17448 op0 = tmp;
17449 }
17450
17451 if (optimize || !target
17452 || GET_MODE (target) != tmode
17453 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17454 target = gen_reg_rtx (tmode);
17455
17456 if ((optimize && !register_operand (op0, mode0))
17457 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17458 op0 = copy_to_mode_reg (mode0, op0);
17459 if ((optimize && !register_operand (op1, mode1))
17460 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17461 op1 = copy_to_mode_reg (mode1, op1);
17462
17463 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17464 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17465 if (! pat)
17466 return 0;
17467 emit_insn (pat);
17468 return target;
17469 }
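/* Illustrative note: SSE only encodes the EQ/LT/LE/UNORD predicates and
   their negations, so builtins such as __builtin_ia32_cmpgtps are listed in
   the builtin tables with BUILTIN_DESC_SWAP_OPERANDS and an LT comparison;
   the swap above turns "a > b" into "b < a" before emitting roughly

       (set (reg:V4SF target)
            (lt:V4SF (reg:V4SF b) (reg:V4SF a)))

   via the masked-compare pattern.  */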
17470
17471 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17472
17473 static rtx
17474 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17475 rtx target)
17476 {
17477 rtx pat;
17478 tree arg0 = CALL_EXPR_ARG (exp, 0);
17479 tree arg1 = CALL_EXPR_ARG (exp, 1);
17480 rtx op0 = expand_normal (arg0);
17481 rtx op1 = expand_normal (arg1);
17482 rtx op2;
17483 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17484 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17485 enum rtx_code comparison = d->comparison;
17486
17487 if (VECTOR_MODE_P (mode0))
17488 op0 = safe_vector_operand (op0, mode0);
17489 if (VECTOR_MODE_P (mode1))
17490 op1 = safe_vector_operand (op1, mode1);
17491
17492 /* Swap operands if we have a comparison that isn't available in
17493 hardware. */
17494 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17495 {
17496 rtx tmp = op1;
17497 op1 = op0;
17498 op0 = tmp;
17499 }
17500
17501 target = gen_reg_rtx (SImode);
17502 emit_move_insn (target, const0_rtx);
17503 target = gen_rtx_SUBREG (QImode, target, 0);
17504
17505 if ((optimize && !register_operand (op0, mode0))
17506 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17507 op0 = copy_to_mode_reg (mode0, op0);
17508 if ((optimize && !register_operand (op1, mode1))
17509 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17510 op1 = copy_to_mode_reg (mode1, op1);
17511
17512 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17513 pat = GEN_FCN (d->icode) (op0, op1);
17514 if (! pat)
17515 return 0;
17516 emit_insn (pat);
17517 emit_insn (gen_rtx_SET (VOIDmode,
17518 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17519 gen_rtx_fmt_ee (comparison, QImode,
17520 SET_DEST (pat),
17521 const0_rtx)));
17522
17523 return SUBREG_REG (target);
17524 }
17525
17526 /* Return the integer constant in ARG. Constrain it to be in the range
17527 of the subparts of VEC_TYPE; issue an error if not. */
17528
17529 static int
17530 get_element_number (tree vec_type, tree arg)
17531 {
17532 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17533
17534 if (!host_integerp (arg, 1)
17535 || (elt = tree_low_cst (arg, 1), elt > max))
17536 {
17537 error ("selector must be an integer constant in the range 0..%wi", max);
17538 return 0;
17539 }
17540
17541 return elt;
17542 }
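/* Illustrative example: an out-of-range or non-constant selector is
   diagnosed here rather than silently truncated, e.g.

       __v4sf v;
       float f = __builtin_ia32_vec_ext_v4sf (v, 7);

   reports "selector must be an integer constant in the range 0..3" and the
   caller falls back to element 0.  */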
17543
17544 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17545 ix86_expand_vector_init. We DO have language-level syntax for this, in
17546 the form of (type){ init-list }. Except that since we can't place emms
17547 instructions from inside the compiler, we can't allow the use of MMX
17548 registers unless the user explicitly asks for it. So we do *not* define
17549 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17550 we have builtins invoked by mmintrin.h that give us license to emit
17551 these sorts of instructions. */
17552
17553 static rtx
17554 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17555 {
17556 enum machine_mode tmode = TYPE_MODE (type);
17557 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17558 int i, n_elt = GET_MODE_NUNITS (tmode);
17559 rtvec v = rtvec_alloc (n_elt);
17560
17561 gcc_assert (VECTOR_MODE_P (tmode));
17562 gcc_assert (call_expr_nargs (exp) == n_elt);
17563
17564 for (i = 0; i < n_elt; ++i)
17565 {
17566 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17567 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17568 }
17569
17570 if (!target || !register_operand (target, tmode))
17571 target = gen_reg_rtx (tmode);
17572
17573 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17574 return target;
17575 }
17576
17577 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17578 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17579 had a language-level syntax for referencing vector elements. */
17580
17581 static rtx
17582 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17583 {
17584 enum machine_mode tmode, mode0;
17585 tree arg0, arg1;
17586 int elt;
17587 rtx op0;
17588
17589 arg0 = CALL_EXPR_ARG (exp, 0);
17590 arg1 = CALL_EXPR_ARG (exp, 1);
17591
17592 op0 = expand_normal (arg0);
17593 elt = get_element_number (TREE_TYPE (arg0), arg1);
17594
17595 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17596 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17597 gcc_assert (VECTOR_MODE_P (mode0));
17598
17599 op0 = force_reg (mode0, op0);
17600
17601 if (optimize || !target || !register_operand (target, tmode))
17602 target = gen_reg_rtx (tmode);
17603
17604 ix86_expand_vector_extract (true, target, op0, elt);
17605
17606 return target;
17607 }
17608
17609 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17610 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17611 a language-level syntax for referencing vector elements. */
17612
17613 static rtx
17614 ix86_expand_vec_set_builtin (tree exp)
17615 {
17616 enum machine_mode tmode, mode1;
17617 tree arg0, arg1, arg2;
17618 int elt;
17619 rtx op0, op1;
17620
17621 arg0 = CALL_EXPR_ARG (exp, 0);
17622 arg1 = CALL_EXPR_ARG (exp, 1);
17623 arg2 = CALL_EXPR_ARG (exp, 2);
17624
17625 tmode = TYPE_MODE (TREE_TYPE (arg0));
17626 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17627 gcc_assert (VECTOR_MODE_P (tmode));
17628
17629 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17630 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17631 elt = get_element_number (TREE_TYPE (arg0), arg2);
17632
17633 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17634 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17635
17636 op0 = force_reg (tmode, op0);
17637 op1 = force_reg (mode1, op1);
17638
17639 ix86_expand_vector_set (true, op0, op1, elt);
17640
17641 return op0;
17642 }
17643
17644 /* Expand an expression EXP that calls a built-in function,
17645 with result going to TARGET if that's convenient
17646 (and in mode MODE if that's convenient).
17647 SUBTARGET may be used as the target for computing one of EXP's operands.
17648 IGNORE is nonzero if the value is to be ignored. */
17649
17650 static rtx
17651 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17652 enum machine_mode mode ATTRIBUTE_UNUSED,
17653 int ignore ATTRIBUTE_UNUSED)
17654 {
17655 const struct builtin_description *d;
17656 size_t i;
17657 enum insn_code icode;
17658 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17659 tree arg0, arg1, arg2, arg3;
17660 rtx op0, op1, op2, op3, pat;
17661 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17662 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17663
17664 switch (fcode)
17665 {
17666 case IX86_BUILTIN_EMMS:
17667 emit_insn (gen_mmx_emms ());
17668 return 0;
17669
17670 case IX86_BUILTIN_SFENCE:
17671 emit_insn (gen_sse_sfence ());
17672 return 0;
17673
17674 case IX86_BUILTIN_MASKMOVQ:
17675 case IX86_BUILTIN_MASKMOVDQU:
17676 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17677 ? CODE_FOR_mmx_maskmovq
17678 : CODE_FOR_sse2_maskmovdqu);
17679 /* Note the arg order is different from the operand order. */
17680 arg1 = CALL_EXPR_ARG (exp, 0);
17681 arg2 = CALL_EXPR_ARG (exp, 1);
17682 arg0 = CALL_EXPR_ARG (exp, 2);
17683 op0 = expand_normal (arg0);
17684 op1 = expand_normal (arg1);
17685 op2 = expand_normal (arg2);
17686 mode0 = insn_data[icode].operand[0].mode;
17687 mode1 = insn_data[icode].operand[1].mode;
17688 mode2 = insn_data[icode].operand[2].mode;
17689
17690 op0 = force_reg (Pmode, op0);
17691 op0 = gen_rtx_MEM (mode1, op0);
17692
17693 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17694 op0 = copy_to_mode_reg (mode0, op0);
17695 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17696 op1 = copy_to_mode_reg (mode1, op1);
17697 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17698 op2 = copy_to_mode_reg (mode2, op2);
17699 pat = GEN_FCN (icode) (op0, op1, op2);
17700 if (! pat)
17701 return 0;
17702 emit_insn (pat);
17703 return 0;
17704
17705 case IX86_BUILTIN_SQRTSS:
17706 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17707 case IX86_BUILTIN_RSQRTSS:
17708 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17709 case IX86_BUILTIN_RCPSS:
17710 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17711
17712 case IX86_BUILTIN_LOADUPS:
17713 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17714
17715 case IX86_BUILTIN_STOREUPS:
17716 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17717
17718 case IX86_BUILTIN_LOADHPS:
17719 case IX86_BUILTIN_LOADLPS:
17720 case IX86_BUILTIN_LOADHPD:
17721 case IX86_BUILTIN_LOADLPD:
17722 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17723 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17724 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17725 : CODE_FOR_sse2_loadlpd);
17726 arg0 = CALL_EXPR_ARG (exp, 0);
17727 arg1 = CALL_EXPR_ARG (exp, 1);
17728 op0 = expand_normal (arg0);
17729 op1 = expand_normal (arg1);
17730 tmode = insn_data[icode].operand[0].mode;
17731 mode0 = insn_data[icode].operand[1].mode;
17732 mode1 = insn_data[icode].operand[2].mode;
17733
17734 op0 = force_reg (mode0, op0);
17735 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17736 if (optimize || target == 0
17737 || GET_MODE (target) != tmode
17738 || !register_operand (target, tmode))
17739 target = gen_reg_rtx (tmode);
17740 pat = GEN_FCN (icode) (target, op0, op1);
17741 if (! pat)
17742 return 0;
17743 emit_insn (pat);
17744 return target;
17745
17746 case IX86_BUILTIN_STOREHPS:
17747 case IX86_BUILTIN_STORELPS:
17748 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17749 : CODE_FOR_sse_storelps);
17750 arg0 = CALL_EXPR_ARG (exp, 0);
17751 arg1 = CALL_EXPR_ARG (exp, 1);
17752 op0 = expand_normal (arg0);
17753 op1 = expand_normal (arg1);
17754 mode0 = insn_data[icode].operand[0].mode;
17755 mode1 = insn_data[icode].operand[1].mode;
17756
17757 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17758 op1 = force_reg (mode1, op1);
17759
17760 pat = GEN_FCN (icode) (op0, op1);
17761 if (! pat)
17762 return 0;
17763 emit_insn (pat);
17764 return const0_rtx;
17765
17766 case IX86_BUILTIN_MOVNTPS:
17767 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17768 case IX86_BUILTIN_MOVNTQ:
17769 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17770
17771 case IX86_BUILTIN_LDMXCSR:
17772 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17773 target = assign_386_stack_local (SImode, SLOT_TEMP);
17774 emit_move_insn (target, op0);
17775 emit_insn (gen_sse_ldmxcsr (target));
17776 return 0;
17777
17778 case IX86_BUILTIN_STMXCSR:
17779 target = assign_386_stack_local (SImode, SLOT_TEMP);
17780 emit_insn (gen_sse_stmxcsr (target));
17781 return copy_to_mode_reg (SImode, target);
17782
17783 case IX86_BUILTIN_SHUFPS:
17784 case IX86_BUILTIN_SHUFPD:
17785 icode = (fcode == IX86_BUILTIN_SHUFPS
17786 ? CODE_FOR_sse_shufps
17787 : CODE_FOR_sse2_shufpd);
17788 arg0 = CALL_EXPR_ARG (exp, 0);
17789 arg1 = CALL_EXPR_ARG (exp, 1);
17790 arg2 = CALL_EXPR_ARG (exp, 2);
17791 op0 = expand_normal (arg0);
17792 op1 = expand_normal (arg1);
17793 op2 = expand_normal (arg2);
17794 tmode = insn_data[icode].operand[0].mode;
17795 mode0 = insn_data[icode].operand[1].mode;
17796 mode1 = insn_data[icode].operand[2].mode;
17797 mode2 = insn_data[icode].operand[3].mode;
17798
17799 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17800 op0 = copy_to_mode_reg (mode0, op0);
17801 if ((optimize && !register_operand (op1, mode1))
17802 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17803 op1 = copy_to_mode_reg (mode1, op1);
17804 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17805 {
17806 /* @@@ better error message */
17807 error ("mask must be an immediate");
17808 return gen_reg_rtx (tmode);
17809 }
17810 if (optimize || target == 0
17811 || GET_MODE (target) != tmode
17812 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17813 target = gen_reg_rtx (tmode);
17814 pat = GEN_FCN (icode) (target, op0, op1, op2);
17815 if (! pat)
17816 return 0;
17817 emit_insn (pat);
17818 return target;
17819
17820 case IX86_BUILTIN_PSHUFW:
17821 case IX86_BUILTIN_PSHUFD:
17822 case IX86_BUILTIN_PSHUFHW:
17823 case IX86_BUILTIN_PSHUFLW:
17824 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17825 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17826 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17827 : CODE_FOR_mmx_pshufw);
17828 arg0 = CALL_EXPR_ARG (exp, 0);
17829 arg1 = CALL_EXPR_ARG (exp, 1);
17830 op0 = expand_normal (arg0);
17831 op1 = expand_normal (arg1);
17832 tmode = insn_data[icode].operand[0].mode;
17833 mode1 = insn_data[icode].operand[1].mode;
17834 mode2 = insn_data[icode].operand[2].mode;
17835
17836 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17837 op0 = copy_to_mode_reg (mode1, op0);
17838 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17839 {
17840 /* @@@ better error message */
17841 error ("mask must be an immediate");
17842 return const0_rtx;
17843 }
17844 if (target == 0
17845 || GET_MODE (target) != tmode
17846 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17847 target = gen_reg_rtx (tmode);
17848 pat = GEN_FCN (icode) (target, op0, op1);
17849 if (! pat)
17850 return 0;
17851 emit_insn (pat);
17852 return target;
17853
17854 case IX86_BUILTIN_PSLLDQI128:
17855 case IX86_BUILTIN_PSRLDQI128:
17856 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17857 : CODE_FOR_sse2_lshrti3);
17858 arg0 = CALL_EXPR_ARG (exp, 0);
17859 arg1 = CALL_EXPR_ARG (exp, 1);
17860 op0 = expand_normal (arg0);
17861 op1 = expand_normal (arg1);
17862 tmode = insn_data[icode].operand[0].mode;
17863 mode1 = insn_data[icode].operand[1].mode;
17864 mode2 = insn_data[icode].operand[2].mode;
17865
17866 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17867 {
17868 op0 = copy_to_reg (op0);
17869 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17870 }
17871 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17872 {
17873 error ("shift must be an immediate");
17874 return const0_rtx;
17875 }
17876 target = gen_reg_rtx (V2DImode);
17877 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17878 if (! pat)
17879 return 0;
17880 emit_insn (pat);
17881 return target;
17882
17883 case IX86_BUILTIN_FEMMS:
17884 emit_insn (gen_mmx_femms ());
17885 return NULL_RTX;
17886
17887 case IX86_BUILTIN_PAVGUSB:
17888 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
17889
17890 case IX86_BUILTIN_PF2ID:
17891 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
17892
17893 case IX86_BUILTIN_PFACC:
17894 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
17895
17896 case IX86_BUILTIN_PFADD:
17897 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
17898
17899 case IX86_BUILTIN_PFCMPEQ:
17900 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
17901
17902 case IX86_BUILTIN_PFCMPGE:
17903 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
17904
17905 case IX86_BUILTIN_PFCMPGT:
17906 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
17907
17908 case IX86_BUILTIN_PFMAX:
17909 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
17910
17911 case IX86_BUILTIN_PFMIN:
17912 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
17913
17914 case IX86_BUILTIN_PFMUL:
17915 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
17916
17917 case IX86_BUILTIN_PFRCP:
17918 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
17919
17920 case IX86_BUILTIN_PFRCPIT1:
17921 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
17922
17923 case IX86_BUILTIN_PFRCPIT2:
17924 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
17925
17926 case IX86_BUILTIN_PFRSQIT1:
17927 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
17928
17929 case IX86_BUILTIN_PFRSQRT:
17930 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
17931
17932 case IX86_BUILTIN_PFSUB:
17933 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
17934
17935 case IX86_BUILTIN_PFSUBR:
17936 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
17937
17938 case IX86_BUILTIN_PI2FD:
17939 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
17940
17941 case IX86_BUILTIN_PMULHRW:
17942 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
17943
17944 case IX86_BUILTIN_PF2IW:
17945 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
17946
17947 case IX86_BUILTIN_PFNACC:
17948 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
17949
17950 case IX86_BUILTIN_PFPNACC:
17951 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
17952
17953 case IX86_BUILTIN_PI2FW:
17954 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
17955
17956 case IX86_BUILTIN_PSWAPDSI:
17957 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
17958
17959 case IX86_BUILTIN_PSWAPDSF:
17960 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
17961
17962 case IX86_BUILTIN_SQRTSD:
17963 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
17964 case IX86_BUILTIN_LOADUPD:
17965 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
17966 case IX86_BUILTIN_STOREUPD:
17967 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
17968
17969 case IX86_BUILTIN_MFENCE:
17970 emit_insn (gen_sse2_mfence ());
17971 return 0;
17972 case IX86_BUILTIN_LFENCE:
17973 emit_insn (gen_sse2_lfence ());
17974 return 0;
17975
17976 case IX86_BUILTIN_CLFLUSH:
17977 arg0 = CALL_EXPR_ARG (exp, 0);
17978 op0 = expand_normal (arg0);
17979 icode = CODE_FOR_sse2_clflush;
17980 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17981 op0 = copy_to_mode_reg (Pmode, op0);
17982
17983 emit_insn (gen_sse2_clflush (op0));
17984 return 0;
17985
17986 case IX86_BUILTIN_MOVNTPD:
17987 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
17988 case IX86_BUILTIN_MOVNTDQ:
17989 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
17990 case IX86_BUILTIN_MOVNTI:
17991 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
17992
17993 case IX86_BUILTIN_LOADDQU:
17994 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
17995 case IX86_BUILTIN_STOREDQU:
17996 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
17997
17998 case IX86_BUILTIN_MONITOR:
17999 arg0 = CALL_EXPR_ARG (exp, 0);
18000 arg1 = CALL_EXPR_ARG (exp, 1);
18001 arg2 = CALL_EXPR_ARG (exp, 2);
18002 op0 = expand_normal (arg0);
18003 op1 = expand_normal (arg1);
18004 op2 = expand_normal (arg2);
18005 if (!REG_P (op0))
18006 op0 = copy_to_mode_reg (Pmode, op0);
18007 if (!REG_P (op1))
18008 op1 = copy_to_mode_reg (SImode, op1);
18009 if (!REG_P (op2))
18010 op2 = copy_to_mode_reg (SImode, op2);
18011 if (!TARGET_64BIT)
18012 emit_insn (gen_sse3_monitor (op0, op1, op2));
18013 else
18014 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18015 return 0;
18016
18017 case IX86_BUILTIN_MWAIT:
18018 arg0 = CALL_EXPR_ARG (exp, 0);
18019 arg1 = CALL_EXPR_ARG (exp, 1);
18020 op0 = expand_normal (arg0);
18021 op1 = expand_normal (arg1);
18022 if (!REG_P (op0))
18023 op0 = copy_to_mode_reg (SImode, op0);
18024 if (!REG_P (op1))
18025 op1 = copy_to_mode_reg (SImode, op1);
18026 emit_insn (gen_sse3_mwait (op0, op1));
18027 return 0;
18028
18029 case IX86_BUILTIN_LDDQU:
18030 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18031 target, 1);
18032
18033 case IX86_BUILTIN_PALIGNR:
18034 case IX86_BUILTIN_PALIGNR128:
18035 if (fcode == IX86_BUILTIN_PALIGNR)
18036 {
18037 icode = CODE_FOR_ssse3_palignrdi;
18038 mode = DImode;
18039 }
18040 else
18041 {
18042 icode = CODE_FOR_ssse3_palignrti;
18043 mode = V2DImode;
18044 }
18045 arg0 = CALL_EXPR_ARG (exp, 0);
18046 arg1 = CALL_EXPR_ARG (exp, 1);
18047 arg2 = CALL_EXPR_ARG (exp, 2);
18048 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18049 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18050 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18051 tmode = insn_data[icode].operand[0].mode;
18052 mode1 = insn_data[icode].operand[1].mode;
18053 mode2 = insn_data[icode].operand[2].mode;
18054 mode3 = insn_data[icode].operand[3].mode;
18055
18056 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18057 {
18058 op0 = copy_to_reg (op0);
18059 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18060 }
18061 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18062 {
18063 op1 = copy_to_reg (op1);
18064 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18065 }
18066 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18067 {
18068 error ("shift must be an immediate");
18069 return const0_rtx;
18070 }
18071 target = gen_reg_rtx (mode);
18072 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18073 op0, op1, op2);
18074 if (! pat)
18075 return 0;
18076 emit_insn (pat);
18077 return target;
18078
18079 case IX86_BUILTIN_MOVNTSD:
18080 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18081
18082 case IX86_BUILTIN_MOVNTSS:
18083 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18084
18085 case IX86_BUILTIN_INSERTQ:
18086 case IX86_BUILTIN_EXTRQ:
18087 icode = (fcode == IX86_BUILTIN_EXTRQ
18088 ? CODE_FOR_sse4a_extrq
18089 : CODE_FOR_sse4a_insertq);
18090 arg0 = CALL_EXPR_ARG (exp, 0);
18091 arg1 = CALL_EXPR_ARG (exp, 1);
18092 op0 = expand_normal (arg0);
18093 op1 = expand_normal (arg1);
18094 tmode = insn_data[icode].operand[0].mode;
18095 mode1 = insn_data[icode].operand[1].mode;
18096 mode2 = insn_data[icode].operand[2].mode;
18097 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18098 op0 = copy_to_mode_reg (mode1, op0);
18099 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18100 op1 = copy_to_mode_reg (mode2, op1);
18101 if (optimize || target == 0
18102 || GET_MODE (target) != tmode
18103 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18104 target = gen_reg_rtx (tmode);
18105 pat = GEN_FCN (icode) (target, op0, op1);
18106 if (! pat)
18107 return NULL_RTX;
18108 emit_insn (pat);
18109 return target;
18110
18111 case IX86_BUILTIN_EXTRQI:
18112 icode = CODE_FOR_sse4a_extrqi;
18113 arg0 = CALL_EXPR_ARG (exp, 0);
18114 arg1 = CALL_EXPR_ARG (exp, 1);
18115 arg2 = CALL_EXPR_ARG (exp, 2);
18116 op0 = expand_normal (arg0);
18117 op1 = expand_normal (arg1);
18118 op2 = expand_normal (arg2);
18119 tmode = insn_data[icode].operand[0].mode;
18120 mode1 = insn_data[icode].operand[1].mode;
18121 mode2 = insn_data[icode].operand[2].mode;
18122 mode3 = insn_data[icode].operand[3].mode;
18123 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18124 op0 = copy_to_mode_reg (mode1, op0);
18125 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18126 {
18127 error ("index mask must be an immediate");
18128 return gen_reg_rtx (tmode);
18129 }
18130 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18131 {
18132 error ("length mask must be an immediate");
18133 return gen_reg_rtx (tmode);
18134 }
18135 if (optimize || target == 0
18136 || GET_MODE (target) != tmode
18137 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18138 target = gen_reg_rtx (tmode);
18139 pat = GEN_FCN (icode) (target, op0, op1, op2);
18140 if (! pat)
18141 return NULL_RTX;
18142 emit_insn (pat);
18143 return target;
18144
18145 case IX86_BUILTIN_INSERTQI:
18146 icode = CODE_FOR_sse4a_insertqi;
18147 arg0 = CALL_EXPR_ARG (exp, 0);
18148 arg1 = CALL_EXPR_ARG (exp, 1);
18149 arg2 = CALL_EXPR_ARG (exp, 2);
18150 arg3 = CALL_EXPR_ARG (exp, 3);
18151 op0 = expand_normal (arg0);
18152 op1 = expand_normal (arg1);
18153 op2 = expand_normal (arg2);
18154 op3 = expand_normal (arg3);
18155 tmode = insn_data[icode].operand[0].mode;
18156 mode1 = insn_data[icode].operand[1].mode;
18157 mode2 = insn_data[icode].operand[2].mode;
18158 mode3 = insn_data[icode].operand[3].mode;
18159 mode4 = insn_data[icode].operand[4].mode;
18160
18161 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18162 op0 = copy_to_mode_reg (mode1, op0);
18163
18164 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18165 op1 = copy_to_mode_reg (mode2, op1);
18166
18167 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18168 {
18169 error ("index mask must be an immediate");
18170 return gen_reg_rtx (tmode);
18171 }
18172 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18173 {
18174 error ("length mask must be an immediate");
18175 return gen_reg_rtx (tmode);
18176 }
18177 if (optimize || target == 0
18178 || GET_MODE (target) != tmode
18179 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18180 target = gen_reg_rtx (tmode);
18181 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18182 if (! pat)
18183 return NULL_RTX;
18184 emit_insn (pat);
18185 return target;
18186
18187 case IX86_BUILTIN_VEC_INIT_V2SI:
18188 case IX86_BUILTIN_VEC_INIT_V4HI:
18189 case IX86_BUILTIN_VEC_INIT_V8QI:
18190 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18191
18192 case IX86_BUILTIN_VEC_EXT_V2DF:
18193 case IX86_BUILTIN_VEC_EXT_V2DI:
18194 case IX86_BUILTIN_VEC_EXT_V4SF:
18195 case IX86_BUILTIN_VEC_EXT_V4SI:
18196 case IX86_BUILTIN_VEC_EXT_V8HI:
18197 case IX86_BUILTIN_VEC_EXT_V2SI:
18198 case IX86_BUILTIN_VEC_EXT_V4HI:
18199 return ix86_expand_vec_ext_builtin (exp, target);
18200
18201 case IX86_BUILTIN_VEC_SET_V8HI:
18202 case IX86_BUILTIN_VEC_SET_V4HI:
18203 return ix86_expand_vec_set_builtin (exp);
18204
18205 default:
18206 break;
18207 }
18208
18209 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18210 if (d->code == fcode)
18211 {
18212 /* Compares are treated specially. */
18213 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18214 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18215 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18216 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18217 return ix86_expand_sse_compare (d, exp, target);
18218
18219 return ix86_expand_binop_builtin (d->icode, exp, target);
18220 }
18221
18222 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18223 if (d->code == fcode)
18224 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18225
18226 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18227 if (d->code == fcode)
18228 return ix86_expand_sse_comi (d, exp, target);
18229
18230 gcc_unreachable ();
18231 }
18232
18233 /* Returns a function decl for a vectorized version of the builtin function
18234 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18235 if it is not available. */
18236
18237 static tree
18238 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18239 tree type_in)
18240 {
18241 enum machine_mode in_mode, out_mode;
18242 int in_n, out_n;
18243
18244 if (TREE_CODE (type_out) != VECTOR_TYPE
18245 || TREE_CODE (type_in) != VECTOR_TYPE)
18246 return NULL_TREE;
18247
18248 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18249 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18250 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18251 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18252
18253 switch (fn)
18254 {
18255 case BUILT_IN_SQRT:
18256 if (out_mode == DFmode && out_n == 2
18257 && in_mode == DFmode && in_n == 2)
18258 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18259 return NULL_TREE;
18260
18261 case BUILT_IN_SQRTF:
18262 if (out_mode == SFmode && out_n == 4
18263 && in_mode == SFmode && in_n == 4)
18264 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18265 return NULL_TREE;
18266
18267 case BUILT_IN_LRINTF:
18268 if (out_mode == SImode && out_n == 4
18269 && in_mode == SFmode && in_n == 4)
18270 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18271 return NULL_TREE;
18272
18273 default:
18274 ;
18275 }
18276
18277 return NULL_TREE;
18278 }
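/* Illustrative sketch (flag names are the usual ones, not verified against
   this exact revision): with SSE2 enabled, auto-vectorization, and math
   flags that permit vectorizing sqrt calls, a loop such as

       void f (double *a, const double *b, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = __builtin_sqrt (b[i]);
       }

   makes the vectorizer query this hook with BUILT_IN_SQRT and V2DF in/out
   types, and the IX86_BUILTIN_SQRTPD decl returned here is what gets called
   in the vectorized loop body.  */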
18279
18280 /* Returns a decl of a function that implements conversion of the
18281 input vector of type TYPE, or NULL_TREE if it is not available. */
18282
18283 static tree
18284 ix86_builtin_conversion (enum tree_code code, tree type)
18285 {
18286 if (TREE_CODE (type) != VECTOR_TYPE)
18287 return NULL_TREE;
18288
18289 switch (code)
18290 {
18291 case FLOAT_EXPR:
18292 switch (TYPE_MODE (type))
18293 {
18294 case V4SImode:
18295 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18296 default:
18297 return NULL_TREE;
18298 }
18299
18300 case FIX_TRUNC_EXPR:
18301 switch (TYPE_MODE (type))
18302 {
18303 case V4SFmode:
18304 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18305 default:
18306 return NULL_TREE;
18307 }
18308 default:
18309 return NULL_TREE;
18310
18311 }
18312 }
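/* Illustrative sketch: for an int-to-float conversion loop such as

       void f (float *dst, const int *src, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           dst[i] = (float) src[i];
       }

   the vectorizer asks this hook for FLOAT_EXPR on a V4SImode vector type and
   receives the IX86_BUILTIN_CVTDQ2PS decl; FIX_TRUNC_EXPR on a V4SFmode type
   similarly maps to IX86_BUILTIN_CVTTPS2DQ.  */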
18313
18314 /* Store OPERAND to the memory after reload is completed. This means
18315 that we can't easily use assign_stack_local. */
18316 rtx
18317 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18318 {
18319 rtx result;
18320
18321 gcc_assert (reload_completed);
18322 if (TARGET_RED_ZONE)
18323 {
18324 result = gen_rtx_MEM (mode,
18325 gen_rtx_PLUS (Pmode,
18326 stack_pointer_rtx,
18327 GEN_INT (-RED_ZONE_SIZE)));
18328 emit_move_insn (result, operand);
18329 }
18330 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18331 {
18332 switch (mode)
18333 {
18334 case HImode:
18335 case SImode:
18336 operand = gen_lowpart (DImode, operand);
18337 /* FALLTHRU */
18338 case DImode:
18339 emit_insn (
18340 gen_rtx_SET (VOIDmode,
18341 gen_rtx_MEM (DImode,
18342 gen_rtx_PRE_DEC (DImode,
18343 stack_pointer_rtx)),
18344 operand));
18345 break;
18346 default:
18347 gcc_unreachable ();
18348 }
18349 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18350 }
18351 else
18352 {
18353 switch (mode)
18354 {
18355 case DImode:
18356 {
18357 rtx operands[2];
18358 split_di (&operand, 1, operands, operands + 1);
18359 emit_insn (
18360 gen_rtx_SET (VOIDmode,
18361 gen_rtx_MEM (SImode,
18362 gen_rtx_PRE_DEC (Pmode,
18363 stack_pointer_rtx)),
18364 operands[1]));
18365 emit_insn (
18366 gen_rtx_SET (VOIDmode,
18367 gen_rtx_MEM (SImode,
18368 gen_rtx_PRE_DEC (Pmode,
18369 stack_pointer_rtx)),
18370 operands[0]));
18371 }
18372 break;
18373 case HImode:
18374 /* Store HImodes as SImodes. */
18375 operand = gen_lowpart (SImode, operand);
18376 /* FALLTHRU */
18377 case SImode:
18378 emit_insn (
18379 gen_rtx_SET (VOIDmode,
18380 gen_rtx_MEM (GET_MODE (operand),
18381 gen_rtx_PRE_DEC (SImode,
18382 stack_pointer_rtx)),
18383 operand));
18384 break;
18385 default:
18386 gcc_unreachable ();
18387 }
18388 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18389 }
18390 return result;
18391 }
18392
18393 /* Free operand from the memory. */
18394 void
18395 ix86_free_from_memory (enum machine_mode mode)
18396 {
18397 if (!TARGET_RED_ZONE)
18398 {
18399 int size;
18400
18401 if (mode == DImode || TARGET_64BIT)
18402 size = 8;
18403 else
18404 size = 4;
18405 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18406 to pop or add instruction if registers are available. */
18407 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18408 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18409 GEN_INT (size))));
18410 }
18411 }
18412
18413 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18414 QImode must go into class Q_REGS.
18415 Narrow ALL_REGS to GENERAL_REGS.  This lets movsf and
18416 movdf do mem-to-mem moves through integer regs.  */
18417 enum reg_class
18418 ix86_preferred_reload_class (rtx x, enum reg_class class)
18419 {
18420 enum machine_mode mode = GET_MODE (x);
18421
18422 /* We're only allowed to return a subclass of CLASS. Many of the
18423 following checks fail for NO_REGS, so eliminate that early. */
18424 if (class == NO_REGS)
18425 return NO_REGS;
18426
18427 /* All classes can load zeros. */
18428 if (x == CONST0_RTX (mode))
18429 return class;
18430
18431 /* Force constants into memory if we are loading a (nonzero) constant into
18432 an MMX or SSE register. This is because there are no MMX/SSE instructions
18433 to load from a constant. */
18434 if (CONSTANT_P (x)
18435 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18436 return NO_REGS;
18437
18438 /* Prefer SSE regs only, if we can use them for math. */
18439 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18440 return SSE_CLASS_P (class) ? class : NO_REGS;
18441
18442 /* Floating-point constants need more complex checks. */
18443 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18444 {
18445 /* General regs can load everything. */
18446 if (reg_class_subset_p (class, GENERAL_REGS))
18447 return class;
18448
18449 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18450 zero above. We only want to wind up preferring 80387 registers if
18451 we plan on doing computation with them. */
18452 if (TARGET_80387
18453 && standard_80387_constant_p (x))
18454 {
18455 /* Limit class to non-sse. */
18456 if (class == FLOAT_SSE_REGS)
18457 return FLOAT_REGS;
18458 if (class == FP_TOP_SSE_REGS)
18459 return FP_TOP_REG;
18460 if (class == FP_SECOND_SSE_REGS)
18461 return FP_SECOND_REG;
18462 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18463 return class;
18464 }
18465
18466 return NO_REGS;
18467 }
18468
18469 /* Generally when we see PLUS here, it's the function invariant
18470 (plus soft-fp const_int). Which can only be computed into general
18471 regs. */
18472 if (GET_CODE (x) == PLUS)
18473 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18474
18475 /* QImode constants are easy to load, but non-constant QImode data
18476 must go into Q_REGS. */
18477 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18478 {
18479 if (reg_class_subset_p (class, Q_REGS))
18480 return class;
18481 if (reg_class_subset_p (Q_REGS, class))
18482 return Q_REGS;
18483 return NO_REGS;
18484 }
18485
18486 return class;
18487 }
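/* Illustrative sketch, assuming the emmintrin.h wrapper: the practical
   effect of the CONSTANT_P check above is that a statement such as

       __m128i x = _mm_set1_epi32 (42);

   cannot reload its nonzero constant directly into an SSE register
   (NO_REGS is returned), so the constant is placed in the constant pool
   and loaded from memory instead.  */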
18488
18489 /* Discourage putting floating-point values in SSE registers unless
18490 SSE math is being used, and likewise for the 387 registers. */
18491 enum reg_class
18492 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18493 {
18494 enum machine_mode mode = GET_MODE (x);
18495
18496 /* Restrict the output reload class to the register bank that we are doing
18497 math on. If we would like not to return a subset of CLASS, reject this
18498 alternative: if reload cannot do this, it will still use its choice. */
18499 mode = GET_MODE (x);
18500 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18501 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18502
18503 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18504 {
18505 if (class == FP_TOP_SSE_REGS)
18506 return FP_TOP_REG;
18507 else if (class == FP_SECOND_SSE_REGS)
18508 return FP_SECOND_REG;
18509 else
18510 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18511 }
18512
18513 return class;
18514 }
18515
18516 /* If we are copying between general and FP registers, we need a memory
18517 location. The same is true for SSE and MMX registers.
18518
18519 The macro can't work reliably when one of the CLASSES is a class containing
18520 registers from multiple units (SSE, MMX, integer).  We avoid this by never
18521 combining those units in a single alternative in the machine description.
18522 Ensure that this constraint holds to avoid unexpected surprises.
18523
18524 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18525 enforce these sanity checks. */
18526
18527 int
18528 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18529 enum machine_mode mode, int strict)
18530 {
18531 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18532 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18533 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18534 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18535 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18536 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18537 {
18538 gcc_assert (!strict);
18539 return true;
18540 }
18541
18542 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18543 return true;
18544
18545 /* ??? This is a lie. We do have moves between mmx/general, and for
18546 mmx/sse2. But by saying we need secondary memory we discourage the
18547 register allocator from using the mmx registers unless needed. */
18548 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18549 return true;
18550
18551 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18552 {
18553 /* SSE1 doesn't have any direct moves from other classes. */
18554 if (!TARGET_SSE2)
18555 return true;
18556
18557 /* If the target says that inter-unit moves are more expensive
18558 than moving through memory, then don't generate them. */
18559 if (!TARGET_INTER_UNIT_MOVES)
18560 return true;
18561
18562 /* Between SSE and general, we have moves no larger than word size. */
18563 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18564 return true;
18565 }
18566
18567 return false;
18568 }
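/* Illustrative sketch (assembly is approximate): on a 32-bit target a
   DImode copy between an SSE register and general registers exceeds
   UNITS_PER_WORD, so the function above requests secondary memory and the
   copy goes through a stack slot, roughly

       movq   %xmm0, (%esp)
       movl   (%esp), %eax
       movl   4(%esp), %edx

   rather than through direct inter-unit moves.  */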
18569
18570 /* Return true if the registers in CLASS cannot represent the change from
18571 modes FROM to TO. */
18572
18573 bool
18574 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18575 enum reg_class class)
18576 {
18577 if (from == to)
18578 return false;
18579
18580 /* x87 registers can't do subreg at all, as all values are reformatted
18581 to extended precision. */
18582 if (MAYBE_FLOAT_CLASS_P (class))
18583 return true;
18584
18585 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18586 {
18587 /* Vector registers do not support QI or HImode loads. If we don't
18588 disallow a change to these modes, reload will assume it's ok to
18589 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18590 the vec_dupv4hi pattern. */
18591 if (GET_MODE_SIZE (from) < 4)
18592 return true;
18593
18594 /* Vector registers do not support subreg with nonzero offsets, which
18595 are otherwise valid for integer registers. Since we can't see
18596 whether we have a nonzero offset from here, prohibit all
18597 nonparadoxical subregs changing size. */
18598 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18599 return true;
18600 }
18601
18602 return false;
18603 }
18604
18605 /* Return the cost of moving data from a register in class CLASS1 to
18606 one in class CLASS2.
18607
18608 It is not required that the cost always equal 2 when FROM is the same as TO;
18609 on some machines it is expensive to move between registers if they are not
18610 general registers. */
18611
18612 int
18613 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18614 enum reg_class class2)
18615 {
18616 /* In case we require secondary memory, compute cost of the store followed
18617 by load.  In order to avoid bad register allocation choices, we need
18618 this to be *at least* as high as the symmetric MEMORY_MOVE_COST.  */
18619
18620 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18621 {
18622 int cost = 1;
18623
18624 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18625 MEMORY_MOVE_COST (mode, class1, 1));
18626 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18627 MEMORY_MOVE_COST (mode, class2, 1));
18628
18629 /* In case of copying from a general purpose register we may emit multiple
18630 stores followed by a single load, causing a memory-size-mismatch stall.
18631 Count this as an arbitrarily high cost of 20.  */
18632 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18633 cost += 20;
18634
18635 /* In the case of FP/MMX moves, the registers actually overlap, and we
18636 have to switch modes in order to treat them differently. */
18637 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18638 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18639 cost += 20;
18640
18641 return cost;
18642 }
18643
18644 /* Moves between SSE/MMX and integer unit are expensive. */
18645 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18646 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18647 return ix86_cost->mmxsse_to_integer;
18648 if (MAYBE_FLOAT_CLASS_P (class1))
18649 return ix86_cost->fp_move;
18650 if (MAYBE_SSE_CLASS_P (class1))
18651 return ix86_cost->sse_move;
18652 if (MAYBE_MMX_CLASS_P (class1))
18653 return ix86_cost->mmx_move;
18654 return 2;
18655 }
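/* Worked summary of the cost computed above when secondary memory is
   needed (values come from the active ix86_cost table):

       cost = 1
              + max over in/out of MEMORY_MOVE_COST (mode, class1, ...)
              + max over in/out of MEMORY_MOVE_COST (mode, class2, ...)
              + 20 if class1 needs more hard registers than class2
              + 20 if one class is MMX and the other may be x87

   which is deliberately at least as large as a plain store/load pair, so
   the register allocator prefers not to create such copies.  */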
18656
18657 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18658
18659 bool
18660 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18661 {
18662 /* Flags and only flags can only hold CCmode values. */
18663 if (CC_REGNO_P (regno))
18664 return GET_MODE_CLASS (mode) == MODE_CC;
18665 if (GET_MODE_CLASS (mode) == MODE_CC
18666 || GET_MODE_CLASS (mode) == MODE_RANDOM
18667 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18668 return 0;
18669 if (FP_REGNO_P (regno))
18670 return VALID_FP_MODE_P (mode);
18671 if (SSE_REGNO_P (regno))
18672 {
18673 /* We implement the move patterns for all vector modes into and
18674 out of SSE registers, even when no operation instructions
18675 are available. */
18676 return (VALID_SSE_REG_MODE (mode)
18677 || VALID_SSE2_REG_MODE (mode)
18678 || VALID_MMX_REG_MODE (mode)
18679 || VALID_MMX_REG_MODE_3DNOW (mode));
18680 }
18681 if (MMX_REGNO_P (regno))
18682 {
18683 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18684 so if the register is available at all, then we can move data of
18685 the given mode into or out of it. */
18686 return (VALID_MMX_REG_MODE (mode)
18687 || VALID_MMX_REG_MODE_3DNOW (mode));
18688 }
18689
18690 if (mode == QImode)
18691 {
18692 /* Take care with QImode values: they can live in non-QI regs,
18693 but then they cause partial register stalls.  */
18694 if (regno < 4 || TARGET_64BIT)
18695 return 1;
18696 if (!TARGET_PARTIAL_REG_STALL)
18697 return 1;
18698 return reload_in_progress || reload_completed;
18699 }
18700 /* We handle both integer and floats in the general purpose registers. */
18701 else if (VALID_INT_MODE_P (mode))
18702 return 1;
18703 else if (VALID_FP_MODE_P (mode))
18704 return 1;
18705 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18706 on to use that value in smaller contexts, this can easily force a
18707 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18708 supporting DImode, allow it. */
18709 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18710 return 1;
18711
18712 return 0;
18713 }
18714
18715 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18716 tieable integer mode. */
18717
18718 static bool
18719 ix86_tieable_integer_mode_p (enum machine_mode mode)
18720 {
18721 switch (mode)
18722 {
18723 case HImode:
18724 case SImode:
18725 return true;
18726
18727 case QImode:
18728 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18729
18730 case DImode:
18731 return TARGET_64BIT;
18732
18733 default:
18734 return false;
18735 }
18736 }
18737
18738 /* Return true if MODE1 is accessible in a register that can hold MODE2
18739 without copying. That is, all register classes that can hold MODE2
18740 can also hold MODE1. */
18741
18742 bool
18743 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18744 {
18745 if (mode1 == mode2)
18746 return true;
18747
18748 if (ix86_tieable_integer_mode_p (mode1)
18749 && ix86_tieable_integer_mode_p (mode2))
18750 return true;
18751
18752 /* MODE2 being XFmode implies fp stack or general regs, which means we
18753 can tie any smaller floating point modes to it. Note that we do not
18754 tie this with TFmode. */
18755 if (mode2 == XFmode)
18756 return mode1 == SFmode || mode1 == DFmode;
18757
18758 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18759 that we can tie it with SFmode. */
18760 if (mode2 == DFmode)
18761 return mode1 == SFmode;
18762
18763 /* If MODE2 is only appropriate for an SSE register, then tie with
18764 any other mode acceptable to SSE registers. */
18765 if (GET_MODE_SIZE (mode2) >= 8
18766 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18767 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18768
18769 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18770 with any other mode acceptable to MMX registers. */
18771 if (GET_MODE_SIZE (mode2) == 8
18772 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18773 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18774
18775 return false;
18776 }
18777
18778 /* Return the cost of moving data of mode M between a
18779 register and memory. A value of 2 is the default; this cost is
18780 relative to those in `REGISTER_MOVE_COST'.
18781
18782 If moving between registers and memory is more expensive than
18783 between two registers, you should define this macro to express the
18784 relative cost.
18785
18786 Also model the increased cost of moving QImode registers in
18787 non-Q_REGS classes.
18788 */
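/* Illustrative example of the default (integer) case below: a TFmode
   value moved through GENERAL_REGS is costed as an XFmode value, which
   on ia32 is 12 bytes, i.e. (12 + 3) / 4 = 3 word moves, so the cost is
   3 * int_load[2] (or 3 * int_store[2] for a store).  */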
18789 int
18790 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18791 {
18792 if (FLOAT_CLASS_P (class))
18793 {
18794 int index;
18795 switch (mode)
18796 {
18797 case SFmode:
18798 index = 0;
18799 break;
18800 case DFmode:
18801 index = 1;
18802 break;
18803 case XFmode:
18804 index = 2;
18805 break;
18806 default:
18807 return 100;
18808 }
18809 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18810 }
18811 if (SSE_CLASS_P (class))
18812 {
18813 int index;
18814 switch (GET_MODE_SIZE (mode))
18815 {
18816 case 4:
18817 index = 0;
18818 break;
18819 case 8:
18820 index = 1;
18821 break;
18822 case 16:
18823 index = 2;
18824 break;
18825 default:
18826 return 100;
18827 }
18828 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18829 }
18830 if (MMX_CLASS_P (class))
18831 {
18832 int index;
18833 switch (GET_MODE_SIZE (mode))
18834 {
18835 case 4:
18836 index = 0;
18837 break;
18838 case 8:
18839 index = 1;
18840 break;
18841 default:
18842 return 100;
18843 }
18844 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18845 }
18846 switch (GET_MODE_SIZE (mode))
18847 {
18848 case 1:
18849 if (in)
18850 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18851 : ix86_cost->movzbl_load);
18852 else
18853 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18854 : ix86_cost->int_store[0] + 4);
18855 break;
18856 case 2:
18857 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18858 default:
18859 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18860 if (mode == TFmode)
18861 mode = XFmode;
18862 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18863 * (((int) GET_MODE_SIZE (mode)
18864 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18865 }
18866 }
18867
18868 /* Compute a (partial) cost for rtx X. Return true if the complete
18869 cost has been computed, and false if subexpressions should be
18870 scanned. In either case, *TOTAL contains the cost result. */
18871
18872 static bool
18873 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18874 {
18875 enum machine_mode mode = GET_MODE (x);
18876
18877 switch (code)
18878 {
18879 case CONST_INT:
18880 case CONST:
18881 case LABEL_REF:
18882 case SYMBOL_REF:
18883 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18884 *total = 3;
18885 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18886 *total = 2;
18887 else if (flag_pic && SYMBOLIC_CONST (x)
18888 && (!TARGET_64BIT
18889 || (GET_CODE (x) != LABEL_REF
18890 && (GET_CODE (x) != SYMBOL_REF
18891 || !SYMBOL_REF_LOCAL_P (x)))))
18892 *total = 1;
18893 else
18894 *total = 0;
18895 return true;
18896
18897 case CONST_DOUBLE:
18898 if (mode == VOIDmode)
18899 *total = 0;
18900 else
18901 switch (standard_80387_constant_p (x))
18902 {
18903 case 1: /* 0.0 */
18904 *total = 1;
18905 break;
18906 default: /* Other constants */
18907 *total = 2;
18908 break;
18909 case 0:
18910 case -1:
18911 /* Start with (MEM (SYMBOL_REF)), since that's where
18912 it'll probably end up. Add a penalty for size. */
18913 *total = (COSTS_N_INSNS (1)
18914 + (flag_pic != 0 && !TARGET_64BIT)
18915 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18916 break;
18917 }
18918 return true;
18919
18920 case ZERO_EXTEND:
18921 /* The zero extension is often completely free on x86_64, so make
18922 it as cheap as possible. */
18923 if (TARGET_64BIT && mode == DImode
18924 && GET_MODE (XEXP (x, 0)) == SImode)
18925 *total = 1;
18926 else if (TARGET_ZERO_EXTEND_WITH_AND)
18927 *total = ix86_cost->add;
18928 else
18929 *total = ix86_cost->movzx;
18930 return false;
18931
18932 case SIGN_EXTEND:
18933 *total = ix86_cost->movsx;
18934 return false;
18935
18936 case ASHIFT:
18937 if (CONST_INT_P (XEXP (x, 1))
18938 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18939 {
18940 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18941 if (value == 1)
18942 {
18943 *total = ix86_cost->add;
18944 return false;
18945 }
18946 if ((value == 2 || value == 3)
18947 && ix86_cost->lea <= ix86_cost->shift_const)
18948 {
18949 *total = ix86_cost->lea;
18950 return false;
18951 }
18952 }
18953 /* FALLTHRU */
18954
18955 case ROTATE:
18956 case ASHIFTRT:
18957 case LSHIFTRT:
18958 case ROTATERT:
18959 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18960 {
18961 if (CONST_INT_P (XEXP (x, 1)))
18962 {
18963 if (INTVAL (XEXP (x, 1)) > 32)
18964 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18965 else
18966 *total = ix86_cost->shift_const * 2;
18967 }
18968 else
18969 {
18970 if (GET_CODE (XEXP (x, 1)) == AND)
18971 *total = ix86_cost->shift_var * 2;
18972 else
18973 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18974 }
18975 }
18976 else
18977 {
18978 if (CONST_INT_P (XEXP (x, 1)))
18979 *total = ix86_cost->shift_const;
18980 else
18981 *total = ix86_cost->shift_var;
18982 }
18983 return false;
18984
18985 case MULT:
18986 if (FLOAT_MODE_P (mode))
18987 {
18988 *total = ix86_cost->fmul;
18989 return false;
18990 }
18991 else
18992 {
18993 rtx op0 = XEXP (x, 0);
18994 rtx op1 = XEXP (x, 1);
18995 int nbits;
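/* Estimate the multiplier's contribution to the cost: for a constant
   multiplier, count its set bits (the loop below clears the lowest set
   bit on each iteration); for a non-constant multiplier assume 7.  */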
18996 if (CONST_INT_P (XEXP (x, 1)))
18997 {
18998 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18999 for (nbits = 0; value != 0; value &= value - 1)
19000 nbits++;
19001 }
19002 else
19003 /* This is arbitrary. */
19004 nbits = 7;
19005
19006 /* Compute costs correctly for widening multiplication. */
19007 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19008 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19009 == GET_MODE_SIZE (mode))
19010 {
19011 int is_mulwiden = 0;
19012 enum machine_mode inner_mode = GET_MODE (op0);
19013
19014 if (GET_CODE (op0) == GET_CODE (op1))
19015 is_mulwiden = 1, op1 = XEXP (op1, 0);
19016 else if (CONST_INT_P (op1))
19017 {
19018 if (GET_CODE (op0) == SIGN_EXTEND)
19019 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19020 == INTVAL (op1);
19021 else
19022 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19023 }
19024
19025 if (is_mulwiden)
19026 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19027 }
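/* For instance, a widening multiply written in C as
     long long p = (long long) x * y;   // x, y of type int (illustrative)
   reaches here as (mult:DI (sign_extend:DI x) (sign_extend:DI y)) and,
   thanks to the adjustment above, is costed as an SImode multiply
   rather than a full DImode one.  */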
19028
19029 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19030 + nbits * ix86_cost->mult_bit
19031 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19032
19033 return true;
19034 }
19035
19036 case DIV:
19037 case UDIV:
19038 case MOD:
19039 case UMOD:
19040 if (FLOAT_MODE_P (mode))
19041 *total = ix86_cost->fdiv;
19042 else
19043 *total = ix86_cost->divide[MODE_INDEX (mode)];
19044 return false;
19045
19046 case PLUS:
19047 if (FLOAT_MODE_P (mode))
19048 *total = ix86_cost->fadd;
19049 else if (GET_MODE_CLASS (mode) == MODE_INT
19050 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19051 {
19052 if (GET_CODE (XEXP (x, 0)) == PLUS
19053 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19054 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19055 && CONSTANT_P (XEXP (x, 1)))
19056 {
19057 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19058 if (val == 2 || val == 4 || val == 8)
19059 {
19060 *total = ix86_cost->lea;
19061 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19062 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19063 outer_code);
19064 *total += rtx_cost (XEXP (x, 1), outer_code);
19065 return true;
19066 }
19067 }
19068 else if (GET_CODE (XEXP (x, 0)) == MULT
19069 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19070 {
19071 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19072 if (val == 2 || val == 4 || val == 8)
19073 {
19074 *total = ix86_cost->lea;
19075 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19076 *total += rtx_cost (XEXP (x, 1), outer_code);
19077 return true;
19078 }
19079 }
19080 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19081 {
19082 *total = ix86_cost->lea;
19083 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19084 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19085 *total += rtx_cost (XEXP (x, 1), outer_code);
19086 return true;
19087 }
19088 }
19089 /* FALLTHRU */
19090
19091 case MINUS:
19092 if (FLOAT_MODE_P (mode))
19093 {
19094 *total = ix86_cost->fadd;
19095 return false;
19096 }
19097 /* FALLTHRU */
19098
19099 case AND:
19100 case IOR:
19101 case XOR:
19102 if (!TARGET_64BIT && mode == DImode)
19103 {
19104 *total = (ix86_cost->add * 2
19105 + (rtx_cost (XEXP (x, 0), outer_code)
19106 << (GET_MODE (XEXP (x, 0)) != DImode))
19107 + (rtx_cost (XEXP (x, 1), outer_code)
19108 << (GET_MODE (XEXP (x, 1)) != DImode)));
19109 return true;
19110 }
19111 /* FALLTHRU */
19112
19113 case NEG:
19114 if (FLOAT_MODE_P (mode))
19115 {
19116 *total = ix86_cost->fchs;
19117 return false;
19118 }
19119 /* FALLTHRU */
19120
19121 case NOT:
19122 if (!TARGET_64BIT && mode == DImode)
19123 *total = ix86_cost->add * 2;
19124 else
19125 *total = ix86_cost->add;
19126 return false;
19127
19128 case COMPARE:
19129 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19130 && XEXP (XEXP (x, 0), 1) == const1_rtx
19131 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19132 && XEXP (x, 1) == const0_rtx)
19133 {
19134 /* This kind of construct is implemented using test[bwl].
19135 Treat it as if we had an AND. */
19136 *total = (ix86_cost->add
19137 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19138 + rtx_cost (const1_rtx, outer_code));
19139 return true;
19140 }
19141 return false;
19142
19143 case FLOAT_EXTEND:
19144 if (!TARGET_SSE_MATH
19145 || mode == XFmode
19146 || (mode == DFmode && !TARGET_SSE2))
19147 *total = 0;
19148 return false;
19149
19150 case ABS:
19151 if (FLOAT_MODE_P (mode))
19152 *total = ix86_cost->fabs;
19153 return false;
19154
19155 case SQRT:
19156 if (FLOAT_MODE_P (mode))
19157 *total = ix86_cost->fsqrt;
19158 return false;
19159
19160 case UNSPEC:
19161 if (XINT (x, 1) == UNSPEC_TP)
19162 *total = 0;
19163 return false;
19164
19165 default:
19166 return false;
19167 }
19168 }
19169
19170 #if TARGET_MACHO
19171
19172 static int current_machopic_label_num;
19173
19174 /* Given a symbol name and its associated stub, write out the
19175 definition of the stub. */
19176
19177 void
19178 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19179 {
19180 unsigned int length;
19181 char *binder_name, *symbol_name, lazy_ptr_name[32];
19182 int label = ++current_machopic_label_num;
19183
19184 /* For 64-bit we shouldn't get here. */
19185 gcc_assert (!TARGET_64BIT);
19186
19187 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19188 symb = (*targetm.strip_name_encoding) (symb);
19189
19190 length = strlen (stub);
19191 binder_name = alloca (length + 32);
19192 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19193
19194 length = strlen (symb);
19195 symbol_name = alloca (length + 32);
19196 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19197
19198 sprintf (lazy_ptr_name, "L%d$lz", label);
19199
19200 if (MACHOPIC_PURE)
19201 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19202 else
19203 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19204
19205 fprintf (file, "%s:\n", stub);
19206 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19207
19208 if (MACHOPIC_PURE)
19209 {
19210 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19211 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19212 fprintf (file, "\tjmp\t*%%edx\n");
19213 }
19214 else
19215 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19216
19217 fprintf (file, "%s:\n", binder_name);
19218
19219 if (MACHOPIC_PURE)
19220 {
19221 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19222 fprintf (file, "\tpushl\t%%eax\n");
19223 }
19224 else
19225 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19226
19227 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19228
19229 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19230 fprintf (file, "%s:\n", lazy_ptr_name);
19231 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19232 fprintf (file, "\t.long %s\n", binder_name);
19233 }
19234
19235 void
19236 darwin_x86_file_end (void)
19237 {
19238 darwin_file_end ();
19239 ix86_file_end ();
19240 }
19241 #endif /* TARGET_MACHO */
19242
19243 /* Order the registers for register allocator. */
19244
19245 void
19246 x86_order_regs_for_local_alloc (void)
19247 {
19248 int pos = 0;
19249 int i;
19250
19251 /* First allocate the local general purpose registers. */
19252 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19253 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19254 reg_alloc_order [pos++] = i;
19255
19256 /* Global general purpose registers. */
19257 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19258 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19259 reg_alloc_order [pos++] = i;
19260
19261 /* x87 registers come first in case we are doing FP math
19262 using them. */
19263 if (!TARGET_SSE_MATH)
19264 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19265 reg_alloc_order [pos++] = i;
19266
19267 /* SSE registers. */
19268 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19269 reg_alloc_order [pos++] = i;
19270 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19271 reg_alloc_order [pos++] = i;
19272
19273 /* x87 registers. */
19274 if (TARGET_SSE_MATH)
19275 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19276 reg_alloc_order [pos++] = i;
19277
19278 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19279 reg_alloc_order [pos++] = i;
19280
19281 /* Initialize the rest of the array, as some registers are never
19282 allocated at all. */
19283 while (pos < FIRST_PSEUDO_REGISTER)
19284 reg_alloc_order [pos++] = 0;
19285 }
19286
19287 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19288 struct attribute_spec.handler. */
19289 static tree
19290 ix86_handle_struct_attribute (tree *node, tree name,
19291 tree args ATTRIBUTE_UNUSED,
19292 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19293 {
19294 tree *type = NULL;
19295 if (DECL_P (*node))
19296 {
19297 if (TREE_CODE (*node) == TYPE_DECL)
19298 type = &TREE_TYPE (*node);
19299 }
19300 else
19301 type = node;
19302
19303 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19304 || TREE_CODE (*type) == UNION_TYPE)))
19305 {
19306 warning (OPT_Wattributes, "%qs attribute ignored",
19307 IDENTIFIER_POINTER (name));
19308 *no_add_attrs = true;
19309 }
19310
19311 else if ((is_attribute_p ("ms_struct", name)
19312 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19313 || ((is_attribute_p ("gcc_struct", name)
19314 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19315 {
19316 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19317 IDENTIFIER_POINTER (name));
19318 *no_add_attrs = true;
19319 }
19320
19321 return NULL_TREE;
19322 }
19323
19324 static bool
19325 ix86_ms_bitfield_layout_p (tree record_type)
19326 {
19327 return (TARGET_MS_BITFIELD_LAYOUT &&
19328 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19329 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19330 }
19331
19332 /* Returns an expression indicating where the this parameter is
19333 located on entry to the FUNCTION. */
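/* In practice: on 64-bit targets `this' arrives in %rdi (or %rsi when
   the return value is passed via a hidden pointer); for 32-bit regparm
   or fastcall functions it arrives in %eax or %ecx respectively;
   otherwise it is on the stack at 4(%esp), or 8(%esp) when a hidden
   return pointer is present.  */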
19334
19335 static rtx
19336 x86_this_parameter (tree function)
19337 {
19338 tree type = TREE_TYPE (function);
19339
19340 if (TARGET_64BIT)
19341 {
19342 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19343 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19344 }
19345
19346 if (ix86_function_regparm (type, function) > 0)
19347 {
19348 tree parm;
19349
19350 parm = TYPE_ARG_TYPES (type);
19351 /* Figure out whether or not the function has a variable number of
19352 arguments. */
19353 for (; parm; parm = TREE_CHAIN (parm))
19354 if (TREE_VALUE (parm) == void_type_node)
19355 break;
19356 /* If not, the this parameter is in the first argument. */
19357 if (parm)
19358 {
19359 int regno = 0;
19360 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19361 regno = 2;
19362 return gen_rtx_REG (SImode, regno);
19363 }
19364 }
19365
19366 if (aggregate_value_p (TREE_TYPE (type), type))
19367 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19368 else
19369 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19370 }
19371
19372 /* Determine whether x86_output_mi_thunk can succeed. */
19373
19374 static bool
19375 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19376 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19377 HOST_WIDE_INT vcall_offset, tree function)
19378 {
19379 /* 64-bit can handle anything. */
19380 if (TARGET_64BIT)
19381 return true;
19382
19383 /* For 32-bit, everything's fine if we have one free register. */
19384 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19385 return true;
19386
19387 /* Need a free register for vcall_offset. */
19388 if (vcall_offset)
19389 return false;
19390
19391 /* Need a free register for GOT references. */
19392 if (flag_pic && !(*targetm.binds_local_p) (function))
19393 return false;
19394
19395 /* Otherwise ok. */
19396 return true;
19397 }
19398
19399 /* Output the assembler code for a thunk function. THUNK_DECL is the
19400 declaration for the thunk function itself, FUNCTION is the decl for
19401 the target function. DELTA is an immediate constant offset to be
19402 added to THIS. If VCALL_OFFSET is nonzero, the word at
19403 *(*this + vcall_offset) should be added to THIS. */
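/* In C-like pseudo code, the emitted thunk is roughly:

     this += DELTA;
     if (VCALL_OFFSET)
       this += *(ptrdiff_t *) (*(char **) this + VCALL_OFFSET);
     goto FUNCTION;   // tail jump; the other arguments are untouched
*/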
19404
19405 static void
19406 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19407 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19408 HOST_WIDE_INT vcall_offset, tree function)
19409 {
19410 rtx xops[3];
19411 rtx this = x86_this_parameter (function);
19412 rtx this_reg, tmp;
19413
19414 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19415 pull it in now and let DELTA benefit. */
19416 if (REG_P (this))
19417 this_reg = this;
19418 else if (vcall_offset)
19419 {
19420 /* Put the this parameter into %eax. */
19421 xops[0] = this;
19422 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19423 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19424 }
19425 else
19426 this_reg = NULL_RTX;
19427
19428 /* Adjust the this parameter by a fixed constant. */
19429 if (delta)
19430 {
19431 xops[0] = GEN_INT (delta);
19432 xops[1] = this_reg ? this_reg : this;
19433 if (TARGET_64BIT)
19434 {
19435 if (!x86_64_general_operand (xops[0], DImode))
19436 {
19437 tmp = gen_rtx_REG (DImode, R10_REG);
19438 xops[1] = tmp;
19439 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19440 xops[0] = tmp;
19441 xops[1] = this;
19442 }
19443 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19444 }
19445 else
19446 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19447 }
19448
19449 /* Adjust the this parameter by a value stored in the vtable. */
19450 if (vcall_offset)
19451 {
19452 if (TARGET_64BIT)
19453 tmp = gen_rtx_REG (DImode, R10_REG);
19454 else
19455 {
19456 int tmp_regno = 2 /* ECX */;
19457 if (lookup_attribute ("fastcall",
19458 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19459 tmp_regno = 0 /* EAX */;
19460 tmp = gen_rtx_REG (SImode, tmp_regno);
19461 }
19462
19463 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19464 xops[1] = tmp;
19465 if (TARGET_64BIT)
19466 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19467 else
19468 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19469
19470 /* Adjust the this parameter. */
19471 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19472 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19473 {
19474 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19475 xops[0] = GEN_INT (vcall_offset);
19476 xops[1] = tmp2;
19477 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19478 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19479 }
19480 xops[1] = this_reg;
19481 if (TARGET_64BIT)
19482 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19483 else
19484 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19485 }
19486
19487 /* If necessary, drop THIS back to its stack slot. */
19488 if (this_reg && this_reg != this)
19489 {
19490 xops[0] = this_reg;
19491 xops[1] = this;
19492 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19493 }
19494
19495 xops[0] = XEXP (DECL_RTL (function), 0);
19496 if (TARGET_64BIT)
19497 {
19498 if (!flag_pic || (*targetm.binds_local_p) (function))
19499 output_asm_insn ("jmp\t%P0", xops);
19500 else
19501 {
19502 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19503 tmp = gen_rtx_CONST (Pmode, tmp);
19504 tmp = gen_rtx_MEM (QImode, tmp);
19505 xops[0] = tmp;
19506 output_asm_insn ("jmp\t%A0", xops);
19507 }
19508 }
19509 else
19510 {
19511 if (!flag_pic || (*targetm.binds_local_p) (function))
19512 output_asm_insn ("jmp\t%P0", xops);
19513 else
19514 #if TARGET_MACHO
19515 if (TARGET_MACHO)
19516 {
19517 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19518 tmp = (gen_rtx_SYMBOL_REF
19519 (Pmode,
19520 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19521 tmp = gen_rtx_MEM (QImode, tmp);
19522 xops[0] = tmp;
19523 output_asm_insn ("jmp\t%0", xops);
19524 }
19525 else
19526 #endif /* TARGET_MACHO */
19527 {
19528 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19529 output_set_got (tmp, NULL_RTX);
19530
19531 xops[1] = tmp;
19532 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19533 output_asm_insn ("jmp\t{*}%1", xops);
19534 }
19535 }
19536 }
19537
19538 static void
19539 x86_file_start (void)
19540 {
19541 default_file_start ();
19542 #if TARGET_MACHO
19543 darwin_file_start ();
19544 #endif
19545 if (X86_FILE_START_VERSION_DIRECTIVE)
19546 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19547 if (X86_FILE_START_FLTUSED)
19548 fputs ("\t.global\t__fltused\n", asm_out_file);
19549 if (ix86_asm_dialect == ASM_INTEL)
19550 fputs ("\t.intel_syntax\n", asm_out_file);
19551 }
19552
19553 int
19554 x86_field_alignment (tree field, int computed)
19555 {
19556 enum machine_mode mode;
19557 tree type = TREE_TYPE (field);
19558
19559 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19560 return computed;
19561 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19562 ? get_inner_array_type (type) : type);
19563 if (mode == DFmode || mode == DCmode
19564 || GET_MODE_CLASS (mode) == MODE_INT
19565 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19566 return MIN (32, computed);
19567 return computed;
19568 }
19569
19570 /* Output assembler code to FILE to increment profiler label # LABELNO
19571 for profiling a function entry. */
19572 void
19573 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19574 {
19575 if (TARGET_64BIT)
19576 if (flag_pic)
19577 {
19578 #ifndef NO_PROFILE_COUNTERS
19579 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19580 #endif
19581 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19582 }
19583 else
19584 {
19585 #ifndef NO_PROFILE_COUNTERS
19586 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19587 #endif
19588 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19589 }
19590 else if (flag_pic)
19591 {
19592 #ifndef NO_PROFILE_COUNTERS
19593 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19594 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19595 #endif
19596 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19597 }
19598 else
19599 {
19600 #ifndef NO_PROFILE_COUNTERS
19601 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19602 PROFILE_COUNT_REGISTER);
19603 #endif
19604 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19605 }
19606 }
19607
19608 /* We don't have exact information about the insn sizes, but we may assume
19609 quite safely that we are informed about all 1 byte insns and memory
19610 address sizes. This is enough to eliminate unnecessary padding in
19611 99% of cases. */
19612
19613 static int
19614 min_insn_size (rtx insn)
19615 {
19616 int l = 0;
19617
19618 if (!INSN_P (insn) || !active_insn_p (insn))
19619 return 0;
19620
19621 /* Discard the alignments we've emitted ourselves, and jump tables. */
19622 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19623 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19624 return 0;
19625 if (JUMP_P (insn)
19626 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19627 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19628 return 0;
19629
19630 /* Important case - calls are always 5 bytes.
19631 It is common to have many calls in a row. */
19632 if (CALL_P (insn)
19633 && symbolic_reference_mentioned_p (PATTERN (insn))
19634 && !SIBLING_CALL_P (insn))
19635 return 5;
19636 if (get_attr_length (insn) <= 1)
19637 return 1;
19638
19639 /* For normal instructions we may rely on the sizes of addresses
19640 and on the presence of a symbol to require 4 bytes of encoding.
19641 This is not the case for jumps, where references are PC relative. */
19642 if (!JUMP_P (insn))
19643 {
19644 l = get_attr_length_address (insn);
19645 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19646 l = 4;
19647 }
19648 if (l)
19649 return 1+l;
19650 else
19651 return 2;
19652 }
19653
19654 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19655 window. */
19656
19657 static void
19658 ix86_avoid_jump_misspredicts (void)
19659 {
19660 rtx insn, start = get_insns ();
19661 int nbytes = 0, njumps = 0;
19662 int isjump = 0;
19663
19664 /* Look for all minimal intervals of instructions containing 4 jumps.
19665 The intervals are bounded by START and INSN. NBYTES is the total
19666 size of instructions in the interval including INSN and not including
19667 START.  When NBYTES is smaller than 16 bytes, it is possible that
19668 the ends of START and INSN land in the same 16-byte page.
19669 
19670 The smallest offset in the page at which INSN can start is when START
19671 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
19672 We add a p2align to the 16-byte window with maxskip 15 - NBYTES + sizeof (INSN).
19673 */
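/* For example, if the instructions in the interval total nbytes = 12
   and the closing jump is 2 bytes long, we emit an alignment with max
   skip 15 - 12 + 2 = 5 in front of that jump.  */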
19674 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19675 {
19676
19677 nbytes += min_insn_size (insn);
19678 if (dump_file)
19679 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19680 INSN_UID (insn), min_insn_size (insn));
19681 if ((JUMP_P (insn)
19682 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19683 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19684 || CALL_P (insn))
19685 njumps++;
19686 else
19687 continue;
19688
19689 while (njumps > 3)
19690 {
19691 start = NEXT_INSN (start);
19692 if ((JUMP_P (start)
19693 && GET_CODE (PATTERN (start)) != ADDR_VEC
19694 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19695 || CALL_P (start))
19696 njumps--, isjump = 1;
19697 else
19698 isjump = 0;
19699 nbytes -= min_insn_size (start);
19700 }
19701 gcc_assert (njumps >= 0);
19702 if (dump_file)
19703 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19704 INSN_UID (start), INSN_UID (insn), nbytes);
19705
19706 if (njumps == 3 && isjump && nbytes < 16)
19707 {
19708 int padsize = 15 - nbytes + min_insn_size (insn);
19709
19710 if (dump_file)
19711 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19712 INSN_UID (insn), padsize);
19713 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19714 }
19715 }
19716 }
19717
19718 /* AMD Athlon works faster
19719 when RET is not the destination of a conditional jump or directly preceded
19720 by another jump instruction.  We avoid the penalty by inserting a NOP just
19721 before the RET instruction in such cases. */
19722 static void
19723 ix86_pad_returns (void)
19724 {
19725 edge e;
19726 edge_iterator ei;
19727
19728 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19729 {
19730 basic_block bb = e->src;
19731 rtx ret = BB_END (bb);
19732 rtx prev;
19733 bool replace = false;
19734
19735 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19736 || !maybe_hot_bb_p (bb))
19737 continue;
19738 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19739 if (active_insn_p (prev) || LABEL_P (prev))
19740 break;
19741 if (prev && LABEL_P (prev))
19742 {
19743 edge e;
19744 edge_iterator ei;
19745
19746 FOR_EACH_EDGE (e, ei, bb->preds)
19747 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19748 && !(e->flags & EDGE_FALLTHRU))
19749 replace = true;
19750 }
19751 if (!replace)
19752 {
19753 prev = prev_active_insn (ret);
19754 if (prev
19755 && ((JUMP_P (prev) && any_condjump_p (prev))
19756 || CALL_P (prev)))
19757 replace = true;
19758 /* Empty functions get a branch mispredict even when the jump destination
19759 is not visible to us. */
19760 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19761 replace = true;
19762 }
19763 if (replace)
19764 {
19765 emit_insn_before (gen_return_internal_long (), ret);
19766 delete_insn (ret);
19767 }
19768 }
19769 }
19770
19771 /* Implement machine specific optimizations. We implement padding of returns
19772 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19773 static void
19774 ix86_reorg (void)
19775 {
19776 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19777 ix86_pad_returns ();
19778 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19779 ix86_avoid_jump_misspredicts ();
19780 }
19781
19782 /* Return nonzero when QImode register that must be represented via REX prefix
19783 is used. */
19784 bool
19785 x86_extended_QIreg_mentioned_p (rtx insn)
19786 {
19787 int i;
19788 extract_insn_cached (insn);
19789 for (i = 0; i < recog_data.n_operands; i++)
19790 if (REG_P (recog_data.operand[i])
19791 && REGNO (recog_data.operand[i]) >= 4)
19792 return true;
19793 return false;
19794 }
19795
19796 /* Return nonzero when P points to register encoded via REX prefix.
19797 Called via for_each_rtx. */
19798 static int
19799 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19800 {
19801 unsigned int regno;
19802 if (!REG_P (*p))
19803 return 0;
19804 regno = REGNO (*p);
19805 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19806 }
19807
19808 /* Return true when INSN mentions register that must be encoded using REX
19809 prefix. */
19810 bool
19811 x86_extended_reg_mentioned_p (rtx insn)
19812 {
19813 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19814 }
19815
19816 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19817 optabs would emit if we didn't have TFmode patterns. */
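/* A sketch of the logic that gets expanded, written as C and assuming a
   64-bit input converted to double (names are illustrative only):

     double u64_to_fp (unsigned long long in)
     {
       if ((long long) in >= 0)
         return (double) (long long) in;            // plain signed convert
       unsigned long long half = (in >> 1) | (in & 1);  // keep low bit for rounding
       double f = (double) (long long) half;
       return f + f;                                // undo the halving
     }
*/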
19818
19819 void
19820 x86_emit_floatuns (rtx operands[2])
19821 {
19822 rtx neglab, donelab, i0, i1, f0, in, out;
19823 enum machine_mode mode, inmode;
19824
19825 inmode = GET_MODE (operands[1]);
19826 gcc_assert (inmode == SImode || inmode == DImode);
19827
19828 out = operands[0];
19829 in = force_reg (inmode, operands[1]);
19830 mode = GET_MODE (out);
19831 neglab = gen_label_rtx ();
19832 donelab = gen_label_rtx ();
19833 f0 = gen_reg_rtx (mode);
19834
19835 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19836
19837 expand_float (out, in, 0);
19838
19839 emit_jump_insn (gen_jump (donelab));
19840 emit_barrier ();
19841
19842 emit_label (neglab);
19843
19844 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19845 1, OPTAB_DIRECT);
19846 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19847 1, OPTAB_DIRECT);
19848 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19849
19850 expand_float (f0, i0, 0);
19851
19852 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19853
19854 emit_label (donelab);
19855 }
19856 \f
19857 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19858 with all elements equal to VAR. Return true if successful. */
19859
19860 static bool
19861 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19862 rtx target, rtx val)
19863 {
19864 enum machine_mode smode, wsmode, wvmode;
19865 rtx x;
19866
19867 switch (mode)
19868 {
19869 case V2SImode:
19870 case V2SFmode:
19871 if (!mmx_ok)
19872 return false;
19873 /* FALLTHRU */
19874
19875 case V2DFmode:
19876 case V2DImode:
19877 case V4SFmode:
19878 case V4SImode:
19879 val = force_reg (GET_MODE_INNER (mode), val);
19880 x = gen_rtx_VEC_DUPLICATE (mode, val);
19881 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19882 return true;
19883
19884 case V4HImode:
19885 if (!mmx_ok)
19886 return false;
19887 if (TARGET_SSE || TARGET_3DNOW_A)
19888 {
19889 val = gen_lowpart (SImode, val);
19890 x = gen_rtx_TRUNCATE (HImode, val);
19891 x = gen_rtx_VEC_DUPLICATE (mode, x);
19892 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19893 return true;
19894 }
19895 else
19896 {
19897 smode = HImode;
19898 wsmode = SImode;
19899 wvmode = V2SImode;
19900 goto widen;
19901 }
19902
19903 case V8QImode:
19904 if (!mmx_ok)
19905 return false;
19906 smode = QImode;
19907 wsmode = HImode;
19908 wvmode = V4HImode;
19909 goto widen;
19910 case V8HImode:
19911 if (TARGET_SSE2)
19912 {
19913 rtx tmp1, tmp2;
19914 /* Extend HImode to SImode using a paradoxical SUBREG. */
19915 tmp1 = gen_reg_rtx (SImode);
19916 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19917 /* Insert the SImode value as low element of V4SImode vector. */
19918 tmp2 = gen_reg_rtx (V4SImode);
19919 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19920 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19921 CONST0_RTX (V4SImode),
19922 const1_rtx);
19923 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19924 /* Cast the V4SImode vector back to a V8HImode vector. */
19925 tmp1 = gen_reg_rtx (V8HImode);
19926 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19927 /* Duplicate the low short through the whole low SImode word. */
19928 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19929 /* Cast the V8HImode vector back to a V4SImode vector. */
19930 tmp2 = gen_reg_rtx (V4SImode);
19931 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19932 /* Replicate the low element of the V4SImode vector. */
19933 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19934 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19935 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19936 return true;
19937 }
19938 smode = HImode;
19939 wsmode = SImode;
19940 wvmode = V4SImode;
19941 goto widen;
19942 case V16QImode:
19943 if (TARGET_SSE2)
19944 {
19945 rtx tmp1, tmp2;
19946 /* Extend QImode to SImode using a paradoxical SUBREG. */
19947 tmp1 = gen_reg_rtx (SImode);
19948 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19949 /* Insert the SImode value as low element of V4SImode vector. */
19950 tmp2 = gen_reg_rtx (V4SImode);
19951 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19952 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19953 CONST0_RTX (V4SImode),
19954 const1_rtx);
19955 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19956 /* Cast the V4SImode vector back to a V16QImode vector. */
19957 tmp1 = gen_reg_rtx (V16QImode);
19958 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19959 /* Duplicate the low byte through the whole low SImode word. */
19960 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19961 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19962 /* Cast the V16QImode vector back to a V4SImode vector. */
19963 tmp2 = gen_reg_rtx (V4SImode);
19964 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19965 /* Replicate the low element of the V4SImode vector. */
19966 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19967 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19968 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19969 return true;
19970 }
19971 smode = QImode;
19972 wsmode = HImode;
19973 wvmode = V8HImode;
19974 goto widen;
19975 widen:
19976 /* Replicate the value once into the next wider mode and recurse. */
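/* For example, a V8QImode splat of byte B first forms the HImode value
   (B << 8) | B, splats that as V4HImode, and finally views the result
   as V8QImode.  */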
19977 val = convert_modes (wsmode, smode, val, true);
19978 x = expand_simple_binop (wsmode, ASHIFT, val,
19979 GEN_INT (GET_MODE_BITSIZE (smode)),
19980 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19981 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19982
19983 x = gen_reg_rtx (wvmode);
19984 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19985 gcc_unreachable ();
19986 emit_move_insn (target, gen_lowpart (mode, x));
19987 return true;
19988
19989 default:
19990 return false;
19991 }
19992 }
19993
19994 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19995 whose ONE_VAR element is VAR, and other elements are zero. Return true
19996 if successful. */
19997
19998 static bool
19999 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20000 rtx target, rtx var, int one_var)
20001 {
20002 enum machine_mode vsimode;
20003 rtx new_target;
20004 rtx x, tmp;
20005
20006 switch (mode)
20007 {
20008 case V2SFmode:
20009 case V2SImode:
20010 if (!mmx_ok)
20011 return false;
20012 /* FALLTHRU */
20013
20014 case V2DFmode:
20015 case V2DImode:
20016 if (one_var != 0)
20017 return false;
20018 var = force_reg (GET_MODE_INNER (mode), var);
20019 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20020 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20021 return true;
20022
20023 case V4SFmode:
20024 case V4SImode:
20025 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20026 new_target = gen_reg_rtx (mode);
20027 else
20028 new_target = target;
20029 var = force_reg (GET_MODE_INNER (mode), var);
20030 x = gen_rtx_VEC_DUPLICATE (mode, var);
20031 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20032 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20033 if (one_var != 0)
20034 {
20035 /* We need to shuffle the value to the correct position, so
20036 create a new pseudo to store the intermediate result. */
20037
20038 /* With SSE2, we can use the integer shuffle insns. */
20039 if (mode != V4SFmode && TARGET_SSE2)
20040 {
20041 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20042 GEN_INT (1),
20043 GEN_INT (one_var == 1 ? 0 : 1),
20044 GEN_INT (one_var == 2 ? 0 : 1),
20045 GEN_INT (one_var == 3 ? 0 : 1)));
20046 if (target != new_target)
20047 emit_move_insn (target, new_target);
20048 return true;
20049 }
20050
20051 /* Otherwise convert the intermediate result to V4SFmode and
20052 use the SSE1 shuffle instructions. */
20053 if (mode != V4SFmode)
20054 {
20055 tmp = gen_reg_rtx (V4SFmode);
20056 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20057 }
20058 else
20059 tmp = new_target;
20060
20061 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20062 GEN_INT (1),
20063 GEN_INT (one_var == 1 ? 0 : 1),
20064 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20065 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20066
20067 if (mode != V4SFmode)
20068 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20069 else if (tmp != target)
20070 emit_move_insn (target, tmp);
20071 }
20072 else if (target != new_target)
20073 emit_move_insn (target, new_target);
20074 return true;
20075
20076 case V8HImode:
20077 case V16QImode:
20078 vsimode = V4SImode;
20079 goto widen;
20080 case V4HImode:
20081 case V8QImode:
20082 if (!mmx_ok)
20083 return false;
20084 vsimode = V2SImode;
20085 goto widen;
20086 widen:
20087 if (one_var != 0)
20088 return false;
20089
20090 /* Zero extend the variable element to SImode and recurse. */
20091 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20092
20093 x = gen_reg_rtx (vsimode);
20094 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20095 var, one_var))
20096 gcc_unreachable ();
20097
20098 emit_move_insn (target, gen_lowpart (mode, x));
20099 return true;
20100
20101 default:
20102 return false;
20103 }
20104 }
20105
20106 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20107 consisting of the values in VALS. It is known that all elements
20108 except ONE_VAR are constants. Return true if successful. */
20109
20110 static bool
20111 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20112 rtx target, rtx vals, int one_var)
20113 {
20114 rtx var = XVECEXP (vals, 0, one_var);
20115 enum machine_mode wmode;
20116 rtx const_vec, x;
20117
20118 const_vec = copy_rtx (vals);
20119 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20120 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20121
20122 switch (mode)
20123 {
20124 case V2DFmode:
20125 case V2DImode:
20126 case V2SFmode:
20127 case V2SImode:
20128 /* For the two element vectors, it's just as easy to use
20129 the general case. */
20130 return false;
20131
20132 case V4SFmode:
20133 case V4SImode:
20134 case V8HImode:
20135 case V4HImode:
20136 break;
20137
20138 case V16QImode:
20139 wmode = V8HImode;
20140 goto widen;
20141 case V8QImode:
20142 wmode = V4HImode;
20143 goto widen;
20144 widen:
20145 /* There's no way to set one QImode entry easily. Combine
20146 the variable value with its adjacent constant value, and
20147 promote to an HImode set. */
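/* E.g. to set byte 5 of a V16QImode vector we merge bytes 4 and 5 into
   HImode element 2 (one_var >> 1) and set that HImode element instead.  */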
20148 x = XVECEXP (vals, 0, one_var ^ 1);
20149 if (one_var & 1)
20150 {
20151 var = convert_modes (HImode, QImode, var, true);
20152 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20153 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20154 x = GEN_INT (INTVAL (x) & 0xff);
20155 }
20156 else
20157 {
20158 var = convert_modes (HImode, QImode, var, true);
20159 x = gen_int_mode (INTVAL (x) << 8, HImode);
20160 }
20161 if (x != const0_rtx)
20162 var = expand_simple_binop (HImode, IOR, var, x, var,
20163 1, OPTAB_LIB_WIDEN);
20164
20165 x = gen_reg_rtx (wmode);
20166 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20167 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20168
20169 emit_move_insn (target, gen_lowpart (mode, x));
20170 return true;
20171
20172 default:
20173 return false;
20174 }
20175
20176 emit_move_insn (target, const_vec);
20177 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20178 return true;
20179 }
20180
20181 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20182 all values variable, and none identical. */
20183
20184 static void
20185 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20186 rtx target, rtx vals)
20187 {
20188 enum machine_mode half_mode = GET_MODE_INNER (mode);
20189 rtx op0 = NULL, op1 = NULL;
20190 bool use_vec_concat = false;
20191
20192 switch (mode)
20193 {
20194 case V2SFmode:
20195 case V2SImode:
20196 if (!mmx_ok && !TARGET_SSE)
20197 break;
20198 /* FALLTHRU */
20199
20200 case V2DFmode:
20201 case V2DImode:
20202 /* For the two element vectors, we always implement VEC_CONCAT. */
20203 op0 = XVECEXP (vals, 0, 0);
20204 op1 = XVECEXP (vals, 0, 1);
20205 use_vec_concat = true;
20206 break;
20207
20208 case V4SFmode:
20209 half_mode = V2SFmode;
20210 goto half;
20211 case V4SImode:
20212 half_mode = V2SImode;
20213 goto half;
20214 half:
20215 {
20216 rtvec v;
20217
20218 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20219 Recurse to load the two halves. */
20220
20221 op0 = gen_reg_rtx (half_mode);
20222 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20223 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20224
20225 op1 = gen_reg_rtx (half_mode);
20226 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20227 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20228
20229 use_vec_concat = true;
20230 }
20231 break;
20232
20233 case V8HImode:
20234 case V16QImode:
20235 case V4HImode:
20236 case V8QImode:
20237 break;
20238
20239 default:
20240 gcc_unreachable ();
20241 }
20242
20243 if (use_vec_concat)
20244 {
20245 if (!register_operand (op0, half_mode))
20246 op0 = force_reg (half_mode, op0);
20247 if (!register_operand (op1, half_mode))
20248 op1 = force_reg (half_mode, op1);
20249
20250 emit_insn (gen_rtx_SET (VOIDmode, target,
20251 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20252 }
20253 else
20254 {
20255 int i, j, n_elts, n_words, n_elt_per_word;
20256 enum machine_mode inner_mode;
20257 rtx words[4], shift;
20258
20259 inner_mode = GET_MODE_INNER (mode);
20260 n_elts = GET_MODE_NUNITS (mode);
20261 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20262 n_elt_per_word = n_elts / n_words;
20263 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20264
20265 for (i = 0; i < n_words; ++i)
20266 {
20267 rtx word = NULL_RTX;
20268
20269 for (j = 0; j < n_elt_per_word; ++j)
20270 {
20271 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20272 elt = convert_modes (word_mode, inner_mode, elt, true);
20273
20274 if (j == 0)
20275 word = elt;
20276 else
20277 {
20278 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20279 word, 1, OPTAB_LIB_WIDEN);
20280 word = expand_simple_binop (word_mode, IOR, word, elt,
20281 word, 1, OPTAB_LIB_WIDEN);
20282 }
20283 }
20284
20285 words[i] = word;
20286 }
20287
20288 if (n_words == 1)
20289 emit_move_insn (target, gen_lowpart (mode, words[0]));
20290 else if (n_words == 2)
20291 {
20292 rtx tmp = gen_reg_rtx (mode);
20293 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20294 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20295 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20296 emit_move_insn (target, tmp);
20297 }
20298 else if (n_words == 4)
20299 {
20300 rtx tmp = gen_reg_rtx (V4SImode);
20301 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20302 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20303 emit_move_insn (target, gen_lowpart (mode, tmp));
20304 }
20305 else
20306 gcc_unreachable ();
20307 }
20308 }
20309
20310 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20311 instructions unless MMX_OK is true. */
20312
20313 void
20314 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20315 {
20316 enum machine_mode mode = GET_MODE (target);
20317 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20318 int n_elts = GET_MODE_NUNITS (mode);
20319 int n_var = 0, one_var = -1;
20320 bool all_same = true, all_const_zero = true;
20321 int i;
20322 rtx x;
20323
20324 for (i = 0; i < n_elts; ++i)
20325 {
20326 x = XVECEXP (vals, 0, i);
20327 if (!CONSTANT_P (x))
20328 n_var++, one_var = i;
20329 else if (x != CONST0_RTX (inner_mode))
20330 all_const_zero = false;
20331 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20332 all_same = false;
20333 }
20334
20335 /* Constants are best loaded from the constant pool. */
20336 if (n_var == 0)
20337 {
20338 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20339 return;
20340 }
20341
20342 /* If all values are identical, broadcast the value. */
20343 if (all_same
20344 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20345 XVECEXP (vals, 0, 0)))
20346 return;
20347
20348 /* Values where only one field is non-constant are best loaded from
20349 the pool and overwritten via move later. */
20350 if (n_var == 1)
20351 {
20352 if (all_const_zero
20353 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20354 XVECEXP (vals, 0, one_var),
20355 one_var))
20356 return;
20357
20358 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20359 return;
20360 }
20361
20362 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20363 }
20364
20365 void
20366 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20367 {
20368 enum machine_mode mode = GET_MODE (target);
20369 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20370 bool use_vec_merge = false;
20371 rtx tmp;
20372
20373 switch (mode)
20374 {
20375 case V2SFmode:
20376 case V2SImode:
20377 if (mmx_ok)
20378 {
20379 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20380 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20381 if (elt == 0)
20382 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20383 else
20384 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20385 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20386 return;
20387 }
20388 break;
20389
20390 case V2DFmode:
20391 case V2DImode:
20392 {
20393 rtx op0, op1;
20394
20395 /* For the two element vectors, we implement a VEC_CONCAT with
20396 the extraction of the other element. */
20397
20398 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20399 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20400
20401 if (elt == 0)
20402 op0 = val, op1 = tmp;
20403 else
20404 op0 = tmp, op1 = val;
20405
20406 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20407 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20408 }
20409 return;
20410
20411 case V4SFmode:
20412 switch (elt)
20413 {
20414 case 0:
20415 use_vec_merge = true;
20416 break;
20417
20418 case 1:
20419 /* tmp = target = A B C D */
20420 tmp = copy_to_reg (target);
20421 /* target = A A B B */
20422 emit_insn (gen_sse_unpcklps (target, target, target));
20423 /* target = X A B B */
20424 ix86_expand_vector_set (false, target, val, 0);
20425 /* target = A X C D */
20426 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20427 GEN_INT (1), GEN_INT (0),
20428 GEN_INT (2+4), GEN_INT (3+4)));
20429 return;
20430
20431 case 2:
20432 /* tmp = target = A B C D */
20433 tmp = copy_to_reg (target);
20434 /* tmp = X B C D */
20435 ix86_expand_vector_set (false, tmp, val, 0);
20436 /* target = A B X D */
20437 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20438 GEN_INT (0), GEN_INT (1),
20439 GEN_INT (0+4), GEN_INT (3+4)));
20440 return;
20441
20442 case 3:
20443 /* tmp = target = A B C D */
20444 tmp = copy_to_reg (target);
20445 /* tmp = X B C D */
20446 ix86_expand_vector_set (false, tmp, val, 0);
20447 /* target = A B C X */
20448 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20449 GEN_INT (0), GEN_INT (1),
20450 GEN_INT (2+4), GEN_INT (0+4)));
20451 return;
20452
20453 default:
20454 gcc_unreachable ();
20455 }
20456 break;
20457
20458 case V4SImode:
20459 /* Element 0 handled by vec_merge below. */
20460 if (elt == 0)
20461 {
20462 use_vec_merge = true;
20463 break;
20464 }
20465
20466 if (TARGET_SSE2)
20467 {
20468 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20469 store into element 0, then shuffle them back. */
20470
20471 rtx order[4];
20472
20473 order[0] = GEN_INT (elt);
20474 order[1] = const1_rtx;
20475 order[2] = const2_rtx;
20476 order[3] = GEN_INT (3);
20477 order[elt] = const0_rtx;
20478
20479 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20480 order[1], order[2], order[3]));
20481
20482 ix86_expand_vector_set (false, target, val, 0);
20483
20484 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20485 order[1], order[2], order[3]));
20486 }
20487 else
20488 {
20489 /* For SSE1, we have to reuse the V4SF code. */
20490 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20491 gen_lowpart (SFmode, val), elt);
20492 }
20493 return;
20494
20495 case V8HImode:
20496 use_vec_merge = TARGET_SSE2;
20497 break;
20498 case V4HImode:
20499 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20500 break;
20501
20502 case V16QImode:
20503 case V8QImode:
20504 default:
20505 break;
20506 }
20507
20508 if (use_vec_merge)
20509 {
20510 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20511 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20512 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20513 }
20514 else
20515 {
20516 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20517
20518 emit_move_insn (mem, target);
20519
20520 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20521 emit_move_insn (tmp, val);
20522
20523 emit_move_insn (target, mem);
20524 }
20525 }
20526
20527 void
20528 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20529 {
20530 enum machine_mode mode = GET_MODE (vec);
20531 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20532 bool use_vec_extr = false;
20533 rtx tmp;
20534
20535 switch (mode)
20536 {
20537 case V2SImode:
20538 case V2SFmode:
20539 if (!mmx_ok)
20540 break;
20541 /* FALLTHRU */
20542
20543 case V2DFmode:
20544 case V2DImode:
20545 use_vec_extr = true;
20546 break;
20547
20548 case V4SFmode:
20549 switch (elt)
20550 {
20551 case 0:
20552 tmp = vec;
20553 break;
20554
20555 case 1:
20556 case 3:
20557 tmp = gen_reg_rtx (mode);
20558 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20559 GEN_INT (elt), GEN_INT (elt),
20560 GEN_INT (elt+4), GEN_INT (elt+4)));
20561 break;
20562
20563 case 2:
20564 tmp = gen_reg_rtx (mode);
20565 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20566 break;
20567
20568 default:
20569 gcc_unreachable ();
20570 }
20571 vec = tmp;
20572 use_vec_extr = true;
20573 elt = 0;
20574 break;
20575
20576 case V4SImode:
20577 if (TARGET_SSE2)
20578 {
20579 switch (elt)
20580 {
20581 case 0:
20582 tmp = vec;
20583 break;
20584
20585 case 1:
20586 case 3:
20587 tmp = gen_reg_rtx (mode);
20588 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20589 GEN_INT (elt), GEN_INT (elt),
20590 GEN_INT (elt), GEN_INT (elt)));
20591 break;
20592
20593 case 2:
20594 tmp = gen_reg_rtx (mode);
20595 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20596 break;
20597
20598 default:
20599 gcc_unreachable ();
20600 }
20601 vec = tmp;
20602 use_vec_extr = true;
20603 elt = 0;
20604 }
20605 else
20606 {
20607 /* For SSE1, we have to reuse the V4SF code. */
20608 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20609 gen_lowpart (V4SFmode, vec), elt);
20610 return;
20611 }
20612 break;
20613
20614 case V8HImode:
20615 use_vec_extr = TARGET_SSE2;
20616 break;
20617 case V4HImode:
20618 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20619 break;
20620
20621 case V16QImode:
20622 case V8QImode:
20623 /* ??? Could extract the appropriate HImode element and shift. */
20624 default:
20625 break;
20626 }
20627
20628 if (use_vec_extr)
20629 {
20630 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20631 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20632
20633 /* Let the rtl optimizers know about the zero extension performed. */
20634 if (inner_mode == HImode)
20635 {
20636 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20637 target = gen_lowpart (SImode, target);
20638 }
20639
20640 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20641 }
20642 else
20643 {
20644 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20645
20646 emit_move_insn (mem, vec);
20647
20648 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20649 emit_move_insn (target, tmp);
20650 }
20651 }
20652
20653 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20654 pattern to reduce; DEST is the destination; IN is the input vector. */
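/* When FN is the addition pattern, element 0 of DEST ends up holding
   (in[0] + in[2]) + (in[1] + in[3]); the remaining elements hold
   partial results.  */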
20655
20656 void
20657 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20658 {
20659 rtx tmp1, tmp2, tmp3;
20660
20661 tmp1 = gen_reg_rtx (V4SFmode);
20662 tmp2 = gen_reg_rtx (V4SFmode);
20663 tmp3 = gen_reg_rtx (V4SFmode);
20664
20665 emit_insn (gen_sse_movhlps (tmp1, in, in));
20666 emit_insn (fn (tmp2, tmp1, in));
20667
20668 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20669 GEN_INT (1), GEN_INT (1),
20670 GEN_INT (1+4), GEN_INT (1+4)));
20671 emit_insn (fn (dest, tmp2, tmp3));
20672 }
20673 \f
20674 /* Target hook for scalar_mode_supported_p. */
20675 static bool
20676 ix86_scalar_mode_supported_p (enum machine_mode mode)
20677 {
20678 if (DECIMAL_FLOAT_MODE_P (mode))
20679 return true;
20680 else
20681 return default_scalar_mode_supported_p (mode);
20682 }
20683
20684 /* Implements target hook vector_mode_supported_p. */
20685 static bool
20686 ix86_vector_mode_supported_p (enum machine_mode mode)
20687 {
20688 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20689 return true;
20690 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20691 return true;
20692 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20693 return true;
20694 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20695 return true;
20696 return false;
20697 }
20698
20699 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20700
20701 We do this in the new i386 backend to maintain source compatibility
20702 with the old cc0-based compiler. */
20703
20704 static tree
20705 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20706 tree inputs ATTRIBUTE_UNUSED,
20707 tree clobbers)
20708 {
20709 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20710 clobbers);
20711 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20712 clobbers);
20713 return clobbers;
20714 }
20715
20716 /* Return true if this goes in large data/bss. */
20717
20718 static bool
20719 ix86_in_large_data_p (tree exp)
20720 {
20721 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20722 return false;
20723
20724 /* Functions are never large data. */
20725 if (TREE_CODE (exp) == FUNCTION_DECL)
20726 return false;
20727
20728 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20729 {
20730 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20731 if (strcmp (section, ".ldata") == 0
20732 || strcmp (section, ".lbss") == 0)
20733 return true;
20734 return false;
20735 }
20736 else
20737 {
20738 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20739
20740 /* If this is an incomplete type with size 0, then we can't put it
20741 in data because it might be too big when completed. */
20742 if (!size || size > ix86_section_threshold)
20743 return true;
20744 }
20745
20746 return false;
20747 }
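
/* Encode section info for DECL: after the default processing, mark
   static or external variables that belong in the large data/bss
   sections with SYMBOL_FLAG_FAR_ADDR, since under the medium code
   model they may not be reachable through 32-bit displacements.  */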
20748 static void
20749 ix86_encode_section_info (tree decl, rtx rtl, int first)
20750 {
20751 default_encode_section_info (decl, rtl, first);
20752
20753 if (TREE_CODE (decl) == VAR_DECL
20754 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20755 && ix86_in_large_data_p (decl))
20756 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20757 }
20758
20759 /* Worker function for REVERSE_CONDITION. */
20760
20761 enum rtx_code
20762 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20763 {
20764 return (mode != CCFPmode && mode != CCFPUmode
20765 ? reverse_condition (code)
20766 : reverse_condition_maybe_unordered (code));
20767 }
20768
20769 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20770 to OPERANDS[0]. */
20771
20772 const char *
20773 output_387_reg_move (rtx insn, rtx *operands)
20774 {
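/* If the source register dies with this instruction, pop it: when the
   destination is already %st(0) an ffreep suffices, otherwise fstp stores
   and pops.  If the source stays live, fld loads it onto the stack top
   when the destination is %st(0), and fst stores the stack top otherwise.  */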
20775 if (REG_P (operands[1])
20776 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20777 {
20778 if (REGNO (operands[0]) == FIRST_STACK_REG)
20779 return output_387_ffreep (operands, 0);
20780 return "fstp\t%y0";
20781 }
20782 if (STACK_TOP_P (operands[0]))
20783 return "fld%z1\t%y1";
20784 return "fst\t%y0";
20785 }
20786
20787 /* Output code to perform a conditional jump to LABEL if the C2 flag
20788 in the FP status register is set. */
20789
20790 void
20791 ix86_emit_fp_unordered_jump (rtx label)
20792 {
20793 rtx reg = gen_reg_rtx (HImode);
20794 rtx temp;
20795
20796 emit_insn (gen_x86_fnstsw_1 (reg));
20797
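/* The FPU status word is now in REG.  With SAHF we can copy its condition
   bits into EFLAGS and branch on the unordered condition; otherwise test
   the C2 bit (0x04 of the upper status byte) directly and branch if set.  */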
20798 if (TARGET_USE_SAHF)
20799 {
20800 emit_insn (gen_x86_sahf_1 (reg));
20801
20802 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20803 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20804 }
20805 else
20806 {
20807 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20808
20809 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20810 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20811 }
20812
20813 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20814 gen_rtx_LABEL_REF (VOIDmode, label),
20815 pc_rtx);
20816 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20817 emit_jump_insn (temp);
20818 }
20819
20820 /* Output code to perform a log1p XFmode calculation. */
20821
20822 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20823 {
20824 rtx label1 = gen_label_rtx ();
20825 rtx label2 = gen_label_rtx ();
20826
20827 rtx tmp = gen_reg_rtx (XFmode);
20828 rtx tmp2 = gen_reg_rtx (XFmode);
20829
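/* For |op1| below 1 - sqrt(2)/2 (about 0.2929) use fyl2xp1, which computes
   y * log2 (x + 1) accurately for small x; otherwise compute 1 + op1
   explicitly and use fyl2x.  Both paths use ln(2) (fldln2) as the y operand
   so the base-2 logarithm becomes a natural logarithm.  */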
20830 emit_insn (gen_absxf2 (tmp, op1));
20831 emit_insn (gen_cmpxf (tmp,
20832 CONST_DOUBLE_FROM_REAL_VALUE (
20833 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20834 XFmode)));
20835 emit_jump_insn (gen_bge (label1));
20836
20837 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20838 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20839 emit_jump (label2);
20840
20841 emit_label (label1);
20842 emit_move_insn (tmp, CONST1_RTX (XFmode));
20843 emit_insn (gen_addxf3 (tmp, op1, tmp));
20844 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20845 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20846
20847 emit_label (label2);
20848 }
20849
20850 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20851
20852 static void
20853 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20854 tree decl)
20855 {
20856 /* With Binutils 2.15, the "@unwind" marker must be specified on
20857 every occurrence of the ".eh_frame" section, not just the first
20858 one. */
20859 if (TARGET_64BIT
20860 && strcmp (name, ".eh_frame") == 0)
20861 {
20862 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20863 flags & SECTION_WRITE ? "aw" : "a");
20864 return;
20865 }
20866 default_elf_asm_named_section (name, flags, decl);
20867 }
20868
20869 /* Return the mangling of TYPE if it is an extended fundamental type. */
20870
20871 static const char *
20872 ix86_mangle_fundamental_type (tree type)
20873 {
20874 switch (TYPE_MODE (type))
20875 {
20876 case TFmode:
20877 /* __float128 is "g". */
20878 return "g";
20879 case XFmode:
20880 /* "long double" or __float80 is "e". */
20881 return "e";
20882 default:
20883 return NULL;
20884 }
20885 }
20886
20887 /* For 32-bit code we can save the PIC register setup by calling the
20888 hidden function __stack_chk_fail_local instead of calling
20889 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20890 register, so it is better to call __stack_chk_fail directly. */
20891
20892 static tree
20893 ix86_stack_protect_fail (void)
20894 {
20895 return TARGET_64BIT
20896 ? default_external_stack_protect_fail ()
20897 : default_hidden_stack_protect_fail ();
20898 }
20899
20900 /* Select a format to encode pointers in exception handling data. CODE
20901 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20902 true if the symbol may be affected by dynamic relocations.
20903
20904 ??? All x86 object file formats are capable of representing this.
20905 After all, the relocation needed is the same as for the call insn.
20906 Whether or not a particular assembler allows us to enter such, I
20907 guess we'll have to see. */
20908 int
20909 asm_preferred_eh_data_format (int code, int global)
20910 {
20911 if (flag_pic)
20912 {
20913 int type = DW_EH_PE_sdata8;
20914 if (!TARGET_64BIT
20915 || ix86_cmodel == CM_SMALL_PIC
20916 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20917 type = DW_EH_PE_sdata4;
20918 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20919 }
20920 if (ix86_cmodel == CM_SMALL
20921 || (ix86_cmodel == CM_MEDIUM && code))
20922 return DW_EH_PE_udata4;
20923 return DW_EH_PE_absptr;
20924 }
20925 \f
20926 /* Expand copysign from SIGN to the positive value ABS_VALUE,
20927 storing the result in RESULT. If MASK is non-null, it is the mask used
20928 to mask out the sign-bit (as produced by ix86_expand_sse_fabs). */
20929 static void
20930 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20931 {
20932 enum machine_mode mode = GET_MODE (sign);
20933 rtx sgn = gen_reg_rtx (mode);
20934 if (mask == NULL_RTX)
20935 {
20936 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20937 if (!VECTOR_MODE_P (mode))
20938 {
20939 /* We need to generate a scalar mode mask in this case. */
20940 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20941 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20942 mask = gen_reg_rtx (mode);
20943 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20944 }
20945 }
20946 else
20947 mask = gen_rtx_NOT (mode, mask);
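/* sgn = SIGN & sign-bit mask; RESULT = ABS_VALUE | sgn.  */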
20948 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20949 gen_rtx_AND (mode, mask, sign)));
20950 emit_insn (gen_rtx_SET (VOIDmode, result,
20951 gen_rtx_IOR (mode, abs_value, sgn)));
20952 }
20953
20954 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20955 mask for masking out the sign-bit is stored in *SMASK, if that is
20956 non-null. */
20957 static rtx
20958 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20959 {
20960 enum machine_mode mode = GET_MODE (op0);
20961 rtx xa, mask;
20962
20963 xa = gen_reg_rtx (mode);
20964 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20965 if (!VECTOR_MODE_P (mode))
20966 {
20967 /* We need to generate a scalar mode mask in this case. */
20968 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20969 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20970 mask = gen_reg_rtx (mode);
20971 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20972 }
20973 emit_insn (gen_rtx_SET (VOIDmode, xa,
20974 gen_rtx_AND (mode, op0, mask)));
20975
20976 if (smask)
20977 *smask = mask;
20978
20979 return xa;
20980 }
20981
20982 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20983 swapping the operands if SWAP_OPERANDS is true. The expanded
20984 code is a forward jump to a newly created label in case the
20985 comparison is true. The generated label rtx is returned. */
20986 static rtx
20987 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20988 bool swap_operands)
20989 {
20990 rtx label, tmp;
20991
20992 if (swap_operands)
20993 {
20994 tmp = op0;
20995 op0 = op1;
20996 op1 = tmp;
20997 }
20998
20999 label = gen_label_rtx ();
21000 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21001 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21002 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21003 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21004 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21005 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21006 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21007 JUMP_LABEL (tmp) = label;
21008
21009 return label;
21010 }
21011
21012 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
21013 using comparison code CODE. The operands are swapped for the comparison if
21014 SWAP_OPERANDS is true. Returns an rtx holding the generated mask. */
21015 static rtx
21016 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21017 bool swap_operands)
21018 {
21019 enum machine_mode mode = GET_MODE (op0);
21020 rtx mask = gen_reg_rtx (mode);
21021
21022 if (swap_operands)
21023 {
21024 rtx tmp = op0;
21025 op0 = op1;
21026 op1 = tmp;
21027 }
21028
21029 if (mode == DFmode)
21030 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21031 gen_rtx_fmt_ee (code, mode, op0, op1)));
21032 else
21033 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21034 gen_rtx_fmt_ee (code, mode, op0, op1)));
21035
21036 return mask;
21037 }
21038
21039 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21040 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
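/* Every value of MODE with magnitude at least 2**52 (DFmode) or 2**23
   (SFmode) is already an integer, so the rounding expanders below use this
   constant both as a range check and, in some variants, to round via
   x + TWO52 - TWO52.  */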
21041 static rtx
21042 ix86_gen_TWO52 (enum machine_mode mode)
21043 {
21044 REAL_VALUE_TYPE TWO52r;
21045 rtx TWO52;
21046
21047 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21048 TWO52 = const_double_from_real_value (TWO52r, mode);
21049 TWO52 = force_reg (mode, TWO52);
21050
21051 return TWO52;
21052 }
21053
21054 /* Expand SSE sequence for computing lround from OP1 storing
21055 into OP0. */
21056 void
21057 ix86_expand_lround (rtx op0, rtx op1)
21058 {
21059 /* C code for the stuff we're doing below:
21060 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21061 return (long)tmp;
21062 */
21063 enum machine_mode mode = GET_MODE (op1);
21064 const struct real_format *fmt;
21065 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21066 rtx adj;
21067
21068 /* load nextafter (0.5, 0.0) */
21069 fmt = REAL_MODE_FORMAT (mode);
21070 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21071 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
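/* pred_half is the largest representable value below 0.5; adding it instead
   of 0.5 keeps values just under a half-way boundary from being rounded up
   by the addition.  */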
21072
21073 /* adj = copysign (nextafter (0.5, 0.0), op1) */
21074 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21075 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21076
21077 /* adj = op1 + adj */
21078 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21079
21080 /* op0 = (imode)adj */
21081 expand_fix (op0, adj, 0);
21082 }
21083
21084 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
21085 DO_FLOOR) from OP1, storing the result into OP0. */
21086 void
21087 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21088 {
21089 /* C code for the stuff we're doing below (for do_floor):
21090 xi = (long)op1;
21091 xi -= (double)xi > op1 ? 1 : 0;
21092 return xi;
21093 */
21094 enum machine_mode fmode = GET_MODE (op1);
21095 enum machine_mode imode = GET_MODE (op0);
21096 rtx ireg, freg, label, tmp;
21097
21098 /* reg = (long)op1 */
21099 ireg = gen_reg_rtx (imode);
21100 expand_fix (ireg, op1, 0);
21101
21102 /* freg = (double)reg */
21103 freg = gen_reg_rtx (fmode);
21104 expand_float (freg, ireg, 0);
21105
21106 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21107 label = ix86_expand_sse_compare_and_jump (UNLE,
21108 freg, op1, !do_floor);
21109 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21110 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21111 emit_move_insn (ireg, tmp);
21112
21113 emit_label (label);
21114 LABEL_NUSES (label) = 1;
21115
21116 emit_move_insn (op0, ireg);
21117 }
21118
21119 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21120 result in OPERAND0. */
21121 void
21122 ix86_expand_rint (rtx operand0, rtx operand1)
21123 {
21124 /* C code for the stuff we're doing below:
21125 xa = fabs (operand1);
21126 if (!isless (xa, 2**52))
21127 return operand1;
21128 xa = xa + 2**52 - 2**52;
21129 return copysign (xa, operand1);
21130 */
21131 enum machine_mode mode = GET_MODE (operand0);
21132 rtx res, xa, label, TWO52, mask;
21133
21134 res = gen_reg_rtx (mode);
21135 emit_move_insn (res, operand1);
21136
21137 /* xa = abs (operand1) */
21138 xa = ix86_expand_sse_fabs (res, &mask);
21139
21140 /* if (!isless (xa, TWO52)) goto label; */
21141 TWO52 = ix86_gen_TWO52 (mode);
21142 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21143
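/* Here xa < TWO52, so adding TWO52 pushes the fraction bits out of the
   significand and the addition rounds xa to an integer in the current
   rounding mode; subtracting TWO52 recovers that integer, as rint requires.  */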
21144 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21145 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21146
21147 ix86_sse_copysign_to_positive (res, xa, res, mask);
21148
21149 emit_label (label);
21150 LABEL_NUSES (label) = 1;
21151
21152 emit_move_insn (operand0, res);
21153 }
21154
21155 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21156 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
21157 void
21158 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21159 {
21160 /* C code for the stuff we expand below.
21161 double xa = fabs (x), x2;
21162 if (!isless (xa, TWO52))
21163 return x;
21164 xa = xa + TWO52 - TWO52;
21165 x2 = copysign (xa, x);
21166 Compensate. Floor:
21167 if (x2 > x)
21168 x2 -= 1;
21169 Compensate. Ceil:
21170 if (x2 < x)
21171 x2 -= -1;
21172 return x2;
21173 */
21174 enum machine_mode mode = GET_MODE (operand0);
21175 rtx xa, TWO52, tmp, label, one, res, mask;
21176
21177 TWO52 = ix86_gen_TWO52 (mode);
21178
21179 /* Temporary for holding the result, initialized to the input
21180 operand to ease control flow. */
21181 res = gen_reg_rtx (mode);
21182 emit_move_insn (res, operand1);
21183
21184 /* xa = abs (operand1) */
21185 xa = ix86_expand_sse_fabs (res, &mask);
21186
21187 /* if (!isless (xa, TWO52)) goto label; */
21188 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21189
21190 /* xa = xa + TWO52 - TWO52; */
21191 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21192 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21193
21194 /* xa = copysign (xa, operand1) */
21195 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21196
21197 /* generate 1.0 or -1.0 */
21198 one = force_reg (mode,
21199 const_double_from_real_value (do_floor
21200 ? dconst1 : dconstm1, mode));
21201
21202 /* Compensate: floor: xa -= (xa > operand1 ? 1 : 0); ceil: xa -= (xa < operand1 ? -1 : 0) */
21203 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21204 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21205 gen_rtx_AND (mode, one, tmp)));
21206 /* We always need to subtract here to preserve signed zero. */
21207 tmp = expand_simple_binop (mode, MINUS,
21208 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21209 emit_move_insn (res, tmp);
21210
21211 emit_label (label);
21212 LABEL_NUSES (label) = 1;
21213
21214 emit_move_insn (operand0, res);
21215 }
21216
21217 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21218 into OPERAND0. */
21219 void
21220 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21221 {
21222 /* C code for the stuff we expand below.
21223 double xa = fabs (x), x2;
21224 if (!isless (xa, TWO52))
21225 return x;
21226 x2 = (double)(long)x;
21227 Compensate. Floor:
21228 if (x2 > x)
21229 x2 -= 1;
21230 Compensate. Ceil:
21231 if (x2 < x)
21232 x2 += 1;
21233 if (HONOR_SIGNED_ZEROS (mode))
21234 return copysign (x2, x);
21235 return x2;
21236 */
21237 enum machine_mode mode = GET_MODE (operand0);
21238 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21239
21240 TWO52 = ix86_gen_TWO52 (mode);
21241
21242 /* Temporary for holding the result, initialized to the input
21243 operand to ease control flow. */
21244 res = gen_reg_rtx (mode);
21245 emit_move_insn (res, operand1);
21246
21247 /* xa = abs (operand1) */
21248 xa = ix86_expand_sse_fabs (res, &mask);
21249
21250 /* if (!isless (xa, TWO52)) goto label; */
21251 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21252
21253 /* xa = (double)(long)x */
21254 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21255 expand_fix (xi, res, 0);
21256 expand_float (xa, xi, 0);
21257
21258 /* generate 1.0 */
21259 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21260
21261 /* Compensate: floor: xa -= (xa > operand1 ? 1 : 0); ceil: xa += (xa < operand1 ? 1 : 0) */
21262 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21263 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21264 gen_rtx_AND (mode, one, tmp)));
21265 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21266 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21267 emit_move_insn (res, tmp);
21268
21269 if (HONOR_SIGNED_ZEROS (mode))
21270 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21271
21272 emit_label (label);
21273 LABEL_NUSES (label) = 1;
21274
21275 emit_move_insn (operand0, res);
21276 }
21277
21278 /* Expand SSE sequence for computing round from OPERAND1 storing
21279 into OPERAND0. This sequence works without relying on DImode truncation
21280 via cvttsd2siq, which is only available on 64-bit targets. */
21281 void
21282 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21283 {
21284 /* C code for the stuff we expand below.
21285 double xa = fabs (x), xa2, x2;
21286 if (!isless (xa, TWO52))
21287 return x;
21288 Using the absolute value and copying back sign makes
21289 -0.0 -> -0.0 correct.
21290 xa2 = xa + TWO52 - TWO52;
21291 Compensate.
21292 dxa = xa2 - xa;
21293 if (dxa <= -0.5)
21294 xa2 += 1;
21295 else if (dxa > 0.5)
21296 xa2 -= 1;
21297 x2 = copysign (xa2, x);
21298 return x2;
21299 */
21300 enum machine_mode mode = GET_MODE (operand0);
21301 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21302
21303 TWO52 = ix86_gen_TWO52 (mode);
21304
21305 /* Temporary for holding the result, initialized to the input
21306 operand to ease control flow. */
21307 res = gen_reg_rtx (mode);
21308 emit_move_insn (res, operand1);
21309
21310 /* xa = abs (operand1) */
21311 xa = ix86_expand_sse_fabs (res, &mask);
21312
21313 /* if (!isless (xa, TWO52)) goto label; */
21314 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21315
21316 /* xa2 = xa + TWO52 - TWO52; */
21317 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21318 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21319
21320 /* dxa = xa2 - xa; */
21321 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21322
21323 /* generate 0.5, 1.0 and -0.5 */
21324 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21325 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21326 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21327 0, OPTAB_DIRECT);
21328
21329 /* Compensate. */
21330 tmp = gen_reg_rtx (mode);
21331 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21332 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21333 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21334 gen_rtx_AND (mode, one, tmp)));
21335 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21336 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21337 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21338 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21339 gen_rtx_AND (mode, one, tmp)));
21340 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21341
21342 /* res = copysign (xa2, operand1) */
21343 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21344
21345 emit_label (label);
21346 LABEL_NUSES (label) = 1;
21347
21348 emit_move_insn (operand0, res);
21349 }
21350
21351 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21352 into OPERAND0. */
21353 void
21354 ix86_expand_trunc (rtx operand0, rtx operand1)
21355 {
21356 /* C code for SSE variant we expand below.
21357 double xa = fabs (x), x2;
21358 if (!isless (xa, TWO52))
21359 return x;
21360 x2 = (double)(long)x;
21361 if (HONOR_SIGNED_ZEROS (mode))
21362 return copysign (x2, x);
21363 return x2;
21364 */
21365 enum machine_mode mode = GET_MODE (operand0);
21366 rtx xa, xi, TWO52, label, res, mask;
21367
21368 TWO52 = ix86_gen_TWO52 (mode);
21369
21370 /* Temporary for holding the result, initialized to the input
21371 operand to ease control flow. */
21372 res = gen_reg_rtx (mode);
21373 emit_move_insn (res, operand1);
21374
21375 /* xa = abs (operand1) */
21376 xa = ix86_expand_sse_fabs (res, &mask);
21377
21378 /* if (!isless (xa, TWO52)) goto label; */
21379 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21380
21381 /* x = (double)(long)x */
21382 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21383 expand_fix (xi, res, 0);
21384 expand_float (res, xi, 0);
21385
21386 if (HONOR_SIGNED_ZEROS (mode))
21387 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21388
21389 emit_label (label);
21390 LABEL_NUSES (label) = 1;
21391
21392 emit_move_insn (operand0, res);
21393 }
21394
21395 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21396 into OPERAND0, without relying on DImode truncation via cvttsd2siq. */
21397 void
21398 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21399 {
21400 enum machine_mode mode = GET_MODE (operand0);
21401 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21402
21403 /* C code for SSE variant we expand below.
21404 double xa = fabs (x), xa2, x2;
21405 if (!isless (xa, TWO52))
21406 return x;
21407 xa2 = xa + TWO52 - TWO52;
21408 Compensate:
21409 if (xa2 > xa)
21410 xa2 -= 1.0;
21411 x2 = copysign (xa2, x);
21412 return x2;
21413 */
21414
21415 TWO52 = ix86_gen_TWO52 (mode);
21416
21417 /* Temporary for holding the result, initialized to the input
21418 operand to ease control flow. */
21419 res = gen_reg_rtx (mode);
21420 emit_move_insn (res, operand1);
21421
21422 /* xa = abs (operand1) */
21423 xa = ix86_expand_sse_fabs (res, &smask);
21424
21425 /* if (!isless (xa, TWO52)) goto label; */
21426 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21427
21428 /* res = xa + TWO52 - TWO52; */
21429 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21430 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21431 emit_move_insn (res, tmp);
21432
21433 /* generate 1.0 */
21434 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21435
21436 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21437 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21438 emit_insn (gen_rtx_SET (VOIDmode, mask,
21439 gen_rtx_AND (mode, mask, one)));
21440 tmp = expand_simple_binop (mode, MINUS,
21441 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21442 emit_move_insn (res, tmp);
21443
21444 /* res = copysign (res, operand1) */
21445 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21446
21447 emit_label (label);
21448 LABEL_NUSES (label) = 1;
21449
21450 emit_move_insn (operand0, res);
21451 }
21452
21453 /* Expand SSE sequence for computing round from OPERAND1 storing
21454 into OPERAND0. */
21455 void
21456 ix86_expand_round (rtx operand0, rtx operand1)
21457 {
21458 /* C code for the stuff we're doing below:
21459 double xa = fabs (x);
21460 if (!isless (xa, TWO52))
21461 return x;
21462 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21463 return copysign (xa, x);
21464 */
21465 enum machine_mode mode = GET_MODE (operand0);
21466 rtx res, TWO52, xa, label, xi, half, mask;
21467 const struct real_format *fmt;
21468 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21469
21470 /* Temporary for holding the result, initialized to the input
21471 operand to ease control flow. */
21472 res = gen_reg_rtx (mode);
21473 emit_move_insn (res, operand1);
21474
21475 TWO52 = ix86_gen_TWO52 (mode);
21476 xa = ix86_expand_sse_fabs (res, &mask);
21477 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21478
21479 /* load nextafter (0.5, 0.0) */
21480 fmt = REAL_MODE_FORMAT (mode);
21481 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21482 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21483
21484 /* xa = xa + nextafter (0.5, 0.0) */
21485 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21486 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21487
21488 /* xa = (double)(int64_t)xa */
21489 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21490 expand_fix (xi, xa, 0);
21491 expand_float (xa, xi, 0);
21492
21493 /* res = copysign (xa, operand1) */
21494 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21495
21496 emit_label (label);
21497 LABEL_NUSES (label) = 1;
21498
21499 emit_move_insn (operand0, res);
21500 }
21501
21502 #include "gt-i386.h"