1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
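/* Reader's note (illustrative sketch, not part of the original file):
   MODE_INDEX turns an integer mode into an index into the per-mode cost
   arrays below (QI=0, HI=1, SI=2, DI=3, everything else=4).  The rtx
   cost hook consumes it roughly like this, assuming the mult_init field
   name from struct processor_costs:

     int mult_start_cost = ix86_cost->mult_init[MODE_INDEX (mode)];
*/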
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
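/* Reader's note (assumption about declarations in i386.h, which are not
   shown in this excerpt): each cost table below ends with two
   string-operation descriptors, one for memcpy-like and one for
   memset-like expansion, each with a variant for 32-bit and for 64-bit
   code.  An initializer of the form {alg, {{max1, alg1}, ..., {-1, algN}}}
   names the algorithm used when the block size is unknown at compile time,
   followed by (size bound, algorithm) pairs; the -1 bound covers all
   larger sizes.  So DUMMY_STRINGOP_ALGS simply means "always emit a
   library call" and serves as the placeholder for the variant a given
   table does not tune.  */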
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches completely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has optimized REP instructions for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches completely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has optimized REP instructions for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles or more. With
863 that cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
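/* Reader's note (refers to option-handling code outside this excerpt):
   ix86_cost starts out pointing at pentium_cost and is expected to be
   redirected during option processing to the table matching the selected
   -mtune setting (or to size_cost when optimizing for size), so later
   cost queries all go through this single pointer.  */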
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be a common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32-bit generic because it does not
1012 work well with PPro-based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1030
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were added to the P4 based
1032 on simulation results. But after the P4 was made, no performance benefit
1033 was observed with branch hints; they also increase code size.
1034 As a result, icc never generates branch hints. */
1035 0,
1036
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1038 ~m_386,
1039
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1043
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1048
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on the Generic32 compilation setting as well. However,
1051 in the current implementation partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro-based chips and conflicts with the partial-register
1055 dependencies used by Athlon/P4-based chips, it is better to leave it off
1056 for generic32 for now. */
1057 m_PPRO,
1058
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1061
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1064
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1067
1068 /* X86_TUNE_USE_MOV0 */
1069 m_K6,
1070
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1073
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1075 m_PENT4,
1076
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1078 m_PPRO,
1079
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1081 ~m_PENT,
1082
1083 /* X86_TUNE_READ_MODIFY */
1084 ~(m_PENT | m_PPRO),
1085
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1089
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1092
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1095
1096 /* X86_TUNE_QIMODE_MATH */
1097 ~0,
1098
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls was more effective. */
1103 ~m_PPRO,
1104
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1106 0,
1107
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1109 m_PPRO,
1110
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1113
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1117
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1120
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1124
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1129
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1132
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4-based chips that treat 128-bit
1135 SSE registers as single units and K8-based chips that divide SSE
1136 registers into two 64-bit halves. This knob promotes all store destinations
1137 to be 128-bit to allow register renaming on 128-bit SSE units, but usually
1138 results in one extra micro-op on 64-bit SSE units. Experimental results
1139 show that disabling this option on P4 brings over a 20% SPECfp regression,
1140 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1143
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1145 m_AMDFAM10,
1146
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just the lower part of scalar values in the proper format, leaving the
1150 upper part undefined. */
1151 m_ATHLON_K8,
1152
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1155
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1158
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1161
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1164
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SHIFT1 */
1169 ~m_486,
1170
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1173
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1176
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1180
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1183
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1186
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1189
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1192
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1195
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1197 ~m_K8,
1198
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1200 m_K8 | m_GENERIC64,
1201
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1203 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1204 ~(m_386 | m_486),
1205
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1209
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1211 machines. */
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1213
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1215 than via a MOV. */
1216 m_PENT,
1217
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1220 m_PENT,
1221
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1225 m_K6,
1226 };
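/* Reader's note (illustrative sketch, not part of the original file):
   each entry of ix86_tune_features is a bitmask over the m_* processor
   bits defined above, so asking "is this tuning enabled for the current
   CPU?" amounts to testing the bit selected by the -mtune choice, e.g.:

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       ... use leave in the epilogue ...

   In the real sources such tests are normally hidden behind TARGET_*
   convenience macros in i386.h.  */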
1227
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1232
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1234 ~m_386,
1235
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1237 ~(m_386 | m_486),
1238
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1240 ~m_386,
1241
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 ~m_386,
1244 };
1245
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1248
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1252
1253 static enum stringop_alg stringop_alg = no_stringop;
1254
1255 /* In case the average insn count for a single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1257 epilogue code. */
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1259
1260 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1264
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1267
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1269 {
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1274 /* FP registers */
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1277 /* arg pointer */
1278 NON_Q_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1282 SSE_REGS, SSE_REGS,
1283 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1284 MMX_REGS, MMX_REGS,
1285 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1288 SSE_REGS, SSE_REGS,
1289 };
1290
1291 /* The "default" register map used in 32bit mode. */
1292
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1294 {
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1302 };
1303
1304 static int const x86_64_int_parameter_registers[6] =
1305 {
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1308 };
1309
1310 static int const x86_64_ms_abi_int_parameter_registers[4] =
1311 {
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1314 };
1315
1316 static int const x86_64_int_return_registers[4] =
1317 {
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1319 };
1320
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1323 {
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1331 };
1332
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1377 numbers.
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1386 */
1387 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1388 {
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1396 };
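/* Illustrative lookup (not from the original source): the table above is
   indexed by GCC hard register number, so svr4_dbx_register_map[1] == 2,
   i.e. a value living in %edx (GCC regno 1) is described as DWARF register 2
   in the debug information, matching the SVR4 numbering listed above.  */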
1397
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
1400
1401 rtx ix86_compare_op0 = NULL_RTX;
1402 rtx ix86_compare_op1 = NULL_RTX;
1403 rtx ix86_compare_emitted = NULL_RTX;
1404
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
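/* For illustration only: with the usual 64-bit values REGPARM_MAX == 6 and
   SSE_REGPARM_MAX == 8, this works out to 6*8 + 8*16 = 176 bytes, i.e. six
   integer register slots plus eight 16-byte SSE slots saved for va_arg.  */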
1407
1408 /* Define the structure for the machine field in struct function. */
1409
1410 struct stack_local_entry GTY(())
1411 {
1412 unsigned short mode;
1413 unsigned short n;
1414 rtx rtl;
1415 struct stack_local_entry *next;
1416 };
1417
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1420
1421 [arguments]
1422 <- ARG_POINTER
1423 saved pc
1424
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1427 [saved regs]
1428
1429 [padding1] \
1430 )
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1433 [frame] (
1434 )
1435 [padding2] /
1436 */
1437 struct ix86_frame
1438 {
1439 int nregs;
1440 int padding1;
1441 int va_arg_size;
1442 HOST_WIDE_INT frame;
1443 int padding2;
1444 int outgoing_arguments_size;
1445 int red_zone_size;
1446
1447 HOST_WIDE_INT to_allocate;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset;
1450 HOST_WIDE_INT hard_frame_pointer_offset;
1451 HOST_WIDE_INT stack_pointer_offset;
1452
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov;
1456 };
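/* Hedged illustration (hypothetical figures, not computed here): in 32-bit
   code with a frame pointer, the saved pc and saved %ebp occupy 8 bytes
   below ARG_POINTER, so hard_frame_pointer_offset is 8; the [saved regs]
   block follows, and padding1, the va_arg registers, the frame and padding2
   together form to_allocate, as in the diagram above.  */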
1457
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel;
1460 /* Asm dialect. */
1461 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1462 /* TLS dialects. */
1463 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1464
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath;
1467
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune;
1470
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch;
1473
1474 /* true if sse prefetch instruction is not NOOP. */
1475 int x86_prefetch_sse;
1476
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm;
1479
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer;
1482 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1483
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary;
1486
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost;
1489
1490 /* With the medium code model, variables larger than this are put in the
1491 ldata/lbss sections instead of the normal data/bss sections. */
1492
1493 int ix86_section_threshold = 65536;
1494
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix[16];
1497 int internal_label_prefix_len;
1498
1499 /* Register class used for passing a given 64bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of the SSESF and SSEDF classes, which are basically the SSE class, except
1502 that gcc will use an SFmode or DFmode move instead of DImode to avoid
1503 reformatting penalties.
1504 
1505 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1506 whenever possible (the upper half then contains only padding). */
1506 enum x86_64_reg_class
1507 {
1508 X86_64_NO_CLASS,
1509 X86_64_INTEGER_CLASS,
1510 X86_64_INTEGERSI_CLASS,
1511 X86_64_SSE_CLASS,
1512 X86_64_SSESF_CLASS,
1513 X86_64_SSEDF_CLASS,
1514 X86_64_SSEUP_CLASS,
1515 X86_64_X87_CLASS,
1516 X86_64_X87UP_CLASS,
1517 X86_64_COMPLEX_X87_CLASS,
1518 X86_64_MEMORY_CLASS
1519 };
1520 static const char * const x86_64_reg_class_name[] =
1521 {
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1524 };
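/* Hedged examples of the classification implemented below (following the
   x86-64 PS ABI): a lone "double" argument is X86_64_SSEDF_CLASS, a
   "struct { int a, b; }" occupies one eightbyte of X86_64_INTEGER_CLASS,
   and a "long double" takes X86_64_X87_CLASS plus X86_64_X87UP_CLASS for
   its two eightbytes.  */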
1525
1526 #define MAX_CLASSES 4
1527
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1531
1532 \f
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1538 rtx, rtx, int);
1539
1540 \f
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1542 in memory. */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1545 #endif
1546
1547 /* Implement TARGET_HANDLE_OPTION. */
1548
1549 static bool
1550 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1551 {
1552 switch (code)
1553 {
1554 case OPT_m3dnow:
1555 if (!value)
1556 {
1557 target_flags &= ~MASK_3DNOW_A;
1558 target_flags_explicit |= MASK_3DNOW_A;
1559 }
1560 return true;
1561
1562 case OPT_mmmx:
1563 if (!value)
1564 {
1565 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1566 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1567 }
1568 return true;
1569
1570 case OPT_msse:
1571 if (!value)
1572 {
1573 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1574 | MASK_SSE4_1 | MASK_SSE4A);
1575 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1576 | MASK_SSE4_1 | MASK_SSE4A);
1577 }
1578 return true;
1579
1580 case OPT_msse2:
1581 if (!value)
1582 {
1583 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4_1
1584 | MASK_SSE4A);
1585 target_flags_explicit |= (MASK_SSE3 | MASK_SSSE3
1586 | MASK_SSE4_1 | MASK_SSE4A);
1587 }
1588 return true;
1589
1590 case OPT_msse3:
1591 if (!value)
1592 {
1593 target_flags &= ~(MASK_SSSE3 | MASK_SSE4_1 | MASK_SSE4A);
1594 target_flags_explicit |= (MASK_SSSE3 | MASK_SSE4_1
1595 | MASK_SSE4A);
1596 }
1597 return true;
1598
1599 case OPT_mssse3:
1600 if (!value)
1601 {
1602 target_flags &= ~(MASK_SSE4_1 | MASK_SSE4A);
1603 target_flags_explicit |= MASK_SSE4_1 | MASK_SSE4A;
1604 }
1605 return true;
1606
1607 case OPT_msse4_1:
1608 if (!value)
1609 {
1610 target_flags &= ~MASK_SSE4A;
1611 target_flags_explicit |= MASK_SSE4A;
1612 }
1613 return true;
1614
1615 case OPT_msse4a:
1616 if (!value)
1617 {
1618 target_flags &= ~MASK_SSE4_1;
1619 target_flags_explicit |= MASK_SSE4_1;
1620 }
1621 return true;
1622
1623 default:
1624 return true;
1625 }
1626 }
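/* Illustrative consequence of the handler above: "gcc -msse2 -mno-sse"
   processes the options left to right, so the OPT_msse case with value == 0
   also clears MASK_SSE2, MASK_SSE3, MASK_SSSE3, MASK_SSE4_1 and MASK_SSE4A,
   leaving no SSE level enabled despite the earlier -msse2.  */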
1627
1628 /* Sometimes certain combinations of command options do not make
1629 sense on a particular target machine. You can define a macro
1630 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1631 defined, is executed once just after all the command options have
1632 been parsed.
1633
1634 Don't use this macro to turn on various extra optimizations for
1635 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1636
1637 void
1638 override_options (void)
1639 {
1640 int i;
1641 int ix86_tune_defaulted = 0;
1642 unsigned int ix86_arch_mask, ix86_tune_mask;
1643
1644 /* Comes from final.c -- no real reason to change it. */
1645 #define MAX_CODE_ALIGN 16
1646
1647 static struct ptt
1648 {
1649 const struct processor_costs *cost; /* Processor costs */
1650 const int target_enable; /* Target flags to enable. */
1651 const int target_disable; /* Target flags to disable. */
1652 const int align_loop; /* Default alignments. */
1653 const int align_loop_max_skip;
1654 const int align_jump;
1655 const int align_jump_max_skip;
1656 const int align_func;
1657 }
1658 const processor_target_table[PROCESSOR_max] =
1659 {
1660 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1661 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1662 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1663 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1664 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1665 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1666 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1667 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1668 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1669 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1670 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1671 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1672 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1673 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1674 };
1675
1676 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1677 static struct pta
1678 {
1679 const char *const name; /* processor name or nickname. */
1680 const enum processor_type processor;
1681 const enum pta_flags
1682 {
1683 PTA_SSE = 1 << 0,
1684 PTA_SSE2 = 1 << 1,
1685 PTA_SSE3 = 1 << 2,
1686 PTA_MMX = 1 << 3,
1687 PTA_PREFETCH_SSE = 1 << 4,
1688 PTA_3DNOW = 1 << 5,
1689 PTA_3DNOW_A = 1 << 6,
1690 PTA_64BIT = 1 << 7,
1691 PTA_SSSE3 = 1 << 8,
1692 PTA_CX16 = 1 << 9,
1693 PTA_POPCNT = 1 << 10,
1694 PTA_ABM = 1 << 11,
1695 PTA_SSE4A = 1 << 12,
1696 PTA_NO_SAHF = 1 << 13,
1697 PTA_SSE4_1 = 1 << 14
1698 } flags;
1699 }
1700 const processor_alias_table[] =
1701 {
1702 {"i386", PROCESSOR_I386, 0},
1703 {"i486", PROCESSOR_I486, 0},
1704 {"i586", PROCESSOR_PENTIUM, 0},
1705 {"pentium", PROCESSOR_PENTIUM, 0},
1706 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1707 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1708 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1709 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1710 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1711 {"i686", PROCESSOR_PENTIUMPRO, 0},
1712 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1713 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1714 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1715 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1716 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1717 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1718 | PTA_MMX | PTA_PREFETCH_SSE},
1719 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1720 | PTA_MMX | PTA_PREFETCH_SSE},
1721 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1722 | PTA_MMX | PTA_PREFETCH_SSE},
1723 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1724 | PTA_MMX | PTA_PREFETCH_SSE
1725 | PTA_CX16 | PTA_NO_SAHF},
1726 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1727 | PTA_64BIT | PTA_MMX
1728 | PTA_PREFETCH_SSE | PTA_CX16},
1729 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1730 | PTA_3DNOW_A},
1731 {"k6", PROCESSOR_K6, PTA_MMX},
1732 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1733 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1734 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1735 | PTA_3DNOW_A},
1736 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1737 | PTA_3DNOW | PTA_3DNOW_A},
1738 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1739 | PTA_3DNOW_A | PTA_SSE},
1740 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1741 | PTA_3DNOW_A | PTA_SSE},
1742 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1743 | PTA_3DNOW_A | PTA_SSE},
1744 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1745 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1746 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1747 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1748 | PTA_NO_SAHF},
1749 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1750 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1751 | PTA_SSE3 | PTA_NO_SAHF},
1752 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1753 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1754 | PTA_SSE2 | PTA_NO_SAHF},
1755 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1756 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1757 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1758 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1759 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1760 | PTA_SSE2 | PTA_NO_SAHF},
1761 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1762 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1763 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1764 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1765 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1766 | PTA_SSE2 | PTA_NO_SAHF},
1767 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1768 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1769 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1770 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1771 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1772 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1773 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1774 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1775 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1776 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1777 };
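  /* Worked example of the table above: -march=core2 selects PROCESSOR_CORE2
     and its PTA_* flags turn on MMX, SSE, SSE2, SSE3, SSSE3 and CMPXCHG16B
     by default in the loop below, unless the user disabled one of them
     explicitly on the command line.  */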
1778
1779 int const pta_size = ARRAY_SIZE (processor_alias_table);
1780
1781 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1782 SUBTARGET_OVERRIDE_OPTIONS;
1783 #endif
1784
1785 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1786 SUBSUBTARGET_OVERRIDE_OPTIONS;
1787 #endif
1788
1789 /* PIC is always used for 64-bit Mach-O (Darwin), so force -fPIC here. */
1790 if (TARGET_MACHO && TARGET_64BIT)
1791 flag_pic = 2;
1792
1793 /* Set the default values for switches whose default depends on TARGET_64BIT
1794 in case they weren't overwritten by command line options. */
1795 if (TARGET_64BIT)
1796 {
1797 /* Mach-O doesn't support omitting the frame pointer for now. */
1798 if (flag_omit_frame_pointer == 2)
1799 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1800 if (flag_asynchronous_unwind_tables == 2)
1801 flag_asynchronous_unwind_tables = 1;
1802 if (flag_pcc_struct_return == 2)
1803 flag_pcc_struct_return = 0;
1804 }
1805 else
1806 {
1807 if (flag_omit_frame_pointer == 2)
1808 flag_omit_frame_pointer = 0;
1809 if (flag_asynchronous_unwind_tables == 2)
1810 flag_asynchronous_unwind_tables = 0;
1811 if (flag_pcc_struct_return == 2)
1812 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1813 }
1814
1815 /* Need to check -mtune=generic first. */
1816 if (ix86_tune_string)
1817 {
1818 if (!strcmp (ix86_tune_string, "generic")
1819 || !strcmp (ix86_tune_string, "i686")
1820 /* As special support for cross compilers we read -mtune=native
1821 as -mtune=generic. With native compilers we won't see
1822 -mtune=native, because the driver has already replaced it. */
1823 || !strcmp (ix86_tune_string, "native"))
1824 {
1825 if (TARGET_64BIT)
1826 ix86_tune_string = "generic64";
1827 else
1828 ix86_tune_string = "generic32";
1829 }
1830 else if (!strncmp (ix86_tune_string, "generic", 7))
1831 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1832 }
1833 else
1834 {
1835 if (ix86_arch_string)
1836 ix86_tune_string = ix86_arch_string;
1837 if (!ix86_tune_string)
1838 {
1839 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1840 ix86_tune_defaulted = 1;
1841 }
1842
1843 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1844 need to use a sensible tune option. */
1845 if (!strcmp (ix86_tune_string, "generic")
1846 || !strcmp (ix86_tune_string, "x86-64")
1847 || !strcmp (ix86_tune_string, "i686"))
1848 {
1849 if (TARGET_64BIT)
1850 ix86_tune_string = "generic64";
1851 else
1852 ix86_tune_string = "generic32";
1853 }
1854 }
1855 if (ix86_stringop_string)
1856 {
1857 if (!strcmp (ix86_stringop_string, "rep_byte"))
1858 stringop_alg = rep_prefix_1_byte;
1859 else if (!strcmp (ix86_stringop_string, "libcall"))
1860 stringop_alg = libcall;
1861 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1862 stringop_alg = rep_prefix_4_byte;
1863 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1864 stringop_alg = rep_prefix_8_byte;
1865 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1866 stringop_alg = loop_1_byte;
1867 else if (!strcmp (ix86_stringop_string, "loop"))
1868 stringop_alg = loop;
1869 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1870 stringop_alg = unrolled_loop;
1871 else
1872 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1873 }
1874 if (!strcmp (ix86_tune_string, "x86-64"))
1875 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1876 "-mtune=generic instead as appropriate.");
1877
1878 if (!ix86_arch_string)
1879 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1880 if (!strcmp (ix86_arch_string, "generic"))
1881 error ("generic CPU can be used only for -mtune= switch");
1882 if (!strncmp (ix86_arch_string, "generic", 7))
1883 error ("bad value (%s) for -march= switch", ix86_arch_string);
1884
1885 if (ix86_cmodel_string != 0)
1886 {
1887 if (!strcmp (ix86_cmodel_string, "small"))
1888 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1889 else if (!strcmp (ix86_cmodel_string, "medium"))
1890 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1891 else if (!strcmp (ix86_cmodel_string, "large"))
1892 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1893 else if (flag_pic)
1894 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1895 else if (!strcmp (ix86_cmodel_string, "32"))
1896 ix86_cmodel = CM_32;
1897 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1898 ix86_cmodel = CM_KERNEL;
1899 else
1900 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1901 }
1902 else
1903 {
1904 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1905 use of rip-relative addressing. This eliminates fixups that
1906 would otherwise be needed if this object is to be placed in a
1907 DLL, and is essentially just as efficient as direct addressing. */
1908 if (TARGET_64BIT_MS_ABI)
1909 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1910 else if (TARGET_64BIT)
1911 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1912 else
1913 ix86_cmodel = CM_32;
1914 }
1915 if (ix86_asm_string != 0)
1916 {
1917 if (! TARGET_MACHO
1918 && !strcmp (ix86_asm_string, "intel"))
1919 ix86_asm_dialect = ASM_INTEL;
1920 else if (!strcmp (ix86_asm_string, "att"))
1921 ix86_asm_dialect = ASM_ATT;
1922 else
1923 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1924 }
1925 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1926 error ("code model %qs not supported in the %s bit mode",
1927 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1928 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1929 sorry ("%i-bit mode not compiled in",
1930 (target_flags & MASK_64BIT) ? 64 : 32);
1931
1932 for (i = 0; i < pta_size; i++)
1933 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1934 {
1935 ix86_arch = processor_alias_table[i].processor;
1936 /* Default cpu tuning to the architecture. */
1937 ix86_tune = ix86_arch;
1938 if (processor_alias_table[i].flags & PTA_MMX
1939 && !(target_flags_explicit & MASK_MMX))
1940 target_flags |= MASK_MMX;
1941 if (processor_alias_table[i].flags & PTA_3DNOW
1942 && !(target_flags_explicit & MASK_3DNOW))
1943 target_flags |= MASK_3DNOW;
1944 if (processor_alias_table[i].flags & PTA_3DNOW_A
1945 && !(target_flags_explicit & MASK_3DNOW_A))
1946 target_flags |= MASK_3DNOW_A;
1947 if (processor_alias_table[i].flags & PTA_SSE
1948 && !(target_flags_explicit & MASK_SSE))
1949 target_flags |= MASK_SSE;
1950 if (processor_alias_table[i].flags & PTA_SSE2
1951 && !(target_flags_explicit & MASK_SSE2))
1952 target_flags |= MASK_SSE2;
1953 if (processor_alias_table[i].flags & PTA_SSE3
1954 && !(target_flags_explicit & MASK_SSE3))
1955 target_flags |= MASK_SSE3;
1956 if (processor_alias_table[i].flags & PTA_SSSE3
1957 && !(target_flags_explicit & MASK_SSSE3))
1958 target_flags |= MASK_SSSE3;
1959 if (processor_alias_table[i].flags & PTA_SSE4_1
1960 && !(target_flags_explicit & MASK_SSE4_1))
1961 target_flags |= MASK_SSE4_1;
1962 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1963 x86_prefetch_sse = true;
1964 if (processor_alias_table[i].flags & PTA_CX16)
1965 x86_cmpxchg16b = true;
1966 if (processor_alias_table[i].flags & PTA_POPCNT
1967 && !(target_flags_explicit & MASK_POPCNT))
1968 target_flags |= MASK_POPCNT;
1969 if (processor_alias_table[i].flags & PTA_ABM
1970 && !(target_flags_explicit & MASK_ABM))
1971 target_flags |= MASK_ABM;
1972 if (processor_alias_table[i].flags & PTA_SSE4A
1973 && !(target_flags_explicit & MASK_SSE4A))
1974 target_flags |= MASK_SSE4A;
1975 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1976 x86_sahf = true;
1977 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1978 error ("CPU you selected does not support x86-64 "
1979 "instruction set");
1980 break;
1981 }
1982
1983 if (i == pta_size)
1984 error ("bad value (%s) for -march= switch", ix86_arch_string);
1985
1986 ix86_arch_mask = 1u << ix86_arch;
1987 for (i = 0; i < X86_ARCH_LAST; ++i)
1988 ix86_arch_features[i] &= ix86_arch_mask;
1989
1990 for (i = 0; i < pta_size; i++)
1991 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1992 {
1993 ix86_tune = processor_alias_table[i].processor;
1994 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1995 {
1996 if (ix86_tune_defaulted)
1997 {
1998 ix86_tune_string = "x86-64";
1999 for (i = 0; i < pta_size; i++)
2000 if (! strcmp (ix86_tune_string,
2001 processor_alias_table[i].name))
2002 break;
2003 ix86_tune = processor_alias_table[i].processor;
2004 }
2005 else
2006 error ("CPU you selected does not support x86-64 "
2007 "instruction set");
2008 }
2009 /* Intel CPUs have always interpreted SSE prefetch instructions as
2010 NOPs; so, we can enable SSE prefetch instructions even when
2011 -mtune (rather than -march) points us to a processor that has them.
2012 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2013 higher processors. */
2014 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2015 x86_prefetch_sse = true;
2016 break;
2017 }
2018 if (i == pta_size)
2019 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2020
2021 ix86_tune_mask = 1u << ix86_tune;
2022 for (i = 0; i < X86_TUNE_LAST; ++i)
2023 ix86_tune_features[i] &= ix86_tune_mask;
2024
2025 if (optimize_size)
2026 ix86_cost = &size_cost;
2027 else
2028 ix86_cost = processor_target_table[ix86_tune].cost;
2029 target_flags |= processor_target_table[ix86_tune].target_enable;
2030 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2031
2032 /* Arrange to set up i386_stack_locals for all functions. */
2033 init_machine_status = ix86_init_machine_status;
2034
2035 /* Validate -mregparm= value. */
2036 if (ix86_regparm_string)
2037 {
2038 if (TARGET_64BIT)
2039 warning (0, "-mregparm is ignored in 64-bit mode");
2040 i = atoi (ix86_regparm_string);
2041 if (i < 0 || i > REGPARM_MAX)
2042 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2043 else
2044 ix86_regparm = i;
2045 }
2046 if (TARGET_64BIT)
2047 ix86_regparm = REGPARM_MAX;
2048
2049 /* If the user has provided any of the -malign-* options,
2050 warn and use that value only if -falign-* is not set.
2051 Remove this code in GCC 3.2 or later. */
2052 if (ix86_align_loops_string)
2053 {
2054 warning (0, "-malign-loops is obsolete, use -falign-loops");
2055 if (align_loops == 0)
2056 {
2057 i = atoi (ix86_align_loops_string);
2058 if (i < 0 || i > MAX_CODE_ALIGN)
2059 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2060 else
2061 align_loops = 1 << i;
2062 }
2063 }
2064
2065 if (ix86_align_jumps_string)
2066 {
2067 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2068 if (align_jumps == 0)
2069 {
2070 i = atoi (ix86_align_jumps_string);
2071 if (i < 0 || i > MAX_CODE_ALIGN)
2072 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2073 else
2074 align_jumps = 1 << i;
2075 }
2076 }
2077
2078 if (ix86_align_funcs_string)
2079 {
2080 warning (0, "-malign-functions is obsolete, use -falign-functions");
2081 if (align_functions == 0)
2082 {
2083 i = atoi (ix86_align_funcs_string);
2084 if (i < 0 || i > MAX_CODE_ALIGN)
2085 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2086 else
2087 align_functions = 1 << i;
2088 }
2089 }
2090
2091 /* Default align_* from the processor table. */
2092 if (align_loops == 0)
2093 {
2094 align_loops = processor_target_table[ix86_tune].align_loop;
2095 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2096 }
2097 if (align_jumps == 0)
2098 {
2099 align_jumps = processor_target_table[ix86_tune].align_jump;
2100 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2101 }
2102 if (align_functions == 0)
2103 {
2104 align_functions = processor_target_table[ix86_tune].align_func;
2105 }
2106
2107 /* Validate -mbranch-cost= value, or provide default. */
2108 ix86_branch_cost = ix86_cost->branch_cost;
2109 if (ix86_branch_cost_string)
2110 {
2111 i = atoi (ix86_branch_cost_string);
2112 if (i < 0 || i > 5)
2113 error ("-mbranch-cost=%d is not between 0 and 5", i);
2114 else
2115 ix86_branch_cost = i;
2116 }
2117 if (ix86_section_threshold_string)
2118 {
2119 i = atoi (ix86_section_threshold_string);
2120 if (i < 0)
2121 error ("-mlarge-data-threshold=%d is negative", i);
2122 else
2123 ix86_section_threshold = i;
2124 }
2125
2126 if (ix86_tls_dialect_string)
2127 {
2128 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2129 ix86_tls_dialect = TLS_DIALECT_GNU;
2130 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2131 ix86_tls_dialect = TLS_DIALECT_GNU2;
2132 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2133 ix86_tls_dialect = TLS_DIALECT_SUN;
2134 else
2135 error ("bad value (%s) for -mtls-dialect= switch",
2136 ix86_tls_dialect_string);
2137 }
2138
2139 if (ix87_precision_string)
2140 {
2141 i = atoi (ix87_precision_string);
2142 if (i != 32 && i != 64 && i != 80)
2143 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2144 }
2145
2146 /* Keep nonleaf frame pointers. */
2147 if (flag_omit_frame_pointer)
2148 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2149 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2150 flag_omit_frame_pointer = 1;
2151
2152 /* If we're doing fast math, we don't care about comparison order
2153 wrt NaNs. This lets us use a shorter comparison sequence. */
2154 if (flag_finite_math_only)
2155 target_flags &= ~MASK_IEEE_FP;
2156
2157 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2158 since the insns won't need emulation. */
2159 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2160 target_flags &= ~MASK_NO_FANCY_MATH_387;
2161
2162 /* Likewise, if the target doesn't have a 387, or we've specified
2163 software floating point, don't use 387 inline intrinsics. */
2164 if (!TARGET_80387)
2165 target_flags |= MASK_NO_FANCY_MATH_387;
2166
2167 /* Turn on SSSE3 builtins for -msse4.1. */
2168 if (TARGET_SSE4_1)
2169 target_flags |= MASK_SSSE3;
2170
2171 /* Turn on SSE3 builtins for -mssse3. */
2172 if (TARGET_SSSE3)
2173 target_flags |= MASK_SSE3;
2174
2175 /* Turn on SSE3 builtins for -msse4a. */
2176 if (TARGET_SSE4A)
2177 target_flags |= MASK_SSE3;
2178
2179 /* Turn on SSE2 builtins for -msse3. */
2180 if (TARGET_SSE3)
2181 target_flags |= MASK_SSE2;
2182
2183 /* Turn on SSE builtins for -msse2. */
2184 if (TARGET_SSE2)
2185 target_flags |= MASK_SSE;
2186
2187 /* Turn on MMX builtins for -msse. */
2188 if (TARGET_SSE)
2189 {
2190 target_flags |= MASK_MMX & ~target_flags_explicit;
2191 x86_prefetch_sse = true;
2192 }
2193
2194 /* Turn on MMX builtins for 3Dnow. */
2195 if (TARGET_3DNOW)
2196 target_flags |= MASK_MMX;
2197
2198 /* Turn on POPCNT builtins for -mabm. */
2199 if (TARGET_ABM)
2200 target_flags |= MASK_POPCNT;
2201
2202 if (TARGET_64BIT)
2203 {
2204 if (TARGET_RTD)
2205 warning (0, "-mrtd is ignored in 64bit mode");
2206
2207 /* Enable by default the SSE and MMX builtins. Do allow the user to
2208 explicitly disable any of these. In particular, disabling SSE and
2209 MMX for kernel code is extremely useful. */
2210 target_flags
2211 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2212 & ~target_flags_explicit);
2213 }
2214 else
2215 {
2216 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
2217 when the programmer takes care to keep the stack from being clobbered. */
2218 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2219 target_flags |= MASK_NO_RED_ZONE;
2220 }
2221
2222 /* Validate -mpreferred-stack-boundary= value, or provide default.
2223 The default of 128 bits is for Pentium III's SSE __m128.  We don't
2224 lower it for optimize_size, since otherwise object files compiled
2225 with -Os and -On could not be mixed. */
2226 ix86_preferred_stack_boundary = 128;
2227 if (ix86_preferred_stack_boundary_string)
2228 {
2229 i = atoi (ix86_preferred_stack_boundary_string);
2230 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2231 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2232 TARGET_64BIT ? 4 : 2);
2233 else
2234 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2235 }
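  /* Worked example: -mpreferred-stack-boundary=4 yields
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. the 16-byte
     alignment that SSE __m128 spills need, matching the default above.  */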
2236
2237 /* Accept -msseregparm only if at least SSE support is enabled. */
2238 if (TARGET_SSEREGPARM
2239 && ! TARGET_SSE)
2240 error ("-msseregparm used without SSE enabled");
2241
2242 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2243 if (ix86_fpmath_string != 0)
2244 {
2245 if (! strcmp (ix86_fpmath_string, "387"))
2246 ix86_fpmath = FPMATH_387;
2247 else if (! strcmp (ix86_fpmath_string, "sse"))
2248 {
2249 if (!TARGET_SSE)
2250 {
2251 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2252 ix86_fpmath = FPMATH_387;
2253 }
2254 else
2255 ix86_fpmath = FPMATH_SSE;
2256 }
2257 else if (! strcmp (ix86_fpmath_string, "387,sse")
2258 || ! strcmp (ix86_fpmath_string, "sse,387"))
2259 {
2260 if (!TARGET_SSE)
2261 {
2262 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2263 ix86_fpmath = FPMATH_387;
2264 }
2265 else if (!TARGET_80387)
2266 {
2267 warning (0, "387 instruction set disabled, using SSE arithmetics");
2268 ix86_fpmath = FPMATH_SSE;
2269 }
2270 else
2271 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2272 }
2273 else
2274 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2275 }
2276
2277 /* If the i387 is disabled, then do not return values in it. */
2278 if (!TARGET_80387)
2279 target_flags &= ~MASK_FLOAT_RETURNS;
2280
2281 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2282 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2283 && !optimize_size)
2284 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2285
2286 /* ??? Unwind info is not correct around the CFG unless either a frame
2287 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2288 unwind info generation to be aware of the CFG and propagating states
2289 around edges. */
2290 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2291 || flag_exceptions || flag_non_call_exceptions)
2292 && flag_omit_frame_pointer
2293 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2294 {
2295 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2296 warning (0, "unwind tables currently require either a frame pointer "
2297 "or -maccumulate-outgoing-args for correctness");
2298 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2299 }
2300
2301 /* For sane SSE instruction set generation we need the fcomi instruction.
2302 It is safe to enable all CMOVE instructions. */
2303 if (TARGET_SSE)
2304 TARGET_CMOVE = 1;
2305
2306 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2307 {
2308 char *p;
2309 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2310 p = strchr (internal_label_prefix, 'X');
2311 internal_label_prefix_len = p - internal_label_prefix;
2312 *p = '\0';
2313 }
2314
2315 /* When scheduling description is not available, disable scheduler pass
2316 so it won't slow down the compilation and make x87 code slower. */
2317 if (!TARGET_SCHEDULE)
2318 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2319
2320 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2321 set_param_value ("simultaneous-prefetches",
2322 ix86_cost->simultaneous_prefetches);
2323 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2324 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2325 }
2326 \f
2327 /* Return true if this goes in large data/bss. */
2328
2329 static bool
2330 ix86_in_large_data_p (tree exp)
2331 {
2332 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2333 return false;
2334
2335 /* Functions are never large data. */
2336 if (TREE_CODE (exp) == FUNCTION_DECL)
2337 return false;
2338
2339 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2340 {
2341 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2342 if (strcmp (section, ".ldata") == 0
2343 || strcmp (section, ".lbss") == 0)
2344 return true;
2345 return false;
2346 }
2347 else
2348 {
2349 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2350
2351 /* If this is an incomplete type with size 0, then we can't put it
2352 in data because it might be too big when completed. */
2353 if (!size || size > ix86_section_threshold)
2354 return true;
2355 }
2356
2357 return false;
2358 }
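/* Illustrative example (assuming the default 64 KiB threshold): under
   -mcmodel=medium, "static char big[100000];" exceeds ix86_section_threshold
   and is treated as large data, so it ends up in .lbss rather than .bss,
   while smaller objects keep using the normal sections.  */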
2359
2360 /* Switch to the appropriate section for output of DECL.
2361 DECL is either a `VAR_DECL' node or a constant of some sort.
2362 RELOC indicates whether forming the initial value of DECL requires
2363 link-time relocations. */
2364
2365 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2366 ATTRIBUTE_UNUSED;
2367
2368 static section *
2369 x86_64_elf_select_section (tree decl, int reloc,
2370 unsigned HOST_WIDE_INT align)
2371 {
2372 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2373 && ix86_in_large_data_p (decl))
2374 {
2375 const char *sname = NULL;
2376 unsigned int flags = SECTION_WRITE;
2377 switch (categorize_decl_for_section (decl, reloc))
2378 {
2379 case SECCAT_DATA:
2380 sname = ".ldata";
2381 break;
2382 case SECCAT_DATA_REL:
2383 sname = ".ldata.rel";
2384 break;
2385 case SECCAT_DATA_REL_LOCAL:
2386 sname = ".ldata.rel.local";
2387 break;
2388 case SECCAT_DATA_REL_RO:
2389 sname = ".ldata.rel.ro";
2390 break;
2391 case SECCAT_DATA_REL_RO_LOCAL:
2392 sname = ".ldata.rel.ro.local";
2393 break;
2394 case SECCAT_BSS:
2395 sname = ".lbss";
2396 flags |= SECTION_BSS;
2397 break;
2398 case SECCAT_RODATA:
2399 case SECCAT_RODATA_MERGE_STR:
2400 case SECCAT_RODATA_MERGE_STR_INIT:
2401 case SECCAT_RODATA_MERGE_CONST:
2402 sname = ".lrodata";
2403 flags = 0;
2404 break;
2405 case SECCAT_SRODATA:
2406 case SECCAT_SDATA:
2407 case SECCAT_SBSS:
2408 gcc_unreachable ();
2409 case SECCAT_TEXT:
2410 case SECCAT_TDATA:
2411 case SECCAT_TBSS:
2412 /* We don't split these for the medium model.  Place them into
2413 default sections and hope for the best. */
2414 break;
2415 }
2416 if (sname)
2417 {
2418 /* We might get called with string constants, but get_named_section
2419 doesn't like them as they are not DECLs. Also, we need to set
2420 flags in that case. */
2421 if (!DECL_P (decl))
2422 return get_section (sname, flags, NULL);
2423 return get_named_section (decl, sname, reloc);
2424 }
2425 }
2426 return default_elf_select_section (decl, reloc, align);
2427 }
2428
2429 /* Build up a unique section name, expressed as a
2430 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2431 RELOC indicates whether the initial value of EXP requires
2432 link-time relocations. */
2433
2434 static void ATTRIBUTE_UNUSED
2435 x86_64_elf_unique_section (tree decl, int reloc)
2436 {
2437 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2438 && ix86_in_large_data_p (decl))
2439 {
2440 const char *prefix = NULL;
2441 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2442 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2443
2444 switch (categorize_decl_for_section (decl, reloc))
2445 {
2446 case SECCAT_DATA:
2447 case SECCAT_DATA_REL:
2448 case SECCAT_DATA_REL_LOCAL:
2449 case SECCAT_DATA_REL_RO:
2450 case SECCAT_DATA_REL_RO_LOCAL:
2451 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2452 break;
2453 case SECCAT_BSS:
2454 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2455 break;
2456 case SECCAT_RODATA:
2457 case SECCAT_RODATA_MERGE_STR:
2458 case SECCAT_RODATA_MERGE_STR_INIT:
2459 case SECCAT_RODATA_MERGE_CONST:
2460 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2461 break;
2462 case SECCAT_SRODATA:
2463 case SECCAT_SDATA:
2464 case SECCAT_SBSS:
2465 gcc_unreachable ();
2466 case SECCAT_TEXT:
2467 case SECCAT_TDATA:
2468 case SECCAT_TBSS:
2469 /* We don't split these for the medium model.  Place them into
2470 default sections and hope for the best. */
2471 break;
2472 }
2473 if (prefix)
2474 {
2475 const char *name;
2476 size_t nlen, plen;
2477 char *string;
2478 plen = strlen (prefix);
2479
2480 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2481 name = targetm.strip_name_encoding (name);
2482 nlen = strlen (name);
2483
2484 string = alloca (nlen + plen + 1);
2485 memcpy (string, prefix, plen);
2486 memcpy (string + plen, name, nlen + 1);
2487
2488 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2489 return;
2490 }
2491 }
2492 default_unique_section (decl, reloc);
2493 }
2494
2495 #ifdef COMMON_ASM_OP
2496 /* This says how to output assembler code to declare an
2497 uninitialized external linkage data object.
2498
2499 For medium-model x86-64 we need to use the .largecomm directive for
2500 large objects. */
2501 void
2502 x86_elf_aligned_common (FILE *file,
2503 const char *name, unsigned HOST_WIDE_INT size,
2504 int align)
2505 {
2506 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2507 && size > (unsigned int)ix86_section_threshold)
2508 fprintf (file, ".largecomm\t");
2509 else
2510 fprintf (file, "%s", COMMON_ASM_OP);
2511 assemble_name (file, name);
2512 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2513 size, align / BITS_PER_UNIT);
2514 }
2515 #endif
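/* Hedged example of the output above: under -mcmodel=medium with the default
   threshold, a 100000-byte common object "buf" with 32-byte alignment would
   be emitted as ".largecomm	buf,100000,32", while smaller objects use the
   normal .comm form of COMMON_ASM_OP.  */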
2516
2517 /* Utility function for targets to use in implementing
2518 ASM_OUTPUT_ALIGNED_BSS. */
2519
2520 void
2521 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2522 const char *name, unsigned HOST_WIDE_INT size,
2523 int align)
2524 {
2525 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2526 && size > (unsigned int)ix86_section_threshold)
2527 switch_to_section (get_named_section (decl, ".lbss", 0));
2528 else
2529 switch_to_section (bss_section);
2530 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2531 #ifdef ASM_DECLARE_OBJECT_NAME
2532 last_assemble_variable_decl = decl;
2533 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2534 #else
2535 /* Standard thing is just output label for the object. */
2536 ASM_OUTPUT_LABEL (file, name);
2537 #endif /* ASM_DECLARE_OBJECT_NAME */
2538 ASM_OUTPUT_SKIP (file, size ? size : 1);
2539 }
2540 \f
2541 void
2542 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2543 {
2544 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2545 make the problem with not enough registers even worse. */
2546 #ifdef INSN_SCHEDULING
2547 if (level > 1)
2548 flag_schedule_insns = 0;
2549 #endif
2550
2551 if (TARGET_MACHO)
2552 /* The Darwin libraries never set errno, so we might as well
2553 avoid calling them when that's the only reason we would. */
2554 flag_errno_math = 0;
2555
2556 /* The default values of these switches depend on TARGET_64BIT,
2557 which is not known at this moment.  Mark these values with 2 and
2558 let the user override them.  If no command line option
2559 specifies them, we will set the defaults in override_options. */
2560 if (optimize >= 1)
2561 flag_omit_frame_pointer = 2;
2562 flag_pcc_struct_return = 2;
2563 flag_asynchronous_unwind_tables = 2;
2564 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2565 SUBTARGET_OPTIMIZATION_OPTIONS;
2566 #endif
2567 }
2568 \f
2569 /* Decide whether we can make a sibling call to a function. DECL is the
2570 declaration of the function being targeted by the call and EXP is the
2571 CALL_EXPR representing the call. */
2572
2573 static bool
2574 ix86_function_ok_for_sibcall (tree decl, tree exp)
2575 {
2576 tree func;
2577 rtx a, b;
2578
2579 /* If we are generating position-independent code, we cannot sibcall
2580 optimize any indirect call, or a direct call to a global function,
2581 as the PLT requires %ebx be live. */
2582 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2583 return false;
2584
2585 if (decl)
2586 func = decl;
2587 else
2588 {
2589 func = TREE_TYPE (CALL_EXPR_FN (exp));
2590 if (POINTER_TYPE_P (func))
2591 func = TREE_TYPE (func);
2592 }
2593
2594 /* Check that the return value locations are the same.  For example,
2595 if we are returning floats on the 80387 register stack, we cannot
2596 make a sibcall from a function that doesn't return a float to a
2597 function that does or, conversely, from a function that does return
2598 a float to a function that doesn't; the necessary stack adjustment
2599 would not be executed. This is also the place we notice
2600 differences in the return value ABI. Note that it is ok for one
2601 of the functions to have void return type as long as the return
2602 value of the other is passed in a register. */
2603 a = ix86_function_value (TREE_TYPE (exp), func, false);
2604 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2605 cfun->decl, false);
2606 if (STACK_REG_P (a) || STACK_REG_P (b))
2607 {
2608 if (!rtx_equal_p (a, b))
2609 return false;
2610 }
2611 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2612 ;
2613 else if (!rtx_equal_p (a, b))
2614 return false;
2615
2616 /* If this call is indirect, we'll need to be able to use a call-clobbered
2617 register for the address of the target function. Make sure that all
2618 such registers are not used for passing parameters. */
2619 if (!decl && !TARGET_64BIT)
2620 {
2621 tree type;
2622
2623 /* We're looking at the CALL_EXPR, we need the type of the function. */
2624 type = CALL_EXPR_FN (exp); /* pointer expression */
2625 type = TREE_TYPE (type); /* pointer type */
2626 type = TREE_TYPE (type); /* function type */
2627
2628 if (ix86_function_regparm (type, NULL) >= 3)
2629 {
2630 /* ??? Need to count the actual number of registers to be used,
2631 not the possible number of registers. Fix later. */
2632 return false;
2633 }
2634 }
2635
2636 /* Dllimport'd functions are also called indirectly. */
2637 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2638 && decl && DECL_DLLIMPORT_P (decl)
2639 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2640 return false;
2641
2642 /* If we force-aligned the stack, then sibcalling would unalign the
2643 stack, which may break the called function. */
2644 if (cfun->machine->force_align_arg_pointer)
2645 return false;
2646
2647 /* Otherwise okay. That also includes certain types of indirect calls. */
2648 return true;
2649 }
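/* Illustrative consequence of the PIC check above (hypothetical user code):
   in 32-bit PIC code, "int f (void) { return g (); }" is not turned into a
   sibling call when g is a global function, because the call goes through
   the PLT and %ebx must stay live; with -fno-pic, or when g binds locally,
   the tail call is allowed.  */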
2650
2651 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2652 calling convention attributes;
2653 arguments as in struct attribute_spec.handler. */
2654
2655 static tree
2656 ix86_handle_cconv_attribute (tree *node, tree name,
2657 tree args,
2658 int flags ATTRIBUTE_UNUSED,
2659 bool *no_add_attrs)
2660 {
2661 if (TREE_CODE (*node) != FUNCTION_TYPE
2662 && TREE_CODE (*node) != METHOD_TYPE
2663 && TREE_CODE (*node) != FIELD_DECL
2664 && TREE_CODE (*node) != TYPE_DECL)
2665 {
2666 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2667 IDENTIFIER_POINTER (name));
2668 *no_add_attrs = true;
2669 return NULL_TREE;
2670 }
2671
2672 /* Can combine regparm with all attributes but fastcall. */
2673 if (is_attribute_p ("regparm", name))
2674 {
2675 tree cst;
2676
2677 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2678 {
2679 error ("fastcall and regparm attributes are not compatible");
2680 }
2681
2682 cst = TREE_VALUE (args);
2683 if (TREE_CODE (cst) != INTEGER_CST)
2684 {
2685 warning (OPT_Wattributes,
2686 "%qs attribute requires an integer constant argument",
2687 IDENTIFIER_POINTER (name));
2688 *no_add_attrs = true;
2689 }
2690 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2691 {
2692 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2693 IDENTIFIER_POINTER (name), REGPARM_MAX);
2694 *no_add_attrs = true;
2695 }
2696
2697 if (!TARGET_64BIT
2698 && lookup_attribute (ix86_force_align_arg_pointer_string,
2699 TYPE_ATTRIBUTES (*node))
2700 && compare_tree_int (cst, REGPARM_MAX-1))
2701 {
2702 error ("%s functions limited to %d register parameters",
2703 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2704 }
2705
2706 return NULL_TREE;
2707 }
2708
2709 if (TARGET_64BIT)
2710 {
2711 /* Do not warn when emulating the MS ABI. */
2712 if (!TARGET_64BIT_MS_ABI)
2713 warning (OPT_Wattributes, "%qs attribute ignored",
2714 IDENTIFIER_POINTER (name));
2715 *no_add_attrs = true;
2716 return NULL_TREE;
2717 }
2718
2719 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2720 if (is_attribute_p ("fastcall", name))
2721 {
2722 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2723 {
2724 error ("fastcall and cdecl attributes are not compatible");
2725 }
2726 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2727 {
2728 error ("fastcall and stdcall attributes are not compatible");
2729 }
2730 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2731 {
2732 error ("fastcall and regparm attributes are not compatible");
2733 }
2734 }
2735
2736 /* Can combine stdcall with fastcall (redundant), regparm and
2737 sseregparm. */
2738 else if (is_attribute_p ("stdcall", name))
2739 {
2740 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2741 {
2742 error ("stdcall and cdecl attributes are not compatible");
2743 }
2744 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2745 {
2746 error ("stdcall and fastcall attributes are not compatible");
2747 }
2748 }
2749
2750 /* Can combine cdecl with regparm and sseregparm. */
2751 else if (is_attribute_p ("cdecl", name))
2752 {
2753 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2754 {
2755 error ("stdcall and cdecl attributes are not compatible");
2756 }
2757 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2758 {
2759 error ("fastcall and cdecl attributes are not compatible");
2760 }
2761 }
2762
2763 /* Can combine sseregparm with all attributes. */
2764
2765 return NULL_TREE;
2766 }
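/* Illustrative user-level sketch (not part of GCC; kept out of the build):
   the handler above rejects the first declaration with "fastcall and regparm
   attributes are not compatible", while the second is accepted, since
   stdcall may be combined with regparm.  */
#if 0
extern void __attribute__ ((fastcall, regparm (2))) bad_combo (int, int);
extern void __attribute__ ((stdcall, regparm (2))) ok_combo (int, int);
#endif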
2767
2768 /* Return 0 if the attributes for two types are incompatible, 1 if they
2769 are compatible, and 2 if they are nearly compatible (which causes a
2770 warning to be generated). */
2771
2772 static int
2773 ix86_comp_type_attributes (tree type1, tree type2)
2774 {
2775 /* Check for mismatch of non-default calling convention. */
2776 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2777
2778 if (TREE_CODE (type1) != FUNCTION_TYPE)
2779 return 1;
2780
2781 /* Check for mismatched fastcall/regparm types. */
2782 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2783 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2784 || (ix86_function_regparm (type1, NULL)
2785 != ix86_function_regparm (type2, NULL)))
2786 return 0;
2787
2788 /* Check for mismatched sseregparm types. */
2789 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2790 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2791 return 0;
2792
2793 /* Check for mismatched return types (cdecl vs stdcall). */
2794 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2795 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2796 return 0;
2797
2798 return 1;
2799 }
2800 \f
2801 /* Return the regparm value for a function with the indicated TYPE and DECL.
2802 DECL may be NULL when calling function indirectly
2803 or considering a libcall. */
2804
2805 static int
2806 ix86_function_regparm (tree type, tree decl)
2807 {
2808 tree attr;
2809 int regparm = ix86_regparm;
2810
2811 if (TARGET_64BIT)
2812 return regparm;
2813
2814 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2815 if (attr)
2816 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2817
2818 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2819 return 2;
2820
2821 /* Use register calling convention for local functions when possible. */
2822 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2823 && flag_unit_at_a_time && !profile_flag)
2824 {
2825 struct cgraph_local_info *i = cgraph_local_info (decl);
2826 if (i && i->local)
2827 {
2828 int local_regparm, globals = 0, regno;
2829 struct function *f;
2830
2831 /* Make sure no regparm register is taken by a
2832 global register variable. */
2833 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2834 if (global_regs[local_regparm])
2835 break;
2836
2837 /* We can't use regparm(3) for nested functions as these use
2838 static chain pointer in third argument. */
2839 if (local_regparm == 3
2840 && decl_function_context (decl)
2841 && !DECL_NO_STATIC_CHAIN (decl))
2842 local_regparm = 2;
2843
2844 /* If the function realigns its stack pointer, the prologue will
2845 clobber %ecx. If we've already generated code for the callee,
2846 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2847 scanning the attributes for the self-realigning property. */
2848 f = DECL_STRUCT_FUNCTION (decl);
2849 if (local_regparm == 3
2850 && (f ? !!f->machine->force_align_arg_pointer
2851 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2852 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2853 local_regparm = 2;
2854
2855 /* Each global register variable increases register pressure,
2856 so the more global reg vars there are, the less useful the
2857 regparm optimization is, unless the user requested it explicitly. */
2858 for (regno = 0; regno < 6; regno++)
2859 if (global_regs[regno])
2860 globals++;
2861 local_regparm
2862 = globals < local_regparm ? local_regparm - globals : 0;
2863
2864 if (local_regparm > regparm)
2865 regparm = local_regparm;
2866 }
2867 }
2868
2869 return regparm;
2870 }
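/* Illustrative user-level sketch (not part of GCC; kept out of the build):
   ix86_function_regparm returns 3 for the first declaration, so a, b and c
   travel in %eax, %edx and %ecx; the fastcall declaration yields 2.  For a
   local (static) function compiled with -funit-at-a-time the heuristic above
   may raise the value on its own.  */
#if 0
extern int __attribute__ ((regparm (3))) in_regs (int a, int b, int c);
extern int __attribute__ ((fastcall)) two_in_regs (int a, int b);
#endif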
2871
2872 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) or both
2873 SFmode and DFmode (2) arguments in SSE registers for a function with the
2874 indicated TYPE and DECL.  DECL may be NULL when calling a function
2875 indirectly or considering a libcall.  Otherwise return 0. */
2876
2877 static int
2878 ix86_function_sseregparm (tree type, tree decl)
2879 {
2880 gcc_assert (!TARGET_64BIT);
2881
2882 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2883 by the sseregparm attribute. */
2884 if (TARGET_SSEREGPARM
2885 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2886 {
2887 if (!TARGET_SSE)
2888 {
2889 if (decl)
2890 error ("Calling %qD with attribute sseregparm without "
2891 "SSE/SSE2 enabled", decl);
2892 else
2893 error ("Calling %qT with attribute sseregparm without "
2894 "SSE/SSE2 enabled", type);
2895 return 0;
2896 }
2897
2898 return 2;
2899 }
2900
2901 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2902 (and DFmode for SSE2) arguments in SSE registers. */
2903 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2904 {
2905 struct cgraph_local_info *i = cgraph_local_info (decl);
2906 if (i && i->local)
2907 return TARGET_SSE2 ? 2 : 1;
2908 }
2909
2910 return 0;
2911 }
2912
2913 /* Return true if EAX is live at the start of the function. Used by
2914 ix86_expand_prologue to determine if we need special help before
2915 calling allocate_stack_worker. */
2916
2917 static bool
2918 ix86_eax_live_at_start_p (void)
2919 {
2920 /* Cheat. Don't bother working forward from ix86_function_regparm
2921 to the function type to whether an actual argument is located in
2922 eax. Instead just look at cfg info, which is still close enough
2923 to correct at this point. This gives false positives for broken
2924 functions that might use uninitialized data that happens to be
2925 allocated in eax, but who cares? */
2926 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2927 }
2928
2929 /* Return true if TYPE has a variable argument list. */
2930
2931 static bool
2932 type_has_variadic_args_p (tree type)
2933 {
2934 tree n, t = TYPE_ARG_TYPES (type);
2935
2936 if (t == NULL)
2937 return false;
2938
2939 while ((n = TREE_CHAIN (t)) != NULL)
2940 t = n;
2941
2942 return TREE_VALUE (t) != void_type_node;
2943 }
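/* Illustrative examples: for "int f (int, ...)" the argument-type chain ends
   with the last named type (int), not void_type_node, so the function above
   returns true; for "int g (int)" the chain ends with void_type_node and the
   result is false; for an unprototyped "int h ()" TYPE_ARG_TYPES is NULL and
   the result is also false.  */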
2944
2945 /* Value is the number of bytes of arguments automatically
2946 popped when returning from a subroutine call.
2947 FUNDECL is the declaration node of the function (as a tree),
2948 FUNTYPE is the data type of the function (as a tree),
2949 or for a library call it is an identifier node for the subroutine name.
2950 SIZE is the number of bytes of arguments passed on the stack.
2951
2952 On the 80386, the RTD insn may be used to pop them if the number
2953 of args is fixed, but if the number is variable then the caller
2954 must pop them all. RTD can't be used for library calls now
2955 because the library is compiled with the Unix compiler.
2956 Use of RTD is a selectable option, since it is incompatible with
2957 standard Unix calling sequences. If the option is not selected,
2958 the caller must always pop the args.
2959
2960 The attribute stdcall is equivalent to RTD on a per module basis. */
2961
2962 int
2963 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2964 {
2965 int rtd;
2966
2967 /* None of the 64-bit ABIs pop arguments. */
2968 if (TARGET_64BIT)
2969 return 0;
2970
2971 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2972
2973 /* Cdecl functions override -mrtd, and never pop the stack. */
2974 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2975 {
2976 /* Stdcall and fastcall functions will pop the stack if not
2977 variable args. */
2978 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2979 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2980 rtd = 1;
2981
2982 if (rtd && ! type_has_variadic_args_p (funtype))
2983 return size;
2984 }
2985
2986 /* Lose any fake structure return argument if it is passed on the stack. */
2987 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2988 && !KEEP_AGGREGATE_RETURN_POINTER)
2989 {
2990 int nregs = ix86_function_regparm (funtype, fundecl);
2991 if (nregs == 0)
2992 return GET_MODE_SIZE (Pmode);
2993 }
2994
2995 return 0;
2996 }
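/* Illustrative user-level sketch (not part of GCC; kept out of the build):
   for the stdcall declaration below ix86_return_pops_args returns 8, so the
   callee ends with "ret $8", while the plain cdecl declaration returns 0 and
   the caller pops the 8 argument bytes itself.  A variadic stdcall function
   also yields 0, since the callee cannot know how much was pushed.  */
#if 0
extern int __attribute__ ((stdcall)) callee_pops (int a, int b);
extern int caller_pops (int a, int b);
#endif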
2997 \f
2998 /* Argument support functions. */
2999
3000 /* Return true when register may be used to pass function parameters. */
3001 bool
3002 ix86_function_arg_regno_p (int regno)
3003 {
3004 int i;
3005 const int *parm_regs;
3006
3007 if (!TARGET_64BIT)
3008 {
3009 if (TARGET_MACHO)
3010 return (regno < REGPARM_MAX
3011 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3012 else
3013 return (regno < REGPARM_MAX
3014 || (TARGET_MMX && MMX_REGNO_P (regno)
3015 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3016 || (TARGET_SSE && SSE_REGNO_P (regno)
3017 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3018 }
3019
3020 if (TARGET_MACHO)
3021 {
3022 if (SSE_REGNO_P (regno) && TARGET_SSE)
3023 return true;
3024 }
3025 else
3026 {
3027 if (TARGET_SSE && SSE_REGNO_P (regno)
3028 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3029 return true;
3030 }
3031
3032 /* RAX is used as hidden argument to va_arg functions. */
3033 if (!TARGET_64BIT_MS_ABI && regno == 0)
3034 return true;
3035
3036 if (TARGET_64BIT_MS_ABI)
3037 parm_regs = x86_64_ms_abi_int_parameter_registers;
3038 else
3039 parm_regs = x86_64_int_parameter_registers;
3040 for (i = 0; i < REGPARM_MAX; i++)
3041 if (regno == parm_regs[i])
3042 return true;
3043 return false;
3044 }
3045
3046 /* Return true if we do not know how to pass TYPE solely in registers. */
3047
3048 static bool
3049 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3050 {
3051 if (must_pass_in_stack_var_size_or_pad (mode, type))
3052 return true;
3053
3054 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3055 The layout_type routine is crafty and tries to trick us into passing
3056 currently unsupported vector types on the stack by using TImode. */
3057 return (!TARGET_64BIT && mode == TImode
3058 && type && TREE_CODE (type) != VECTOR_TYPE);
3059 }
3060
3061 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3062 for a call to a function whose data type is FNTYPE.
3063 For a library call, FNTYPE is 0. */
3064
3065 void
3066 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3067 tree fntype, /* tree ptr for function decl */
3068 rtx libname, /* SYMBOL_REF of library name or 0 */
3069 tree fndecl)
3070 {
3071 memset (cum, 0, sizeof (*cum));
3072
3073 /* Set up the number of registers to use for passing arguments. */
3074 cum->nregs = ix86_regparm;
3075 if (TARGET_SSE)
3076 cum->sse_nregs = SSE_REGPARM_MAX;
3077 if (TARGET_MMX)
3078 cum->mmx_nregs = MMX_REGPARM_MAX;
3079 cum->warn_sse = true;
3080 cum->warn_mmx = true;
3081 cum->maybe_vaarg = (fntype
3082 ? (!TYPE_ARG_TYPES (fntype)
3083 || type_has_variadic_args_p (fntype))
3084 : !libname);
3085
3086 if (!TARGET_64BIT)
3087 {
3088 /* If there are variable arguments, then we won't pass anything
3089 in registers in 32-bit mode. */
3090 if (cum->maybe_vaarg)
3091 {
3092 cum->nregs = 0;
3093 cum->sse_nregs = 0;
3094 cum->mmx_nregs = 0;
3095 cum->warn_sse = 0;
3096 cum->warn_mmx = 0;
3097 return;
3098 }
3099
3100 /* Use ecx and edx registers if function has fastcall attribute,
3101 else look for regparm information. */
3102 if (fntype)
3103 {
3104 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3105 {
3106 cum->nregs = 2;
3107 cum->fastcall = 1;
3108 }
3109 else
3110 cum->nregs = ix86_function_regparm (fntype, fndecl);
3111 }
3112
3113 /* Set up the number of SSE registers used for passing SFmode
3114 and DFmode arguments. Warn for mismatching ABI. */
3115 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3116 }
3117 }
3118
3119 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3120 But in the case of vector types, it is some vector mode.
3121
3122 When we have only some of our vector isa extensions enabled, then there
3123 are some modes for which vector_mode_supported_p is false. For these
3124 modes, the generic vector support in gcc will choose some non-vector mode
3125 in order to implement the type. By computing the natural mode, we'll
3126 select the proper ABI location for the operand and not depend on whatever
3127 the middle-end decides to do with these vector types. */
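/* Editorial example (assumed, not from the original sources): for a
   hypothetical type

       typedef int v2si __attribute__ ((vector_size (8)));

   the front end may assign a non-vector TYPE_MODE when MMX is disabled,
   but type_natural_mode below still computes V2SImode, so the argument
   keeps the ABI slot of a two-element SImode vector.  */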
3128
3129 static enum machine_mode
3130 type_natural_mode (tree type)
3131 {
3132 enum machine_mode mode = TYPE_MODE (type);
3133
3134 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3135 {
3136 HOST_WIDE_INT size = int_size_in_bytes (type);
3137 if ((size == 8 || size == 16)
3138 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3139 && TYPE_VECTOR_SUBPARTS (type) > 1)
3140 {
3141 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3142
3143 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3144 mode = MIN_MODE_VECTOR_FLOAT;
3145 else
3146 mode = MIN_MODE_VECTOR_INT;
3147
3148 /* Get the mode which has this inner mode and number of units. */
3149 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3150 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3151 && GET_MODE_INNER (mode) == innermode)
3152 return mode;
3153
3154 gcc_unreachable ();
3155 }
3156 }
3157
3158 return mode;
3159 }
3160
3161 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3162 this may not agree with the mode that the type system has chosen for the
3163 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3164 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3165
3166 static rtx
3167 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3168 unsigned int regno)
3169 {
3170 rtx tmp;
3171
3172 if (orig_mode != BLKmode)
3173 tmp = gen_rtx_REG (orig_mode, regno);
3174 else
3175 {
3176 tmp = gen_rtx_REG (mode, regno);
3177 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3178 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3179 }
3180
3181 return tmp;
3182 }
3183
3184 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3185 The goal of this code is to classify each eightbyte of the incoming argument
3186 by register class and assign registers accordingly. */
3187
3188 /* Return the union class of CLASS1 and CLASS2.
3189 See the x86-64 PS ABI for details. */
3190
3191 static enum x86_64_reg_class
3192 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3193 {
3194 /* Rule #1: If both classes are equal, this is the resulting class. */
3195 if (class1 == class2)
3196 return class1;
3197
3198 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3199 the other class. */
3200 if (class1 == X86_64_NO_CLASS)
3201 return class2;
3202 if (class2 == X86_64_NO_CLASS)
3203 return class1;
3204
3205 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3206 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3207 return X86_64_MEMORY_CLASS;
3208
3209 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3210 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3211 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3212 return X86_64_INTEGERSI_CLASS;
3213 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3214 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3215 return X86_64_INTEGER_CLASS;
3216
3217 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3218 MEMORY is used. */
3219 if (class1 == X86_64_X87_CLASS
3220 || class1 == X86_64_X87UP_CLASS
3221 || class1 == X86_64_COMPLEX_X87_CLASS
3222 || class2 == X86_64_X87_CLASS
3223 || class2 == X86_64_X87UP_CLASS
3224 || class2 == X86_64_COMPLEX_X87_CLASS)
3225 return X86_64_MEMORY_CLASS;
3226
3227 /* Rule #6: Otherwise class SSE is used. */
3228 return X86_64_SSE_CLASS;
3229 }
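/* Editorial worked example: for a hypothetical

       union u { double d; int i; };

   the double contributes X86_64_SSEDF_CLASS and the int contributes
   X86_64_INTEGERSI_CLASS for the same eightbyte; rule #4 merges them
   into X86_64_INTEGER_CLASS, so the union travels in a general
   purpose register.  */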
3230
3231 /* Classify the argument of type TYPE and mode MODE.
3232 CLASSES will be filled by the register class used to pass each word
3233 of the operand. The number of words is returned. In case the parameter
3234 should be passed in memory, 0 is returned. As a special case for zero
3235 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3236
3237 BIT_OFFSET is used internally for handling records; it specifies the
3238 offset in bits, modulo 256, to avoid overflow cases.
3239
3240 See the x86-64 PS ABI for details.
3241 */
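/* Editorial worked example: a hypothetical

       struct s { double d; int i; };

   occupies 16 bytes, so two eightbytes are classified: classes[0]
   becomes X86_64_SSEDF_CLASS (the double) and classes[1] becomes
   X86_64_INTEGER_CLASS (the int at bit offset 64), and 2 is
   returned, i.e. one SSE and one integer register are used.  */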
3242
3243 static int
3244 classify_argument (enum machine_mode mode, tree type,
3245 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3246 {
3247 HOST_WIDE_INT bytes =
3248 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3249 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3250
3251 /* Variable sized entities are always passed/returned in memory. */
3252 if (bytes < 0)
3253 return 0;
3254
3255 if (mode != VOIDmode
3256 && targetm.calls.must_pass_in_stack (mode, type))
3257 return 0;
3258
3259 if (type && AGGREGATE_TYPE_P (type))
3260 {
3261 int i;
3262 tree field;
3263 enum x86_64_reg_class subclasses[MAX_CLASSES];
3264
3265 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3266 if (bytes > 16)
3267 return 0;
3268
3269 for (i = 0; i < words; i++)
3270 classes[i] = X86_64_NO_CLASS;
3271
3272 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3273 signal the memory class, so handle it as a special case. */
3274 if (!words)
3275 {
3276 classes[0] = X86_64_NO_CLASS;
3277 return 1;
3278 }
3279
3280 /* Classify each field of record and merge classes. */
3281 switch (TREE_CODE (type))
3282 {
3283 case RECORD_TYPE:
3284 /* And now merge the fields of structure. */
3285 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3286 {
3287 if (TREE_CODE (field) == FIELD_DECL)
3288 {
3289 int num;
3290
3291 if (TREE_TYPE (field) == error_mark_node)
3292 continue;
3293
3294 /* Bitfields are always classified as integer. Handle them
3295 early, since later code would consider them to be
3296 misaligned integers. */
3297 if (DECL_BIT_FIELD (field))
3298 {
3299 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3300 i < ((int_bit_position (field) + (bit_offset % 64))
3301 + tree_low_cst (DECL_SIZE (field), 0)
3302 + 63) / 8 / 8; i++)
3303 classes[i] =
3304 merge_classes (X86_64_INTEGER_CLASS,
3305 classes[i]);
3306 }
3307 else
3308 {
3309 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3310 TREE_TYPE (field), subclasses,
3311 (int_bit_position (field)
3312 + bit_offset) % 256);
3313 if (!num)
3314 return 0;
3315 for (i = 0; i < num; i++)
3316 {
3317 int pos =
3318 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3319 classes[i + pos] =
3320 merge_classes (subclasses[i], classes[i + pos]);
3321 }
3322 }
3323 }
3324 }
3325 break;
3326
3327 case ARRAY_TYPE:
3328 /* Arrays are handled as small records. */
3329 {
3330 int num;
3331 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3332 TREE_TYPE (type), subclasses, bit_offset);
3333 if (!num)
3334 return 0;
3335
3336 /* The partial classes are now full classes. */
3337 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3338 subclasses[0] = X86_64_SSE_CLASS;
3339 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3340 subclasses[0] = X86_64_INTEGER_CLASS;
3341
3342 for (i = 0; i < words; i++)
3343 classes[i] = subclasses[i % num];
3344
3345 break;
3346 }
3347 case UNION_TYPE:
3348 case QUAL_UNION_TYPE:
3349 /* Unions are similar to RECORD_TYPE but offset is always 0.
3350 */
3351 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3352 {
3353 if (TREE_CODE (field) == FIELD_DECL)
3354 {
3355 int num;
3356
3357 if (TREE_TYPE (field) == error_mark_node)
3358 continue;
3359
3360 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3361 TREE_TYPE (field), subclasses,
3362 bit_offset);
3363 if (!num)
3364 return 0;
3365 for (i = 0; i < num; i++)
3366 classes[i] = merge_classes (subclasses[i], classes[i]);
3367 }
3368 }
3369 break;
3370
3371 default:
3372 gcc_unreachable ();
3373 }
3374
3375 /* Final merger cleanup. */
3376 for (i = 0; i < words; i++)
3377 {
3378 /* If one class is MEMORY, everything should be passed in
3379 memory. */
3380 if (classes[i] == X86_64_MEMORY_CLASS)
3381 return 0;
3382
3383 /* The X86_64_SSEUP_CLASS should always be preceded by
3384 X86_64_SSE_CLASS. */
3385 if (classes[i] == X86_64_SSEUP_CLASS
3386 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3387 classes[i] = X86_64_SSE_CLASS;
3388
3389 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3390 if (classes[i] == X86_64_X87UP_CLASS
3391 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3392 classes[i] = X86_64_SSE_CLASS;
3393 }
3394 return words;
3395 }
3396
3397 /* Compute the alignment needed. We align all types to their natural boundaries,
3398 with the exception of XFmode, which is aligned to 128 bits. */
3399 if (mode != VOIDmode && mode != BLKmode)
3400 {
3401 int mode_alignment = GET_MODE_BITSIZE (mode);
3402
3403 if (mode == XFmode)
3404 mode_alignment = 128;
3405 else if (mode == XCmode)
3406 mode_alignment = 256;
3407 if (COMPLEX_MODE_P (mode))
3408 mode_alignment /= 2;
3409 /* Misaligned fields are always returned in memory. */
3410 if (bit_offset % mode_alignment)
3411 return 0;
3412 }
3413
3414 /* for V1xx modes, just use the base mode */
3415 if (VECTOR_MODE_P (mode)
3416 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3417 mode = GET_MODE_INNER (mode);
3418
3419 /* Classification of atomic types. */
3420 switch (mode)
3421 {
3422 case SDmode:
3423 case DDmode:
3424 classes[0] = X86_64_SSE_CLASS;
3425 return 1;
3426 case TDmode:
3427 classes[0] = X86_64_SSE_CLASS;
3428 classes[1] = X86_64_SSEUP_CLASS;
3429 return 2;
3430 case DImode:
3431 case SImode:
3432 case HImode:
3433 case QImode:
3434 case CSImode:
3435 case CHImode:
3436 case CQImode:
3437 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3438 classes[0] = X86_64_INTEGERSI_CLASS;
3439 else
3440 classes[0] = X86_64_INTEGER_CLASS;
3441 return 1;
3442 case CDImode:
3443 case TImode:
3444 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3445 return 2;
3446 case CTImode:
3447 return 0;
3448 case SFmode:
3449 if (!(bit_offset % 64))
3450 classes[0] = X86_64_SSESF_CLASS;
3451 else
3452 classes[0] = X86_64_SSE_CLASS;
3453 return 1;
3454 case DFmode:
3455 classes[0] = X86_64_SSEDF_CLASS;
3456 return 1;
3457 case XFmode:
3458 classes[0] = X86_64_X87_CLASS;
3459 classes[1] = X86_64_X87UP_CLASS;
3460 return 2;
3461 case TFmode:
3462 classes[0] = X86_64_SSE_CLASS;
3463 classes[1] = X86_64_SSEUP_CLASS;
3464 return 2;
3465 case SCmode:
3466 classes[0] = X86_64_SSE_CLASS;
3467 return 1;
3468 case DCmode:
3469 classes[0] = X86_64_SSEDF_CLASS;
3470 classes[1] = X86_64_SSEDF_CLASS;
3471 return 2;
3472 case XCmode:
3473 classes[0] = X86_64_COMPLEX_X87_CLASS;
3474 return 1;
3475 case TCmode:
3476 /* This mode is larger than 16 bytes. */
3477 return 0;
3478 case V4SFmode:
3479 case V4SImode:
3480 case V16QImode:
3481 case V8HImode:
3482 case V2DFmode:
3483 case V2DImode:
3484 classes[0] = X86_64_SSE_CLASS;
3485 classes[1] = X86_64_SSEUP_CLASS;
3486 return 2;
3487 case V2SFmode:
3488 case V2SImode:
3489 case V4HImode:
3490 case V8QImode:
3491 classes[0] = X86_64_SSE_CLASS;
3492 return 1;
3493 case BLKmode:
3494 case VOIDmode:
3495 return 0;
3496 default:
3497 gcc_assert (VECTOR_MODE_P (mode));
3498
3499 if (bytes > 16)
3500 return 0;
3501
3502 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3503
3504 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3505 classes[0] = X86_64_INTEGERSI_CLASS;
3506 else
3507 classes[0] = X86_64_INTEGER_CLASS;
3508 classes[1] = X86_64_INTEGER_CLASS;
3509 return 1 + (bytes > 8);
3510 }
3511 }
3512
3513 /* Examine the argument and set the number of registers required in each
3514 class. Return 0 iff the parameter should be passed in memory. */
3515 static int
3516 examine_argument (enum machine_mode mode, tree type, int in_return,
3517 int *int_nregs, int *sse_nregs)
3518 {
3519 enum x86_64_reg_class class[MAX_CLASSES];
3520 int n = classify_argument (mode, type, class, 0);
3521
3522 *int_nregs = 0;
3523 *sse_nregs = 0;
3524 if (!n)
3525 return 0;
3526 for (n--; n >= 0; n--)
3527 switch (class[n])
3528 {
3529 case X86_64_INTEGER_CLASS:
3530 case X86_64_INTEGERSI_CLASS:
3531 (*int_nregs)++;
3532 break;
3533 case X86_64_SSE_CLASS:
3534 case X86_64_SSESF_CLASS:
3535 case X86_64_SSEDF_CLASS:
3536 (*sse_nregs)++;
3537 break;
3538 case X86_64_NO_CLASS:
3539 case X86_64_SSEUP_CLASS:
3540 break;
3541 case X86_64_X87_CLASS:
3542 case X86_64_X87UP_CLASS:
3543 if (!in_return)
3544 return 0;
3545 break;
3546 case X86_64_COMPLEX_X87_CLASS:
3547 return in_return ? 2 : 0;
3548 case X86_64_MEMORY_CLASS:
3549 gcc_unreachable ();
3550 }
3551 return 1;
3552 }
3553
3554 /* Construct container for the argument used by GCC interface. See
3555 FUNCTION_ARG for the detailed description. */
3556
3557 static rtx
3558 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3559 tree type, int in_return, int nintregs, int nsseregs,
3560 const int *intreg, int sse_regno)
3561 {
3562 /* The following variables hold the static issued_error state. */
3563 static bool issued_sse_arg_error;
3564 static bool issued_sse_ret_error;
3565 static bool issued_x87_ret_error;
3566
3567 enum machine_mode tmpmode;
3568 int bytes =
3569 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3570 enum x86_64_reg_class class[MAX_CLASSES];
3571 int n;
3572 int i;
3573 int nexps = 0;
3574 int needed_sseregs, needed_intregs;
3575 rtx exp[MAX_CLASSES];
3576 rtx ret;
3577
3578 n = classify_argument (mode, type, class, 0);
3579 if (!n)
3580 return NULL;
3581 if (!examine_argument (mode, type, in_return, &needed_intregs,
3582 &needed_sseregs))
3583 return NULL;
3584 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3585 return NULL;
3586
3587 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3588 some less clueful developer tries to use floating-point anyway. */
3589 if (needed_sseregs && !TARGET_SSE)
3590 {
3591 if (in_return)
3592 {
3593 if (!issued_sse_ret_error)
3594 {
3595 error ("SSE register return with SSE disabled");
3596 issued_sse_ret_error = true;
3597 }
3598 }
3599 else if (!issued_sse_arg_error)
3600 {
3601 error ("SSE register argument with SSE disabled");
3602 issued_sse_arg_error = true;
3603 }
3604 return NULL;
3605 }
3606
3607 /* Likewise, error if the ABI requires us to return values in the
3608 x87 registers and the user specified -mno-80387. */
3609 if (!TARGET_80387 && in_return)
3610 for (i = 0; i < n; i++)
3611 if (class[i] == X86_64_X87_CLASS
3612 || class[i] == X86_64_X87UP_CLASS
3613 || class[i] == X86_64_COMPLEX_X87_CLASS)
3614 {
3615 if (!issued_x87_ret_error)
3616 {
3617 error ("x87 register return with x87 disabled");
3618 issued_x87_ret_error = true;
3619 }
3620 return NULL;
3621 }
3622
3623 /* First construct simple cases. Avoid SCmode, since we want to use
3624 single register to pass this type. */
3625 if (n == 1 && mode != SCmode)
3626 switch (class[0])
3627 {
3628 case X86_64_INTEGER_CLASS:
3629 case X86_64_INTEGERSI_CLASS:
3630 return gen_rtx_REG (mode, intreg[0]);
3631 case X86_64_SSE_CLASS:
3632 case X86_64_SSESF_CLASS:
3633 case X86_64_SSEDF_CLASS:
3634 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3635 case X86_64_X87_CLASS:
3636 case X86_64_COMPLEX_X87_CLASS:
3637 return gen_rtx_REG (mode, FIRST_STACK_REG);
3638 case X86_64_NO_CLASS:
3639 /* Zero sized array, struct or class. */
3640 return NULL;
3641 default:
3642 gcc_unreachable ();
3643 }
3644 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3645 && mode != BLKmode)
3646 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3647
3648 if (n == 2
3649 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3650 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3651 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3652 && class[1] == X86_64_INTEGER_CLASS
3653 && (mode == CDImode || mode == TImode || mode == TFmode)
3654 && intreg[0] + 1 == intreg[1])
3655 return gen_rtx_REG (mode, intreg[0]);
3656
3657 /* Otherwise figure out the entries of the PARALLEL. */
3658 for (i = 0; i < n; i++)
3659 {
3660 switch (class[i])
3661 {
3662 case X86_64_NO_CLASS:
3663 break;
3664 case X86_64_INTEGER_CLASS:
3665 case X86_64_INTEGERSI_CLASS:
3666 /* Merge TImodes on aligned occasions here too. */
3667 if (i * 8 + 8 > bytes)
3668 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3669 else if (class[i] == X86_64_INTEGERSI_CLASS)
3670 tmpmode = SImode;
3671 else
3672 tmpmode = DImode;
3673 /* We've requested a leftover size (e.g. 24 bits) for which no integer mode exists. Use DImode. */
3674 if (tmpmode == BLKmode)
3675 tmpmode = DImode;
3676 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3677 gen_rtx_REG (tmpmode, *intreg),
3678 GEN_INT (i*8));
3679 intreg++;
3680 break;
3681 case X86_64_SSESF_CLASS:
3682 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3683 gen_rtx_REG (SFmode,
3684 SSE_REGNO (sse_regno)),
3685 GEN_INT (i*8));
3686 sse_regno++;
3687 break;
3688 case X86_64_SSEDF_CLASS:
3689 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3690 gen_rtx_REG (DFmode,
3691 SSE_REGNO (sse_regno)),
3692 GEN_INT (i*8));
3693 sse_regno++;
3694 break;
3695 case X86_64_SSE_CLASS:
3696 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3697 tmpmode = TImode;
3698 else
3699 tmpmode = DImode;
3700 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3701 gen_rtx_REG (tmpmode,
3702 SSE_REGNO (sse_regno)),
3703 GEN_INT (i*8));
3704 if (tmpmode == TImode)
3705 i++;
3706 sse_regno++;
3707 break;
3708 default:
3709 gcc_unreachable ();
3710 }
3711 }
3712
3713 /* Empty aligned struct, union or class. */
3714 if (nexps == 0)
3715 return NULL;
3716
3717 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3718 for (i = 0; i < nexps; i++)
3719 XVECEXP (ret, 0, i) = exp [i];
3720 return ret;
3721 }
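/* Editorial sketch: for the hypothetical struct { double d; int i; }
   discussed above, construct_container yields a PARALLEL roughly of
   the form

       (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                  (expr_list (reg:DI di) (const_int 8))])

   assuming the first SSE and integer argument registers are still
   free: bytes 0-7 travel in an SSE register, bytes 8-15 in a
   general purpose register.  */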
3722
3723 /* Update the data in CUM to advance over an argument of mode MODE
3724 and data type TYPE. (TYPE is null for libcalls where that information
3725 may not be available.) */
3726
3727 static void
3728 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3729 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3730 {
3731 switch (mode)
3732 {
3733 default:
3734 break;
3735
3736 case BLKmode:
3737 if (bytes < 0)
3738 break;
3739 /* FALLTHRU */
3740
3741 case DImode:
3742 case SImode:
3743 case HImode:
3744 case QImode:
3745 cum->words += words;
3746 cum->nregs -= words;
3747 cum->regno += words;
3748
3749 if (cum->nregs <= 0)
3750 {
3751 cum->nregs = 0;
3752 cum->regno = 0;
3753 }
3754 break;
3755
3756 case DFmode:
3757 if (cum->float_in_sse < 2)
3758 break;
3759 case SFmode:
3760 if (cum->float_in_sse < 1)
3761 break;
3762 /* FALLTHRU */
3763
3764 case TImode:
3765 case V16QImode:
3766 case V8HImode:
3767 case V4SImode:
3768 case V2DImode:
3769 case V4SFmode:
3770 case V2DFmode:
3771 if (!type || !AGGREGATE_TYPE_P (type))
3772 {
3773 cum->sse_words += words;
3774 cum->sse_nregs -= 1;
3775 cum->sse_regno += 1;
3776 if (cum->sse_nregs <= 0)
3777 {
3778 cum->sse_nregs = 0;
3779 cum->sse_regno = 0;
3780 }
3781 }
3782 break;
3783
3784 case V8QImode:
3785 case V4HImode:
3786 case V2SImode:
3787 case V2SFmode:
3788 if (!type || !AGGREGATE_TYPE_P (type))
3789 {
3790 cum->mmx_words += words;
3791 cum->mmx_nregs -= 1;
3792 cum->mmx_regno += 1;
3793 if (cum->mmx_nregs <= 0)
3794 {
3795 cum->mmx_nregs = 0;
3796 cum->mmx_regno = 0;
3797 }
3798 }
3799 break;
3800 }
3801 }
3802
3803 static void
3804 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3805 tree type, HOST_WIDE_INT words)
3806 {
3807 int int_nregs, sse_nregs;
3808
3809 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3810 cum->words += words;
3811 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3812 {
3813 cum->nregs -= int_nregs;
3814 cum->sse_nregs -= sse_nregs;
3815 cum->regno += int_nregs;
3816 cum->sse_regno += sse_nregs;
3817 }
3818 else
3819 cum->words += words;
3820 }
3821
3822 static void
3823 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3824 HOST_WIDE_INT words)
3825 {
3826 /* Otherwise, this should have been passed indirectly. */
3827 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3828
3829 cum->words += words;
3830 if (cum->nregs > 0)
3831 {
3832 cum->nregs -= 1;
3833 cum->regno += 1;
3834 }
3835 }
3836
3837 void
3838 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3839 tree type, int named ATTRIBUTE_UNUSED)
3840 {
3841 HOST_WIDE_INT bytes, words;
3842
3843 if (mode == BLKmode)
3844 bytes = int_size_in_bytes (type);
3845 else
3846 bytes = GET_MODE_SIZE (mode);
3847 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3848
3849 if (type)
3850 mode = type_natural_mode (type);
3851
3852 if (TARGET_64BIT_MS_ABI)
3853 function_arg_advance_ms_64 (cum, bytes, words);
3854 else if (TARGET_64BIT)
3855 function_arg_advance_64 (cum, mode, type, words);
3856 else
3857 function_arg_advance_32 (cum, mode, type, bytes, words);
3858 }
3859
3860 /* Define where to put the arguments to a function.
3861 Value is zero to push the argument on the stack,
3862 or a hard register in which to store the argument.
3863
3864 MODE is the argument's machine mode.
3865 TYPE is the data type of the argument (as a tree).
3866 This is null for libcalls where that information may
3867 not be available.
3868 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3869 the preceding args and about the function being called.
3870 NAMED is nonzero if this argument is a named parameter
3871 (otherwise it is an extra parameter matching an ellipsis). */
3872
3873 static rtx
3874 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3875 enum machine_mode orig_mode, tree type,
3876 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3877 {
3878 static bool warnedsse, warnedmmx;
3879
3880 /* Avoid the AL settings for the Unix64 ABI. */
3881 if (mode == VOIDmode)
3882 return constm1_rtx;
3883
3884 switch (mode)
3885 {
3886 default:
3887 break;
3888
3889 case BLKmode:
3890 if (bytes < 0)
3891 break;
3892 /* FALLTHRU */
3893 case DImode:
3894 case SImode:
3895 case HImode:
3896 case QImode:
3897 if (words <= cum->nregs)
3898 {
3899 int regno = cum->regno;
3900
3901 /* Fastcall allocates the first two DWORD (SImode) or
3902 smaller arguments to ECX and EDX. */
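	  /* Editorial example: for a hypothetical
	         void __attribute__ ((fastcall)) f (int a, int b, int c);
	     A goes in ECX, B in EDX, and C on the stack; DImode and
	     BLKmode arguments bypass ECX/EDX entirely.  */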
3903 if (cum->fastcall)
3904 {
3905 if (mode == BLKmode || mode == DImode)
3906 break;
3907
3908 /* ECX not EAX is the first allocated register. */
3909 if (regno == 0)
3910 regno = 2;
3911 }
3912 return gen_rtx_REG (mode, regno);
3913 }
3914 break;
3915
3916 case DFmode:
3917 if (cum->float_in_sse < 2)
3918 break;
3919 case SFmode:
3920 if (cum->float_in_sse < 1)
3921 break;
3922 /* FALLTHRU */
3923 case TImode:
3924 case V16QImode:
3925 case V8HImode:
3926 case V4SImode:
3927 case V2DImode:
3928 case V4SFmode:
3929 case V2DFmode:
3930 if (!type || !AGGREGATE_TYPE_P (type))
3931 {
3932 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3933 {
3934 warnedsse = true;
3935 warning (0, "SSE vector argument without SSE enabled "
3936 "changes the ABI");
3937 }
3938 if (cum->sse_nregs)
3939 return gen_reg_or_parallel (mode, orig_mode,
3940 cum->sse_regno + FIRST_SSE_REG);
3941 }
3942 break;
3943
3944 case V8QImode:
3945 case V4HImode:
3946 case V2SImode:
3947 case V2SFmode:
3948 if (!type || !AGGREGATE_TYPE_P (type))
3949 {
3950 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3951 {
3952 warnedmmx = true;
3953 warning (0, "MMX vector argument without MMX enabled "
3954 "changes the ABI");
3955 }
3956 if (cum->mmx_nregs)
3957 return gen_reg_or_parallel (mode, orig_mode,
3958 cum->mmx_regno + FIRST_MMX_REG);
3959 }
3960 break;
3961 }
3962
3963 return NULL_RTX;
3964 }
3965
3966 static rtx
3967 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3968 enum machine_mode orig_mode, tree type)
3969 {
3970 /* Handle the hidden AL argument, which for varargs x86-64 functions
3971 contains the number of SSE registers used. */
3972 if (mode == VOIDmode)
3973 return GEN_INT (cum->maybe_vaarg
3974 ? (cum->sse_nregs < 0
3975 ? SSE_REGPARM_MAX
3976 : cum->sse_regno)
3977 : -1);
3978
3979 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3980 cum->sse_nregs,
3981 &x86_64_int_parameter_registers [cum->regno],
3982 cum->sse_regno);
3983 }
3984
3985 static rtx
3986 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3987 enum machine_mode orig_mode, int named)
3988 {
3989 unsigned int regno;
3990
3991 /* Avoid the AL settings for the Unix64 ABI. */
3992 if (mode == VOIDmode)
3993 return constm1_rtx;
3994
3995 /* If we've run out of registers, it goes on the stack. */
3996 if (cum->nregs == 0)
3997 return NULL_RTX;
3998
3999 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4000
4001 /* Only floating point modes are passed in anything but integer regs. */
4002 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4003 {
4004 if (named)
4005 regno = cum->regno + FIRST_SSE_REG;
4006 else
4007 {
4008 rtx t1, t2;
4009
4010 /* Unnamed floating parameters are passed in both the
4011 SSE and integer registers. */
4012 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4013 t2 = gen_rtx_REG (mode, regno);
4014 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4015 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4016 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4017 }
4018 }
4019
4020 return gen_reg_or_parallel (mode, orig_mode, regno);
4021 }
4022
4023 rtx
4024 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4025 tree type, int named)
4026 {
4027 enum machine_mode mode = omode;
4028 HOST_WIDE_INT bytes, words;
4029
4030 if (mode == BLKmode)
4031 bytes = int_size_in_bytes (type);
4032 else
4033 bytes = GET_MODE_SIZE (mode);
4034 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4035
4036 /* To simplify the code below, represent vector types with a vector mode
4037 even if MMX/SSE are not active. */
4038 if (type && TREE_CODE (type) == VECTOR_TYPE)
4039 mode = type_natural_mode (type);
4040
4041 if (TARGET_64BIT_MS_ABI)
4042 return function_arg_ms_64 (cum, mode, omode, named);
4043 else if (TARGET_64BIT)
4044 return function_arg_64 (cum, mode, omode, type);
4045 else
4046 return function_arg_32 (cum, mode, omode, type, bytes, words);
4047 }
4048
4049 /* A C expression that indicates when an argument must be passed by
4050 reference. If nonzero for an argument, a copy of that argument is
4051 made in memory and a pointer to the argument is passed instead of
4052 the argument itself. The pointer is passed in whatever way is
4053 appropriate for passing a pointer to that type. */
4054
4055 static bool
4056 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4057 enum machine_mode mode ATTRIBUTE_UNUSED,
4058 tree type, bool named ATTRIBUTE_UNUSED)
4059 {
4060 if (TARGET_64BIT_MS_ABI)
4061 {
4062 if (type)
4063 {
4064 /* Arrays are passed by reference. */
4065 if (TREE_CODE (type) == ARRAY_TYPE)
4066 return true;
4067
4068 if (AGGREGATE_TYPE_P (type))
4069 {
4070 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4071 are passed by reference. */
4072 int el2 = exact_log2 (int_size_in_bytes (type));
4073 return !(el2 >= 0 && el2 <= 3);
4074 }
4075 }
4076
4077 /* __m128 is passed by reference. */
4078 /* ??? How to handle complex? For now treat them as structs,
4079 and pass them by reference if they're too large. */
4080 if (GET_MODE_SIZE (mode) > 8)
4081 return true;
4082 }
4083 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4084 return 1;
4085
4086 return 0;
4087 }
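/* Editorial examples of the MS 64-bit rules above: a struct of 1, 2,
   4 or 8 bytes is passed by value in a register, while a 12-byte
   struct, any array type, or a 16-byte __m128 value is copied to
   memory and a pointer to the copy is passed instead.  */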
4088
4089 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4090 ABI. Only called if TARGET_SSE. */
4091 static bool
4092 contains_128bit_aligned_vector_p (tree type)
4093 {
4094 enum machine_mode mode = TYPE_MODE (type);
4095 if (SSE_REG_MODE_P (mode)
4096 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4097 return true;
4098 if (TYPE_ALIGN (type) < 128)
4099 return false;
4100
4101 if (AGGREGATE_TYPE_P (type))
4102 {
4103 /* Walk the aggregates recursively. */
4104 switch (TREE_CODE (type))
4105 {
4106 case RECORD_TYPE:
4107 case UNION_TYPE:
4108 case QUAL_UNION_TYPE:
4109 {
4110 tree field;
4111
4112 /* Walk all the structure fields. */
4113 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4114 {
4115 if (TREE_CODE (field) == FIELD_DECL
4116 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4117 return true;
4118 }
4119 break;
4120 }
4121
4122 case ARRAY_TYPE:
4123 /* Just in case some language passes arrays by value. */
4124 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4125 return true;
4126 break;
4127
4128 default:
4129 gcc_unreachable ();
4130 }
4131 }
4132 return false;
4133 }
4134
4135 /* Gives the alignment boundary, in bits, of an argument with the
4136 specified mode and type. */
4137
4138 int
4139 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4140 {
4141 int align;
4142 if (type)
4143 align = TYPE_ALIGN (type);
4144 else
4145 align = GET_MODE_ALIGNMENT (mode);
4146 if (align < PARM_BOUNDARY)
4147 align = PARM_BOUNDARY;
4148 if (!TARGET_64BIT)
4149 {
4150 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4151 make an exception for SSE modes since these require 128bit
4152 alignment.
4153
4154 The handling here differs from field_alignment. ICC aligns MMX
4155 arguments to 4 byte boundaries, while structure fields are aligned
4156 to 8 byte boundaries. */
4157 if (!TARGET_SSE)
4158 align = PARM_BOUNDARY;
4159 else if (!type)
4160 {
4161 if (!SSE_REG_MODE_P (mode))
4162 align = PARM_BOUNDARY;
4163 }
4164 else
4165 {
4166 if (!contains_128bit_aligned_vector_p (type))
4167 align = PARM_BOUNDARY;
4168 }
4169 }
4170 if (align > 128)
4171 align = 128;
4172 return align;
4173 }
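/* Editorial examples for 32-bit code: a double argument is aligned to
   PARM_BOUNDARY (32 bits), whereas an __m128 argument, or a struct
   containing one, is aligned to 128 bits when SSE is enabled; the
   result is capped at 128 bits in every case.  */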
4174
4175 /* Return true if REGNO is a possible register number for a function value. */
4176
4177 bool
4178 ix86_function_value_regno_p (int regno)
4179 {
4180 switch (regno)
4181 {
4182 case 0:
4183 return true;
4184
4185 case FIRST_FLOAT_REG:
4186 if (TARGET_64BIT_MS_ABI)
4187 return false;
4188 return TARGET_FLOAT_RETURNS_IN_80387;
4189
4190 case FIRST_SSE_REG:
4191 return TARGET_SSE;
4192
4193 case FIRST_MMX_REG:
4194 if (TARGET_MACHO || TARGET_64BIT)
4195 return false;
4196 return TARGET_MMX;
4197 }
4198
4199 return false;
4200 }
4201
4202 /* Define how to find the value returned by a function.
4203 VALTYPE is the data type of the value (as a tree).
4204 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4205 otherwise, FUNC is 0. */
4206
4207 static rtx
4208 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4209 tree fntype, tree fn)
4210 {
4211 unsigned int regno;
4212
4213 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4214 we normally prevent this case when mmx is not available. However
4215 some ABIs may require the result to be returned like DImode. */
4216 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4217 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4218
4219 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4220 we prevent this case when sse is not available. However some ABIs
4221 may require the result to be returned like integer TImode. */
4222 else if (mode == TImode
4223 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4224 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4225
4226 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4227 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4228 regno = FIRST_FLOAT_REG;
4229 else
4230 /* Most things go in %eax. */
4231 regno = 0;
4232
4233 /* Override FP return register with %xmm0 for local functions when
4234 SSE math is enabled or for functions with sseregparm attribute. */
4235 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4236 {
4237 int sse_level = ix86_function_sseregparm (fntype, fn);
4238 if ((sse_level >= 1 && mode == SFmode)
4239 || (sse_level == 2 && mode == DFmode))
4240 regno = FIRST_SSE_REG;
4241 }
4242
4243 return gen_rtx_REG (orig_mode, regno);
4244 }
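/* Editorial examples of the 32-bit conventions above: an int comes
   back in %eax, a double in %st(0) unless -mno-fp-ret-in-387 is
   given, a 16-byte vector in %xmm0 when SSE is enabled, and a float
   returned from a local function compiled with SSE math typically
   comes back in %xmm0 as well.  */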
4245
4246 static rtx
4247 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4248 tree valtype)
4249 {
4250 rtx ret;
4251
4252 /* Handle libcalls, which don't provide a type node. */
4253 if (valtype == NULL)
4254 {
4255 switch (mode)
4256 {
4257 case SFmode:
4258 case SCmode:
4259 case DFmode:
4260 case DCmode:
4261 case TFmode:
4262 case SDmode:
4263 case DDmode:
4264 case TDmode:
4265 return gen_rtx_REG (mode, FIRST_SSE_REG);
4266 case XFmode:
4267 case XCmode:
4268 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4269 case TCmode:
4270 return NULL;
4271 default:
4272 return gen_rtx_REG (mode, 0);
4273 }
4274 }
4275
4276 ret = construct_container (mode, orig_mode, valtype, 1,
4277 REGPARM_MAX, SSE_REGPARM_MAX,
4278 x86_64_int_return_registers, 0);
4279
4280 /* For zero sized structures, construct_container returns NULL, but we
4281 need to keep rest of compiler happy by returning meaningful value. */
4282 if (!ret)
4283 ret = gen_rtx_REG (orig_mode, 0);
4284
4285 return ret;
4286 }
4287
4288 static rtx
4289 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4290 {
4291 unsigned int regno = 0;
4292
4293 if (TARGET_SSE)
4294 {
4295 if (mode == SFmode || mode == DFmode)
4296 regno = FIRST_SSE_REG;
4297 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4298 regno = FIRST_SSE_REG;
4299 }
4300
4301 return gen_rtx_REG (orig_mode, regno);
4302 }
4303
4304 static rtx
4305 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4306 enum machine_mode orig_mode, enum machine_mode mode)
4307 {
4308 tree fn, fntype;
4309
4310 fn = NULL_TREE;
4311 if (fntype_or_decl && DECL_P (fntype_or_decl))
4312 fn = fntype_or_decl;
4313 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4314
4315 if (TARGET_64BIT_MS_ABI)
4316 return function_value_ms_64 (orig_mode, mode);
4317 else if (TARGET_64BIT)
4318 return function_value_64 (orig_mode, mode, valtype);
4319 else
4320 return function_value_32 (orig_mode, mode, fntype, fn);
4321 }
4322
4323 static rtx
4324 ix86_function_value (tree valtype, tree fntype_or_decl,
4325 bool outgoing ATTRIBUTE_UNUSED)
4326 {
4327 enum machine_mode mode, orig_mode;
4328
4329 orig_mode = TYPE_MODE (valtype);
4330 mode = type_natural_mode (valtype);
4331 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4332 }
4333
4334 rtx
4335 ix86_libcall_value (enum machine_mode mode)
4336 {
4337 return ix86_function_value_1 (NULL, NULL, mode, mode);
4338 }
4339
4340 /* Return true iff type is returned in memory. */
4341
4342 static int
4343 return_in_memory_32 (tree type, enum machine_mode mode)
4344 {
4345 HOST_WIDE_INT size;
4346
4347 if (mode == BLKmode)
4348 return 1;
4349
4350 size = int_size_in_bytes (type);
4351
4352 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4353 return 0;
4354
4355 if (VECTOR_MODE_P (mode) || mode == TImode)
4356 {
4357 /* User-created vectors small enough to fit in EAX. */
4358 if (size < 8)
4359 return 0;
4360
4361 /* MMX/3dNow values are returned in MM0,
4362 except when it doesn't exist. */
4363 if (size == 8)
4364 return (TARGET_MMX ? 0 : 1);
4365
4366 /* SSE values are returned in XMM0, except when it doesn't exist. */
4367 if (size == 16)
4368 return (TARGET_SSE ? 0 : 1);
4369 }
4370
4371 if (mode == XFmode)
4372 return 0;
4373
4374 if (mode == TDmode)
4375 return 1;
4376
4377 if (size > 12)
4378 return 1;
4379 return 0;
4380 }
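/* Editorial examples of the 32-bit rules above: a 16-byte struct is
   returned in memory, an 8-byte vector is returned in %mm0 only when
   MMX is available (in memory otherwise), and XFmode long double is
   returned in registers even though it occupies 12 bytes.  */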
4381
4382 static int
4383 return_in_memory_64 (tree type, enum machine_mode mode)
4384 {
4385 int needed_intregs, needed_sseregs;
4386 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4387 }
4388
4389 static int
4390 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4391 {
4392 HOST_WIDE_INT size = int_size_in_bytes (type);
4393
4394 /* __m128 and friends are returned in xmm0. */
4395 if (size == 16 && VECTOR_MODE_P (mode))
4396 return 0;
4397
4398 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4399 return (size != 1 && size != 2 && size != 4 && size != 8);
4400 }
4401
4402 int
4403 ix86_return_in_memory (tree type)
4404 {
4405 enum machine_mode mode = type_natural_mode (type);
4406
4407 if (TARGET_64BIT_MS_ABI)
4408 return return_in_memory_ms_64 (type, mode);
4409 else if (TARGET_64BIT)
4410 return return_in_memory_64 (type, mode);
4411 else
4412 return return_in_memory_32 (type, mode);
4413 }
4414
4415 /* Return true iff TYPE is returned in memory. This version is used
4416 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4417 but differs notably in that when MMX is available, 8-byte vectors
4418 are returned in memory, rather than in MMX registers. */
4419
4420 int
4421 ix86_sol10_return_in_memory (tree type)
4422 {
4423 int size;
4424 enum machine_mode mode = type_natural_mode (type);
4425
4426 if (TARGET_64BIT)
4427 return return_in_memory_64 (type, mode);
4428
4429 if (mode == BLKmode)
4430 return 1;
4431
4432 size = int_size_in_bytes (type);
4433
4434 if (VECTOR_MODE_P (mode))
4435 {
4436 /* Return in memory only if MMX registers *are* available. This
4437 seems backwards, but it is consistent with the existing
4438 Solaris x86 ABI. */
4439 if (size == 8)
4440 return TARGET_MMX;
4441 if (size == 16)
4442 return !TARGET_SSE;
4443 }
4444 else if (mode == TImode)
4445 return !TARGET_SSE;
4446 else if (mode == XFmode)
4447 return 0;
4448
4449 return size > 12;
4450 }
4451
4452 /* When returning SSE vector types, we have a choice of either
4453 (1) being abi incompatible with a -march switch, or
4454 (2) generating an error.
4455 Given no good solution, I think the safest thing is one warning.
4456 The user won't be able to use -Werror, but....
4457
4458 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4459 called in response to actually generating a caller or callee that
4460 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4461 via aggregate_value_p for general type probing from tree-ssa. */
4462
4463 static rtx
4464 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4465 {
4466 static bool warnedsse, warnedmmx;
4467
4468 if (!TARGET_64BIT && type)
4469 {
4470 /* Look at the return type of the function, not the function type. */
4471 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4472
4473 if (!TARGET_SSE && !warnedsse)
4474 {
4475 if (mode == TImode
4476 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4477 {
4478 warnedsse = true;
4479 warning (0, "SSE vector return without SSE enabled "
4480 "changes the ABI");
4481 }
4482 }
4483
4484 if (!TARGET_MMX && !warnedmmx)
4485 {
4486 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4487 {
4488 warnedmmx = true;
4489 warning (0, "MMX vector return without MMX enabled "
4490 "changes the ABI");
4491 }
4492 }
4493 }
4494
4495 return NULL;
4496 }
4497
4498 \f
4499 /* Create the va_list data type. */
4500
4501 static tree
4502 ix86_build_builtin_va_list (void)
4503 {
4504 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4505
4506 /* For i386 we use plain pointer to argument area. */
4507 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4508 return build_pointer_type (char_type_node);
4509
4510 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4511 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4512
4513 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4514 unsigned_type_node);
4515 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4516 unsigned_type_node);
4517 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4518 ptr_type_node);
4519 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4520 ptr_type_node);
4521
4522 va_list_gpr_counter_field = f_gpr;
4523 va_list_fpr_counter_field = f_fpr;
4524
4525 DECL_FIELD_CONTEXT (f_gpr) = record;
4526 DECL_FIELD_CONTEXT (f_fpr) = record;
4527 DECL_FIELD_CONTEXT (f_ovf) = record;
4528 DECL_FIELD_CONTEXT (f_sav) = record;
4529
4530 TREE_CHAIN (record) = type_decl;
4531 TYPE_NAME (record) = type_decl;
4532 TYPE_FIELDS (record) = f_gpr;
4533 TREE_CHAIN (f_gpr) = f_fpr;
4534 TREE_CHAIN (f_fpr) = f_ovf;
4535 TREE_CHAIN (f_ovf) = f_sav;
4536
4537 layout_type (record);
4538
4539 /* The correct type is an array type of one element. */
4540 return build_array_type (record, build_index_type (size_zero_node));
4541 }
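/* Editorial sketch of the record built above; for the 64-bit Unix ABI
   it corresponds to the psABI declaration

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } va_list[1];

   while the 32-bit and MS ABIs use a plain character pointer.  */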
4542
4543 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4544
4545 static void
4546 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4547 {
4548 rtx save_area, mem;
4549 rtx label;
4550 rtx label_ref;
4551 rtx tmp_reg;
4552 rtx nsse_reg;
4553 int set;
4554 int i;
4555
4556 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4557 return;
4558
4559 /* Indicate that we need to allocate space on the stack for the varargs save area. */
4560 ix86_save_varrargs_registers = 1;
4561 cfun->stack_alignment_needed = 128;
4562
4563 save_area = frame_pointer_rtx;
4564 set = get_varargs_alias_set ();
4565
4566 for (i = cum->regno;
4567 i < ix86_regparm
4568 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4569 i++)
4570 {
4571 mem = gen_rtx_MEM (Pmode,
4572 plus_constant (save_area, i * UNITS_PER_WORD));
4573 MEM_NOTRAP_P (mem) = 1;
4574 set_mem_alias_set (mem, set);
4575 emit_move_insn (mem, gen_rtx_REG (Pmode,
4576 x86_64_int_parameter_registers[i]));
4577 }
4578
4579 if (cum->sse_nregs && cfun->va_list_fpr_size)
4580 {
4581 /* Now emit code to save SSE registers. The AX parameter contains number
4582 of SSE parameter registers used to call this function. We use
4583 sse_prologue_save insn template that produces computed jump across
4584 SSE saves. We need some preparation work to get this working. */
4585
4586 label = gen_label_rtx ();
4587 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4588
4589 /* Compute address to jump to :
4590 label - 5*eax + nnamed_sse_arguments*5 */
4591 tmp_reg = gen_reg_rtx (Pmode);
4592 nsse_reg = gen_reg_rtx (Pmode);
4593 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4594 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4595 gen_rtx_MULT (Pmode, nsse_reg,
4596 GEN_INT (4))));
4597 if (cum->sse_regno)
4598 emit_move_insn
4599 (nsse_reg,
4600 gen_rtx_CONST (DImode,
4601 gen_rtx_PLUS (DImode,
4602 label_ref,
4603 GEN_INT (cum->sse_regno * 4))));
4604 else
4605 emit_move_insn (nsse_reg, label_ref);
4606 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4607
4608 /* Compute the address of the memory block we save into. We always use a
4609 pointer pointing 127 bytes after the first byte to store - this is needed
4610 to keep the instruction size limited to 4 bytes. */
4611 tmp_reg = gen_reg_rtx (Pmode);
4612 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4613 plus_constant (save_area,
4614 8 * REGPARM_MAX + 127)));
4615 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4616 MEM_NOTRAP_P (mem) = 1;
4617 set_mem_alias_set (mem, set);
4618 set_mem_align (mem, BITS_PER_WORD);
4619
4620 /* And finally do the dirty job! */
4621 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4622 GEN_INT (cum->sse_regno), label));
4623 }
4624 }
4625
4626 static void
4627 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4628 {
4629 int set = get_varargs_alias_set ();
4630 int i;
4631
4632 for (i = cum->regno; i < REGPARM_MAX; i++)
4633 {
4634 rtx reg, mem;
4635
4636 mem = gen_rtx_MEM (Pmode,
4637 plus_constant (virtual_incoming_args_rtx,
4638 i * UNITS_PER_WORD));
4639 MEM_NOTRAP_P (mem) = 1;
4640 set_mem_alias_set (mem, set);
4641
4642 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4643 emit_move_insn (mem, reg);
4644 }
4645 }
4646
4647 static void
4648 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4649 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4650 int no_rtl)
4651 {
4652 CUMULATIVE_ARGS next_cum;
4653 tree fntype;
4654 int stdarg_p;
4655
4656 /* This argument doesn't appear to be used anymore. Which is good,
4657 because the old code here didn't suppress rtl generation. */
4658 gcc_assert (!no_rtl);
4659
4660 if (!TARGET_64BIT)
4661 return;
4662
4663 fntype = TREE_TYPE (current_function_decl);
4664 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4665 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4666 != void_type_node));
4667
4668 /* For varargs, we do not want to skip the dummy va_dcl argument.
4669 For stdargs, we do want to skip the last named argument. */
4670 next_cum = *cum;
4671 if (stdarg_p)
4672 function_arg_advance (&next_cum, mode, type, 1);
4673
4674 if (TARGET_64BIT_MS_ABI)
4675 setup_incoming_varargs_ms_64 (&next_cum);
4676 else
4677 setup_incoming_varargs_64 (&next_cum);
4678 }
4679
4680 /* Implement va_start. */
4681
4682 void
4683 ix86_va_start (tree valist, rtx nextarg)
4684 {
4685 HOST_WIDE_INT words, n_gpr, n_fpr;
4686 tree f_gpr, f_fpr, f_ovf, f_sav;
4687 tree gpr, fpr, ovf, sav, t;
4688 tree type;
4689
4690 /* Only 64bit target needs something special. */
4691 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4692 {
4693 std_expand_builtin_va_start (valist, nextarg);
4694 return;
4695 }
4696
4697 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4698 f_fpr = TREE_CHAIN (f_gpr);
4699 f_ovf = TREE_CHAIN (f_fpr);
4700 f_sav = TREE_CHAIN (f_ovf);
4701
4702 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4703 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4704 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4705 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4706 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4707
4708 /* Count number of gp and fp argument registers used. */
4709 words = current_function_args_info.words;
4710 n_gpr = current_function_args_info.regno;
4711 n_fpr = current_function_args_info.sse_regno;
4712
4713 if (cfun->va_list_gpr_size)
4714 {
4715 type = TREE_TYPE (gpr);
4716 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4717 build_int_cst (type, n_gpr * 8));
4718 TREE_SIDE_EFFECTS (t) = 1;
4719 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4720 }
4721
4722 if (cfun->va_list_fpr_size)
4723 {
4724 type = TREE_TYPE (fpr);
4725 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4726 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4727 TREE_SIDE_EFFECTS (t) = 1;
4728 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4729 }
4730
4731 /* Find the overflow area. */
4732 type = TREE_TYPE (ovf);
4733 t = make_tree (type, virtual_incoming_args_rtx);
4734 if (words != 0)
4735 t = build2 (PLUS_EXPR, type, t,
4736 build_int_cst (type, words * UNITS_PER_WORD));
4737 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4738 TREE_SIDE_EFFECTS (t) = 1;
4739 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4740
4741 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4742 {
4743 /* Find the register save area.
4744 The prologue of the function saves it right above the stack frame. */
4745 type = TREE_TYPE (sav);
4746 t = make_tree (type, frame_pointer_rtx);
4747 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4748 TREE_SIDE_EFFECTS (t) = 1;
4749 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4750 }
4751 }
4752
4753 /* Implement va_arg. */
4754
4755 static tree
4756 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4757 {
4758 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4759 tree f_gpr, f_fpr, f_ovf, f_sav;
4760 tree gpr, fpr, ovf, sav, t;
4761 int size, rsize;
4762 tree lab_false, lab_over = NULL_TREE;
4763 tree addr, t2;
4764 rtx container;
4765 int indirect_p = 0;
4766 tree ptrtype;
4767 enum machine_mode nat_mode;
4768
4769 /* Only 64bit target needs something special. */
4770 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4771 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4772
4773 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4774 f_fpr = TREE_CHAIN (f_gpr);
4775 f_ovf = TREE_CHAIN (f_fpr);
4776 f_sav = TREE_CHAIN (f_ovf);
4777
4778 valist = build_va_arg_indirect_ref (valist);
4779 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4780 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4781 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4782 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4783
4784 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4785 if (indirect_p)
4786 type = build_pointer_type (type);
4787 size = int_size_in_bytes (type);
4788 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4789
4790 nat_mode = type_natural_mode (type);
4791 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4792 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4793
4794 /* Pull the value out of the saved registers. */
4795
4796 addr = create_tmp_var (ptr_type_node, "addr");
4797 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4798
4799 if (container)
4800 {
4801 int needed_intregs, needed_sseregs;
4802 bool need_temp;
4803 tree int_addr, sse_addr;
4804
4805 lab_false = create_artificial_label ();
4806 lab_over = create_artificial_label ();
4807
4808 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4809
4810 need_temp = (!REG_P (container)
4811 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4812 || TYPE_ALIGN (type) > 128));
4813
4814 /* In case we are passing structure, verify that it is consecutive block
4815 on the register save area. If not we need to do moves. */
4816 if (!need_temp && !REG_P (container))
4817 {
4818 /* Verify that all registers are strictly consecutive */
4819 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4820 {
4821 int i;
4822
4823 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4824 {
4825 rtx slot = XVECEXP (container, 0, i);
4826 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4827 || INTVAL (XEXP (slot, 1)) != i * 16)
4828 need_temp = 1;
4829 }
4830 }
4831 else
4832 {
4833 int i;
4834
4835 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4836 {
4837 rtx slot = XVECEXP (container, 0, i);
4838 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4839 || INTVAL (XEXP (slot, 1)) != i * 8)
4840 need_temp = 1;
4841 }
4842 }
4843 }
4844 if (!need_temp)
4845 {
4846 int_addr = addr;
4847 sse_addr = addr;
4848 }
4849 else
4850 {
4851 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4852 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4853 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4854 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4855 }
4856
4857 /* First ensure that we fit completely in registers. */
4858 if (needed_intregs)
4859 {
4860 t = build_int_cst (TREE_TYPE (gpr),
4861 (REGPARM_MAX - needed_intregs + 1) * 8);
4862 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4863 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4864 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4865 gimplify_and_add (t, pre_p);
4866 }
4867 if (needed_sseregs)
4868 {
4869 t = build_int_cst (TREE_TYPE (fpr),
4870 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4871 + REGPARM_MAX * 8);
4872 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4873 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4874 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4875 gimplify_and_add (t, pre_p);
4876 }
4877
4878 /* Compute index to start of area used for integer regs. */
4879 if (needed_intregs)
4880 {
4881 /* int_addr = gpr + sav; */
4882 t = fold_convert (ptr_type_node, gpr);
4883 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4884 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4885 gimplify_and_add (t, pre_p);
4886 }
4887 if (needed_sseregs)
4888 {
4889 /* sse_addr = fpr + sav; */
4890 t = fold_convert (ptr_type_node, fpr);
4891 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4892 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4893 gimplify_and_add (t, pre_p);
4894 }
4895 if (need_temp)
4896 {
4897 int i;
4898 tree temp = create_tmp_var (type, "va_arg_tmp");
4899
4900 /* addr = &temp; */
4901 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4902 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4903 gimplify_and_add (t, pre_p);
4904
4905 for (i = 0; i < XVECLEN (container, 0); i++)
4906 {
4907 rtx slot = XVECEXP (container, 0, i);
4908 rtx reg = XEXP (slot, 0);
4909 enum machine_mode mode = GET_MODE (reg);
4910 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4911 tree addr_type = build_pointer_type (piece_type);
4912 tree src_addr, src;
4913 int src_offset;
4914 tree dest_addr, dest;
4915
4916 if (SSE_REGNO_P (REGNO (reg)))
4917 {
4918 src_addr = sse_addr;
4919 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4920 }
4921 else
4922 {
4923 src_addr = int_addr;
4924 src_offset = REGNO (reg) * 8;
4925 }
4926 src_addr = fold_convert (addr_type, src_addr);
4927 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4928 size_int (src_offset));
4929 src = build_va_arg_indirect_ref (src_addr);
4930
4931 dest_addr = fold_convert (addr_type, addr);
4932 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4933 size_int (INTVAL (XEXP (slot, 1))));
4934 dest = build_va_arg_indirect_ref (dest_addr);
4935
4936 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4937 gimplify_and_add (t, pre_p);
4938 }
4939 }
4940
4941 if (needed_intregs)
4942 {
4943 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4944 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4945 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4946 gimplify_and_add (t, pre_p);
4947 }
4948 if (needed_sseregs)
4949 {
4950 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4951 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4952 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4953 gimplify_and_add (t, pre_p);
4954 }
4955
4956 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4957 gimplify_and_add (t, pre_p);
4958
4959 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4960 append_to_statement_list (t, pre_p);
4961 }
4962
4963 /* ... otherwise out of the overflow area. */
4964
4965 /* Care for on-stack alignment if needed. */
4966 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4967 || integer_zerop (TYPE_SIZE (type)))
4968 t = ovf;
4969 else
4970 {
4971 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4972 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4973 build_int_cst (TREE_TYPE (ovf), align - 1));
4974 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4975 build_int_cst (TREE_TYPE (t), -align));
4976 }
4977 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4978
4979 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4980 gimplify_and_add (t2, pre_p);
4981
4982 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4983 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4984 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4985 gimplify_and_add (t, pre_p);
4986
4987 if (container)
4988 {
4989 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4990 append_to_statement_list (t, pre_p);
4991 }
4992
4993 ptrtype = build_pointer_type (type);
4994 addr = fold_convert (ptrtype, addr);
4995
4996 if (indirect_p)
4997 addr = build_va_arg_indirect_ref (addr);
4998 return build_va_arg_indirect_ref (addr);
4999 }
5000 \f
5001 /* Return nonzero if OPNUM's MEM should be matched
5002 in movabs* patterns. */
5003
5004 int
5005 ix86_check_movabs (rtx insn, int opnum)
5006 {
5007 rtx set, mem;
5008
5009 set = PATTERN (insn);
5010 if (GET_CODE (set) == PARALLEL)
5011 set = XVECEXP (set, 0, 0);
5012 gcc_assert (GET_CODE (set) == SET);
5013 mem = XEXP (set, opnum);
5014 while (GET_CODE (mem) == SUBREG)
5015 mem = SUBREG_REG (mem);
5016 gcc_assert (MEM_P (mem));
5017 return (volatile_ok || !MEM_VOLATILE_P (mem));
5018 }
5019 \f
5020 /* Initialize the table of extra 80387 mathematical constants. */
5021
5022 static void
5023 init_ext_80387_constants (void)
5024 {
5025 static const char * cst[5] =
5026 {
5027 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5028 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5029 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5030 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5031 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5032 };
5033 int i;
5034
5035 for (i = 0; i < 5; i++)
5036 {
5037 real_from_string (&ext_80387_constants_table[i], cst[i]);
5038 /* Ensure each constant is rounded to XFmode precision. */
5039 real_convert (&ext_80387_constants_table[i],
5040 XFmode, &ext_80387_constants_table[i]);
5041 }
5042
5043 ext_80387_constants_init = 1;
5044 }
5045
5046 /* Return a positive code if the constant X can be loaded with a special
5047 80387 instruction, 0 if it cannot, or -1 if X is not an x87 FP constant. */
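/* The codes agree with standard_80387_constant_opcode below:
   1 = fldz, 2 = fld1, 3 = fldlg2, 4 = fldln2, 5 = fldl2e, 6 = fldl2t,
   7 = fldpi, 8 = -0.0 (split to fldz;fchs), 9 = -1.0 (split to fld1;fchs).  */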
5048
5049 int
5050 standard_80387_constant_p (rtx x)
5051 {
5052 enum machine_mode mode = GET_MODE (x);
5053
5054 REAL_VALUE_TYPE r;
5055
5056 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5057 return -1;
5058
5059 if (x == CONST0_RTX (mode))
5060 return 1;
5061 if (x == CONST1_RTX (mode))
5062 return 2;
5063
5064 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5065
5066 /* For XFmode constants, try to find a special 80387 instruction when
5067 optimizing for size or on those CPUs that benefit from them. */
5068 if (mode == XFmode
5069 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5070 {
5071 int i;
5072
5073 if (! ext_80387_constants_init)
5074 init_ext_80387_constants ();
5075
5076 for (i = 0; i < 5; i++)
5077 if (real_identical (&r, &ext_80387_constants_table[i]))
5078 return i + 3;
5079 }
5080
5081 /* A load of the constant -0.0 or -1.0 will be split into
5082 a fldz;fchs or fld1;fchs sequence. */
5083 if (real_isnegzero (&r))
5084 return 8;
5085 if (real_identical (&r, &dconstm1))
5086 return 9;
5087
5088 return 0;
5089 }
5090
5091 /* Return the opcode of the special instruction to be used to load
5092 the constant X. */
5093
5094 const char *
5095 standard_80387_constant_opcode (rtx x)
5096 {
5097 switch (standard_80387_constant_p (x))
5098 {
5099 case 1:
5100 return "fldz";
5101 case 2:
5102 return "fld1";
5103 case 3:
5104 return "fldlg2";
5105 case 4:
5106 return "fldln2";
5107 case 5:
5108 return "fldl2e";
5109 case 6:
5110 return "fldl2t";
5111 case 7:
5112 return "fldpi";
5113 case 8:
5114 case 9:
5115 return "#";
5116 default:
5117 gcc_unreachable ();
5118 }
5119 }
5120
5121 /* Return the CONST_DOUBLE representing the 80387 constant that is
5122 loaded by the specified special instruction. The argument IDX
5123 matches the return value from standard_80387_constant_p. */
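/* Only the extended constants (codes 3 through 7, fldlg2 through fldpi)
   have table entries; the remaining codes do not need a constant from this
   table and must not be passed here.  */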
5124
5125 rtx
5126 standard_80387_constant_rtx (int idx)
5127 {
5128 int i;
5129
5130 if (! ext_80387_constants_init)
5131 init_ext_80387_constants ();
5132
5133 switch (idx)
5134 {
5135 case 3:
5136 case 4:
5137 case 5:
5138 case 6:
5139 case 7:
5140 i = idx - 3;
5141 break;
5142
5143 default:
5144 gcc_unreachable ();
5145 }
5146
5147 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5148 XFmode);
5149 }
5150
5151 /* Return 1 if MODE is a valid mode for SSE. */
5152 static int
5153 standard_sse_mode_p (enum machine_mode mode)
5154 {
5155 switch (mode)
5156 {
5157 case V16QImode:
5158 case V8HImode:
5159 case V4SImode:
5160 case V2DImode:
5161 case V4SFmode:
5162 case V2DFmode:
5163 return 1;
5164
5165 default:
5166 return 0;
5167 }
5168 }
5169
5170 /* Return a nonzero code if X is a constant that we can load into an SSE
5171 register without using memory, or zero otherwise. */
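/* The codes agree with standard_sse_constant_opcode below: 1 means an
   all-zeros constant (cleared with xorps/xorpd/pxor), 2 means an all-ones
   constant loaded with pcmpeqd (SSE2 only); -1 marks an all-ones constant
   when SSE2 is not available.  */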
5172 int
5173 standard_sse_constant_p (rtx x)
5174 {
5175 enum machine_mode mode = GET_MODE (x);
5176
5177 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5178 return 1;
5179 if (vector_all_ones_operand (x, mode)
5180 && standard_sse_mode_p (mode))
5181 return TARGET_SSE2 ? 2 : -1;
5182
5183 return 0;
5184 }
5185
5186 /* Return the opcode of the special instruction to be used to load
5187 the constant X. */
5188
5189 const char *
5190 standard_sse_constant_opcode (rtx insn, rtx x)
5191 {
5192 switch (standard_sse_constant_p (x))
5193 {
5194 case 1:
5195 if (get_attr_mode (insn) == MODE_V4SF)
5196 return "xorps\t%0, %0";
5197 else if (get_attr_mode (insn) == MODE_V2DF)
5198 return "xorpd\t%0, %0";
5199 else
5200 return "pxor\t%0, %0";
5201 case 2:
5202 return "pcmpeqd\t%0, %0";
5203 }
5204 gcc_unreachable ();
5205 }
5206
5207 /* Return 1 if OP contains a symbol reference. */
5208
5209 int
5210 symbolic_reference_mentioned_p (rtx op)
5211 {
5212 const char *fmt;
5213 int i;
5214
5215 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5216 return 1;
5217
5218 fmt = GET_RTX_FORMAT (GET_CODE (op));
5219 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5220 {
5221 if (fmt[i] == 'E')
5222 {
5223 int j;
5224
5225 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5226 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5227 return 1;
5228 }
5229
5230 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5231 return 1;
5232 }
5233
5234 return 0;
5235 }
5236
5237 /* Return 1 if it is appropriate to emit `ret' instructions in the
5238 body of a function. Do this only if the epilogue is simple, needing a
5239 couple of insns. Prior to reloading, we can't tell how many registers
5240 must be saved, so return 0 then. Return 0 if there is no frame
5241 marker to de-allocate. */
5242
5243 int
5244 ix86_can_use_return_insn_p (void)
5245 {
5246 struct ix86_frame frame;
5247
5248 if (! reload_completed || frame_pointer_needed)
5249 return 0;
5250
5251 /* Don't allow more than 32K bytes of pops, since that's all we can do
5252 with one instruction. */
5253 if (current_function_pops_args
5254 && current_function_args_size >= 32768)
5255 return 0;
5256
5257 ix86_compute_frame_layout (&frame);
5258 return frame.to_allocate == 0 && frame.nregs == 0;
5259 }
5260 \f
5261 /* Value should be nonzero if functions must have frame pointers.
5262 Zero means the frame pointer need not be set up (and parms may
5263 be accessed via the stack pointer) in functions that seem suitable. */
5264
5265 int
5266 ix86_frame_pointer_required (void)
5267 {
5268 /* If we accessed previous frames, then the generated code expects
5269 to be able to access the saved ebp value in our frame. */
5270 if (cfun->machine->accesses_prev_frame)
5271 return 1;
5272
5273 /* Several x86 OSes need a frame pointer for other reasons,
5274 usually pertaining to setjmp. */
5275 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5276 return 1;
5277
5278 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5279 the frame pointer by default. Turn it back on now if we've not
5280 got a leaf function. */
5281 if (TARGET_OMIT_LEAF_FRAME_POINTER
5282 && (!current_function_is_leaf
5283 || ix86_current_function_calls_tls_descriptor))
5284 return 1;
5285
5286 if (current_function_profile)
5287 return 1;
5288
5289 return 0;
5290 }
5291
5292 /* Record that the current function accesses previous call frames. */
5293
5294 void
5295 ix86_setup_frame_addresses (void)
5296 {
5297 cfun->machine->accesses_prev_frame = 1;
5298 }
5299 \f
5300 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5301 # define USE_HIDDEN_LINKONCE 1
5302 #else
5303 # define USE_HIDDEN_LINKONCE 0
5304 #endif
5305
5306 static int pic_labels_used;
5307
5308 /* Fills in the label name that should be used for a pc thunk for
5309 the given register. */
5310
5311 static void
5312 get_pc_thunk_name (char name[32], unsigned int regno)
5313 {
5314 gcc_assert (!TARGET_64BIT);
5315
5316 if (USE_HIDDEN_LINKONCE)
5317 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5318 else
5319 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5320 }
5321
5322
5323 /* Emit, at the end of the file, any pc thunks that were required: each one
5324 loads its register with the return address of the caller and then returns. */
5325
5326 void
5327 ix86_file_end (void)
5328 {
5329 rtx xops[2];
5330 int regno;
5331
5332 for (regno = 0; regno < 8; ++regno)
5333 {
5334 char name[32];
5335
5336 if (! ((pic_labels_used >> regno) & 1))
5337 continue;
5338
5339 get_pc_thunk_name (name, regno);
5340
5341 #if TARGET_MACHO
5342 if (TARGET_MACHO)
5343 {
5344 switch_to_section (darwin_sections[text_coal_section]);
5345 fputs ("\t.weak_definition\t", asm_out_file);
5346 assemble_name (asm_out_file, name);
5347 fputs ("\n\t.private_extern\t", asm_out_file);
5348 assemble_name (asm_out_file, name);
5349 fputs ("\n", asm_out_file);
5350 ASM_OUTPUT_LABEL (asm_out_file, name);
5351 }
5352 else
5353 #endif
5354 if (USE_HIDDEN_LINKONCE)
5355 {
5356 tree decl;
5357
5358 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5359 error_mark_node);
5360 TREE_PUBLIC (decl) = 1;
5361 TREE_STATIC (decl) = 1;
5362 DECL_ONE_ONLY (decl) = 1;
5363
5364 (*targetm.asm_out.unique_section) (decl, 0);
5365 switch_to_section (get_named_section (decl, NULL, 0));
5366
5367 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5368 fputs ("\t.hidden\t", asm_out_file);
5369 assemble_name (asm_out_file, name);
5370 fputc ('\n', asm_out_file);
5371 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5372 }
5373 else
5374 {
5375 switch_to_section (text_section);
5376 ASM_OUTPUT_LABEL (asm_out_file, name);
5377 }
5378
5379 xops[0] = gen_rtx_REG (SImode, regno);
5380 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5381 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5382 output_asm_insn ("ret", xops);
5383 }
5384
5385 if (NEED_INDICATE_EXEC_STACK)
5386 file_end_indicate_exec_stack ();
5387 }
5388
5389 /* Emit code for the SET_GOT patterns. */
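/* Three strategies are used below: on VxWorks RTP the GOT base is loaded
   from *VXWORKS_GOTT_BASE and indexed by VXWORKS_GOTT_INDEX; without deep
   branch prediction (or without PIC) the address of a local label is
   obtained either with a plain mov of the label (non-PIC) or with a call to
   it followed by a pop (PIC); otherwise a per-register pc thunk is called.
   The GOT_SYMBOL_NAME offset is then added, except on Mach-O.  */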
5390
5391 const char *
5392 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5393 {
5394 rtx xops[3];
5395
5396 xops[0] = dest;
5397
5398 if (TARGET_VXWORKS_RTP && flag_pic)
5399 {
5400 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5401 xops[2] = gen_rtx_MEM (Pmode,
5402 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5403 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5404
5405 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5406 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5407 an unadorned address. */
5408 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5409 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5410 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5411 return "";
5412 }
5413
5414 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5415
5416 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5417 {
5418 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5419
5420 if (!flag_pic)
5421 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5422 else
5423 output_asm_insn ("call\t%a2", xops);
5424
5425 #if TARGET_MACHO
5426 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5427 is what will be referenced by the Mach-O PIC subsystem. */
5428 if (!label)
5429 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5430 #endif
5431
5432 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5433 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5434
5435 if (flag_pic)
5436 output_asm_insn ("pop{l}\t%0", xops);
5437 }
5438 else
5439 {
5440 char name[32];
5441 get_pc_thunk_name (name, REGNO (dest));
5442 pic_labels_used |= 1 << REGNO (dest);
5443
5444 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5445 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5446 output_asm_insn ("call\t%X2", xops);
5447 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5448 is what will be referenced by the Mach-O PIC subsystem. */
5449 #if TARGET_MACHO
5450 if (!label)
5451 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5452 else
5453 targetm.asm_out.internal_label (asm_out_file, "L",
5454 CODE_LABEL_NUMBER (label));
5455 #endif
5456 }
5457
5458 if (TARGET_MACHO)
5459 return "";
5460
5461 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5462 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5463 else
5464 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5465
5466 return "";
5467 }
5468
5469 /* Generate an "push" pattern for input ARG. */
5470
5471 static rtx
5472 gen_push (rtx arg)
5473 {
5474 return gen_rtx_SET (VOIDmode,
5475 gen_rtx_MEM (Pmode,
5476 gen_rtx_PRE_DEC (Pmode,
5477 stack_pointer_rtx)),
5478 arg);
5479 }
5480
5481 /* Return >= 0 if there is an unused call-clobbered register available
5482 for the entire function. */
5483
5484 static unsigned int
5485 ix86_select_alt_pic_regnum (void)
5486 {
5487 if (current_function_is_leaf && !current_function_profile
5488 && !ix86_current_function_calls_tls_descriptor)
5489 {
5490 int i;
5491 for (i = 2; i >= 0; --i)
5492 if (!regs_ever_live[i])
5493 return i;
5494 }
5495
5496 return INVALID_REGNUM;
5497 }
5498
5499 /* Return 1 if we need to save REGNO. */
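/* This covers: the PIC register when it is needed and no unused
   call-clobbered register can take its place; the EH return data registers
   when MAYBE_EH_RETURN is set and the function calls eh_return; the register
   holding the forced-alignment argument pointer; and any other live register
   that is neither call-used nor fixed (excluding the hard frame pointer when
   a frame pointer is being used).  */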
5500 static int
5501 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5502 {
5503 if (pic_offset_table_rtx
5504 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5505 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5506 || current_function_profile
5507 || current_function_calls_eh_return
5508 || current_function_uses_const_pool))
5509 {
5510 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5511 return 0;
5512 return 1;
5513 }
5514
5515 if (current_function_calls_eh_return && maybe_eh_return)
5516 {
5517 unsigned i;
5518 for (i = 0; ; i++)
5519 {
5520 unsigned test = EH_RETURN_DATA_REGNO (i);
5521 if (test == INVALID_REGNUM)
5522 break;
5523 if (test == regno)
5524 return 1;
5525 }
5526 }
5527
5528 if (cfun->machine->force_align_arg_pointer
5529 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5530 return 1;
5531
5532 return (regs_ever_live[regno]
5533 && !call_used_regs[regno]
5534 && !fixed_regs[regno]
5535 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5536 }
5537
5538 /* Return number of registers to be saved on the stack. */
5539
5540 static int
5541 ix86_nsaved_regs (void)
5542 {
5543 int nregs = 0;
5544 int regno;
5545
5546 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5547 if (ix86_save_reg (regno, true))
5548 nregs++;
5549 return nregs;
5550 }
5551
5552 /* Return the offset between two registers, one to be eliminated, and the other
5553 its replacement, at the start of a routine. */
5554
5555 HOST_WIDE_INT
5556 ix86_initial_elimination_offset (int from, int to)
5557 {
5558 struct ix86_frame frame;
5559 ix86_compute_frame_layout (&frame);
5560
5561 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5562 return frame.hard_frame_pointer_offset;
5563 else if (from == FRAME_POINTER_REGNUM
5564 && to == HARD_FRAME_POINTER_REGNUM)
5565 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5566 else
5567 {
5568 gcc_assert (to == STACK_POINTER_REGNUM);
5569
5570 if (from == ARG_POINTER_REGNUM)
5571 return frame.stack_pointer_offset;
5572
5573 gcc_assert (from == FRAME_POINTER_REGNUM);
5574 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5575 }
5576 }
5577
5578 /* Fill the structure *FRAME with details about the frame of the currently compiled function. */
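/* Summary of the layout computed below, in the order OFFSET accumulates:
   the return address (plus the saved frame pointer when one is needed),
   the register save area, the va-arg register save area, PADDING1 (which
   aligns the start of the local frame), the local variables, the outgoing
   arguments area, and PADDING2 (which aligns to the preferred stack
   boundary).  FRAME_POINTER_OFFSET marks where the frame pointer points
   (after PADDING1) and STACK_POINTER_OFFSET where the stack pointer ends up
   (after PADDING2); on red-zone targets, leaf functions keep the trailing
   part of the frame in the red zone instead of allocating it.  */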
5579
5580 static void
5581 ix86_compute_frame_layout (struct ix86_frame *frame)
5582 {
5583 HOST_WIDE_INT total_size;
5584 unsigned int stack_alignment_needed;
5585 HOST_WIDE_INT offset;
5586 unsigned int preferred_alignment;
5587 HOST_WIDE_INT size = get_frame_size ();
5588
5589 frame->nregs = ix86_nsaved_regs ();
5590 total_size = size;
5591
5592 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5593 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5594
5595 /* During reload iteration the number of registers saved can change.
5596 Recompute the value as needed. Do not recompute when the number of
5597 registers didn't change, as reload makes multiple calls to this function
5598 and does not expect the decision to change within a single iteration. */
5599 if (!optimize_size
5600 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5601 {
5602 int count = frame->nregs;
5603
5604 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5605 /* The fast prologue uses move instead of push to save registers. This
5606 is significantly longer, but also executes faster as modern hardware
5607 can execute the moves in parallel, but can't do that for push/pop.
5608
5609 Be careful about choosing which prologue to emit: when the function takes
5610 many instructions to execute, we may as well use the slow version, and
5611 likewise when the function is known to be outside a hot spot (this is
5612 known with feedback only). Weight the size of the function by the number
5613 of registers to save, as it is cheap to use one or two push instructions
5614 but very slow to use many of them. */
5615 if (count)
5616 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5617 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5618 || (flag_branch_probabilities
5619 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5620 cfun->machine->use_fast_prologue_epilogue = false;
5621 else
5622 cfun->machine->use_fast_prologue_epilogue
5623 = !expensive_function_p (count);
5624 }
5625 if (TARGET_PROLOGUE_USING_MOVE
5626 && cfun->machine->use_fast_prologue_epilogue)
5627 frame->save_regs_using_mov = true;
5628 else
5629 frame->save_regs_using_mov = false;
5630
5631
5632 /* Skip return address and saved base pointer. */
5633 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5634
5635 frame->hard_frame_pointer_offset = offset;
5636
5637 /* Do some sanity checking of stack_alignment_needed and
5638 preferred_alignment, since the i386 port is the only one using these
5639 features, and they may break easily. */
5640
5641 gcc_assert (!size || stack_alignment_needed);
5642 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5643 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5644 gcc_assert (stack_alignment_needed
5645 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5646
5647 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5648 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5649
5650 /* Register save area */
5651 offset += frame->nregs * UNITS_PER_WORD;
5652
5653 /* Va-arg area */
5654 if (ix86_save_varrargs_registers)
5655 {
5656 offset += X86_64_VARARGS_SIZE;
5657 frame->va_arg_size = X86_64_VARARGS_SIZE;
5658 }
5659 else
5660 frame->va_arg_size = 0;
5661
5662 /* Align start of frame for local function. */
5663 frame->padding1 = ((offset + stack_alignment_needed - 1)
5664 & -stack_alignment_needed) - offset;
5665
5666 offset += frame->padding1;
5667
5668 /* Frame pointer points here. */
5669 frame->frame_pointer_offset = offset;
5670
5671 offset += size;
5672
5673 /* Add the outgoing arguments area. It can be skipped if we eliminated
5674 all the function calls as dead code.
5675 Skipping is however impossible when the function calls alloca: the
5676 alloca expander assumes that the last current_function_outgoing_args_size
5677 bytes of the stack frame are unused. */
5678 if (ACCUMULATE_OUTGOING_ARGS
5679 && (!current_function_is_leaf || current_function_calls_alloca
5680 || ix86_current_function_calls_tls_descriptor))
5681 {
5682 offset += current_function_outgoing_args_size;
5683 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5684 }
5685 else
5686 frame->outgoing_arguments_size = 0;
5687
5688 /* Align stack boundary. Only needed if we're calling another function
5689 or using alloca. */
5690 if (!current_function_is_leaf || current_function_calls_alloca
5691 || ix86_current_function_calls_tls_descriptor)
5692 frame->padding2 = ((offset + preferred_alignment - 1)
5693 & -preferred_alignment) - offset;
5694 else
5695 frame->padding2 = 0;
5696
5697 offset += frame->padding2;
5698
5699 /* We've reached end of stack frame. */
5700 frame->stack_pointer_offset = offset;
5701
5702 /* Size prologue needs to allocate. */
5703 frame->to_allocate =
5704 (size + frame->padding1 + frame->padding2
5705 + frame->outgoing_arguments_size + frame->va_arg_size);
5706
5707 if ((!frame->to_allocate && frame->nregs <= 1)
5708 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5709 frame->save_regs_using_mov = false;
5710
5711 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5712 && current_function_is_leaf
5713 && !ix86_current_function_calls_tls_descriptor)
5714 {
5715 frame->red_zone_size = frame->to_allocate;
5716 if (frame->save_regs_using_mov)
5717 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5718 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5719 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5720 }
5721 else
5722 frame->red_zone_size = 0;
5723 frame->to_allocate -= frame->red_zone_size;
5724 frame->stack_pointer_offset -= frame->red_zone_size;
5725 #if 0
5726 fprintf (stderr, "\n");
5727 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5728 fprintf (stderr, "size: %ld\n", (long)size);
5729 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5730 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5731 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5732 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5733 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5734 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5735 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5736 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5737 (long)frame->hard_frame_pointer_offset);
5738 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5739 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5740 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5741 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5742 #endif
5743 }
5744
5745 /* Emit code to save registers in the prologue. */
5746
5747 static void
5748 ix86_emit_save_regs (void)
5749 {
5750 unsigned int regno;
5751 rtx insn;
5752
5753 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5754 if (ix86_save_reg (regno, true))
5755 {
5756 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5757 RTX_FRAME_RELATED_P (insn) = 1;
5758 }
5759 }
5760
5761 /* Emit code to save registers using MOV insns. The first register
5762 is saved at POINTER + OFFSET. */
5763 static void
5764 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5765 {
5766 unsigned int regno;
5767 rtx insn;
5768
5769 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5770 if (ix86_save_reg (regno, true))
5771 {
5772 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5773 Pmode, offset),
5774 gen_rtx_REG (Pmode, regno));
5775 RTX_FRAME_RELATED_P (insn) = 1;
5776 offset += UNITS_PER_WORD;
5777 }
5778 }
5779
5780 /* Expand a prologue or epilogue stack adjustment.
5781 The pattern exists to put a dependency on all ebp-based memory accesses.
5782 STYLE should be negative if instructions should be marked as frame
5783 related, zero if the %r11 register is live and cannot be freely used,
5784 and positive otherwise. */
5785
5786 static void
5787 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5788 {
5789 rtx insn;
5790
5791 if (! TARGET_64BIT)
5792 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5793 else if (x86_64_immediate_operand (offset, DImode))
5794 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5795 else
5796 {
5797 rtx r11;
5798 /* r11 is used by indirect sibcall return as well, set before the
5799 epilogue and used after the epilogue. ATM indirect sibcall
5800 shouldn't be used together with huge frame sizes in one
5801 function because of the frame_size check in sibcall.c. */
5802 gcc_assert (style);
5803 r11 = gen_rtx_REG (DImode, R11_REG);
5804 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5805 if (style < 0)
5806 RTX_FRAME_RELATED_P (insn) = 1;
5807 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5808 offset));
5809 }
5810 if (style < 0)
5811 RTX_FRAME_RELATED_P (insn) = 1;
5812 }
5813
5814 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5815
5816 static rtx
5817 ix86_internal_arg_pointer (void)
5818 {
5819 bool has_force_align_arg_pointer =
5820 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5821 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5822 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5823 && DECL_NAME (current_function_decl)
5824 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5825 && DECL_FILE_SCOPE_P (current_function_decl))
5826 || ix86_force_align_arg_pointer
5827 || has_force_align_arg_pointer)
5828 {
5829 /* Nested functions can't realign the stack due to a register
5830 conflict. */
5831 if (DECL_CONTEXT (current_function_decl)
5832 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5833 {
5834 if (ix86_force_align_arg_pointer)
5835 warning (0, "-mstackrealign ignored for nested functions");
5836 if (has_force_align_arg_pointer)
5837 error ("%s not supported for nested functions",
5838 ix86_force_align_arg_pointer_string);
5839 return virtual_incoming_args_rtx;
5840 }
5841 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5842 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5843 }
5844 else
5845 return virtual_incoming_args_rtx;
5846 }
5847
5848 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5849 This is called from dwarf2out.c to emit call frame instructions
5850 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5851 static void
5852 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5853 {
5854 rtx unspec = SET_SRC (pattern);
5855 gcc_assert (GET_CODE (unspec) == UNSPEC);
5856
5857 switch (index)
5858 {
5859 case UNSPEC_REG_SAVE:
5860 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5861 SET_DEST (pattern));
5862 break;
5863 case UNSPEC_DEF_CFA:
5864 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5865 INTVAL (XVECEXP (unspec, 0, 0)));
5866 break;
5867 default:
5868 gcc_unreachable ();
5869 }
5870 }
5871
5872 /* Expand the prologue into a bunch of separate insns. */
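/* The steps below are: optionally realign the stack when
   force_align_arg_pointer is in use (saving the incoming argument pointer
   and re-pushing the return address, with matching unwind annotations);
   push and set up the frame pointer if one is needed; save registers,
   either with pushes or with moves (move saves happen before the frame
   allocation on red-zone targets and after it otherwise); allocate the
   frame with a simple stack adjustment or, for large probed allocations,
   through the allocate_stack worker via %eax; load the PIC register if it
   is needed; and emit a scheduling blockage when profiling so calls are
   not scheduled before the call to mcount.  */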
5873
5874 void
5875 ix86_expand_prologue (void)
5876 {
5877 rtx insn;
5878 bool pic_reg_used;
5879 struct ix86_frame frame;
5880 HOST_WIDE_INT allocate;
5881
5882 ix86_compute_frame_layout (&frame);
5883
5884 if (cfun->machine->force_align_arg_pointer)
5885 {
5886 rtx x, y;
5887
5888 /* Grab the argument pointer. */
5889 x = plus_constant (stack_pointer_rtx, 4);
5890 y = cfun->machine->force_align_arg_pointer;
5891 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5892 RTX_FRAME_RELATED_P (insn) = 1;
5893
5894 /* The unwind info consists of two parts: install the fafp as the cfa,
5895 and record the fafp as the "save register" of the stack pointer.
5896 The latter is there so that the unwinder can see where it should
5897 restore the stack pointer across the and insn. */
5898 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5899 x = gen_rtx_SET (VOIDmode, y, x);
5900 RTX_FRAME_RELATED_P (x) = 1;
5901 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5902 UNSPEC_REG_SAVE);
5903 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5904 RTX_FRAME_RELATED_P (y) = 1;
5905 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5906 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5907 REG_NOTES (insn) = x;
5908
5909 /* Align the stack. */
5910 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5911 GEN_INT (-16)));
5912
5913 /* And here we cheat like madmen with the unwind info. We force the
5914 cfa register back to sp+4, which is exactly what it was at the
5915 start of the function. Re-pushing the return address results in
5916 the return at the same spot relative to the cfa, and thus is
5917 correct wrt the unwind info. */
5918 x = cfun->machine->force_align_arg_pointer;
5919 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5920 insn = emit_insn (gen_push (x));
5921 RTX_FRAME_RELATED_P (insn) = 1;
5922
5923 x = GEN_INT (4);
5924 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5925 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5926 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5927 REG_NOTES (insn) = x;
5928 }
5929
5930 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5931 slower on all targets. Also sdb doesn't like it. */
5932
5933 if (frame_pointer_needed)
5934 {
5935 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5936 RTX_FRAME_RELATED_P (insn) = 1;
5937
5938 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5939 RTX_FRAME_RELATED_P (insn) = 1;
5940 }
5941
5942 allocate = frame.to_allocate;
5943
5944 if (!frame.save_regs_using_mov)
5945 ix86_emit_save_regs ();
5946 else
5947 allocate += frame.nregs * UNITS_PER_WORD;
5948
5949 /* When using the red zone we may start saving registers before allocating
5950 the stack frame, saving one cycle of the prologue. */
5951 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5952 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5953 : stack_pointer_rtx,
5954 -frame.nregs * UNITS_PER_WORD);
5955
5956 if (allocate == 0)
5957 ;
5958 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5959 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5960 GEN_INT (-allocate), -1);
5961 else
5962 {
5963 /* Only valid for Win32 targets and the 64-bit MS ABI. */
5964 rtx eax = gen_rtx_REG (Pmode, 0);
5965 bool eax_live;
5966 rtx t;
5967
5968 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5969
5970 if (TARGET_64BIT_MS_ABI)
5971 eax_live = false;
5972 else
5973 eax_live = ix86_eax_live_at_start_p ();
5974
5975 if (eax_live)
5976 {
5977 emit_insn (gen_push (eax));
5978 allocate -= UNITS_PER_WORD;
5979 }
5980
5981 emit_move_insn (eax, GEN_INT (allocate));
5982
5983 if (TARGET_64BIT)
5984 insn = gen_allocate_stack_worker_64 (eax);
5985 else
5986 insn = gen_allocate_stack_worker_32 (eax);
5987 insn = emit_insn (insn);
5988 RTX_FRAME_RELATED_P (insn) = 1;
5989 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5990 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5991 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5992 t, REG_NOTES (insn));
5993
5994 if (eax_live)
5995 {
5996 if (frame_pointer_needed)
5997 t = plus_constant (hard_frame_pointer_rtx,
5998 allocate
5999 - frame.to_allocate
6000 - frame.nregs * UNITS_PER_WORD);
6001 else
6002 t = plus_constant (stack_pointer_rtx, allocate);
6003 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6004 }
6005 }
6006
6007 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6008 {
6009 if (!frame_pointer_needed || !frame.to_allocate)
6010 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6011 else
6012 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6013 -frame.nregs * UNITS_PER_WORD);
6014 }
6015
6016 pic_reg_used = false;
6017 if (pic_offset_table_rtx
6018 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6019 || current_function_profile))
6020 {
6021 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6022
6023 if (alt_pic_reg_used != INVALID_REGNUM)
6024 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6025
6026 pic_reg_used = true;
6027 }
6028
6029 if (pic_reg_used)
6030 {
6031 if (TARGET_64BIT)
6032 {
6033 if (ix86_cmodel == CM_LARGE_PIC)
6034 {
6035 rtx tmp_reg = gen_rtx_REG (DImode,
6036 FIRST_REX_INT_REG + 3 /* R11 */);
6037 rtx label = gen_label_rtx ();
6038 emit_label (label);
6039 LABEL_PRESERVE_P (label) = 1;
6040 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6041 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6042 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6043 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6044 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6045 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6046 pic_offset_table_rtx, tmp_reg));
6047 }
6048 else
6049 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6050 }
6051 else
6052 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6053
6054 /* Even with accurate pre-reload life analysis, we can wind up
6055 deleting all references to the pic register after reload.
6056 Consider the case where cross-jumping unifies two sides of a branch
6057 controlled by a comparison vs the only read from a global. In that
6058 case, allow the set_got to be deleted, though we're too late to do
6059 anything about the ebx save in the prologue. */
6060 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6061 }
6062
6063 /* Prevent function calls from being scheduled before the call to mcount.
6064 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
6065 if (current_function_profile)
6066 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6067 }
6068
6069 /* Emit code to restore saved registers using MOV insns. First register
6070 is restored from POINTER + OFFSET. */
6071 static void
6072 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6073 int maybe_eh_return)
6074 {
6075 int regno;
6076 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6077
6078 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6079 if (ix86_save_reg (regno, maybe_eh_return))
6080 {
6081 /* Ensure that adjust_address won't be forced to produce a pointer
6082 out of the range allowed by the x86-64 instruction set. */
6083 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6084 {
6085 rtx r11;
6086
6087 r11 = gen_rtx_REG (DImode, R11_REG);
6088 emit_move_insn (r11, GEN_INT (offset));
6089 emit_insn (gen_adddi3 (r11, r11, pointer));
6090 base_address = gen_rtx_MEM (Pmode, r11);
6091 offset = 0;
6092 }
6093 emit_move_insn (gen_rtx_REG (Pmode, regno),
6094 adjust_address (base_address, Pmode, offset));
6095 offset += UNITS_PER_WORD;
6096 }
6097 }
6098
6099 /* Restore function stack, frame, and registers. */
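/* STYLE is 0 for sibcall epilogues (no return instruction is emitted),
   2 for eh_return epilogues (the stack pointer is additionally adjusted by
   EH_RETURN_STACKADJ_RTX), and any other value for a normal return.
   Registers are restored either with moves followed by a leave / stack
   adjustment, or by deallocating the frame and popping them.  */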
6100
6101 void
6102 ix86_expand_epilogue (int style)
6103 {
6104 int regno;
6105 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6106 struct ix86_frame frame;
6107 HOST_WIDE_INT offset;
6108
6109 ix86_compute_frame_layout (&frame);
6110
6111 /* Calculate start of saved registers relative to ebp. Special care
6112 must be taken for the normal return case of a function using
6113 eh_return: the eax and edx registers are marked as saved, but not
6114 restored along this path. */
6115 offset = frame.nregs;
6116 if (current_function_calls_eh_return && style != 2)
6117 offset -= 2;
6118 offset *= -UNITS_PER_WORD;
6119
6120 /* If we're only restoring one register and sp is not valid then
6121 we use a move instruction to restore the register, since it's
6122 less work than reloading sp and popping the register.
6123
6124 The default code results in a stack adjustment using an add/lea
6125 instruction, while this code results in a LEAVE instruction (or its
6126 discrete equivalent), so it is profitable in some other cases as
6127 well, especially when there are no registers to restore. We also use
6128 this code when TARGET_USE_LEAVE and there is exactly one register to
6129 pop. This heuristic may need some tuning in the future. */
6130 if ((!sp_valid && frame.nregs <= 1)
6131 || (TARGET_EPILOGUE_USING_MOVE
6132 && cfun->machine->use_fast_prologue_epilogue
6133 && (frame.nregs > 1 || frame.to_allocate))
6134 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6135 || (frame_pointer_needed && TARGET_USE_LEAVE
6136 && cfun->machine->use_fast_prologue_epilogue
6137 && frame.nregs == 1)
6138 || current_function_calls_eh_return)
6139 {
6140 /* Restore registers. We can use ebp or esp to address the memory
6141 locations. If both are available, default to ebp, since offsets
6142 are known to be small. The only exception is esp pointing directly
6143 to the end of the block of saved registers, where we may simplify
6144 the addressing mode. */
6145
6146 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6147 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6148 frame.to_allocate, style == 2);
6149 else
6150 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6151 offset, style == 2);
6152
6153 /* eh_return epilogues need %ecx added to the stack pointer. */
6154 if (style == 2)
6155 {
6156 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6157
6158 if (frame_pointer_needed)
6159 {
6160 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6161 tmp = plus_constant (tmp, UNITS_PER_WORD);
6162 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6163
6164 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6165 emit_move_insn (hard_frame_pointer_rtx, tmp);
6166
6167 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6168 const0_rtx, style);
6169 }
6170 else
6171 {
6172 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6173 tmp = plus_constant (tmp, (frame.to_allocate
6174 + frame.nregs * UNITS_PER_WORD));
6175 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6176 }
6177 }
6178 else if (!frame_pointer_needed)
6179 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6180 GEN_INT (frame.to_allocate
6181 + frame.nregs * UNITS_PER_WORD),
6182 style);
6183 /* If not an i386, mov & pop is faster than "leave". */
6184 else if (TARGET_USE_LEAVE || optimize_size
6185 || !cfun->machine->use_fast_prologue_epilogue)
6186 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6187 else
6188 {
6189 pro_epilogue_adjust_stack (stack_pointer_rtx,
6190 hard_frame_pointer_rtx,
6191 const0_rtx, style);
6192 if (TARGET_64BIT)
6193 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6194 else
6195 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6196 }
6197 }
6198 else
6199 {
6200 /* First step is to deallocate the stack frame so that we can
6201 pop the registers. */
6202 if (!sp_valid)
6203 {
6204 gcc_assert (frame_pointer_needed);
6205 pro_epilogue_adjust_stack (stack_pointer_rtx,
6206 hard_frame_pointer_rtx,
6207 GEN_INT (offset), style);
6208 }
6209 else if (frame.to_allocate)
6210 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6211 GEN_INT (frame.to_allocate), style);
6212
6213 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6214 if (ix86_save_reg (regno, false))
6215 {
6216 if (TARGET_64BIT)
6217 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6218 else
6219 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6220 }
6221 if (frame_pointer_needed)
6222 {
6223 /* Leave results in shorter dependency chains on CPUs that are
6224 able to grok it fast. */
6225 if (TARGET_USE_LEAVE)
6226 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6227 else if (TARGET_64BIT)
6228 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6229 else
6230 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6231 }
6232 }
6233
6234 if (cfun->machine->force_align_arg_pointer)
6235 {
6236 emit_insn (gen_addsi3 (stack_pointer_rtx,
6237 cfun->machine->force_align_arg_pointer,
6238 GEN_INT (-4)));
6239 }
6240
6241 /* Sibcall epilogues don't want a return instruction. */
6242 if (style == 0)
6243 return;
6244
6245 if (current_function_pops_args && current_function_args_size)
6246 {
6247 rtx popc = GEN_INT (current_function_pops_args);
6248
6249 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6250 return address, do an explicit add, and jump indirectly to the
6251 caller. */
6252
6253 if (current_function_pops_args >= 65536)
6254 {
6255 rtx ecx = gen_rtx_REG (SImode, 2);
6256
6257 /* There is no "pascal" calling convention in any 64bit ABI. */
6258 gcc_assert (!TARGET_64BIT);
6259
6260 emit_insn (gen_popsi1 (ecx));
6261 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6262 emit_jump_insn (gen_return_indirect_internal (ecx));
6263 }
6264 else
6265 emit_jump_insn (gen_return_pop_internal (popc));
6266 }
6267 else
6268 emit_jump_insn (gen_return_internal ());
6269 }
6270
6271 /* Undo the current function's potential modifications (e.g. the PIC register number). */
6272
6273 static void
6274 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6275 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6276 {
6277 if (pic_offset_table_rtx)
6278 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6279 #if TARGET_MACHO
6280 /* Mach-O doesn't support labels at the end of objects, so if
6281 it looks like we might want one, insert a NOP. */
6282 {
6283 rtx insn = get_last_insn ();
6284 while (insn
6285 && NOTE_P (insn)
6286 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6287 insn = PREV_INSN (insn);
6288 if (insn
6289 && (LABEL_P (insn)
6290 || (NOTE_P (insn)
6291 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6292 fputs ("\tnop\n", file);
6293 }
6294 #endif
6295
6296 }
6297 \f
6298 /* Extract the parts of an RTL expression that is a valid memory address
6299 for an instruction. Return 0 if the structure of the address is
6300 grossly off. Return -1 if the address contains ASHIFT, so it is not
6301 strictly valid, but is still used for computing the length of lea instructions. */
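/* The address is decomposed as base + index*scale + displacement, together
   with an optional segment override (SEG_FS or SEG_GS for TLS references
   through UNSPEC_TP); the parts are returned in *OUT.  */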
6302
6303 int
6304 ix86_decompose_address (rtx addr, struct ix86_address *out)
6305 {
6306 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6307 rtx base_reg, index_reg;
6308 HOST_WIDE_INT scale = 1;
6309 rtx scale_rtx = NULL_RTX;
6310 int retval = 1;
6311 enum ix86_address_seg seg = SEG_DEFAULT;
6312
6313 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6314 base = addr;
6315 else if (GET_CODE (addr) == PLUS)
6316 {
6317 rtx addends[4], op;
6318 int n = 0, i;
6319
6320 op = addr;
6321 do
6322 {
6323 if (n >= 4)
6324 return 0;
6325 addends[n++] = XEXP (op, 1);
6326 op = XEXP (op, 0);
6327 }
6328 while (GET_CODE (op) == PLUS);
6329 if (n >= 4)
6330 return 0;
6331 addends[n] = op;
6332
6333 for (i = n; i >= 0; --i)
6334 {
6335 op = addends[i];
6336 switch (GET_CODE (op))
6337 {
6338 case MULT:
6339 if (index)
6340 return 0;
6341 index = XEXP (op, 0);
6342 scale_rtx = XEXP (op, 1);
6343 break;
6344
6345 case UNSPEC:
6346 if (XINT (op, 1) == UNSPEC_TP
6347 && TARGET_TLS_DIRECT_SEG_REFS
6348 && seg == SEG_DEFAULT)
6349 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6350 else
6351 return 0;
6352 break;
6353
6354 case REG:
6355 case SUBREG:
6356 if (!base)
6357 base = op;
6358 else if (!index)
6359 index = op;
6360 else
6361 return 0;
6362 break;
6363
6364 case CONST:
6365 case CONST_INT:
6366 case SYMBOL_REF:
6367 case LABEL_REF:
6368 if (disp)
6369 return 0;
6370 disp = op;
6371 break;
6372
6373 default:
6374 return 0;
6375 }
6376 }
6377 }
6378 else if (GET_CODE (addr) == MULT)
6379 {
6380 index = XEXP (addr, 0); /* index*scale */
6381 scale_rtx = XEXP (addr, 1);
6382 }
6383 else if (GET_CODE (addr) == ASHIFT)
6384 {
6385 rtx tmp;
6386
6387 /* We're called for lea too, which implements ashift on occasion. */
6388 index = XEXP (addr, 0);
6389 tmp = XEXP (addr, 1);
6390 if (!CONST_INT_P (tmp))
6391 return 0;
6392 scale = INTVAL (tmp);
6393 if ((unsigned HOST_WIDE_INT) scale > 3)
6394 return 0;
6395 scale = 1 << scale;
6396 retval = -1;
6397 }
6398 else
6399 disp = addr; /* displacement */
6400
6401 /* Extract the integral value of scale. */
6402 if (scale_rtx)
6403 {
6404 if (!CONST_INT_P (scale_rtx))
6405 return 0;
6406 scale = INTVAL (scale_rtx);
6407 }
6408
6409 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6410 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6411
6412 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6413 if (base_reg && index_reg && scale == 1
6414 && (index_reg == arg_pointer_rtx
6415 || index_reg == frame_pointer_rtx
6416 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6417 {
6418 rtx tmp;
6419 tmp = base, base = index, index = tmp;
6420 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6421 }
6422
6423 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6424 if ((base_reg == hard_frame_pointer_rtx
6425 || base_reg == frame_pointer_rtx
6426 || base_reg == arg_pointer_rtx) && !disp)
6427 disp = const0_rtx;
6428
6429 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6430 Avoid this by transforming it to [%esi+0]. */
6431 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6432 && base_reg && !index_reg && !disp
6433 && REG_P (base_reg)
6434 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6435 disp = const0_rtx;
6436
6437 /* Special case: encode reg+reg instead of reg*2. */
6438 if (!base && index && scale && scale == 2)
6439 base = index, base_reg = index_reg, scale = 1;
6440
6441 /* Special case: scaling cannot be encoded without base or displacement. */
6442 if (!base && !disp && index && scale != 1)
6443 disp = const0_rtx;
6444
6445 out->base = base;
6446 out->index = index;
6447 out->disp = disp;
6448 out->scale = scale;
6449 out->seg = seg;
6450
6451 return retval;
6452 }
6453 \f
6454 /* Return the cost of the memory address X.
6455 For i386, it is better to use a complex address than to let gcc copy
6456 the address into a register and make a new pseudo. But not if the
6457 address requires two regs - that would mean more pseudos with longer
6458 lifetimes. */
6459 static int
6460 ix86_address_cost (rtx x)
6461 {
6462 struct ix86_address parts;
6463 int cost = 1;
6464 int ok = ix86_decompose_address (x, &parts);
6465
6466 gcc_assert (ok);
6467
6468 if (parts.base && GET_CODE (parts.base) == SUBREG)
6469 parts.base = SUBREG_REG (parts.base);
6470 if (parts.index && GET_CODE (parts.index) == SUBREG)
6471 parts.index = SUBREG_REG (parts.index);
6472
6473 /* More complex memory references are better. */
6474 if (parts.disp && parts.disp != const0_rtx)
6475 cost--;
6476 if (parts.seg != SEG_DEFAULT)
6477 cost--;
6478
6479 /* Attempt to minimize number of registers in the address. */
6480 if ((parts.base
6481 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6482 || (parts.index
6483 && (!REG_P (parts.index)
6484 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6485 cost++;
6486
6487 if (parts.base
6488 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6489 && parts.index
6490 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6491 && parts.base != parts.index)
6492 cost++;
6493
6494 /* The AMD K6 doesn't like addresses with the ModR/M byte set to
6495 00_xxx_100b, since its predecode logic can't detect the length of such
6496 instructions and decoding degenerates to vector decoding. Increase the
6497 cost of such addresses here. The penalty is at least 2 cycles. It may
6498 be worthwhile to split such addresses or even to refuse them altogether.
6499
6500 The following addressing modes are affected:
6501 [base+scale*index]
6502 [scale*index+disp]
6503 [base+index]
6504
6505 The first and last cases may be avoidable by explicitly coding the zero
6506 displacement in the memory address, but I don't have an AMD K6 machine
6507 handy to check this theory. */
6508
6509 if (TARGET_K6
6510 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6511 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6512 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6513 cost += 10;
6514
6515 return cost;
6516 }
6517 \f
6518 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6519 this is used to form addresses of local data when -fPIC is in
6520 use. */
6521
6522 static bool
6523 darwin_local_data_pic (rtx disp)
6524 {
6525 if (GET_CODE (disp) == MINUS)
6526 {
6527 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6528 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6529 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6530 {
6531 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6532 if (! strcmp (sym_name, "<pic base>"))
6533 return true;
6534 }
6535 }
6536
6537 return false;
6538 }
6539
6540 /* Determine if a given RTX is a valid constant. We already know this
6541 satisfies CONSTANT_P. */
6542
6543 bool
6544 legitimate_constant_p (rtx x)
6545 {
6546 switch (GET_CODE (x))
6547 {
6548 case CONST:
6549 x = XEXP (x, 0);
6550
6551 if (GET_CODE (x) == PLUS)
6552 {
6553 if (!CONST_INT_P (XEXP (x, 1)))
6554 return false;
6555 x = XEXP (x, 0);
6556 }
6557
6558 if (TARGET_MACHO && darwin_local_data_pic (x))
6559 return true;
6560
6561 /* Only some unspecs are valid as "constants". */
6562 if (GET_CODE (x) == UNSPEC)
6563 switch (XINT (x, 1))
6564 {
6565 case UNSPEC_GOT:
6566 case UNSPEC_GOTOFF:
6567 case UNSPEC_PLTOFF:
6568 return TARGET_64BIT;
6569 case UNSPEC_TPOFF:
6570 case UNSPEC_NTPOFF:
6571 x = XVECEXP (x, 0, 0);
6572 return (GET_CODE (x) == SYMBOL_REF
6573 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6574 case UNSPEC_DTPOFF:
6575 x = XVECEXP (x, 0, 0);
6576 return (GET_CODE (x) == SYMBOL_REF
6577 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6578 default:
6579 return false;
6580 }
6581
6582 /* We must have drilled down to a symbol. */
6583 if (GET_CODE (x) == LABEL_REF)
6584 return true;
6585 if (GET_CODE (x) != SYMBOL_REF)
6586 return false;
6587 /* FALLTHRU */
6588
6589 case SYMBOL_REF:
6590 /* TLS symbols are never valid. */
6591 if (SYMBOL_REF_TLS_MODEL (x))
6592 return false;
6593
6594 /* DLLIMPORT symbols are never valid. */
6595 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6596 && SYMBOL_REF_DLLIMPORT_P (x))
6597 return false;
6598 break;
6599
6600 case CONST_DOUBLE:
6601 if (GET_MODE (x) == TImode
6602 && x != CONST0_RTX (TImode)
6603 && !TARGET_64BIT)
6604 return false;
6605 break;
6606
6607 case CONST_VECTOR:
6608 if (x == CONST0_RTX (GET_MODE (x)))
6609 return true;
6610 return false;
6611
6612 default:
6613 break;
6614 }
6615
6616 /* Otherwise we handle everything else in the move patterns. */
6617 return true;
6618 }
6619
6620 /* Determine if it's legal to put X into the constant pool. This
6621 is not possible for the address of thread-local symbols, which
6622 is checked above. */
6623
6624 static bool
6625 ix86_cannot_force_const_mem (rtx x)
6626 {
6627 /* We can always put integral constants and vectors in memory. */
6628 switch (GET_CODE (x))
6629 {
6630 case CONST_INT:
6631 case CONST_DOUBLE:
6632 case CONST_VECTOR:
6633 return false;
6634
6635 default:
6636 break;
6637 }
6638 return !legitimate_constant_p (x);
6639 }
6640
6641 /* Determine if a given RTX is a valid constant address. */
6642
6643 bool
6644 constant_address_p (rtx x)
6645 {
6646 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6647 }
6648
6649 /* Nonzero if the constant value X is a legitimate general operand
6650 when generating PIC code. It is given that flag_pic is on and
6651 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6652
6653 bool
6654 legitimate_pic_operand_p (rtx x)
6655 {
6656 rtx inner;
6657
6658 switch (GET_CODE (x))
6659 {
6660 case CONST:
6661 inner = XEXP (x, 0);
6662 if (GET_CODE (inner) == PLUS
6663 && CONST_INT_P (XEXP (inner, 1)))
6664 inner = XEXP (inner, 0);
6665
6666 /* Only some unspecs are valid as "constants". */
6667 if (GET_CODE (inner) == UNSPEC)
6668 switch (XINT (inner, 1))
6669 {
6670 case UNSPEC_GOT:
6671 case UNSPEC_GOTOFF:
6672 case UNSPEC_PLTOFF:
6673 return TARGET_64BIT;
6674 case UNSPEC_TPOFF:
6675 x = XVECEXP (inner, 0, 0);
6676 return (GET_CODE (x) == SYMBOL_REF
6677 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6678 default:
6679 return false;
6680 }
6681 /* FALLTHRU */
6682
6683 case SYMBOL_REF:
6684 case LABEL_REF:
6685 return legitimate_pic_address_disp_p (x);
6686
6687 default:
6688 return true;
6689 }
6690 }
6691
6692 /* Determine if a given CONST RTX is a valid memory displacement
6693 in PIC mode. */
6694
6695 int
6696 legitimate_pic_address_disp_p (rtx disp)
6697 {
6698 bool saw_plus;
6699
6700 /* In 64bit mode we can allow direct addresses of symbols and labels
6701 when they are not dynamic symbols. */
6702 if (TARGET_64BIT)
6703 {
6704 rtx op0 = disp, op1;
6705
6706 switch (GET_CODE (disp))
6707 {
6708 case LABEL_REF:
6709 return true;
6710
6711 case CONST:
6712 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6713 break;
6714 op0 = XEXP (XEXP (disp, 0), 0);
6715 op1 = XEXP (XEXP (disp, 0), 1);
6716 if (!CONST_INT_P (op1)
6717 || INTVAL (op1) >= 16*1024*1024
6718 || INTVAL (op1) < -16*1024*1024)
6719 break;
6720 if (GET_CODE (op0) == LABEL_REF)
6721 return true;
6722 if (GET_CODE (op0) != SYMBOL_REF)
6723 break;
6724 /* FALLTHRU */
6725
6726 case SYMBOL_REF:
6727 /* TLS references should always be enclosed in UNSPEC. */
6728 if (SYMBOL_REF_TLS_MODEL (op0))
6729 return false;
6730 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6731 && ix86_cmodel != CM_LARGE_PIC)
6732 return true;
6733 break;
6734
6735 default:
6736 break;
6737 }
6738 }
6739 if (GET_CODE (disp) != CONST)
6740 return 0;
6741 disp = XEXP (disp, 0);
6742
6743 if (TARGET_64BIT)
6744 {
6745 /* It is unsafe to allow PLUS expressions; this limits the allowed
6746 distance of GOT tables. We should not need these anyway. */
6747 if (GET_CODE (disp) != UNSPEC
6748 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6749 && XINT (disp, 1) != UNSPEC_GOTOFF
6750 && XINT (disp, 1) != UNSPEC_PLTOFF))
6751 return 0;
6752
6753 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6754 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6755 return 0;
6756 return 1;
6757 }
6758
6759 saw_plus = false;
6760 if (GET_CODE (disp) == PLUS)
6761 {
6762 if (!CONST_INT_P (XEXP (disp, 1)))
6763 return 0;
6764 disp = XEXP (disp, 0);
6765 saw_plus = true;
6766 }
6767
6768 if (TARGET_MACHO && darwin_local_data_pic (disp))
6769 return 1;
6770
6771 if (GET_CODE (disp) != UNSPEC)
6772 return 0;
6773
6774 switch (XINT (disp, 1))
6775 {
6776 case UNSPEC_GOT:
6777 if (saw_plus)
6778 return false;
6779 /* We need to check for both symbols and labels because VxWorks loads
6780 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6781 details. */
6782 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6783 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6784 case UNSPEC_GOTOFF:
6785 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6786 While the ABI also specifies a 32bit relocation, we don't produce it
6787 in the small PIC model at all. */
6788 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6789 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6790 && !TARGET_64BIT)
6791 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6792 return false;
6793 case UNSPEC_GOTTPOFF:
6794 case UNSPEC_GOTNTPOFF:
6795 case UNSPEC_INDNTPOFF:
6796 if (saw_plus)
6797 return false;
6798 disp = XVECEXP (disp, 0, 0);
6799 return (GET_CODE (disp) == SYMBOL_REF
6800 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6801 case UNSPEC_NTPOFF:
6802 disp = XVECEXP (disp, 0, 0);
6803 return (GET_CODE (disp) == SYMBOL_REF
6804 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6805 case UNSPEC_DTPOFF:
6806 disp = XVECEXP (disp, 0, 0);
6807 return (GET_CODE (disp) == SYMBOL_REF
6808 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6809 }
6810
6811 return 0;
6812 }
6813
6814 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6815 memory address for an instruction. The MODE argument is the machine mode
6816 for the MEM expression that wants to use this address.
6817
6818 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6819 convert common non-canonical forms to canonical form so that they will
6820 be recognized. */
6821
6822 int
6823 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6824 rtx addr, int strict)
6825 {
6826 struct ix86_address parts;
6827 rtx base, index, disp;
6828 HOST_WIDE_INT scale;
6829 const char *reason = NULL;
6830 rtx reason_rtx = NULL_RTX;
6831
6832 if (ix86_decompose_address (addr, &parts) <= 0)
6833 {
6834 reason = "decomposition failed";
6835 goto report_error;
6836 }
6837
6838 base = parts.base;
6839 index = parts.index;
6840 disp = parts.disp;
6841 scale = parts.scale;
6842
6843 /* Validate base register.
6844
6845 Don't allow SUBREGs that span more than a word here. They can lead to spill
6846 failures when the base is one word out of a two-word structure, which is
6847 represented internally as a DImode int. */
6848
6849 if (base)
6850 {
6851 rtx reg;
6852 reason_rtx = base;
6853
6854 if (REG_P (base))
6855 reg = base;
6856 else if (GET_CODE (base) == SUBREG
6857 && REG_P (SUBREG_REG (base))
6858 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6859 <= UNITS_PER_WORD)
6860 reg = SUBREG_REG (base);
6861 else
6862 {
6863 reason = "base is not a register";
6864 goto report_error;
6865 }
6866
6867 if (GET_MODE (base) != Pmode)
6868 {
6869 reason = "base is not in Pmode";
6870 goto report_error;
6871 }
6872
6873 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6874 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6875 {
6876 reason = "base is not valid";
6877 goto report_error;
6878 }
6879 }
6880
6881 /* Validate index register.
6882
6883 Don't allow SUBREGs that span more than a word here -- same as above. */
6884
6885 if (index)
6886 {
6887 rtx reg;
6888 reason_rtx = index;
6889
6890 if (REG_P (index))
6891 reg = index;
6892 else if (GET_CODE (index) == SUBREG
6893 && REG_P (SUBREG_REG (index))
6894 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6895 <= UNITS_PER_WORD)
6896 reg = SUBREG_REG (index);
6897 else
6898 {
6899 reason = "index is not a register";
6900 goto report_error;
6901 }
6902
6903 if (GET_MODE (index) != Pmode)
6904 {
6905 reason = "index is not in Pmode";
6906 goto report_error;
6907 }
6908
6909 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6910 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6911 {
6912 reason = "index is not valid";
6913 goto report_error;
6914 }
6915 }
6916
6917 /* Validate scale factor. */
6918 if (scale != 1)
6919 {
6920 reason_rtx = GEN_INT (scale);
6921 if (!index)
6922 {
6923 reason = "scale without index";
6924 goto report_error;
6925 }
6926
6927 if (scale != 2 && scale != 4 && scale != 8)
6928 {
6929 reason = "scale is not a valid multiplier";
6930 goto report_error;
6931 }
6932 }
6933
6934 /* Validate displacement. */
6935 if (disp)
6936 {
6937 reason_rtx = disp;
6938
6939 if (GET_CODE (disp) == CONST
6940 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6941 switch (XINT (XEXP (disp, 0), 1))
6942 {
6943 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6944 used. While the ABI also specifies 32bit relocations, we don't
6945 produce them at all and use IP-relative addressing instead. */
6946 case UNSPEC_GOT:
6947 case UNSPEC_GOTOFF:
6948 gcc_assert (flag_pic);
6949 if (!TARGET_64BIT)
6950 goto is_legitimate_pic;
6951 reason = "64bit address unspec";
6952 goto report_error;
6953
6954 case UNSPEC_GOTPCREL:
6955 gcc_assert (flag_pic);
6956 goto is_legitimate_pic;
6957
6958 case UNSPEC_GOTTPOFF:
6959 case UNSPEC_GOTNTPOFF:
6960 case UNSPEC_INDNTPOFF:
6961 case UNSPEC_NTPOFF:
6962 case UNSPEC_DTPOFF:
6963 break;
6964
6965 default:
6966 reason = "invalid address unspec";
6967 goto report_error;
6968 }
6969
6970 else if (SYMBOLIC_CONST (disp)
6971 && (flag_pic
6972 || (TARGET_MACHO
6973 #if TARGET_MACHO
6974 && MACHOPIC_INDIRECT
6975 && !machopic_operand_p (disp)
6976 #endif
6977 )))
6978 {
6979
6980 is_legitimate_pic:
6981 if (TARGET_64BIT && (index || base))
6982 {
6983 /* foo@dtpoff(%rX) is ok. */
6984 if (GET_CODE (disp) != CONST
6985 || GET_CODE (XEXP (disp, 0)) != PLUS
6986 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6987 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6988 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6989 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6990 {
6991 reason = "non-constant pic memory reference";
6992 goto report_error;
6993 }
6994 }
6995 else if (! legitimate_pic_address_disp_p (disp))
6996 {
6997 reason = "displacement is an invalid pic construct";
6998 goto report_error;
6999 }
7000
7001 /* This code used to verify that a symbolic pic displacement
7002 includes the pic_offset_table_rtx register.
7003
7004 While this is a good idea, unfortunately these constructs may
7005 be created by the "adds using lea" optimization for incorrect
7006 code like:
7007
7008 int a;
7009 int foo(int i)
7010 {
7011 return *(&a+i);
7012 }
7013
7014 This code is nonsensical, but results in addressing the
7015 GOT table with a pic_offset_table_rtx base. We can't
7016 just refuse it easily, since it gets matched by the
7017 "addsi3" pattern, which later gets split to lea when the
7018 output register differs from the input. While this could
7019 be handled by a separate addsi pattern for this case that
7020 never results in lea, disabling this test seems to be the
7021 easier and correct fix for the crash. */
7022 }
7023 else if (GET_CODE (disp) != LABEL_REF
7024 && !CONST_INT_P (disp)
7025 && (GET_CODE (disp) != CONST
7026 || !legitimate_constant_p (disp))
7027 && (GET_CODE (disp) != SYMBOL_REF
7028 || !legitimate_constant_p (disp)))
7029 {
7030 reason = "displacement is not constant";
7031 goto report_error;
7032 }
7033 else if (TARGET_64BIT
7034 && !x86_64_immediate_operand (disp, VOIDmode))
7035 {
7036 reason = "displacement is out of range";
7037 goto report_error;
7038 }
7039 }
7040
7041 /* Everything looks valid. */
7042 return TRUE;
7043
7044 report_error:
7045 return FALSE;
7046 }
7047 \f
7048 /* Return a unique alias set for the GOT. */
7049
7050 static HOST_WIDE_INT
7051 ix86_GOT_alias_set (void)
7052 {
7053 static HOST_WIDE_INT set = -1;
7054 if (set == -1)
7055 set = new_alias_set ();
7056 return set;
7057 }
7058
7059 /* Return a legitimate reference for ORIG (an address) using the
7060 register REG. If REG is 0, a new pseudo is generated.
7061
7062 There are two types of references that must be handled:
7063
7064 1. Global data references must load the address from the GOT, via
7065 the PIC reg. An insn is emitted to do this load, and the reg is
7066 returned.
7067
7068 2. Static data references, constant pool addresses, and code labels
7069 compute the address as an offset from the GOT, whose base is in
7070 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7071 differentiate them from global data objects. The returned
7072 address is the PIC reg + an unspec constant.
7073
7074 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7075 reg also appears in the address. */
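/* As a rough illustration of the 32-bit ELF case handled below: a global
   symbol is turned into a load from the GOT, i.e. a MEM of
     (plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOT)))
   copied into a register, while a local symbol or label becomes the
   GOT-relative sum
     (plus pic_offset_table_rtx (const (unspec [symbol] UNSPEC_GOTOFF))).  */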
7076
7077 static rtx
7078 legitimize_pic_address (rtx orig, rtx reg)
7079 {
7080 rtx addr = orig;
7081 rtx new = orig;
7082 rtx base;
7083
7084 #if TARGET_MACHO
7085 if (TARGET_MACHO && !TARGET_64BIT)
7086 {
7087 if (reg == 0)
7088 reg = gen_reg_rtx (Pmode);
7089 /* Use the generic Mach-O PIC machinery. */
7090 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7091 }
7092 #endif
7093
7094 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7095 new = addr;
7096 else if (TARGET_64BIT
7097 && ix86_cmodel != CM_SMALL_PIC
7098 && gotoff_operand (addr, Pmode))
7099 {
7100 rtx tmpreg;
7101 /* This symbol may be referenced via a displacement from the PIC
7102 base address (@GOTOFF). */
7103
7104 if (reload_in_progress)
7105 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7106 if (GET_CODE (addr) == CONST)
7107 addr = XEXP (addr, 0);
7108 if (GET_CODE (addr) == PLUS)
7109 {
7110 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7111 UNSPEC_GOTOFF);
7112 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7113 }
7114 else
7115 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7116 new = gen_rtx_CONST (Pmode, new);
7117 if (!reg)
7118 tmpreg = gen_reg_rtx (Pmode);
7119 else
7120 tmpreg = reg;
7121 emit_move_insn (tmpreg, new);
7122
7123 if (reg != 0)
7124 {
7125 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7126 tmpreg, 1, OPTAB_DIRECT);
7127 new = reg;
7128 }
7129 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7130 }
7131 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7132 {
7133 /* This symbol may be referenced via a displacement from the PIC
7134 base address (@GOTOFF). */
7135
7136 if (reload_in_progress)
7137 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7138 if (GET_CODE (addr) == CONST)
7139 addr = XEXP (addr, 0);
7140 if (GET_CODE (addr) == PLUS)
7141 {
7142 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7143 UNSPEC_GOTOFF);
7144 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7145 }
7146 else
7147 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7148 new = gen_rtx_CONST (Pmode, new);
7149 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7150
7151 if (reg != 0)
7152 {
7153 emit_move_insn (reg, new);
7154 new = reg;
7155 }
7156 }
7157 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7158 /* We can't use @GOTOFF for text labels on VxWorks;
7159 see gotoff_operand. */
7160 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7161 {
7162 /* Given that we've already handled dllimport variables separately
7163 in legitimize_address, and all other variables should satisfy
7164 legitimate_pic_address_disp_p, we should never arrive here. */
7165 gcc_assert (!TARGET_64BIT_MS_ABI);
7166
7167 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7168 {
7169 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7170 new = gen_rtx_CONST (Pmode, new);
7171 new = gen_const_mem (Pmode, new);
7172 set_mem_alias_set (new, ix86_GOT_alias_set ());
7173
7174 if (reg == 0)
7175 reg = gen_reg_rtx (Pmode);
7176 /* Use gen_movsi directly, otherwise the address is loaded
7177 into a register for CSE. We don't want to CSE these addresses;
7178 instead we CSE addresses from the GOT table, so skip this. */
7179 emit_insn (gen_movsi (reg, new));
7180 new = reg;
7181 }
7182 else
7183 {
7184 /* This symbol must be referenced via a load from the
7185 Global Offset Table (@GOT). */
7186
7187 if (reload_in_progress)
7188 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7189 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7190 new = gen_rtx_CONST (Pmode, new);
7191 if (TARGET_64BIT)
7192 new = force_reg (Pmode, new);
7193 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7194 new = gen_const_mem (Pmode, new);
7195 set_mem_alias_set (new, ix86_GOT_alias_set ());
7196
7197 if (reg == 0)
7198 reg = gen_reg_rtx (Pmode);
7199 emit_move_insn (reg, new);
7200 new = reg;
7201 }
7202 }
7203 else
7204 {
7205 if (CONST_INT_P (addr)
7206 && !x86_64_immediate_operand (addr, VOIDmode))
7207 {
7208 if (reg)
7209 {
7210 emit_move_insn (reg, addr);
7211 new = reg;
7212 }
7213 else
7214 new = force_reg (Pmode, addr);
7215 }
7216 else if (GET_CODE (addr) == CONST)
7217 {
7218 addr = XEXP (addr, 0);
7219
7220 /* We must match what we generated earlier. Assume the only
7221 unspecs that can get here are ours. Not that we could do
7222 anything with them anyway.... */
7223 if (GET_CODE (addr) == UNSPEC
7224 || (GET_CODE (addr) == PLUS
7225 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7226 return orig;
7227 gcc_assert (GET_CODE (addr) == PLUS);
7228 }
7229 if (GET_CODE (addr) == PLUS)
7230 {
7231 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7232
7233 /* Check first to see if this is a constant offset from a @GOTOFF
7234 symbol reference. */
7235 if (gotoff_operand (op0, Pmode)
7236 && CONST_INT_P (op1))
7237 {
7238 if (!TARGET_64BIT)
7239 {
7240 if (reload_in_progress)
7241 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7242 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7243 UNSPEC_GOTOFF);
7244 new = gen_rtx_PLUS (Pmode, new, op1);
7245 new = gen_rtx_CONST (Pmode, new);
7246 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7247
7248 if (reg != 0)
7249 {
7250 emit_move_insn (reg, new);
7251 new = reg;
7252 }
7253 }
7254 else
7255 {
7256 if (INTVAL (op1) < -16*1024*1024
7257 || INTVAL (op1) >= 16*1024*1024)
7258 {
7259 if (!x86_64_immediate_operand (op1, Pmode))
7260 op1 = force_reg (Pmode, op1);
7261 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7262 }
7263 }
7264 }
7265 else
7266 {
7267 base = legitimize_pic_address (XEXP (addr, 0), reg);
7268 new = legitimize_pic_address (XEXP (addr, 1),
7269 base == reg ? NULL_RTX : reg);
7270
7271 if (CONST_INT_P (new))
7272 new = plus_constant (base, INTVAL (new));
7273 else
7274 {
7275 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7276 {
7277 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7278 new = XEXP (new, 1);
7279 }
7280 new = gen_rtx_PLUS (Pmode, base, new);
7281 }
7282 }
7283 }
7284 }
7285 return new;
7286 }
7287 \f
7288 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7289
7290 static rtx
7291 get_thread_pointer (int to_reg)
7292 {
7293 rtx tp, reg, insn;
7294
7295 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7296 if (!to_reg)
7297 return tp;
7298
7299 reg = gen_reg_rtx (Pmode);
7300 insn = gen_rtx_SET (VOIDmode, reg, tp);
7301 insn = emit_insn (insn);
7302
7303 return reg;
7304 }
7305
7306 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7307 false if we expect this to be used for a memory address and true if
7308 we expect to load the address into a register. */
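/* In outline, the cases below are: the global- and local-dynamic models
   call the tls_get_addr machinery (gen_tls_global_dynamic_* /
   gen_tls_local_dynamic_base_*); initial-exec loads the offset from the
   GOT via UNSPEC_GOTTPOFF / UNSPEC_GOTNTPOFF / UNSPEC_INDNTPOFF and
   combines it with the thread pointer; local-exec uses a link-time
   constant offset (UNSPEC_NTPOFF or UNSPEC_TPOFF) from the thread
   pointer.  */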
7309
7310 static rtx
7311 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7312 {
7313 rtx dest, base, off, pic, tp;
7314 int type;
7315
7316 switch (model)
7317 {
7318 case TLS_MODEL_GLOBAL_DYNAMIC:
7319 dest = gen_reg_rtx (Pmode);
7320 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7321
7322 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7323 {
7324 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7325
7326 start_sequence ();
7327 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7328 insns = get_insns ();
7329 end_sequence ();
7330
7331 CONST_OR_PURE_CALL_P (insns) = 1;
7332 emit_libcall_block (insns, dest, rax, x);
7333 }
7334 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7335 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7336 else
7337 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7338
7339 if (TARGET_GNU2_TLS)
7340 {
7341 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7342
7343 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7344 }
7345 break;
7346
7347 case TLS_MODEL_LOCAL_DYNAMIC:
7348 base = gen_reg_rtx (Pmode);
7349 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7350
7351 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7352 {
7353 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7354
7355 start_sequence ();
7356 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7357 insns = get_insns ();
7358 end_sequence ();
7359
7360 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7361 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7362 CONST_OR_PURE_CALL_P (insns) = 1;
7363 emit_libcall_block (insns, base, rax, note);
7364 }
7365 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7366 emit_insn (gen_tls_local_dynamic_base_64 (base));
7367 else
7368 emit_insn (gen_tls_local_dynamic_base_32 (base));
7369
7370 if (TARGET_GNU2_TLS)
7371 {
7372 rtx x = ix86_tls_module_base ();
7373
7374 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7375 gen_rtx_MINUS (Pmode, x, tp));
7376 }
7377
7378 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7379 off = gen_rtx_CONST (Pmode, off);
7380
7381 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7382
7383 if (TARGET_GNU2_TLS)
7384 {
7385 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7386
7387 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7388 }
7389
7390 break;
7391
7392 case TLS_MODEL_INITIAL_EXEC:
7393 if (TARGET_64BIT)
7394 {
7395 pic = NULL;
7396 type = UNSPEC_GOTNTPOFF;
7397 }
7398 else if (flag_pic)
7399 {
7400 if (reload_in_progress)
7401 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7402 pic = pic_offset_table_rtx;
7403 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7404 }
7405 else if (!TARGET_ANY_GNU_TLS)
7406 {
7407 pic = gen_reg_rtx (Pmode);
7408 emit_insn (gen_set_got (pic));
7409 type = UNSPEC_GOTTPOFF;
7410 }
7411 else
7412 {
7413 pic = NULL;
7414 type = UNSPEC_INDNTPOFF;
7415 }
7416
7417 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7418 off = gen_rtx_CONST (Pmode, off);
7419 if (pic)
7420 off = gen_rtx_PLUS (Pmode, pic, off);
7421 off = gen_const_mem (Pmode, off);
7422 set_mem_alias_set (off, ix86_GOT_alias_set ());
7423
7424 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7425 {
7426 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7427 off = force_reg (Pmode, off);
7428 return gen_rtx_PLUS (Pmode, base, off);
7429 }
7430 else
7431 {
7432 base = get_thread_pointer (true);
7433 dest = gen_reg_rtx (Pmode);
7434 emit_insn (gen_subsi3 (dest, base, off));
7435 }
7436 break;
7437
7438 case TLS_MODEL_LOCAL_EXEC:
7439 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7440 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7441 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7442 off = gen_rtx_CONST (Pmode, off);
7443
7444 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7445 {
7446 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7447 return gen_rtx_PLUS (Pmode, base, off);
7448 }
7449 else
7450 {
7451 base = get_thread_pointer (true);
7452 dest = gen_reg_rtx (Pmode);
7453 emit_insn (gen_subsi3 (dest, base, off));
7454 }
7455 break;
7456
7457 default:
7458 gcc_unreachable ();
7459 }
7460
7461 return dest;
7462 }
7463
7464 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7465 to symbol DECL. */
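/* For example (illustrative only): a dllimport declaration named "foo"
   gets a VAR_DECL whose DECL_RTL is a constant MEM of the symbol
   "*__imp__foo", or "*__imp_foo" when the name carries the fastcall
   prefix, as built below.  */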
7466
7467 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7468 htab_t dllimport_map;
7469
7470 static tree
7471 get_dllimport_decl (tree decl)
7472 {
7473 struct tree_map *h, in;
7474 void **loc;
7475 const char *name;
7476 const char *prefix;
7477 size_t namelen, prefixlen;
7478 char *imp_name;
7479 tree to;
7480 rtx rtl;
7481
7482 if (!dllimport_map)
7483 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7484
7485 in.hash = htab_hash_pointer (decl);
7486 in.base.from = decl;
7487 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7488 h = *loc;
7489 if (h)
7490 return h->to;
7491
7492 *loc = h = ggc_alloc (sizeof (struct tree_map));
7493 h->hash = in.hash;
7494 h->base.from = decl;
7495 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7496 DECL_ARTIFICIAL (to) = 1;
7497 DECL_IGNORED_P (to) = 1;
7498 DECL_EXTERNAL (to) = 1;
7499 TREE_READONLY (to) = 1;
7500
7501 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7502 name = targetm.strip_name_encoding (name);
7503 if (name[0] == FASTCALL_PREFIX)
7504 {
7505 name++;
7506 prefix = "*__imp_";
7507 }
7508 else
7509 prefix = "*__imp__";
7510
7511 namelen = strlen (name);
7512 prefixlen = strlen (prefix);
7513 imp_name = alloca (namelen + prefixlen + 1);
7514 memcpy (imp_name, prefix, prefixlen);
7515 memcpy (imp_name + prefixlen, name, namelen + 1);
7516
7517 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7518 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7519 SET_SYMBOL_REF_DECL (rtl, to);
7520 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7521
7522 rtl = gen_const_mem (Pmode, rtl);
7523 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7524
7525 SET_DECL_RTL (to, rtl);
7526
7527 return to;
7528 }
7529
7530 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7531 true if we require the result be a register. */
7532
7533 static rtx
7534 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7535 {
7536 tree imp_decl;
7537 rtx x;
7538
7539 gcc_assert (SYMBOL_REF_DECL (symbol));
7540 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7541
7542 x = DECL_RTL (imp_decl);
7543 if (want_reg)
7544 x = force_reg (Pmode, x);
7545 return x;
7546 }
7547
7548 /* Try machine-dependent ways of modifying an illegitimate address
7549 to be legitimate. If we find one, return the new, valid address.
7550 This macro is used in only one place: `memory_address' in explow.c.
7551
7552 OLDX is the address as it was before break_out_memory_refs was called.
7553 In some cases it is useful to look at this to decide what needs to be done.
7554
7555 MODE and WIN are passed so that this macro can use
7556 GO_IF_LEGITIMATE_ADDRESS.
7557
7558 It is always safe for this macro to do nothing. It exists to recognize
7559 opportunities to optimize the output.
7560
7561 For the 80386, we handle X+REG by loading X into a register R and
7562 using R+REG. R will go in a general reg and indexing will be used.
7563 However, if REG is a broken-out memory address or multiplication,
7564 nothing needs to be done because REG can certainly go in a general reg.
7565
7566 When -fpic is used, special handling is needed for symbolic references.
7567 See comments by legitimize_pic_address in i386.c for details. */
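/* A couple of illustrative transformations performed below: an address
   such as (plus (ashift (reg) (const_int 2)) (reg)) is rewritten as
   (plus (mult (reg) (const_int 4)) (reg)) so that it matches the
   canonical scaled-index form, and symbolic constants under -fpic are
   routed through legitimize_pic_address.  */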
7568
7569 rtx
7570 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7571 {
7572 int changed = 0;
7573 unsigned log;
7574
7575 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7576 if (log)
7577 return legitimize_tls_address (x, log, false);
7578 if (GET_CODE (x) == CONST
7579 && GET_CODE (XEXP (x, 0)) == PLUS
7580 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7581 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7582 {
7583 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7584 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7585 }
7586
7587 if (flag_pic && SYMBOLIC_CONST (x))
7588 return legitimize_pic_address (x, 0);
7589
7590 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7591 {
7592 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7593 return legitimize_dllimport_symbol (x, true);
7594 if (GET_CODE (x) == CONST
7595 && GET_CODE (XEXP (x, 0)) == PLUS
7596 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7597 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7598 {
7599 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7600 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7601 }
7602 }
7603
7604 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7605 if (GET_CODE (x) == ASHIFT
7606 && CONST_INT_P (XEXP (x, 1))
7607 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7608 {
7609 changed = 1;
7610 log = INTVAL (XEXP (x, 1));
7611 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7612 GEN_INT (1 << log));
7613 }
7614
7615 if (GET_CODE (x) == PLUS)
7616 {
7617 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7618
7619 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7620 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7621 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7622 {
7623 changed = 1;
7624 log = INTVAL (XEXP (XEXP (x, 0), 1));
7625 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7626 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7627 GEN_INT (1 << log));
7628 }
7629
7630 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7631 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7632 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7633 {
7634 changed = 1;
7635 log = INTVAL (XEXP (XEXP (x, 1), 1));
7636 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7637 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7638 GEN_INT (1 << log));
7639 }
7640
7641 /* Put multiply first if it isn't already. */
7642 if (GET_CODE (XEXP (x, 1)) == MULT)
7643 {
7644 rtx tmp = XEXP (x, 0);
7645 XEXP (x, 0) = XEXP (x, 1);
7646 XEXP (x, 1) = tmp;
7647 changed = 1;
7648 }
7649
7650 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7651 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7652 created by virtual register instantiation, register elimination, and
7653 similar optimizations. */
7654 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7655 {
7656 changed = 1;
7657 x = gen_rtx_PLUS (Pmode,
7658 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7659 XEXP (XEXP (x, 1), 0)),
7660 XEXP (XEXP (x, 1), 1));
7661 }
7662
7663 /* Canonicalize
7664 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7665 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7666 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7667 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7668 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7669 && CONSTANT_P (XEXP (x, 1)))
7670 {
7671 rtx constant;
7672 rtx other = NULL_RTX;
7673
7674 if (CONST_INT_P (XEXP (x, 1)))
7675 {
7676 constant = XEXP (x, 1);
7677 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7678 }
7679 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7680 {
7681 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7682 other = XEXP (x, 1);
7683 }
7684 else
7685 constant = 0;
7686
7687 if (constant)
7688 {
7689 changed = 1;
7690 x = gen_rtx_PLUS (Pmode,
7691 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7692 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7693 plus_constant (other, INTVAL (constant)));
7694 }
7695 }
7696
7697 if (changed && legitimate_address_p (mode, x, FALSE))
7698 return x;
7699
7700 if (GET_CODE (XEXP (x, 0)) == MULT)
7701 {
7702 changed = 1;
7703 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7704 }
7705
7706 if (GET_CODE (XEXP (x, 1)) == MULT)
7707 {
7708 changed = 1;
7709 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7710 }
7711
7712 if (changed
7713 && REG_P (XEXP (x, 1))
7714 && REG_P (XEXP (x, 0)))
7715 return x;
7716
7717 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7718 {
7719 changed = 1;
7720 x = legitimize_pic_address (x, 0);
7721 }
7722
7723 if (changed && legitimate_address_p (mode, x, FALSE))
7724 return x;
7725
7726 if (REG_P (XEXP (x, 0)))
7727 {
7728 rtx temp = gen_reg_rtx (Pmode);
7729 rtx val = force_operand (XEXP (x, 1), temp);
7730 if (val != temp)
7731 emit_move_insn (temp, val);
7732
7733 XEXP (x, 1) = temp;
7734 return x;
7735 }
7736
7737 else if (REG_P (XEXP (x, 1)))
7738 {
7739 rtx temp = gen_reg_rtx (Pmode);
7740 rtx val = force_operand (XEXP (x, 0), temp);
7741 if (val != temp)
7742 emit_move_insn (temp, val);
7743
7744 XEXP (x, 0) = temp;
7745 return x;
7746 }
7747 }
7748
7749 return x;
7750 }
7751 \f
7752 /* Print an integer constant expression in assembler syntax. Addition
7753 and subtraction are the only arithmetic that may appear in these
7754 expressions. FILE is the stdio stream to write to, X is the rtx, and
7755 CODE is the operand print code from the output string. */
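/* For example, per the UNSPEC switch below, (unspec [sym] UNSPEC_GOTOFF)
   is printed as "sym@GOTOFF" and (unspec [sym] UNSPEC_GOTPCREL) as
   "sym@GOTPCREL(%rip)".  */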
7756
7757 static void
7758 output_pic_addr_const (FILE *file, rtx x, int code)
7759 {
7760 char buf[256];
7761
7762 switch (GET_CODE (x))
7763 {
7764 case PC:
7765 gcc_assert (flag_pic);
7766 putc ('.', file);
7767 break;
7768
7769 case SYMBOL_REF:
7770 if (! TARGET_MACHO || TARGET_64BIT)
7771 output_addr_const (file, x);
7772 else
7773 {
7774 const char *name = XSTR (x, 0);
7775
7776 /* Mark the decl as referenced so that cgraph will
7777 output the function. */
7778 if (SYMBOL_REF_DECL (x))
7779 mark_decl_referenced (SYMBOL_REF_DECL (x));
7780
7781 #if TARGET_MACHO
7782 if (MACHOPIC_INDIRECT
7783 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7784 name = machopic_indirection_name (x, /*stub_p=*/true);
7785 #endif
7786 assemble_name (file, name);
7787 }
7788 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7789 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7790 fputs ("@PLT", file);
7791 break;
7792
7793 case LABEL_REF:
7794 x = XEXP (x, 0);
7795 /* FALLTHRU */
7796 case CODE_LABEL:
7797 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7798 assemble_name (asm_out_file, buf);
7799 break;
7800
7801 case CONST_INT:
7802 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7803 break;
7804
7805 case CONST:
7806 /* This used to output parentheses around the expression,
7807 but that does not work on the 386 (either ATT or BSD assembler). */
7808 output_pic_addr_const (file, XEXP (x, 0), code);
7809 break;
7810
7811 case CONST_DOUBLE:
7812 if (GET_MODE (x) == VOIDmode)
7813 {
7814 /* We can use %d if the number is <32 bits and positive. */
7815 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7816 fprintf (file, "0x%lx%08lx",
7817 (unsigned long) CONST_DOUBLE_HIGH (x),
7818 (unsigned long) CONST_DOUBLE_LOW (x));
7819 else
7820 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7821 }
7822 else
7823 /* We can't handle floating point constants;
7824 PRINT_OPERAND must handle them. */
7825 output_operand_lossage ("floating constant misused");
7826 break;
7827
7828 case PLUS:
7829 /* Some assemblers need integer constants to appear first. */
7830 if (CONST_INT_P (XEXP (x, 0)))
7831 {
7832 output_pic_addr_const (file, XEXP (x, 0), code);
7833 putc ('+', file);
7834 output_pic_addr_const (file, XEXP (x, 1), code);
7835 }
7836 else
7837 {
7838 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7839 output_pic_addr_const (file, XEXP (x, 1), code);
7840 putc ('+', file);
7841 output_pic_addr_const (file, XEXP (x, 0), code);
7842 }
7843 break;
7844
7845 case MINUS:
7846 if (!TARGET_MACHO)
7847 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7848 output_pic_addr_const (file, XEXP (x, 0), code);
7849 putc ('-', file);
7850 output_pic_addr_const (file, XEXP (x, 1), code);
7851 if (!TARGET_MACHO)
7852 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7853 break;
7854
7855 case UNSPEC:
7856 gcc_assert (XVECLEN (x, 0) == 1);
7857 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7858 switch (XINT (x, 1))
7859 {
7860 case UNSPEC_GOT:
7861 fputs ("@GOT", file);
7862 break;
7863 case UNSPEC_GOTOFF:
7864 fputs ("@GOTOFF", file);
7865 break;
7866 case UNSPEC_PLTOFF:
7867 fputs ("@PLTOFF", file);
7868 break;
7869 case UNSPEC_GOTPCREL:
7870 fputs ("@GOTPCREL(%rip)", file);
7871 break;
7872 case UNSPEC_GOTTPOFF:
7873 /* FIXME: This might be @TPOFF in Sun ld too. */
7874 fputs ("@GOTTPOFF", file);
7875 break;
7876 case UNSPEC_TPOFF:
7877 fputs ("@TPOFF", file);
7878 break;
7879 case UNSPEC_NTPOFF:
7880 if (TARGET_64BIT)
7881 fputs ("@TPOFF", file);
7882 else
7883 fputs ("@NTPOFF", file);
7884 break;
7885 case UNSPEC_DTPOFF:
7886 fputs ("@DTPOFF", file);
7887 break;
7888 case UNSPEC_GOTNTPOFF:
7889 if (TARGET_64BIT)
7890 fputs ("@GOTTPOFF(%rip)", file);
7891 else
7892 fputs ("@GOTNTPOFF", file);
7893 break;
7894 case UNSPEC_INDNTPOFF:
7895 fputs ("@INDNTPOFF", file);
7896 break;
7897 default:
7898 output_operand_lossage ("invalid UNSPEC as operand");
7899 break;
7900 }
7901 break;
7902
7903 default:
7904 output_operand_lossage ("invalid expression as operand");
7905 }
7906 }
7907
7908 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7909 We need to emit DTP-relative relocations. */
7910
7911 static void ATTRIBUTE_UNUSED
7912 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7913 {
7914 fputs (ASM_LONG, file);
7915 output_addr_const (file, x);
7916 fputs ("@DTPOFF", file);
7917 switch (size)
7918 {
7919 case 4:
7920 break;
7921 case 8:
7922 fputs (", 0", file);
7923 break;
7924 default:
7925 gcc_unreachable ();
7926 }
7927 }
7928
7929 /* In the name of slightly smaller debug output, and to cater to
7930 general assembler lossage, recognize PIC+GOTOFF and turn it back
7931 into a direct symbol reference.
7932
7933 On Darwin, this is necessary to avoid a crash, because Darwin
7934 has a different PIC label for each routine but the DWARF debugging
7935 information is not associated with any particular routine, so it's
7936 necessary to remove references to the PIC label from RTL stored by
7937 the DWARF output code. */
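/* Illustratively, this turns an address of the form
     (plus pic_offset_table_rtx (const (unspec [sym] UNSPEC_GOTOFF)))
   back into plain "sym", reattaching any constant or register addend
   that was combined with it.  */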
7938
7939 static rtx
7940 ix86_delegitimize_address (rtx orig_x)
7941 {
7942 rtx x = orig_x;
7943 /* reg_addend is NULL or a multiple of some register. */
7944 rtx reg_addend = NULL_RTX;
7945 /* const_addend is NULL or a const_int. */
7946 rtx const_addend = NULL_RTX;
7947 /* This is the result, or NULL. */
7948 rtx result = NULL_RTX;
7949
7950 if (MEM_P (x))
7951 x = XEXP (x, 0);
7952
7953 if (TARGET_64BIT)
7954 {
7955 if (GET_CODE (x) != CONST
7956 || GET_CODE (XEXP (x, 0)) != UNSPEC
7957 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7958 || !MEM_P (orig_x))
7959 return orig_x;
7960 return XVECEXP (XEXP (x, 0), 0, 0);
7961 }
7962
7963 if (GET_CODE (x) != PLUS
7964 || GET_CODE (XEXP (x, 1)) != CONST)
7965 return orig_x;
7966
7967 if (REG_P (XEXP (x, 0))
7968 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7969 /* %ebx + GOT/GOTOFF */
7970 ;
7971 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7972 {
7973 /* %ebx + %reg * scale + GOT/GOTOFF */
7974 reg_addend = XEXP (x, 0);
7975 if (REG_P (XEXP (reg_addend, 0))
7976 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7977 reg_addend = XEXP (reg_addend, 1);
7978 else if (REG_P (XEXP (reg_addend, 1))
7979 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7980 reg_addend = XEXP (reg_addend, 0);
7981 else
7982 return orig_x;
7983 if (!REG_P (reg_addend)
7984 && GET_CODE (reg_addend) != MULT
7985 && GET_CODE (reg_addend) != ASHIFT)
7986 return orig_x;
7987 }
7988 else
7989 return orig_x;
7990
7991 x = XEXP (XEXP (x, 1), 0);
7992 if (GET_CODE (x) == PLUS
7993 && CONST_INT_P (XEXP (x, 1)))
7994 {
7995 const_addend = XEXP (x, 1);
7996 x = XEXP (x, 0);
7997 }
7998
7999 if (GET_CODE (x) == UNSPEC
8000 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8001 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8002 result = XVECEXP (x, 0, 0);
8003
8004 if (TARGET_MACHO && darwin_local_data_pic (x)
8005 && !MEM_P (orig_x))
8006 result = XEXP (x, 0);
8007
8008 if (! result)
8009 return orig_x;
8010
8011 if (const_addend)
8012 result = gen_rtx_PLUS (Pmode, result, const_addend);
8013 if (reg_addend)
8014 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8015 return result;
8016 }
8017
8018 /* If X is a machine specific address (i.e. a symbol or label being
8019 referenced as a displacement from the GOT implemented using an
8020 UNSPEC), then return the base term. Otherwise return X. */
8021
8022 rtx
8023 ix86_find_base_term (rtx x)
8024 {
8025 rtx term;
8026
8027 if (TARGET_64BIT)
8028 {
8029 if (GET_CODE (x) != CONST)
8030 return x;
8031 term = XEXP (x, 0);
8032 if (GET_CODE (term) == PLUS
8033 && (CONST_INT_P (XEXP (term, 1))
8034 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8035 term = XEXP (term, 0);
8036 if (GET_CODE (term) != UNSPEC
8037 || XINT (term, 1) != UNSPEC_GOTPCREL)
8038 return x;
8039
8040 term = XVECEXP (term, 0, 0);
8041
8042 if (GET_CODE (term) != SYMBOL_REF
8043 && GET_CODE (term) != LABEL_REF)
8044 return x;
8045
8046 return term;
8047 }
8048
8049 term = ix86_delegitimize_address (x);
8050
8051 if (GET_CODE (term) != SYMBOL_REF
8052 && GET_CODE (term) != LABEL_REF)
8053 return x;
8054
8055 return term;
8056 }
8057 \f
8058 static void
8059 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8060 int fp, FILE *file)
8061 {
8062 const char *suffix;
8063
8064 if (mode == CCFPmode || mode == CCFPUmode)
8065 {
8066 enum rtx_code second_code, bypass_code;
8067 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8068 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8069 code = ix86_fp_compare_code_to_integer (code);
8070 mode = CCmode;
8071 }
8072 if (reverse)
8073 code = reverse_condition (code);
8074
8075 switch (code)
8076 {
8077 case EQ:
8078 suffix = "e";
8079 break;
8080 case NE:
8081 suffix = "ne";
8082 break;
8083 case GT:
8084 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8085 suffix = "g";
8086 break;
8087 case GTU:
8088 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8089 Those same assemblers have the same but opposite lossage on cmov. */
8090 gcc_assert (mode == CCmode);
8091 suffix = fp ? "nbe" : "a";
8092 break;
8093 case LT:
8094 switch (mode)
8095 {
8096 case CCNOmode:
8097 case CCGOCmode:
8098 suffix = "s";
8099 break;
8100
8101 case CCmode:
8102 case CCGCmode:
8103 suffix = "l";
8104 break;
8105
8106 default:
8107 gcc_unreachable ();
8108 }
8109 break;
8110 case LTU:
8111 gcc_assert (mode == CCmode);
8112 suffix = "b";
8113 break;
8114 case GE:
8115 switch (mode)
8116 {
8117 case CCNOmode:
8118 case CCGOCmode:
8119 suffix = "ns";
8120 break;
8121
8122 case CCmode:
8123 case CCGCmode:
8124 suffix = "ge";
8125 break;
8126
8127 default:
8128 gcc_unreachable ();
8129 }
8130 break;
8131 case GEU:
8132 /* ??? As above. */
8133 gcc_assert (mode == CCmode);
8134 suffix = fp ? "nb" : "ae";
8135 break;
8136 case LE:
8137 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8138 suffix = "le";
8139 break;
8140 case LEU:
8141 gcc_assert (mode == CCmode);
8142 suffix = "be";
8143 break;
8144 case UNORDERED:
8145 suffix = fp ? "u" : "p";
8146 break;
8147 case ORDERED:
8148 suffix = fp ? "nu" : "np";
8149 break;
8150 default:
8151 gcc_unreachable ();
8152 }
8153 fputs (suffix, file);
8154 }
8155
8156 /* Print the name of register X to FILE based on its machine mode and number.
8157 If CODE is 'w', pretend the mode is HImode.
8158 If CODE is 'b', pretend the mode is QImode.
8159 If CODE is 'k', pretend the mode is SImode.
8160 If CODE is 'q', pretend the mode is DImode.
8161 If CODE is 'h', pretend the reg is the 'high' byte register.
8162 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
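/* For example, with CODE 'b' the register normally printed as "%eax"
   in AT&T syntax is printed as "%al", while the AMD extended registers
   follow their own scheme ("r8b", "r8w", "r8d", "r8" and so on), as
   handled below.  */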
8163
8164 void
8165 print_reg (rtx x, int code, FILE *file)
8166 {
8167 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8168 && REGNO (x) != FRAME_POINTER_REGNUM
8169 && REGNO (x) != FLAGS_REG
8170 && REGNO (x) != FPSR_REG
8171 && REGNO (x) != FPCR_REG);
8172
8173 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8174 putc ('%', file);
8175
8176 if (code == 'w' || MMX_REG_P (x))
8177 code = 2;
8178 else if (code == 'b')
8179 code = 1;
8180 else if (code == 'k')
8181 code = 4;
8182 else if (code == 'q')
8183 code = 8;
8184 else if (code == 'y')
8185 code = 3;
8186 else if (code == 'h')
8187 code = 0;
8188 else
8189 code = GET_MODE_SIZE (GET_MODE (x));
8190
8191 /* Irritatingly, AMD extended registers use a different naming convention
8192 from the normal registers. */
8193 if (REX_INT_REG_P (x))
8194 {
8195 gcc_assert (TARGET_64BIT);
8196 switch (code)
8197 {
8198 case 0:
8199 error ("extended registers have no high halves");
8200 break;
8201 case 1:
8202 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8203 break;
8204 case 2:
8205 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8206 break;
8207 case 4:
8208 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8209 break;
8210 case 8:
8211 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8212 break;
8213 default:
8214 error ("unsupported operand size for extended register");
8215 break;
8216 }
8217 return;
8218 }
8219 switch (code)
8220 {
8221 case 3:
8222 if (STACK_TOP_P (x))
8223 {
8224 fputs ("st(0)", file);
8225 break;
8226 }
8227 /* FALLTHRU */
8228 case 8:
8229 case 4:
8230 case 12:
8231 if (! ANY_FP_REG_P (x))
8232 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8233 /* FALLTHRU */
8234 case 16:
8235 case 2:
8236 normal:
8237 fputs (hi_reg_name[REGNO (x)], file);
8238 break;
8239 case 1:
8240 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8241 goto normal;
8242 fputs (qi_reg_name[REGNO (x)], file);
8243 break;
8244 case 0:
8245 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8246 goto normal;
8247 fputs (qi_high_reg_name[REGNO (x)], file);
8248 break;
8249 default:
8250 gcc_unreachable ();
8251 }
8252 }
8253
8254 /* Locate some local-dynamic symbol still in use by this function
8255 so that we can print its name in some tls_local_dynamic_base
8256 pattern. */
8257
8258 static int
8259 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8260 {
8261 rtx x = *px;
8262
8263 if (GET_CODE (x) == SYMBOL_REF
8264 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8265 {
8266 cfun->machine->some_ld_name = XSTR (x, 0);
8267 return 1;
8268 }
8269
8270 return 0;
8271 }
8272
8273 static const char *
8274 get_some_local_dynamic_name (void)
8275 {
8276 rtx insn;
8277
8278 if (cfun->machine->some_ld_name)
8279 return cfun->machine->some_ld_name;
8280
8281 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8282 if (INSN_P (insn)
8283 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8284 return cfun->machine->some_ld_name;
8285
8286 gcc_unreachable ();
8287 }
8288
8289 /* Meaning of CODE:
8290 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8291 C -- print opcode suffix for set/cmov insn.
8292 c -- like C, but print reversed condition
8293 F,f -- likewise, but for floating-point.
8294 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8295 otherwise nothing
8296 R -- print the prefix for register names.
8297 z -- print the opcode suffix for the size of the current operand.
8298 * -- print a star (in certain assembler syntax)
8299 A -- print an absolute memory reference.
8300 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8301 s -- print a shift double count, followed by the assembler's argument
8302 delimiter.
8303 b -- print the QImode name of the register for the indicated operand.
8304 %b0 would print %al if operands[0] is reg 0.
8305 w -- likewise, print the HImode name of the register.
8306 k -- likewise, print the SImode name of the register.
8307 q -- likewise, print the DImode name of the register.
8308 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8309 y -- print "st(0)" instead of "st" as a register.
8310 D -- print condition for SSE cmp instruction.
8311 P -- if PIC, print an @PLT suffix.
8312 X -- don't print any sort of PIC '@' suffix for a symbol.
8313 & -- print some in-use local-dynamic symbol name.
8314 H -- print a memory address offset by 8; used for sse high-parts
8315 */
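/* A few illustrative uses in output templates: "%z1" appends the 387
   size suffix derived from the mode of operand 1, "%k2" prints operand 2
   under its SImode register name, and "%+" may emit a "ds"/"cs" branch
   prediction prefix when hints are enabled, per the handling below.  */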
8316
8317 void
8318 print_operand (FILE *file, rtx x, int code)
8319 {
8320 if (code)
8321 {
8322 switch (code)
8323 {
8324 case '*':
8325 if (ASSEMBLER_DIALECT == ASM_ATT)
8326 putc ('*', file);
8327 return;
8328
8329 case '&':
8330 assemble_name (file, get_some_local_dynamic_name ());
8331 return;
8332
8333 case 'A':
8334 switch (ASSEMBLER_DIALECT)
8335 {
8336 case ASM_ATT:
8337 putc ('*', file);
8338 break;
8339
8340 case ASM_INTEL:
8341 /* Intel syntax. For absolute addresses, registers should not
8342 be surrounded by braces. */
8343 if (!REG_P (x))
8344 {
8345 putc ('[', file);
8346 PRINT_OPERAND (file, x, 0);
8347 putc (']', file);
8348 return;
8349 }
8350 break;
8351
8352 default:
8353 gcc_unreachable ();
8354 }
8355
8356 PRINT_OPERAND (file, x, 0);
8357 return;
8358
8359
8360 case 'L':
8361 if (ASSEMBLER_DIALECT == ASM_ATT)
8362 putc ('l', file);
8363 return;
8364
8365 case 'W':
8366 if (ASSEMBLER_DIALECT == ASM_ATT)
8367 putc ('w', file);
8368 return;
8369
8370 case 'B':
8371 if (ASSEMBLER_DIALECT == ASM_ATT)
8372 putc ('b', file);
8373 return;
8374
8375 case 'Q':
8376 if (ASSEMBLER_DIALECT == ASM_ATT)
8377 putc ('l', file);
8378 return;
8379
8380 case 'S':
8381 if (ASSEMBLER_DIALECT == ASM_ATT)
8382 putc ('s', file);
8383 return;
8384
8385 case 'T':
8386 if (ASSEMBLER_DIALECT == ASM_ATT)
8387 putc ('t', file);
8388 return;
8389
8390 case 'z':
8391 /* 387 opcodes don't get size suffixes if the operands are
8392 registers. */
8393 if (STACK_REG_P (x))
8394 return;
8395
8396 /* Likewise if using Intel opcodes. */
8397 if (ASSEMBLER_DIALECT == ASM_INTEL)
8398 return;
8399
8400 /* This is the size of op from size of operand. */
8401 switch (GET_MODE_SIZE (GET_MODE (x)))
8402 {
8403 case 1:
8404 putc ('b', file);
8405 return;
8406
8407 case 2:
8408 if (MEM_P (x))
8409 {
8410 #ifdef HAVE_GAS_FILDS_FISTS
8411 putc ('s', file);
8412 #endif
8413 return;
8414 }
8415 else
8416 putc ('w', file);
8417 return;
8418
8419 case 4:
8420 if (GET_MODE (x) == SFmode)
8421 {
8422 putc ('s', file);
8423 return;
8424 }
8425 else
8426 putc ('l', file);
8427 return;
8428
8429 case 12:
8430 case 16:
8431 putc ('t', file);
8432 return;
8433
8434 case 8:
8435 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8436 {
8437 #ifdef GAS_MNEMONICS
8438 putc ('q', file);
8439 #else
8440 putc ('l', file);
8441 putc ('l', file);
8442 #endif
8443 }
8444 else
8445 putc ('l', file);
8446 return;
8447
8448 default:
8449 gcc_unreachable ();
8450 }
8451
8452 case 'b':
8453 case 'w':
8454 case 'k':
8455 case 'q':
8456 case 'h':
8457 case 'y':
8458 case 'X':
8459 case 'P':
8460 break;
8461
8462 case 's':
8463 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8464 {
8465 PRINT_OPERAND (file, x, 0);
8466 putc (',', file);
8467 }
8468 return;
8469
8470 case 'D':
8471 /* Little bit of braindamage here. The SSE compare instructions
8472 use completely different names for the comparisons than the
8473 fp conditional moves do. */
8474 switch (GET_CODE (x))
8475 {
8476 case EQ:
8477 case UNEQ:
8478 fputs ("eq", file);
8479 break;
8480 case LT:
8481 case UNLT:
8482 fputs ("lt", file);
8483 break;
8484 case LE:
8485 case UNLE:
8486 fputs ("le", file);
8487 break;
8488 case UNORDERED:
8489 fputs ("unord", file);
8490 break;
8491 case NE:
8492 case LTGT:
8493 fputs ("neq", file);
8494 break;
8495 case UNGE:
8496 case GE:
8497 fputs ("nlt", file);
8498 break;
8499 case UNGT:
8500 case GT:
8501 fputs ("nle", file);
8502 break;
8503 case ORDERED:
8504 fputs ("ord", file);
8505 break;
8506 default:
8507 gcc_unreachable ();
8508 }
8509 return;
8510 case 'O':
8511 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8512 if (ASSEMBLER_DIALECT == ASM_ATT)
8513 {
8514 switch (GET_MODE (x))
8515 {
8516 case HImode: putc ('w', file); break;
8517 case SImode:
8518 case SFmode: putc ('l', file); break;
8519 case DImode:
8520 case DFmode: putc ('q', file); break;
8521 default: gcc_unreachable ();
8522 }
8523 putc ('.', file);
8524 }
8525 #endif
8526 return;
8527 case 'C':
8528 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8529 return;
8530 case 'F':
8531 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8532 if (ASSEMBLER_DIALECT == ASM_ATT)
8533 putc ('.', file);
8534 #endif
8535 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8536 return;
8537
8538 /* Like above, but reverse condition */
8539 case 'c':
8540 /* Check to see if argument to %c is really a constant
8541 and not a condition code which needs to be reversed. */
8542 if (!COMPARISON_P (x))
8543 {
8544 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8545 return;
8546 }
8547 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8548 return;
8549 case 'f':
8550 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8551 if (ASSEMBLER_DIALECT == ASM_ATT)
8552 putc ('.', file);
8553 #endif
8554 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8555 return;
8556
8557 case 'H':
8558 /* It doesn't actually matter what mode we use here, as we're
8559 only going to use this for printing. */
8560 x = adjust_address_nv (x, DImode, 8);
8561 break;
8562
8563 case '+':
8564 {
8565 rtx x;
8566
8567 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8568 return;
8569
8570 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8571 if (x)
8572 {
8573 int pred_val = INTVAL (XEXP (x, 0));
8574
8575 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8576 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8577 {
8578 int taken = pred_val > REG_BR_PROB_BASE / 2;
8579 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8580
8581 /* Emit hints only in the case default branch prediction
8582 heuristics would fail. */
8583 if (taken != cputaken)
8584 {
8585 /* We use 3e (DS) prefix for taken branches and
8586 2e (CS) prefix for not taken branches. */
8587 if (taken)
8588 fputs ("ds ; ", file);
8589 else
8590 fputs ("cs ; ", file);
8591 }
8592 }
8593 }
8594 return;
8595 }
8596 default:
8597 output_operand_lossage ("invalid operand code '%c'", code);
8598 }
8599 }
8600
8601 if (REG_P (x))
8602 print_reg (x, code, file);
8603
8604 else if (MEM_P (x))
8605 {
8606 /* No `byte ptr' prefix for call instructions. */
8607 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8608 {
8609 const char * size;
8610 switch (GET_MODE_SIZE (GET_MODE (x)))
8611 {
8612 case 1: size = "BYTE"; break;
8613 case 2: size = "WORD"; break;
8614 case 4: size = "DWORD"; break;
8615 case 8: size = "QWORD"; break;
8616 case 12: size = "XWORD"; break;
8617 case 16: size = "XMMWORD"; break;
8618 default:
8619 gcc_unreachable ();
8620 }
8621
8622 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8623 if (code == 'b')
8624 size = "BYTE";
8625 else if (code == 'w')
8626 size = "WORD";
8627 else if (code == 'k')
8628 size = "DWORD";
8629
8630 fputs (size, file);
8631 fputs (" PTR ", file);
8632 }
8633
8634 x = XEXP (x, 0);
8635 /* Avoid (%rip) for call operands. */
8636 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8637 && !CONST_INT_P (x))
8638 output_addr_const (file, x);
8639 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8640 output_operand_lossage ("invalid constraints for operand");
8641 else
8642 output_address (x);
8643 }
8644
8645 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8646 {
8647 REAL_VALUE_TYPE r;
8648 long l;
8649
8650 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8651 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8652
8653 if (ASSEMBLER_DIALECT == ASM_ATT)
8654 putc ('$', file);
8655 fprintf (file, "0x%08lx", l);
8656 }
8657
8658 /* These float cases don't actually occur as immediate operands. */
8659 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8660 {
8661 char dstr[30];
8662
8663 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8664 fprintf (file, "%s", dstr);
8665 }
8666
8667 else if (GET_CODE (x) == CONST_DOUBLE
8668 && GET_MODE (x) == XFmode)
8669 {
8670 char dstr[30];
8671
8672 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8673 fprintf (file, "%s", dstr);
8674 }
8675
8676 else
8677 {
8678 /* We have patterns that allow zero sets of memory, for instance.
8679 In 64-bit mode, we should probably support all 8-byte vectors,
8680 since we can in fact encode that into an immediate. */
8681 if (GET_CODE (x) == CONST_VECTOR)
8682 {
8683 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8684 x = const0_rtx;
8685 }
8686
8687 if (code != 'P')
8688 {
8689 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8690 {
8691 if (ASSEMBLER_DIALECT == ASM_ATT)
8692 putc ('$', file);
8693 }
8694 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8695 || GET_CODE (x) == LABEL_REF)
8696 {
8697 if (ASSEMBLER_DIALECT == ASM_ATT)
8698 putc ('$', file);
8699 else
8700 fputs ("OFFSET FLAT:", file);
8701 }
8702 }
8703 if (CONST_INT_P (x))
8704 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8705 else if (flag_pic)
8706 output_pic_addr_const (file, x, code);
8707 else
8708 output_addr_const (file, x);
8709 }
8710 }
8711 \f
8712 /* Print a memory operand whose address is ADDR. */
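/* For example, a base + index*scale + disp address comes out as
   "disp(%base,%index,scale)" in AT&T syntax and as
   "[base+disp+index*scale]" in Intel syntax, per the two branches
   below.  */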
8713
8714 void
8715 print_operand_address (FILE *file, rtx addr)
8716 {
8717 struct ix86_address parts;
8718 rtx base, index, disp;
8719 int scale;
8720 int ok = ix86_decompose_address (addr, &parts);
8721
8722 gcc_assert (ok);
8723
8724 base = parts.base;
8725 index = parts.index;
8726 disp = parts.disp;
8727 scale = parts.scale;
8728
8729 switch (parts.seg)
8730 {
8731 case SEG_DEFAULT:
8732 break;
8733 case SEG_FS:
8734 case SEG_GS:
8735 if (USER_LABEL_PREFIX[0] == 0)
8736 putc ('%', file);
8737 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8738 break;
8739 default:
8740 gcc_unreachable ();
8741 }
8742
8743 if (!base && !index)
8744 {
8745 /* A displacement-only address requires special attention. */
8746
8747 if (CONST_INT_P (disp))
8748 {
8749 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8750 {
8751 if (USER_LABEL_PREFIX[0] == 0)
8752 putc ('%', file);
8753 fputs ("ds:", file);
8754 }
8755 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8756 }
8757 else if (flag_pic)
8758 output_pic_addr_const (file, disp, 0);
8759 else
8760 output_addr_const (file, disp);
8761
8762 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8763 if (TARGET_64BIT)
8764 {
8765 if (GET_CODE (disp) == CONST
8766 && GET_CODE (XEXP (disp, 0)) == PLUS
8767 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8768 disp = XEXP (XEXP (disp, 0), 0);
8769 if (GET_CODE (disp) == LABEL_REF
8770 || (GET_CODE (disp) == SYMBOL_REF
8771 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8772 fputs ("(%rip)", file);
8773 }
8774 }
8775 else
8776 {
8777 if (ASSEMBLER_DIALECT == ASM_ATT)
8778 {
8779 if (disp)
8780 {
8781 if (flag_pic)
8782 output_pic_addr_const (file, disp, 0);
8783 else if (GET_CODE (disp) == LABEL_REF)
8784 output_asm_label (disp);
8785 else
8786 output_addr_const (file, disp);
8787 }
8788
8789 putc ('(', file);
8790 if (base)
8791 print_reg (base, 0, file);
8792 if (index)
8793 {
8794 putc (',', file);
8795 print_reg (index, 0, file);
8796 if (scale != 1)
8797 fprintf (file, ",%d", scale);
8798 }
8799 putc (')', file);
8800 }
8801 else
8802 {
8803 rtx offset = NULL_RTX;
8804
8805 if (disp)
8806 {
8807 /* Pull out the offset of a symbol; print any symbol itself. */
8808 if (GET_CODE (disp) == CONST
8809 && GET_CODE (XEXP (disp, 0)) == PLUS
8810 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8811 {
8812 offset = XEXP (XEXP (disp, 0), 1);
8813 disp = gen_rtx_CONST (VOIDmode,
8814 XEXP (XEXP (disp, 0), 0));
8815 }
8816
8817 if (flag_pic)
8818 output_pic_addr_const (file, disp, 0);
8819 else if (GET_CODE (disp) == LABEL_REF)
8820 output_asm_label (disp);
8821 else if (CONST_INT_P (disp))
8822 offset = disp;
8823 else
8824 output_addr_const (file, disp);
8825 }
8826
8827 putc ('[', file);
8828 if (base)
8829 {
8830 print_reg (base, 0, file);
8831 if (offset)
8832 {
8833 if (INTVAL (offset) >= 0)
8834 putc ('+', file);
8835 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8836 }
8837 }
8838 else if (offset)
8839 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8840 else
8841 putc ('0', file);
8842
8843 if (index)
8844 {
8845 putc ('+', file);
8846 print_reg (index, 0, file);
8847 if (scale != 1)
8848 fprintf (file, "*%d", scale);
8849 }
8850 putc (']', file);
8851 }
8852 }
8853 }
8854
8855 bool
8856 output_addr_const_extra (FILE *file, rtx x)
8857 {
8858 rtx op;
8859
8860 if (GET_CODE (x) != UNSPEC)
8861 return false;
8862
8863 op = XVECEXP (x, 0, 0);
8864 switch (XINT (x, 1))
8865 {
8866 case UNSPEC_GOTTPOFF:
8867 output_addr_const (file, op);
8868 /* FIXME: This might be @TPOFF in Sun ld. */
8869 fputs ("@GOTTPOFF", file);
8870 break;
8871 case UNSPEC_TPOFF:
8872 output_addr_const (file, op);
8873 fputs ("@TPOFF", file);
8874 break;
8875 case UNSPEC_NTPOFF:
8876 output_addr_const (file, op);
8877 if (TARGET_64BIT)
8878 fputs ("@TPOFF", file);
8879 else
8880 fputs ("@NTPOFF", file);
8881 break;
8882 case UNSPEC_DTPOFF:
8883 output_addr_const (file, op);
8884 fputs ("@DTPOFF", file);
8885 break;
8886 case UNSPEC_GOTNTPOFF:
8887 output_addr_const (file, op);
8888 if (TARGET_64BIT)
8889 fputs ("@GOTTPOFF(%rip)", file);
8890 else
8891 fputs ("@GOTNTPOFF", file);
8892 break;
8893 case UNSPEC_INDNTPOFF:
8894 output_addr_const (file, op);
8895 fputs ("@INDNTPOFF", file);
8896 break;
8897
8898 default:
8899 return false;
8900 }
8901
8902 return true;
8903 }
8904 \f
8905 /* Split one or more DImode RTL references into pairs of SImode
8906 references. The RTL can be REG, offsettable MEM, integer constant, or
8907 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8908 split and "num" is its length. lo_half and hi_half are output arrays
8909 that parallel "operands". */
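/* For example, a DImode MEM is split with adjust_address into SImode
   halves at offsets 0 and 4, while a DImode REG or constant is split
   with simplify_gen_subreg, as done below.  */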
8910
8911 void
8912 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8913 {
8914 while (num--)
8915 {
8916 rtx op = operands[num];
8917
8918 /* simplify_subreg refuses to split volatile memory addresses,
8919 but we still have to handle them. */
8920 if (MEM_P (op))
8921 {
8922 lo_half[num] = adjust_address (op, SImode, 0);
8923 hi_half[num] = adjust_address (op, SImode, 4);
8924 }
8925 else
8926 {
8927 lo_half[num] = simplify_gen_subreg (SImode, op,
8928 GET_MODE (op) == VOIDmode
8929 ? DImode : GET_MODE (op), 0);
8930 hi_half[num] = simplify_gen_subreg (SImode, op,
8931 GET_MODE (op) == VOIDmode
8932 ? DImode : GET_MODE (op), 4);
8933 }
8934 }
8935 }
8936 /* Split one or more TImode RTL references into pairs of DImode
8937 references. The RTL can be REG, offsettable MEM, integer constant, or
8938 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8939 split and "num" is its length. lo_half and hi_half are output arrays
8940 that parallel "operands". */
8941
8942 void
8943 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8944 {
8945 while (num--)
8946 {
8947 rtx op = operands[num];
8948
8949 /* simplify_subreg refuses to split volatile memory addresses, but we
8950 still have to handle them. */
8951 if (MEM_P (op))
8952 {
8953 lo_half[num] = adjust_address (op, DImode, 0);
8954 hi_half[num] = adjust_address (op, DImode, 8);
8955 }
8956 else
8957 {
8958 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8959 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8960 }
8961 }
8962 }
8963 \f
8964 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8965 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8966 is the expression of the binary operation. The output may either be
8967 emitted here, or returned to the caller, like all output_* functions.
8968
8969 There is no guarantee that the operands are the same mode, as they
8970 might be within FLOAT or FLOAT_EXTEND expressions. */
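/* Illustratively: for SSE operands this returns a template such as
   "addsd\t{%2, %0|%0, %2}" (or the "ss" form for SFmode), while for
   x87 operands it builds an "fadd"/"fisub"/... template whose exact
   shape depends on whether an operand is in memory and which register
   holds the stack top, as worked out below.  */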
8971
8972 #ifndef SYSV386_COMPAT
8973 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8974 wants to fix the assemblers because that causes incompatibility
8975 with gcc. No-one wants to fix gcc because that causes
8976 incompatibility with assemblers... You can use the option of
8977 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8978 #define SYSV386_COMPAT 1
8979 #endif
8980
8981 const char *
8982 output_387_binary_op (rtx insn, rtx *operands)
8983 {
8984 static char buf[30];
8985 const char *p;
8986 const char *ssep;
8987 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8988
8989 #ifdef ENABLE_CHECKING
8990 /* Even if we do not want to check the inputs, this documents the input
8991 constraints, which helps in understanding the following code. */
8992 if (STACK_REG_P (operands[0])
8993 && ((REG_P (operands[1])
8994 && REGNO (operands[0]) == REGNO (operands[1])
8995 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8996 || (REG_P (operands[2])
8997 && REGNO (operands[0]) == REGNO (operands[2])
8998 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8999 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9000 ; /* ok */
9001 else
9002 gcc_assert (is_sse);
9003 #endif
9004
9005 switch (GET_CODE (operands[3]))
9006 {
9007 case PLUS:
9008 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9009 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9010 p = "fiadd";
9011 else
9012 p = "fadd";
9013 ssep = "add";
9014 break;
9015
9016 case MINUS:
9017 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9018 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9019 p = "fisub";
9020 else
9021 p = "fsub";
9022 ssep = "sub";
9023 break;
9024
9025 case MULT:
9026 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9027 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9028 p = "fimul";
9029 else
9030 p = "fmul";
9031 ssep = "mul";
9032 break;
9033
9034 case DIV:
9035 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9036 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9037 p = "fidiv";
9038 else
9039 p = "fdiv";
9040 ssep = "div";
9041 break;
9042
9043 default:
9044 gcc_unreachable ();
9045 }
9046
9047 if (is_sse)
9048 {
9049 strcpy (buf, ssep);
9050 if (GET_MODE (operands[0]) == SFmode)
9051 strcat (buf, "ss\t{%2, %0|%0, %2}");
9052 else
9053 strcat (buf, "sd\t{%2, %0|%0, %2}");
9054 return buf;
9055 }
9056 strcpy (buf, p);
9057
9058 switch (GET_CODE (operands[3]))
9059 {
9060 case MULT:
9061 case PLUS:
9062 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9063 {
9064 rtx temp = operands[2];
9065 operands[2] = operands[1];
9066 operands[1] = temp;
9067 }
9068
9069 /* We know operands[0] == operands[1].  */
9070
9071 if (MEM_P (operands[2]))
9072 {
9073 p = "%z2\t%2";
9074 break;
9075 }
9076
9077 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9078 {
9079 if (STACK_TOP_P (operands[0]))
9080 /* How is it that we are storing to a dead operand[2]?
9081 Well, presumably operands[1] is dead too. We can't
9082 store the result to st(0) as st(0) gets popped on this
9083 instruction. Instead store to operands[2] (which I
9084 think has to be st(1)). st(1) will be popped later.
9085 gcc <= 2.8.1 didn't have this check and generated
9086 assembly code that the Unixware assembler rejected. */
9087 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9088 else
9089 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9090 break;
9091 }
9092
9093 if (STACK_TOP_P (operands[0]))
9094 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9095 else
9096 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9097 break;
9098
9099 case MINUS:
9100 case DIV:
9101 if (MEM_P (operands[1]))
9102 {
9103 p = "r%z1\t%1";
9104 break;
9105 }
9106
9107 if (MEM_P (operands[2]))
9108 {
9109 p = "%z2\t%2";
9110 break;
9111 }
9112
9113 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9114 {
9115 #if SYSV386_COMPAT
9116 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9117 derived assemblers, confusingly reverse the direction of
9118 the operation for fsub{r} and fdiv{r} when the
9119 destination register is not st(0). The Intel assembler
9120 doesn't have this brain damage. Read !SYSV386_COMPAT to
9121 figure out what the hardware really does. */
9122 if (STACK_TOP_P (operands[0]))
9123 p = "{p\t%0, %2|rp\t%2, %0}";
9124 else
9125 p = "{rp\t%2, %0|p\t%0, %2}";
9126 #else
9127 if (STACK_TOP_P (operands[0]))
9128 /* As above for fmul/fadd, we can't store to st(0). */
9129 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9130 else
9131 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9132 #endif
9133 break;
9134 }
9135
9136 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9137 {
9138 #if SYSV386_COMPAT
9139 if (STACK_TOP_P (operands[0]))
9140 p = "{rp\t%0, %1|p\t%1, %0}";
9141 else
9142 p = "{p\t%1, %0|rp\t%0, %1}";
9143 #else
9144 if (STACK_TOP_P (operands[0]))
9145 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9146 else
9147 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9148 #endif
9149 break;
9150 }
9151
9152 if (STACK_TOP_P (operands[0]))
9153 {
9154 if (STACK_TOP_P (operands[1]))
9155 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9156 else
9157 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9158 break;
9159 }
9160 else if (STACK_TOP_P (operands[1]))
9161 {
9162 #if SYSV386_COMPAT
9163 p = "{\t%1, %0|r\t%0, %1}";
9164 #else
9165 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9166 #endif
9167 }
9168 else
9169 {
9170 #if SYSV386_COMPAT
9171 p = "{r\t%2, %0|\t%0, %2}";
9172 #else
9173 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9174 #endif
9175 }
9176 break;
9177
9178 default:
9179 gcc_unreachable ();
9180 }
9181
9182 strcat (buf, p);
9183 return buf;
9184 }
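
/* As an illustrative example of the templates assembled above: for a
   DFmode SSE add the function returns "addsd\t{%2, %0|%0, %2}", while on
   the x87 path a base mnemonic such as "fadd" is concatenated with a
   suffix/operand template such as "p\t{%2, %0|%0, %2}", giving
   "faddp\t{%2, %0|%0, %2}"; the "{AT&T|Intel}" braces select the operand
   order for the two assembler dialects when the insn is output.  */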
9185
9186 /* Return needed mode for entity in optimize_mode_switching pass. */
9187
9188 int
9189 ix86_mode_needed (int entity, rtx insn)
9190 {
9191 enum attr_i387_cw mode;
9192
9193 /* The mode UNINITIALIZED is used to store the control word after a
9194 function call or ASM pattern. The mode ANY specifies that the function
9195 has no requirements on the control word and makes no changes to the
9196 bits we are interested in. */
9197
9198 if (CALL_P (insn)
9199 || (NONJUMP_INSN_P (insn)
9200 && (asm_noperands (PATTERN (insn)) >= 0
9201 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9202 return I387_CW_UNINITIALIZED;
9203
9204 if (recog_memoized (insn) < 0)
9205 return I387_CW_ANY;
9206
9207 mode = get_attr_i387_cw (insn);
9208
9209 switch (entity)
9210 {
9211 case I387_TRUNC:
9212 if (mode == I387_CW_TRUNC)
9213 return mode;
9214 break;
9215
9216 case I387_FLOOR:
9217 if (mode == I387_CW_FLOOR)
9218 return mode;
9219 break;
9220
9221 case I387_CEIL:
9222 if (mode == I387_CW_CEIL)
9223 return mode;
9224 break;
9225
9226 case I387_MASK_PM:
9227 if (mode == I387_CW_MASK_PM)
9228 return mode;
9229 break;
9230
9231 default:
9232 gcc_unreachable ();
9233 }
9234
9235 return I387_CW_ANY;
9236 }
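
/* The value returned here feeds the generic optimize_mode_switching pass:
   an insn whose i387_cw attribute matches the queried ENTITY demands that
   rounding-mode setting, calls and asms leave the control word in an
   unknown state (I387_CW_UNINITIALIZED), and everything else is
   indifferent (I387_CW_ANY), so the pass can place the control-word
   reloads built by emit_i387_cw_initialization only where the required
   setting actually changes.  */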
9237
9238 /* Output code to initialize the control word copies used by trunc?f?i and
9239 rounding patterns. MODE selects the rounding mode (or, for I387_CW_MASK_PM,
9240 the precision-exception masking) to encode; the modified copy is stored in
the stack slot corresponding to MODE. */
9241
9242 void
9243 emit_i387_cw_initialization (int mode)
9244 {
9245 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9246 rtx new_mode;
9247
9248 int slot;
9249
9250 rtx reg = gen_reg_rtx (HImode);
9251
9252 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9253 emit_move_insn (reg, copy_rtx (stored_mode));
9254
9255 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9256 {
9257 switch (mode)
9258 {
9259 case I387_CW_TRUNC:
9260 /* round toward zero (truncate) */
9261 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9262 slot = SLOT_CW_TRUNC;
9263 break;
9264
9265 case I387_CW_FLOOR:
9266 /* round down toward -oo */
9267 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9268 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9269 slot = SLOT_CW_FLOOR;
9270 break;
9271
9272 case I387_CW_CEIL:
9273 /* round up toward +oo */
9274 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9275 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9276 slot = SLOT_CW_CEIL;
9277 break;
9278
9279 case I387_CW_MASK_PM:
9280 /* mask precision exception for nearbyint() */
9281 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9282 slot = SLOT_CW_MASK_PM;
9283 break;
9284
9285 default:
9286 gcc_unreachable ();
9287 }
9288 }
9289 else
9290 {
9291 switch (mode)
9292 {
9293 case I387_CW_TRUNC:
9294 /* round toward zero (truncate) */
9295 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9296 slot = SLOT_CW_TRUNC;
9297 break;
9298
9299 case I387_CW_FLOOR:
9300 /* round down toward -oo */
9301 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9302 slot = SLOT_CW_FLOOR;
9303 break;
9304
9305 case I387_CW_CEIL:
9306 /* round up toward +oo */
9307 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9308 slot = SLOT_CW_CEIL;
9309 break;
9310
9311 case I387_CW_MASK_PM:
9312 /* mask precision exception for nearbyint() */
9313 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9314 slot = SLOT_CW_MASK_PM;
9315 break;
9316
9317 default:
9318 gcc_unreachable ();
9319 }
9320 }
9321
9322 gcc_assert (slot < MAX_386_STACK_LOCALS);
9323
9324 new_mode = assign_386_stack_local (HImode, slot);
9325 emit_move_insn (new_mode, reg);
9326 }
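
/* The masks used above follow the layout of the x87 control word: bits
   10-11 form the rounding-control field (00 = to nearest, 01 = down,
   10 = up, 11 = toward zero) and bit 5 (0x20) is the precision-exception
   mask.  Hence 0x0c00 selects truncation, 0x0400 floor, 0x0800 ceil, and
   0x0020 masks the precision exception for nearbyint.  */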
9327
9328 /* Output code for INSN to convert a float to a signed int. OPERANDS
9329 are the insn operands. The output may be [HSD]Imode and the input
9330 operand may be [SDX]Fmode. */
9331
9332 const char *
9333 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9334 {
9335 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9336 int dimode_p = GET_MODE (operands[0]) == DImode;
9337 int round_mode = get_attr_i387_cw (insn);
9338
9339 /* Jump through a hoop or two for DImode, since the hardware has no
9340 non-popping instruction. We used to do this a different way, but
9341 that was somewhat fragile and broke with post-reload splitters. */
9342 if ((dimode_p || fisttp) && !stack_top_dies)
9343 output_asm_insn ("fld\t%y1", operands);
9344
9345 gcc_assert (STACK_TOP_P (operands[1]));
9346 gcc_assert (MEM_P (operands[0]));
9347 gcc_assert (GET_MODE (operands[1]) != TFmode);
9348
9349 if (fisttp)
9350 output_asm_insn ("fisttp%z0\t%0", operands);
9351 else
9352 {
9353 if (round_mode != I387_CW_ANY)
9354 output_asm_insn ("fldcw\t%3", operands);
9355 if (stack_top_dies || dimode_p)
9356 output_asm_insn ("fistp%z0\t%0", operands);
9357 else
9358 output_asm_insn ("fist%z0\t%0", operands);
9359 if (round_mode != I387_CW_ANY)
9360 output_asm_insn ("fldcw\t%2", operands);
9361 }
9362
9363 return "";
9364 }
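
/* A typical non-fisttp expansion therefore looks like "fldcw\t%3" to load
   the prepared rounding mode, "fistp%z0\t%0" (or "fist%z0\t%0") to store
   the result, and "fldcw\t%2" to restore the original control word, where
   operands 2 and 3 are the stack slots set up by
   emit_i387_cw_initialization.  */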
9365
9366 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9367 have the values zero or one, indicates the ffreep insn's operand
9368 from the OPERANDS array. */
9369
9370 static const char *
9371 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9372 {
9373 if (TARGET_USE_FFREEP)
9374 #if HAVE_AS_IX86_FFREEP
9375 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9376 #else
9377 {
9378 static char retval[] = ".word\t0xc_df";
9379 int regno = REGNO (operands[opno]);
9380
9381 gcc_assert (FP_REGNO_P (regno));
9382
9383 retval[9] = '0' + (regno - FIRST_STACK_REG);
9384 return retval;
9385 }
9386 #endif
9387
9388 return opno ? "fstp\t%y1" : "fstp\t%y0";
9389 }
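
/* When the assembler lacks ffreep support the two-byte opcode is emitted
   directly: ffreep %st(N) encodes as DF C0+N, and since ".word" emits its
   value little-endian, patching the '_' in ".word 0xc_df" with the digit N
   produces e.g. ".word 0xc3df", i.e. the bytes DF C3 for ffreep %st(3).  */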
9390
9391
9392 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9393 should be used. UNORDERED_P is true when fucom should be used. */
9394
9395 const char *
9396 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9397 {
9398 int stack_top_dies;
9399 rtx cmp_op0, cmp_op1;
9400 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9401
9402 if (eflags_p)
9403 {
9404 cmp_op0 = operands[0];
9405 cmp_op1 = operands[1];
9406 }
9407 else
9408 {
9409 cmp_op0 = operands[1];
9410 cmp_op1 = operands[2];
9411 }
9412
9413 if (is_sse)
9414 {
9415 if (GET_MODE (operands[0]) == SFmode)
9416 if (unordered_p)
9417 return "ucomiss\t{%1, %0|%0, %1}";
9418 else
9419 return "comiss\t{%1, %0|%0, %1}";
9420 else
9421 if (unordered_p)
9422 return "ucomisd\t{%1, %0|%0, %1}";
9423 else
9424 return "comisd\t{%1, %0|%0, %1}";
9425 }
9426
9427 gcc_assert (STACK_TOP_P (cmp_op0));
9428
9429 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9430
9431 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9432 {
9433 if (stack_top_dies)
9434 {
9435 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9436 return output_387_ffreep (operands, 1);
9437 }
9438 else
9439 return "ftst\n\tfnstsw\t%0";
9440 }
9441
9442 if (STACK_REG_P (cmp_op1)
9443 && stack_top_dies
9444 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9445 && REGNO (cmp_op1) != FIRST_STACK_REG)
9446 {
9447 /* If the top of the 387 stack dies, and the other operand
9448 is also a stack register that dies, then this must be a
9449 `fcompp' float compare.  */
9450
9451 if (eflags_p)
9452 {
9453 /* There is no double popping fcomi variant. Fortunately,
9454 eflags is immune from the fstp's cc clobbering. */
9455 if (unordered_p)
9456 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9457 else
9458 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9459 return output_387_ffreep (operands, 0);
9460 }
9461 else
9462 {
9463 if (unordered_p)
9464 return "fucompp\n\tfnstsw\t%0";
9465 else
9466 return "fcompp\n\tfnstsw\t%0";
9467 }
9468 }
9469 else
9470 {
9471 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9472
9473 static const char * const alt[16] =
9474 {
9475 "fcom%z2\t%y2\n\tfnstsw\t%0",
9476 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9477 "fucom%z2\t%y2\n\tfnstsw\t%0",
9478 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9479
9480 "ficom%z2\t%y2\n\tfnstsw\t%0",
9481 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9482 NULL,
9483 NULL,
9484
9485 "fcomi\t{%y1, %0|%0, %y1}",
9486 "fcomip\t{%y1, %0|%0, %y1}",
9487 "fucomi\t{%y1, %0|%0, %y1}",
9488 "fucomip\t{%y1, %0|%0, %y1}",
9489
9490 NULL,
9491 NULL,
9492 NULL,
9493 NULL
9494 };
9495
9496 int mask;
9497 const char *ret;
9498
9499 mask = eflags_p << 3;
9500 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9501 mask |= unordered_p << 1;
9502 mask |= stack_top_dies;
9503
9504 gcc_assert (mask < 16);
9505 ret = alt[mask];
9506 gcc_assert (ret);
9507
9508 return ret;
9509 }
9510 }
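
/* Example of the encoding used for the alt[] lookup above: a
   register-register fcomi-style compare (eflags_p = 1) of ordered
   floating-point operands (unordered_p = 0) where the stack top dies
   gives mask = 8 | 0 | 0 | 1 = 9, i.e. "fcomip\t{%y1, %0|%0, %y1}".  */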
9511
9512 void
9513 ix86_output_addr_vec_elt (FILE *file, int value)
9514 {
9515 const char *directive = ASM_LONG;
9516
9517 #ifdef ASM_QUAD
9518 if (TARGET_64BIT)
9519 directive = ASM_QUAD;
9520 #else
9521 gcc_assert (!TARGET_64BIT);
9522 #endif
9523
9524 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9525 }
9526
9527 void
9528 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9529 {
9530 const char *directive = ASM_LONG;
9531
9532 #ifdef ASM_QUAD
9533 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9534 directive = ASM_QUAD;
9535 #else
9536 gcc_assert (!TARGET_64BIT);
9537 #endif
9538 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9539 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9540 fprintf (file, "%s%s%d-%s%d\n",
9541 directive, LPREFIX, value, LPREFIX, rel);
9542 else if (HAVE_AS_GOTOFF_IN_DATA)
9543 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9544 #if TARGET_MACHO
9545 else if (TARGET_MACHO)
9546 {
9547 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9548 machopic_output_function_base_name (file);
9549 fprintf(file, "\n");
9550 }
9551 #endif
9552 else
9553 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9554 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9555 }
9556 \f
9557 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9558 for the target. */
9559
9560 void
9561 ix86_expand_clear (rtx dest)
9562 {
9563 rtx tmp;
9564
9565 /* We play register width games, which are only valid after reload. */
9566 gcc_assert (reload_completed);
9567
9568 /* Avoid HImode and its attendant prefix byte. */
9569 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9570 dest = gen_rtx_REG (SImode, REGNO (dest));
9571 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9572
9573 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9574 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9575 {
9576 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9577 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9578 }
9579
9580 emit_insn (tmp);
9581 }
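
/* The xor form is preferred when allowed because "xor %reg, %reg" is
   shorter than "mov $0, %reg"; since it clobbers the condition codes, the
   SET is wrapped in a PARALLEL with an explicit flags-register CLOBBER so
   later passes see that side effect.  */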
9582
9583 /* X is an unchanging MEM. If it is a constant pool reference, return
9584 the constant pool rtx, else NULL. */
9585
9586 rtx
9587 maybe_get_pool_constant (rtx x)
9588 {
9589 x = ix86_delegitimize_address (XEXP (x, 0));
9590
9591 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9592 return get_pool_constant (x);
9593
9594 return NULL_RTX;
9595 }
9596
9597 void
9598 ix86_expand_move (enum machine_mode mode, rtx operands[])
9599 {
9600 int strict = (reload_in_progress || reload_completed);
9601 rtx op0, op1;
9602 enum tls_model model;
9603
9604 op0 = operands[0];
9605 op1 = operands[1];
9606
9607 if (GET_CODE (op1) == SYMBOL_REF)
9608 {
9609 model = SYMBOL_REF_TLS_MODEL (op1);
9610 if (model)
9611 {
9612 op1 = legitimize_tls_address (op1, model, true);
9613 op1 = force_operand (op1, op0);
9614 if (op1 == op0)
9615 return;
9616 }
9617 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9618 && SYMBOL_REF_DLLIMPORT_P (op1))
9619 op1 = legitimize_dllimport_symbol (op1, false);
9620 }
9621 else if (GET_CODE (op1) == CONST
9622 && GET_CODE (XEXP (op1, 0)) == PLUS
9623 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9624 {
9625 rtx addend = XEXP (XEXP (op1, 0), 1);
9626 rtx symbol = XEXP (XEXP (op1, 0), 0);
9627 rtx tmp = NULL;
9628
9629 model = SYMBOL_REF_TLS_MODEL (symbol);
9630 if (model)
9631 tmp = legitimize_tls_address (symbol, model, true);
9632 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9633 && SYMBOL_REF_DLLIMPORT_P (symbol))
9634 tmp = legitimize_dllimport_symbol (symbol, true);
9635
9636 if (tmp)
9637 {
9638 tmp = force_operand (tmp, NULL);
9639 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9640 op0, 1, OPTAB_DIRECT);
9641 if (tmp == op0)
9642 return;
9643 }
9644 }
9645
9646 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9647 {
9648 if (TARGET_MACHO && !TARGET_64BIT)
9649 {
9650 #if TARGET_MACHO
9651 if (MACHOPIC_PURE)
9652 {
9653 rtx temp = ((reload_in_progress
9654 || ((op0 && REG_P (op0))
9655 && mode == Pmode))
9656 ? op0 : gen_reg_rtx (Pmode));
9657 op1 = machopic_indirect_data_reference (op1, temp);
9658 op1 = machopic_legitimize_pic_address (op1, mode,
9659 temp == op1 ? 0 : temp);
9660 }
9661 else if (MACHOPIC_INDIRECT)
9662 op1 = machopic_indirect_data_reference (op1, 0);
9663 if (op0 == op1)
9664 return;
9665 #endif
9666 }
9667 else
9668 {
9669 if (MEM_P (op0))
9670 op1 = force_reg (Pmode, op1);
9671 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9672 {
9673 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9674 op1 = legitimize_pic_address (op1, reg);
9675 if (op0 == op1)
9676 return;
9677 }
9678 }
9679 }
9680 else
9681 {
9682 if (MEM_P (op0)
9683 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9684 || !push_operand (op0, mode))
9685 && MEM_P (op1))
9686 op1 = force_reg (mode, op1);
9687
9688 if (push_operand (op0, mode)
9689 && ! general_no_elim_operand (op1, mode))
9690 op1 = copy_to_mode_reg (mode, op1);
9691
9692 /* Force large constants in 64-bit compilation into a register
9693 so that they get CSEed. */
9694 if (TARGET_64BIT && mode == DImode
9695 && immediate_operand (op1, mode)
9696 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9697 && !register_operand (op0, mode)
9698 && optimize && !reload_completed && !reload_in_progress)
9699 op1 = copy_to_mode_reg (mode, op1);
9700
9701 if (FLOAT_MODE_P (mode))
9702 {
9703 /* If we are loading a floating point constant to a register,
9704 force the value to memory now, since we'll get better code
9705 out of the back end. */
9706
9707 if (strict)
9708 ;
9709 else if (GET_CODE (op1) == CONST_DOUBLE)
9710 {
9711 op1 = validize_mem (force_const_mem (mode, op1));
9712 if (!register_operand (op0, mode))
9713 {
9714 rtx temp = gen_reg_rtx (mode);
9715 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9716 emit_move_insn (op0, temp);
9717 return;
9718 }
9719 }
9720 }
9721 }
9722
9723 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9724 }
9725
9726 void
9727 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9728 {
9729 rtx op0 = operands[0], op1 = operands[1];
9730 unsigned int align = GET_MODE_ALIGNMENT (mode);
9731
9732 /* Force constants other than zero into memory. We do not know how
9733 the instructions used to build constants modify the upper 64 bits
9734 of the register; once we have that information we may be able
9735 to handle some of them more efficiently. */
9736 if ((reload_in_progress | reload_completed) == 0
9737 && register_operand (op0, mode)
9738 && (CONSTANT_P (op1)
9739 || (GET_CODE (op1) == SUBREG
9740 && CONSTANT_P (SUBREG_REG (op1))))
9741 && standard_sse_constant_p (op1) <= 0)
9742 op1 = validize_mem (force_const_mem (mode, op1));
9743
9744 /* TDmode values are passed as TImode on the stack. TImode values
9745 are moved via xmm registers, and moving them to stack can result in
9746 unaligned memory access. Use ix86_expand_vector_move_misalign()
9747 if memory operand is not aligned correctly. */
9748 if (!no_new_pseudos
9749 && (mode == TImode) && !TARGET_64BIT
9750 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9751 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9752 {
9753 rtx tmp[2];
9754
9755 /* ix86_expand_vector_move_misalign() does not like constants ... */
9756 if (CONSTANT_P (op1)
9757 || (GET_CODE (op1) == SUBREG
9758 && CONSTANT_P (SUBREG_REG (op1))))
9759 op1 = validize_mem (force_const_mem (mode, op1));
9760
9761 /* ... nor both arguments in memory. */
9762 if (!register_operand (op0, mode)
9763 && !register_operand (op1, mode))
9764 op1 = force_reg (mode, op1);
9765
9766 tmp[0] = op0; tmp[1] = op1;
9767 ix86_expand_vector_move_misalign (mode, tmp);
9768 return;
9769 }
9770
9771 /* If neither operand is a register, force operand1 into a register.  */
9772 if (!no_new_pseudos
9773 && !register_operand (op0, mode)
9774 && !register_operand (op1, mode))
9775 {
9776 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9777 return;
9778 }
9779
9780 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9781 }
9782
9783 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9784 straight to ix86_expand_vector_move. */
9785 /* Code generation for scalar reg-reg moves of single and double precision data:
9786 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9787 movaps reg, reg
9788 else
9789 movss reg, reg
9790 if (x86_sse_partial_reg_dependency == true)
9791 movapd reg, reg
9792 else
9793 movsd reg, reg
9794
9795 Code generation for scalar loads of double precision data:
9796 if (x86_sse_split_regs == true)
9797 movlpd mem, reg (gas syntax)
9798 else
9799 movsd mem, reg
9800
9801 Code generation for unaligned packed loads of single precision data
9802 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9803 if (x86_sse_unaligned_move_optimal)
9804 movups mem, reg
9805
9806 if (x86_sse_partial_reg_dependency == true)
9807 {
9808 xorps reg, reg
9809 movlps mem, reg
9810 movhps mem+8, reg
9811 }
9812 else
9813 {
9814 movlps mem, reg
9815 movhps mem+8, reg
9816 }
9817
9818 Code generation for unaligned packed loads of double precision data
9819 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9820 if (x86_sse_unaligned_move_optimal)
9821 movupd mem, reg
9822
9823 if (x86_sse_split_regs == true)
9824 {
9825 movlpd mem, reg
9826 movhpd mem+8, reg
9827 }
9828 else
9829 {
9830 movsd mem, reg
9831 movhpd mem+8, reg
9832 }
9833 */
9834
9835 void
9836 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9837 {
9838 rtx op0, op1, m;
9839
9840 op0 = operands[0];
9841 op1 = operands[1];
9842
9843 if (MEM_P (op1))
9844 {
9845 /* If we're optimizing for size, movups is the smallest. */
9846 if (optimize_size)
9847 {
9848 op0 = gen_lowpart (V4SFmode, op0);
9849 op1 = gen_lowpart (V4SFmode, op1);
9850 emit_insn (gen_sse_movups (op0, op1));
9851 return;
9852 }
9853
9854 /* ??? If we have typed data, then it would appear that using
9855 movdqu is the only way to get unaligned data loaded with
9856 integer type. */
9857 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9858 {
9859 op0 = gen_lowpart (V16QImode, op0);
9860 op1 = gen_lowpart (V16QImode, op1);
9861 emit_insn (gen_sse2_movdqu (op0, op1));
9862 return;
9863 }
9864
9865 if (TARGET_SSE2 && mode == V2DFmode)
9866 {
9867 rtx zero;
9868
9869 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9870 {
9871 op0 = gen_lowpart (V2DFmode, op0);
9872 op1 = gen_lowpart (V2DFmode, op1);
9873 emit_insn (gen_sse2_movupd (op0, op1));
9874 return;
9875 }
9876
9877 /* When SSE registers are split into halves, we can avoid
9878 writing to the top half twice. */
9879 if (TARGET_SSE_SPLIT_REGS)
9880 {
9881 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9882 zero = op0;
9883 }
9884 else
9885 {
9886 /* ??? Not sure about the best option for the Intel chips.
9887 The following would seem to satisfy; the register is
9888 entirely cleared, breaking the dependency chain. We
9889 then store to the upper half, with a dependency depth
9890 of one. A rumor has it that Intel recommends two movsd
9891 followed by an unpacklpd, but this is unconfirmed. And
9892 given that the dependency depth of the unpacklpd would
9893 still be one, I'm not sure why this would be better. */
9894 zero = CONST0_RTX (V2DFmode);
9895 }
9896
9897 m = adjust_address (op1, DFmode, 0);
9898 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9899 m = adjust_address (op1, DFmode, 8);
9900 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9901 }
9902 else
9903 {
9904 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9905 {
9906 op0 = gen_lowpart (V4SFmode, op0);
9907 op1 = gen_lowpart (V4SFmode, op1);
9908 emit_insn (gen_sse_movups (op0, op1));
9909 return;
9910 }
9911
9912 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9913 emit_move_insn (op0, CONST0_RTX (mode));
9914 else
9915 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9916
9917 if (mode != V4SFmode)
9918 op0 = gen_lowpart (V4SFmode, op0);
9919 m = adjust_address (op1, V2SFmode, 0);
9920 emit_insn (gen_sse_loadlps (op0, op0, m));
9921 m = adjust_address (op1, V2SFmode, 8);
9922 emit_insn (gen_sse_loadhps (op0, op0, m));
9923 }
9924 }
9925 else if (MEM_P (op0))
9926 {
9927 /* If we're optimizing for size, movups is the smallest. */
9928 if (optimize_size)
9929 {
9930 op0 = gen_lowpart (V4SFmode, op0);
9931 op1 = gen_lowpart (V4SFmode, op1);
9932 emit_insn (gen_sse_movups (op0, op1));
9933 return;
9934 }
9935
9936 /* ??? Similar to above, only less clear because of quote
9937 typeless stores unquote. */
9938 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9939 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9940 {
9941 op0 = gen_lowpart (V16QImode, op0);
9942 op1 = gen_lowpart (V16QImode, op1);
9943 emit_insn (gen_sse2_movdqu (op0, op1));
9944 return;
9945 }
9946
9947 if (TARGET_SSE2 && mode == V2DFmode)
9948 {
9949 m = adjust_address (op0, DFmode, 0);
9950 emit_insn (gen_sse2_storelpd (m, op1));
9951 m = adjust_address (op0, DFmode, 8);
9952 emit_insn (gen_sse2_storehpd (m, op1));
9953 }
9954 else
9955 {
9956 if (mode != V4SFmode)
9957 op1 = gen_lowpart (V4SFmode, op1);
9958 m = adjust_address (op0, V2SFmode, 0);
9959 emit_insn (gen_sse_storelps (m, op1));
9960 m = adjust_address (op0, V2SFmode, 8);
9961 emit_insn (gen_sse_storehps (m, op1));
9962 }
9963 }
9964 else
9965 gcc_unreachable ();
9966 }
9967
9968 /* Expand a push in MODE. This is some mode for which we do not support
9969 proper push instructions, at least from the registers that we expect
9970 the value to live in. */
9971
9972 void
9973 ix86_expand_push (enum machine_mode mode, rtx x)
9974 {
9975 rtx tmp;
9976
9977 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9978 GEN_INT (-GET_MODE_SIZE (mode)),
9979 stack_pointer_rtx, 1, OPTAB_DIRECT);
9980 if (tmp != stack_pointer_rtx)
9981 emit_move_insn (stack_pointer_rtx, tmp);
9982
9983 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9984 emit_move_insn (tmp, x);
9985 }
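
/* In effect this open-codes the push as "sp -= GET_MODE_SIZE (mode);
   *(mode *) sp = x": e.g. a TImode push becomes a 16-byte stack
   adjustment followed by an ordinary move into (mem:TI (reg sp)), which
   the move expanders can then handle from SSE registers.  */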
9986
9987 /* Helper function of ix86_fixup_binary_operands to canonicalize
9988 operand order. Returns true if the operands should be swapped. */
9989
9990 static bool
9991 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9992 rtx operands[])
9993 {
9994 rtx dst = operands[0];
9995 rtx src1 = operands[1];
9996 rtx src2 = operands[2];
9997
9998 /* If the operation is not commutative, we can't do anything. */
9999 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10000 return false;
10001
10002 /* Highest priority is that src1 should match dst. */
10003 if (rtx_equal_p (dst, src1))
10004 return false;
10005 if (rtx_equal_p (dst, src2))
10006 return true;
10007
10008 /* Next highest priority is that immediate constants come second. */
10009 if (immediate_operand (src2, mode))
10010 return false;
10011 if (immediate_operand (src1, mode))
10012 return true;
10013
10014 /* Lowest priority is that memory references should come second. */
10015 if (MEM_P (src2))
10016 return false;
10017 if (MEM_P (src1))
10018 return true;
10019
10020 return false;
10021 }
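
/* E.g. for a commutative (plus (const_int 8) (reg R)) whose destination
   matches neither source, the predicate requests a swap so the immediate
   ends up as the second source, matching the register/memory-op-immediate
   forms of the machine instructions.  */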
10022
10023
10024 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10025 destination to use for the operation. If different from the true
10026 destination in operands[0], a copy operation will be required. */
10027
10028 rtx
10029 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10030 rtx operands[])
10031 {
10032 rtx dst = operands[0];
10033 rtx src1 = operands[1];
10034 rtx src2 = operands[2];
10035
10036 /* Canonicalize operand order. */
10037 if (ix86_swap_binary_operands_p (code, mode, operands))
10038 {
10039 rtx temp = src1;
10040 src1 = src2;
10041 src2 = temp;
10042 }
10043
10044 /* Both source operands cannot be in memory. */
10045 if (MEM_P (src1) && MEM_P (src2))
10046 {
10047 /* Optimization: Only read from memory once. */
10048 if (rtx_equal_p (src1, src2))
10049 {
10050 src2 = force_reg (mode, src2);
10051 src1 = src2;
10052 }
10053 else
10054 src2 = force_reg (mode, src2);
10055 }
10056
10057 /* If the destination is memory, and we do not have matching source
10058 operands, do things in registers. */
10059 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10060 dst = gen_reg_rtx (mode);
10061
10062 /* Source 1 cannot be a constant. */
10063 if (CONSTANT_P (src1))
10064 src1 = force_reg (mode, src1);
10065
10066 /* Source 1 cannot be a non-matching memory. */
10067 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10068 src1 = force_reg (mode, src1);
10069
10070 operands[1] = src1;
10071 operands[2] = src2;
10072 return dst;
10073 }
10074
10075 /* Similarly, but assume that the destination has already been
10076 set up properly. */
10077
10078 void
10079 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10080 enum machine_mode mode, rtx operands[])
10081 {
10082 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10083 gcc_assert (dst == operands[0]);
10084 }
10085
10086 /* Attempt to expand a binary operator. Make the expansion closer to the
10087 actual machine than just general_operand, which would allow 3 separate
10088 memory references (one output, two input) in a single insn. */
10089
10090 void
10091 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10092 rtx operands[])
10093 {
10094 rtx src1, src2, dst, op, clob;
10095
10096 dst = ix86_fixup_binary_operands (code, mode, operands);
10097 src1 = operands[1];
10098 src2 = operands[2];
10099
10100 /* Emit the instruction. */
10101
10102 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10103 if (reload_in_progress)
10104 {
10105 /* Reload doesn't know about the flags register, and doesn't know that
10106 it doesn't want to clobber it. We can only do this with PLUS. */
10107 gcc_assert (code == PLUS);
10108 emit_insn (op);
10109 }
10110 else
10111 {
10112 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10113 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10114 }
10115
10116 /* Fix up the destination if needed. */
10117 if (dst != operands[0])
10118 emit_move_insn (operands[0], dst);
10119 }
10120
10121 /* Return TRUE or FALSE depending on whether the binary operator meets the
10122 appropriate constraints. */
10123
10124 int
10125 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10126 rtx operands[3])
10127 {
10128 rtx dst = operands[0];
10129 rtx src1 = operands[1];
10130 rtx src2 = operands[2];
10131
10132 /* Both source operands cannot be in memory. */
10133 if (MEM_P (src1) && MEM_P (src2))
10134 return 0;
10135
10136 /* Canonicalize operand order for commutative operators. */
10137 if (ix86_swap_binary_operands_p (code, mode, operands))
10138 {
10139 rtx temp = src1;
10140 src1 = src2;
10141 src2 = temp;
10142 }
10143
10144 /* If the destination is memory, we must have a matching source operand. */
10145 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10146 return 0;
10147
10148 /* Source 1 cannot be a constant. */
10149 if (CONSTANT_P (src1))
10150 return 0;
10151
10152 /* Source 1 cannot be a non-matching memory. */
10153 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10154 return 0;
10155
10156 return 1;
10157 }
10158
10159 /* Attempt to expand a unary operator. Make the expansion closer to the
10160 actual machine than just general_operand, which would allow 2 separate
10161 memory references (one output, one input) in a single insn. */
10162
10163 void
10164 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10165 rtx operands[])
10166 {
10167 int matching_memory;
10168 rtx src, dst, op, clob;
10169
10170 dst = operands[0];
10171 src = operands[1];
10172
10173 /* If the destination is memory, and we do not have matching source
10174 operands, do things in registers. */
10175 matching_memory = 0;
10176 if (MEM_P (dst))
10177 {
10178 if (rtx_equal_p (dst, src))
10179 matching_memory = 1;
10180 else
10181 dst = gen_reg_rtx (mode);
10182 }
10183
10184 /* When source operand is memory, destination must match. */
10185 if (MEM_P (src) && !matching_memory)
10186 src = force_reg (mode, src);
10187
10188 /* Emit the instruction. */
10189
10190 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10191 if (reload_in_progress || code == NOT)
10192 {
10193 /* Reload doesn't know about the flags register, and doesn't know that
10194 it doesn't want to clobber it. */
10195 gcc_assert (code == NOT);
10196 emit_insn (op);
10197 }
10198 else
10199 {
10200 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10201 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10202 }
10203
10204 /* Fix up the destination if needed. */
10205 if (dst != operands[0])
10206 emit_move_insn (operands[0], dst);
10207 }
10208
10209 /* Return TRUE or FALSE depending on whether the unary operator meets the
10210 appropriate constraints. */
10211
10212 int
10213 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10214 enum machine_mode mode ATTRIBUTE_UNUSED,
10215 rtx operands[2] ATTRIBUTE_UNUSED)
10216 {
10217 /* If one of operands is memory, source and destination must match. */
10218 if ((MEM_P (operands[0])
10219 || MEM_P (operands[1]))
10220 && ! rtx_equal_p (operands[0], operands[1]))
10221 return FALSE;
10222 return TRUE;
10223 }
10224
10225 /* Post-reload splitter for converting an SF or DFmode value in an
10226 SSE register into an unsigned SImode. */
10227
10228 void
10229 ix86_split_convert_uns_si_sse (rtx operands[])
10230 {
10231 enum machine_mode vecmode;
10232 rtx value, large, zero_or_two31, input, two31, x;
10233
10234 large = operands[1];
10235 zero_or_two31 = operands[2];
10236 input = operands[3];
10237 two31 = operands[4];
10238 vecmode = GET_MODE (large);
10239 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10240
10241 /* Load up the value into the low element. We must ensure that the other
10242 elements are valid floats -- zero is the easiest such value. */
10243 if (MEM_P (input))
10244 {
10245 if (vecmode == V4SFmode)
10246 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10247 else
10248 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10249 }
10250 else
10251 {
10252 input = gen_rtx_REG (vecmode, REGNO (input));
10253 emit_move_insn (value, CONST0_RTX (vecmode));
10254 if (vecmode == V4SFmode)
10255 emit_insn (gen_sse_movss (value, value, input));
10256 else
10257 emit_insn (gen_sse2_movsd (value, value, input));
10258 }
10259
10260 emit_move_insn (large, two31);
10261 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10262
10263 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10264 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10265
10266 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10267 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10268
10269 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10270 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10271
10272 large = gen_rtx_REG (V4SImode, REGNO (large));
10273 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10274
10275 x = gen_rtx_REG (V4SImode, REGNO (value));
10276 if (vecmode == V4SFmode)
10277 emit_insn (gen_sse2_cvttps2dq (x, value));
10278 else
10279 emit_insn (gen_sse2_cvttpd2dq (x, value));
10280 value = x;
10281
10282 emit_insn (gen_xorv4si3 (value, value, large));
10283 }
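
/* The net effect is the usual unsigned-conversion identity: when the
   input is >= 2**31 the code converts (input - 2**31) with the signed
   cvttps2dq/cvttpd2dq instruction and then xors 0x80000000 back in (the
   LE-comparison mask shifted left by 31); otherwise the input is
   converted unchanged.  Either way the result is the unsigned 32-bit
   value of the input.  */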
10284
10285 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10286 Expects the 64-bit DImode to be supplied in a pair of integral
10287 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10288 -mfpmath=sse, !optimize_size only. */
10289
10290 void
10291 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10292 {
10293 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10294 rtx int_xmm, fp_xmm;
10295 rtx biases, exponents;
10296 rtx x;
10297
10298 int_xmm = gen_reg_rtx (V4SImode);
10299 if (TARGET_INTER_UNIT_MOVES)
10300 emit_insn (gen_movdi_to_sse (int_xmm, input));
10301 else if (TARGET_SSE_SPLIT_REGS)
10302 {
10303 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10304 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10305 }
10306 else
10307 {
10308 x = gen_reg_rtx (V2DImode);
10309 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10310 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10311 }
10312
10313 x = gen_rtx_CONST_VECTOR (V4SImode,
10314 gen_rtvec (4, GEN_INT (0x43300000UL),
10315 GEN_INT (0x45300000UL),
10316 const0_rtx, const0_rtx));
10317 exponents = validize_mem (force_const_mem (V4SImode, x));
10318
10319 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10320 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10321
10322 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10323 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10324 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10325 (0x1.0p84 + double(fp_value_hi_xmm)).
10326 Note these exponents differ by 32. */
10327
10328 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10329
10330 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10331 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10332 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10333 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10334 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10335 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10336 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10337 biases = validize_mem (force_const_mem (V2DFmode, biases));
10338 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10339
10340 /* Add the upper and lower DFmode values together. */
10341 if (TARGET_SSE3)
10342 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10343 else
10344 {
10345 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10346 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10347 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10348 }
10349
10350 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10351 }
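
/* Written out, the bias trick above computes
     ((2**84 + hi * 2**32) - 2**84) + ((2**52 + lo) - 2**52)
       = hi * 2**32 + lo
   where hi and lo are the two 32-bit halves of the input; the haddpd (or
   unpckhpd/addpd) step performs the final addition of the two partial
   values.  */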
10352
10353 /* Convert an unsigned SImode value into a DFmode. Only currently used
10354 for SSE, but applicable anywhere. */
10355
10356 void
10357 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10358 {
10359 REAL_VALUE_TYPE TWO31r;
10360 rtx x, fp;
10361
10362 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10363 NULL, 1, OPTAB_DIRECT);
10364
10365 fp = gen_reg_rtx (DFmode);
10366 emit_insn (gen_floatsidf2 (fp, x));
10367
10368 real_ldexp (&TWO31r, &dconst1, 31);
10369 x = const_double_from_real_value (TWO31r, DFmode);
10370
10371 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10372 if (x != target)
10373 emit_move_insn (target, x);
10374 }
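
/* I.e. the unsigned SImode value is first re-biased into the signed range
   (input - 2**31 via the wrapping PLUS of -2147483648), converted with the
   ordinary signed SImode->DFmode conversion, and then 2**31 is added back
   as a DFmode constant; the result is exact because DFmode represents
   every 32-bit integer.  */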
10375
10376 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10377 32-bit mode; otherwise we have a direct convert instruction. */
10378
10379 void
10380 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10381 {
10382 REAL_VALUE_TYPE TWO32r;
10383 rtx fp_lo, fp_hi, x;
10384
10385 fp_lo = gen_reg_rtx (DFmode);
10386 fp_hi = gen_reg_rtx (DFmode);
10387
10388 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10389
10390 real_ldexp (&TWO32r, &dconst1, 32);
10391 x = const_double_from_real_value (TWO32r, DFmode);
10392 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10393
10394 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10395
10396 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10397 0, OPTAB_DIRECT);
10398 if (x != target)
10399 emit_move_insn (target, x);
10400 }
10401
10402 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10403 For x86_32, -mfpmath=sse, !optimize_size only. */
10404 void
10405 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10406 {
10407 REAL_VALUE_TYPE ONE16r;
10408 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10409
10410 real_ldexp (&ONE16r, &dconst1, 16);
10411 x = const_double_from_real_value (ONE16r, SFmode);
10412 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10413 NULL, 0, OPTAB_DIRECT);
10414 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10415 NULL, 0, OPTAB_DIRECT);
10416 fp_hi = gen_reg_rtx (SFmode);
10417 fp_lo = gen_reg_rtx (SFmode);
10418 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10419 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10420 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10421 0, OPTAB_DIRECT);
10422 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10423 0, OPTAB_DIRECT);
10424 if (!rtx_equal_p (target, fp_hi))
10425 emit_move_insn (target, fp_hi);
10426 }
10427
10428 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10429 then replicate the value for all elements of the vector
10430 register. */
10431
10432 rtx
10433 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10434 {
10435 rtvec v;
10436 switch (mode)
10437 {
10438 case SFmode:
10439 if (vect)
10440 v = gen_rtvec (4, value, value, value, value);
10441 else
10442 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10443 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10444 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10445
10446 case DFmode:
10447 if (vect)
10448 v = gen_rtvec (2, value, value);
10449 else
10450 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10451 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10452
10453 default:
10454 gcc_unreachable ();
10455 }
10456 }
10457
10458 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10459 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10460 true, then replicate the mask for all elements of the vector register.
10461 If INVERT is true, then create a mask excluding the sign bit. */
10462
10463 rtx
10464 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10465 {
10466 enum machine_mode vec_mode;
10467 HOST_WIDE_INT hi, lo;
10468 int shift = 63;
10469 rtx v;
10470 rtx mask;
10471
10472 /* Find the sign bit, sign extended to 2*HWI. */
10473 if (mode == SFmode)
10474 lo = 0x80000000, hi = lo < 0;
10475 else if (HOST_BITS_PER_WIDE_INT >= 64)
10476 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10477 else
10478 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10479
10480 if (invert)
10481 lo = ~lo, hi = ~hi;
10482
10483 /* Force this value into the low part of a fp vector constant. */
10484 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10485 mask = gen_lowpart (mode, mask);
10486
10487 v = ix86_build_const_vector (mode, vect, mask);
10488 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10489 return force_reg (vec_mode, v);
10490 }
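
/* For DFmode and !VECT the vector constant built here has only the sign
   bit of its first element set (the bit pattern 0x8000000000000000, i.e.
   -0.0); with VECT that pattern is replicated, and with INVERT its
   complement is used.  The absneg and copysign expanders combine this
   mask with AND, NOT/AND, XOR and IOR to operate on just the sign bits.  */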
10491
10492 /* Generate code for floating point ABS or NEG. */
10493
10494 void
10495 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10496 rtx operands[])
10497 {
10498 rtx mask, set, use, clob, dst, src;
10499 bool matching_memory;
10500 bool use_sse = false;
10501 bool vector_mode = VECTOR_MODE_P (mode);
10502 enum machine_mode elt_mode = mode;
10503
10504 if (vector_mode)
10505 {
10506 elt_mode = GET_MODE_INNER (mode);
10507 use_sse = true;
10508 }
10509 else if (TARGET_SSE_MATH)
10510 use_sse = SSE_FLOAT_MODE_P (mode);
10511
10512 /* NEG and ABS performed with SSE use bitwise mask operations.
10513 Create the appropriate mask now. */
10514 if (use_sse)
10515 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10516 else
10517 mask = NULL_RTX;
10518
10519 dst = operands[0];
10520 src = operands[1];
10521
10522 /* If the destination is memory, and we don't have matching source
10523 operands or we're using the x87, do things in registers. */
10524 matching_memory = false;
10525 if (MEM_P (dst))
10526 {
10527 if (use_sse && rtx_equal_p (dst, src))
10528 matching_memory = true;
10529 else
10530 dst = gen_reg_rtx (mode);
10531 }
10532 if (MEM_P (src) && !matching_memory)
10533 src = force_reg (mode, src);
10534
10535 if (vector_mode)
10536 {
10537 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10538 set = gen_rtx_SET (VOIDmode, dst, set);
10539 emit_insn (set);
10540 }
10541 else
10542 {
10543 set = gen_rtx_fmt_e (code, mode, src);
10544 set = gen_rtx_SET (VOIDmode, dst, set);
10545 if (mask)
10546 {
10547 use = gen_rtx_USE (VOIDmode, mask);
10548 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10549 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10550 gen_rtvec (3, set, use, clob)));
10551 }
10552 else
10553 emit_insn (set);
10554 }
10555
10556 if (dst != operands[0])
10557 emit_move_insn (operands[0], dst);
10558 }
10559
10560 /* Expand a copysign operation. Special case operand 0 being a constant. */
10561
10562 void
10563 ix86_expand_copysign (rtx operands[])
10564 {
10565 enum machine_mode mode, vmode;
10566 rtx dest, op0, op1, mask, nmask;
10567
10568 dest = operands[0];
10569 op0 = operands[1];
10570 op1 = operands[2];
10571
10572 mode = GET_MODE (dest);
10573 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10574
10575 if (GET_CODE (op0) == CONST_DOUBLE)
10576 {
10577 rtvec v;
10578
10579 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10580 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10581
10582 if (op0 == CONST0_RTX (mode))
10583 op0 = CONST0_RTX (vmode);
10584 else
10585 {
10586 if (mode == SFmode)
10587 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10588 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10589 else
10590 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10591 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10592 }
10593
10594 mask = ix86_build_signbit_mask (mode, 0, 0);
10595
10596 if (mode == SFmode)
10597 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10598 else
10599 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10600 }
10601 else
10602 {
10603 nmask = ix86_build_signbit_mask (mode, 0, 1);
10604 mask = ix86_build_signbit_mask (mode, 0, 0);
10605
10606 if (mode == SFmode)
10607 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10608 else
10609 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10610 }
10611 }
10612
10613 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10614 be a constant, and so has already been expanded into a vector constant. */
10615
10616 void
10617 ix86_split_copysign_const (rtx operands[])
10618 {
10619 enum machine_mode mode, vmode;
10620 rtx dest, op0, op1, mask, x;
10621
10622 dest = operands[0];
10623 op0 = operands[1];
10624 op1 = operands[2];
10625 mask = operands[3];
10626
10627 mode = GET_MODE (dest);
10628 vmode = GET_MODE (mask);
10629
10630 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10631 x = gen_rtx_AND (vmode, dest, mask);
10632 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10633
10634 if (op0 != CONST0_RTX (vmode))
10635 {
10636 x = gen_rtx_IOR (vmode, dest, op0);
10637 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10638 }
10639 }
10640
10641 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10642 so we have to do two masks. */
10643
10644 void
10645 ix86_split_copysign_var (rtx operands[])
10646 {
10647 enum machine_mode mode, vmode;
10648 rtx dest, scratch, op0, op1, mask, nmask, x;
10649
10650 dest = operands[0];
10651 scratch = operands[1];
10652 op0 = operands[2];
10653 op1 = operands[3];
10654 nmask = operands[4];
10655 mask = operands[5];
10656
10657 mode = GET_MODE (dest);
10658 vmode = GET_MODE (mask);
10659
10660 if (rtx_equal_p (op0, op1))
10661 {
10662 /* Shouldn't happen often (it's useless, obviously), but when it does
10663 we'd generate incorrect code if we continue below. */
10664 emit_move_insn (dest, op0);
10665 return;
10666 }
10667
10668 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10669 {
10670 gcc_assert (REGNO (op1) == REGNO (scratch));
10671
10672 x = gen_rtx_AND (vmode, scratch, mask);
10673 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10674
10675 dest = mask;
10676 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10677 x = gen_rtx_NOT (vmode, dest);
10678 x = gen_rtx_AND (vmode, x, op0);
10679 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10680 }
10681 else
10682 {
10683 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10684 {
10685 x = gen_rtx_AND (vmode, scratch, mask);
10686 }
10687 else /* alternative 2,4 */
10688 {
10689 gcc_assert (REGNO (mask) == REGNO (scratch));
10690 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10691 x = gen_rtx_AND (vmode, scratch, op1);
10692 }
10693 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10694
10695 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10696 {
10697 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10698 x = gen_rtx_AND (vmode, dest, nmask);
10699 }
10700 else /* alternative 3,4 */
10701 {
10702 gcc_assert (REGNO (nmask) == REGNO (dest));
10703 dest = nmask;
10704 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10705 x = gen_rtx_AND (vmode, dest, op0);
10706 }
10707 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10708 }
10709
10710 x = gen_rtx_IOR (vmode, dest, scratch);
10711 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10712 }
10713
10714 /* Return TRUE or FALSE depending on whether the first SET in INSN
10715 has source and destination with matching CC modes, and that the
10716 CC mode is at least as constrained as REQ_MODE. */
10717
10718 int
10719 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10720 {
10721 rtx set;
10722 enum machine_mode set_mode;
10723
10724 set = PATTERN (insn);
10725 if (GET_CODE (set) == PARALLEL)
10726 set = XVECEXP (set, 0, 0);
10727 gcc_assert (GET_CODE (set) == SET);
10728 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10729
10730 set_mode = GET_MODE (SET_DEST (set));
10731 switch (set_mode)
10732 {
10733 case CCNOmode:
10734 if (req_mode != CCNOmode
10735 && (req_mode != CCmode
10736 || XEXP (SET_SRC (set), 1) != const0_rtx))
10737 return 0;
10738 break;
10739 case CCmode:
10740 if (req_mode == CCGCmode)
10741 return 0;
10742 /* FALLTHRU */
10743 case CCGCmode:
10744 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10745 return 0;
10746 /* FALLTHRU */
10747 case CCGOCmode:
10748 if (req_mode == CCZmode)
10749 return 0;
10750 /* FALLTHRU */
10751 case CCZmode:
10752 break;
10753
10754 default:
10755 gcc_unreachable ();
10756 }
10757
10758 return (GET_MODE (SET_SRC (set)) == set_mode);
10759 }
10760
10761 /* Generate insn patterns to do an integer compare of OPERANDS. */
10762
10763 static rtx
10764 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10765 {
10766 enum machine_mode cmpmode;
10767 rtx tmp, flags;
10768
10769 cmpmode = SELECT_CC_MODE (code, op0, op1);
10770 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10771
10772 /* This is very simple, but making the interface the same as in the
10773 FP case makes the rest of the code easier. */
10774 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10775 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10776
10777 /* Return the test that should be put into the flags user, i.e.
10778 the bcc, scc, or cmov instruction. */
10779 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10780 }
10781
10782 /* Figure out whether to use ordered or unordered fp comparisons.
10783 Return the appropriate mode to use. */
10784
10785 enum machine_mode
10786 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10787 {
10788 /* ??? In order to make all comparisons reversible, we do all comparisons
10789 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10790 between trapping and nontrapping forms of comparisons, we can make inequality
10791 comparisons trapping again, since it results in better code when using
10792 FCOM based compares. */
10793 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10794 }
10795
10796 enum machine_mode
10797 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10798 {
10799 enum machine_mode mode = GET_MODE (op0);
10800
10801 if (SCALAR_FLOAT_MODE_P (mode))
10802 {
10803 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10804 return ix86_fp_compare_mode (code);
10805 }
10806
10807 switch (code)
10808 {
10809 /* Only zero flag is needed. */
10810 case EQ: /* ZF=0 */
10811 case NE: /* ZF!=0 */
10812 return CCZmode;
10813 /* Codes needing carry flag. */
10814 case GEU: /* CF=0 */
10815 case GTU: /* CF=0 & ZF=0 */
10816 case LTU: /* CF=1 */
10817 case LEU: /* CF=1 | ZF=1 */
10818 return CCmode;
10819 /* Codes possibly doable only with sign flag when
10820 comparing against zero. */
10821 case GE: /* SF=OF or SF=0 */
10822 case LT: /* SF<>OF or SF=1 */
10823 if (op1 == const0_rtx)
10824 return CCGOCmode;
10825 else
10826 /* For other cases Carry flag is not required. */
10827 return CCGCmode;
10828 /* Codes doable only with the sign flag when comparing
10829 against zero, but we miss the jump instruction for it,
10830 so we need to use relational tests against overflow,
10831 which thus needs to be zero. */
10832 case GT: /* ZF=0 & SF=OF */
10833 case LE: /* ZF=1 | SF<>OF */
10834 if (op1 == const0_rtx)
10835 return CCNOmode;
10836 else
10837 return CCGCmode;
10838 /* The strcmp pattern does (use flags), and combine may ask us for the
10839 proper mode. */
10840 case USE:
10841 return CCmode;
10842 default:
10843 gcc_unreachable ();
10844 }
10845 }
10846
10847 /* Return the fixed registers used for condition codes. */
10848
10849 static bool
10850 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10851 {
10852 *p1 = FLAGS_REG;
10853 *p2 = FPSR_REG;
10854 return true;
10855 }
10856
10857 /* If two condition code modes are compatible, return a condition code
10858 mode which is compatible with both. Otherwise, return
10859 VOIDmode. */
10860
10861 static enum machine_mode
10862 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10863 {
10864 if (m1 == m2)
10865 return m1;
10866
10867 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10868 return VOIDmode;
10869
10870 if ((m1 == CCGCmode && m2 == CCGOCmode)
10871 || (m1 == CCGOCmode && m2 == CCGCmode))
10872 return CCGCmode;
10873
10874 switch (m1)
10875 {
10876 default:
10877 gcc_unreachable ();
10878
10879 case CCmode:
10880 case CCGCmode:
10881 case CCGOCmode:
10882 case CCNOmode:
10883 case CCZmode:
10884 switch (m2)
10885 {
10886 default:
10887 return VOIDmode;
10888
10889 case CCmode:
10890 case CCGCmode:
10891 case CCGOCmode:
10892 case CCNOmode:
10893 case CCZmode:
10894 return CCmode;
10895 }
10896
10897 case CCFPmode:
10898 case CCFPUmode:
10899 /* These are only compatible with themselves, which we already
10900 checked above. */
10901 return VOIDmode;
10902 }
10903 }
10904
10905 /* Split comparison code CODE into comparisons we can do using branch
10906 instructions. BYPASS_CODE is the comparison code for the branch that will
10907 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10908 is not required, its code is set to UNKNOWN.
10909 We never require more than two branches. */
10910
10911 void
10912 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10913 enum rtx_code *first_code,
10914 enum rtx_code *second_code)
10915 {
10916 *first_code = code;
10917 *bypass_code = UNKNOWN;
10918 *second_code = UNKNOWN;
10919
10920 /* The fcomi comparison sets flags as follows:
10921
10922 cmp ZF PF CF
10923 > 0 0 0
10924 < 0 0 1
10925 = 1 0 0
10926 un 1 1 1 */
10927
10928 switch (code)
10929 {
10930 case GT: /* GTU - CF=0 & ZF=0 */
10931 case GE: /* GEU - CF=0 */
10932 case ORDERED: /* PF=0 */
10933 case UNORDERED: /* PF=1 */
10934 case UNEQ: /* EQ - ZF=1 */
10935 case UNLT: /* LTU - CF=1 */
10936 case UNLE: /* LEU - CF=1 | ZF=1 */
10937 case LTGT: /* EQ - ZF=0 */
10938 break;
10939 case LT: /* LTU - CF=1 - fails on unordered */
10940 *first_code = UNLT;
10941 *bypass_code = UNORDERED;
10942 break;
10943 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10944 *first_code = UNLE;
10945 *bypass_code = UNORDERED;
10946 break;
10947 case EQ: /* EQ - ZF=1 - fails on unordered */
10948 *first_code = UNEQ;
10949 *bypass_code = UNORDERED;
10950 break;
10951 case NE: /* NE - ZF=0 - fails on unordered */
10952 *first_code = LTGT;
10953 *second_code = UNORDERED;
10954 break;
10955 case UNGE: /* GEU - CF=0 - fails on unordered */
10956 *first_code = GE;
10957 *second_code = UNORDERED;
10958 break;
10959 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10960 *first_code = GT;
10961 *second_code = UNORDERED;
10962 break;
10963 default:
10964 gcc_unreachable ();
10965 }
10966 if (!TARGET_IEEE_FP)
10967 {
10968 *second_code = UNKNOWN;
10969 *bypass_code = UNKNOWN;
10970 }
10971 }
10972
/* Return the cost of a comparison done with fcom followed by arithmetic
   operations on AX.
   All following functions use the number of instructions as the cost metric.
   In the future this should be tweaked to compute bytes for optimize_size
   and to take into account the performance of various instructions on
   various CPUs.  */
10977 static int
10978 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10979 {
10980 if (!TARGET_IEEE_FP)
10981 return 4;
10982 /* The cost of code output by ix86_expand_fp_compare. */
10983 switch (code)
10984 {
10985 case UNLE:
10986 case UNLT:
10987 case LTGT:
10988 case GT:
10989 case GE:
10990 case UNORDERED:
10991 case ORDERED:
10992 case UNEQ:
10993 return 4;
10994 break;
10995 case LT:
10996 case NE:
10997 case EQ:
10998 case UNGE:
10999 return 5;
11000 break;
11001 case LE:
11002 case UNGT:
11003 return 6;
11004 break;
11005 default:
11006 gcc_unreachable ();
11007 }
11008 }
11009
11010 /* Return cost of comparison done using fcomi operation.
11011 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11012 static int
11013 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11014 {
11015 enum rtx_code bypass_code, first_code, second_code;
11016 /* Return arbitrarily high cost when instruction is not supported - this
11017 prevents gcc from using it. */
11018 if (!TARGET_CMOVE)
11019 return 1024;
11020 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11021 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11022 }
11023
11024 /* Return cost of comparison done using sahf operation.
11025 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11026 static int
11027 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11028 {
11029 enum rtx_code bypass_code, first_code, second_code;
  /* Return arbitrarily high cost when the instruction is not preferred - this
     prevents gcc from using it.  */
11032 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11033 return 1024;
11034 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11035 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11036 }
11037
11038 /* Compute cost of the comparison done using any method.
11039 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11040 static int
11041 ix86_fp_comparison_cost (enum rtx_code code)
11042 {
11043 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11044 int min;
11045
11046 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11047 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11048
11049 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11050 if (min > sahf_cost)
11051 min = sahf_cost;
11052 if (min > fcomi_cost)
11053 min = fcomi_cost;
11054 return min;
11055 }
11056
11057 /* Return true if we should use an FCOMI instruction for this
11058 fp comparison. */
11059
11060 int
ix86_use_fcomi_compare (enum rtx_code code)
11062 {
11063 enum rtx_code swapped_code = swap_condition (code);
11064
11065 return ((ix86_fp_comparison_cost (code)
11066 == ix86_fp_comparison_fcomi_cost (code))
11067 || (ix86_fp_comparison_cost (swapped_code)
11068 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11069 }
11070
11071 /* Swap, force into registers, or otherwise massage the two operands
11072 to a fp comparison. The operands are updated in place; the new
11073 comparison code is returned. */
11074
11075 static enum rtx_code
11076 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11077 {
11078 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11079 rtx op0 = *pop0, op1 = *pop1;
11080 enum machine_mode op_mode = GET_MODE (op0);
11081 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11082
11083 /* All of the unordered compare instructions only work on registers.
11084 The same is true of the fcomi compare instructions. The XFmode
11085 compare instructions require registers except when comparing
11086 against zero or when converting operand 1 from fixed point to
11087 floating point. */
11088
11089 if (!is_sse
11090 && (fpcmp_mode == CCFPUmode
11091 || (op_mode == XFmode
11092 && ! (standard_80387_constant_p (op0) == 1
11093 || standard_80387_constant_p (op1) == 1)
11094 && GET_CODE (op1) != FLOAT)
11095 || ix86_use_fcomi_compare (code)))
11096 {
11097 op0 = force_reg (op_mode, op0);
11098 op1 = force_reg (op_mode, op1);
11099 }
11100 else
11101 {
11102 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11103 things around if they appear profitable, otherwise force op0
11104 into a register. */
11105
11106 if (standard_80387_constant_p (op0) == 0
11107 || (MEM_P (op0)
11108 && ! (standard_80387_constant_p (op1) == 0
11109 || MEM_P (op1))))
11110 {
11111 rtx tmp;
11112 tmp = op0, op0 = op1, op1 = tmp;
11113 code = swap_condition (code);
11114 }
11115
11116 if (!REG_P (op0))
11117 op0 = force_reg (op_mode, op0);
11118
11119 if (CONSTANT_P (op1))
11120 {
11121 int tmp = standard_80387_constant_p (op1);
11122 if (tmp == 0)
11123 op1 = validize_mem (force_const_mem (op_mode, op1));
11124 else if (tmp == 1)
11125 {
11126 if (TARGET_CMOVE)
11127 op1 = force_reg (op_mode, op1);
11128 }
11129 else
11130 op1 = force_reg (op_mode, op1);
11131 }
11132 }
11133
11134 /* Try to rearrange the comparison to make it cheaper. */
11135 if (ix86_fp_comparison_cost (code)
11136 > ix86_fp_comparison_cost (swap_condition (code))
11137 && (REG_P (op1) || !no_new_pseudos))
11138 {
11139 rtx tmp;
11140 tmp = op0, op0 = op1, op1 = tmp;
11141 code = swap_condition (code);
11142 if (!REG_P (op0))
11143 op0 = force_reg (op_mode, op0);
11144 }
11145
11146 *pop0 = op0;
11147 *pop1 = op1;
11148 return code;
11149 }
11150
11151 /* Convert comparison codes we use to represent FP comparison to integer
11152 code that will result in proper branch. Return UNKNOWN if no such code
11153 is available. */
11154
11155 enum rtx_code
11156 ix86_fp_compare_code_to_integer (enum rtx_code code)
11157 {
11158 switch (code)
11159 {
11160 case GT:
11161 return GTU;
11162 case GE:
11163 return GEU;
11164 case ORDERED:
11165 case UNORDERED:
11166 return code;
11167 break;
11168 case UNEQ:
11169 return EQ;
11170 break;
11171 case UNLT:
11172 return LTU;
11173 break;
11174 case UNLE:
11175 return LEU;
11176 break;
11177 case LTGT:
11178 return NE;
11179 break;
11180 default:
11181 return UNKNOWN;
11182 }
11183 }
11184
11185 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11186
11187 static rtx
11188 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11189 rtx *second_test, rtx *bypass_test)
11190 {
11191 enum machine_mode fpcmp_mode, intcmp_mode;
11192 rtx tmp, tmp2;
11193 int cost = ix86_fp_comparison_cost (code);
11194 enum rtx_code bypass_code, first_code, second_code;
11195
11196 fpcmp_mode = ix86_fp_compare_mode (code);
11197 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11198
11199 if (second_test)
11200 *second_test = NULL_RTX;
11201 if (bypass_test)
11202 *bypass_test = NULL_RTX;
11203
11204 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11205
11206 /* Do fcomi/sahf based test when profitable. */
11207 if ((TARGET_CMOVE || TARGET_SAHF)
11208 && (bypass_code == UNKNOWN || bypass_test)
11209 && (second_code == UNKNOWN || second_test)
11210 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11211 {
11212 if (TARGET_CMOVE)
11213 {
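	  /* With TARGET_CMOVE the compare pattern below expands to fcomi,
	     which sets the flags register directly.  */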
11214 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11215 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11216 tmp);
11217 emit_insn (tmp);
11218 }
11219 else
11220 {
11221 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11222 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11223 if (!scratch)
11224 scratch = gen_reg_rtx (HImode);
11225 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11226 emit_insn (gen_x86_sahf_1 (scratch));
11227 }
11228
11229 /* The FP codes work out to act like unsigned. */
11230 intcmp_mode = fpcmp_mode;
11231 code = first_code;
11232 if (bypass_code != UNKNOWN)
11233 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11234 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11235 const0_rtx);
11236 if (second_code != UNKNOWN)
11237 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11238 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11239 const0_rtx);
11240 }
11241 else
11242 {
11243 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11244 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11245 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11246 if (!scratch)
11247 scratch = gen_reg_rtx (HImode);
11248 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11249
11250 /* In the unordered case, we have to check C2 for NaN's, which
11251 doesn't happen to work out to anything nice combination-wise.
11252 So do some bit twiddling on the value we've got in AH to come
11253 up with an appropriate set of condition codes. */
11254
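      /* AH here holds the top byte of the FPU status word, where C0 is
	 bit 0x01, C2 is bit 0x04 and C3 is bit 0x40; sahf copies them to
	 CF, PF and ZF respectively.  Hence the 0x45, 0x44, 0x40, 0x05,
	 0x04 and 0x01 masks used in the cases below.  */
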
11255 intcmp_mode = CCNOmode;
11256 switch (code)
11257 {
11258 case GT:
11259 case UNGT:
11260 if (code == GT || !TARGET_IEEE_FP)
11261 {
11262 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11263 code = EQ;
11264 }
11265 else
11266 {
11267 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11268 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11269 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11270 intcmp_mode = CCmode;
11271 code = GEU;
11272 }
11273 break;
11274 case LT:
11275 case UNLT:
11276 if (code == LT && TARGET_IEEE_FP)
11277 {
11278 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11279 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11280 intcmp_mode = CCmode;
11281 code = EQ;
11282 }
11283 else
11284 {
11285 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11286 code = NE;
11287 }
11288 break;
11289 case GE:
11290 case UNGE:
11291 if (code == GE || !TARGET_IEEE_FP)
11292 {
11293 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11294 code = EQ;
11295 }
11296 else
11297 {
11298 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11299 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11300 GEN_INT (0x01)));
11301 code = NE;
11302 }
11303 break;
11304 case LE:
11305 case UNLE:
11306 if (code == LE && TARGET_IEEE_FP)
11307 {
11308 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11309 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11310 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11311 intcmp_mode = CCmode;
11312 code = LTU;
11313 }
11314 else
11315 {
11316 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11317 code = NE;
11318 }
11319 break;
11320 case EQ:
11321 case UNEQ:
11322 if (code == EQ && TARGET_IEEE_FP)
11323 {
11324 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11325 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11326 intcmp_mode = CCmode;
11327 code = EQ;
11328 }
11329 else
11330 {
11331 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11332 code = NE;
11333 break;
11334 }
11335 break;
11336 case NE:
11337 case LTGT:
11338 if (code == NE && TARGET_IEEE_FP)
11339 {
11340 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11341 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11342 GEN_INT (0x40)));
11343 code = NE;
11344 }
11345 else
11346 {
11347 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11348 code = EQ;
11349 }
11350 break;
11351
11352 case UNORDERED:
11353 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11354 code = NE;
11355 break;
11356 case ORDERED:
11357 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11358 code = EQ;
11359 break;
11360
11361 default:
11362 gcc_unreachable ();
11363 }
11364 }
11365
11366 /* Return the test that should be put into the flags user, i.e.
11367 the bcc, scc, or cmov instruction. */
11368 return gen_rtx_fmt_ee (code, VOIDmode,
11369 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11370 const0_rtx);
11371 }
11372
11373 rtx
11374 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11375 {
11376 rtx op0, op1, ret;
11377 op0 = ix86_compare_op0;
11378 op1 = ix86_compare_op1;
11379
11380 if (second_test)
11381 *second_test = NULL_RTX;
11382 if (bypass_test)
11383 *bypass_test = NULL_RTX;
11384
11385 if (ix86_compare_emitted)
11386 {
11387 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11388 ix86_compare_emitted = NULL_RTX;
11389 }
11390 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11391 {
11392 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11393 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11394 second_test, bypass_test);
11395 }
11396 else
11397 ret = ix86_expand_int_compare (code, op0, op1);
11398
11399 return ret;
11400 }
11401
/* Return true if the CODE will result in a nontrivial jump sequence.  */
11403 bool
11404 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11405 {
11406 enum rtx_code bypass_code, first_code, second_code;
11407 if (!TARGET_CMOVE)
11408 return true;
11409 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11410 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11411 }
11412
11413 void
11414 ix86_expand_branch (enum rtx_code code, rtx label)
11415 {
11416 rtx tmp;
11417
  /* If we have emitted a compare insn, go straight to simple.
     ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
11421 if (ix86_compare_emitted)
11422 goto simple;
11423
11424 switch (GET_MODE (ix86_compare_op0))
11425 {
11426 case QImode:
11427 case HImode:
11428 case SImode:
11429 simple:
11430 tmp = ix86_expand_compare (code, NULL, NULL);
11431 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11432 gen_rtx_LABEL_REF (VOIDmode, label),
11433 pc_rtx);
11434 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11435 return;
11436
11437 case SFmode:
11438 case DFmode:
11439 case XFmode:
11440 {
11441 rtvec vec;
11442 int use_fcomi;
11443 enum rtx_code bypass_code, first_code, second_code;
11444
11445 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11446 &ix86_compare_op1);
11447
11448 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11449
	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn so as not to confuse the optimizers.  */
11453 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11454 && TARGET_CMOVE)
11455 {
11456 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11457 gen_rtx_LABEL_REF (VOIDmode, label),
11458 pc_rtx, NULL_RTX, NULL_RTX);
11459 }
11460 else
11461 {
11462 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11463 ix86_compare_op0, ix86_compare_op1);
11464 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11465 gen_rtx_LABEL_REF (VOIDmode, label),
11466 pc_rtx);
11467 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11468
11469 use_fcomi = ix86_use_fcomi_compare (code);
11470 vec = rtvec_alloc (3 + !use_fcomi);
11471 RTVEC_ELT (vec, 0) = tmp;
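	    /* Clobber the FP status word and the flags register
	       (hard registers 18 and 17, i.e. FPSR_REG and FLAGS_REG).  */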
11472 RTVEC_ELT (vec, 1)
11473 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11474 RTVEC_ELT (vec, 2)
11475 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11476 if (! use_fcomi)
11477 RTVEC_ELT (vec, 3)
11478 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11479
11480 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11481 }
11482 return;
11483 }
11484
11485 case DImode:
11486 if (TARGET_64BIT)
11487 goto simple;
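      /* FALLTHRU */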
11488 case TImode:
      /* Expand a double-word (DImode or TImode) branch into multiple
	 compare+branch.  */
11490 {
11491 rtx lo[2], hi[2], label2;
11492 enum rtx_code code1, code2, code3;
11493 enum machine_mode submode;
11494
11495 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11496 {
11497 tmp = ix86_compare_op0;
11498 ix86_compare_op0 = ix86_compare_op1;
11499 ix86_compare_op1 = tmp;
11500 code = swap_condition (code);
11501 }
11502 if (GET_MODE (ix86_compare_op0) == DImode)
11503 {
11504 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11505 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11506 submode = SImode;
11507 }
11508 else
11509 {
11510 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11511 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11512 submode = DImode;
11513 }
11514
11515 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11516 avoid two branches. This costs one extra insn, so disable when
11517 optimizing for size. */
11518
11519 if ((code == EQ || code == NE)
11520 && (!optimize_size
11521 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11522 {
11523 rtx xor0, xor1;
11524
11525 xor1 = hi[0];
11526 if (hi[1] != const0_rtx)
11527 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11528 NULL_RTX, 0, OPTAB_WIDEN);
11529
11530 xor0 = lo[0];
11531 if (lo[1] != const0_rtx)
11532 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11533 NULL_RTX, 0, OPTAB_WIDEN);
11534
11535 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11536 NULL_RTX, 0, OPTAB_WIDEN);
11537
11538 ix86_compare_op0 = tmp;
11539 ix86_compare_op1 = const0_rtx;
11540 ix86_expand_branch (code, label);
11541 return;
11542 }
11543
	/* Otherwise, if we are doing less-than or greater-than-or-equal,
	   op1 is a constant and the low word is zero, then we can just
	   examine the high word.  */
11547
11548 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11549 switch (code)
11550 {
11551 case LT: case LTU: case GE: case GEU:
11552 ix86_compare_op0 = hi[0];
11553 ix86_compare_op1 = hi[1];
11554 ix86_expand_branch (code, label);
11555 return;
11556 default:
11557 break;
11558 }
11559
11560 /* Otherwise, we need two or three jumps. */
11561
11562 label2 = gen_label_rtx ();
11563
11564 code1 = code;
11565 code2 = swap_condition (code);
11566 code3 = unsigned_condition (code);
11567
11568 switch (code)
11569 {
11570 case LT: case GT: case LTU: case GTU:
11571 break;
11572
11573 case LE: code1 = LT; code2 = GT; break;
11574 case GE: code1 = GT; code2 = LT; break;
11575 case LEU: code1 = LTU; code2 = GTU; break;
11576 case GEU: code1 = GTU; code2 = LTU; break;
11577
11578 case EQ: code1 = UNKNOWN; code2 = NE; break;
11579 case NE: code2 = UNKNOWN; break;
11580
11581 default:
11582 gcc_unreachable ();
11583 }
11584
11585 /*
11586 * a < b =>
11587 * if (hi(a) < hi(b)) goto true;
11588 * if (hi(a) > hi(b)) goto false;
11589 * if (lo(a) < lo(b)) goto true;
11590 * false:
11591 */
11592
11593 ix86_compare_op0 = hi[0];
11594 ix86_compare_op1 = hi[1];
11595
11596 if (code1 != UNKNOWN)
11597 ix86_expand_branch (code1, label);
11598 if (code2 != UNKNOWN)
11599 ix86_expand_branch (code2, label2);
11600
11601 ix86_compare_op0 = lo[0];
11602 ix86_compare_op1 = lo[1];
11603 ix86_expand_branch (code3, label);
11604
11605 if (code2 != UNKNOWN)
11606 emit_label (label2);
11607 return;
11608 }
11609
11610 default:
11611 gcc_unreachable ();
11612 }
11613 }
11614
11615 /* Split branch based on floating point condition. */
11616 void
11617 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11618 rtx target1, rtx target2, rtx tmp, rtx pushed)
11619 {
11620 rtx second, bypass;
11621 rtx label = NULL_RTX;
11622 rtx condition;
11623 int bypass_probability = -1, second_probability = -1, probability = -1;
11624 rtx i;
11625
11626 if (target2 != pc_rtx)
11627 {
11628 rtx tmp = target2;
11629 code = reverse_condition_maybe_unordered (code);
11630 target2 = target1;
11631 target1 = tmp;
11632 }
11633
11634 condition = ix86_expand_fp_compare (code, op1, op2,
11635 tmp, &second, &bypass);
11636
11637 /* Remove pushed operand from stack. */
11638 if (pushed)
11639 ix86_free_from_memory (GET_MODE (pushed));
11640
11641 if (split_branch_probability >= 0)
11642 {
11643 /* Distribute the probabilities across the jumps.
11644 Assume the BYPASS and SECOND to be always test
11645 for UNORDERED. */
11646 probability = split_branch_probability;
11647
      /* A value of 1 is low enough that there is no need to update the
	 probability.  Later we may run some experiments and see whether
	 unordered values are more frequent in practice.  */
11651 if (bypass)
11652 bypass_probability = 1;
11653 if (second)
11654 second_probability = 1;
11655 }
11656 if (bypass != NULL_RTX)
11657 {
11658 label = gen_label_rtx ();
11659 i = emit_jump_insn (gen_rtx_SET
11660 (VOIDmode, pc_rtx,
11661 gen_rtx_IF_THEN_ELSE (VOIDmode,
11662 bypass,
11663 gen_rtx_LABEL_REF (VOIDmode,
11664 label),
11665 pc_rtx)));
11666 if (bypass_probability >= 0)
11667 REG_NOTES (i)
11668 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11669 GEN_INT (bypass_probability),
11670 REG_NOTES (i));
11671 }
11672 i = emit_jump_insn (gen_rtx_SET
11673 (VOIDmode, pc_rtx,
11674 gen_rtx_IF_THEN_ELSE (VOIDmode,
11675 condition, target1, target2)));
11676 if (probability >= 0)
11677 REG_NOTES (i)
11678 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11679 GEN_INT (probability),
11680 REG_NOTES (i));
11681 if (second != NULL_RTX)
11682 {
11683 i = emit_jump_insn (gen_rtx_SET
11684 (VOIDmode, pc_rtx,
11685 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11686 target2)));
11687 if (second_probability >= 0)
11688 REG_NOTES (i)
11689 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11690 GEN_INT (second_probability),
11691 REG_NOTES (i));
11692 }
11693 if (label != NULL_RTX)
11694 emit_label (label);
11695 }
11696
11697 int
11698 ix86_expand_setcc (enum rtx_code code, rtx dest)
11699 {
11700 rtx ret, tmp, tmpreg, equiv;
11701 rtx second_test, bypass_test;
11702
11703 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11704 return 0; /* FAIL */
11705
11706 gcc_assert (GET_MODE (dest) == QImode);
11707
11708 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11709 PUT_MODE (ret, QImode);
11710
11711 tmp = dest;
11712 tmpreg = dest;
11713
11714 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11715 if (bypass_test || second_test)
11716 {
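      /* Combine the extra test with the main result: a reversed bypass
	 test is ANDed in, forcing a zero result when the bypass condition
	 holds, while a second test is ORed in.  */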
11717 rtx test = second_test;
11718 int bypass = 0;
11719 rtx tmp2 = gen_reg_rtx (QImode);
11720 if (bypass_test)
11721 {
11722 gcc_assert (!second_test);
11723 test = bypass_test;
11724 bypass = 1;
11725 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11726 }
11727 PUT_MODE (test, QImode);
11728 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11729
11730 if (bypass)
11731 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11732 else
11733 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11734 }
11735
11736 /* Attach a REG_EQUAL note describing the comparison result. */
11737 if (ix86_compare_op0 && ix86_compare_op1)
11738 {
11739 equiv = simplify_gen_relational (code, QImode,
11740 GET_MODE (ix86_compare_op0),
11741 ix86_compare_op0, ix86_compare_op1);
11742 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11743 }
11744
11745 return 1; /* DONE */
11746 }
11747
11748 /* Expand comparison setting or clearing carry flag. Return true when
11749 successful and set pop for the operation. */
11750 static bool
11751 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11752 {
11753 enum machine_mode mode =
11754 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11755
  /* Do not handle double-word compares, which go through a special path.  */
11758 if (mode == (TARGET_64BIT ? TImode : DImode))
11759 return false;
11760
11761 if (SCALAR_FLOAT_MODE_P (mode))
11762 {
11763 rtx second_test = NULL, bypass_test = NULL;
11764 rtx compare_op, compare_seq;
11765
11766 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11767
11768 /* Shortcut: following common codes never translate
11769 into carry flag compares. */
11770 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11771 || code == ORDERED || code == UNORDERED)
11772 return false;
11773
      /* These comparisons require the zero flag; swap the operands so that
	 they no longer do.  */
11775 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11776 && !TARGET_IEEE_FP)
11777 {
11778 rtx tmp = op0;
11779 op0 = op1;
11780 op1 = tmp;
11781 code = swap_condition (code);
11782 }
11783
      /* Try to expand the comparison and verify that we end up with a carry
	 flag based comparison.  This fails to be true only when we decide to
	 expand the comparison using arithmetic, which is not a common
	 scenario.  */
11787 start_sequence ();
11788 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11789 &second_test, &bypass_test);
11790 compare_seq = get_insns ();
11791 end_sequence ();
11792
11793 if (second_test || bypass_test)
11794 return false;
11795 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11796 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11797 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11798 else
11799 code = GET_CODE (compare_op);
11800 if (code != LTU && code != GEU)
11801 return false;
11802 emit_insn (compare_seq);
11803 *pop = compare_op;
11804 return true;
11805 }
11806 if (!INTEGRAL_MODE_P (mode))
11807 return false;
11808 switch (code)
11809 {
11810 case LTU:
11811 case GEU:
11812 break;
11813
11814 /* Convert a==0 into (unsigned)a<1. */
11815 case EQ:
11816 case NE:
11817 if (op1 != const0_rtx)
11818 return false;
11819 op1 = const1_rtx;
11820 code = (code == EQ ? LTU : GEU);
11821 break;
11822
      /* Convert a>b into b<a or a>=b+1.  */
11824 case GTU:
11825 case LEU:
11826 if (CONST_INT_P (op1))
11827 {
11828 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We could still swap the operands, but
	     that would force loading the constant into a register.  */
11831 if (op1 == const0_rtx
11832 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11833 return false;
11834 code = (code == GTU ? GEU : LTU);
11835 }
11836 else
11837 {
11838 rtx tmp = op1;
11839 op1 = op0;
11840 op0 = tmp;
11841 code = (code == GTU ? LTU : GEU);
11842 }
11843 break;
11844
11845 /* Convert a>=0 into (unsigned)a<0x80000000. */
11846 case LT:
11847 case GE:
11848 if (mode == DImode || op1 != const0_rtx)
11849 return false;
11850 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11851 code = (code == LT ? GEU : LTU);
11852 break;
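    /* Similarly, convert a<=-1 (i.e. a<0) and a>-1 (i.e. a>=0) into
       unsigned compares against 0x80000000.  */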
11853 case LE:
11854 case GT:
11855 if (mode == DImode || op1 != constm1_rtx)
11856 return false;
11857 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11858 code = (code == LE ? GEU : LTU);
11859 break;
11860
11861 default:
11862 return false;
11863 }
11864 /* Swapping operands may cause constant to appear as first operand. */
11865 if (!nonimmediate_operand (op0, VOIDmode))
11866 {
11867 if (no_new_pseudos)
11868 return false;
11869 op0 = force_reg (mode, op0);
11870 }
11871 ix86_compare_op0 = op0;
11872 ix86_compare_op1 = op1;
11873 *pop = ix86_expand_compare (code, NULL, NULL);
11874 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11875 return true;
11876 }
11877
11878 int
11879 ix86_expand_int_movcc (rtx operands[])
11880 {
11881 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11882 rtx compare_seq, compare_op;
11883 rtx second_test, bypass_test;
11884 enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
11886
11887 start_sequence ();
11888 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11889 compare_seq = get_insns ();
11890 end_sequence ();
11891
11892 compare_code = GET_CODE (compare_op);
11893
11894 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11895 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11896 sign_bit_compare_p = true;
11897
11898 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11899 HImode insns, we'd be swallowed in word prefix ops. */
11900
11901 if ((mode != HImode || TARGET_FAST_PREFIX)
11902 && (mode != (TARGET_64BIT ? TImode : DImode))
11903 && CONST_INT_P (operands[2])
11904 && CONST_INT_P (operands[3]))
11905 {
11906 rtx out = operands[0];
11907 HOST_WIDE_INT ct = INTVAL (operands[2]);
11908 HOST_WIDE_INT cf = INTVAL (operands[3]);
11909 HOST_WIDE_INT diff;
11910
11911 diff = ct - cf;
      /* Sign bit compares are better done using shifts than using sbb.  */
11914 if (sign_bit_compare_p
11915 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11916 ix86_compare_op1, &compare_op))
11917 {
11918 /* Detect overlap between destination and compare sources. */
11919 rtx tmp = out;
11920
11921 if (!sign_bit_compare_p)
11922 {
11923 bool fpcmp = false;
11924
11925 compare_code = GET_CODE (compare_op);
11926
11927 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11928 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11929 {
11930 fpcmp = true;
11931 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11932 }
11933
11934 /* To simplify rest of code, restrict to the GEU case. */
11935 if (compare_code == LTU)
11936 {
11937 HOST_WIDE_INT tmp = ct;
11938 ct = cf;
11939 cf = tmp;
11940 compare_code = reverse_condition (compare_code);
11941 code = reverse_condition (code);
11942 }
11943 else
11944 {
11945 if (fpcmp)
11946 PUT_CODE (compare_op,
11947 reverse_condition_maybe_unordered
11948 (GET_CODE (compare_op)));
11949 else
11950 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11951 }
11952 diff = ct - cf;
11953
11954 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11955 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11956 tmp = gen_reg_rtx (mode);
11957
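	      /* The x86_mov*cc_0_m1 patterns emit the "sbb reg,reg" idiom,
		 leaving 0 in TMP when the carry flag is clear and -1 when
		 it is set.  */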
11958 if (mode == DImode)
11959 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11960 else
11961 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11962 }
11963 else
11964 {
11965 if (code == GT || code == GE)
11966 code = reverse_condition (code);
11967 else
11968 {
11969 HOST_WIDE_INT tmp = ct;
11970 ct = cf;
11971 cf = tmp;
11972 diff = ct - cf;
11973 }
11974 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11975 ix86_compare_op1, VOIDmode, 0, -1);
11976 }
11977
11978 if (diff == 1)
11979 {
11980 /*
11981 * cmpl op0,op1
11982 * sbbl dest,dest
11983 * [addl dest, ct]
11984 *
11985 * Size 5 - 8.
11986 */
11987 if (ct)
11988 tmp = expand_simple_binop (mode, PLUS,
11989 tmp, GEN_INT (ct),
11990 copy_rtx (tmp), 1, OPTAB_DIRECT);
11991 }
11992 else if (cf == -1)
11993 {
11994 /*
11995 * cmpl op0,op1
11996 * sbbl dest,dest
11997 * orl $ct, dest
11998 *
11999 * Size 8.
12000 */
12001 tmp = expand_simple_binop (mode, IOR,
12002 tmp, GEN_INT (ct),
12003 copy_rtx (tmp), 1, OPTAB_DIRECT);
12004 }
12005 else if (diff == -1 && ct)
12006 {
12007 /*
12008 * cmpl op0,op1
12009 * sbbl dest,dest
12010 * notl dest
12011 * [addl dest, cf]
12012 *
12013 * Size 8 - 11.
12014 */
12015 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12016 if (cf)
12017 tmp = expand_simple_binop (mode, PLUS,
12018 copy_rtx (tmp), GEN_INT (cf),
12019 copy_rtx (tmp), 1, OPTAB_DIRECT);
12020 }
12021 else
12022 {
12023 /*
12024 * cmpl op0,op1
12025 * sbbl dest,dest
12026 * [notl dest]
12027 * andl cf - ct, dest
12028 * [addl dest, ct]
12029 *
12030 * Size 8 - 11.
12031 */
12032
12033 if (cf == 0)
12034 {
12035 cf = ct;
12036 ct = 0;
12037 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12038 }
12039
12040 tmp = expand_simple_binop (mode, AND,
12041 copy_rtx (tmp),
12042 gen_int_mode (cf - ct, mode),
12043 copy_rtx (tmp), 1, OPTAB_DIRECT);
12044 if (ct)
12045 tmp = expand_simple_binop (mode, PLUS,
12046 copy_rtx (tmp), GEN_INT (ct),
12047 copy_rtx (tmp), 1, OPTAB_DIRECT);
12048 }
12049
12050 if (!rtx_equal_p (tmp, out))
12051 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12052
12053 return 1; /* DONE */
12054 }
12055
12056 if (diff < 0)
12057 {
12058 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12059
12060 HOST_WIDE_INT tmp;
12061 tmp = ct, ct = cf, cf = tmp;
12062 diff = -diff;
12063
12064 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12065 {
12066 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12067
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
12072 compare_code = reverse_condition_maybe_unordered (compare_code);
12073 code = reverse_condition_maybe_unordered (code);
12074 }
12075 else
12076 {
12077 compare_code = reverse_condition (compare_code);
12078 code = reverse_condition (code);
12079 }
12080 }
12081
12082 compare_code = UNKNOWN;
12083 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12084 && CONST_INT_P (ix86_compare_op1))
12085 {
12086 if (ix86_compare_op1 == const0_rtx
12087 && (code == LT || code == GE))
12088 compare_code = code;
12089 else if (ix86_compare_op1 == constm1_rtx)
12090 {
12091 if (code == LE)
12092 compare_code = LT;
12093 else if (code == GT)
12094 compare_code = GE;
12095 }
12096 }
12097
12098 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12099 if (compare_code != UNKNOWN
12100 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12101 && (cf == -1 || ct == -1))
12102 {
12103 /* If lea code below could be used, only optimize
12104 if it results in a 2 insn sequence. */
12105
12106 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12107 || diff == 3 || diff == 5 || diff == 9)
12108 || (compare_code == LT && ct == -1)
12109 || (compare_code == GE && cf == -1))
12110 {
12111 /*
12112 * notl op1 (if necessary)
12113 * sarl $31, op1
12114 * orl cf, op1
12115 */
12116 if (ct != -1)
12117 {
12118 cf = ct;
12119 ct = -1;
12120 code = reverse_condition (code);
12121 }
12122
12123 out = emit_store_flag (out, code, ix86_compare_op0,
12124 ix86_compare_op1, VOIDmode, 0, -1);
12125
12126 out = expand_simple_binop (mode, IOR,
12127 out, GEN_INT (cf),
12128 out, 1, OPTAB_DIRECT);
12129 if (out != operands[0])
12130 emit_move_insn (operands[0], out);
12131
12132 return 1; /* DONE */
12133 }
12134 }
12135
12136
12137 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12138 || diff == 3 || diff == 5 || diff == 9)
12139 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12140 && (mode != DImode
12141 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12142 {
12143 /*
12144 * xorl dest,dest
12145 * cmpl op1,op2
12146 * setcc dest
12147 * lea cf(dest*(ct-cf)),dest
12148 *
12149 * Size 14.
12150 *
12151 * This also catches the degenerate setcc-only case.
12152 */
12153
12154 rtx tmp;
12155 int nops;
12156
12157 out = emit_store_flag (out, code, ix86_compare_op0,
12158 ix86_compare_op1, VOIDmode, 0, 1);
12159
12160 nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
12163 if (diff == 1)
12164 tmp = copy_rtx (out);
12165 else
12166 {
12167 rtx out1;
12168 out1 = copy_rtx (out);
12169 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12170 nops++;
12171 if (diff & 1)
12172 {
12173 tmp = gen_rtx_PLUS (mode, tmp, out1);
12174 nops++;
12175 }
12176 }
12177 if (cf != 0)
12178 {
12179 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12180 nops++;
12181 }
12182 if (!rtx_equal_p (tmp, out))
12183 {
12184 if (nops == 1)
12185 out = force_operand (tmp, copy_rtx (out));
12186 else
12187 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12188 }
12189 if (!rtx_equal_p (out, operands[0]))
12190 emit_move_insn (operands[0], copy_rtx (out));
12191
12192 return 1; /* DONE */
12193 }
12194
12195 /*
12196 * General case: Jumpful:
12197 * xorl dest,dest cmpl op1, op2
12198 * cmpl op1, op2 movl ct, dest
12199 * setcc dest jcc 1f
12200 * decl dest movl cf, dest
12201 * andl (cf-ct),dest 1:
12202 * addl ct,dest
12203 *
12204 * Size 20. Size 14.
12205 *
12206 * This is reasonably steep, but branch mispredict costs are
12207 * high on modern cpus, so consider failing only if optimizing
12208 * for space.
12209 */
12210
12211 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12212 && BRANCH_COST >= 2)
12213 {
12214 if (cf == 0)
12215 {
12216 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12217
12218 cf = ct;
12219 ct = 0;
12220
12221 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12222 {
12223 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12224
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
12229 code = reverse_condition_maybe_unordered (code);
12230 }
12231 else
12232 {
12233 code = reverse_condition (code);
12234 if (compare_code != UNKNOWN)
12235 compare_code = reverse_condition (compare_code);
12236 }
12237 }
12238
12239 if (compare_code != UNKNOWN)
12240 {
12241 /* notl op1 (if needed)
12242 sarl $31, op1
12243 andl (cf-ct), op1
12244 addl ct, op1
12245
12246 For x < 0 (resp. x <= -1) there will be no notl,
12247 so if possible swap the constants to get rid of the
12248 complement.
12249 True/false will be -1/0 while code below (store flag
12250 followed by decrement) is 0/-1, so the constants need
12251 to be exchanged once more. */
12252
12253 if (compare_code == GE || !cf)
12254 {
12255 code = reverse_condition (code);
12256 compare_code = LT;
12257 }
12258 else
12259 {
12260 HOST_WIDE_INT tmp = cf;
12261 cf = ct;
12262 ct = tmp;
12263 }
12264
12265 out = emit_store_flag (out, code, ix86_compare_op0,
12266 ix86_compare_op1, VOIDmode, 0, -1);
12267 }
12268 else
12269 {
12270 out = emit_store_flag (out, code, ix86_compare_op0,
12271 ix86_compare_op1, VOIDmode, 0, 1);
12272
12273 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12274 copy_rtx (out), 1, OPTAB_DIRECT);
12275 }
12276
12277 out = expand_simple_binop (mode, AND, copy_rtx (out),
12278 gen_int_mode (cf - ct, mode),
12279 copy_rtx (out), 1, OPTAB_DIRECT);
12280 if (ct)
12281 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12282 copy_rtx (out), 1, OPTAB_DIRECT);
12283 if (!rtx_equal_p (out, operands[0]))
12284 emit_move_insn (operands[0], copy_rtx (out));
12285
12286 return 1; /* DONE */
12287 }
12288 }
12289
12290 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12291 {
12292 /* Try a few things more with specific constants and a variable. */
12293
12294 optab op;
12295 rtx var, orig_out, out, tmp;
12296
12297 if (BRANCH_COST <= 2)
12298 return 0; /* FAIL */
12299
      /* If one of the two operands is an interesting constant, load a 0/-1
	 constant via the code above and mask the variable in with a logical
	 operation.  */
12302
12303 if (CONST_INT_P (operands[2]))
12304 {
12305 var = operands[3];
12306 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12307 operands[3] = constm1_rtx, op = and_optab;
12308 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12309 operands[3] = const0_rtx, op = ior_optab;
12310 else
12311 return 0; /* FAIL */
12312 }
12313 else if (CONST_INT_P (operands[3]))
12314 {
12315 var = operands[2];
12316 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12317 operands[2] = constm1_rtx, op = and_optab;
12318 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12319 operands[2] = const0_rtx, op = ior_optab;
12320 else
12321 return 0; /* FAIL */
12322 }
12323 else
12324 return 0; /* FAIL */
12325
12326 orig_out = operands[0];
12327 tmp = gen_reg_rtx (mode);
12328 operands[0] = tmp;
12329
12330 /* Recurse to get the constant loaded. */
12331 if (ix86_expand_int_movcc (operands) == 0)
12332 return 0; /* FAIL */
12333
12334 /* Mask in the interesting variable. */
12335 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12336 OPTAB_WIDEN);
12337 if (!rtx_equal_p (out, orig_out))
12338 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12339
12340 return 1; /* DONE */
12341 }
12342
12343 /*
12344 * For comparison with above,
12345 *
12346 * movl cf,dest
12347 * movl ct,tmp
12348 * cmpl op1,op2
12349 * cmovcc tmp,dest
12350 *
12351 * Size 15.
12352 */
12353
12354 if (! nonimmediate_operand (operands[2], mode))
12355 operands[2] = force_reg (mode, operands[2]);
12356 if (! nonimmediate_operand (operands[3], mode))
12357 operands[3] = force_reg (mode, operands[3]);
12358
12359 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12360 {
12361 rtx tmp = gen_reg_rtx (mode);
12362 emit_move_insn (tmp, operands[3]);
12363 operands[3] = tmp;
12364 }
12365 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12366 {
12367 rtx tmp = gen_reg_rtx (mode);
12368 emit_move_insn (tmp, operands[2]);
12369 operands[2] = tmp;
12370 }
12371
12372 if (! register_operand (operands[2], VOIDmode)
12373 && (mode == QImode
12374 || ! register_operand (operands[3], VOIDmode)))
12375 operands[2] = force_reg (mode, operands[2]);
12376
12377 if (mode == QImode
12378 && ! register_operand (operands[3], VOIDmode))
12379 operands[3] = force_reg (mode, operands[3]);
12380
12381 emit_insn (compare_seq);
12382 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12383 gen_rtx_IF_THEN_ELSE (mode,
12384 compare_op, operands[2],
12385 operands[3])));
12386 if (bypass_test)
12387 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12388 gen_rtx_IF_THEN_ELSE (mode,
12389 bypass_test,
12390 copy_rtx (operands[3]),
12391 copy_rtx (operands[0]))));
12392 if (second_test)
12393 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12394 gen_rtx_IF_THEN_ELSE (mode,
12395 second_test,
12396 copy_rtx (operands[2]),
12397 copy_rtx (operands[0]))));
12398
12399 return 1; /* DONE */
12400 }
12401
12402 /* Swap, force into registers, or otherwise massage the two operands
12403 to an sse comparison with a mask result. Thus we differ a bit from
12404 ix86_prepare_fp_compare_args which expects to produce a flags result.
12405
12406 The DEST operand exists to help determine whether to commute commutative
12407 operators. The POP0/POP1 operands are updated in place. The new
12408 comparison code is returned, or UNKNOWN if not implementable. */
12409
12410 static enum rtx_code
12411 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12412 rtx *pop0, rtx *pop1)
12413 {
12414 rtx tmp;
12415
12416 switch (code)
12417 {
12418 case LTGT:
12419 case UNEQ:
12420 /* We have no LTGT as an operator. We could implement it with
12421 NE & ORDERED, but this requires an extra temporary. It's
12422 not clear that it's worth it. */
12423 return UNKNOWN;
12424
12425 case LT:
12426 case LE:
12427 case UNGT:
12428 case UNGE:
12429 /* These are supported directly. */
12430 break;
12431
12432 case EQ:
12433 case NE:
12434 case UNORDERED:
12435 case ORDERED:
12436 /* For commutative operators, try to canonicalize the destination
12437 operand to be first in the comparison - this helps reload to
12438 avoid extra moves. */
12439 if (!dest || !rtx_equal_p (dest, *pop1))
12440 break;
12441 /* FALLTHRU */
12442
12443 case GE:
12444 case GT:
12445 case UNLE:
12446 case UNLT:
12447 /* These are not supported directly. Swap the comparison operands
12448 to transform into something that is supported. */
12449 tmp = *pop0;
12450 *pop0 = *pop1;
12451 *pop1 = tmp;
12452 code = swap_condition (code);
12453 break;
12454
12455 default:
12456 gcc_unreachable ();
12457 }
12458
12459 return code;
12460 }
12461
12462 /* Detect conditional moves that exactly match min/max operational
12463 semantics. Note that this is IEEE safe, as long as we don't
12464 interchange the operands.
12465
12466 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12467 and TRUE if the operation is successful and instructions are emitted. */
12468
12469 static bool
12470 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12471 rtx cmp_op1, rtx if_true, rtx if_false)
12472 {
12473 enum machine_mode mode;
12474 bool is_min;
12475 rtx tmp;
12476
12477 if (code == LT)
12478 ;
12479 else if (code == UNGE)
12480 {
12481 tmp = if_true;
12482 if_true = if_false;
12483 if_false = tmp;
12484 }
12485 else
12486 return false;
12487
12488 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12489 is_min = true;
12490 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12491 is_min = false;
12492 else
12493 return false;
12494
12495 mode = GET_MODE (dest);
12496
12497 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12498 but MODE may be a vector mode and thus not appropriate. */
12499 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12500 {
12501 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12502 rtvec v;
12503
12504 if_true = force_reg (mode, if_true);
12505 v = gen_rtvec (2, if_true, if_false);
12506 tmp = gen_rtx_UNSPEC (mode, v, u);
12507 }
12508 else
12509 {
12510 code = is_min ? SMIN : SMAX;
12511 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12512 }
12513
12514 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12515 return true;
12516 }
12517
12518 /* Expand an sse vector comparison. Return the register with the result. */
12519
12520 static rtx
12521 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12522 rtx op_true, rtx op_false)
12523 {
12524 enum machine_mode mode = GET_MODE (dest);
12525 rtx x;
12526
12527 cmp_op0 = force_reg (mode, cmp_op0);
12528 if (!nonimmediate_operand (cmp_op1, mode))
12529 cmp_op1 = force_reg (mode, cmp_op1);
12530
12531 if (optimize
12532 || reg_overlap_mentioned_p (dest, op_true)
12533 || reg_overlap_mentioned_p (dest, op_false))
12534 dest = gen_reg_rtx (mode);
12535
12536 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12537 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12538
12539 return dest;
12540 }
12541
12542 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12543 operations. This is used for both scalar and vector conditional moves. */
12544
12545 static void
12546 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12547 {
12548 enum machine_mode mode = GET_MODE (dest);
12549 rtx t2, t3, x;
12550
12551 if (op_false == CONST0_RTX (mode))
12552 {
12553 op_true = force_reg (mode, op_true);
12554 x = gen_rtx_AND (mode, cmp, op_true);
12555 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12556 }
12557 else if (op_true == CONST0_RTX (mode))
12558 {
12559 op_false = force_reg (mode, op_false);
12560 x = gen_rtx_NOT (mode, cmp);
12561 x = gen_rtx_AND (mode, x, op_false);
12562 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12563 }
12564 else
12565 {
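      /* General case: dest = (cmp & op_true) | (~cmp & op_false).  */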
12566 op_true = force_reg (mode, op_true);
12567 op_false = force_reg (mode, op_false);
12568
12569 t2 = gen_reg_rtx (mode);
12570 if (optimize)
12571 t3 = gen_reg_rtx (mode);
12572 else
12573 t3 = dest;
12574
12575 x = gen_rtx_AND (mode, op_true, cmp);
12576 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12577
12578 x = gen_rtx_NOT (mode, cmp);
12579 x = gen_rtx_AND (mode, x, op_false);
12580 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12581
12582 x = gen_rtx_IOR (mode, t3, t2);
12583 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12584 }
12585 }
12586
12587 /* Expand a floating-point conditional move. Return true if successful. */
12588
12589 int
12590 ix86_expand_fp_movcc (rtx operands[])
12591 {
12592 enum machine_mode mode = GET_MODE (operands[0]);
12593 enum rtx_code code = GET_CODE (operands[1]);
12594 rtx tmp, compare_op, second_test, bypass_test;
12595
12596 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12597 {
12598 enum machine_mode cmode;
12599
12600 /* Since we've no cmove for sse registers, don't force bad register
12601 allocation just to gain access to it. Deny movcc when the
12602 comparison mode doesn't match the move mode. */
12603 cmode = GET_MODE (ix86_compare_op0);
12604 if (cmode == VOIDmode)
12605 cmode = GET_MODE (ix86_compare_op1);
12606 if (cmode != mode)
12607 return 0;
12608
12609 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12610 &ix86_compare_op0,
12611 &ix86_compare_op1);
12612 if (code == UNKNOWN)
12613 return 0;
12614
12615 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12616 ix86_compare_op1, operands[2],
12617 operands[3]))
12618 return 1;
12619
12620 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12621 ix86_compare_op1, operands[2], operands[3]);
12622 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12623 return 1;
12624 }
12625
12626 /* The floating point conditional move instructions don't directly
12627 support conditions resulting from a signed integer comparison. */
12628
12629 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12630
12634 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12635 {
12636 gcc_assert (!second_test && !bypass_test);
12637 tmp = gen_reg_rtx (QImode);
12638 ix86_expand_setcc (code, tmp);
12639 code = NE;
12640 ix86_compare_op0 = tmp;
12641 ix86_compare_op1 = const0_rtx;
12642 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12643 }
12644 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12645 {
12646 tmp = gen_reg_rtx (mode);
12647 emit_move_insn (tmp, operands[3]);
12648 operands[3] = tmp;
12649 }
12650 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12651 {
12652 tmp = gen_reg_rtx (mode);
12653 emit_move_insn (tmp, operands[2]);
12654 operands[2] = tmp;
12655 }
12656
12657 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12658 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12659 operands[2], operands[3])));
12660 if (bypass_test)
12661 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12662 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12663 operands[3], operands[0])));
12664 if (second_test)
12665 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12666 gen_rtx_IF_THEN_ELSE (mode, second_test,
12667 operands[2], operands[0])));
12668
12669 return 1;
12670 }
12671
12672 /* Expand a floating-point vector conditional move; a vcond operation
12673 rather than a movcc operation. */
12674
12675 bool
12676 ix86_expand_fp_vcond (rtx operands[])
12677 {
12678 enum rtx_code code = GET_CODE (operands[3]);
12679 rtx cmp;
12680
12681 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12682 &operands[4], &operands[5]);
12683 if (code == UNKNOWN)
12684 return false;
12685
12686 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12687 operands[5], operands[1], operands[2]))
12688 return true;
12689
12690 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12691 operands[1], operands[2]);
12692 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12693 return true;
12694 }
12695
12696 /* Expand a signed integral vector conditional move. */
12697
12698 bool
12699 ix86_expand_int_vcond (rtx operands[])
12700 {
12701 enum machine_mode mode = GET_MODE (operands[0]);
12702 enum rtx_code code = GET_CODE (operands[3]);
12703 bool negate = false;
12704 rtx x, cop0, cop1;
12705
12706 cop0 = operands[4];
12707 cop1 = operands[5];
12708
12709 /* Canonicalize the comparison to EQ, GT, GTU. */
12710 switch (code)
12711 {
12712 case EQ:
12713 case GT:
12714 case GTU:
12715 break;
12716
12717 case NE:
12718 case LE:
12719 case LEU:
12720 code = reverse_condition (code);
12721 negate = true;
12722 break;
12723
12724 case GE:
12725 case GEU:
12726 code = reverse_condition (code);
12727 negate = true;
12728 /* FALLTHRU */
12729
12730 case LT:
12731 case LTU:
12732 code = swap_condition (code);
12733 x = cop0, cop0 = cop1, cop1 = x;
12734 break;
12735
12736 default:
12737 gcc_unreachable ();
12738 }
12739
12740 /* Unsigned parallel compare is not supported by the hardware. Play some
12741 tricks to turn this into a signed comparison against 0. */
12742 if (code == GTU)
12743 {
12744 cop0 = force_reg (mode, cop0);
12745
12746 switch (mode)
12747 {
12748 case V4SImode:
12749 {
12750 rtx t1, t2, mask;
12751
12752 /* Perform a parallel modulo subtraction. */
12753 t1 = gen_reg_rtx (mode);
12754 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12755
12756 /* Extract the original sign bit of op0. */
12757 mask = GEN_INT (-0x80000000);
12758 mask = gen_rtx_CONST_VECTOR (mode,
12759 gen_rtvec (4, mask, mask, mask, mask));
12760 mask = force_reg (mode, mask);
12761 t2 = gen_reg_rtx (mode);
12762 emit_insn (gen_andv4si3 (t2, cop0, mask));
12763
12764 /* XOR it back into the result of the subtraction. This results
12765 in the sign bit set iff we saw unsigned underflow. */
12766 x = gen_reg_rtx (mode);
12767 emit_insn (gen_xorv4si3 (x, t1, t2));
12768
12769 code = GT;
12770 }
12771 break;
12772
12773 case V16QImode:
12774 case V8HImode:
12775 /* Perform a parallel unsigned saturating subtraction. */
12776 x = gen_reg_rtx (mode);
12777 emit_insn (gen_rtx_SET (VOIDmode, x,
12778 gen_rtx_US_MINUS (mode, cop0, cop1)));
12779
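	/* cop0 >u cop1 iff the saturating difference is nonzero, so
	   compare it against zero for equality and flip the sense of
	   the final select.  */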
12780 code = EQ;
12781 negate = !negate;
12782 break;
12783
12784 default:
12785 gcc_unreachable ();
12786 }
12787
12788 cop0 = x;
12789 cop1 = CONST0_RTX (mode);
12790 }
12791
12792 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12793 operands[1+negate], operands[2-negate]);
12794
12795 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12796 operands[2-negate]);
12797 return true;
12798 }
12799
12800 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12801 true if we should do zero extension, else sign extension. HIGH_P is
12802 true if we want the N/2 high elements, else the low elements. */
12803
12804 void
12805 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12806 {
12807 enum machine_mode imode = GET_MODE (operands[1]);
12808 rtx (*unpack)(rtx, rtx, rtx);
12809 rtx se, dest;
12810
12811 switch (imode)
12812 {
12813 case V16QImode:
12814 if (high_p)
12815 unpack = gen_vec_interleave_highv16qi;
12816 else
12817 unpack = gen_vec_interleave_lowv16qi;
12818 break;
12819 case V8HImode:
12820 if (high_p)
12821 unpack = gen_vec_interleave_highv8hi;
12822 else
12823 unpack = gen_vec_interleave_lowv8hi;
12824 break;
12825 case V4SImode:
12826 if (high_p)
12827 unpack = gen_vec_interleave_highv4si;
12828 else
12829 unpack = gen_vec_interleave_lowv4si;
12830 break;
12831 default:
12832 gcc_unreachable ();
12833 }
12834
12835 dest = gen_lowpart (imode, operands[0]);
12836
12837 if (unsigned_p)
12838 se = force_reg (imode, CONST0_RTX (imode));
12839 else
12840 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12841 operands[1], pc_rtx, pc_rtx);
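  /* For sign extension SE is the result of 0 > operands[1], i.e. all ones
     in every lane whose source element is negative, so interleaving the
     source with SE produces the sign-extended values.  */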
12842
12843 emit_insn (unpack (dest, operands[1], se));
12844 }
12845
12846 /* This function performs the same task as ix86_expand_sse_unpack,
12847 but with SSE4.1 instructions. */
12848
12849 void
12850 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12851 {
12852 enum machine_mode imode = GET_MODE (operands[1]);
12853 rtx (*unpack)(rtx, rtx);
12854 rtx src, dest;
12855
12856 switch (imode)
12857 {
12858 case V16QImode:
12859 if (unsigned_p)
12860 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
12861 else
12862 unpack = gen_sse4_1_extendv8qiv8hi2;
12863 break;
12864 case V8HImode:
12865 if (unsigned_p)
12866 unpack = gen_sse4_1_zero_extendv4hiv4si2;
12867 else
12868 unpack = gen_sse4_1_extendv4hiv4si2;
12869 break;
12870 case V4SImode:
12871 if (unsigned_p)
12872 unpack = gen_sse4_1_zero_extendv2siv2di2;
12873 else
12874 unpack = gen_sse4_1_extendv2siv2di2;
12875 break;
12876 default:
12877 gcc_unreachable ();
12878 }
12879
12880 dest = operands[0];
12881 if (high_p)
12882 {
12883 /* Shift higher 8 bytes to lower 8 bytes. */
12884 src = gen_reg_rtx (imode);
12885 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
12886 gen_lowpart (TImode, operands[1]),
12887 GEN_INT (64)));
12888 }
12889 else
12890 src = operands[1];
12891
12892 emit_insn (unpack (dest, src));
12893 }
12894
12895 /* Expand conditional increment or decrement using adc/sbb instructions.
12896 The default case using setcc followed by the conditional move can be
12897 done by generic code. */
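/* For example, an unsigned "c += (a < b)" can be expanded roughly as a
   compare that leaves the carry flag equal to (a < b) followed by
	adc	$0, c
   and the decrement case uses sbb with the same carry analogously.  */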
12898 int
12899 ix86_expand_int_addcc (rtx operands[])
12900 {
12901 enum rtx_code code = GET_CODE (operands[1]);
12902 rtx compare_op;
12903 rtx val = const0_rtx;
12904 bool fpcmp = false;
12905 enum machine_mode mode = GET_MODE (operands[0]);
12906
12907 if (operands[3] != const1_rtx
12908 && operands[3] != constm1_rtx)
12909 return 0;
12910 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12911 ix86_compare_op1, &compare_op))
12912 return 0;
12913 code = GET_CODE (compare_op);
12914
12915 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12916 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12917 {
12918 fpcmp = true;
12919 code = ix86_fp_compare_code_to_integer (code);
12920 }
12921
12922 if (code != LTU)
12923 {
12924 val = constm1_rtx;
12925 if (fpcmp)
12926 PUT_CODE (compare_op,
12927 reverse_condition_maybe_unordered
12928 (GET_CODE (compare_op)));
12929 else
12930 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12931 }
12932 PUT_MODE (compare_op, mode);
12933
12934 /* Construct either adc or sbb insn. */
12935 if ((code == LTU) == (operands[3] == constm1_rtx))
12936 {
12937 switch (GET_MODE (operands[0]))
12938 {
12939 case QImode:
12940 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12941 break;
12942 case HImode:
12943 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12944 break;
12945 case SImode:
12946 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12947 break;
12948 case DImode:
12949 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12950 break;
12951 default:
12952 gcc_unreachable ();
12953 }
12954 }
12955 else
12956 {
12957 switch (GET_MODE (operands[0]))
12958 {
12959 case QImode:
12960 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12961 break;
12962 case HImode:
12963 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12964 break;
12965 case SImode:
12966 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12967 break;
12968 case DImode:
12969 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12970 break;
12971 default:
12972 gcc_unreachable ();
12973 }
12974 }
12975 return 1; /* DONE */
12976 }
12977
12978
12979 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12980 works for floating point parameters and non-offsettable memories.
12981 For pushes, it returns just stack offsets; the values will be saved
12982 in the right order. Maximally three parts are generated. */
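/* For example, on !TARGET_64BIT a DFmode operand is split into two SImode
   parts and an XFmode operand into three, while on TARGET_64BIT an XFmode
   or TFmode operand is split into a DImode low part plus an SImode or
   DImode upper part.  */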
12983
12984 static int
12985 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12986 {
12987 int size;
12988
12989 if (!TARGET_64BIT)
12990 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12991 else
12992 size = (GET_MODE_SIZE (mode) + 4) / 8;
12993
12994 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12995 gcc_assert (size >= 2 && size <= 3);
12996
12997 /* Optimize constant pool reference to immediates. This is used by fp
12998 moves, which force all constants to memory to allow combining. */
12999 if (MEM_P (operand) && MEM_READONLY_P (operand))
13000 {
13001 rtx tmp = maybe_get_pool_constant (operand);
13002 if (tmp)
13003 operand = tmp;
13004 }
13005
13006 if (MEM_P (operand) && !offsettable_memref_p (operand))
13007 {
13008 /* The only non-offsettable memories we handle are pushes. */
13009 int ok = push_operand (operand, VOIDmode);
13010
13011 gcc_assert (ok);
13012
13013 operand = copy_rtx (operand);
13014 PUT_MODE (operand, Pmode);
13015 parts[0] = parts[1] = parts[2] = operand;
13016 return size;
13017 }
13018
13019 if (GET_CODE (operand) == CONST_VECTOR)
13020 {
13021 enum machine_mode imode = int_mode_for_mode (mode);
13022 /* Caution: if we looked through a constant pool memory above,
13023 the operand may actually have a different mode now. That's
13024 ok, since we want to pun this all the way back to an integer. */
13025 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13026 gcc_assert (operand != NULL);
13027 mode = imode;
13028 }
13029
13030 if (!TARGET_64BIT)
13031 {
13032 if (mode == DImode)
13033 split_di (&operand, 1, &parts[0], &parts[1]);
13034 else
13035 {
13036 if (REG_P (operand))
13037 {
13038 gcc_assert (reload_completed);
13039 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13040 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13041 if (size == 3)
13042 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13043 }
13044 else if (offsettable_memref_p (operand))
13045 {
13046 operand = adjust_address (operand, SImode, 0);
13047 parts[0] = operand;
13048 parts[1] = adjust_address (operand, SImode, 4);
13049 if (size == 3)
13050 parts[2] = adjust_address (operand, SImode, 8);
13051 }
13052 else if (GET_CODE (operand) == CONST_DOUBLE)
13053 {
13054 REAL_VALUE_TYPE r;
13055 long l[4];
13056
13057 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13058 switch (mode)
13059 {
13060 case XFmode:
13061 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13062 parts[2] = gen_int_mode (l[2], SImode);
13063 break;
13064 case DFmode:
13065 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13066 break;
13067 default:
13068 gcc_unreachable ();
13069 }
13070 parts[1] = gen_int_mode (l[1], SImode);
13071 parts[0] = gen_int_mode (l[0], SImode);
13072 }
13073 else
13074 gcc_unreachable ();
13075 }
13076 }
13077 else
13078 {
13079 if (mode == TImode)
13080 split_ti (&operand, 1, &parts[0], &parts[1]);
13081 if (mode == XFmode || mode == TFmode)
13082 {
13083 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13084 if (REG_P (operand))
13085 {
13086 gcc_assert (reload_completed);
13087 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13088 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13089 }
13090 else if (offsettable_memref_p (operand))
13091 {
13092 operand = adjust_address (operand, DImode, 0);
13093 parts[0] = operand;
13094 parts[1] = adjust_address (operand, upper_mode, 8);
13095 }
13096 else if (GET_CODE (operand) == CONST_DOUBLE)
13097 {
13098 REAL_VALUE_TYPE r;
13099 long l[4];
13100
13101 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13102 real_to_target (l, &r, mode);
13103
13104 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13105 if (HOST_BITS_PER_WIDE_INT >= 64)
13106 parts[0]
13107 = gen_int_mode
13108 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13109 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13110 DImode);
13111 else
13112 parts[0] = immed_double_const (l[0], l[1], DImode);
13113
13114 if (upper_mode == SImode)
13115 parts[1] = gen_int_mode (l[2], SImode);
13116 else if (HOST_BITS_PER_WIDE_INT >= 64)
13117 parts[1]
13118 = gen_int_mode
13119 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13120 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13121 DImode);
13122 else
13123 parts[1] = immed_double_const (l[2], l[3], DImode);
13124 }
13125 else
13126 gcc_unreachable ();
13127 }
13128 }
13129
13130 return size;
13131 }
13132
13133 /* Emit insns to perform a move or push of DI, DF, and XF values.
13134 All required insns are emitted here. Operands 2-4 receive the
13135 destination parts in the correct order; operands 5-7 receive the
13136 corresponding source parts. */
13137
13138 void
13139 ix86_split_long_move (rtx operands[])
13140 {
13141 rtx part[2][3];
13142 int nparts;
13143 int push = 0;
13144 int collisions = 0;
13145 enum machine_mode mode = GET_MODE (operands[0]);
13146
13147 /* The DFmode expanders may ask us to move a double.
13148 For a 64-bit target this is a single move. By hiding the fact
13149 here we simplify the i386.md splitters. */
13150 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13151 {
13152 /* Optimize constant pool reference to immediates. This is used by
13153 fp moves, which force all constants to memory to allow combining. */
13154
13155 if (MEM_P (operands[1])
13156 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13157 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13158 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13159 if (push_operand (operands[0], VOIDmode))
13160 {
13161 operands[0] = copy_rtx (operands[0]);
13162 PUT_MODE (operands[0], Pmode);
13163 }
13164 else
13165 operands[0] = gen_lowpart (DImode, operands[0]);
13166 operands[1] = gen_lowpart (DImode, operands[1]);
13167 emit_move_insn (operands[0], operands[1]);
13168 return;
13169 }
13170
13171 /* The only non-offsettable memory we handle is push. */
13172 if (push_operand (operands[0], VOIDmode))
13173 push = 1;
13174 else
13175 gcc_assert (!MEM_P (operands[0])
13176 || offsettable_memref_p (operands[0]));
13177
13178 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13179 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13180
13181 /* When emitting a push, take care of source operands on the stack. */
13182 if (push && MEM_P (operands[1])
13183 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13184 {
13185 if (nparts == 3)
13186 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13187 XEXP (part[1][2], 0));
13188 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13189 XEXP (part[1][1], 0));
13190 }
13191
13192 /* We need to do the copy in the right order in case an address register
13193 of the source overlaps the destination. */
13194 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13195 {
13196 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13197 collisions++;
13198 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13199 collisions++;
13200 if (nparts == 3
13201 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13202 collisions++;
13203
13204 /* Collision in the middle part can be handled by reordering. */
13205 if (collisions == 1 && nparts == 3
13206 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13207 {
13208 rtx tmp;
13209 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13210 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13211 }
13212
13213 /* If there are more collisions, we can't handle it by reordering.
13214 Do an lea to the last part and use only one colliding move. */
13215 else if (collisions > 1)
13216 {
13217 rtx base;
13218
13219 collisions = 1;
13220
13221 base = part[0][nparts - 1];
13222
13223 /* Handle the case when the last part isn't valid for lea.
13224 Happens in 64-bit mode storing the 12-byte XFmode. */
13225 if (GET_MODE (base) != Pmode)
13226 base = gen_rtx_REG (Pmode, REGNO (base));
13227
13228 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13229 part[1][0] = replace_equiv_address (part[1][0], base);
13230 part[1][1] = replace_equiv_address (part[1][1],
13231 plus_constant (base, UNITS_PER_WORD));
13232 if (nparts == 3)
13233 part[1][2] = replace_equiv_address (part[1][2],
13234 plus_constant (base, 8));
13235 }
13236 }
13237
13238 if (push)
13239 {
13240 if (!TARGET_64BIT)
13241 {
13242 if (nparts == 3)
13243 {
13244 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13245 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13246 emit_move_insn (part[0][2], part[1][2]);
13247 }
13248 }
13249 else
13250 {
13251 /* In 64-bit mode we don't have a 32-bit push available. In case this is
13252 a register, that is OK - we will just use the larger counterpart. We also
13253 retype memory - these come from an attempt to avoid the REX prefix on
13254 moving the second half of a TFmode value. */
13255 if (GET_MODE (part[1][1]) == SImode)
13256 {
13257 switch (GET_CODE (part[1][1]))
13258 {
13259 case MEM:
13260 part[1][1] = adjust_address (part[1][1], DImode, 0);
13261 break;
13262
13263 case REG:
13264 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13265 break;
13266
13267 default:
13268 gcc_unreachable ();
13269 }
13270
13271 if (GET_MODE (part[1][0]) == SImode)
13272 part[1][0] = part[1][1];
13273 }
13274 }
13275 emit_move_insn (part[0][1], part[1][1]);
13276 emit_move_insn (part[0][0], part[1][0]);
13277 return;
13278 }
13279
13280 /* Choose correct order to not overwrite the source before it is copied. */
13281 if ((REG_P (part[0][0])
13282 && REG_P (part[1][1])
13283 && (REGNO (part[0][0]) == REGNO (part[1][1])
13284 || (nparts == 3
13285 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13286 || (collisions > 0
13287 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13288 {
13289 if (nparts == 3)
13290 {
13291 operands[2] = part[0][2];
13292 operands[3] = part[0][1];
13293 operands[4] = part[0][0];
13294 operands[5] = part[1][2];
13295 operands[6] = part[1][1];
13296 operands[7] = part[1][0];
13297 }
13298 else
13299 {
13300 operands[2] = part[0][1];
13301 operands[3] = part[0][0];
13302 operands[5] = part[1][1];
13303 operands[6] = part[1][0];
13304 }
13305 }
13306 else
13307 {
13308 if (nparts == 3)
13309 {
13310 operands[2] = part[0][0];
13311 operands[3] = part[0][1];
13312 operands[4] = part[0][2];
13313 operands[5] = part[1][0];
13314 operands[6] = part[1][1];
13315 operands[7] = part[1][2];
13316 }
13317 else
13318 {
13319 operands[2] = part[0][0];
13320 operands[3] = part[0][1];
13321 operands[5] = part[1][0];
13322 operands[6] = part[1][1];
13323 }
13324 }
13325
13326 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13327 if (optimize_size)
13328 {
13329 if (CONST_INT_P (operands[5])
13330 && operands[5] != const0_rtx
13331 && REG_P (operands[2]))
13332 {
13333 if (CONST_INT_P (operands[6])
13334 && INTVAL (operands[6]) == INTVAL (operands[5]))
13335 operands[6] = operands[2];
13336
13337 if (nparts == 3
13338 && CONST_INT_P (operands[7])
13339 && INTVAL (operands[7]) == INTVAL (operands[5]))
13340 operands[7] = operands[2];
13341 }
13342
13343 if (nparts == 3
13344 && CONST_INT_P (operands[6])
13345 && operands[6] != const0_rtx
13346 && REG_P (operands[3])
13347 && CONST_INT_P (operands[7])
13348 && INTVAL (operands[7]) == INTVAL (operands[6]))
13349 operands[7] = operands[3];
13350 }
13351
13352 emit_move_insn (operands[2], operands[5]);
13353 emit_move_insn (operands[3], operands[6]);
13354 if (nparts == 3)
13355 emit_move_insn (operands[4], operands[7]);
13356
13357 return;
13358 }
13359
13360 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13361 left shift by a constant, either using a single shift or
13362 a sequence of add instructions. */
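/* For example, a left shift by 1 becomes a single add of the operand to
   itself, and when !optimize_size a shift by 2 may be emitted as two such
   adds if 2 * ix86_cost->add does not exceed ix86_cost->shift_const;
   otherwise a plain shift instruction is used.  */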
13363
13364 static void
13365 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13366 {
13367 if (count == 1)
13368 {
13369 emit_insn ((mode == DImode
13370 ? gen_addsi3
13371 : gen_adddi3) (operand, operand, operand));
13372 }
13373 else if (!optimize_size
13374 && count * ix86_cost->add <= ix86_cost->shift_const)
13375 {
13376 int i;
13377 for (i=0; i<count; i++)
13378 {
13379 emit_insn ((mode == DImode
13380 ? gen_addsi3
13381 : gen_adddi3) (operand, operand, operand));
13382 }
13383 }
13384 else
13385 emit_insn ((mode == DImode
13386 ? gen_ashlsi3
13387 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13388 }
13389
13390 void
13391 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13392 {
13393 rtx low[2], high[2];
13394 int count;
13395 const int single_width = mode == DImode ? 32 : 64;
13396
13397 if (CONST_INT_P (operands[2]))
13398 {
13399 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13400 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13401
13402 if (count >= single_width)
13403 {
13404 emit_move_insn (high[0], low[1]);
13405 emit_move_insn (low[0], const0_rtx);
13406
13407 if (count > single_width)
13408 ix86_expand_ashl_const (high[0], count - single_width, mode);
13409 }
13410 else
13411 {
13412 if (!rtx_equal_p (operands[0], operands[1]))
13413 emit_move_insn (operands[0], operands[1]);
13414 emit_insn ((mode == DImode
13415 ? gen_x86_shld_1
13416 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13417 ix86_expand_ashl_const (low[0], count, mode);
13418 }
13419 return;
13420 }
13421
13422 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13423
13424 if (operands[1] == const1_rtx)
13425 {
13426 /* Assuming we've chosen QImode-capable registers, 1 << N
13427 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13428 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13429 {
13430 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13431
13432 ix86_expand_clear (low[0]);
13433 ix86_expand_clear (high[0]);
13434 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13435
13436 d = gen_lowpart (QImode, low[0]);
13437 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13438 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13439 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13440
13441 d = gen_lowpart (QImode, high[0]);
13442 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13443 s = gen_rtx_NE (QImode, flags, const0_rtx);
13444 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13445 }
13446
13447 /* Otherwise, we can get the same results by manually performing
13448 a bit extract operation on bit 5/6, and then performing the two
13449 shifts. The two methods of getting 0/1 into low/high are exactly
13450 the same size. Avoiding the shift in the bit extract case helps
13451 pentium4 a bit; no one else seems to care much either way. */
13452 else
13453 {
13454 rtx x;
13455
13456 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13457 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13458 else
13459 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13460 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13461
13462 emit_insn ((mode == DImode
13463 ? gen_lshrsi3
13464 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13465 emit_insn ((mode == DImode
13466 ? gen_andsi3
13467 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13468 emit_move_insn (low[0], high[0]);
13469 emit_insn ((mode == DImode
13470 ? gen_xorsi3
13471 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13472 }
13473
13474 emit_insn ((mode == DImode
13475 ? gen_ashlsi3
13476 : gen_ashldi3) (low[0], low[0], operands[2]));
13477 emit_insn ((mode == DImode
13478 ? gen_ashlsi3
13479 : gen_ashldi3) (high[0], high[0], operands[2]));
13480 return;
13481 }
13482
13483 if (operands[1] == constm1_rtx)
13484 {
13485 /* For -1 << N, we can avoid the shld instruction, because we
13486 know that we're shifting 0...31/63 ones into a -1. */
13487 emit_move_insn (low[0], constm1_rtx);
13488 if (optimize_size)
13489 emit_move_insn (high[0], low[0]);
13490 else
13491 emit_move_insn (high[0], constm1_rtx);
13492 }
13493 else
13494 {
13495 if (!rtx_equal_p (operands[0], operands[1]))
13496 emit_move_insn (operands[0], operands[1]);
13497
13498 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13499 emit_insn ((mode == DImode
13500 ? gen_x86_shld_1
13501 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13502 }
13503
13504 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13505
13506 if (TARGET_CMOVE && scratch)
13507 {
13508 ix86_expand_clear (scratch);
13509 emit_insn ((mode == DImode
13510 ? gen_x86_shift_adj_1
13511 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13512 }
13513 else
13514 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13515 }
13516
13517 void
13518 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13519 {
13520 rtx low[2], high[2];
13521 int count;
13522 const int single_width = mode == DImode ? 32 : 64;
13523
13524 if (CONST_INT_P (operands[2]))
13525 {
13526 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13527 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13528
13529 if (count == single_width * 2 - 1)
13530 {
13531 emit_move_insn (high[0], high[1]);
13532 emit_insn ((mode == DImode
13533 ? gen_ashrsi3
13534 : gen_ashrdi3) (high[0], high[0],
13535 GEN_INT (single_width - 1)));
13536 emit_move_insn (low[0], high[0]);
13537
13538 }
13539 else if (count >= single_width)
13540 {
13541 emit_move_insn (low[0], high[1]);
13542 emit_move_insn (high[0], low[0]);
13543 emit_insn ((mode == DImode
13544 ? gen_ashrsi3
13545 : gen_ashrdi3) (high[0], high[0],
13546 GEN_INT (single_width - 1)));
13547 if (count > single_width)
13548 emit_insn ((mode == DImode
13549 ? gen_ashrsi3
13550 : gen_ashrdi3) (low[0], low[0],
13551 GEN_INT (count - single_width)));
13552 }
13553 else
13554 {
13555 if (!rtx_equal_p (operands[0], operands[1]))
13556 emit_move_insn (operands[0], operands[1]);
13557 emit_insn ((mode == DImode
13558 ? gen_x86_shrd_1
13559 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13560 emit_insn ((mode == DImode
13561 ? gen_ashrsi3
13562 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13563 }
13564 }
13565 else
13566 {
13567 if (!rtx_equal_p (operands[0], operands[1]))
13568 emit_move_insn (operands[0], operands[1]);
13569
13570 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13571
13572 emit_insn ((mode == DImode
13573 ? gen_x86_shrd_1
13574 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13575 emit_insn ((mode == DImode
13576 ? gen_ashrsi3
13577 : gen_ashrdi3) (high[0], high[0], operands[2]));
13578
13579 if (TARGET_CMOVE && scratch)
13580 {
13581 emit_move_insn (scratch, high[0]);
13582 emit_insn ((mode == DImode
13583 ? gen_ashrsi3
13584 : gen_ashrdi3) (scratch, scratch,
13585 GEN_INT (single_width - 1)));
13586 emit_insn ((mode == DImode
13587 ? gen_x86_shift_adj_1
13588 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13589 scratch));
13590 }
13591 else
13592 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13593 }
13594 }
13595
13596 void
13597 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13598 {
13599 rtx low[2], high[2];
13600 int count;
13601 const int single_width = mode == DImode ? 32 : 64;
13602
13603 if (CONST_INT_P (operands[2]))
13604 {
13605 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13606 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13607
13608 if (count >= single_width)
13609 {
13610 emit_move_insn (low[0], high[1]);
13611 ix86_expand_clear (high[0]);
13612
13613 if (count > single_width)
13614 emit_insn ((mode == DImode
13615 ? gen_lshrsi3
13616 : gen_lshrdi3) (low[0], low[0],
13617 GEN_INT (count - single_width)));
13618 }
13619 else
13620 {
13621 if (!rtx_equal_p (operands[0], operands[1]))
13622 emit_move_insn (operands[0], operands[1]);
13623 emit_insn ((mode == DImode
13624 ? gen_x86_shrd_1
13625 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13626 emit_insn ((mode == DImode
13627 ? gen_lshrsi3
13628 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13629 }
13630 }
13631 else
13632 {
13633 if (!rtx_equal_p (operands[0], operands[1]))
13634 emit_move_insn (operands[0], operands[1]);
13635
13636 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13637
13638 emit_insn ((mode == DImode
13639 ? gen_x86_shrd_1
13640 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13641 emit_insn ((mode == DImode
13642 ? gen_lshrsi3
13643 : gen_lshrdi3) (high[0], high[0], operands[2]));
13644
13645 /* Heh. By reversing the arguments, we can reuse this pattern. */
13646 if (TARGET_CMOVE && scratch)
13647 {
13648 ix86_expand_clear (scratch);
13649 emit_insn ((mode == DImode
13650 ? gen_x86_shift_adj_1
13651 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13652 scratch));
13653 }
13654 else
13655 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13656 }
13657 }
13658
13659 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13660 static void
13661 predict_jump (int prob)
13662 {
13663 rtx insn = get_last_insn ();
13664 gcc_assert (JUMP_P (insn));
13665 REG_NOTES (insn)
13666 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13667 GEN_INT (prob),
13668 REG_NOTES (insn));
13669 }
13670
13671 /* Helper function for the string operations below. Test whether the bits
13672 of VARIABLE selected by VALUE are zero; if so, jump to the returned label. */
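/* For example, ix86_expand_aligntest (count, 4, true) emits roughly
	test	$4, count
	je	label
   so the code guarded by the test is skipped when the 4-byte chunk is
   not needed.  */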
13673 static rtx
13674 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13675 {
13676 rtx label = gen_label_rtx ();
13677 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13678 if (GET_MODE (variable) == DImode)
13679 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13680 else
13681 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13682 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13683 1, label);
13684 if (epilogue)
13685 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13686 else
13687 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13688 return label;
13689 }
13690
13691 /* Decrease COUNTREG by VALUE. */
13692 static void
13693 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13694 {
13695 if (GET_MODE (countreg) == DImode)
13696 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13697 else
13698 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13699 }
13700
13701 /* Zero extend a possibly-SImode EXP into a Pmode register. */
13702 rtx
13703 ix86_zero_extend_to_Pmode (rtx exp)
13704 {
13705 rtx r;
13706 if (GET_MODE (exp) == VOIDmode)
13707 return force_reg (Pmode, exp);
13708 if (GET_MODE (exp) == Pmode)
13709 return copy_to_mode_reg (Pmode, exp);
13710 r = gen_reg_rtx (Pmode);
13711 emit_insn (gen_zero_extendsidi2 (r, exp));
13712 return r;
13713 }
13714
13715 /* Divide COUNTREG by SCALE, which must be a power of two. */
13716 static rtx
13717 scale_counter (rtx countreg, int scale)
13718 {
13719 rtx sc;
13720 rtx piece_size_mask;
13721
13722 if (scale == 1)
13723 return countreg;
13724 if (CONST_INT_P (countreg))
13725 return GEN_INT (INTVAL (countreg) / scale);
13726 gcc_assert (REG_P (countreg));
13727
13728 piece_size_mask = GEN_INT (scale - 1);
13729 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13730 GEN_INT (exact_log2 (scale)),
13731 NULL, 1, OPTAB_DIRECT);
13732 return sc;
13733 }
13734
13735 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13736 DImode for constant loop counts. */
13737
13738 static enum machine_mode
13739 counter_mode (rtx count_exp)
13740 {
13741 if (GET_MODE (count_exp) != VOIDmode)
13742 return GET_MODE (count_exp);
13743 if (GET_CODE (count_exp) != CONST_INT)
13744 return Pmode;
13745 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13746 return DImode;
13747 return SImode;
13748 }
13749
13750 /* When SRCPTR is non-NULL, output a simple loop to move memory from
13751 SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
13752 size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13753 equivalent loop to set memory to VALUE (expected to be in MODE).
13754
13755 The size is rounded down to a whole number of chunks moved at once.
13756 SRCMEM and DESTMEM provide the MEM rtxes that feed proper aliasing info. */
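/* For a copy with UNROLL == 1 the emitted code corresponds roughly to

	size = count rounded down to a multiple of the chunk size;
	iter = 0;
	do
	  {
	    copy one MODE-sized chunk from src + iter to dest + iter;
	    iter += chunk size;
	  }
	while (iter < size);
	destptr += iter;  srcptr += iter;

   with UNROLL > 1 the chunks of an iteration are loaded into temporaries
   before being stored.  */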
13757
13758
13759 static void
13760 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13761 rtx destptr, rtx srcptr, rtx value,
13762 rtx count, enum machine_mode mode, int unroll,
13763 int expected_size)
13764 {
13765 rtx out_label, top_label, iter, tmp;
13766 enum machine_mode iter_mode = counter_mode (count);
13767 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13768 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13769 rtx size;
13770 rtx x_addr;
13771 rtx y_addr;
13772 int i;
13773
13774 top_label = gen_label_rtx ();
13775 out_label = gen_label_rtx ();
13776 iter = gen_reg_rtx (iter_mode);
13777
13778 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13779 NULL, 1, OPTAB_DIRECT);
13780 /* Those two should combine. */
13781 if (piece_size == const1_rtx)
13782 {
13783 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13784 true, out_label);
13785 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13786 }
13787 emit_move_insn (iter, const0_rtx);
13788
13789 emit_label (top_label);
13790
13791 tmp = convert_modes (Pmode, iter_mode, iter, true);
13792 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13793 destmem = change_address (destmem, mode, x_addr);
13794
13795 if (srcmem)
13796 {
13797 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13798 srcmem = change_address (srcmem, mode, y_addr);
13799
13800 /* When unrolling for chips that reorder memory reads and writes,
13801 we can save registers by using a single temporary.
13802 Also, using 4 temporaries is overkill in 32-bit mode. */
13803 if (!TARGET_64BIT && 0)
13804 {
13805 for (i = 0; i < unroll; i++)
13806 {
13807 if (i)
13808 {
13809 destmem =
13810 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13811 srcmem =
13812 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13813 }
13814 emit_move_insn (destmem, srcmem);
13815 }
13816 }
13817 else
13818 {
13819 rtx tmpreg[4];
13820 gcc_assert (unroll <= 4);
13821 for (i = 0; i < unroll; i++)
13822 {
13823 tmpreg[i] = gen_reg_rtx (mode);
13824 if (i)
13825 {
13826 srcmem =
13827 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13828 }
13829 emit_move_insn (tmpreg[i], srcmem);
13830 }
13831 for (i = 0; i < unroll; i++)
13832 {
13833 if (i)
13834 {
13835 destmem =
13836 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13837 }
13838 emit_move_insn (destmem, tmpreg[i]);
13839 }
13840 }
13841 }
13842 else
13843 for (i = 0; i < unroll; i++)
13844 {
13845 if (i)
13846 destmem =
13847 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13848 emit_move_insn (destmem, value);
13849 }
13850
13851 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13852 true, OPTAB_LIB_WIDEN);
13853 if (tmp != iter)
13854 emit_move_insn (iter, tmp);
13855
13856 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13857 true, top_label);
13858 if (expected_size != -1)
13859 {
13860 expected_size /= GET_MODE_SIZE (mode) * unroll;
13861 if (expected_size == 0)
13862 predict_jump (0);
13863 else if (expected_size > REG_BR_PROB_BASE)
13864 predict_jump (REG_BR_PROB_BASE - 1);
13865 else
13866 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13867 }
13868 else
13869 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13870 iter = ix86_zero_extend_to_Pmode (iter);
13871 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13872 true, OPTAB_LIB_WIDEN);
13873 if (tmp != destptr)
13874 emit_move_insn (destptr, tmp);
13875 if (srcptr)
13876 {
13877 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13878 true, OPTAB_LIB_WIDEN);
13879 if (tmp != srcptr)
13880 emit_move_insn (srcptr, tmp);
13881 }
13882 emit_label (out_label);
13883 }
13884
13885 /* Output a "rep; mov" instruction.
13886 Arguments have the same meaning as for the previous function. */
13887 static void
13888 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13889 rtx destptr, rtx srcptr,
13890 rtx count,
13891 enum machine_mode mode)
13892 {
13893 rtx destexp;
13894 rtx srcexp;
13895 rtx countreg;
13896
13897 /* If the size is known and a multiple of 4, rep movsl is shorter than rep movsb. */
13898 if (mode == QImode && CONST_INT_P (count)
13899 && !(INTVAL (count) & 3))
13900 mode = SImode;
13901
13902 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13903 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13904 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13905 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13906 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13907 if (mode != QImode)
13908 {
13909 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13910 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13911 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13912 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13913 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13914 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13915 }
13916 else
13917 {
13918 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13919 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13920 }
13921 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13922 destexp, srcexp));
13923 }
13924
13925 /* Output a "rep; stos" instruction.
13926 Arguments have the same meaning as for the previous function. */
13927 static void
13928 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13929 rtx count,
13930 enum machine_mode mode)
13931 {
13932 rtx destexp;
13933 rtx countreg;
13934
13935 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13936 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13937 value = force_reg (mode, gen_lowpart (mode, value));
13938 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13939 if (mode != QImode)
13940 {
13941 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13942 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13943 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13944 }
13945 else
13946 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13947 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13948 }
13949
13950 static void
13951 emit_strmov (rtx destmem, rtx srcmem,
13952 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13953 {
13954 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13955 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13956 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13957 }
13958
13959 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
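/* For example, with a constant COUNT of 23 (binary 10111) and MAX_SIZE of 32
   on a 64-bit target this emits one 16-byte block (two DImode moves), then a
   4-byte, a 2-byte and a 1-byte move, adjusting the offset after each.  */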
13960 static void
13961 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13962 rtx destptr, rtx srcptr, rtx count, int max_size)
13963 {
13964 rtx src, dest;
13965 if (CONST_INT_P (count))
13966 {
13967 HOST_WIDE_INT countval = INTVAL (count);
13968 int offset = 0;
13969
13970 if ((countval & 0x10) && max_size > 16)
13971 {
13972 if (TARGET_64BIT)
13973 {
13974 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13975 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13976 }
13977 else
13978 gcc_unreachable ();
13979 offset += 16;
13980 }
13981 if ((countval & 0x08) && max_size > 8)
13982 {
13983 if (TARGET_64BIT)
13984 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13985 else
13986 {
13987 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13988 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13989 }
13990 offset += 8;
13991 }
13992 if ((countval & 0x04) && max_size > 4)
13993 {
13994 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13995 offset += 4;
13996 }
13997 if ((countval & 0x02) && max_size > 2)
13998 {
13999 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14000 offset += 2;
14001 }
14002 if ((countval & 0x01) && max_size > 1)
14003 {
14004 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14005 offset += 1;
14006 }
14007 return;
14008 }
14009 if (max_size > 8)
14010 {
14011 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14012 count, 1, OPTAB_DIRECT);
14013 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14014 count, QImode, 1, 4);
14015 return;
14016 }
14017
14018 /* When single-instruction stringops are available, we can cheaply advance
14019 the dest and src pointers. Otherwise we save code size by maintaining an
14020 offset register (zero is readily available from the preceding rep
14021 operation) and using x86 addressing modes. */
14022 if (TARGET_SINGLE_STRINGOP)
14023 {
14024 if (max_size > 4)
14025 {
14026 rtx label = ix86_expand_aligntest (count, 4, true);
14027 src = change_address (srcmem, SImode, srcptr);
14028 dest = change_address (destmem, SImode, destptr);
14029 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14030 emit_label (label);
14031 LABEL_NUSES (label) = 1;
14032 }
14033 if (max_size > 2)
14034 {
14035 rtx label = ix86_expand_aligntest (count, 2, true);
14036 src = change_address (srcmem, HImode, srcptr);
14037 dest = change_address (destmem, HImode, destptr);
14038 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14039 emit_label (label);
14040 LABEL_NUSES (label) = 1;
14041 }
14042 if (max_size > 1)
14043 {
14044 rtx label = ix86_expand_aligntest (count, 1, true);
14045 src = change_address (srcmem, QImode, srcptr);
14046 dest = change_address (destmem, QImode, destptr);
14047 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14048 emit_label (label);
14049 LABEL_NUSES (label) = 1;
14050 }
14051 }
14052 else
14053 {
14054 rtx offset = force_reg (Pmode, const0_rtx);
14055 rtx tmp;
14056
14057 if (max_size > 4)
14058 {
14059 rtx label = ix86_expand_aligntest (count, 4, true);
14060 src = change_address (srcmem, SImode, srcptr);
14061 dest = change_address (destmem, SImode, destptr);
14062 emit_move_insn (dest, src);
14063 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14064 true, OPTAB_LIB_WIDEN);
14065 if (tmp != offset)
14066 emit_move_insn (offset, tmp);
14067 emit_label (label);
14068 LABEL_NUSES (label) = 1;
14069 }
14070 if (max_size > 2)
14071 {
14072 rtx label = ix86_expand_aligntest (count, 2, true);
14073 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14074 src = change_address (srcmem, HImode, tmp);
14075 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14076 dest = change_address (destmem, HImode, tmp);
14077 emit_move_insn (dest, src);
14078 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14079 true, OPTAB_LIB_WIDEN);
14080 if (tmp != offset)
14081 emit_move_insn (offset, tmp);
14082 emit_label (label);
14083 LABEL_NUSES (label) = 1;
14084 }
14085 if (max_size > 1)
14086 {
14087 rtx label = ix86_expand_aligntest (count, 1, true);
14088 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14089 src = change_address (srcmem, QImode, tmp);
14090 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14091 dest = change_address (destmem, QImode, tmp);
14092 emit_move_insn (dest, src);
14093 emit_label (label);
14094 LABEL_NUSES (label) = 1;
14095 }
14096 }
14097 }
14098
14099 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14100 static void
14101 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14102 rtx count, int max_size)
14103 {
14104 count =
14105 expand_simple_binop (counter_mode (count), AND, count,
14106 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14107 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14108 gen_lowpart (QImode, value), count, QImode,
14109 1, max_size / 2);
14110 }
14111
14112 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14113 static void
14114 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14115 {
14116 rtx dest;
14117
14118 if (CONST_INT_P (count))
14119 {
14120 HOST_WIDE_INT countval = INTVAL (count);
14121 int offset = 0;
14122
14123 if ((countval & 0x10) && max_size > 16)
14124 {
14125 if (TARGET_64BIT)
14126 {
14127 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14128 emit_insn (gen_strset (destptr, dest, value));
14129 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14130 emit_insn (gen_strset (destptr, dest, value));
14131 }
14132 else
14133 gcc_unreachable ();
14134 offset += 16;
14135 }
14136 if ((countval & 0x08) && max_size > 8)
14137 {
14138 if (TARGET_64BIT)
14139 {
14140 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14141 emit_insn (gen_strset (destptr, dest, value));
14142 }
14143 else
14144 {
14145 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14146 emit_insn (gen_strset (destptr, dest, value));
14147 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14148 emit_insn (gen_strset (destptr, dest, value));
14149 }
14150 offset += 8;
14151 }
14152 if ((countval & 0x04) && max_size > 4)
14153 {
14154 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14155 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14156 offset += 4;
14157 }
14158 if ((countval & 0x02) && max_size > 2)
14159 {
14160 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14161 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14162 offset += 2;
14163 }
14164 if ((countval & 0x01) && max_size > 1)
14165 {
14166 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14167 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14168 offset += 1;
14169 }
14170 return;
14171 }
14172 if (max_size > 32)
14173 {
14174 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14175 return;
14176 }
14177 if (max_size > 16)
14178 {
14179 rtx label = ix86_expand_aligntest (count, 16, true);
14180 if (TARGET_64BIT)
14181 {
14182 dest = change_address (destmem, DImode, destptr);
14183 emit_insn (gen_strset (destptr, dest, value));
14184 emit_insn (gen_strset (destptr, dest, value));
14185 }
14186 else
14187 {
14188 dest = change_address (destmem, SImode, destptr);
14189 emit_insn (gen_strset (destptr, dest, value));
14190 emit_insn (gen_strset (destptr, dest, value));
14191 emit_insn (gen_strset (destptr, dest, value));
14192 emit_insn (gen_strset (destptr, dest, value));
14193 }
14194 emit_label (label);
14195 LABEL_NUSES (label) = 1;
14196 }
14197 if (max_size > 8)
14198 {
14199 rtx label = ix86_expand_aligntest (count, 8, true);
14200 if (TARGET_64BIT)
14201 {
14202 dest = change_address (destmem, DImode, destptr);
14203 emit_insn (gen_strset (destptr, dest, value));
14204 }
14205 else
14206 {
14207 dest = change_address (destmem, SImode, destptr);
14208 emit_insn (gen_strset (destptr, dest, value));
14209 emit_insn (gen_strset (destptr, dest, value));
14210 }
14211 emit_label (label);
14212 LABEL_NUSES (label) = 1;
14213 }
14214 if (max_size > 4)
14215 {
14216 rtx label = ix86_expand_aligntest (count, 4, true);
14217 dest = change_address (destmem, SImode, destptr);
14218 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14219 emit_label (label);
14220 LABEL_NUSES (label) = 1;
14221 }
14222 if (max_size > 2)
14223 {
14224 rtx label = ix86_expand_aligntest (count, 2, true);
14225 dest = change_address (destmem, HImode, destptr);
14226 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14227 emit_label (label);
14228 LABEL_NUSES (label) = 1;
14229 }
14230 if (max_size > 1)
14231 {
14232 rtx label = ix86_expand_aligntest (count, 1, true);
14233 dest = change_address (destmem, QImode, destptr);
14234 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14235 emit_label (label);
14236 LABEL_NUSES (label) = 1;
14237 }
14238 }
14239
14240 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
14241 to ALIGN, up to DESIRED_ALIGNMENT. */
14242 static void
14243 expand_movmem_prologue (rtx destmem, rtx srcmem,
14244 rtx destptr, rtx srcptr, rtx count,
14245 int align, int desired_alignment)
14246 {
14247 if (align <= 1 && desired_alignment > 1)
14248 {
14249 rtx label = ix86_expand_aligntest (destptr, 1, false);
14250 srcmem = change_address (srcmem, QImode, srcptr);
14251 destmem = change_address (destmem, QImode, destptr);
14252 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14253 ix86_adjust_counter (count, 1);
14254 emit_label (label);
14255 LABEL_NUSES (label) = 1;
14256 }
14257 if (align <= 2 && desired_alignment > 2)
14258 {
14259 rtx label = ix86_expand_aligntest (destptr, 2, false);
14260 srcmem = change_address (srcmem, HImode, srcptr);
14261 destmem = change_address (destmem, HImode, destptr);
14262 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14263 ix86_adjust_counter (count, 2);
14264 emit_label (label);
14265 LABEL_NUSES (label) = 1;
14266 }
14267 if (align <= 4 && desired_alignment > 4)
14268 {
14269 rtx label = ix86_expand_aligntest (destptr, 4, false);
14270 srcmem = change_address (srcmem, SImode, srcptr);
14271 destmem = change_address (destmem, SImode, destptr);
14272 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14273 ix86_adjust_counter (count, 4);
14274 emit_label (label);
14275 LABEL_NUSES (label) = 1;
14276 }
14277 gcc_assert (desired_alignment <= 8);
14278 }
14279
14280 /* Set enough bytes of DEST to align it, known to be aligned to ALIGN,
14281 up to DESIRED_ALIGNMENT. */
14282 static void
14283 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14284 int align, int desired_alignment)
14285 {
14286 if (align <= 1 && desired_alignment > 1)
14287 {
14288 rtx label = ix86_expand_aligntest (destptr, 1, false);
14289 destmem = change_address (destmem, QImode, destptr);
14290 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14291 ix86_adjust_counter (count, 1);
14292 emit_label (label);
14293 LABEL_NUSES (label) = 1;
14294 }
14295 if (align <= 2 && desired_alignment > 2)
14296 {
14297 rtx label = ix86_expand_aligntest (destptr, 2, false);
14298 destmem = change_address (destmem, HImode, destptr);
14299 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14300 ix86_adjust_counter (count, 2);
14301 emit_label (label);
14302 LABEL_NUSES (label) = 1;
14303 }
14304 if (align <= 4 && desired_alignment > 4)
14305 {
14306 rtx label = ix86_expand_aligntest (destptr, 4, false);
14307 destmem = change_address (destmem, SImode, destptr);
14308 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14309 ix86_adjust_counter (count, 4);
14310 emit_label (label);
14311 LABEL_NUSES (label) = 1;
14312 }
14313 gcc_assert (desired_alignment <= 8);
14314 }
14315
14316 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
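/* For instance, when stringop_alg has been forced by the user it is returned
   directly; otherwise, with a known EXPECTED_SIZE, the per-CPU stringop_algs
   table is scanned for the first entry whose max covers that size (or is -1)
   and its algorithm is chosen, falling back to unknown_size when the size is
   not known.  */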
14317 static enum stringop_alg
14318 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14319 int *dynamic_check)
14320 {
14321 const struct stringop_algs * algs;
14322
14323 *dynamic_check = -1;
14324 if (memset)
14325 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14326 else
14327 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14328 if (stringop_alg != no_stringop)
14329 return stringop_alg;
14330 /* rep; movq or rep; movl is the smallest variant. */
14331 else if (optimize_size)
14332 {
14333 if (!count || (count & 3))
14334 return rep_prefix_1_byte;
14335 else
14336 return rep_prefix_4_byte;
14337 }
14338 /* Very tiny blocks are best handled via the loop; REP is expensive to
14339 set up. */
14340 else if (expected_size != -1 && expected_size < 4)
14341 return loop_1_byte;
14342 else if (expected_size != -1)
14343 {
14344 unsigned int i;
14345 enum stringop_alg alg = libcall;
14346 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14347 {
14348 gcc_assert (algs->size[i].max);
14349 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14350 {
14351 if (algs->size[i].alg != libcall)
14352 alg = algs->size[i].alg;
14353 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14354 last non-libcall inline algorithm. */
14355 if (TARGET_INLINE_ALL_STRINGOPS)
14356 {
14357 /* When the current size is best copied by a libcall
14358 but we are still forced to inline, run the heuristic below,
14359 which will pick code for medium-sized blocks. */
14360 if (alg != libcall)
14361 return alg;
14362 break;
14363 }
14364 else
14365 return algs->size[i].alg;
14366 }
14367 }
14368 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14369 }
14370 /* When asked to inline the call anyway, try to pick a meaningful choice.
14371 We look for the maximal size of block that is faster to copy by hand and
14372 take blocks of at most that size, guessing that the average size will
14373 be roughly half of the block.
14374
14375 If this turns out to be bad, we might simply specify the preferred
14376 choice in ix86_costs. */
14377 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14378 && algs->unknown_size == libcall)
14379 {
14380 int max = -1;
14381 enum stringop_alg alg;
14382 int i;
14383
14384 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14385 if (algs->size[i].alg != libcall && algs->size[i].alg)
14386 max = algs->size[i].max;
14387 if (max == -1)
14388 max = 4096;
14389 alg = decide_alg (count, max / 2, memset, dynamic_check);
14390 gcc_assert (*dynamic_check == -1);
14391 gcc_assert (alg != libcall);
14392 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14393 *dynamic_check = max;
14394 return alg;
14395 }
14396 return algs->unknown_size;
14397 }
14398
14399 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14400 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14401 static int
14402 decide_alignment (int align,
14403 enum stringop_alg alg,
14404 int expected_size)
14405 {
14406 int desired_align = 0;
14407 switch (alg)
14408 {
14409 case no_stringop:
14410 gcc_unreachable ();
14411 case loop:
14412 case unrolled_loop:
14413 desired_align = GET_MODE_SIZE (Pmode);
14414 break;
14415 case rep_prefix_8_byte:
14416 desired_align = 8;
14417 break;
14418 case rep_prefix_4_byte:
14419 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
14420 copying a whole cacheline at once. */
14421 if (TARGET_PENTIUMPRO)
14422 desired_align = 8;
14423 else
14424 desired_align = 4;
14425 break;
14426 case rep_prefix_1_byte:
14427 /* PentiumPro has special logic that triggers for 8-byte-aligned blocks,
14428 copying a whole cacheline at once. */
14429 if (TARGET_PENTIUMPRO)
14430 desired_align = 8;
14431 else
14432 desired_align = 1;
14433 break;
14434 case loop_1_byte:
14435 desired_align = 1;
14436 break;
14437 case libcall:
14438 return 0;
14439 }
14440
14441 if (optimize_size)
14442 desired_align = 1;
14443 if (desired_align < align)
14444 desired_align = align;
14445 if (expected_size != -1 && expected_size < 4)
14446 desired_align = align;
14447 return desired_align;
14448 }
14449
14450 /* Return the smallest power of 2 greater than VAL. */
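/* For example, smallest_pow2_greater_than (4) is 8 and
   smallest_pow2_greater_than (0) is 1.  */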
14451 static int
14452 smallest_pow2_greater_than (int val)
14453 {
14454 int ret = 1;
14455 while (ret <= val)
14456 ret <<= 1;
14457 return ret;
14458 }
14459
14460 /* Expand string move (memcpy) operation. Use i386 string operations when
14461 profitable. The memset expander contains similar code. The code depends
14462 upon architecture, block size and alignment, but always has the same
14463 overall structure:
14464
14465 1) Prologue guard: Conditional that jumps up to the epilogue for small
14466 blocks that can be handled by the epilogue alone. This is faster but
14467 also needed for correctness, since the prologue assumes the block is
14468 larger than the desired alignment.
14469
14470 An optional dynamic check for size and a libcall for large
14471 blocks are emitted here too, with -minline-stringops-dynamically.
14472
14473 2) Prologue: copy the first few bytes in order to get the destination
14474 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14475 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14476 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
14477
14478 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14479 with the specified algorithm.
14480
14481 4) Epilogue: code copying the tail of the block that is too small to be
14482 handled by the main body (or up to the size guarded by the prologue guard). */
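/* As a rough sketch, for a copy expanded with the rep_prefix_4_byte
   algorithm and a destination of unknown alignment the emitted sequence
   looks like

	if (count < epilogue_size_needed) goto epilogue;
	copy 1-3 bytes until dest is 4-byte aligned, adjusting count;
	rep movsl for count / 4 words;
	epilogue: copy the remaining count & (epilogue_size_needed - 1) bytes;

   other algorithms replace the middle step with a loop or a libcall.  */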
14483
14484 int
14485 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14486 rtx expected_align_exp, rtx expected_size_exp)
14487 {
14488 rtx destreg;
14489 rtx srcreg;
14490 rtx label = NULL;
14491 rtx tmp;
14492 rtx jump_around_label = NULL;
14493 HOST_WIDE_INT align = 1;
14494 unsigned HOST_WIDE_INT count = 0;
14495 HOST_WIDE_INT expected_size = -1;
14496 int size_needed = 0, epilogue_size_needed;
14497 int desired_align = 0;
14498 enum stringop_alg alg;
14499 int dynamic_check;
14500
14501 if (CONST_INT_P (align_exp))
14502 align = INTVAL (align_exp);
14503 /* i386 can do misaligned access at a reasonably increased cost. */
14504 if (CONST_INT_P (expected_align_exp)
14505 && INTVAL (expected_align_exp) > align)
14506 align = INTVAL (expected_align_exp);
14507 if (CONST_INT_P (count_exp))
14508 count = expected_size = INTVAL (count_exp);
14509 if (CONST_INT_P (expected_size_exp) && count == 0)
14510 expected_size = INTVAL (expected_size_exp);
14511
14512 /* Step 0: Decide on preferred algorithm, desired alignment and
14513 size of chunks to be copied by main loop. */
14514
14515 alg = decide_alg (count, expected_size, false, &dynamic_check);
14516 desired_align = decide_alignment (align, alg, expected_size);
14517
14518 if (!TARGET_ALIGN_STRINGOPS)
14519 align = desired_align;
14520
14521 if (alg == libcall)
14522 return 0;
14523 gcc_assert (alg != no_stringop);
14524 if (!count)
14525 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14526 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14527 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14528 switch (alg)
14529 {
14530 case libcall:
14531 case no_stringop:
14532 gcc_unreachable ();
14533 case loop:
14534 size_needed = GET_MODE_SIZE (Pmode);
14535 break;
14536 case unrolled_loop:
14537 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14538 break;
14539 case rep_prefix_8_byte:
14540 size_needed = 8;
14541 break;
14542 case rep_prefix_4_byte:
14543 size_needed = 4;
14544 break;
14545 case rep_prefix_1_byte:
14546 case loop_1_byte:
14547 size_needed = 1;
14548 break;
14549 }
14550
14551 epilogue_size_needed = size_needed;
14552
14553 /* Step 1: Prologue guard. */
14554
14555 /* Alignment code needs count to be in a register. */
14556 if (CONST_INT_P (count_exp) && desired_align > align)
14557 {
14558 enum machine_mode mode = SImode;
14559 if (TARGET_64BIT && (count & ~0xffffffff))
14560 mode = DImode;
14561 count_exp = force_reg (mode, count_exp);
14562 }
14563 gcc_assert (desired_align >= 1 && align >= 1);
14564
14565 /* Ensure that alignment prologue won't copy past end of block. */
14566 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14567 {
14568 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14569 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14570 Make sure it is a power of 2. */
14571 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14572
14573 label = gen_label_rtx ();
14574 emit_cmp_and_jump_insns (count_exp,
14575 GEN_INT (epilogue_size_needed),
14576 LTU, 0, counter_mode (count_exp), 1, label);
14577 if (GET_CODE (count_exp) == CONST_INT)
14578 ;
14579 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14580 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14581 else
14582 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14583 }
14584 /* Emit code to decide at runtime whether a library call or inline code
14585 should be used. */
14586 if (dynamic_check != -1)
14587 {
14588 rtx hot_label = gen_label_rtx ();
14589 jump_around_label = gen_label_rtx ();
14590 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14591 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14592 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14593 emit_block_move_via_libcall (dst, src, count_exp, false);
14594 emit_jump (jump_around_label);
14595 emit_label (hot_label);
14596 }
14597
14598 /* Step 2: Alignment prologue. */
14599
14600 if (desired_align > align)
14601 {
14602 /* Except for the first move in the epilogue, we no longer know
14603 the constant offset in the aliasing info. It doesn't seem worth
14604 the pain to maintain it for the first move, so throw away
14605 the info early. */
14606 src = change_address (src, BLKmode, srcreg);
14607 dst = change_address (dst, BLKmode, destreg);
14608 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14609 desired_align);
14610 }
14611 if (label && size_needed == 1)
14612 {
14613 emit_label (label);
14614 LABEL_NUSES (label) = 1;
14615 label = NULL;
14616 }
14617
14618 /* Step 3: Main loop. */
14619
14620 switch (alg)
14621 {
14622 case libcall:
14623 case no_stringop:
14624 gcc_unreachable ();
14625 case loop_1_byte:
14626 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14627 count_exp, QImode, 1, expected_size);
14628 break;
14629 case loop:
14630 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14631 count_exp, Pmode, 1, expected_size);
14632 break;
14633 case unrolled_loop:
14634 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14635 registers for 4 temporaries anyway. */
14636 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14637 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14638 expected_size);
14639 break;
14640 case rep_prefix_8_byte:
14641 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14642 DImode);
14643 break;
14644 case rep_prefix_4_byte:
14645 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14646 SImode);
14647 break;
14648 case rep_prefix_1_byte:
14649 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14650 QImode);
14651 break;
14652 }
14653 /* Properly adjust the offset of the src and dest memory for aliasing. */
14654 if (CONST_INT_P (count_exp))
14655 {
14656 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14657 (count / size_needed) * size_needed);
14658 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14659 (count / size_needed) * size_needed);
14660 }
14661 else
14662 {
14663 src = change_address (src, BLKmode, srcreg);
14664 dst = change_address (dst, BLKmode, destreg);
14665 }
14666
14667 /* Step 4: Epilogue to copy the remaining bytes. */
14668
14669 if (label)
14670 {
14671 /* When the main loop is done, COUNT_EXP might hold the original count,
14672 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14673 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14674 bytes. Compensate if needed. */
14675
14676 if (size_needed < epilogue_size_needed)
14677 {
14678 tmp =
14679 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14680 GEN_INT (size_needed - 1), count_exp, 1,
14681 OPTAB_DIRECT);
14682 if (tmp != count_exp)
14683 emit_move_insn (count_exp, tmp);
14684 }
14685 emit_label (label);
14686 LABEL_NUSES (label) = 1;
14687 }
14688
14689 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14690 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14691 epilogue_size_needed);
14692 if (jump_around_label)
14693 emit_label (jump_around_label);
14694 return 1;
14695 }
14696
14697 /* Helper function for memset (setmem expansion). For QImode value 0xXY
14698 produce 0xXYXYXYXY of the width specified by MODE. This is essentially
14699 a * 0x01010101, but we can do slightly better than
14700 synth_mult by unwinding the sequence by hand on CPUs with
14701 slow multiply. */
14702 static rtx
14703 promote_duplicated_reg (enum machine_mode mode, rtx val)
14704 {
14705 enum machine_mode valmode = GET_MODE (val);
14706 rtx tmp;
14707 int nops = mode == DImode ? 3 : 2;
14708
14709 gcc_assert (mode == SImode || mode == DImode);
14710 if (val == const0_rtx)
14711 return copy_to_mode_reg (mode, const0_rtx);
14712 if (CONST_INT_P (val))
14713 {
14714 HOST_WIDE_INT v = INTVAL (val) & 255;
14715
14716 v |= v << 8;
14717 v |= v << 16;
14718 if (mode == DImode)
14719 v |= (v << 16) << 16;
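/* E.g. VAL == 0x41 yields 0x41414141 in SImode and
   0x4141414141414141 in DImode.  */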
14720 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14721 }
14722
14723 if (valmode == VOIDmode)
14724 valmode = QImode;
14725 if (valmode != QImode)
14726 val = gen_lowpart (QImode, val);
14727 if (mode == QImode)
14728 return val;
14729 if (!TARGET_PARTIAL_REG_STALL)
14730 nops--;
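/* When the multiplier is cheap enough, synthesize the replicated value as
   (zero-extended VAL) * 0x01...01; the 0x01...01 constant is itself obtained
   by promoting const1_rtx below.  */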
14731 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14732 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14733 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14734 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14735 {
14736 rtx reg = convert_modes (mode, QImode, val, true);
14737 tmp = promote_duplicated_reg (mode, const1_rtx);
14738 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14739 OPTAB_DIRECT);
14740 }
14741 else
14742 {
14743 rtx reg = convert_modes (mode, QImode, val, true);
14744
14745 if (!TARGET_PARTIAL_REG_STALL)
14746 if (mode == SImode)
14747 emit_insn (gen_movsi_insv_1 (reg, reg));
14748 else
14749 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14750 else
14751 {
14752 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14753 NULL, 1, OPTAB_DIRECT);
14754 reg =
14755 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14756 }
14757 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14758 NULL, 1, OPTAB_DIRECT);
14759 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14760 if (mode == SImode)
14761 return reg;
14762 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14763 NULL, 1, OPTAB_DIRECT);
14764 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14765 return reg;
14766 }
14767 }
14768
14769 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14770 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14771 alignment from ALIGN to DESIRED_ALIGN. */
14772 static rtx
14773 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14774 {
14775 rtx promoted_val;
14776
14777 if (TARGET_64BIT
14778 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14779 promoted_val = promote_duplicated_reg (DImode, val);
14780 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14781 promoted_val = promote_duplicated_reg (SImode, val);
14782 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14783 promoted_val = promote_duplicated_reg (HImode, val);
14784 else
14785 promoted_val = val;
14786
14787 return promoted_val;
14788 }
14789
14790 /* Expand string clear operation (bzero). Use i386 string operations when
14791 profitable. See expand_movmem comment for explanation of individual
14792 steps performed. */
14793 int
14794 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14795 rtx expected_align_exp, rtx expected_size_exp)
14796 {
14797 rtx destreg;
14798 rtx label = NULL;
14799 rtx tmp;
14800 rtx jump_around_label = NULL;
14801 HOST_WIDE_INT align = 1;
14802 unsigned HOST_WIDE_INT count = 0;
14803 HOST_WIDE_INT expected_size = -1;
14804 int size_needed = 0, epilogue_size_needed;
14805 int desired_align = 0;
14806 enum stringop_alg alg;
14807 rtx promoted_val = NULL;
14808 bool force_loopy_epilogue = false;
14809 int dynamic_check;
14810
14811 if (CONST_INT_P (align_exp))
14812 align = INTVAL (align_exp);
14813 /* i386 can do misaligned access at a reasonably increased cost. */
14814 if (CONST_INT_P (expected_align_exp)
14815 && INTVAL (expected_align_exp) > align)
14816 align = INTVAL (expected_align_exp);
14817 if (CONST_INT_P (count_exp))
14818 count = expected_size = INTVAL (count_exp);
14819 if (CONST_INT_P (expected_size_exp) && count == 0)
14820 expected_size = INTVAL (expected_size_exp);
14821
14822 /* Step 0: Decide on preferred algorithm, desired alignment and
14823 size of chunks to be copied by main loop. */
14824
14825 alg = decide_alg (count, expected_size, true, &dynamic_check);
14826 desired_align = decide_alignment (align, alg, expected_size);
14827
14828 if (!TARGET_ALIGN_STRINGOPS)
14829 align = desired_align;
14830
14831 if (alg == libcall)
14832 return 0;
14833 gcc_assert (alg != no_stringop);
14834 if (!count)
14835 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14836 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14837 switch (alg)
14838 {
14839 case libcall:
14840 case no_stringop:
14841 gcc_unreachable ();
14842 case loop:
14843 size_needed = GET_MODE_SIZE (Pmode);
14844 break;
14845 case unrolled_loop:
14846 size_needed = GET_MODE_SIZE (Pmode) * 4;
14847 break;
14848 case rep_prefix_8_byte:
14849 size_needed = 8;
14850 break;
14851 case rep_prefix_4_byte:
14852 size_needed = 4;
14853 break;
14854 case rep_prefix_1_byte:
14855 case loop_1_byte:
14856 size_needed = 1;
14857 break;
14858 }
14859 epilogue_size_needed = size_needed;
14860
14861 /* Step 1: Prologue guard. */
14862
14863 /* Alignment code needs count to be in register. */
14864 if (CONST_INT_P (count_exp) && desired_align > align)
14865 {
14866 enum machine_mode mode = SImode;
14867 if (TARGET_64BIT && (count & ~0xffffffff))
14868 mode = DImode;
14869 count_exp = force_reg (mode, count_exp);
14870 }
14871 /* Do the cheap promotion to allow better CSE across the
14872 main loop and epilogue (i.e. one load of the big constant in
14873 front of all the code). */
14874 if (CONST_INT_P (val_exp))
14875 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14876 desired_align, align);
14877 /* Ensure that alignment prologue won't copy past end of block. */
14878 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14879 {
14880 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14881 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14882 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
14883 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14884
14885 /* To improve performance of small blocks, we jump around the VAL
14886 promoting code. This means that if the promoted VAL is not constant,
14887 we might not use it in the epilogue and have to fall back to the byte
14888 loop variant. */
14889 if (epilogue_size_needed > 2 && !promoted_val)
14890 force_loopy_epilogue = true;
14891 label = gen_label_rtx ();
14892 emit_cmp_and_jump_insns (count_exp,
14893 GEN_INT (epilogue_size_needed),
14894 LTU, 0, counter_mode (count_exp), 1, label);
14895 if (GET_CODE (count_exp) == CONST_INT)
14896 ;
14897 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14898 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14899 else
14900 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14901 }
14902 if (dynamic_check != -1)
14903 {
14904 rtx hot_label = gen_label_rtx ();
14905 jump_around_label = gen_label_rtx ();
14906 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14907 LEU, 0, counter_mode (count_exp), 1, hot_label);
14908 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14909 set_storage_via_libcall (dst, count_exp, val_exp, false);
14910 emit_jump (jump_around_label);
14911 emit_label (hot_label);
14912 }
14913
14914 /* Step 2: Alignment prologue. */
14915
14916 /* Do the expensive promotion once we branched off the small blocks. */
14917 if (!promoted_val)
14918 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14919 desired_align, align);
14920 gcc_assert (desired_align >= 1 && align >= 1);
14921
14922 if (desired_align > align)
14923 {
14924 /* Except for the first move in the epilogue, we no longer know
14925 the constant offset in aliasing info. It doesn't seem worth
14926 the pain to maintain it for the first move, so throw away
14927 the info early. */
14928 dst = change_address (dst, BLKmode, destreg);
14929 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14930 desired_align);
14931 }
14932 if (label && size_needed == 1)
14933 {
14934 emit_label (label);
14935 LABEL_NUSES (label) = 1;
14936 label = NULL;
14937 }
14938
14939 /* Step 3: Main loop. */
14940
14941 switch (alg)
14942 {
14943 case libcall:
14944 case no_stringop:
14945 gcc_unreachable ();
14946 case loop_1_byte:
14947 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14948 count_exp, QImode, 1, expected_size);
14949 break;
14950 case loop:
14951 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14952 count_exp, Pmode, 1, expected_size);
14953 break;
14954 case unrolled_loop:
14955 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14956 count_exp, Pmode, 4, expected_size);
14957 break;
14958 case rep_prefix_8_byte:
14959 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14960 DImode);
14961 break;
14962 case rep_prefix_4_byte:
14963 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14964 SImode);
14965 break;
14966 case rep_prefix_1_byte:
14967 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14968 QImode);
14969 break;
14970 }
14971 /* Properly adjust the offset of the destination memory for aliasing. */
14972 if (CONST_INT_P (count_exp))
14973 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14974 (count / size_needed) * size_needed);
14975 else
14976 dst = change_address (dst, BLKmode, destreg);
14977
14978 /* Step 4: Epilogue to copy the remaining bytes. */
14979
14980 if (label)
14981 {
14982 /* When the main loop is done, COUNT_EXP might hold the original count,
14983 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14984 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14985 bytes. Compensate if needed. */
14986
14987 if (size_needed < desired_align - align)
14988 {
14989 tmp =
14990 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14991 GEN_INT (size_needed - 1), count_exp, 1,
14992 OPTAB_DIRECT);
14993 size_needed = desired_align - align + 1;
14994 if (tmp != count_exp)
14995 emit_move_insn (count_exp, tmp);
14996 }
14997 emit_label (label);
14998 LABEL_NUSES (label) = 1;
14999 }
15000 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15001 {
15002 if (force_loopy_epilogue)
15003 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15004 size_needed);
15005 else
15006 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15007 size_needed);
15008 }
15009 if (jump_around_label)
15010 emit_label (jump_around_label);
15011 return 1;
15012 }
15013
15014 /* Expand the appropriate insns for doing strlen if not just doing
15015 repnz; scasb
15016
15017 out = result, initialized with the start address
15018 align_rtx = alignment of the address.
15019 scratch = scratch register, initialized with the start address when
15020 not aligned, otherwise undefined
15021
15022 This is just the body. It needs the initializations mentioned above and
15023 some address computing at the end. These things are done in i386.md. */
15024
15025 static void
15026 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15027 {
15028 int align;
15029 rtx tmp;
15030 rtx align_2_label = NULL_RTX;
15031 rtx align_3_label = NULL_RTX;
15032 rtx align_4_label = gen_label_rtx ();
15033 rtx end_0_label = gen_label_rtx ();
15034 rtx mem;
15035 rtx tmpreg = gen_reg_rtx (SImode);
15036 rtx scratch = gen_reg_rtx (SImode);
15037 rtx cmp;
15038
15039 align = 0;
15040 if (CONST_INT_P (align_rtx))
15041 align = INTVAL (align_rtx);
15042
15043 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15044
15045 /* Is there a known alignment and is it less than 4? */
15046 if (align < 4)
15047 {
15048 rtx scratch1 = gen_reg_rtx (Pmode);
15049 emit_move_insn (scratch1, out);
15050 /* Is there a known alignment and is it not 2? */
15051 if (align != 2)
15052 {
15053 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15054 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15055
15056 /* Leave just the 3 lower bits. */
15057 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15058 NULL_RTX, 0, OPTAB_WIDEN);
15059
15060 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15061 Pmode, 1, align_4_label);
15062 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15063 Pmode, 1, align_2_label);
15064 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15065 Pmode, 1, align_3_label);
15066 }
15067 else
15068 {
15069 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15070 check whether it is aligned to a 4-byte boundary. */
15071
15072 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15073 NULL_RTX, 0, OPTAB_WIDEN);
15074
15075 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15076 Pmode, 1, align_4_label);
15077 }
15078
15079 mem = change_address (src, QImode, out);
15080
15081 /* Now compare the bytes. */
15082
15083 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15084 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15085 QImode, 1, end_0_label);
15086
15087 /* Increment the address. */
15088 if (TARGET_64BIT)
15089 emit_insn (gen_adddi3 (out, out, const1_rtx));
15090 else
15091 emit_insn (gen_addsi3 (out, out, const1_rtx));
15092
15093 /* Not needed with an alignment of 2 */
15094 if (align != 2)
15095 {
15096 emit_label (align_2_label);
15097
15098 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15099 end_0_label);
15100
15101 if (TARGET_64BIT)
15102 emit_insn (gen_adddi3 (out, out, const1_rtx));
15103 else
15104 emit_insn (gen_addsi3 (out, out, const1_rtx));
15105
15106 emit_label (align_3_label);
15107 }
15108
15109 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15110 end_0_label);
15111
15112 if (TARGET_64BIT)
15113 emit_insn (gen_adddi3 (out, out, const1_rtx));
15114 else
15115 emit_insn (gen_addsi3 (out, out, const1_rtx));
15116 }
15117
15118 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15119 align this loop: it only bloats the code and does not help to
15120 speed it up. */
15121 emit_label (align_4_label);
15122
15123 mem = change_address (src, SImode, out);
15124 emit_move_insn (scratch, mem);
15125 if (TARGET_64BIT)
15126 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15127 else
15128 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15129
15130 /* This formula yields a nonzero result iff one of the bytes is zero.
15131 This saves three branches inside the loop and many cycles. */
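/* Concretely: TMPREG = (WORD - 0x01010101) & ~WORD & 0x80808080, which is
   nonzero exactly when some byte of the loaded word is zero.  */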
15132
15133 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15134 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15135 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15136 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15137 gen_int_mode (0x80808080, SImode)));
15138 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15139 align_4_label);
15140
15141 if (TARGET_CMOVE)
15142 {
15143 rtx reg = gen_reg_rtx (SImode);
15144 rtx reg2 = gen_reg_rtx (Pmode);
15145 emit_move_insn (reg, tmpreg);
15146 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15147
15148 /* If zero is not in the first two bytes, move two bytes forward. */
15149 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15150 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15151 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15152 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15153 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15154 reg,
15155 tmpreg)));
15156 /* Emit lea manually to avoid clobbering of flags. */
15157 emit_insn (gen_rtx_SET (SImode, reg2,
15158 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15159
15160 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15161 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15162 emit_insn (gen_rtx_SET (VOIDmode, out,
15163 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15164 reg2,
15165 out)));
15166
15167 }
15168 else
15169 {
15170 rtx end_2_label = gen_label_rtx ();
15171 /* Is zero in the first two bytes? */
15172
15173 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15174 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15175 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15176 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15177 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15178 pc_rtx);
15179 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15180 JUMP_LABEL (tmp) = end_2_label;
15181
15182 /* Not in the first two. Move two bytes forward. */
15183 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15184 if (TARGET_64BIT)
15185 emit_insn (gen_adddi3 (out, out, const2_rtx));
15186 else
15187 emit_insn (gen_addsi3 (out, out, const2_rtx));
15188
15189 emit_label (end_2_label);
15190
15191 }
15192
15193 /* Avoid branch in fixing the byte. */
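/* At this point OUT is 4 bytes past the start of the word (6 if the zero byte
   was not in the first half), and the low byte of TMPREG has bit 7 set exactly
   when the zero byte is the first of the remaining pair.  Doubling that byte
   moves bit 7 into the carry flag, so OUT - 3 - carry ends up pointing at the
   terminating zero byte.  */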
15194 tmpreg = gen_lowpart (QImode, tmpreg);
15195 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15196 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15197 if (TARGET_64BIT)
15198 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15199 else
15200 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15201
15202 emit_label (end_0_label);
15203 }
15204
15205 /* Expand strlen. */
15206
15207 int
15208 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15209 {
15210 rtx addr, scratch1, scratch2, scratch3, scratch4;
15211
15212 /* The generic case of the strlen expander is long. Avoid expanding
15213 it unless TARGET_INLINE_ALL_STRINGOPS. */
15214
15215 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15216 && !TARGET_INLINE_ALL_STRINGOPS
15217 && !optimize_size
15218 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15219 return 0;
15220
15221 addr = force_reg (Pmode, XEXP (src, 0));
15222 scratch1 = gen_reg_rtx (Pmode);
15223
15224 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15225 && !optimize_size)
15226 {
15227 /* Well, it seems that some optimizer does not combine a call like
15228 foo(strlen(bar), strlen(bar));
15229 when the move and the subtraction are done here. It does calculate
15230 the length just once when these instructions are done inside of
15231 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15232 often used and I use one fewer register for the lifetime of
15233 output_strlen_unroll(), this is better. */
15234
15235 emit_move_insn (out, addr);
15236
15237 ix86_expand_strlensi_unroll_1 (out, src, align);
15238
15239 /* strlensi_unroll_1 returns the address of the zero at the end of
15240 the string, like memchr(), so compute the length by subtracting
15241 the start address. */
15242 if (TARGET_64BIT)
15243 emit_insn (gen_subdi3 (out, out, addr));
15244 else
15245 emit_insn (gen_subsi3 (out, out, addr));
15246 }
15247 else
15248 {
15249 rtx unspec;
15250 scratch2 = gen_reg_rtx (Pmode);
15251 scratch3 = gen_reg_rtx (Pmode);
15252 scratch4 = force_reg (Pmode, constm1_rtx);
15253
15254 emit_move_insn (scratch3, addr);
15255 eoschar = force_reg (QImode, eoschar);
15256
15257 src = replace_equiv_address_nv (src, scratch3);
15258
15259 /* If .md starts supporting :P, this can be done in .md. */
15260 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15261 scratch4), UNSPEC_SCAS);
15262 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
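/* The repnz scasb starts the count register at -1 and scans LEN + 1 bytes
   (including the terminator), leaving -(LEN + 2) behind; the length is
   therefore ~count - 1, computed below.  */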
15263 if (TARGET_64BIT)
15264 {
15265 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15266 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15267 }
15268 else
15269 {
15270 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15271 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15272 }
15273 }
15274 return 1;
15275 }
15276
15277 /* For a given symbol (function), construct code to compute the address of
15278 its PLT entry in the large x86-64 PIC model. */
15279 rtx
15280 construct_plt_address (rtx symbol)
15281 {
15282 rtx tmp = gen_reg_rtx (Pmode);
15283 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15284
15285 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15286 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15287
15288 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15289 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15290 return tmp;
15291 }
15292
15293 void
15294 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15295 rtx callarg2 ATTRIBUTE_UNUSED,
15296 rtx pop, int sibcall)
15297 {
15298 rtx use = NULL, call;
15299
15300 if (pop == const0_rtx)
15301 pop = NULL;
15302 gcc_assert (!TARGET_64BIT || !pop);
15303
15304 if (TARGET_MACHO && !TARGET_64BIT)
15305 {
15306 #if TARGET_MACHO
15307 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15308 fnaddr = machopic_indirect_call_target (fnaddr);
15309 #endif
15310 }
15311 else
15312 {
15313 /* Static functions and indirect calls don't need the pic register. */
15314 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15315 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15316 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15317 use_reg (&use, pic_offset_table_rtx);
15318 }
15319
15320 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15321 {
15322 rtx al = gen_rtx_REG (QImode, 0);
15323 emit_move_insn (al, callarg2);
15324 use_reg (&use, al);
15325 }
15326
15327 if (ix86_cmodel == CM_LARGE_PIC
15328 && GET_CODE (fnaddr) == MEM
15329 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15330 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15331 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15332 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15333 {
15334 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15335 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15336 }
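/* For a 64-bit sibcall through a register, use R11: it is call-clobbered and
   not used for argument passing, so loading the call address into it cannot
   clobber an outgoing argument.  */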
15337 if (sibcall && TARGET_64BIT
15338 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15339 {
15340 rtx addr;
15341 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15342 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15343 emit_move_insn (fnaddr, addr);
15344 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15345 }
15346
15347 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15348 if (retval)
15349 call = gen_rtx_SET (VOIDmode, retval, call);
15350 if (pop)
15351 {
15352 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15353 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15354 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15355 }
15356
15357 call = emit_call_insn (call);
15358 if (use)
15359 CALL_INSN_FUNCTION_USAGE (call) = use;
15360 }
15361
15362 \f
15363 /* Clear stack slot assignments remembered from previous functions.
15364 This is called from INIT_EXPANDERS once before RTL is emitted for each
15365 function. */
15366
15367 static struct machine_function *
15368 ix86_init_machine_status (void)
15369 {
15370 struct machine_function *f;
15371
15372 f = ggc_alloc_cleared (sizeof (struct machine_function));
15373 f->use_fast_prologue_epilogue_nregs = -1;
15374 f->tls_descriptor_call_expanded_p = 0;
15375
15376 return f;
15377 }
15378
15379 /* Return a MEM corresponding to a stack slot with mode MODE.
15380 Allocate a new slot if necessary.
15381
15382 The RTL for a function can have several slots available: N is
15383 which slot to use. */
15384
15385 rtx
15386 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15387 {
15388 struct stack_local_entry *s;
15389
15390 gcc_assert (n < MAX_386_STACK_LOCALS);
15391
15392 for (s = ix86_stack_locals; s; s = s->next)
15393 if (s->mode == mode && s->n == n)
15394 return copy_rtx (s->rtl);
15395
15396 s = (struct stack_local_entry *)
15397 ggc_alloc (sizeof (struct stack_local_entry));
15398 s->n = n;
15399 s->mode = mode;
15400 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15401
15402 s->next = ix86_stack_locals;
15403 ix86_stack_locals = s;
15404 return s->rtl;
15405 }
15406
15407 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15408
15409 static GTY(()) rtx ix86_tls_symbol;
15410 rtx
15411 ix86_tls_get_addr (void)
15412 {
15413
15414 if (!ix86_tls_symbol)
15415 {
15416 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15417 (TARGET_ANY_GNU_TLS
15418 && !TARGET_64BIT)
15419 ? "___tls_get_addr"
15420 : "__tls_get_addr");
15421 }
15422
15423 return ix86_tls_symbol;
15424 }
15425
15426 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15427
15428 static GTY(()) rtx ix86_tls_module_base_symbol;
15429 rtx
15430 ix86_tls_module_base (void)
15431 {
15432
15433 if (!ix86_tls_module_base_symbol)
15434 {
15435 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15436 "_TLS_MODULE_BASE_");
15437 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15438 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15439 }
15440
15441 return ix86_tls_module_base_symbol;
15442 }
15443 \f
15444 /* Calculate the length of the memory address in the instruction
15445 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15446
15447 int
15448 memory_address_length (rtx addr)
15449 {
15450 struct ix86_address parts;
15451 rtx base, index, disp;
15452 int len;
15453 int ok;
15454
15455 if (GET_CODE (addr) == PRE_DEC
15456 || GET_CODE (addr) == POST_INC
15457 || GET_CODE (addr) == PRE_MODIFY
15458 || GET_CODE (addr) == POST_MODIFY)
15459 return 0;
15460
15461 ok = ix86_decompose_address (addr, &parts);
15462 gcc_assert (ok);
15463
15464 if (parts.base && GET_CODE (parts.base) == SUBREG)
15465 parts.base = SUBREG_REG (parts.base);
15466 if (parts.index && GET_CODE (parts.index) == SUBREG)
15467 parts.index = SUBREG_REG (parts.index);
15468
15469 base = parts.base;
15470 index = parts.index;
15471 disp = parts.disp;
15472 len = 0;
15473
15474 /* Rule of thumb:
15475 - esp as the base always wants an index,
15476 - ebp as the base always wants a displacement. */
15477
15478 /* Register Indirect. */
15479 if (base && !index && !disp)
15480 {
15481 /* esp (for its index) and ebp (for its displacement) need
15482 the two-byte modrm form. */
15483 if (addr == stack_pointer_rtx
15484 || addr == arg_pointer_rtx
15485 || addr == frame_pointer_rtx
15486 || addr == hard_frame_pointer_rtx)
15487 len = 1;
15488 }
15489
15490 /* Direct Addressing. */
15491 else if (disp && !base && !index)
15492 len = 4;
15493
15494 else
15495 {
15496 /* Find the length of the displacement constant. */
15497 if (disp)
15498 {
15499 if (base && satisfies_constraint_K (disp))
15500 len = 1;
15501 else
15502 len = 4;
15503 }
15504 /* ebp always wants a displacement. */
15505 else if (base == hard_frame_pointer_rtx)
15506 len = 1;
15507
15508 /* An index requires the two-byte modrm form.... */
15509 if (index
15510 /* ...like esp, which always wants an index. */
15511 || base == stack_pointer_rtx
15512 || base == arg_pointer_rtx
15513 || base == frame_pointer_rtx)
15514 len += 1;
15515 }
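/* Examples: (%esp) and (%ebp) cost 1 extra byte, a bare symbolic or absolute
   address costs 4, an 8-bit displacement off a base costs 1, and a 32-bit
   displacement together with an index register costs 5 (4 for the
   displacement plus the SIB byte).  */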
15516
15517 return len;
15518 }
15519
15520 /* Compute the default value for the "length_immediate" attribute. When
15521 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15522 int
15523 ix86_attr_length_immediate_default (rtx insn, int shortform)
15524 {
15525 int len = 0;
15526 int i;
15527 extract_insn_cached (insn);
15528 for (i = recog_data.n_operands - 1; i >= 0; --i)
15529 if (CONSTANT_P (recog_data.operand[i]))
15530 {
15531 gcc_assert (!len);
15532 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15533 len = 1;
15534 else
15535 {
15536 switch (get_attr_mode (insn))
15537 {
15538 case MODE_QI:
15539 len+=1;
15540 break;
15541 case MODE_HI:
15542 len+=2;
15543 break;
15544 case MODE_SI:
15545 len+=4;
15546 break;
15547 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15548 case MODE_DI:
15549 len+=4;
15550 break;
15551 default:
15552 fatal_insn ("unknown insn mode", insn);
15553 }
15554 }
15555 }
15556 return len;
15557 }
15558 /* Compute default value for "length_address" attribute. */
15559 int
15560 ix86_attr_length_address_default (rtx insn)
15561 {
15562 int i;
15563
15564 if (get_attr_type (insn) == TYPE_LEA)
15565 {
15566 rtx set = PATTERN (insn);
15567
15568 if (GET_CODE (set) == PARALLEL)
15569 set = XVECEXP (set, 0, 0);
15570
15571 gcc_assert (GET_CODE (set) == SET);
15572
15573 return memory_address_length (SET_SRC (set));
15574 }
15575
15576 extract_insn_cached (insn);
15577 for (i = recog_data.n_operands - 1; i >= 0; --i)
15578 if (MEM_P (recog_data.operand[i]))
15580 return memory_address_length (XEXP (recog_data.operand[i], 0));
15583 return 0;
15584 }
15585 \f
15586 /* Return the maximum number of instructions a cpu can issue. */
15587
15588 static int
15589 ix86_issue_rate (void)
15590 {
15591 switch (ix86_tune)
15592 {
15593 case PROCESSOR_PENTIUM:
15594 case PROCESSOR_K6:
15595 return 2;
15596
15597 case PROCESSOR_PENTIUMPRO:
15598 case PROCESSOR_PENTIUM4:
15599 case PROCESSOR_ATHLON:
15600 case PROCESSOR_K8:
15601 case PROCESSOR_AMDFAM10:
15602 case PROCESSOR_NOCONA:
15603 case PROCESSOR_GENERIC32:
15604 case PROCESSOR_GENERIC64:
15605 return 3;
15606
15607 case PROCESSOR_CORE2:
15608 return 4;
15609
15610 default:
15611 return 1;
15612 }
15613 }
15614
15615 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15616 by DEP_INSN and nothing else that DEP_INSN sets. */
15617
15618 static int
15619 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15620 {
15621 rtx set, set2;
15622
15623 /* Simplify the test for uninteresting insns. */
15624 if (insn_type != TYPE_SETCC
15625 && insn_type != TYPE_ICMOV
15626 && insn_type != TYPE_FCMOV
15627 && insn_type != TYPE_IBR)
15628 return 0;
15629
15630 if ((set = single_set (dep_insn)) != 0)
15631 {
15632 set = SET_DEST (set);
15633 set2 = NULL_RTX;
15634 }
15635 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15636 && XVECLEN (PATTERN (dep_insn), 0) == 2
15637 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15638 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15639 {
15640 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15641 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15642 }
15643 else
15644 return 0;
15645
15646 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15647 return 0;
15648
15649 /* This test is true if the dependent insn reads the flags but
15650 not any other potentially set register. */
15651 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15652 return 0;
15653
15654 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15655 return 0;
15656
15657 return 1;
15658 }
15659
15660 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15661 address with operands set by DEP_INSN. */
15662
15663 static int
15664 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15665 {
15666 rtx addr;
15667
15668 if (insn_type == TYPE_LEA
15669 && TARGET_PENTIUM)
15670 {
15671 addr = PATTERN (insn);
15672
15673 if (GET_CODE (addr) == PARALLEL)
15674 addr = XVECEXP (addr, 0, 0);
15675
15676 gcc_assert (GET_CODE (addr) == SET);
15677
15678 addr = SET_SRC (addr);
15679 }
15680 else
15681 {
15682 int i;
15683 extract_insn_cached (insn);
15684 for (i = recog_data.n_operands - 1; i >= 0; --i)
15685 if (MEM_P (recog_data.operand[i]))
15686 {
15687 addr = XEXP (recog_data.operand[i], 0);
15688 goto found;
15689 }
15690 return 0;
15691 found:;
15692 }
15693
15694 return modified_in_p (addr, dep_insn);
15695 }
15696
15697 static int
15698 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15699 {
15700 enum attr_type insn_type, dep_insn_type;
15701 enum attr_memory memory;
15702 rtx set, set2;
15703 int dep_insn_code_number;
15704
15705 /* Anti and output dependencies have zero cost on all CPUs. */
15706 if (REG_NOTE_KIND (link) != 0)
15707 return 0;
15708
15709 dep_insn_code_number = recog_memoized (dep_insn);
15710
15711 /* If we can't recognize the insns, we can't really do anything. */
15712 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15713 return cost;
15714
15715 insn_type = get_attr_type (insn);
15716 dep_insn_type = get_attr_type (dep_insn);
15717
15718 switch (ix86_tune)
15719 {
15720 case PROCESSOR_PENTIUM:
15721 /* Address Generation Interlock adds a cycle of latency. */
15722 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15723 cost += 1;
15724
15725 /* ??? Compares pair with jump/setcc. */
15726 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15727 cost = 0;
15728
15729 /* Floating point stores require the value to be ready one cycle earlier. */
15730 if (insn_type == TYPE_FMOV
15731 && get_attr_memory (insn) == MEMORY_STORE
15732 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15733 cost += 1;
15734 break;
15735
15736 case PROCESSOR_PENTIUMPRO:
15737 memory = get_attr_memory (insn);
15738
15739 /* INT->FP conversion is expensive. */
15740 if (get_attr_fp_int_src (dep_insn))
15741 cost += 5;
15742
15743 /* There is one cycle extra latency between an FP op and a store. */
15744 if (insn_type == TYPE_FMOV
15745 && (set = single_set (dep_insn)) != NULL_RTX
15746 && (set2 = single_set (insn)) != NULL_RTX
15747 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15748 && MEM_P (SET_DEST (set2)))
15749 cost += 1;
15750
15751 /* Account for the ability of the reorder buffer to hide the latency of a
15752 load by executing it in parallel with the previous instruction when the
15753 previous instruction is not needed to compute the address. */
15754 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15755 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15756 {
15757 /* Claim moves to take one cycle, as the core can issue one load
15758 at a time and the next load can start a cycle later. */
15759 if (dep_insn_type == TYPE_IMOV
15760 || dep_insn_type == TYPE_FMOV)
15761 cost = 1;
15762 else if (cost > 1)
15763 cost--;
15764 }
15765 break;
15766
15767 case PROCESSOR_K6:
15768 memory = get_attr_memory (insn);
15769
15770 /* The esp dependency is resolved before the instruction is really
15771 finished. */
15772 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15773 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15774 return 1;
15775
15776 /* INT->FP conversion is expensive. */
15777 if (get_attr_fp_int_src (dep_insn))
15778 cost += 5;
15779
15780 /* Account for the ability of the reorder buffer to hide the latency of a
15781 load by executing it in parallel with the previous instruction when the
15782 previous instruction is not needed to compute the address. */
15783 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15784 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15785 {
15786 /* Claim moves to take one cycle, as the core can issue one load
15787 at a time and the next load can start a cycle later. */
15788 if (dep_insn_type == TYPE_IMOV
15789 || dep_insn_type == TYPE_FMOV)
15790 cost = 1;
15791 else if (cost > 2)
15792 cost -= 2;
15793 else
15794 cost = 1;
15795 }
15796 break;
15797
15798 case PROCESSOR_ATHLON:
15799 case PROCESSOR_K8:
15800 case PROCESSOR_AMDFAM10:
15801 case PROCESSOR_GENERIC32:
15802 case PROCESSOR_GENERIC64:
15803 memory = get_attr_memory (insn);
15804
15805 /* Account for the ability of the reorder buffer to hide the latency of a
15806 load by executing it in parallel with the previous instruction when the
15807 previous instruction is not needed to compute the address. */
15808 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15809 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15810 {
15811 enum attr_unit unit = get_attr_unit (insn);
15812 int loadcost = 3;
15813
15814 /* Because of the difference between the length of the integer and
15815 floating unit pipeline preparation stages, the memory operands
15816 for floating point are cheaper.
15817
15818 ??? For Athlon the difference is most probably 2. */
15819 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15820 loadcost = 3;
15821 else
15822 loadcost = TARGET_ATHLON ? 2 : 0;
15823
15824 if (cost >= loadcost)
15825 cost -= loadcost;
15826 else
15827 cost = 0;
15828 }
15829
15830 default:
15831 break;
15832 }
15833
15834 return cost;
15835 }
15836
15837 /* How many alternative schedules to try. This should be as wide as the
15838 scheduling freedom in the DFA, but no wider. Making this value too
15839 large results in extra work for the scheduler. */
15840
15841 static int
15842 ia32_multipass_dfa_lookahead (void)
15843 {
15844 if (ix86_tune == PROCESSOR_PENTIUM)
15845 return 2;
15846
15847 if (ix86_tune == PROCESSOR_PENTIUMPRO
15848 || ix86_tune == PROCESSOR_K6)
15849 return 1;
15850
15851 else
15852 return 0;
15853 }
15854
15855 \f
15856 /* Compute the alignment given to a constant that is being placed in memory.
15857 EXP is the constant and ALIGN is the alignment that the object would
15858 ordinarily have.
15859 The value of this function is used instead of that alignment to align
15860 the object. */
15861
15862 int
15863 ix86_constant_alignment (tree exp, int align)
15864 {
15865 if (TREE_CODE (exp) == REAL_CST)
15866 {
15867 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15868 return 64;
15869 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15870 return 128;
15871 }
15872 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15873 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15874 return BITS_PER_WORD;
15875
15876 return align;
15877 }
15878
15879 /* Compute the alignment for a static variable.
15880 TYPE is the data type, and ALIGN is the alignment that
15881 the object would ordinarily have. The value of this function is used
15882 instead of that alignment to align the object. */
15883
15884 int
15885 ix86_data_alignment (tree type, int align)
15886 {
15887 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15888
15889 if (AGGREGATE_TYPE_P (type)
15890 && TYPE_SIZE (type)
15891 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15892 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15893 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15894 && align < max_align)
15895 align = max_align;
15896
15897 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
15898 to a 16-byte boundary. */
15899 if (TARGET_64BIT)
15900 {
15901 if (AGGREGATE_TYPE_P (type)
15902 && TYPE_SIZE (type)
15903 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15904 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15905 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15906 return 128;
15907 }
15908
15909 if (TREE_CODE (type) == ARRAY_TYPE)
15910 {
15911 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15912 return 64;
15913 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15914 return 128;
15915 }
15916 else if (TREE_CODE (type) == COMPLEX_TYPE)
15917 {
15918
15919 if (TYPE_MODE (type) == DCmode && align < 64)
15920 return 64;
15921 if (TYPE_MODE (type) == XCmode && align < 128)
15922 return 128;
15923 }
15924 else if ((TREE_CODE (type) == RECORD_TYPE
15925 || TREE_CODE (type) == UNION_TYPE
15926 || TREE_CODE (type) == QUAL_UNION_TYPE)
15927 && TYPE_FIELDS (type))
15928 {
15929 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15930 return 64;
15931 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15932 return 128;
15933 }
15934 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15935 || TREE_CODE (type) == INTEGER_TYPE)
15936 {
15937 if (TYPE_MODE (type) == DFmode && align < 64)
15938 return 64;
15939 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15940 return 128;
15941 }
15942
15943 return align;
15944 }
15945
15946 /* Compute the alignment for a local variable.
15947 TYPE is the data type, and ALIGN is the alignment that
15948 the object would ordinarily have. The value of this macro is used
15949 instead of that alignment to align the object. */
15950
15951 int
15952 ix86_local_alignment (tree type, int align)
15953 {
15954 /* The x86-64 ABI requires arrays of 16 bytes or more to be aligned
15955 to a 16-byte boundary. */
15956 if (TARGET_64BIT)
15957 {
15958 if (AGGREGATE_TYPE_P (type)
15959 && TYPE_SIZE (type)
15960 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15961 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15962 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15963 return 128;
15964 }
15965 if (TREE_CODE (type) == ARRAY_TYPE)
15966 {
15967 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15968 return 64;
15969 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15970 return 128;
15971 }
15972 else if (TREE_CODE (type) == COMPLEX_TYPE)
15973 {
15974 if (TYPE_MODE (type) == DCmode && align < 64)
15975 return 64;
15976 if (TYPE_MODE (type) == XCmode && align < 128)
15977 return 128;
15978 }
15979 else if ((TREE_CODE (type) == RECORD_TYPE
15980 || TREE_CODE (type) == UNION_TYPE
15981 || TREE_CODE (type) == QUAL_UNION_TYPE)
15982 && TYPE_FIELDS (type))
15983 {
15984 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15985 return 64;
15986 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15987 return 128;
15988 }
15989 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15990 || TREE_CODE (type) == INTEGER_TYPE)
15991 {
15992
15993 if (TYPE_MODE (type) == DFmode && align < 64)
15994 return 64;
15995 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15996 return 128;
15997 }
15998 return align;
15999 }
16000 \f
16001 /* Emit RTL insns to initialize the variable parts of a trampoline.
16002 FNADDR is an RTX for the address of the function's pure code.
16003 CXT is an RTX for the static chain value for the function. */
16004 void
16005 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16006 {
16007 if (!TARGET_64BIT)
16008 {
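/* The 32-bit trampoline is
     movl $CXT, %ecx	(opcode 0xb9, 4-byte immediate)
     jmp  FNADDR	(opcode 0xe9, 4-byte pc-relative displacement)
   with %ecx being the static chain register.  */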
16009 /* Compute offset from the end of the jmp to the target function. */
16010 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16011 plus_constant (tramp, 10),
16012 NULL_RTX, 1, OPTAB_DIRECT);
16013 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16014 gen_int_mode (0xb9, QImode));
16015 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16016 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16017 gen_int_mode (0xe9, QImode));
16018 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16019 }
16020 else
16021 {
16022 int offset = 0;
16023 /* Try to load the address using the shorter movl instead of movabs.
16024 We may want to support movq for kernel mode, but the kernel does not use
16025 trampolines at the moment. */
16026 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16027 {
16028 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16029 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16030 gen_int_mode (0xbb41, HImode));
16031 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16032 gen_lowpart (SImode, fnaddr));
16033 offset += 6;
16034 }
16035 else
16036 {
16037 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16038 gen_int_mode (0xbb49, HImode));
16039 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16040 fnaddr);
16041 offset += 10;
16042 }
16043 /* Load static chain using movabs to r10. */
16044 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16045 gen_int_mode (0xba49, HImode));
16046 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16047 cxt);
16048 offset += 10;
16049 /* Jump to r11. */
16050 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16051 gen_int_mode (0xff49, HImode));
16052 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16053 gen_int_mode (0xe3, QImode));
16054 offset += 3;
16055 gcc_assert (offset <= TRAMPOLINE_SIZE);
16056 }
16057
16058 #ifdef ENABLE_EXECUTE_STACK
16059 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16060 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16061 #endif
16062 }
16063 \f
16064 /* Codes for all the SSE/MMX builtins. */
16065 enum ix86_builtins
16066 {
16067 IX86_BUILTIN_ADDPS,
16068 IX86_BUILTIN_ADDSS,
16069 IX86_BUILTIN_DIVPS,
16070 IX86_BUILTIN_DIVSS,
16071 IX86_BUILTIN_MULPS,
16072 IX86_BUILTIN_MULSS,
16073 IX86_BUILTIN_SUBPS,
16074 IX86_BUILTIN_SUBSS,
16075
16076 IX86_BUILTIN_CMPEQPS,
16077 IX86_BUILTIN_CMPLTPS,
16078 IX86_BUILTIN_CMPLEPS,
16079 IX86_BUILTIN_CMPGTPS,
16080 IX86_BUILTIN_CMPGEPS,
16081 IX86_BUILTIN_CMPNEQPS,
16082 IX86_BUILTIN_CMPNLTPS,
16083 IX86_BUILTIN_CMPNLEPS,
16084 IX86_BUILTIN_CMPNGTPS,
16085 IX86_BUILTIN_CMPNGEPS,
16086 IX86_BUILTIN_CMPORDPS,
16087 IX86_BUILTIN_CMPUNORDPS,
16088 IX86_BUILTIN_CMPEQSS,
16089 IX86_BUILTIN_CMPLTSS,
16090 IX86_BUILTIN_CMPLESS,
16091 IX86_BUILTIN_CMPNEQSS,
16092 IX86_BUILTIN_CMPNLTSS,
16093 IX86_BUILTIN_CMPNLESS,
16094 IX86_BUILTIN_CMPNGTSS,
16095 IX86_BUILTIN_CMPNGESS,
16096 IX86_BUILTIN_CMPORDSS,
16097 IX86_BUILTIN_CMPUNORDSS,
16098
16099 IX86_BUILTIN_COMIEQSS,
16100 IX86_BUILTIN_COMILTSS,
16101 IX86_BUILTIN_COMILESS,
16102 IX86_BUILTIN_COMIGTSS,
16103 IX86_BUILTIN_COMIGESS,
16104 IX86_BUILTIN_COMINEQSS,
16105 IX86_BUILTIN_UCOMIEQSS,
16106 IX86_BUILTIN_UCOMILTSS,
16107 IX86_BUILTIN_UCOMILESS,
16108 IX86_BUILTIN_UCOMIGTSS,
16109 IX86_BUILTIN_UCOMIGESS,
16110 IX86_BUILTIN_UCOMINEQSS,
16111
16112 IX86_BUILTIN_CVTPI2PS,
16113 IX86_BUILTIN_CVTPS2PI,
16114 IX86_BUILTIN_CVTSI2SS,
16115 IX86_BUILTIN_CVTSI642SS,
16116 IX86_BUILTIN_CVTSS2SI,
16117 IX86_BUILTIN_CVTSS2SI64,
16118 IX86_BUILTIN_CVTTPS2PI,
16119 IX86_BUILTIN_CVTTSS2SI,
16120 IX86_BUILTIN_CVTTSS2SI64,
16121
16122 IX86_BUILTIN_MAXPS,
16123 IX86_BUILTIN_MAXSS,
16124 IX86_BUILTIN_MINPS,
16125 IX86_BUILTIN_MINSS,
16126
16127 IX86_BUILTIN_LOADUPS,
16128 IX86_BUILTIN_STOREUPS,
16129 IX86_BUILTIN_MOVSS,
16130
16131 IX86_BUILTIN_MOVHLPS,
16132 IX86_BUILTIN_MOVLHPS,
16133 IX86_BUILTIN_LOADHPS,
16134 IX86_BUILTIN_LOADLPS,
16135 IX86_BUILTIN_STOREHPS,
16136 IX86_BUILTIN_STORELPS,
16137
16138 IX86_BUILTIN_MASKMOVQ,
16139 IX86_BUILTIN_MOVMSKPS,
16140 IX86_BUILTIN_PMOVMSKB,
16141
16142 IX86_BUILTIN_MOVNTPS,
16143 IX86_BUILTIN_MOVNTQ,
16144
16145 IX86_BUILTIN_LOADDQU,
16146 IX86_BUILTIN_STOREDQU,
16147
16148 IX86_BUILTIN_PACKSSWB,
16149 IX86_BUILTIN_PACKSSDW,
16150 IX86_BUILTIN_PACKUSWB,
16151
16152 IX86_BUILTIN_PADDB,
16153 IX86_BUILTIN_PADDW,
16154 IX86_BUILTIN_PADDD,
16155 IX86_BUILTIN_PADDQ,
16156 IX86_BUILTIN_PADDSB,
16157 IX86_BUILTIN_PADDSW,
16158 IX86_BUILTIN_PADDUSB,
16159 IX86_BUILTIN_PADDUSW,
16160 IX86_BUILTIN_PSUBB,
16161 IX86_BUILTIN_PSUBW,
16162 IX86_BUILTIN_PSUBD,
16163 IX86_BUILTIN_PSUBQ,
16164 IX86_BUILTIN_PSUBSB,
16165 IX86_BUILTIN_PSUBSW,
16166 IX86_BUILTIN_PSUBUSB,
16167 IX86_BUILTIN_PSUBUSW,
16168
16169 IX86_BUILTIN_PAND,
16170 IX86_BUILTIN_PANDN,
16171 IX86_BUILTIN_POR,
16172 IX86_BUILTIN_PXOR,
16173
16174 IX86_BUILTIN_PAVGB,
16175 IX86_BUILTIN_PAVGW,
16176
16177 IX86_BUILTIN_PCMPEQB,
16178 IX86_BUILTIN_PCMPEQW,
16179 IX86_BUILTIN_PCMPEQD,
16180 IX86_BUILTIN_PCMPGTB,
16181 IX86_BUILTIN_PCMPGTW,
16182 IX86_BUILTIN_PCMPGTD,
16183
16184 IX86_BUILTIN_PMADDWD,
16185
16186 IX86_BUILTIN_PMAXSW,
16187 IX86_BUILTIN_PMAXUB,
16188 IX86_BUILTIN_PMINSW,
16189 IX86_BUILTIN_PMINUB,
16190
16191 IX86_BUILTIN_PMULHUW,
16192 IX86_BUILTIN_PMULHW,
16193 IX86_BUILTIN_PMULLW,
16194
16195 IX86_BUILTIN_PSADBW,
16196 IX86_BUILTIN_PSHUFW,
16197
16198 IX86_BUILTIN_PSLLW,
16199 IX86_BUILTIN_PSLLD,
16200 IX86_BUILTIN_PSLLQ,
16201 IX86_BUILTIN_PSRAW,
16202 IX86_BUILTIN_PSRAD,
16203 IX86_BUILTIN_PSRLW,
16204 IX86_BUILTIN_PSRLD,
16205 IX86_BUILTIN_PSRLQ,
16206 IX86_BUILTIN_PSLLWI,
16207 IX86_BUILTIN_PSLLDI,
16208 IX86_BUILTIN_PSLLQI,
16209 IX86_BUILTIN_PSRAWI,
16210 IX86_BUILTIN_PSRADI,
16211 IX86_BUILTIN_PSRLWI,
16212 IX86_BUILTIN_PSRLDI,
16213 IX86_BUILTIN_PSRLQI,
16214
16215 IX86_BUILTIN_PUNPCKHBW,
16216 IX86_BUILTIN_PUNPCKHWD,
16217 IX86_BUILTIN_PUNPCKHDQ,
16218 IX86_BUILTIN_PUNPCKLBW,
16219 IX86_BUILTIN_PUNPCKLWD,
16220 IX86_BUILTIN_PUNPCKLDQ,
16221
16222 IX86_BUILTIN_SHUFPS,
16223
16224 IX86_BUILTIN_RCPPS,
16225 IX86_BUILTIN_RCPSS,
16226 IX86_BUILTIN_RSQRTPS,
16227 IX86_BUILTIN_RSQRTSS,
16228 IX86_BUILTIN_SQRTPS,
16229 IX86_BUILTIN_SQRTSS,
16230
16231 IX86_BUILTIN_UNPCKHPS,
16232 IX86_BUILTIN_UNPCKLPS,
16233
16234 IX86_BUILTIN_ANDPS,
16235 IX86_BUILTIN_ANDNPS,
16236 IX86_BUILTIN_ORPS,
16237 IX86_BUILTIN_XORPS,
16238
16239 IX86_BUILTIN_EMMS,
16240 IX86_BUILTIN_LDMXCSR,
16241 IX86_BUILTIN_STMXCSR,
16242 IX86_BUILTIN_SFENCE,
16243
16244 /* 3DNow! Original */
16245 IX86_BUILTIN_FEMMS,
16246 IX86_BUILTIN_PAVGUSB,
16247 IX86_BUILTIN_PF2ID,
16248 IX86_BUILTIN_PFACC,
16249 IX86_BUILTIN_PFADD,
16250 IX86_BUILTIN_PFCMPEQ,
16251 IX86_BUILTIN_PFCMPGE,
16252 IX86_BUILTIN_PFCMPGT,
16253 IX86_BUILTIN_PFMAX,
16254 IX86_BUILTIN_PFMIN,
16255 IX86_BUILTIN_PFMUL,
16256 IX86_BUILTIN_PFRCP,
16257 IX86_BUILTIN_PFRCPIT1,
16258 IX86_BUILTIN_PFRCPIT2,
16259 IX86_BUILTIN_PFRSQIT1,
16260 IX86_BUILTIN_PFRSQRT,
16261 IX86_BUILTIN_PFSUB,
16262 IX86_BUILTIN_PFSUBR,
16263 IX86_BUILTIN_PI2FD,
16264 IX86_BUILTIN_PMULHRW,
16265
16266 /* 3DNow! Athlon Extensions */
16267 IX86_BUILTIN_PF2IW,
16268 IX86_BUILTIN_PFNACC,
16269 IX86_BUILTIN_PFPNACC,
16270 IX86_BUILTIN_PI2FW,
16271 IX86_BUILTIN_PSWAPDSI,
16272 IX86_BUILTIN_PSWAPDSF,
16273
16274 /* SSE2 */
16275 IX86_BUILTIN_ADDPD,
16276 IX86_BUILTIN_ADDSD,
16277 IX86_BUILTIN_DIVPD,
16278 IX86_BUILTIN_DIVSD,
16279 IX86_BUILTIN_MULPD,
16280 IX86_BUILTIN_MULSD,
16281 IX86_BUILTIN_SUBPD,
16282 IX86_BUILTIN_SUBSD,
16283
16284 IX86_BUILTIN_CMPEQPD,
16285 IX86_BUILTIN_CMPLTPD,
16286 IX86_BUILTIN_CMPLEPD,
16287 IX86_BUILTIN_CMPGTPD,
16288 IX86_BUILTIN_CMPGEPD,
16289 IX86_BUILTIN_CMPNEQPD,
16290 IX86_BUILTIN_CMPNLTPD,
16291 IX86_BUILTIN_CMPNLEPD,
16292 IX86_BUILTIN_CMPNGTPD,
16293 IX86_BUILTIN_CMPNGEPD,
16294 IX86_BUILTIN_CMPORDPD,
16295 IX86_BUILTIN_CMPUNORDPD,
16296 IX86_BUILTIN_CMPEQSD,
16297 IX86_BUILTIN_CMPLTSD,
16298 IX86_BUILTIN_CMPLESD,
16299 IX86_BUILTIN_CMPNEQSD,
16300 IX86_BUILTIN_CMPNLTSD,
16301 IX86_BUILTIN_CMPNLESD,
16302 IX86_BUILTIN_CMPORDSD,
16303 IX86_BUILTIN_CMPUNORDSD,
16304
16305 IX86_BUILTIN_COMIEQSD,
16306 IX86_BUILTIN_COMILTSD,
16307 IX86_BUILTIN_COMILESD,
16308 IX86_BUILTIN_COMIGTSD,
16309 IX86_BUILTIN_COMIGESD,
16310 IX86_BUILTIN_COMINEQSD,
16311 IX86_BUILTIN_UCOMIEQSD,
16312 IX86_BUILTIN_UCOMILTSD,
16313 IX86_BUILTIN_UCOMILESD,
16314 IX86_BUILTIN_UCOMIGTSD,
16315 IX86_BUILTIN_UCOMIGESD,
16316 IX86_BUILTIN_UCOMINEQSD,
16317
16318 IX86_BUILTIN_MAXPD,
16319 IX86_BUILTIN_MAXSD,
16320 IX86_BUILTIN_MINPD,
16321 IX86_BUILTIN_MINSD,
16322
16323 IX86_BUILTIN_ANDPD,
16324 IX86_BUILTIN_ANDNPD,
16325 IX86_BUILTIN_ORPD,
16326 IX86_BUILTIN_XORPD,
16327
16328 IX86_BUILTIN_SQRTPD,
16329 IX86_BUILTIN_SQRTSD,
16330
16331 IX86_BUILTIN_UNPCKHPD,
16332 IX86_BUILTIN_UNPCKLPD,
16333
16334 IX86_BUILTIN_SHUFPD,
16335
16336 IX86_BUILTIN_LOADUPD,
16337 IX86_BUILTIN_STOREUPD,
16338 IX86_BUILTIN_MOVSD,
16339
16340 IX86_BUILTIN_LOADHPD,
16341 IX86_BUILTIN_LOADLPD,
16342
16343 IX86_BUILTIN_CVTDQ2PD,
16344 IX86_BUILTIN_CVTDQ2PS,
16345
16346 IX86_BUILTIN_CVTPD2DQ,
16347 IX86_BUILTIN_CVTPD2PI,
16348 IX86_BUILTIN_CVTPD2PS,
16349 IX86_BUILTIN_CVTTPD2DQ,
16350 IX86_BUILTIN_CVTTPD2PI,
16351
16352 IX86_BUILTIN_CVTPI2PD,
16353 IX86_BUILTIN_CVTSI2SD,
16354 IX86_BUILTIN_CVTSI642SD,
16355
16356 IX86_BUILTIN_CVTSD2SI,
16357 IX86_BUILTIN_CVTSD2SI64,
16358 IX86_BUILTIN_CVTSD2SS,
16359 IX86_BUILTIN_CVTSS2SD,
16360 IX86_BUILTIN_CVTTSD2SI,
16361 IX86_BUILTIN_CVTTSD2SI64,
16362
16363 IX86_BUILTIN_CVTPS2DQ,
16364 IX86_BUILTIN_CVTPS2PD,
16365 IX86_BUILTIN_CVTTPS2DQ,
16366
16367 IX86_BUILTIN_MOVNTI,
16368 IX86_BUILTIN_MOVNTPD,
16369 IX86_BUILTIN_MOVNTDQ,
16370
16371 /* SSE2 MMX */
16372 IX86_BUILTIN_MASKMOVDQU,
16373 IX86_BUILTIN_MOVMSKPD,
16374 IX86_BUILTIN_PMOVMSKB128,
16375
16376 IX86_BUILTIN_PACKSSWB128,
16377 IX86_BUILTIN_PACKSSDW128,
16378 IX86_BUILTIN_PACKUSWB128,
16379
16380 IX86_BUILTIN_PADDB128,
16381 IX86_BUILTIN_PADDW128,
16382 IX86_BUILTIN_PADDD128,
16383 IX86_BUILTIN_PADDQ128,
16384 IX86_BUILTIN_PADDSB128,
16385 IX86_BUILTIN_PADDSW128,
16386 IX86_BUILTIN_PADDUSB128,
16387 IX86_BUILTIN_PADDUSW128,
16388 IX86_BUILTIN_PSUBB128,
16389 IX86_BUILTIN_PSUBW128,
16390 IX86_BUILTIN_PSUBD128,
16391 IX86_BUILTIN_PSUBQ128,
16392 IX86_BUILTIN_PSUBSB128,
16393 IX86_BUILTIN_PSUBSW128,
16394 IX86_BUILTIN_PSUBUSB128,
16395 IX86_BUILTIN_PSUBUSW128,
16396
16397 IX86_BUILTIN_PAND128,
16398 IX86_BUILTIN_PANDN128,
16399 IX86_BUILTIN_POR128,
16400 IX86_BUILTIN_PXOR128,
16401
16402 IX86_BUILTIN_PAVGB128,
16403 IX86_BUILTIN_PAVGW128,
16404
16405 IX86_BUILTIN_PCMPEQB128,
16406 IX86_BUILTIN_PCMPEQW128,
16407 IX86_BUILTIN_PCMPEQD128,
16408 IX86_BUILTIN_PCMPGTB128,
16409 IX86_BUILTIN_PCMPGTW128,
16410 IX86_BUILTIN_PCMPGTD128,
16411
16412 IX86_BUILTIN_PMADDWD128,
16413
16414 IX86_BUILTIN_PMAXSW128,
16415 IX86_BUILTIN_PMAXUB128,
16416 IX86_BUILTIN_PMINSW128,
16417 IX86_BUILTIN_PMINUB128,
16418
16419 IX86_BUILTIN_PMULUDQ,
16420 IX86_BUILTIN_PMULUDQ128,
16421 IX86_BUILTIN_PMULHUW128,
16422 IX86_BUILTIN_PMULHW128,
16423 IX86_BUILTIN_PMULLW128,
16424
16425 IX86_BUILTIN_PSADBW128,
16426 IX86_BUILTIN_PSHUFHW,
16427 IX86_BUILTIN_PSHUFLW,
16428 IX86_BUILTIN_PSHUFD,
16429
16430 IX86_BUILTIN_PSLLDQI128,
16431 IX86_BUILTIN_PSLLWI128,
16432 IX86_BUILTIN_PSLLDI128,
16433 IX86_BUILTIN_PSLLQI128,
16434 IX86_BUILTIN_PSRAWI128,
16435 IX86_BUILTIN_PSRADI128,
16436 IX86_BUILTIN_PSRLDQI128,
16437 IX86_BUILTIN_PSRLWI128,
16438 IX86_BUILTIN_PSRLDI128,
16439 IX86_BUILTIN_PSRLQI128,
16440
16441 IX86_BUILTIN_PSLLDQ128,
16442 IX86_BUILTIN_PSLLW128,
16443 IX86_BUILTIN_PSLLD128,
16444 IX86_BUILTIN_PSLLQ128,
16445 IX86_BUILTIN_PSRAW128,
16446 IX86_BUILTIN_PSRAD128,
16447 IX86_BUILTIN_PSRLW128,
16448 IX86_BUILTIN_PSRLD128,
16449 IX86_BUILTIN_PSRLQ128,
16450
16451 IX86_BUILTIN_PUNPCKHBW128,
16452 IX86_BUILTIN_PUNPCKHWD128,
16453 IX86_BUILTIN_PUNPCKHDQ128,
16454 IX86_BUILTIN_PUNPCKHQDQ128,
16455 IX86_BUILTIN_PUNPCKLBW128,
16456 IX86_BUILTIN_PUNPCKLWD128,
16457 IX86_BUILTIN_PUNPCKLDQ128,
16458 IX86_BUILTIN_PUNPCKLQDQ128,
16459
16460 IX86_BUILTIN_CLFLUSH,
16461 IX86_BUILTIN_MFENCE,
16462 IX86_BUILTIN_LFENCE,
16463
16464 /* Prescott New Instructions. */
16465 IX86_BUILTIN_ADDSUBPS,
16466 IX86_BUILTIN_HADDPS,
16467 IX86_BUILTIN_HSUBPS,
16468 IX86_BUILTIN_MOVSHDUP,
16469 IX86_BUILTIN_MOVSLDUP,
16470 IX86_BUILTIN_ADDSUBPD,
16471 IX86_BUILTIN_HADDPD,
16472 IX86_BUILTIN_HSUBPD,
16473 IX86_BUILTIN_LDDQU,
16474
16475 IX86_BUILTIN_MONITOR,
16476 IX86_BUILTIN_MWAIT,
16477
16478 /* SSSE3. */
16479 IX86_BUILTIN_PHADDW,
16480 IX86_BUILTIN_PHADDD,
16481 IX86_BUILTIN_PHADDSW,
16482 IX86_BUILTIN_PHSUBW,
16483 IX86_BUILTIN_PHSUBD,
16484 IX86_BUILTIN_PHSUBSW,
16485 IX86_BUILTIN_PMADDUBSW,
16486 IX86_BUILTIN_PMULHRSW,
16487 IX86_BUILTIN_PSHUFB,
16488 IX86_BUILTIN_PSIGNB,
16489 IX86_BUILTIN_PSIGNW,
16490 IX86_BUILTIN_PSIGND,
16491 IX86_BUILTIN_PALIGNR,
16492 IX86_BUILTIN_PABSB,
16493 IX86_BUILTIN_PABSW,
16494 IX86_BUILTIN_PABSD,
16495
16496 IX86_BUILTIN_PHADDW128,
16497 IX86_BUILTIN_PHADDD128,
16498 IX86_BUILTIN_PHADDSW128,
16499 IX86_BUILTIN_PHSUBW128,
16500 IX86_BUILTIN_PHSUBD128,
16501 IX86_BUILTIN_PHSUBSW128,
16502 IX86_BUILTIN_PMADDUBSW128,
16503 IX86_BUILTIN_PMULHRSW128,
16504 IX86_BUILTIN_PSHUFB128,
16505 IX86_BUILTIN_PSIGNB128,
16506 IX86_BUILTIN_PSIGNW128,
16507 IX86_BUILTIN_PSIGND128,
16508 IX86_BUILTIN_PALIGNR128,
16509 IX86_BUILTIN_PABSB128,
16510 IX86_BUILTIN_PABSW128,
16511 IX86_BUILTIN_PABSD128,
16512
16513 /* AMDFAM10 - SSE4A New Instructions. */
16514 IX86_BUILTIN_MOVNTSD,
16515 IX86_BUILTIN_MOVNTSS,
16516 IX86_BUILTIN_EXTRQI,
16517 IX86_BUILTIN_EXTRQ,
16518 IX86_BUILTIN_INSERTQI,
16519 IX86_BUILTIN_INSERTQ,
16520
16521 /* SSE4.1. */
16522 IX86_BUILTIN_BLENDPD,
16523 IX86_BUILTIN_BLENDPS,
16524 IX86_BUILTIN_BLENDVPD,
16525 IX86_BUILTIN_BLENDVPS,
16526 IX86_BUILTIN_PBLENDVB128,
16527 IX86_BUILTIN_PBLENDW128,
16528
16529 IX86_BUILTIN_DPPD,
16530 IX86_BUILTIN_DPPS,
16531
16532 IX86_BUILTIN_INSERTPS128,
16533
16534 IX86_BUILTIN_MOVNTDQA,
16535 IX86_BUILTIN_MPSADBW128,
16536 IX86_BUILTIN_PACKUSDW128,
16537 IX86_BUILTIN_PCMPEQQ,
16538 IX86_BUILTIN_PHMINPOSUW128,
16539
16540 IX86_BUILTIN_PMAXSB128,
16541 IX86_BUILTIN_PMAXSD128,
16542 IX86_BUILTIN_PMAXUD128,
16543 IX86_BUILTIN_PMAXUW128,
16544
16545 IX86_BUILTIN_PMINSB128,
16546 IX86_BUILTIN_PMINSD128,
16547 IX86_BUILTIN_PMINUD128,
16548 IX86_BUILTIN_PMINUW128,
16549
16550 IX86_BUILTIN_PMOVSXBW128,
16551 IX86_BUILTIN_PMOVSXBD128,
16552 IX86_BUILTIN_PMOVSXBQ128,
16553 IX86_BUILTIN_PMOVSXWD128,
16554 IX86_BUILTIN_PMOVSXWQ128,
16555 IX86_BUILTIN_PMOVSXDQ128,
16556
16557 IX86_BUILTIN_PMOVZXBW128,
16558 IX86_BUILTIN_PMOVZXBD128,
16559 IX86_BUILTIN_PMOVZXBQ128,
16560 IX86_BUILTIN_PMOVZXWD128,
16561 IX86_BUILTIN_PMOVZXWQ128,
16562 IX86_BUILTIN_PMOVZXDQ128,
16563
16564 IX86_BUILTIN_PMULDQ128,
16565 IX86_BUILTIN_PMULLD128,
16566
16567 IX86_BUILTIN_ROUNDPD,
16568 IX86_BUILTIN_ROUNDPS,
16569 IX86_BUILTIN_ROUNDSD,
16570 IX86_BUILTIN_ROUNDSS,
16571
16572 IX86_BUILTIN_PTESTZ,
16573 IX86_BUILTIN_PTESTC,
16574 IX86_BUILTIN_PTESTNZC,
16575
16576 IX86_BUILTIN_VEC_INIT_V2SI,
16577 IX86_BUILTIN_VEC_INIT_V4HI,
16578 IX86_BUILTIN_VEC_INIT_V8QI,
16579 IX86_BUILTIN_VEC_EXT_V2DF,
16580 IX86_BUILTIN_VEC_EXT_V2DI,
16581 IX86_BUILTIN_VEC_EXT_V4SF,
16582 IX86_BUILTIN_VEC_EXT_V4SI,
16583 IX86_BUILTIN_VEC_EXT_V8HI,
16584 IX86_BUILTIN_VEC_EXT_V2SI,
16585 IX86_BUILTIN_VEC_EXT_V4HI,
16586 IX86_BUILTIN_VEC_EXT_V16QI,
16587 IX86_BUILTIN_VEC_SET_V2DI,
16588 IX86_BUILTIN_VEC_SET_V4SF,
16589 IX86_BUILTIN_VEC_SET_V4SI,
16590 IX86_BUILTIN_VEC_SET_V8HI,
16591 IX86_BUILTIN_VEC_SET_V4HI,
16592 IX86_BUILTIN_VEC_SET_V16QI,
16593
16594 IX86_BUILTIN_MAX
16595 };
16596
16597 /* Table for the ix86 builtin decls. */
16598 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16599
16600 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
16601  * only if the target_flags include one of MASK.  Stores the function decl
16602  * in the ix86_builtins array.
16603  * Returns the function decl or NULL_TREE if the builtin was not added. */
16604
16605 static inline tree
16606 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16607 {
16608 tree decl = NULL_TREE;
16609
16610 if (mask & target_flags
16611 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16612 {
16613 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16614 NULL, NULL_TREE);
16615 ix86_builtins[(int) code] = decl;
16616 }
16617
16618 return decl;
16619 }
16620
16621 /* Like def_builtin, but also marks the function decl "const". */
16622
16623 static inline tree
16624 def_builtin_const (int mask, const char *name, tree type,
16625 enum ix86_builtins code)
16626 {
16627 tree decl = def_builtin (mask, name, type, code);
16628 if (decl)
16629 TREE_READONLY (decl) = 1;
16630 return decl;
16631 }
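/* Illustrative sketch only (not part of the original file): a typical use of
   the two helpers above.  The "__builtin_ia32_emms" call mirrors the one made
   later in ix86_init_mmx_sse_builtins; the def_builtin_const call and the
   surrounding example function are hypothetical.  */
#if 0
static void
example_def_builtin_usage (void)
{
  tree void_ftype_void_ex
    = build_function_type (void_type_node, void_list_node);
  tree v4sf_ex = build_vector_type_for_mode (float_type_node, V4SFmode);
  tree v4sf_ftype_v4sf_ex
    = build_function_type_list (v4sf_ex, v4sf_ex, NULL_TREE);

  /* Registered only when -mmmx is in target_flags.  */
  def_builtin (MASK_MMX, "__builtin_ia32_emms",
	       void_ftype_void_ex, IX86_BUILTIN_EMMS);

  /* Same, but the decl is additionally marked TREE_READONLY ("const").  */
  def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps",
		     v4sf_ftype_v4sf_ex, IX86_BUILTIN_SQRTPS);
}
#endif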
16632
16633 /* Bits for builtin_description.flag. */
16634
16635 /* Set when we don't support the comparison natively, and should
16636    swap the comparison operands in order to support it. */
16637 #define BUILTIN_DESC_SWAP_OPERANDS 1
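/* Illustrative sketch only (not part of the original file): SSE has no
   native greater-than compare for packed floats, so the bdesc_2arg entry
   below for "__builtin_ia32_cmpgtps" uses LT together with this flag, and
   the operands are swapped before the LT pattern is emitted.  From the
   user's point of view (assuming <xmmintrin.h>):  */
#if 0
#include <xmmintrin.h>

static __m128
example_cmpgt (__m128 a, __m128 b)
{
  /* _mm_cmpgt_ps expands to __builtin_ia32_cmpgtps; the compare actually
     emitted is the equivalent _mm_cmplt_ps (b, a).  */
  return _mm_cmpgt_ps (a, b);
}
#endif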
16638
16639 struct builtin_description
16640 {
16641 const unsigned int mask;
16642 const enum insn_code icode;
16643 const char *const name;
16644 const enum ix86_builtins code;
16645 const enum rtx_code comparison;
16646 const unsigned int flag;
16647 };
16648
16649 static const struct builtin_description bdesc_comi[] =
16650 {
16651 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16652 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16653 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16654 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16655 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16656 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16657 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16658 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16659 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16660 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16661 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16662 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16663 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16664 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16665 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16666 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16667 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16668 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16669 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16670 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16671 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16672 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16673 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16674 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16675 };
16676
16677 static const struct builtin_description bdesc_ptest[] =
16678 {
16679 /* SSE4.1 */
16680 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
16681 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
16682 { MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
16683 };
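/* Illustrative sketch only (not part of the original file): the SSE4.1 ptest
   builtins above return the flag results of a single PTEST, e.g. (assuming
   <smmintrin.h>):  */
#if 0
#include <smmintrin.h>

static int
example_testz (__m128i a, __m128i b)
{
  /* _mm_testz_si128 expands to __builtin_ia32_ptestz128 and yields 1
     iff (a & b) is all zeros (the ZF result of PTEST).  */
  return _mm_testz_si128 (a, b);
}
#endif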
16684
16685 /* SSE builtins with 3 arguments; the last argument must be an 8-bit
16686    constant or xmm0. */
16687 static const struct builtin_description bdesc_sse_3arg[] =
16688 {
16689 /* SSE4.1 */
16690 { MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, 0, 0 },
16691 { MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, 0, 0 },
16692 { MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, 0, 0 },
16693 { MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, 0, 0 },
16694 { MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, 0, 0 },
16695 { MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, 0, 0 },
16696 { MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, 0, 0 },
16697 { MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, 0, 0 },
16698 { MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, 0, 0 },
16699 { MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, 0, 0 },
16700 { MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, 0, 0 },
16701 { MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, 0, 0 },
16702 };
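/* Illustrative sketch only (not part of the original file): the two flavors
   of third operand accepted by the table above, as seen from user code
   (assuming <smmintrin.h>):  */
#if 0
#include <smmintrin.h>

static __m128d
example_blend (__m128d a, __m128d b, __m128d mask)
{
  /* __builtin_ia32_blendpd takes an 8-bit immediate ...  */
  __m128d fixed = _mm_blend_pd (a, b, 0x1);
  /* ... while __builtin_ia32_blendvpd takes a third vector operand, which
     must end up in xmm0 for the non-VEX encoding.  */
  return _mm_blendv_pd (fixed, b, mask);
}
#endif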
16703
16704 static const struct builtin_description bdesc_2arg[] =
16705 {
16706 /* SSE */
16707 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16708 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16709 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16710 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16711 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16712 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16713 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16714 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16715
16716 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16717 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16718 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16719 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16720 BUILTIN_DESC_SWAP_OPERANDS },
16721 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16722 BUILTIN_DESC_SWAP_OPERANDS },
16723 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16724 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16725 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16726 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16727 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16728 BUILTIN_DESC_SWAP_OPERANDS },
16729 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16730 BUILTIN_DESC_SWAP_OPERANDS },
16731 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16732 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16733 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16734 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16735 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16736 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16737 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16738 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16739 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16740 BUILTIN_DESC_SWAP_OPERANDS },
16741 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16742 BUILTIN_DESC_SWAP_OPERANDS },
16743 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16744
16745 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16746 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16747 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16748 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16749
16750 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16751 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16752 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16753 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16754
16755 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16756 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16757 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16758 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16759 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16760
16761 /* MMX */
16762 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16763 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16764 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16765 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16766 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16767 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16768 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16769 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16770
16771 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16772 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16773 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16774 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16775 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16776 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16777 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16778 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16779
16780 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16781 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16782 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16783
16784 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16785 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16786 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16787 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16788
16789 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16790 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16791
16792 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16793 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16794 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16795 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16796 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16797 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16798
16799 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16800 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16801 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16802 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16803
16804 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16805 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16806 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16807 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16808 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16809 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16810
16811 /* Special. */
16812 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16813 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16814 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16815
16816 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16817 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16818 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16819
16820 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16821 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16822 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16823 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16824 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16825 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16826
16827 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16828 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16829 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16830 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16831 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16832 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16833
16834 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16835 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16836 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16837 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16838
16839 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16840 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16841
16842 /* SSE2 */
16843 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16844 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16845 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16846 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16847 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16848 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16849 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16850 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16851
16852 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16853 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16854 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16855 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16856 BUILTIN_DESC_SWAP_OPERANDS },
16857 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16858 BUILTIN_DESC_SWAP_OPERANDS },
16859 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16860 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16861 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16862 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16863 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16864 BUILTIN_DESC_SWAP_OPERANDS },
16865 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16866 BUILTIN_DESC_SWAP_OPERANDS },
16867 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16868 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16869 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16870 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16871 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16872 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16873 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16874 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16875 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16876
16877 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16878 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16879 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16880 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16881
16882 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16883 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16884 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16885 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16886
16887 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16888 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16889 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16890
16891 /* SSE2 MMX */
16892 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16893 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16894 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16895 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16896 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16897 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16898 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16899 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16900
16901 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16902 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16903 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16904 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16905 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16906 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16907 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16908 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16909
16910 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16911 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16912
16913 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16914 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16915 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16916 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16917
16918 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16919 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16920
16921 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16922 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16923 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16924 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16925 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16926 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16927
16928 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16929 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16930 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16931 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16932
16933 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16934 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16935 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16936 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16937 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16938 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16939 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16940 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16941
16942 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16943 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16944 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16945
16946 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16947 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16948
16949 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16950 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16951
16952 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16953 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16954 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16955
16956 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16957 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16958 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16959
16960 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16961 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16962
16963 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16964
16965 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16966 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16967 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16968 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16969
16970 /* SSE3 MMX */
16971 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16972 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16973 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16974 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16975 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16976 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16977
16978 /* SSSE3 */
16979 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16980 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16981 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16982 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16983 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16984 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16985 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16986 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16987 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16988 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16989 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16990 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16991 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16992 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16993 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16994 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16995 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16996 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16997 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16998 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16999 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
17000 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
17001 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
17002 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
17003
17004 /* SSE4.1 */
17005 { MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, 0, 0 },
17006 { MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, 0, 0 },
17007 { MASK_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, 0, 0 },
17008 { MASK_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, 0, 0 },
17009 { MASK_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, 0, 0 },
17010 { MASK_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, 0, 0 },
17011 { MASK_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, 0, 0 },
17012 { MASK_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, 0, 0 },
17013 { MASK_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, 0, 0 },
17014 { MASK_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
17015 { MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
17016 { MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
17017 };
17018
17019 static const struct builtin_description bdesc_1arg[] =
17020 {
17021 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
17022 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
17023
17024 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
17025 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
17026 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
17027
17028 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
17029 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
17030 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
17031 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
17032 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
17033 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
17034
17035 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
17036 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
17037
17038 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
17039
17040 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
17041 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
17042
17043 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
17044 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
17045 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
17046 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
17047 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
17048
17049 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
17050
17051 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
17052 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
17053 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
17054 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
17055
17056 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
17057 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
17058 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
17059
17060 /* SSE3 */
17061 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
17062 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
17063
17064 /* SSSE3 */
17065 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
17066 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
17067 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
17068 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
17069 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
17070 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
17071
17072 /* SSE4.1 */
17073 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, 0, 0 },
17074 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, 0, 0 },
17075 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, 0, 0 },
17076 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, 0, 0 },
17077 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, 0, 0 },
17078 { MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, 0, 0 },
17079 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, 0, 0 },
17080 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, 0, 0 },
17081 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, 0, 0 },
17082 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, 0, 0 },
17083 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, 0, 0 },
17084 { MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, 0, 0 },
17085 { MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, 0, 0 },
17086
17087 /* Fake 1-arg builtins that take a constant smaller than 8 bits as the
17088    2nd arg. */
17089 { MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 },
17090 { MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 },
17091 };
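/* Illustrative sketch only (not part of the original file): the ROUNDPD /
   ROUNDPS entries just above take a rounding-mode immediate as that "extra"
   second argument, e.g. (assuming <smmintrin.h>):  */
#if 0
#include <smmintrin.h>

static __m128d
example_round (__m128d a)
{
  /* _mm_round_pd expands to __builtin_ia32_roundpd; the second argument
     must be a compile-time constant.  */
  return _mm_round_pd (a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
}
#endif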
17092
17093 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
17094    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
17095    are expanded. */
17096 static void
17097 ix86_init_mmx_sse_builtins (void)
17098 {
17099 const struct builtin_description * d;
17100 size_t i;
17101
17102 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17103 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17104 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17105 tree V2DI_type_node
17106 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17107 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17108 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17109 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17110 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17111 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17112 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17113
17114 tree pchar_type_node = build_pointer_type (char_type_node);
17115 tree pcchar_type_node = build_pointer_type (
17116 build_type_variant (char_type_node, 1, 0));
17117 tree pfloat_type_node = build_pointer_type (float_type_node);
17118 tree pcfloat_type_node = build_pointer_type (
17119 build_type_variant (float_type_node, 1, 0));
17120 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17121 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17122 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17123
17124 /* Comparisons. */
17125 tree int_ftype_v4sf_v4sf
17126 = build_function_type_list (integer_type_node,
17127 V4SF_type_node, V4SF_type_node, NULL_TREE);
17128 tree v4si_ftype_v4sf_v4sf
17129 = build_function_type_list (V4SI_type_node,
17130 V4SF_type_node, V4SF_type_node, NULL_TREE);
17131 /* MMX/SSE/integer conversions. */
17132 tree int_ftype_v4sf
17133 = build_function_type_list (integer_type_node,
17134 V4SF_type_node, NULL_TREE);
17135 tree int64_ftype_v4sf
17136 = build_function_type_list (long_long_integer_type_node,
17137 V4SF_type_node, NULL_TREE);
17138 tree int_ftype_v8qi
17139 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17140 tree v4sf_ftype_v4sf_int
17141 = build_function_type_list (V4SF_type_node,
17142 V4SF_type_node, integer_type_node, NULL_TREE);
17143 tree v4sf_ftype_v4sf_int64
17144 = build_function_type_list (V4SF_type_node,
17145 V4SF_type_node, long_long_integer_type_node,
17146 NULL_TREE);
17147 tree v4sf_ftype_v4sf_v2si
17148 = build_function_type_list (V4SF_type_node,
17149 V4SF_type_node, V2SI_type_node, NULL_TREE);
17150
17151 /* Miscellaneous. */
17152 tree v8qi_ftype_v4hi_v4hi
17153 = build_function_type_list (V8QI_type_node,
17154 V4HI_type_node, V4HI_type_node, NULL_TREE);
17155 tree v4hi_ftype_v2si_v2si
17156 = build_function_type_list (V4HI_type_node,
17157 V2SI_type_node, V2SI_type_node, NULL_TREE);
17158 tree v4sf_ftype_v4sf_v4sf_int
17159 = build_function_type_list (V4SF_type_node,
17160 V4SF_type_node, V4SF_type_node,
17161 integer_type_node, NULL_TREE);
17162 tree v2si_ftype_v4hi_v4hi
17163 = build_function_type_list (V2SI_type_node,
17164 V4HI_type_node, V4HI_type_node, NULL_TREE);
17165 tree v4hi_ftype_v4hi_int
17166 = build_function_type_list (V4HI_type_node,
17167 V4HI_type_node, integer_type_node, NULL_TREE);
17168 tree v4hi_ftype_v4hi_di
17169 = build_function_type_list (V4HI_type_node,
17170 V4HI_type_node, long_long_unsigned_type_node,
17171 NULL_TREE);
17172 tree v2si_ftype_v2si_di
17173 = build_function_type_list (V2SI_type_node,
17174 V2SI_type_node, long_long_unsigned_type_node,
17175 NULL_TREE);
17176 tree void_ftype_void
17177 = build_function_type (void_type_node, void_list_node);
17178 tree void_ftype_unsigned
17179 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17180 tree void_ftype_unsigned_unsigned
17181 = build_function_type_list (void_type_node, unsigned_type_node,
17182 unsigned_type_node, NULL_TREE);
17183 tree void_ftype_pcvoid_unsigned_unsigned
17184 = build_function_type_list (void_type_node, const_ptr_type_node,
17185 unsigned_type_node, unsigned_type_node,
17186 NULL_TREE);
17187 tree unsigned_ftype_void
17188 = build_function_type (unsigned_type_node, void_list_node);
17189 tree v2si_ftype_v4sf
17190 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17191 /* Loads/stores. */
17192 tree void_ftype_v8qi_v8qi_pchar
17193 = build_function_type_list (void_type_node,
17194 V8QI_type_node, V8QI_type_node,
17195 pchar_type_node, NULL_TREE);
17196 tree v4sf_ftype_pcfloat
17197 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17198 /* @@@ the type is bogus */
17199 tree v4sf_ftype_v4sf_pv2si
17200 = build_function_type_list (V4SF_type_node,
17201 V4SF_type_node, pv2si_type_node, NULL_TREE);
17202 tree void_ftype_pv2si_v4sf
17203 = build_function_type_list (void_type_node,
17204 pv2si_type_node, V4SF_type_node, NULL_TREE);
17205 tree void_ftype_pfloat_v4sf
17206 = build_function_type_list (void_type_node,
17207 pfloat_type_node, V4SF_type_node, NULL_TREE);
17208 tree void_ftype_pdi_di
17209 = build_function_type_list (void_type_node,
17210 pdi_type_node, long_long_unsigned_type_node,
17211 NULL_TREE);
17212 tree void_ftype_pv2di_v2di
17213 = build_function_type_list (void_type_node,
17214 pv2di_type_node, V2DI_type_node, NULL_TREE);
17215 /* Normal vector unops. */
17216 tree v4sf_ftype_v4sf
17217 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17218 tree v16qi_ftype_v16qi
17219 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17220 tree v8hi_ftype_v8hi
17221 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17222 tree v4si_ftype_v4si
17223 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17224 tree v8qi_ftype_v8qi
17225 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17226 tree v4hi_ftype_v4hi
17227 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17228
17229 /* Normal vector binops. */
17230 tree v4sf_ftype_v4sf_v4sf
17231 = build_function_type_list (V4SF_type_node,
17232 V4SF_type_node, V4SF_type_node, NULL_TREE);
17233 tree v8qi_ftype_v8qi_v8qi
17234 = build_function_type_list (V8QI_type_node,
17235 V8QI_type_node, V8QI_type_node, NULL_TREE);
17236 tree v4hi_ftype_v4hi_v4hi
17237 = build_function_type_list (V4HI_type_node,
17238 V4HI_type_node, V4HI_type_node, NULL_TREE);
17239 tree v2si_ftype_v2si_v2si
17240 = build_function_type_list (V2SI_type_node,
17241 V2SI_type_node, V2SI_type_node, NULL_TREE);
17242 tree di_ftype_di_di
17243 = build_function_type_list (long_long_unsigned_type_node,
17244 long_long_unsigned_type_node,
17245 long_long_unsigned_type_node, NULL_TREE);
17246
17247 tree di_ftype_di_di_int
17248 = build_function_type_list (long_long_unsigned_type_node,
17249 long_long_unsigned_type_node,
17250 long_long_unsigned_type_node,
17251 integer_type_node, NULL_TREE);
17252
17253 tree v2si_ftype_v2sf
17254 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17255 tree v2sf_ftype_v2si
17256 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17257 tree v2si_ftype_v2si
17258 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17259 tree v2sf_ftype_v2sf
17260 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17261 tree v2sf_ftype_v2sf_v2sf
17262 = build_function_type_list (V2SF_type_node,
17263 V2SF_type_node, V2SF_type_node, NULL_TREE);
17264 tree v2si_ftype_v2sf_v2sf
17265 = build_function_type_list (V2SI_type_node,
17266 V2SF_type_node, V2SF_type_node, NULL_TREE);
17267 tree pint_type_node = build_pointer_type (integer_type_node);
17268 tree pdouble_type_node = build_pointer_type (double_type_node);
17269 tree pcdouble_type_node = build_pointer_type (
17270 build_type_variant (double_type_node, 1, 0));
17271 tree int_ftype_v2df_v2df
17272 = build_function_type_list (integer_type_node,
17273 V2DF_type_node, V2DF_type_node, NULL_TREE);
17274
17275 tree void_ftype_pcvoid
17276 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17277 tree v4sf_ftype_v4si
17278 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17279 tree v4si_ftype_v4sf
17280 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17281 tree v2df_ftype_v4si
17282 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17283 tree v4si_ftype_v2df
17284 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17285 tree v2si_ftype_v2df
17286 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17287 tree v4sf_ftype_v2df
17288 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17289 tree v2df_ftype_v2si
17290 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17291 tree v2df_ftype_v4sf
17292 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17293 tree int_ftype_v2df
17294 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17295 tree int64_ftype_v2df
17296 = build_function_type_list (long_long_integer_type_node,
17297 V2DF_type_node, NULL_TREE);
17298 tree v2df_ftype_v2df_int
17299 = build_function_type_list (V2DF_type_node,
17300 V2DF_type_node, integer_type_node, NULL_TREE);
17301 tree v2df_ftype_v2df_int64
17302 = build_function_type_list (V2DF_type_node,
17303 V2DF_type_node, long_long_integer_type_node,
17304 NULL_TREE);
17305 tree v4sf_ftype_v4sf_v2df
17306 = build_function_type_list (V4SF_type_node,
17307 V4SF_type_node, V2DF_type_node, NULL_TREE);
17308 tree v2df_ftype_v2df_v4sf
17309 = build_function_type_list (V2DF_type_node,
17310 V2DF_type_node, V4SF_type_node, NULL_TREE);
17311 tree v2df_ftype_v2df_v2df_int
17312 = build_function_type_list (V2DF_type_node,
17313 V2DF_type_node, V2DF_type_node,
17314 integer_type_node,
17315 NULL_TREE);
17316 tree v2df_ftype_v2df_pcdouble
17317 = build_function_type_list (V2DF_type_node,
17318 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17319 tree void_ftype_pdouble_v2df
17320 = build_function_type_list (void_type_node,
17321 pdouble_type_node, V2DF_type_node, NULL_TREE);
17322 tree void_ftype_pint_int
17323 = build_function_type_list (void_type_node,
17324 pint_type_node, integer_type_node, NULL_TREE);
17325 tree void_ftype_v16qi_v16qi_pchar
17326 = build_function_type_list (void_type_node,
17327 V16QI_type_node, V16QI_type_node,
17328 pchar_type_node, NULL_TREE);
17329 tree v2df_ftype_pcdouble
17330 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17331 tree v2df_ftype_v2df_v2df
17332 = build_function_type_list (V2DF_type_node,
17333 V2DF_type_node, V2DF_type_node, NULL_TREE);
17334 tree v16qi_ftype_v16qi_v16qi
17335 = build_function_type_list (V16QI_type_node,
17336 V16QI_type_node, V16QI_type_node, NULL_TREE);
17337 tree v8hi_ftype_v8hi_v8hi
17338 = build_function_type_list (V8HI_type_node,
17339 V8HI_type_node, V8HI_type_node, NULL_TREE);
17340 tree v4si_ftype_v4si_v4si
17341 = build_function_type_list (V4SI_type_node,
17342 V4SI_type_node, V4SI_type_node, NULL_TREE);
17343 tree v2di_ftype_v2di_v2di
17344 = build_function_type_list (V2DI_type_node,
17345 V2DI_type_node, V2DI_type_node, NULL_TREE);
17346 tree v2di_ftype_v2df_v2df
17347 = build_function_type_list (V2DI_type_node,
17348 V2DF_type_node, V2DF_type_node, NULL_TREE);
17349 tree v2df_ftype_v2df
17350 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17351 tree v2di_ftype_v2di_int
17352 = build_function_type_list (V2DI_type_node,
17353 V2DI_type_node, integer_type_node, NULL_TREE);
17354 tree v2di_ftype_v2di_v2di_int
17355 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17356 V2DI_type_node, integer_type_node, NULL_TREE);
17357 tree v4si_ftype_v4si_int
17358 = build_function_type_list (V4SI_type_node,
17359 V4SI_type_node, integer_type_node, NULL_TREE);
17360 tree v8hi_ftype_v8hi_int
17361 = build_function_type_list (V8HI_type_node,
17362 V8HI_type_node, integer_type_node, NULL_TREE);
17363 tree v4si_ftype_v8hi_v8hi
17364 = build_function_type_list (V4SI_type_node,
17365 V8HI_type_node, V8HI_type_node, NULL_TREE);
17366 tree di_ftype_v8qi_v8qi
17367 = build_function_type_list (long_long_unsigned_type_node,
17368 V8QI_type_node, V8QI_type_node, NULL_TREE);
17369 tree di_ftype_v2si_v2si
17370 = build_function_type_list (long_long_unsigned_type_node,
17371 V2SI_type_node, V2SI_type_node, NULL_TREE);
17372 tree v2di_ftype_v16qi_v16qi
17373 = build_function_type_list (V2DI_type_node,
17374 V16QI_type_node, V16QI_type_node, NULL_TREE);
17375 tree v2di_ftype_v4si_v4si
17376 = build_function_type_list (V2DI_type_node,
17377 V4SI_type_node, V4SI_type_node, NULL_TREE);
17378 tree int_ftype_v16qi
17379 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17380 tree v16qi_ftype_pcchar
17381 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17382 tree void_ftype_pchar_v16qi
17383 = build_function_type_list (void_type_node,
17384 pchar_type_node, V16QI_type_node, NULL_TREE);
17385
17386 tree v2di_ftype_v2di_unsigned_unsigned
17387 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17388 unsigned_type_node, unsigned_type_node,
17389 NULL_TREE);
17390 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17391 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17392 unsigned_type_node, unsigned_type_node,
17393 NULL_TREE);
17394 tree v2di_ftype_v2di_v16qi
17395 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17396 NULL_TREE);
17397 tree v2df_ftype_v2df_v2df_v2df
17398 = build_function_type_list (V2DF_type_node,
17399 V2DF_type_node, V2DF_type_node,
17400 V2DF_type_node, NULL_TREE);
17401 tree v4sf_ftype_v4sf_v4sf_v4sf
17402 = build_function_type_list (V4SF_type_node,
17403 V4SF_type_node, V4SF_type_node,
17404 V4SF_type_node, NULL_TREE);
17405 tree v8hi_ftype_v16qi
17406 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17407 NULL_TREE);
17408 tree v4si_ftype_v16qi
17409 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17410 NULL_TREE);
17411 tree v2di_ftype_v16qi
17412 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17413 NULL_TREE);
17414 tree v4si_ftype_v8hi
17415 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17416 NULL_TREE);
17417 tree v2di_ftype_v8hi
17418 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17419 NULL_TREE);
17420 tree v2di_ftype_v4si
17421 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17422 NULL_TREE);
17423 tree v2di_ftype_pv2di
17424 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17425 NULL_TREE);
17426 tree v16qi_ftype_v16qi_v16qi_int
17427 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17428 V16QI_type_node, integer_type_node,
17429 NULL_TREE);
17430 tree v16qi_ftype_v16qi_v16qi_v16qi
17431 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17432 V16QI_type_node, V16QI_type_node,
17433 NULL_TREE);
17434 tree v8hi_ftype_v8hi_v8hi_int
17435 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17436 V8HI_type_node, integer_type_node,
17437 NULL_TREE);
17438 tree v4si_ftype_v4si_v4si_int
17439 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17440 V4SI_type_node, integer_type_node,
17441 NULL_TREE);
17442 tree int_ftype_v2di_v2di
17443 = build_function_type_list (integer_type_node,
17444 V2DI_type_node, V2DI_type_node,
17445 NULL_TREE);
17446
17447 tree float80_type;
17448 tree float128_type;
17449 tree ftype;
17450
17451 /* The __float80 type. */
17452 if (TYPE_MODE (long_double_type_node) == XFmode)
17453 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17454 "__float80");
17455 else
17456 {
17457 /* The __float80 type. */
17458 float80_type = make_node (REAL_TYPE);
17459 TYPE_PRECISION (float80_type) = 80;
17460 layout_type (float80_type);
17461 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17462 }
17463
17464 if (TARGET_64BIT)
17465 {
17466 float128_type = make_node (REAL_TYPE);
17467 TYPE_PRECISION (float128_type) = 128;
17468 layout_type (float128_type);
17469 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17470 }
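/* Illustrative sketch only (not part of the original file): once registered
   above, the extended types are usable directly in user code; __float128 is
   only registered for TARGET_64BIT.  */
#if 0
__float80  example_ext  = 1.0L;	/* 80-bit x87 extended precision */
#ifdef __x86_64__
__float128 example_quad;	/* 128-bit quad precision, 64-bit only */
#endif
#endif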
17471
17472 /* Add all SSE builtins that are more or less simple operations on
17473 three operands. */
17474 for (i = 0, d = bdesc_sse_3arg;
17475 i < ARRAY_SIZE (bdesc_sse_3arg);
17476 i++, d++)
17477 {
17478 /* Use one of the operands; the target can have a different mode for
17479 mask-generating compares. */
17480 enum machine_mode mode;
17481 tree type;
17482
17483 if (d->name == 0)
17484 continue;
17485 mode = insn_data[d->icode].operand[1].mode;
17486
17487 switch (mode)
17488 {
17489 case V16QImode:
17490 type = v16qi_ftype_v16qi_v16qi_int;
17491 break;
17492 case V8HImode:
17493 type = v8hi_ftype_v8hi_v8hi_int;
17494 break;
17495 case V4SImode:
17496 type = v4si_ftype_v4si_v4si_int;
17497 break;
17498 case V2DImode:
17499 type = v2di_ftype_v2di_v2di_int;
17500 break;
17501 case V2DFmode:
17502 type = v2df_ftype_v2df_v2df_int;
17503 break;
17504 case V4SFmode:
17505 type = v4sf_ftype_v4sf_v4sf_int;
17506 break;
17507 default:
17508 gcc_unreachable ();
17509 }
17510
17511 /* Override for variable blends. */
17512 switch (d->icode)
17513 {
17514 case CODE_FOR_sse4_1_blendvpd:
17515 type = v2df_ftype_v2df_v2df_v2df;
17516 break;
17517 case CODE_FOR_sse4_1_blendvps:
17518 type = v4sf_ftype_v4sf_v4sf_v4sf;
17519 break;
17520 case CODE_FOR_sse4_1_pblendvb:
17521 type = v16qi_ftype_v16qi_v16qi_v16qi;
17522 break;
17523 default:
17524 break;
17525 }
17526
17527 def_builtin (d->mask, d->name, type, d->code);
17528 }
17529
17530 /* Add all builtins that are more or less simple operations on two
17531 operands. */
17532 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17533 {
17534 /* Use one of the operands; the target can have a different mode for
17535 mask-generating compares. */
17536 enum machine_mode mode;
17537 tree type;
17538
17539 if (d->name == 0)
17540 continue;
17541 mode = insn_data[d->icode].operand[1].mode;
17542
17543 switch (mode)
17544 {
17545 case V16QImode:
17546 type = v16qi_ftype_v16qi_v16qi;
17547 break;
17548 case V8HImode:
17549 type = v8hi_ftype_v8hi_v8hi;
17550 break;
17551 case V4SImode:
17552 type = v4si_ftype_v4si_v4si;
17553 break;
17554 case V2DImode:
17555 type = v2di_ftype_v2di_v2di;
17556 break;
17557 case V2DFmode:
17558 type = v2df_ftype_v2df_v2df;
17559 break;
17560 case V4SFmode:
17561 type = v4sf_ftype_v4sf_v4sf;
17562 break;
17563 case V8QImode:
17564 type = v8qi_ftype_v8qi_v8qi;
17565 break;
17566 case V4HImode:
17567 type = v4hi_ftype_v4hi_v4hi;
17568 break;
17569 case V2SImode:
17570 type = v2si_ftype_v2si_v2si;
17571 break;
17572 case DImode:
17573 type = di_ftype_di_di;
17574 break;
17575
17576 default:
17577 gcc_unreachable ();
17578 }
17579
17580 /* Override for comparisons: the mask compares return a per-element all-ones/all-zeros mask, hence an integer vector result.  */
17581 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17582 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17583 type = v4si_ftype_v4sf_v4sf;
17584
17585 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17586 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17587 type = v2di_ftype_v2df_v2df;
17588
17589 def_builtin (d->mask, d->name, type, d->code);
17590 }
17591
17592 /* Add all builtins that are more or less simple operations on one operand.  */
17593 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17594 {
17595 enum machine_mode mode;
17596 tree type;
17597
17598 if (d->name == 0)
17599 continue;
17600 mode = insn_data[d->icode].operand[1].mode;
17601
17602 switch (mode)
17603 {
17604 case V16QImode:
17605 type = v16qi_ftype_v16qi;
17606 break;
17607 case V8HImode:
17608 type = v8hi_ftype_v8hi;
17609 break;
17610 case V4SImode:
17611 type = v4si_ftype_v4si;
17612 break;
17613 case V2DFmode:
17614 type = v2df_ftype_v2df;
17615 break;
17616 case V4SFmode:
17617 type = v4sf_ftype_v4sf;
17618 break;
17619 case V8QImode:
17620 type = v8qi_ftype_v8qi;
17621 break;
17622 case V4HImode:
17623 type = v4hi_ftype_v4hi;
17624 break;
17625 case V2SImode:
17626 type = v2si_ftype_v2si;
17627 break;
17628
17629 default:
17630 gcc_unreachable ();
17631 }
17632
17633 def_builtin (d->mask, d->name, type, d->code);
17634 }
17635
17636 /* Add the remaining MMX insns with somewhat more complicated types. */
17637 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17638 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17639 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17640 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17641
17642 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17643 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17644 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17645
17646 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17647 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17648
17649 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17650 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17651
17652 /* comi/ucomi insns. */
17653 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17654 if (d->mask == MASK_SSE2)
17655 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17656 else
17657 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17658
17659 /* ptest insns. */
17660 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
17661 def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
17662
17663 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17664 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17665 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17666
17667 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17668 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17669 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17670 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17671 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17672 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17673 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17674 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17675 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17676 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17677 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17678
17679 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17680
17681 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17682 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17683
17684 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17685 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17686 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17687 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17688
17689 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17690 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17691 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17692 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17693
17694 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17695
17696 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17697
17698 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17699 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17700 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17701 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17702 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17703 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17704
17705 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17706
17707 /* Original 3DNow! */
17708 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17709 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17710 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17711 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17712 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17713 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17714 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17715 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17716 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17717 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17718 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17719 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17720 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17721 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17722 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17723 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17724 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17725 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17726 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17727 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17728
17729 /* 3DNow! extension as used in the Athlon CPU. */
17730 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17731 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17732 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17733 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17734 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17735 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17736
17737 /* SSE2.  */
17738 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17739
17740 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17741 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17742
17743 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17744 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17745
17746 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17747 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17748 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17749 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17750 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17751
17752 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17753 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17754 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17755 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17756
17757 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17758 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17759
17760 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17761
17762 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17763 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17764
17765 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17766 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17767 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17768 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17769 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17770
17771 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17772
17773 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17774 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17775 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17776 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17777
17778 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17779 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17780 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17781
17782 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17783 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17784 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17785 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17786
17787 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17788 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17789 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17790
17791 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17792 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17793
17794 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17795 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17796
17797 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17798 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17799 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17800 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17801 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17802 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17803 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17804
17805 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17806 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17807 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17808 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17809 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17810 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17811 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17812
17813 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17814 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17815 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17816 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17817
17818 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17819
17820 /* SSE3 (Prescott New Instructions).  */
17821 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17822 void_ftype_pcvoid_unsigned_unsigned,
17823 IX86_BUILTIN_MONITOR);
17824 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17825 void_ftype_unsigned_unsigned,
17826 IX86_BUILTIN_MWAIT);
17827 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17828 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17829
17830 /* SSSE3. */
17831 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17832 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17833 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17834 IX86_BUILTIN_PALIGNR);
17835
17836 /* SSE4.1. */
17837 def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa",
17838 v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
17839 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128",
17840 v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
17841 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128",
17842 v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
17843 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128",
17844 v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
17845 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128",
17846 v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
17847 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128",
17848 v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
17849 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128",
17850 v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
17851 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128",
17852 v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
17853 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128",
17854 v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
17855 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128",
17856 v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
17857 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128",
17858 v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
17859 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128",
17860 v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
17861 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128",
17862 v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
17863 def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128",
17864 v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
17865 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd",
17866 v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
17867 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps",
17868 v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
17869 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd",
17870 v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
17871 def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss",
17872 v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
17873
17874 /* AMDFAM10 SSE4A new built-ins.  */
17875 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17876 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17877 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17878 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17879 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17880 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17881 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17882 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17883 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17884 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17885 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17886 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17887
17888 /* Access to the vec_init patterns. */
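/* These are what the _mm_set* style helpers in mmintrin.h are expected
   to expand to; e.g. _mm_set_pi32 builds an __m64 via
   __builtin_ia32_vec_init_v2si.  */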
17889 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17890 integer_type_node, NULL_TREE);
17891 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17892 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17893
17894 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17895 short_integer_type_node,
17896 short_integer_type_node,
17897 short_integer_type_node, NULL_TREE);
17898 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17899 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17900
17901 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17902 char_type_node, char_type_node,
17903 char_type_node, char_type_node,
17904 char_type_node, char_type_node,
17905 char_type_node, NULL_TREE);
17906 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17907 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17908
17909 /* Access to the vec_extract patterns. */
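/* Backing for the element-extract intrinsics; e.g. _mm_extract_epi16 is
   expected to expand to __builtin_ia32_vec_ext_v8hi.  */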
17910 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17911 integer_type_node, NULL_TREE);
17912 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2df",
17913 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17914
17915 ftype = build_function_type_list (long_long_integer_type_node,
17916 V2DI_type_node, integer_type_node,
17917 NULL_TREE);
17918 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v2di",
17919 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17920
17921 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17922 integer_type_node, NULL_TREE);
17923 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17924 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17925
17926 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17927 integer_type_node, NULL_TREE);
17928 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v4si",
17929 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17930
17931 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17932 integer_type_node, NULL_TREE);
17933 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v8hi",
17934 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17935
17936 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17937 integer_type_node, NULL_TREE);
17938 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17939 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17940
17941 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17942 integer_type_node, NULL_TREE);
17943 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17944 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17945
17946 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
17947 integer_type_node, NULL_TREE);
17948 def_builtin (MASK_SSE2, "__builtin_ia32_vec_ext_v16qi",
17949 ftype, IX86_BUILTIN_VEC_EXT_V16QI);
17950
17951 /* Access to the vec_set patterns. */
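/* Backing for the element-insert intrinsics; e.g. _mm_insert_epi16 is
   expected to expand to __builtin_ia32_vec_set_v8hi.  */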
17952 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
17953 intDI_type_node,
17954 integer_type_node, NULL_TREE);
17955 def_builtin (MASK_SSE4_1 | MASK_64BIT, "__builtin_ia32_vec_set_v2di",
17956 ftype, IX86_BUILTIN_VEC_SET_V2DI);
17957
17958 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
17959 float_type_node,
17960 integer_type_node, NULL_TREE);
17961 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf",
17962 ftype, IX86_BUILTIN_VEC_SET_V4SF);
17963
17964 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
17965 intSI_type_node,
17966 integer_type_node, NULL_TREE);
17967 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si",
17968 ftype, IX86_BUILTIN_VEC_SET_V4SI);
17969
17970 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17971 intHI_type_node,
17972 integer_type_node, NULL_TREE);
17973 def_builtin (MASK_SSE2, "__builtin_ia32_vec_set_v8hi",
17974 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17975
17976 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17977 intHI_type_node,
17978 integer_type_node, NULL_TREE);
17979 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17980 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17981
17982 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
17983 intQI_type_node,
17984 integer_type_node, NULL_TREE);
17985 def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi",
17986 ftype, IX86_BUILTIN_VEC_SET_V16QI);
17987 }
17988
17989 static void
17990 ix86_init_builtins (void)
17991 {
17992 if (TARGET_MMX)
17993 ix86_init_mmx_sse_builtins ();
17994 }
17995
17996 /* Errors in the source file can cause expand_expr to return const0_rtx
17997 where we expect a vector. To avoid crashing, use one of the vector
17998 clear instructions. */
17999 static rtx
18000 safe_vector_operand (rtx x, enum machine_mode mode)
18001 {
18002 if (x == const0_rtx)
18003 x = CONST0_RTX (mode);
18004 return x;
18005 }
18006
18007 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18008 4 operands.  The third argument must be an immediate that fits in 8 bits
18009 (4 bits for the roundsd/roundss insns), or xmm0 for the variable blends.  */
18010
18011 static rtx
18012 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18013 rtx target)
18014 {
18015 rtx pat;
18016 tree arg0 = CALL_EXPR_ARG (exp, 0);
18017 tree arg1 = CALL_EXPR_ARG (exp, 1);
18018 tree arg2 = CALL_EXPR_ARG (exp, 2);
18019 rtx op0 = expand_normal (arg0);
18020 rtx op1 = expand_normal (arg1);
18021 rtx op2 = expand_normal (arg2);
18022 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18023 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18024 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18025 enum machine_mode mode2;
18026 rtx xmm0;
18027
18028 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18029 op0 = copy_to_mode_reg (mode0, op0);
18030 if ((optimize && !register_operand (op1, mode1))
18031 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18032 op1 = copy_to_mode_reg (mode1, op1);
18033
18034 switch (icode)
18035 {
18036 case CODE_FOR_sse4_1_blendvpd:
18037 case CODE_FOR_sse4_1_blendvps:
18038 case CODE_FOR_sse4_1_pblendvb:
18039 /* The third argument of variable blends must be xmm0. */
18040 xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
18041 emit_move_insn (xmm0, op2);
18042 op2 = xmm0;
18043 break;
18044 default:
18045 mode2 = insn_data[icode].operand[2].mode;
18046 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18047 {
18048 switch (icode)
18049 {
18050 case CODE_FOR_sse4_1_roundsd:
18051 case CODE_FOR_sse4_1_roundss:
18052 error ("the third argument must be a 4-bit immediate");
18053 break;
18054 default:
18055 error ("the third argument must be an 8-bit immediate");
18056 break;
18057 }
18058 return const0_rtx;
18059 }
18060 break;
18061 }
18062
18063 if (optimize
18064 || target == 0
18065 || GET_MODE (target) != tmode
18066 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18067 target = gen_reg_rtx (tmode);
18068 pat = GEN_FCN (icode) (target, op0, op1, op2);
18069 if (! pat)
18070 return 0;
18071 emit_insn (pat);
18072 return target;
18073 }
18074
18075 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18076
18077 static rtx
18078 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18079 {
18080 rtx pat, xops[3];
18081 tree arg0 = CALL_EXPR_ARG (exp, 0);
18082 tree arg1 = CALL_EXPR_ARG (exp, 1);
18083 rtx op0 = expand_normal (arg0);
18084 rtx op1 = expand_normal (arg1);
18085 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18086 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18087 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18088
18089 if (VECTOR_MODE_P (mode0))
18090 op0 = safe_vector_operand (op0, mode0);
18091 if (VECTOR_MODE_P (mode1))
18092 op1 = safe_vector_operand (op1, mode1);
18093
18094 if (optimize || !target
18095 || GET_MODE (target) != tmode
18096 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18097 target = gen_reg_rtx (tmode);
18098
18099 if (GET_MODE (op1) == SImode && mode1 == TImode)
18100 {
18101 rtx x = gen_reg_rtx (V4SImode);
18102 emit_insn (gen_sse2_loadd (x, op1));
18103 op1 = gen_lowpart (TImode, x);
18104 }
18105
18106 /* The insn must want input operands in the same modes as the
18107 result. */
18108 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
18109 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
18110
18111 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18112 op0 = copy_to_mode_reg (mode0, op0);
18113 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18114 op1 = copy_to_mode_reg (mode1, op1);
18115
18116 /* ??? Using ix86_fixup_binary_operands is problematic when
18117 we've got mismatched modes. Fake it. */
18118
18119 xops[0] = target;
18120 xops[1] = op0;
18121 xops[2] = op1;
18122
18123 if (tmode == mode0 && tmode == mode1)
18124 {
18125 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18126 op0 = xops[1];
18127 op1 = xops[2];
18128 }
18129 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18130 {
18131 op0 = force_reg (mode0, op0);
18132 op1 = force_reg (mode1, op1);
18133 target = gen_reg_rtx (tmode);
18134 }
18135
18136 pat = GEN_FCN (icode) (target, op0, op1);
18137 if (! pat)
18138 return 0;
18139 emit_insn (pat);
18140 return target;
18141 }
18142
18143 /* Subroutine of ix86_expand_builtin to take care of stores. */
18144
18145 static rtx
18146 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18147 {
18148 rtx pat;
18149 tree arg0 = CALL_EXPR_ARG (exp, 0);
18150 tree arg1 = CALL_EXPR_ARG (exp, 1);
18151 rtx op0 = expand_normal (arg0);
18152 rtx op1 = expand_normal (arg1);
18153 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18154 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18155
18156 if (VECTOR_MODE_P (mode1))
18157 op1 = safe_vector_operand (op1, mode1);
18158
18159 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18160 op1 = copy_to_mode_reg (mode1, op1);
18161
18162 pat = GEN_FCN (icode) (op0, op1);
18163 if (pat)
18164 emit_insn (pat);
18165 return 0;
18166 }
18167
18168 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18169
18170 static rtx
18171 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18172 rtx target, int do_load)
18173 {
18174 rtx pat;
18175 tree arg0 = CALL_EXPR_ARG (exp, 0);
18176 rtx op0 = expand_normal (arg0);
18177 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18178 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18179
18180 if (optimize || !target
18181 || GET_MODE (target) != tmode
18182 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18183 target = gen_reg_rtx (tmode);
18184 if (do_load)
18185 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18186 else
18187 {
18188 if (VECTOR_MODE_P (mode0))
18189 op0 = safe_vector_operand (op0, mode0);
18190
18191 if ((optimize && !register_operand (op0, mode0))
18192 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18193 op0 = copy_to_mode_reg (mode0, op0);
18194 }
18195
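/* The SSE4.1 roundpd/roundps builtins come through this unop path but
   carry the rounding-mode selector as a second argument; fetch and
   validate it here.  */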
18196 switch (icode)
18197 {
18198 case CODE_FOR_sse4_1_roundpd:
18199 case CODE_FOR_sse4_1_roundps:
18200 {
18201 tree arg1 = CALL_EXPR_ARG (exp, 1);
18202 rtx op1 = expand_normal (arg1);
18203 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18204
18205 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18206 {
18207 error ("the second argument must be a 4-bit immediate");
18208 return const0_rtx;
18209 }
18210 pat = GEN_FCN (icode) (target, op0, op1);
18211 }
18212 break;
18213 default:
18214 pat = GEN_FCN (icode) (target, op0);
18215 break;
18216 }
18217
18218 if (! pat)
18219 return 0;
18220 emit_insn (pat);
18221 return target;
18222 }
18223
18224 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18225 sqrtss, rsqrtss, rcpss. */
18226
18227 static rtx
18228 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18229 {
18230 rtx pat;
18231 tree arg0 = CALL_EXPR_ARG (exp, 0);
18232 rtx op1, op0 = expand_normal (arg0);
18233 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18234 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18235
18236 if (optimize || !target
18237 || GET_MODE (target) != tmode
18238 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18239 target = gen_reg_rtx (tmode);
18240
18241 if (VECTOR_MODE_P (mode0))
18242 op0 = safe_vector_operand (op0, mode0);
18243
18244 if ((optimize && !register_operand (op0, mode0))
18245 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18246 op0 = copy_to_mode_reg (mode0, op0);
18247
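/* The vm* scalar patterns take two vector inputs: the operation is
   applied to element 0 of operand 1 and the remaining elements are
   copied from operand 2.  The builtin has only one argument, so use
   the same register for both.  */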
18248 op1 = op0;
18249 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18250 op1 = copy_to_mode_reg (mode0, op1);
18251
18252 pat = GEN_FCN (icode) (target, op0, op1);
18253 if (! pat)
18254 return 0;
18255 emit_insn (pat);
18256 return target;
18257 }
18258
18259 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18260
18261 static rtx
18262 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18263 rtx target)
18264 {
18265 rtx pat;
18266 tree arg0 = CALL_EXPR_ARG (exp, 0);
18267 tree arg1 = CALL_EXPR_ARG (exp, 1);
18268 rtx op0 = expand_normal (arg0);
18269 rtx op1 = expand_normal (arg1);
18270 rtx op2;
18271 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18272 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18273 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18274 enum rtx_code comparison = d->comparison;
18275
18276 if (VECTOR_MODE_P (mode0))
18277 op0 = safe_vector_operand (op0, mode0);
18278 if (VECTOR_MODE_P (mode1))
18279 op1 = safe_vector_operand (op1, mode1);
18280
18281 /* Swap operands if we have a comparison that isn't available in
18282 hardware. */
18283 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18284 {
18285 rtx tmp = gen_reg_rtx (mode1);
18286 emit_move_insn (tmp, op1);
18287 op1 = op0;
18288 op0 = tmp;
18289 }
18290
18291 if (optimize || !target
18292 || GET_MODE (target) != tmode
18293 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18294 target = gen_reg_rtx (tmode);
18295
18296 if ((optimize && !register_operand (op0, mode0))
18297 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18298 op0 = copy_to_mode_reg (mode0, op0);
18299 if ((optimize && !register_operand (op1, mode1))
18300 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18301 op1 = copy_to_mode_reg (mode1, op1);
18302
18303 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18304 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18305 if (! pat)
18306 return 0;
18307 emit_insn (pat);
18308 return target;
18309 }
18310
18311 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18312
18313 static rtx
18314 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18315 rtx target)
18316 {
18317 rtx pat;
18318 tree arg0 = CALL_EXPR_ARG (exp, 0);
18319 tree arg1 = CALL_EXPR_ARG (exp, 1);
18320 rtx op0 = expand_normal (arg0);
18321 rtx op1 = expand_normal (arg1);
18322 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18323 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18324 enum rtx_code comparison = d->comparison;
18325
18326 if (VECTOR_MODE_P (mode0))
18327 op0 = safe_vector_operand (op0, mode0);
18328 if (VECTOR_MODE_P (mode1))
18329 op1 = safe_vector_operand (op1, mode1);
18330
18331 /* Swap operands if we have a comparison that isn't available in
18332 hardware. */
18333 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18334 {
18335 rtx tmp = op1;
18336 op1 = op0;
18337 op0 = tmp;
18338 }
18339
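/* The comi/ucomi insn only sets the flags.  Zero an SImode pseudo, set
   its low byte from the flag condition, and return the SImode value.  */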
18340 target = gen_reg_rtx (SImode);
18341 emit_move_insn (target, const0_rtx);
18342 target = gen_rtx_SUBREG (QImode, target, 0);
18343
18344 if ((optimize && !register_operand (op0, mode0))
18345 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18346 op0 = copy_to_mode_reg (mode0, op0);
18347 if ((optimize && !register_operand (op1, mode1))
18348 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18349 op1 = copy_to_mode_reg (mode1, op1);
18350
18351 pat = GEN_FCN (d->icode) (op0, op1);
18352 if (! pat)
18353 return 0;
18354 emit_insn (pat);
18355 emit_insn (gen_rtx_SET (VOIDmode,
18356 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18357 gen_rtx_fmt_ee (comparison, QImode,
18358 SET_DEST (pat),
18359 const0_rtx)));
18360
18361 return SUBREG_REG (target);
18362 }
18363
18364 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18365
18366 static rtx
18367 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18368 rtx target)
18369 {
18370 rtx pat;
18371 tree arg0 = CALL_EXPR_ARG (exp, 0);
18372 tree arg1 = CALL_EXPR_ARG (exp, 1);
18373 rtx op0 = expand_normal (arg0);
18374 rtx op1 = expand_normal (arg1);
18375 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18376 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18377 enum rtx_code comparison = d->comparison;
18378
18379 if (VECTOR_MODE_P (mode0))
18380 op0 = safe_vector_operand (op0, mode0);
18381 if (VECTOR_MODE_P (mode1))
18382 op1 = safe_vector_operand (op1, mode1);
18383
18384 target = gen_reg_rtx (SImode);
18385 emit_move_insn (target, const0_rtx);
18386 target = gen_rtx_SUBREG (QImode, target, 0);
18387
18388 if ((optimize && !register_operand (op0, mode0))
18389 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18390 op0 = copy_to_mode_reg (mode0, op0);
18391 if ((optimize && !register_operand (op1, mode1))
18392 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18393 op1 = copy_to_mode_reg (mode1, op1);
18394
18395 pat = GEN_FCN (d->icode) (op0, op1);
18396 if (! pat)
18397 return 0;
18398 emit_insn (pat);
18399 emit_insn (gen_rtx_SET (VOIDmode,
18400 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18401 gen_rtx_fmt_ee (comparison, QImode,
18402 SET_DEST (pat),
18403 const0_rtx)));
18404
18405 return SUBREG_REG (target);
18406 }
18407
18408 /* Return the integer constant in ARG. Constrain it to be in the range
18409 of the subparts of VEC_TYPE; issue an error if not. */
18410
18411 static int
18412 get_element_number (tree vec_type, tree arg)
18413 {
18414 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
18415
18416 if (!host_integerp (arg, 1)
18417 || (elt = tree_low_cst (arg, 1), elt > max))
18418 {
18419 error ("selector must be an integer constant in the range 0..%wi", max);
18420 return 0;
18421 }
18422
18423 return elt;
18424 }
18425
18426 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18427 ix86_expand_vector_init. We DO have language-level syntax for this, in
18428 the form of (type){ init-list }. Except that since we can't place emms
18429 instructions from inside the compiler, we can't allow the use of MMX
18430 registers unless the user explicitly asks for it. So we do *not* define
18431 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
18432 we have builtins invoked by mmintrin.h that give us license to emit
18433 these sorts of instructions. */
18434
18435 static rtx
18436 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
18437 {
18438 enum machine_mode tmode = TYPE_MODE (type);
18439 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
18440 int i, n_elt = GET_MODE_NUNITS (tmode);
18441 rtvec v = rtvec_alloc (n_elt);
18442
18443 gcc_assert (VECTOR_MODE_P (tmode));
18444 gcc_assert (call_expr_nargs (exp) == n_elt);
18445
18446 for (i = 0; i < n_elt; ++i)
18447 {
18448 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
18449 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
18450 }
18451
18452 if (!target || !register_operand (target, tmode))
18453 target = gen_reg_rtx (tmode);
18454
18455 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
18456 return target;
18457 }
18458
18459 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18460 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
18461 had a language-level syntax for referencing vector elements. */
18462
18463 static rtx
18464 ix86_expand_vec_ext_builtin (tree exp, rtx target)
18465 {
18466 enum machine_mode tmode, mode0;
18467 tree arg0, arg1;
18468 int elt;
18469 rtx op0;
18470
18471 arg0 = CALL_EXPR_ARG (exp, 0);
18472 arg1 = CALL_EXPR_ARG (exp, 1);
18473
18474 op0 = expand_normal (arg0);
18475 elt = get_element_number (TREE_TYPE (arg0), arg1);
18476
18477 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18478 mode0 = TYPE_MODE (TREE_TYPE (arg0));
18479 gcc_assert (VECTOR_MODE_P (mode0));
18480
18481 op0 = force_reg (mode0, op0);
18482
18483 if (optimize || !target || !register_operand (target, tmode))
18484 target = gen_reg_rtx (tmode);
18485
18486 ix86_expand_vector_extract (true, target, op0, elt);
18487
18488 return target;
18489 }
18490
18491 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
18492 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
18493 a language-level syntax for referencing vector elements. */
18494
18495 static rtx
18496 ix86_expand_vec_set_builtin (tree exp)
18497 {
18498 enum machine_mode tmode, mode1;
18499 tree arg0, arg1, arg2;
18500 int elt;
18501 rtx op0, op1, target;
18502
18503 arg0 = CALL_EXPR_ARG (exp, 0);
18504 arg1 = CALL_EXPR_ARG (exp, 1);
18505 arg2 = CALL_EXPR_ARG (exp, 2);
18506
18507 tmode = TYPE_MODE (TREE_TYPE (arg0));
18508 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
18509 gcc_assert (VECTOR_MODE_P (tmode));
18510
18511 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
18512 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
18513 elt = get_element_number (TREE_TYPE (arg0), arg2);
18514
18515 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
18516 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
18517
18518 op0 = force_reg (tmode, op0);
18519 op1 = force_reg (mode1, op1);
18520
18521 /* OP0 is the source of these builtin functions and shouldn't be
18522 modified. Create a copy, use it and return it as target. */
18523 target = gen_reg_rtx (tmode);
18524 emit_move_insn (target, op0);
18525 ix86_expand_vector_set (true, target, op1, elt);
18526
18527 return target;
18528 }
18529
18530 /* Expand an expression EXP that calls a built-in function,
18531 with result going to TARGET if that's convenient
18532 (and in mode MODE if that's convenient).
18533 SUBTARGET may be used as the target for computing one of EXP's operands.
18534 IGNORE is nonzero if the value is to be ignored. */
18535
18536 static rtx
18537 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
18538 enum machine_mode mode ATTRIBUTE_UNUSED,
18539 int ignore ATTRIBUTE_UNUSED)
18540 {
18541 const struct builtin_description *d;
18542 size_t i;
18543 enum insn_code icode;
18544 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18545 tree arg0, arg1, arg2, arg3;
18546 rtx op0, op1, op2, op3, pat;
18547 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
18548 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18549
18550 switch (fcode)
18551 {
18552 case IX86_BUILTIN_EMMS:
18553 emit_insn (gen_mmx_emms ());
18554 return 0;
18555
18556 case IX86_BUILTIN_SFENCE:
18557 emit_insn (gen_sse_sfence ());
18558 return 0;
18559
18560 case IX86_BUILTIN_MASKMOVQ:
18561 case IX86_BUILTIN_MASKMOVDQU:
18562 icode = (fcode == IX86_BUILTIN_MASKMOVQ
18563 ? CODE_FOR_mmx_maskmovq
18564 : CODE_FOR_sse2_maskmovdqu);
18565 /* Note the arg order is different from the operand order. */
18566 arg1 = CALL_EXPR_ARG (exp, 0);
18567 arg2 = CALL_EXPR_ARG (exp, 1);
18568 arg0 = CALL_EXPR_ARG (exp, 2);
18569 op0 = expand_normal (arg0);
18570 op1 = expand_normal (arg1);
18571 op2 = expand_normal (arg2);
18572 mode0 = insn_data[icode].operand[0].mode;
18573 mode1 = insn_data[icode].operand[1].mode;
18574 mode2 = insn_data[icode].operand[2].mode;
18575
18576 op0 = force_reg (Pmode, op0);
18577 op0 = gen_rtx_MEM (mode1, op0);
18578
18579 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
18580 op0 = copy_to_mode_reg (mode0, op0);
18581 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
18582 op1 = copy_to_mode_reg (mode1, op1);
18583 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
18584 op2 = copy_to_mode_reg (mode2, op2);
18585 pat = GEN_FCN (icode) (op0, op1, op2);
18586 if (! pat)
18587 return 0;
18588 emit_insn (pat);
18589 return 0;
18590
18591 case IX86_BUILTIN_SQRTSS:
18592 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
18593 case IX86_BUILTIN_RSQRTSS:
18594 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18595 case IX86_BUILTIN_RCPSS:
18596 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18597
18598 case IX86_BUILTIN_LOADUPS:
18599 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18600
18601 case IX86_BUILTIN_STOREUPS:
18602 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18603
18604 case IX86_BUILTIN_LOADHPS:
18605 case IX86_BUILTIN_LOADLPS:
18606 case IX86_BUILTIN_LOADHPD:
18607 case IX86_BUILTIN_LOADLPD:
18608 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18609 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18610 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18611 : CODE_FOR_sse2_loadlpd);
18612 arg0 = CALL_EXPR_ARG (exp, 0);
18613 arg1 = CALL_EXPR_ARG (exp, 1);
18614 op0 = expand_normal (arg0);
18615 op1 = expand_normal (arg1);
18616 tmode = insn_data[icode].operand[0].mode;
18617 mode0 = insn_data[icode].operand[1].mode;
18618 mode1 = insn_data[icode].operand[2].mode;
18619
18620 op0 = force_reg (mode0, op0);
18621 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18622 if (optimize || target == 0
18623 || GET_MODE (target) != tmode
18624 || !register_operand (target, tmode))
18625 target = gen_reg_rtx (tmode);
18626 pat = GEN_FCN (icode) (target, op0, op1);
18627 if (! pat)
18628 return 0;
18629 emit_insn (pat);
18630 return target;
18631
18632 case IX86_BUILTIN_STOREHPS:
18633 case IX86_BUILTIN_STORELPS:
18634 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18635 : CODE_FOR_sse_storelps);
18636 arg0 = CALL_EXPR_ARG (exp, 0);
18637 arg1 = CALL_EXPR_ARG (exp, 1);
18638 op0 = expand_normal (arg0);
18639 op1 = expand_normal (arg1);
18640 mode0 = insn_data[icode].operand[0].mode;
18641 mode1 = insn_data[icode].operand[1].mode;
18642
18643 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18644 op1 = force_reg (mode1, op1);
18645
18646 pat = GEN_FCN (icode) (op0, op1);
18647 if (! pat)
18648 return 0;
18649 emit_insn (pat);
18650 return const0_rtx;
18651
18652 case IX86_BUILTIN_MOVNTPS:
18653 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18654 case IX86_BUILTIN_MOVNTQ:
18655 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18656
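/* ldmxcsr and stmxcsr only accept a memory operand, so go through a
   stack temporary rather than a pseudo.  */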
18657 case IX86_BUILTIN_LDMXCSR:
18658 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18659 target = assign_386_stack_local (SImode, SLOT_TEMP);
18660 emit_move_insn (target, op0);
18661 emit_insn (gen_sse_ldmxcsr (target));
18662 return 0;
18663
18664 case IX86_BUILTIN_STMXCSR:
18665 target = assign_386_stack_local (SImode, SLOT_TEMP);
18666 emit_insn (gen_sse_stmxcsr (target));
18667 return copy_to_mode_reg (SImode, target);
18668
18669 case IX86_BUILTIN_SHUFPS:
18670 case IX86_BUILTIN_SHUFPD:
18671 icode = (fcode == IX86_BUILTIN_SHUFPS
18672 ? CODE_FOR_sse_shufps
18673 : CODE_FOR_sse2_shufpd);
18674 arg0 = CALL_EXPR_ARG (exp, 0);
18675 arg1 = CALL_EXPR_ARG (exp, 1);
18676 arg2 = CALL_EXPR_ARG (exp, 2);
18677 op0 = expand_normal (arg0);
18678 op1 = expand_normal (arg1);
18679 op2 = expand_normal (arg2);
18680 tmode = insn_data[icode].operand[0].mode;
18681 mode0 = insn_data[icode].operand[1].mode;
18682 mode1 = insn_data[icode].operand[2].mode;
18683 mode2 = insn_data[icode].operand[3].mode;
18684
18685 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18686 op0 = copy_to_mode_reg (mode0, op0);
18687 if ((optimize && !register_operand (op1, mode1))
18688 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18689 op1 = copy_to_mode_reg (mode1, op1);
18690 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18691 {
18692 /* @@@ better error message */
18693 error ("mask must be an immediate");
18694 return gen_reg_rtx (tmode);
18695 }
18696 if (optimize || target == 0
18697 || GET_MODE (target) != tmode
18698 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18699 target = gen_reg_rtx (tmode);
18700 pat = GEN_FCN (icode) (target, op0, op1, op2);
18701 if (! pat)
18702 return 0;
18703 emit_insn (pat);
18704 return target;
18705
18706 case IX86_BUILTIN_PSHUFW:
18707 case IX86_BUILTIN_PSHUFD:
18708 case IX86_BUILTIN_PSHUFHW:
18709 case IX86_BUILTIN_PSHUFLW:
18710 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18711 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18712 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18713 : CODE_FOR_mmx_pshufw);
18714 arg0 = CALL_EXPR_ARG (exp, 0);
18715 arg1 = CALL_EXPR_ARG (exp, 1);
18716 op0 = expand_normal (arg0);
18717 op1 = expand_normal (arg1);
18718 tmode = insn_data[icode].operand[0].mode;
18719 mode1 = insn_data[icode].operand[1].mode;
18720 mode2 = insn_data[icode].operand[2].mode;
18721
18722 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18723 op0 = copy_to_mode_reg (mode1, op0);
18724 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18725 {
18726 /* @@@ better error message */
18727 error ("mask must be an immediate");
18728 return const0_rtx;
18729 }
18730 if (target == 0
18731 || GET_MODE (target) != tmode
18732 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18733 target = gen_reg_rtx (tmode);
18734 pat = GEN_FCN (icode) (target, op0, op1);
18735 if (! pat)
18736 return 0;
18737 emit_insn (pat);
18738 return target;
18739
18740 case IX86_BUILTIN_PSLLWI128:
18741 icode = CODE_FOR_ashlv8hi3;
18742 goto do_pshifti;
18743 case IX86_BUILTIN_PSLLDI128:
18744 icode = CODE_FOR_ashlv4si3;
18745 goto do_pshifti;
18746 case IX86_BUILTIN_PSLLQI128:
18747 icode = CODE_FOR_ashlv2di3;
18748 goto do_pshifti;
18749 case IX86_BUILTIN_PSRAWI128:
18750 icode = CODE_FOR_ashrv8hi3;
18751 goto do_pshifti;
18752 case IX86_BUILTIN_PSRADI128:
18753 icode = CODE_FOR_ashrv4si3;
18754 goto do_pshifti;
18755 case IX86_BUILTIN_PSRLWI128:
18756 icode = CODE_FOR_lshrv8hi3;
18757 goto do_pshifti;
18758 case IX86_BUILTIN_PSRLDI128:
18759 icode = CODE_FOR_lshrv4si3;
18760 goto do_pshifti;
18761 case IX86_BUILTIN_PSRLQI128:
18762 icode = CODE_FOR_lshrv2di3;
18763 goto do_pshifti;
18764 do_pshifti:
18765 arg0 = CALL_EXPR_ARG (exp, 0);
18766 arg1 = CALL_EXPR_ARG (exp, 1);
18767 op0 = expand_normal (arg0);
18768 op1 = expand_normal (arg1);
18769
18770 if (!CONST_INT_P (op1))
18771 {
18772 error ("shift must be an immediate");
18773 return const0_rtx;
18774 }
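/* A count wider than the element shifts everything out (or replicates
   the sign bit for the arithmetic shifts), so collapse any out-of-range
   immediate to 255, which the shift patterns accept.  */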
18775 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18776 op1 = GEN_INT (255);
18777
18778 tmode = insn_data[icode].operand[0].mode;
18779 mode1 = insn_data[icode].operand[1].mode;
18780 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18781 op0 = copy_to_reg (op0);
18782
18783 target = gen_reg_rtx (tmode);
18784 pat = GEN_FCN (icode) (target, op0, op1);
18785 if (!pat)
18786 return 0;
18787 emit_insn (pat);
18788 return target;
18789
18790 case IX86_BUILTIN_PSLLW128:
18791 icode = CODE_FOR_ashlv8hi3;
18792 goto do_pshift;
18793 case IX86_BUILTIN_PSLLD128:
18794 icode = CODE_FOR_ashlv4si3;
18795 goto do_pshift;
18796 case IX86_BUILTIN_PSLLQ128:
18797 icode = CODE_FOR_ashlv2di3;
18798 goto do_pshift;
18799 case IX86_BUILTIN_PSRAW128:
18800 icode = CODE_FOR_ashrv8hi3;
18801 goto do_pshift;
18802 case IX86_BUILTIN_PSRAD128:
18803 icode = CODE_FOR_ashrv4si3;
18804 goto do_pshift;
18805 case IX86_BUILTIN_PSRLW128:
18806 icode = CODE_FOR_lshrv8hi3;
18807 goto do_pshift;
18808 case IX86_BUILTIN_PSRLD128:
18809 icode = CODE_FOR_lshrv4si3;
18810 goto do_pshift;
18811 case IX86_BUILTIN_PSRLQ128:
18812 icode = CODE_FOR_lshrv2di3;
18813 goto do_pshift;
18814 do_pshift:
18815 arg0 = CALL_EXPR_ARG (exp, 0);
18816 arg1 = CALL_EXPR_ARG (exp, 1);
18817 op0 = expand_normal (arg0);
18818 op1 = expand_normal (arg1);
18819
18820 tmode = insn_data[icode].operand[0].mode;
18821 mode1 = insn_data[icode].operand[1].mode;
18822
18823 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18824 op0 = copy_to_reg (op0);
18825
18826 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18827 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18828 op1 = copy_to_reg (op1);
18829
18830 target = gen_reg_rtx (tmode);
18831 pat = GEN_FCN (icode) (target, op0, op1);
18832 if (!pat)
18833 return 0;
18834 emit_insn (pat);
18835 return target;
18836
18837 case IX86_BUILTIN_PSLLDQI128:
18838 case IX86_BUILTIN_PSRLDQI128:
18839 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18840 : CODE_FOR_sse2_lshrti3);
18841 arg0 = CALL_EXPR_ARG (exp, 0);
18842 arg1 = CALL_EXPR_ARG (exp, 1);
18843 op0 = expand_normal (arg0);
18844 op1 = expand_normal (arg1);
18845 tmode = insn_data[icode].operand[0].mode;
18846 mode1 = insn_data[icode].operand[1].mode;
18847 mode2 = insn_data[icode].operand[2].mode;
18848
18849 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18850 {
18851 op0 = copy_to_reg (op0);
18852 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18853 }
18854 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18855 {
18856 error ("shift must be an immediate");
18857 return const0_rtx;
18858 }
18859 target = gen_reg_rtx (V2DImode);
18860 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18861 op0, op1);
18862 if (! pat)
18863 return 0;
18864 emit_insn (pat);
18865 return target;
18866
18867 case IX86_BUILTIN_FEMMS:
18868 emit_insn (gen_mmx_femms ());
18869 return NULL_RTX;
18870
18871 case IX86_BUILTIN_PAVGUSB:
18872 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18873
18874 case IX86_BUILTIN_PF2ID:
18875 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18876
18877 case IX86_BUILTIN_PFACC:
18878 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18879
18880 case IX86_BUILTIN_PFADD:
18881 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18882
18883 case IX86_BUILTIN_PFCMPEQ:
18884 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18885
18886 case IX86_BUILTIN_PFCMPGE:
18887 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18888
18889 case IX86_BUILTIN_PFCMPGT:
18890 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18891
18892 case IX86_BUILTIN_PFMAX:
18893 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18894
18895 case IX86_BUILTIN_PFMIN:
18896 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18897
18898 case IX86_BUILTIN_PFMUL:
18899 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18900
18901 case IX86_BUILTIN_PFRCP:
18902 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18903
18904 case IX86_BUILTIN_PFRCPIT1:
18905 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18906
18907 case IX86_BUILTIN_PFRCPIT2:
18908 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18909
18910 case IX86_BUILTIN_PFRSQIT1:
18911 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18912
18913 case IX86_BUILTIN_PFRSQRT:
18914 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18915
18916 case IX86_BUILTIN_PFSUB:
18917 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18918
18919 case IX86_BUILTIN_PFSUBR:
18920 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18921
18922 case IX86_BUILTIN_PI2FD:
18923 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18924
18925 case IX86_BUILTIN_PMULHRW:
18926 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18927
18928 case IX86_BUILTIN_PF2IW:
18929 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18930
18931 case IX86_BUILTIN_PFNACC:
18932 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18933
18934 case IX86_BUILTIN_PFPNACC:
18935 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18936
18937 case IX86_BUILTIN_PI2FW:
18938 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18939
18940 case IX86_BUILTIN_PSWAPDSI:
18941 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18942
18943 case IX86_BUILTIN_PSWAPDSF:
18944 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18945
18946 case IX86_BUILTIN_SQRTSD:
18947 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18948 case IX86_BUILTIN_LOADUPD:
18949 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18950 case IX86_BUILTIN_STOREUPD:
18951 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18952
18953 case IX86_BUILTIN_MFENCE:
18954 emit_insn (gen_sse2_mfence ());
18955 return 0;
18956 case IX86_BUILTIN_LFENCE:
18957 emit_insn (gen_sse2_lfence ());
18958 return 0;
18959
18960 case IX86_BUILTIN_CLFLUSH:
18961 arg0 = CALL_EXPR_ARG (exp, 0);
18962 op0 = expand_normal (arg0);
18963 icode = CODE_FOR_sse2_clflush;
18964 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18965 op0 = copy_to_mode_reg (Pmode, op0);
18966
18967 emit_insn (gen_sse2_clflush (op0));
18968 return 0;
18969
18970 case IX86_BUILTIN_MOVNTPD:
18971 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18972 case IX86_BUILTIN_MOVNTDQ:
18973 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18974 case IX86_BUILTIN_MOVNTI:
18975 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18976
18977 case IX86_BUILTIN_LOADDQU:
18978 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18979 case IX86_BUILTIN_STOREDQU:
18980 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18981
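      /* MONITOR takes the address in a pointer-mode register and the two
         extension/hint operands in SImode registers, so force everything
         into registers before emitting the insn.  */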
18982 case IX86_BUILTIN_MONITOR:
18983 arg0 = CALL_EXPR_ARG (exp, 0);
18984 arg1 = CALL_EXPR_ARG (exp, 1);
18985 arg2 = CALL_EXPR_ARG (exp, 2);
18986 op0 = expand_normal (arg0);
18987 op1 = expand_normal (arg1);
18988 op2 = expand_normal (arg2);
18989 if (!REG_P (op0))
18990 op0 = copy_to_mode_reg (Pmode, op0);
18991 if (!REG_P (op1))
18992 op1 = copy_to_mode_reg (SImode, op1);
18993 if (!REG_P (op2))
18994 op2 = copy_to_mode_reg (SImode, op2);
18995 if (!TARGET_64BIT)
18996 emit_insn (gen_sse3_monitor (op0, op1, op2));
18997 else
18998 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18999 return 0;
19000
19001 case IX86_BUILTIN_MWAIT:
19002 arg0 = CALL_EXPR_ARG (exp, 0);
19003 arg1 = CALL_EXPR_ARG (exp, 1);
19004 op0 = expand_normal (arg0);
19005 op1 = expand_normal (arg1);
19006 if (!REG_P (op0))
19007 op0 = copy_to_mode_reg (SImode, op0);
19008 if (!REG_P (op1))
19009 op1 = copy_to_mode_reg (SImode, op1);
19010 emit_insn (gen_sse3_mwait (op0, op1));
19011 return 0;
19012
19013 case IX86_BUILTIN_LDDQU:
19014 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19015 target, 1);
19016
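      /* PALIGNR comes in a 64-bit MMX form and a 128-bit SSSE3/XMM form;
         pick the insn pattern and operand mode accordingly.  */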
19017 case IX86_BUILTIN_PALIGNR:
19018 case IX86_BUILTIN_PALIGNR128:
19019 if (fcode == IX86_BUILTIN_PALIGNR)
19020 {
19021 icode = CODE_FOR_ssse3_palignrdi;
19022 mode = DImode;
19023 }
19024 else
19025 {
19026 icode = CODE_FOR_ssse3_palignrti;
19027 mode = V2DImode;
19028 }
19029 arg0 = CALL_EXPR_ARG (exp, 0);
19030 arg1 = CALL_EXPR_ARG (exp, 1);
19031 arg2 = CALL_EXPR_ARG (exp, 2);
19032 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
19033 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
19034 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
19035 tmode = insn_data[icode].operand[0].mode;
19036 mode1 = insn_data[icode].operand[1].mode;
19037 mode2 = insn_data[icode].operand[2].mode;
19038 mode3 = insn_data[icode].operand[3].mode;
19039
19040 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19041 {
19042 op0 = copy_to_reg (op0);
19043 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19044 }
19045 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19046 {
19047 op1 = copy_to_reg (op1);
19048 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19049 }
19050 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19051 {
19052 error ("shift must be an immediate");
19053 return const0_rtx;
19054 }
19055 target = gen_reg_rtx (mode);
19056 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19057 op0, op1, op2);
19058 if (! pat)
19059 return 0;
19060 emit_insn (pat);
19061 return target;
19062
19063 case IX86_BUILTIN_MOVNTDQA:
19064 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19065 target, 1);
19066
19067 case IX86_BUILTIN_MOVNTSD:
19068 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19069
19070 case IX86_BUILTIN_MOVNTSS:
19071 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19072
19073 case IX86_BUILTIN_INSERTQ:
19074 case IX86_BUILTIN_EXTRQ:
19075 icode = (fcode == IX86_BUILTIN_EXTRQ
19076 ? CODE_FOR_sse4a_extrq
19077 : CODE_FOR_sse4a_insertq);
19078 arg0 = CALL_EXPR_ARG (exp, 0);
19079 arg1 = CALL_EXPR_ARG (exp, 1);
19080 op0 = expand_normal (arg0);
19081 op1 = expand_normal (arg1);
19082 tmode = insn_data[icode].operand[0].mode;
19083 mode1 = insn_data[icode].operand[1].mode;
19084 mode2 = insn_data[icode].operand[2].mode;
19085 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19086 op0 = copy_to_mode_reg (mode1, op0);
19087 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19088 op1 = copy_to_mode_reg (mode2, op1);
19089 if (optimize || target == 0
19090 || GET_MODE (target) != tmode
19091 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19092 target = gen_reg_rtx (tmode);
19093 pat = GEN_FCN (icode) (target, op0, op1);
19094 if (! pat)
19095 return NULL_RTX;
19096 emit_insn (pat);
19097 return target;
19098
19099 case IX86_BUILTIN_EXTRQI:
19100 icode = CODE_FOR_sse4a_extrqi;
19101 arg0 = CALL_EXPR_ARG (exp, 0);
19102 arg1 = CALL_EXPR_ARG (exp, 1);
19103 arg2 = CALL_EXPR_ARG (exp, 2);
19104 op0 = expand_normal (arg0);
19105 op1 = expand_normal (arg1);
19106 op2 = expand_normal (arg2);
19107 tmode = insn_data[icode].operand[0].mode;
19108 mode1 = insn_data[icode].operand[1].mode;
19109 mode2 = insn_data[icode].operand[2].mode;
19110 mode3 = insn_data[icode].operand[3].mode;
19111 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19112 op0 = copy_to_mode_reg (mode1, op0);
19113 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19114 {
19115 error ("index mask must be an immediate");
19116 return gen_reg_rtx (tmode);
19117 }
19118 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19119 {
19120 error ("length mask must be an immediate");
19121 return gen_reg_rtx (tmode);
19122 }
19123 if (optimize || target == 0
19124 || GET_MODE (target) != tmode
19125 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19126 target = gen_reg_rtx (tmode);
19127 pat = GEN_FCN (icode) (target, op0, op1, op2);
19128 if (! pat)
19129 return NULL_RTX;
19130 emit_insn (pat);
19131 return target;
19132
19133 case IX86_BUILTIN_INSERTQI:
19134 icode = CODE_FOR_sse4a_insertqi;
19135 arg0 = CALL_EXPR_ARG (exp, 0);
19136 arg1 = CALL_EXPR_ARG (exp, 1);
19137 arg2 = CALL_EXPR_ARG (exp, 2);
19138 arg3 = CALL_EXPR_ARG (exp, 3);
19139 op0 = expand_normal (arg0);
19140 op1 = expand_normal (arg1);
19141 op2 = expand_normal (arg2);
19142 op3 = expand_normal (arg3);
19143 tmode = insn_data[icode].operand[0].mode;
19144 mode1 = insn_data[icode].operand[1].mode;
19145 mode2 = insn_data[icode].operand[2].mode;
19146 mode3 = insn_data[icode].operand[3].mode;
19147 mode4 = insn_data[icode].operand[4].mode;
19148
19149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19150 op0 = copy_to_mode_reg (mode1, op0);
19151
19152 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19153 op1 = copy_to_mode_reg (mode2, op1);
19154
19155 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19156 {
19157 error ("index mask must be an immediate");
19158 return gen_reg_rtx (tmode);
19159 }
19160 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19161 {
19162 error ("length mask must be an immediate");
19163 return gen_reg_rtx (tmode);
19164 }
19165 if (optimize || target == 0
19166 || GET_MODE (target) != tmode
19167 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19168 target = gen_reg_rtx (tmode);
19169 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19170 if (! pat)
19171 return NULL_RTX;
19172 emit_insn (pat);
19173 return target;
19174
19175 case IX86_BUILTIN_VEC_INIT_V2SI:
19176 case IX86_BUILTIN_VEC_INIT_V4HI:
19177 case IX86_BUILTIN_VEC_INIT_V8QI:
19178 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19179
19180 case IX86_BUILTIN_VEC_EXT_V2DF:
19181 case IX86_BUILTIN_VEC_EXT_V2DI:
19182 case IX86_BUILTIN_VEC_EXT_V4SF:
19183 case IX86_BUILTIN_VEC_EXT_V4SI:
19184 case IX86_BUILTIN_VEC_EXT_V8HI:
19185 case IX86_BUILTIN_VEC_EXT_V2SI:
19186 case IX86_BUILTIN_VEC_EXT_V4HI:
19187 case IX86_BUILTIN_VEC_EXT_V16QI:
19188 return ix86_expand_vec_ext_builtin (exp, target);
19189
19190 case IX86_BUILTIN_VEC_SET_V2DI:
19191 case IX86_BUILTIN_VEC_SET_V4SF:
19192 case IX86_BUILTIN_VEC_SET_V4SI:
19193 case IX86_BUILTIN_VEC_SET_V8HI:
19194 case IX86_BUILTIN_VEC_SET_V4HI:
19195 case IX86_BUILTIN_VEC_SET_V16QI:
19196 return ix86_expand_vec_set_builtin (exp);
19197
19198 default:
19199 break;
19200 }
19201
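  /* The builtin was not handled by the switch above; fall back to the
     generic descriptor tables and expand it with the matching helper.  */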
19202 for (i = 0, d = bdesc_sse_3arg;
19203 i < ARRAY_SIZE (bdesc_sse_3arg);
19204 i++, d++)
19205 if (d->code == fcode)
19206 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19207 target);
19208
19209 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19210 if (d->code == fcode)
19211 {
19212 /* Compares are treated specially. */
19213 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19214 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19215 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19216 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19217 return ix86_expand_sse_compare (d, exp, target);
19218
19219 return ix86_expand_binop_builtin (d->icode, exp, target);
19220 }
19221
19222 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19223 if (d->code == fcode)
19224 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19225
19226 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19227 if (d->code == fcode)
19228 return ix86_expand_sse_comi (d, exp, target);
19229
19230 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19231 if (d->code == fcode)
19232 return ix86_expand_sse_ptest (d, exp, target);
19233
19234 gcc_unreachable ();
19235 }
19236
19237 /* Returns a function decl for a vectorized version of the builtin function
19238 with builtin function code FN and the result vector type TYPE, or NULL_TREE
19239 if it is not available. */
19240
19241 static tree
19242 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
19243 tree type_in)
19244 {
19245 enum machine_mode in_mode, out_mode;
19246 int in_n, out_n;
19247
19248 if (TREE_CODE (type_out) != VECTOR_TYPE
19249 || TREE_CODE (type_in) != VECTOR_TYPE)
19250 return NULL_TREE;
19251
19252 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19253 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19254 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19255 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19256
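  /* Match the scalar builtin against the vector shapes we can handle:
     the element mode and the number of elements must agree with the
     corresponding SSE/SSE2 instruction.  */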
19257 switch (fn)
19258 {
19259 case BUILT_IN_SQRT:
19260 if (out_mode == DFmode && out_n == 2
19261 && in_mode == DFmode && in_n == 2)
19262 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19263 return NULL_TREE;
19264
19265 case BUILT_IN_SQRTF:
19266 if (out_mode == SFmode && out_n == 4
19267 && in_mode == SFmode && in_n == 4)
19268 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19269 return NULL_TREE;
19270
19271 case BUILT_IN_LRINTF:
19272 if (out_mode == SImode && out_n == 4
19273 && in_mode == SFmode && in_n == 4)
19274 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19275 return NULL_TREE;
19276
19277 default:
19278 ;
19279 }
19280
19281 return NULL_TREE;
19282 }
19283
19284 /* Returns a decl of a function that implements conversion of the
19285 input vector of type TYPE, or NULL_TREE if it is not available. */
19286
19287 static tree
19288 ix86_builtin_conversion (enum tree_code code, tree type)
19289 {
19290 if (TREE_CODE (type) != VECTOR_TYPE)
19291 return NULL_TREE;
19292
19293 switch (code)
19294 {
19295 case FLOAT_EXPR:
19296 switch (TYPE_MODE (type))
19297 {
19298 case V4SImode:
19299 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
19300 default:
19301 return NULL_TREE;
19302 }
19303
19304 case FIX_TRUNC_EXPR:
19305 switch (TYPE_MODE (type))
19306 {
19307 case V4SFmode:
19308 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
19309 default:
19310 return NULL_TREE;
19311 }
19312 default:
19313 return NULL_TREE;
19314
19315 }
19316 }
19317
 19318 /* Store OPERAND to memory after reload is completed.  This means
 19319    that we can't easily use assign_stack_local.  */
19320 rtx
19321 ix86_force_to_memory (enum machine_mode mode, rtx operand)
19322 {
19323 rtx result;
19324
19325 gcc_assert (reload_completed);
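  /* With a red zone we can store below the stack pointer without
     adjusting it; otherwise push the value with a pre-decrement.  */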
19326 if (TARGET_RED_ZONE)
19327 {
19328 result = gen_rtx_MEM (mode,
19329 gen_rtx_PLUS (Pmode,
19330 stack_pointer_rtx,
19331 GEN_INT (-RED_ZONE_SIZE)));
19332 emit_move_insn (result, operand);
19333 }
19334 else if (!TARGET_RED_ZONE && TARGET_64BIT)
19335 {
19336 switch (mode)
19337 {
19338 case HImode:
19339 case SImode:
19340 operand = gen_lowpart (DImode, operand);
19341 /* FALLTHRU */
19342 case DImode:
19343 emit_insn (
19344 gen_rtx_SET (VOIDmode,
19345 gen_rtx_MEM (DImode,
19346 gen_rtx_PRE_DEC (DImode,
19347 stack_pointer_rtx)),
19348 operand));
19349 break;
19350 default:
19351 gcc_unreachable ();
19352 }
19353 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19354 }
19355 else
19356 {
19357 switch (mode)
19358 {
19359 case DImode:
19360 {
19361 rtx operands[2];
19362 split_di (&operand, 1, operands, operands + 1);
19363 emit_insn (
19364 gen_rtx_SET (VOIDmode,
19365 gen_rtx_MEM (SImode,
19366 gen_rtx_PRE_DEC (Pmode,
19367 stack_pointer_rtx)),
19368 operands[1]));
19369 emit_insn (
19370 gen_rtx_SET (VOIDmode,
19371 gen_rtx_MEM (SImode,
19372 gen_rtx_PRE_DEC (Pmode,
19373 stack_pointer_rtx)),
19374 operands[0]));
19375 }
19376 break;
19377 case HImode:
19378 /* Store HImodes as SImodes. */
19379 operand = gen_lowpart (SImode, operand);
19380 /* FALLTHRU */
19381 case SImode:
19382 emit_insn (
19383 gen_rtx_SET (VOIDmode,
19384 gen_rtx_MEM (GET_MODE (operand),
19385 gen_rtx_PRE_DEC (SImode,
19386 stack_pointer_rtx)),
19387 operand));
19388 break;
19389 default:
19390 gcc_unreachable ();
19391 }
19392 result = gen_rtx_MEM (mode, stack_pointer_rtx);
19393 }
19394 return result;
19395 }
19396
 19397 /* Free the operand from memory.  */
19398 void
19399 ix86_free_from_memory (enum machine_mode mode)
19400 {
19401 if (!TARGET_RED_ZONE)
19402 {
19403 int size;
19404
19405 if (mode == DImode || TARGET_64BIT)
19406 size = 8;
19407 else
19408 size = 4;
 19409       /* Use LEA to deallocate stack space.  In peephole2 it will be converted
 19410 	 to a pop or add instruction if registers are available.  */
19411 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
19412 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
19413 GEN_INT (size))));
19414 }
19415 }
19416
19417 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
19418 QImode must go into class Q_REGS.
 19419    Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
 19420    movdf to do mem-to-mem moves through integer regs.  */
19421 enum reg_class
19422 ix86_preferred_reload_class (rtx x, enum reg_class class)
19423 {
19424 enum machine_mode mode = GET_MODE (x);
19425
19426 /* We're only allowed to return a subclass of CLASS. Many of the
19427 following checks fail for NO_REGS, so eliminate that early. */
19428 if (class == NO_REGS)
19429 return NO_REGS;
19430
19431 /* All classes can load zeros. */
19432 if (x == CONST0_RTX (mode))
19433 return class;
19434
19435 /* Force constants into memory if we are loading a (nonzero) constant into
19436 an MMX or SSE register. This is because there are no MMX/SSE instructions
19437 to load from a constant. */
19438 if (CONSTANT_P (x)
19439 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
19440 return NO_REGS;
19441
19442 /* Prefer SSE regs only, if we can use them for math. */
19443 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
19444 return SSE_CLASS_P (class) ? class : NO_REGS;
19445
19446 /* Floating-point constants need more complex checks. */
19447 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
19448 {
19449 /* General regs can load everything. */
19450 if (reg_class_subset_p (class, GENERAL_REGS))
19451 return class;
19452
19453 /* Floats can load 0 and 1 plus some others. Note that we eliminated
19454 zero above. We only want to wind up preferring 80387 registers if
19455 we plan on doing computation with them. */
19456 if (TARGET_80387
19457 && standard_80387_constant_p (x))
19458 {
19459 /* Limit class to non-sse. */
19460 if (class == FLOAT_SSE_REGS)
19461 return FLOAT_REGS;
19462 if (class == FP_TOP_SSE_REGS)
19463 return FP_TOP_REG;
19464 if (class == FP_SECOND_SSE_REGS)
19465 return FP_SECOND_REG;
19466 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
19467 return class;
19468 }
19469
19470 return NO_REGS;
19471 }
19472
 19473   /* Generally when we see PLUS here, it's the function invariant
 19474      (plus soft-fp const_int), which can only be computed into general
 19475      regs.  */
19476 if (GET_CODE (x) == PLUS)
19477 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
19478
19479 /* QImode constants are easy to load, but non-constant QImode data
19480 must go into Q_REGS. */
19481 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
19482 {
19483 if (reg_class_subset_p (class, Q_REGS))
19484 return class;
19485 if (reg_class_subset_p (Q_REGS, class))
19486 return Q_REGS;
19487 return NO_REGS;
19488 }
19489
19490 return class;
19491 }
19492
19493 /* Discourage putting floating-point values in SSE registers unless
19494 SSE math is being used, and likewise for the 387 registers. */
19495 enum reg_class
19496 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
19497 {
19498 enum machine_mode mode = GET_MODE (x);
19499
19500 /* Restrict the output reload class to the register bank that we are doing
19501 math on. If we would like not to return a subset of CLASS, reject this
19502 alternative: if reload cannot do this, it will still use its choice. */
19503 mode = GET_MODE (x);
19504 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
19505 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
19506
19507 if (X87_FLOAT_MODE_P (mode))
19508 {
19509 if (class == FP_TOP_SSE_REGS)
19510 return FP_TOP_REG;
19511 else if (class == FP_SECOND_SSE_REGS)
19512 return FP_SECOND_REG;
19513 else
19514 return FLOAT_CLASS_P (class) ? class : NO_REGS;
19515 }
19516
19517 return class;
19518 }
19519
19520 /* If we are copying between general and FP registers, we need a memory
19521 location. The same is true for SSE and MMX registers.
19522
 19523    The macro can't work reliably when one of the CLASSES is a class containing
 19524    registers from multiple units (SSE, MMX, integer).  We avoid this by never
 19525    combining those units in a single alternative in the machine description.
 19526    Ensure that this constraint holds to avoid unexpected surprises.
19527
19528 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
19529 enforce these sanity checks. */
19530
19531 int
19532 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
19533 enum machine_mode mode, int strict)
19534 {
19535 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
19536 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
19537 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
19538 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
19539 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
19540 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
19541 {
19542 gcc_assert (!strict);
19543 return true;
19544 }
19545
19546 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
19547 return true;
19548
 19549   /* ??? This is a lie.  We do have moves between mmx/general, and between
 19550      mmx/sse2.  But by saying we need secondary memory we discourage the
 19551      register allocator from using the mmx registers unless needed.  */
19552 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
19553 return true;
19554
19555 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19556 {
19557 /* SSE1 doesn't have any direct moves from other classes. */
19558 if (!TARGET_SSE2)
19559 return true;
19560
19561 /* If the target says that inter-unit moves are more expensive
19562 than moving through memory, then don't generate them. */
19563 if (!TARGET_INTER_UNIT_MOVES)
19564 return true;
19565
19566 /* Between SSE and general, we have moves no larger than word size. */
19567 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19568 return true;
19569 }
19570
19571 return false;
19572 }
19573
19574 /* Return true if the registers in CLASS cannot represent the change from
19575 modes FROM to TO. */
19576
19577 bool
19578 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
19579 enum reg_class class)
19580 {
19581 if (from == to)
19582 return false;
19583
19584 /* x87 registers can't do subreg at all, as all values are reformatted
19585 to extended precision. */
19586 if (MAYBE_FLOAT_CLASS_P (class))
19587 return true;
19588
19589 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19590 {
19591 /* Vector registers do not support QI or HImode loads. If we don't
19592 disallow a change to these modes, reload will assume it's ok to
19593 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19594 the vec_dupv4hi pattern. */
19595 if (GET_MODE_SIZE (from) < 4)
19596 return true;
19597
19598 /* Vector registers do not support subreg with nonzero offsets, which
19599 are otherwise valid for integer registers. Since we can't see
19600 whether we have a nonzero offset from here, prohibit all
19601 nonparadoxical subregs changing size. */
19602 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
19603 return true;
19604 }
19605
19606 return false;
19607 }
19608
19609 /* Return the cost of moving data from a register in class CLASS1 to
19610 one in class CLASS2.
19611
19612 It is not required that the cost always equal 2 when FROM is the same as TO;
19613 on some machines it is expensive to move between registers if they are not
19614 general registers. */
19615
19616 int
19617 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19618 enum reg_class class2)
19619 {
19620 /* In case we require secondary memory, compute cost of the store followed
19621 by load. In order to avoid bad register allocation choices, we need
19622 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19623
19624 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19625 {
19626 int cost = 1;
19627
19628 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19629 MEMORY_MOVE_COST (mode, class1, 1));
19630 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19631 MEMORY_MOVE_COST (mode, class2, 1));
19632
 19633       /* When copying from a general purpose register we may emit multiple
 19634          stores followed by a single load, causing a memory size mismatch
 19635          stall.  Count this as an arbitrarily high cost of 20.  */
19636 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19637 cost += 20;
19638
19639 /* In the case of FP/MMX moves, the registers actually overlap, and we
19640 have to switch modes in order to treat them differently. */
19641 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19642 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19643 cost += 20;
19644
19645 return cost;
19646 }
19647
19648 /* Moves between SSE/MMX and integer unit are expensive. */
19649 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19650 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19651 return ix86_cost->mmxsse_to_integer;
19652 if (MAYBE_FLOAT_CLASS_P (class1))
19653 return ix86_cost->fp_move;
19654 if (MAYBE_SSE_CLASS_P (class1))
19655 return ix86_cost->sse_move;
19656 if (MAYBE_MMX_CLASS_P (class1))
19657 return ix86_cost->mmx_move;
19658 return 2;
19659 }
19660
19661 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19662
19663 bool
19664 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19665 {
19666 /* Flags and only flags can only hold CCmode values. */
19667 if (CC_REGNO_P (regno))
19668 return GET_MODE_CLASS (mode) == MODE_CC;
19669 if (GET_MODE_CLASS (mode) == MODE_CC
19670 || GET_MODE_CLASS (mode) == MODE_RANDOM
19671 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19672 return 0;
19673 if (FP_REGNO_P (regno))
19674 return VALID_FP_MODE_P (mode);
19675 if (SSE_REGNO_P (regno))
19676 {
19677 /* We implement the move patterns for all vector modes into and
19678 out of SSE registers, even when no operation instructions
19679 are available. */
19680 return (VALID_SSE_REG_MODE (mode)
19681 || VALID_SSE2_REG_MODE (mode)
19682 || VALID_MMX_REG_MODE (mode)
19683 || VALID_MMX_REG_MODE_3DNOW (mode));
19684 }
19685 if (MMX_REGNO_P (regno))
19686 {
19687 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19688 so if the register is available at all, then we can move data of
19689 the given mode into or out of it. */
19690 return (VALID_MMX_REG_MODE (mode)
19691 || VALID_MMX_REG_MODE_3DNOW (mode));
19692 }
19693
19694 if (mode == QImode)
19695 {
19696 /* Take care for QImode values - they can be in non-QI regs,
19697 but then they do cause partial register stalls. */
19698 if (regno < 4 || TARGET_64BIT)
19699 return 1;
19700 if (!TARGET_PARTIAL_REG_STALL)
19701 return 1;
19702 return reload_in_progress || reload_completed;
19703 }
19704 /* We handle both integer and floats in the general purpose registers. */
19705 else if (VALID_INT_MODE_P (mode))
19706 return 1;
19707 else if (VALID_FP_MODE_P (mode))
19708 return 1;
19709 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19710 on to use that value in smaller contexts, this can easily force a
19711 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19712 supporting DImode, allow it. */
19713 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19714 return 1;
19715
19716 return 0;
19717 }
19718
19719 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19720 tieable integer mode. */
19721
19722 static bool
19723 ix86_tieable_integer_mode_p (enum machine_mode mode)
19724 {
19725 switch (mode)
19726 {
19727 case HImode:
19728 case SImode:
19729 return true;
19730
19731 case QImode:
19732 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19733
19734 case DImode:
19735 return TARGET_64BIT;
19736
19737 default:
19738 return false;
19739 }
19740 }
19741
19742 /* Return true if MODE1 is accessible in a register that can hold MODE2
19743 without copying. That is, all register classes that can hold MODE2
19744 can also hold MODE1. */
19745
19746 bool
19747 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19748 {
19749 if (mode1 == mode2)
19750 return true;
19751
19752 if (ix86_tieable_integer_mode_p (mode1)
19753 && ix86_tieable_integer_mode_p (mode2))
19754 return true;
19755
19756 /* MODE2 being XFmode implies fp stack or general regs, which means we
19757 can tie any smaller floating point modes to it. Note that we do not
19758 tie this with TFmode. */
19759 if (mode2 == XFmode)
19760 return mode1 == SFmode || mode1 == DFmode;
19761
19762 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19763 that we can tie it with SFmode. */
19764 if (mode2 == DFmode)
19765 return mode1 == SFmode;
19766
19767 /* If MODE2 is only appropriate for an SSE register, then tie with
19768 any other mode acceptable to SSE registers. */
19769 if (GET_MODE_SIZE (mode2) == 16
19770 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19771 return (GET_MODE_SIZE (mode1) == 16
19772 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19773
19774 /* If MODE2 is appropriate for an MMX register, then tie
19775 with any other mode acceptable to MMX registers. */
19776 if (GET_MODE_SIZE (mode2) == 8
19777 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19778 return (GET_MODE_SIZE (mode1) == 8
19779 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19780
19781 return false;
19782 }
19783
19784 /* Return the cost of moving data of mode M between a
19785 register and memory. A value of 2 is the default; this cost is
19786 relative to those in `REGISTER_MOVE_COST'.
19787
19788 If moving between registers and memory is more expensive than
19789 between two registers, you should define this macro to express the
19790 relative cost.
19791
 19792    Also model the increased cost of moving QImode registers in
 19793    non-Q_REGS classes.
19794 */
19795 int
19796 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19797 {
19798 if (FLOAT_CLASS_P (class))
19799 {
19800 int index;
19801 switch (mode)
19802 {
19803 case SFmode:
19804 index = 0;
19805 break;
19806 case DFmode:
19807 index = 1;
19808 break;
19809 case XFmode:
19810 index = 2;
19811 break;
19812 default:
19813 return 100;
19814 }
19815 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19816 }
19817 if (SSE_CLASS_P (class))
19818 {
19819 int index;
19820 switch (GET_MODE_SIZE (mode))
19821 {
19822 case 4:
19823 index = 0;
19824 break;
19825 case 8:
19826 index = 1;
19827 break;
19828 case 16:
19829 index = 2;
19830 break;
19831 default:
19832 return 100;
19833 }
19834 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19835 }
19836 if (MMX_CLASS_P (class))
19837 {
19838 int index;
19839 switch (GET_MODE_SIZE (mode))
19840 {
19841 case 4:
19842 index = 0;
19843 break;
19844 case 8:
19845 index = 1;
19846 break;
19847 default:
19848 return 100;
19849 }
19850 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19851 }
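  /* Integer classes: pick the cost by operand size.  QImode data in
     non-Q_REGS classes and multi-word moves cost extra.  */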
19852 switch (GET_MODE_SIZE (mode))
19853 {
19854 case 1:
19855 if (in)
19856 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19857 : ix86_cost->movzbl_load);
19858 else
19859 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19860 : ix86_cost->int_store[0] + 4);
19861 break;
19862 case 2:
19863 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19864 default:
19865 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19866 if (mode == TFmode)
19867 mode = XFmode;
19868 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19869 * (((int) GET_MODE_SIZE (mode)
19870 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19871 }
19872 }
19873
19874 /* Compute a (partial) cost for rtx X. Return true if the complete
19875 cost has been computed, and false if subexpressions should be
19876 scanned. In either case, *TOTAL contains the cost result. */
19877
19878 static bool
19879 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19880 {
19881 enum machine_mode mode = GET_MODE (x);
19882
19883 switch (code)
19884 {
19885 case CONST_INT:
19886 case CONST:
19887 case LABEL_REF:
19888 case SYMBOL_REF:
19889 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19890 *total = 3;
19891 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19892 *total = 2;
19893 else if (flag_pic && SYMBOLIC_CONST (x)
19894 && (!TARGET_64BIT
 19895 		   || (GET_CODE (x) != LABEL_REF
19896 && (GET_CODE (x) != SYMBOL_REF
19897 || !SYMBOL_REF_LOCAL_P (x)))))
19898 *total = 1;
19899 else
19900 *total = 0;
19901 return true;
19902
19903 case CONST_DOUBLE:
19904 if (mode == VOIDmode)
19905 *total = 0;
19906 else
19907 switch (standard_80387_constant_p (x))
19908 {
19909 case 1: /* 0.0 */
19910 *total = 1;
19911 break;
19912 default: /* Other constants */
19913 *total = 2;
19914 break;
19915 case 0:
19916 case -1:
19917 /* Start with (MEM (SYMBOL_REF)), since that's where
19918 it'll probably end up. Add a penalty for size. */
19919 *total = (COSTS_N_INSNS (1)
19920 + (flag_pic != 0 && !TARGET_64BIT)
19921 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19922 break;
19923 }
19924 return true;
19925
19926 case ZERO_EXTEND:
 19927       /* The zero extension is often completely free on x86_64, so make
19928 it as cheap as possible. */
19929 if (TARGET_64BIT && mode == DImode
19930 && GET_MODE (XEXP (x, 0)) == SImode)
19931 *total = 1;
19932 else if (TARGET_ZERO_EXTEND_WITH_AND)
19933 *total = ix86_cost->add;
19934 else
19935 *total = ix86_cost->movzx;
19936 return false;
19937
19938 case SIGN_EXTEND:
19939 *total = ix86_cost->movsx;
19940 return false;
19941
19942 case ASHIFT:
19943 if (CONST_INT_P (XEXP (x, 1))
19944 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19945 {
19946 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19947 if (value == 1)
19948 {
19949 *total = ix86_cost->add;
19950 return false;
19951 }
19952 if ((value == 2 || value == 3)
19953 && ix86_cost->lea <= ix86_cost->shift_const)
19954 {
19955 *total = ix86_cost->lea;
19956 return false;
19957 }
19958 }
19959 /* FALLTHRU */
19960
19961 case ROTATE:
19962 case ASHIFTRT:
19963 case LSHIFTRT:
19964 case ROTATERT:
19965 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19966 {
19967 if (CONST_INT_P (XEXP (x, 1)))
19968 {
19969 if (INTVAL (XEXP (x, 1)) > 32)
19970 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19971 else
19972 *total = ix86_cost->shift_const * 2;
19973 }
19974 else
19975 {
19976 if (GET_CODE (XEXP (x, 1)) == AND)
19977 *total = ix86_cost->shift_var * 2;
19978 else
19979 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19980 }
19981 }
19982 else
19983 {
19984 if (CONST_INT_P (XEXP (x, 1)))
19985 *total = ix86_cost->shift_const;
19986 else
19987 *total = ix86_cost->shift_var;
19988 }
19989 return false;
19990
19991 case MULT:
19992 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19993 {
19994 /* ??? SSE scalar cost should be used here. */
19995 *total = ix86_cost->fmul;
19996 return false;
19997 }
19998 else if (X87_FLOAT_MODE_P (mode))
19999 {
20000 *total = ix86_cost->fmul;
20001 return false;
20002 }
20003 else if (FLOAT_MODE_P (mode))
20004 {
20005 /* ??? SSE vector cost should be used here. */
20006 *total = ix86_cost->fmul;
20007 return false;
20008 }
20009 else
20010 {
20011 rtx op0 = XEXP (x, 0);
20012 rtx op1 = XEXP (x, 1);
20013 int nbits;
20014 if (CONST_INT_P (XEXP (x, 1)))
20015 {
20016 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
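	      /* Count the set bits in the constant multiplier with
	         Kernighan's bit-clearing loop; each set bit contributes
	         mult_bit to the cost below.  */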
20017 for (nbits = 0; value != 0; value &= value - 1)
20018 nbits++;
20019 }
20020 else
20021 /* This is arbitrary. */
20022 nbits = 7;
20023
20024 /* Compute costs correctly for widening multiplication. */
 20025 	  if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20026 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20027 == GET_MODE_SIZE (mode))
20028 {
20029 int is_mulwiden = 0;
20030 enum machine_mode inner_mode = GET_MODE (op0);
20031
20032 if (GET_CODE (op0) == GET_CODE (op1))
20033 is_mulwiden = 1, op1 = XEXP (op1, 0);
20034 else if (CONST_INT_P (op1))
20035 {
20036 if (GET_CODE (op0) == SIGN_EXTEND)
20037 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20038 == INTVAL (op1);
20039 else
20040 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20041 }
20042
20043 if (is_mulwiden)
20044 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20045 }
20046
20047 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20048 + nbits * ix86_cost->mult_bit
20049 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20050
20051 return true;
20052 }
20053
20054 case DIV:
20055 case UDIV:
20056 case MOD:
20057 case UMOD:
20058 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20059 /* ??? SSE cost should be used here. */
20060 *total = ix86_cost->fdiv;
20061 else if (X87_FLOAT_MODE_P (mode))
20062 *total = ix86_cost->fdiv;
20063 else if (FLOAT_MODE_P (mode))
20064 /* ??? SSE vector cost should be used here. */
20065 *total = ix86_cost->fdiv;
20066 else
20067 *total = ix86_cost->divide[MODE_INDEX (mode)];
20068 return false;
20069
20070 case PLUS:
20071 if (GET_MODE_CLASS (mode) == MODE_INT
20072 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20073 {
20074 if (GET_CODE (XEXP (x, 0)) == PLUS
20075 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20076 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20077 && CONSTANT_P (XEXP (x, 1)))
20078 {
20079 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20080 if (val == 2 || val == 4 || val == 8)
20081 {
20082 *total = ix86_cost->lea;
20083 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20084 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20085 outer_code);
20086 *total += rtx_cost (XEXP (x, 1), outer_code);
20087 return true;
20088 }
20089 }
20090 else if (GET_CODE (XEXP (x, 0)) == MULT
20091 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20092 {
20093 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20094 if (val == 2 || val == 4 || val == 8)
20095 {
20096 *total = ix86_cost->lea;
20097 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20098 *total += rtx_cost (XEXP (x, 1), outer_code);
20099 return true;
20100 }
20101 }
20102 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20103 {
20104 *total = ix86_cost->lea;
20105 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20106 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20107 *total += rtx_cost (XEXP (x, 1), outer_code);
20108 return true;
20109 }
20110 }
20111 /* FALLTHRU */
20112
20113 case MINUS:
20114 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20115 {
20116 /* ??? SSE cost should be used here. */
20117 *total = ix86_cost->fadd;
20118 return false;
20119 }
20120 else if (X87_FLOAT_MODE_P (mode))
20121 {
20122 *total = ix86_cost->fadd;
20123 return false;
20124 }
20125 else if (FLOAT_MODE_P (mode))
20126 {
20127 /* ??? SSE vector cost should be used here. */
20128 *total = ix86_cost->fadd;
20129 return false;
20130 }
20131 /* FALLTHRU */
20132
20133 case AND:
20134 case IOR:
20135 case XOR:
20136 if (!TARGET_64BIT && mode == DImode)
20137 {
20138 *total = (ix86_cost->add * 2
20139 + (rtx_cost (XEXP (x, 0), outer_code)
20140 << (GET_MODE (XEXP (x, 0)) != DImode))
20141 + (rtx_cost (XEXP (x, 1), outer_code)
20142 << (GET_MODE (XEXP (x, 1)) != DImode)));
20143 return true;
20144 }
20145 /* FALLTHRU */
20146
20147 case NEG:
20148 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20149 {
20150 /* ??? SSE cost should be used here. */
20151 *total = ix86_cost->fchs;
20152 return false;
20153 }
20154 else if (X87_FLOAT_MODE_P (mode))
20155 {
20156 *total = ix86_cost->fchs;
20157 return false;
20158 }
20159 else if (FLOAT_MODE_P (mode))
20160 {
20161 /* ??? SSE vector cost should be used here. */
20162 *total = ix86_cost->fchs;
20163 return false;
20164 }
20165 /* FALLTHRU */
20166
20167 case NOT:
20168 if (!TARGET_64BIT && mode == DImode)
20169 *total = ix86_cost->add * 2;
20170 else
20171 *total = ix86_cost->add;
20172 return false;
20173
20174 case COMPARE:
20175 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
20176 && XEXP (XEXP (x, 0), 1) == const1_rtx
20177 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
20178 && XEXP (x, 1) == const0_rtx)
20179 {
20180 /* This kind of construct is implemented using test[bwl].
20181 Treat it as if we had an AND. */
20182 *total = (ix86_cost->add
20183 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
20184 + rtx_cost (const1_rtx, outer_code));
20185 return true;
20186 }
20187 return false;
20188
20189 case FLOAT_EXTEND:
20190 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
20191 *total = 0;
20192 return false;
20193
20194 case ABS:
20195 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20196 /* ??? SSE cost should be used here. */
20197 *total = ix86_cost->fabs;
20198 else if (X87_FLOAT_MODE_P (mode))
20199 *total = ix86_cost->fabs;
20200 else if (FLOAT_MODE_P (mode))
20201 /* ??? SSE vector cost should be used here. */
20202 *total = ix86_cost->fabs;
20203 return false;
20204
20205 case SQRT:
20206 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20207 /* ??? SSE cost should be used here. */
20208 *total = ix86_cost->fsqrt;
20209 else if (X87_FLOAT_MODE_P (mode))
20210 *total = ix86_cost->fsqrt;
20211 else if (FLOAT_MODE_P (mode))
20212 /* ??? SSE vector cost should be used here. */
20213 *total = ix86_cost->fsqrt;
20214 return false;
20215
20216 case UNSPEC:
20217 if (XINT (x, 1) == UNSPEC_TP)
20218 *total = 0;
20219 return false;
20220
20221 default:
20222 return false;
20223 }
20224 }
20225
20226 #if TARGET_MACHO
20227
20228 static int current_machopic_label_num;
20229
20230 /* Given a symbol name and its associated stub, write out the
20231 definition of the stub. */
20232
20233 void
20234 machopic_output_stub (FILE *file, const char *symb, const char *stub)
20235 {
20236 unsigned int length;
20237 char *binder_name, *symbol_name, lazy_ptr_name[32];
20238 int label = ++current_machopic_label_num;
20239
20240 /* For 64-bit we shouldn't get here. */
20241 gcc_assert (!TARGET_64BIT);
20242
20243 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
20244 symb = (*targetm.strip_name_encoding) (symb);
20245
20246 length = strlen (stub);
20247 binder_name = alloca (length + 32);
20248 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
20249
20250 length = strlen (symb);
20251 symbol_name = alloca (length + 32);
20252 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
20253
20254 sprintf (lazy_ptr_name, "L%d$lz", label);
20255
20256 if (MACHOPIC_PURE)
20257 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
20258 else
20259 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
20260
20261 fprintf (file, "%s:\n", stub);
20262 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20263
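  /* For a pure PIC stub, materialize the PC in %eax and reach the lazy
     pointer PC-relatively; otherwise jump through it with an absolute
     address.  */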
20264 if (MACHOPIC_PURE)
20265 {
20266 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
20267 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
20268 fprintf (file, "\tjmp\t*%%edx\n");
20269 }
20270 else
20271 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
20272
20273 fprintf (file, "%s:\n", binder_name);
20274
20275 if (MACHOPIC_PURE)
20276 {
20277 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
20278 fprintf (file, "\tpushl\t%%eax\n");
20279 }
20280 else
20281 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
20282
20283 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
20284
20285 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
20286 fprintf (file, "%s:\n", lazy_ptr_name);
20287 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
20288 fprintf (file, "\t.long %s\n", binder_name);
20289 }
20290
20291 void
20292 darwin_x86_file_end (void)
20293 {
20294 darwin_file_end ();
20295 ix86_file_end ();
20296 }
20297 #endif /* TARGET_MACHO */
20298
20299 /* Order the registers for register allocator. */
20300
20301 void
20302 x86_order_regs_for_local_alloc (void)
20303 {
20304 int pos = 0;
20305 int i;
20306
20307 /* First allocate the local general purpose registers. */
20308 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20309 if (GENERAL_REGNO_P (i) && call_used_regs[i])
20310 reg_alloc_order [pos++] = i;
20311
20312 /* Global general purpose registers. */
20313 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
20314 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
20315 reg_alloc_order [pos++] = i;
20316
20317 /* x87 registers come first in case we are doing FP math
20318 using them. */
20319 if (!TARGET_SSE_MATH)
20320 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20321 reg_alloc_order [pos++] = i;
20322
20323 /* SSE registers. */
20324 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
20325 reg_alloc_order [pos++] = i;
20326 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
20327 reg_alloc_order [pos++] = i;
20328
20329 /* x87 registers. */
20330 if (TARGET_SSE_MATH)
20331 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
20332 reg_alloc_order [pos++] = i;
20333
20334 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
20335 reg_alloc_order [pos++] = i;
20336
 20337    /* Initialize the rest of the array, as we do not allocate some registers
 20338       at all.  */
20339 while (pos < FIRST_PSEUDO_REGISTER)
20340 reg_alloc_order [pos++] = 0;
20341 }
20342
20343 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
20344 struct attribute_spec.handler. */
20345 static tree
20346 ix86_handle_struct_attribute (tree *node, tree name,
20347 tree args ATTRIBUTE_UNUSED,
20348 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
20349 {
20350 tree *type = NULL;
20351 if (DECL_P (*node))
20352 {
20353 if (TREE_CODE (*node) == TYPE_DECL)
20354 type = &TREE_TYPE (*node);
20355 }
20356 else
20357 type = node;
20358
20359 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
20360 || TREE_CODE (*type) == UNION_TYPE)))
20361 {
20362 warning (OPT_Wattributes, "%qs attribute ignored",
20363 IDENTIFIER_POINTER (name));
20364 *no_add_attrs = true;
20365 }
20366
20367 else if ((is_attribute_p ("ms_struct", name)
20368 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
20369 || ((is_attribute_p ("gcc_struct", name)
20370 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
20371 {
20372 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
20373 IDENTIFIER_POINTER (name));
20374 *no_add_attrs = true;
20375 }
20376
20377 return NULL_TREE;
20378 }
20379
20380 static bool
20381 ix86_ms_bitfield_layout_p (tree record_type)
20382 {
20383 return (TARGET_MS_BITFIELD_LAYOUT &&
20384 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
20385 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
20386 }
20387
20388 /* Returns an expression indicating where the this parameter is
20389 located on entry to the FUNCTION. */
20390
20391 static rtx
20392 x86_this_parameter (tree function)
20393 {
20394 tree type = TREE_TYPE (function);
20395 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
20396
20397 if (TARGET_64BIT)
20398 {
20399 const int *parm_regs;
20400
20401 if (TARGET_64BIT_MS_ABI)
20402 parm_regs = x86_64_ms_abi_int_parameter_registers;
20403 else
20404 parm_regs = x86_64_int_parameter_registers;
20405 return gen_rtx_REG (DImode, parm_regs[aggr]);
20406 }
20407
20408 if (ix86_function_regparm (type, function) > 0
20409 && !type_has_variadic_args_p (type))
20410 {
20411 int regno = 0;
20412 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
20413 regno = 2;
20414 return gen_rtx_REG (SImode, regno);
20415 }
20416
20417 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
20418 }
20419
20420 /* Determine whether x86_output_mi_thunk can succeed. */
20421
20422 static bool
20423 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
20424 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
20425 HOST_WIDE_INT vcall_offset, tree function)
20426 {
20427 /* 64-bit can handle anything. */
20428 if (TARGET_64BIT)
20429 return true;
20430
20431 /* For 32-bit, everything's fine if we have one free register. */
20432 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
20433 return true;
20434
20435 /* Need a free register for vcall_offset. */
20436 if (vcall_offset)
20437 return false;
20438
20439 /* Need a free register for GOT references. */
20440 if (flag_pic && !(*targetm.binds_local_p) (function))
20441 return false;
20442
20443 /* Otherwise ok. */
20444 return true;
20445 }
20446
20447 /* Output the assembler code for a thunk function. THUNK_DECL is the
20448 declaration for the thunk function itself, FUNCTION is the decl for
20449 the target function. DELTA is an immediate constant offset to be
20450 added to THIS. If VCALL_OFFSET is nonzero, the word at
20451 *(*this + vcall_offset) should be added to THIS. */
20452
20453 static void
20454 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
20455 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
20456 HOST_WIDE_INT vcall_offset, tree function)
20457 {
20458 rtx xops[3];
20459 rtx this = x86_this_parameter (function);
20460 rtx this_reg, tmp;
20461
20462 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
20463 pull it in now and let DELTA benefit. */
20464 if (REG_P (this))
20465 this_reg = this;
20466 else if (vcall_offset)
20467 {
20468 /* Put the this parameter into %eax. */
20469 xops[0] = this;
20470 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
20471 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20472 }
20473 else
20474 this_reg = NULL_RTX;
20475
20476 /* Adjust the this parameter by a fixed constant. */
20477 if (delta)
20478 {
20479 xops[0] = GEN_INT (delta);
20480 xops[1] = this_reg ? this_reg : this;
20481 if (TARGET_64BIT)
20482 {
20483 if (!x86_64_general_operand (xops[0], DImode))
20484 {
20485 tmp = gen_rtx_REG (DImode, R10_REG);
20486 xops[1] = tmp;
20487 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
20488 xops[0] = tmp;
20489 xops[1] = this;
20490 }
20491 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20492 }
20493 else
20494 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20495 }
20496
20497 /* Adjust the this parameter by a value stored in the vtable. */
20498 if (vcall_offset)
20499 {
20500 if (TARGET_64BIT)
20501 tmp = gen_rtx_REG (DImode, R10_REG);
20502 else
20503 {
20504 int tmp_regno = 2 /* ECX */;
20505 if (lookup_attribute ("fastcall",
20506 TYPE_ATTRIBUTES (TREE_TYPE (function))))
20507 tmp_regno = 0 /* EAX */;
20508 tmp = gen_rtx_REG (SImode, tmp_regno);
20509 }
20510
20511 xops[0] = gen_rtx_MEM (Pmode, this_reg);
20512 xops[1] = tmp;
20513 if (TARGET_64BIT)
20514 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20515 else
20516 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20517
20518 /* Adjust the this parameter. */
20519 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
20520 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
20521 {
20522 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
20523 xops[0] = GEN_INT (vcall_offset);
20524 xops[1] = tmp2;
20525 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
20526 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
20527 }
20528 xops[1] = this_reg;
20529 if (TARGET_64BIT)
20530 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
20531 else
20532 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
20533 }
20534
20535 /* If necessary, drop THIS back to its stack slot. */
20536 if (this_reg && this_reg != this)
20537 {
20538 xops[0] = this_reg;
20539 xops[1] = this;
20540 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
20541 }
20542
20543 xops[0] = XEXP (DECL_RTL (function), 0);
20544 if (TARGET_64BIT)
20545 {
20546 if (!flag_pic || (*targetm.binds_local_p) (function))
20547 output_asm_insn ("jmp\t%P0", xops);
20548 /* All thunks should be in the same object as their target,
20549 and thus binds_local_p should be true. */
20550 else if (TARGET_64BIT_MS_ABI)
20551 gcc_unreachable ();
20552 else
20553 {
20554 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
20555 tmp = gen_rtx_CONST (Pmode, tmp);
20556 tmp = gen_rtx_MEM (QImode, tmp);
20557 xops[0] = tmp;
20558 output_asm_insn ("jmp\t%A0", xops);
20559 }
20560 }
20561 else
20562 {
20563 if (!flag_pic || (*targetm.binds_local_p) (function))
20564 output_asm_insn ("jmp\t%P0", xops);
20565 else
20566 #if TARGET_MACHO
20567 if (TARGET_MACHO)
20568 {
20569 rtx sym_ref = XEXP (DECL_RTL (function), 0);
20570 tmp = (gen_rtx_SYMBOL_REF
20571 (Pmode,
20572 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
20573 tmp = gen_rtx_MEM (QImode, tmp);
20574 xops[0] = tmp;
20575 output_asm_insn ("jmp\t%0", xops);
20576 }
20577 else
20578 #endif /* TARGET_MACHO */
20579 {
20580 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20581 output_set_got (tmp, NULL_RTX);
20582
20583 xops[1] = tmp;
20584 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
20585 output_asm_insn ("jmp\t{*}%1", xops);
20586 }
20587 }
20588 }
20589
20590 static void
20591 x86_file_start (void)
20592 {
20593 default_file_start ();
20594 #if TARGET_MACHO
20595 darwin_file_start ();
20596 #endif
20597 if (X86_FILE_START_VERSION_DIRECTIVE)
20598 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20599 if (X86_FILE_START_FLTUSED)
20600 fputs ("\t.global\t__fltused\n", asm_out_file);
20601 if (ix86_asm_dialect == ASM_INTEL)
20602 fputs ("\t.intel_syntax\n", asm_out_file);
20603 }
20604
20605 int
20606 x86_field_alignment (tree field, int computed)
20607 {
20608 enum machine_mode mode;
20609 tree type = TREE_TYPE (field);
20610
20611 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20612 return computed;
20613 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20614 ? get_inner_array_type (type) : type);
20615 if (mode == DFmode || mode == DCmode
20616 || GET_MODE_CLASS (mode) == MODE_INT
20617 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20618 return MIN (32, computed);
20619 return computed;
20620 }
20621
20622 /* Output assembler code to FILE to increment profiler label # LABELNO
20623 for profiling a function entry. */
20624 void
20625 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20626 {
20627 if (TARGET_64BIT)
20628 {
20629 #ifndef NO_PROFILE_COUNTERS
20630 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
20631 #endif
20632
20633 if (!TARGET_64BIT_MS_ABI && flag_pic)
20634 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20635 else
20636 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20637 }
20638 else if (flag_pic)
20639 {
20640 #ifndef NO_PROFILE_COUNTERS
20641 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20642 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20643 #endif
20644 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20645 }
20646 else
20647 {
20648 #ifndef NO_PROFILE_COUNTERS
20649 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20650 PROFILE_COUNT_REGISTER);
20651 #endif
20652 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20653 }
20654 }
20655
20656 /* We don't have exact information about the insn sizes, but we may assume
20657 quite safely that we are informed about all 1 byte insns and memory
20658 address sizes. This is enough to eliminate unnecessary padding in
20659 99% of cases. */
20660
20661 static int
20662 min_insn_size (rtx insn)
20663 {
20664 int l = 0;
20665
20666 if (!INSN_P (insn) || !active_insn_p (insn))
20667 return 0;
20668
 20669   /* Discard the alignment insns we have emitted ourselves and jump tables.  */
20670 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20671 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20672 return 0;
20673 if (JUMP_P (insn)
20674 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20675 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20676 return 0;
20677
 20678   /* Important case - calls are always 5 bytes.
 20679      It is common to have many calls in a row.  */
20680 if (CALL_P (insn)
20681 && symbolic_reference_mentioned_p (PATTERN (insn))
20682 && !SIBLING_CALL_P (insn))
20683 return 5;
20684 if (get_attr_length (insn) <= 1)
20685 return 1;
20686
20687 /* For normal instructions we may rely on the sizes of addresses
 20688      and the presence of a symbol to require 4 bytes of encoding.
 20689      This is not the case for jumps, where references are PC relative.  */
20690 if (!JUMP_P (insn))
20691 {
20692 l = get_attr_length_address (insn);
20693 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20694 l = 4;
20695 }
20696 if (l)
20697 return 1+l;
20698 else
20699 return 2;
20700 }
20701
 20702 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
 20703    16-byte window.  */
20704
20705 static void
20706 ix86_avoid_jump_misspredicts (void)
20707 {
20708 rtx insn, start = get_insns ();
20709 int nbytes = 0, njumps = 0;
20710 int isjump = 0;
20711
20712 /* Look for all minimal intervals of instructions containing 4 jumps.
20713 The intervals are bounded by START and INSN. NBYTES is the total
20714 size of instructions in the interval including INSN and not including
20715 START. When the NBYTES is smaller than 16 bytes, it is possible
20716 that the end of START and INSN ends up in the same 16byte page.
20717
20718 The smallest offset in the page INSN can start is the case where START
20719 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20720 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
20721 */
20722 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20723 {
20724
20725 nbytes += min_insn_size (insn);
20726 if (dump_file)
20727 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20728 INSN_UID (insn), min_insn_size (insn));
20729 if ((JUMP_P (insn)
20730 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20731 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20732 || CALL_P (insn))
20733 njumps++;
20734 else
20735 continue;
20736
20737 while (njumps > 3)
20738 {
20739 start = NEXT_INSN (start);
20740 if ((JUMP_P (start)
20741 && GET_CODE (PATTERN (start)) != ADDR_VEC
20742 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20743 || CALL_P (start))
20744 njumps--, isjump = 1;
20745 else
20746 isjump = 0;
20747 nbytes -= min_insn_size (start);
20748 }
20749 gcc_assert (njumps >= 0);
20750 if (dump_file)
20751 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20752 INSN_UID (start), INSN_UID (insn), nbytes);
20753
20754 if (njumps == 3 && isjump && nbytes < 16)
20755 {
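	  /* Pad before INSN so that the fourth jump does not end up in the
	     same 16-byte window as the previous three jumps.  */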
20756 int padsize = 15 - nbytes + min_insn_size (insn);
20757
20758 if (dump_file)
20759 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20760 INSN_UID (insn), padsize);
20761 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
20762 }
20763 }
20764 }
20765
 20766 /* AMD Athlon works faster
 20767    when RET is not the destination of a conditional jump or directly preceded
 20768    by another jump instruction.  We avoid the penalty by inserting a NOP just
 20769    before such RET instructions.  */
20770 static void
20771 ix86_pad_returns (void)
20772 {
20773 edge e;
20774 edge_iterator ei;
20775
20776 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20777 {
20778 basic_block bb = e->src;
20779 rtx ret = BB_END (bb);
20780 rtx prev;
20781 bool replace = false;
20782
20783 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20784 || !maybe_hot_bb_p (bb))
20785 continue;
20786 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20787 if (active_insn_p (prev) || LABEL_P (prev))
20788 break;
20789 if (prev && LABEL_P (prev))
20790 {
20791 edge e;
20792 edge_iterator ei;
20793
20794 FOR_EACH_EDGE (e, ei, bb->preds)
20795 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20796 && !(e->flags & EDGE_FALLTHRU))
20797 replace = true;
20798 }
20799 if (!replace)
20800 {
20801 prev = prev_active_insn (ret);
20802 if (prev
20803 && ((JUMP_P (prev) && any_condjump_p (prev))
20804 || CALL_P (prev)))
20805 replace = true;
 20806 /* Empty functions get a branch mispredict even when the jump destination
 20807 is not visible to us. */
20808 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20809 replace = true;
20810 }
20811 if (replace)
20812 {
20813 emit_insn_before (gen_return_internal_long (), ret);
20814 delete_insn (ret);
20815 }
20816 }
20817 }
20818
 20819 /* Implement machine specific optimizations. We implement padding of returns
 20820 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
20821 static void
20822 ix86_reorg (void)
20823 {
20824 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20825 ix86_pad_returns ();
20826 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20827 ix86_avoid_jump_misspredicts ();
20828 }
20829
 20830 /* Return nonzero when a QImode register that must be represented via a REX
 20831 prefix is used. */
20832 bool
20833 x86_extended_QIreg_mentioned_p (rtx insn)
20834 {
20835 int i;
20836 extract_insn_cached (insn);
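  /* A sketch of the rationale, assuming the i386 hard register numbering:
     registers 4..7 are SI, DI, BP and SP, whose QImode parts (SIL, DIL,
     BPL, SPL) are only addressable with a REX prefix, as are all
     higher-numbered registers.  */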
20837 for (i = 0; i < recog_data.n_operands; i++)
20838 if (REG_P (recog_data.operand[i])
20839 && REGNO (recog_data.operand[i]) >= 4)
20840 return true;
20841 return false;
20842 }
20843
 20844 /* Return nonzero when P points to a register encoded via a REX prefix.
 20845 Called via for_each_rtx. */
20846 static int
20847 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20848 {
20849 unsigned int regno;
20850 if (!REG_P (*p))
20851 return 0;
20852 regno = REGNO (*p);
20853 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20854 }
20855
 20856 /* Return true when INSN mentions a register that must be encoded using a REX
 20857 prefix. */
20858 bool
20859 x86_extended_reg_mentioned_p (rtx insn)
20860 {
20861 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20862 }
20863
20864 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20865 optabs would emit if we didn't have TFmode patterns. */
20866
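/* Roughly equivalent C for the sequence emitted below (a sketch with a
   generic unsigned integer IN and floating-point OUT, not the literal
   expansion):

     if ((signed) in >= 0)
       out = (double) in;
     else
       out = 2.0 * (double) ((in >> 1) | (in & 1));

   The low bit is OR-ed back into the halved value so that the final
   doubling rounds the same way a direct conversion would.  */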
20867 void
20868 x86_emit_floatuns (rtx operands[2])
20869 {
20870 rtx neglab, donelab, i0, i1, f0, in, out;
20871 enum machine_mode mode, inmode;
20872
20873 inmode = GET_MODE (operands[1]);
20874 gcc_assert (inmode == SImode || inmode == DImode);
20875
20876 out = operands[0];
20877 in = force_reg (inmode, operands[1]);
20878 mode = GET_MODE (out);
20879 neglab = gen_label_rtx ();
20880 donelab = gen_label_rtx ();
20881 f0 = gen_reg_rtx (mode);
20882
20883 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20884
20885 expand_float (out, in, 0);
20886
20887 emit_jump_insn (gen_jump (donelab));
20888 emit_barrier ();
20889
20890 emit_label (neglab);
20891
20892 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20893 1, OPTAB_DIRECT);
20894 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20895 1, OPTAB_DIRECT);
20896 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20897
20898 expand_float (f0, i0, 0);
20899
20900 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20901
20902 emit_label (donelab);
20903 }
20904 \f
20905 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
 20906 with all elements equal to VAL. Return true if successful. */
20907
20908 static bool
20909 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20910 rtx target, rtx val)
20911 {
20912 enum machine_mode smode, wsmode, wvmode;
20913 rtx x;
20914
20915 switch (mode)
20916 {
20917 case V2SImode:
20918 case V2SFmode:
20919 if (!mmx_ok)
20920 return false;
20921 /* FALLTHRU */
20922
20923 case V2DFmode:
20924 case V2DImode:
20925 case V4SFmode:
20926 case V4SImode:
20927 val = force_reg (GET_MODE_INNER (mode), val);
20928 x = gen_rtx_VEC_DUPLICATE (mode, val);
20929 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20930 return true;
20931
20932 case V4HImode:
20933 if (!mmx_ok)
20934 return false;
20935 if (TARGET_SSE || TARGET_3DNOW_A)
20936 {
20937 val = gen_lowpart (SImode, val);
20938 x = gen_rtx_TRUNCATE (HImode, val);
20939 x = gen_rtx_VEC_DUPLICATE (mode, x);
20940 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20941 return true;
20942 }
20943 else
20944 {
20945 smode = HImode;
20946 wsmode = SImode;
20947 wvmode = V2SImode;
20948 goto widen;
20949 }
20950
20951 case V8QImode:
20952 if (!mmx_ok)
20953 return false;
20954 smode = QImode;
20955 wsmode = HImode;
20956 wvmode = V4HImode;
20957 goto widen;
20958 case V8HImode:
20959 if (TARGET_SSE2)
20960 {
20961 rtx tmp1, tmp2;
20962 /* Extend HImode to SImode using a paradoxical SUBREG. */
20963 tmp1 = gen_reg_rtx (SImode);
20964 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20965 /* Insert the SImode value as low element of V4SImode vector. */
20966 tmp2 = gen_reg_rtx (V4SImode);
20967 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20968 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20969 CONST0_RTX (V4SImode),
20970 const1_rtx);
20971 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20972 /* Cast the V4SImode vector back to a V8HImode vector. */
20973 tmp1 = gen_reg_rtx (V8HImode);
20974 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20975 /* Duplicate the low short through the whole low SImode word. */
20976 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20977 /* Cast the V8HImode vector back to a V4SImode vector. */
20978 tmp2 = gen_reg_rtx (V4SImode);
20979 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20980 /* Replicate the low element of the V4SImode vector. */
20981 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 20982 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20983 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20984 return true;
20985 }
20986 smode = HImode;
20987 wsmode = SImode;
20988 wvmode = V4SImode;
20989 goto widen;
20990 case V16QImode:
20991 if (TARGET_SSE2)
20992 {
20993 rtx tmp1, tmp2;
20994 /* Extend QImode to SImode using a paradoxical SUBREG. */
20995 tmp1 = gen_reg_rtx (SImode);
20996 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20997 /* Insert the SImode value as low element of V4SImode vector. */
20998 tmp2 = gen_reg_rtx (V4SImode);
20999 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21000 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21001 CONST0_RTX (V4SImode),
21002 const1_rtx);
21003 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21004 /* Cast the V4SImode vector back to a V16QImode vector. */
21005 tmp1 = gen_reg_rtx (V16QImode);
21006 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21007 /* Duplicate the low byte through the whole low SImode word. */
21008 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21009 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21010 /* Cast the V16QImode vector back to a V4SImode vector. */
21011 tmp2 = gen_reg_rtx (V4SImode);
21012 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21013 /* Replicate the low element of the V4SImode vector. */
21014 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
 21015 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21016 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21017 return true;
21018 }
21019 smode = QImode;
21020 wsmode = HImode;
21021 wvmode = V8HImode;
21022 goto widen;
21023 widen:
21024 /* Replicate the value once into the next wider mode and recurse. */
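/* For instance (illustrative values), a V8QImode broadcast of the byte 0xAB
   first builds the HImode word 0xABAB, recurses to broadcast that word
   through V4HImode, and the final lowpart move below reinterprets the
   result in the original vector mode.  */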
21025 val = convert_modes (wsmode, smode, val, true);
21026 x = expand_simple_binop (wsmode, ASHIFT, val,
21027 GEN_INT (GET_MODE_BITSIZE (smode)),
21028 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21029 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21030
21031 x = gen_reg_rtx (wvmode);
21032 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21033 gcc_unreachable ();
21034 emit_move_insn (target, gen_lowpart (mode, x));
21035 return true;
21036
21037 default:
21038 return false;
21039 }
21040 }
21041
21042 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21043 whose ONE_VAR element is VAR, and other elements are zero. Return true
21044 if successful. */
21045
21046 static bool
21047 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21048 rtx target, rtx var, int one_var)
21049 {
21050 enum machine_mode vsimode;
21051 rtx new_target;
21052 rtx x, tmp;
21053
21054 switch (mode)
21055 {
21056 case V2SFmode:
21057 case V2SImode:
21058 if (!mmx_ok)
21059 return false;
21060 /* FALLTHRU */
21061
21062 case V2DFmode:
21063 case V2DImode:
21064 if (one_var != 0)
21065 return false;
21066 var = force_reg (GET_MODE_INNER (mode), var);
21067 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21068 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21069 return true;
21070
21071 case V4SFmode:
21072 case V4SImode:
21073 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21074 new_target = gen_reg_rtx (mode);
21075 else
21076 new_target = target;
21077 var = force_reg (GET_MODE_INNER (mode), var);
21078 x = gen_rtx_VEC_DUPLICATE (mode, var);
21079 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21080 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21081 if (one_var != 0)
21082 {
21083 /* We need to shuffle the value to the correct position, so
21084 create a new pseudo to store the intermediate result. */
21085
21086 /* With SSE2, we can use the integer shuffle insns. */
21087 if (mode != V4SFmode && TARGET_SSE2)
21088 {
21089 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21090 GEN_INT (1),
21091 GEN_INT (one_var == 1 ? 0 : 1),
21092 GEN_INT (one_var == 2 ? 0 : 1),
21093 GEN_INT (one_var == 3 ? 0 : 1)));
21094 if (target != new_target)
21095 emit_move_insn (target, new_target);
21096 return true;
21097 }
21098
21099 /* Otherwise convert the intermediate result to V4SFmode and
21100 use the SSE1 shuffle instructions. */
21101 if (mode != V4SFmode)
21102 {
21103 tmp = gen_reg_rtx (V4SFmode);
21104 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21105 }
21106 else
21107 tmp = new_target;
21108
21109 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21110 GEN_INT (1),
21111 GEN_INT (one_var == 1 ? 0 : 1),
21112 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21113 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21114
21115 if (mode != V4SFmode)
21116 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21117 else if (tmp != target)
21118 emit_move_insn (target, tmp);
21119 }
21120 else if (target != new_target)
21121 emit_move_insn (target, new_target);
21122 return true;
21123
21124 case V8HImode:
21125 case V16QImode:
21126 vsimode = V4SImode;
21127 goto widen;
21128 case V4HImode:
21129 case V8QImode:
21130 if (!mmx_ok)
21131 return false;
21132 vsimode = V2SImode;
21133 goto widen;
21134 widen:
21135 if (one_var != 0)
21136 return false;
21137
21138 /* Zero extend the variable element to SImode and recurse. */
21139 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
21140
21141 x = gen_reg_rtx (vsimode);
21142 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
21143 var, one_var))
21144 gcc_unreachable ();
21145
21146 emit_move_insn (target, gen_lowpart (mode, x));
21147 return true;
21148
21149 default:
21150 return false;
21151 }
21152 }
21153
21154 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21155 consisting of the values in VALS. It is known that all elements
21156 except ONE_VAR are constants. Return true if successful. */
21157
21158 static bool
21159 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
21160 rtx target, rtx vals, int one_var)
21161 {
21162 rtx var = XVECEXP (vals, 0, one_var);
21163 enum machine_mode wmode;
21164 rtx const_vec, x;
21165
21166 const_vec = copy_rtx (vals);
21167 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
21168 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
21169
21170 switch (mode)
21171 {
21172 case V2DFmode:
21173 case V2DImode:
21174 case V2SFmode:
21175 case V2SImode:
21176 /* For the two element vectors, it's just as easy to use
21177 the general case. */
21178 return false;
21179
21180 case V4SFmode:
21181 case V4SImode:
21182 case V8HImode:
21183 case V4HImode:
21184 break;
21185
21186 case V16QImode:
21187 wmode = V8HImode;
21188 goto widen;
21189 case V8QImode:
21190 wmode = V4HImode;
21191 goto widen;
21192 widen:
21193 /* There's no way to set one QImode entry easily. Combine
21194 the variable value with its adjacent constant value, and
21195 promote to an HImode set. */
21196 x = XVECEXP (vals, 0, one_var ^ 1);
21197 if (one_var & 1)
21198 {
21199 var = convert_modes (HImode, QImode, var, true);
21200 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
21201 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21202 x = GEN_INT (INTVAL (x) & 0xff);
21203 }
21204 else
21205 {
21206 var = convert_modes (HImode, QImode, var, true);
21207 x = gen_int_mode (INTVAL (x) << 8, HImode);
21208 }
21209 if (x != const0_rtx)
21210 var = expand_simple_binop (HImode, IOR, var, x, var,
21211 1, OPTAB_LIB_WIDEN);
21212
21213 x = gen_reg_rtx (wmode);
21214 emit_move_insn (x, gen_lowpart (wmode, const_vec));
21215 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
21216
21217 emit_move_insn (target, gen_lowpart (mode, x));
21218 return true;
21219
21220 default:
21221 return false;
21222 }
21223
21224 emit_move_insn (target, const_vec);
21225 ix86_expand_vector_set (mmx_ok, target, var, one_var);
21226 return true;
21227 }
21228
21229 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
21230 all values variable, and none identical. */
21231
21232 static void
21233 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
21234 rtx target, rtx vals)
21235 {
21236 enum machine_mode half_mode = GET_MODE_INNER (mode);
21237 rtx op0 = NULL, op1 = NULL;
21238 bool use_vec_concat = false;
21239
21240 switch (mode)
21241 {
21242 case V2SFmode:
21243 case V2SImode:
21244 if (!mmx_ok && !TARGET_SSE)
21245 break;
21246 /* FALLTHRU */
21247
21248 case V2DFmode:
21249 case V2DImode:
21250 /* For the two element vectors, we always implement VEC_CONCAT. */
21251 op0 = XVECEXP (vals, 0, 0);
21252 op1 = XVECEXP (vals, 0, 1);
21253 use_vec_concat = true;
21254 break;
21255
21256 case V4SFmode:
21257 half_mode = V2SFmode;
21258 goto half;
21259 case V4SImode:
21260 half_mode = V2SImode;
21261 goto half;
21262 half:
21263 {
21264 rtvec v;
21265
21266 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
21267 Recurse to load the two halves. */
21268
21269 op0 = gen_reg_rtx (half_mode);
21270 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
21271 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
21272
21273 op1 = gen_reg_rtx (half_mode);
21274 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
21275 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
21276
21277 use_vec_concat = true;
21278 }
21279 break;
21280
21281 case V8HImode:
21282 case V16QImode:
21283 case V4HImode:
21284 case V8QImode:
21285 break;
21286
21287 default:
21288 gcc_unreachable ();
21289 }
21290
21291 if (use_vec_concat)
21292 {
21293 if (!register_operand (op0, half_mode))
21294 op0 = force_reg (half_mode, op0);
21295 if (!register_operand (op1, half_mode))
21296 op1 = force_reg (half_mode, op1);
21297
21298 emit_insn (gen_rtx_SET (VOIDmode, target,
21299 gen_rtx_VEC_CONCAT (mode, op0, op1)));
21300 }
21301 else
21302 {
21303 int i, j, n_elts, n_words, n_elt_per_word;
21304 enum machine_mode inner_mode;
21305 rtx words[4], shift;
21306
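/* No single-instruction construction is available here, so build each
   word-sized chunk of the vector with shifts and IORs (the element with the
   highest index in a chunk ends up in the most significant bits, matching
   little-endian element order), then combine the chunks below with a single
   lowpart move, low/high part moves into a temporary, or a V4SImode
   recursion, depending on the word count.  */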
21307 inner_mode = GET_MODE_INNER (mode);
21308 n_elts = GET_MODE_NUNITS (mode);
21309 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
21310 n_elt_per_word = n_elts / n_words;
21311 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
21312
21313 for (i = 0; i < n_words; ++i)
21314 {
21315 rtx word = NULL_RTX;
21316
21317 for (j = 0; j < n_elt_per_word; ++j)
21318 {
21319 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
21320 elt = convert_modes (word_mode, inner_mode, elt, true);
21321
21322 if (j == 0)
21323 word = elt;
21324 else
21325 {
21326 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
21327 word, 1, OPTAB_LIB_WIDEN);
21328 word = expand_simple_binop (word_mode, IOR, word, elt,
21329 word, 1, OPTAB_LIB_WIDEN);
21330 }
21331 }
21332
21333 words[i] = word;
21334 }
21335
21336 if (n_words == 1)
21337 emit_move_insn (target, gen_lowpart (mode, words[0]));
21338 else if (n_words == 2)
21339 {
21340 rtx tmp = gen_reg_rtx (mode);
21341 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
21342 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
21343 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
21344 emit_move_insn (target, tmp);
21345 }
21346 else if (n_words == 4)
21347 {
21348 rtx tmp = gen_reg_rtx (V4SImode);
21349 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
21350 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
21351 emit_move_insn (target, gen_lowpart (mode, tmp));
21352 }
21353 else
21354 gcc_unreachable ();
21355 }
21356 }
21357
21358 /* Initialize vector TARGET via VALS. Suppress the use of MMX
21359 instructions unless MMX_OK is true. */
21360
21361 void
21362 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
21363 {
21364 enum machine_mode mode = GET_MODE (target);
21365 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21366 int n_elts = GET_MODE_NUNITS (mode);
21367 int n_var = 0, one_var = -1;
21368 bool all_same = true, all_const_zero = true;
21369 int i;
21370 rtx x;
21371
21372 for (i = 0; i < n_elts; ++i)
21373 {
21374 x = XVECEXP (vals, 0, i);
21375 if (!CONSTANT_P (x))
21376 n_var++, one_var = i;
21377 else if (x != CONST0_RTX (inner_mode))
21378 all_const_zero = false;
21379 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
21380 all_same = false;
21381 }
21382
21383 /* Constants are best loaded from the constant pool. */
21384 if (n_var == 0)
21385 {
21386 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
21387 return;
21388 }
21389
21390 /* If all values are identical, broadcast the value. */
21391 if (all_same
21392 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
21393 XVECEXP (vals, 0, 0)))
21394 return;
21395
21396 /* Values where only one field is non-constant are best loaded from
21397 the pool and overwritten via move later. */
21398 if (n_var == 1)
21399 {
21400 if (all_const_zero
21401 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
21402 XVECEXP (vals, 0, one_var),
21403 one_var))
21404 return;
21405
21406 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
21407 return;
21408 }
21409
21410 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
21411 }
21412
21413 void
21414 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
21415 {
21416 enum machine_mode mode = GET_MODE (target);
21417 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21418 bool use_vec_merge = false;
21419 rtx tmp;
21420
21421 switch (mode)
21422 {
21423 case V2SFmode:
21424 case V2SImode:
21425 if (mmx_ok)
21426 {
21427 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
21428 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
21429 if (elt == 0)
21430 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
21431 else
21432 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
21433 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21434 return;
21435 }
21436 break;
21437
21438 case V2DImode:
21439 use_vec_merge = TARGET_SSE4_1;
21440 if (use_vec_merge)
21441 break;
21442
21443 case V2DFmode:
21444 {
21445 rtx op0, op1;
21446
21447 /* For the two element vectors, we implement a VEC_CONCAT with
21448 the extraction of the other element. */
21449
21450 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
21451 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
21452
21453 if (elt == 0)
21454 op0 = val, op1 = tmp;
21455 else
21456 op0 = tmp, op1 = val;
21457
21458 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
21459 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21460 }
21461 return;
21462
21463 case V4SFmode:
21464 use_vec_merge = TARGET_SSE4_1;
21465 if (use_vec_merge)
21466 break;
21467
21468 switch (elt)
21469 {
21470 case 0:
21471 use_vec_merge = true;
21472 break;
21473
21474 case 1:
21475 /* tmp = target = A B C D */
21476 tmp = copy_to_reg (target);
21477 /* target = A A B B */
21478 emit_insn (gen_sse_unpcklps (target, target, target));
21479 /* target = X A B B */
21480 ix86_expand_vector_set (false, target, val, 0);
21481 /* target = A X C D */
21482 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21483 GEN_INT (1), GEN_INT (0),
21484 GEN_INT (2+4), GEN_INT (3+4)));
21485 return;
21486
21487 case 2:
21488 /* tmp = target = A B C D */
21489 tmp = copy_to_reg (target);
21490 /* tmp = X B C D */
21491 ix86_expand_vector_set (false, tmp, val, 0);
21492 /* target = A B X D */
21493 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21494 GEN_INT (0), GEN_INT (1),
21495 GEN_INT (0+4), GEN_INT (3+4)));
21496 return;
21497
21498 case 3:
21499 /* tmp = target = A B C D */
21500 tmp = copy_to_reg (target);
21501 /* tmp = X B C D */
21502 ix86_expand_vector_set (false, tmp, val, 0);
 21503 /* target = A B C X */
21504 emit_insn (gen_sse_shufps_1 (target, target, tmp,
21505 GEN_INT (0), GEN_INT (1),
21506 GEN_INT (2+4), GEN_INT (0+4)));
21507 return;
21508
21509 default:
21510 gcc_unreachable ();
21511 }
21512 break;
21513
21514 case V4SImode:
21515 use_vec_merge = TARGET_SSE4_1;
21516 if (use_vec_merge)
21517 break;
21518
21519 /* Element 0 handled by vec_merge below. */
21520 if (elt == 0)
21521 {
21522 use_vec_merge = true;
21523 break;
21524 }
21525
21526 if (TARGET_SSE2)
21527 {
21528 /* With SSE2, use integer shuffles to swap element 0 and ELT,
21529 store into element 0, then shuffle them back. */
21530
21531 rtx order[4];
21532
21533 order[0] = GEN_INT (elt);
21534 order[1] = const1_rtx;
21535 order[2] = const2_rtx;
21536 order[3] = GEN_INT (3);
21537 order[elt] = const0_rtx;
21538
21539 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21540 order[1], order[2], order[3]));
21541
21542 ix86_expand_vector_set (false, target, val, 0);
21543
21544 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
21545 order[1], order[2], order[3]));
21546 }
21547 else
21548 {
21549 /* For SSE1, we have to reuse the V4SF code. */
21550 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
21551 gen_lowpart (SFmode, val), elt);
21552 }
21553 return;
21554
21555 case V8HImode:
21556 use_vec_merge = TARGET_SSE2;
21557 break;
21558 case V4HImode:
21559 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21560 break;
21561
21562 case V16QImode:
21563 use_vec_merge = TARGET_SSE4_1;
21564 break;
21565
21566 case V8QImode:
21567 default:
21568 break;
21569 }
21570
21571 if (use_vec_merge)
21572 {
21573 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
21574 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
21575 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21576 }
21577 else
21578 {
21579 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21580
21581 emit_move_insn (mem, target);
21582
21583 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21584 emit_move_insn (tmp, val);
21585
21586 emit_move_insn (target, mem);
21587 }
21588 }
21589
21590 void
21591 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
21592 {
21593 enum machine_mode mode = GET_MODE (vec);
21594 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21595 bool use_vec_extr = false;
21596 rtx tmp;
21597
21598 switch (mode)
21599 {
21600 case V2SImode:
21601 case V2SFmode:
21602 if (!mmx_ok)
21603 break;
21604 /* FALLTHRU */
21605
21606 case V2DFmode:
21607 case V2DImode:
21608 use_vec_extr = true;
21609 break;
21610
21611 case V4SFmode:
21612 use_vec_extr = TARGET_SSE4_1;
21613 if (use_vec_extr)
21614 break;
21615
21616 switch (elt)
21617 {
21618 case 0:
21619 tmp = vec;
21620 break;
21621
21622 case 1:
21623 case 3:
21624 tmp = gen_reg_rtx (mode);
21625 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
21626 GEN_INT (elt), GEN_INT (elt),
21627 GEN_INT (elt+4), GEN_INT (elt+4)));
21628 break;
21629
21630 case 2:
21631 tmp = gen_reg_rtx (mode);
21632 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21633 break;
21634
21635 default:
21636 gcc_unreachable ();
21637 }
21638 vec = tmp;
21639 use_vec_extr = true;
21640 elt = 0;
21641 break;
21642
21643 case V4SImode:
21644 use_vec_extr = TARGET_SSE4_1;
21645 if (use_vec_extr)
21646 break;
21647
21648 if (TARGET_SSE2)
21649 {
21650 switch (elt)
21651 {
21652 case 0:
21653 tmp = vec;
21654 break;
21655
21656 case 1:
21657 case 3:
21658 tmp = gen_reg_rtx (mode);
21659 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21660 GEN_INT (elt), GEN_INT (elt),
21661 GEN_INT (elt), GEN_INT (elt)));
21662 break;
21663
21664 case 2:
21665 tmp = gen_reg_rtx (mode);
21666 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21667 break;
21668
21669 default:
21670 gcc_unreachable ();
21671 }
21672 vec = tmp;
21673 use_vec_extr = true;
21674 elt = 0;
21675 }
21676 else
21677 {
21678 /* For SSE1, we have to reuse the V4SF code. */
21679 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21680 gen_lowpart (V4SFmode, vec), elt);
21681 return;
21682 }
21683 break;
21684
21685 case V8HImode:
21686 use_vec_extr = TARGET_SSE2;
21687 break;
21688 case V4HImode:
21689 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21690 break;
21691
21692 case V16QImode:
21693 use_vec_extr = TARGET_SSE4_1;
21694 break;
21695
21696 case V8QImode:
21697 /* ??? Could extract the appropriate HImode element and shift. */
21698 default:
21699 break;
21700 }
21701
21702 if (use_vec_extr)
21703 {
21704 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21705 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21706
21707 /* Let the rtl optimizers know about the zero extension performed. */
21708 if (inner_mode == QImode || inner_mode == HImode)
21709 {
21710 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21711 target = gen_lowpart (SImode, target);
21712 }
21713
21714 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21715 }
21716 else
21717 {
21718 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21719
21720 emit_move_insn (mem, vec);
21721
21722 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21723 emit_move_insn (target, tmp);
21724 }
21725 }
21726
21727 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21728 pattern to reduce; DEST is the destination; IN is the input vector. */
21729
21730 void
21731 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21732 {
21733 rtx tmp1, tmp2, tmp3;
21734
21735 tmp1 = gen_reg_rtx (V4SFmode);
21736 tmp2 = gen_reg_rtx (V4SFmode);
21737 tmp3 = gen_reg_rtx (V4SFmode);
21738
21739 emit_insn (gen_sse_movhlps (tmp1, in, in));
21740 emit_insn (fn (tmp2, tmp1, in));
21741
21742 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21743 GEN_INT (1), GEN_INT (1),
21744 GEN_INT (1+4), GEN_INT (1+4)));
21745 emit_insn (fn (dest, tmp2, tmp3));
21746 }
21747 \f
21748 /* Target hook for scalar_mode_supported_p. */
21749 static bool
21750 ix86_scalar_mode_supported_p (enum machine_mode mode)
21751 {
21752 if (DECIMAL_FLOAT_MODE_P (mode))
21753 return true;
21754 else if (mode == TFmode)
21755 return TARGET_64BIT;
21756 else
21757 return default_scalar_mode_supported_p (mode);
21758 }
21759
21760 /* Implements target hook vector_mode_supported_p. */
21761 static bool
21762 ix86_vector_mode_supported_p (enum machine_mode mode)
21763 {
21764 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21765 return true;
21766 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21767 return true;
21768 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21769 return true;
21770 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21771 return true;
21772 return false;
21773 }
21774
21775 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21776
21777 We do this in the new i386 backend to maintain source compatibility
21778 with the old cc0-based compiler. */
21779
21780 static tree
21781 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21782 tree inputs ATTRIBUTE_UNUSED,
21783 tree clobbers)
21784 {
21785 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21786 clobbers);
21787 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21788 clobbers);
21789 return clobbers;
21790 }
21791
 21792 /* Implements the targetm.asm.encode_section_info target hook. This
 21793 is not used by NetWare. */
21794
21795 static void ATTRIBUTE_UNUSED
21796 ix86_encode_section_info (tree decl, rtx rtl, int first)
21797 {
21798 default_encode_section_info (decl, rtl, first);
21799
21800 if (TREE_CODE (decl) == VAR_DECL
21801 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21802 && ix86_in_large_data_p (decl))
21803 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21804 }
21805
21806 /* Worker function for REVERSE_CONDITION. */
21807
21808 enum rtx_code
21809 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21810 {
21811 return (mode != CCFPmode && mode != CCFPUmode
21812 ? reverse_condition (code)
21813 : reverse_condition_maybe_unordered (code));
21814 }
21815
21816 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21817 to OPERANDS[0]. */
21818
21819 const char *
21820 output_387_reg_move (rtx insn, rtx *operands)
21821 {
21822 if (REG_P (operands[0]))
21823 {
21824 if (REG_P (operands[1])
21825 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21826 {
21827 if (REGNO (operands[0]) == FIRST_STACK_REG)
21828 return output_387_ffreep (operands, 0);
21829 return "fstp\t%y0";
21830 }
21831 if (STACK_TOP_P (operands[0]))
21832 return "fld%z1\t%y1";
21833 return "fst\t%y0";
21834 }
21835 else if (MEM_P (operands[0]))
21836 {
21837 gcc_assert (REG_P (operands[1]));
21838 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21839 return "fstp%z0\t%y0";
21840 else
21841 {
21842 /* There is no non-popping store to memory for XFmode.
21843 So if we need one, follow the store with a load. */
21844 if (GET_MODE (operands[0]) == XFmode)
21845 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21846 else
21847 return "fst%z0\t%y0";
21848 }
21849 }
21850 else
 21851 gcc_unreachable ();
21852 }
21853
21854 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21855 FP status register is set. */
21856
21857 void
21858 ix86_emit_fp_unordered_jump (rtx label)
21859 {
21860 rtx reg = gen_reg_rtx (HImode);
21861 rtx temp;
21862
21863 emit_insn (gen_x86_fnstsw_1 (reg));
21864
21865 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21866 {
21867 emit_insn (gen_x86_sahf_1 (reg));
21868
21869 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21870 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21871 }
21872 else
21873 {
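  /* C2 is bit 10 of the FP status word, i.e. bit 2 (mask 0x04) of the high
     byte stored by fnstsw, hence the 0x04 test on the extended (high)
     QImode part below.  */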
21874 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21875
21876 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21877 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21878 }
21879
21880 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21881 gen_rtx_LABEL_REF (VOIDmode, label),
21882 pc_rtx);
21883 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21884
21885 emit_jump_insn (temp);
21886 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21887 }
21888
21889 /* Output code to perform a log1p XFmode calculation. */
21890
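/* Rough C equivalent of the sequence below (a sketch; fyl2xp1 computes
   y * log2 (x + 1) and fyl2x computes y * log2 (x), with y = ln (2) loaded
   by fldln2):

     if (fabs (op1) < 1.0 - sqrt (2.0) / 2.0)       about 0.2929
       op0 = ln2 * log2 (op1 + 1.0);                fyl2xp1, accurate near 0
     else
       op0 = ln2 * log2 (1.0 + op1);                explicit add, then fyl2x
*/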
21891 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21892 {
21893 rtx label1 = gen_label_rtx ();
21894 rtx label2 = gen_label_rtx ();
21895
21896 rtx tmp = gen_reg_rtx (XFmode);
21897 rtx tmp2 = gen_reg_rtx (XFmode);
21898
21899 emit_insn (gen_absxf2 (tmp, op1));
21900 emit_insn (gen_cmpxf (tmp,
21901 CONST_DOUBLE_FROM_REAL_VALUE (
21902 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21903 XFmode)));
21904 emit_jump_insn (gen_bge (label1));
21905
21906 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21907 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21908 emit_jump (label2);
21909
21910 emit_label (label1);
21911 emit_move_insn (tmp, CONST1_RTX (XFmode));
21912 emit_insn (gen_addxf3 (tmp, op1, tmp));
21913 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21914 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21915
21916 emit_label (label2);
21917 }
21918
21919 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21920
21921 static void ATTRIBUTE_UNUSED
21922 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21923 tree decl)
21924 {
21925 /* With Binutils 2.15, the "@unwind" marker must be specified on
21926 every occurrence of the ".eh_frame" section, not just the first
21927 one. */
21928 if (TARGET_64BIT
21929 && strcmp (name, ".eh_frame") == 0)
21930 {
21931 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21932 flags & SECTION_WRITE ? "aw" : "a");
21933 return;
21934 }
21935 default_elf_asm_named_section (name, flags, decl);
21936 }
21937
21938 /* Return the mangling of TYPE if it is an extended fundamental type. */
21939
21940 static const char *
21941 ix86_mangle_fundamental_type (tree type)
21942 {
21943 switch (TYPE_MODE (type))
21944 {
21945 case TFmode:
21946 /* __float128 is "g". */
21947 return "g";
21948 case XFmode:
21949 /* "long double" or __float80 is "e". */
21950 return "e";
21951 default:
21952 return NULL;
21953 }
21954 }
21955
 21956 /* For 32-bit code we can save PIC register setup by using the
 21957 __stack_chk_fail_local hidden function instead of calling
 21958 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
 21959 register, so it is better to call __stack_chk_fail directly. */
21960
21961 static tree
21962 ix86_stack_protect_fail (void)
21963 {
21964 return TARGET_64BIT
21965 ? default_external_stack_protect_fail ()
21966 : default_hidden_stack_protect_fail ();
21967 }
21968
21969 /* Select a format to encode pointers in exception handling data. CODE
21970 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21971 true if the symbol may be affected by dynamic relocations.
21972
21973 ??? All x86 object file formats are capable of representing this.
21974 After all, the relocation needed is the same as for the call insn.
21975 Whether or not a particular assembler allows us to enter such, I
21976 guess we'll have to see. */
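/* For example (derived from the cases below): 32-bit PIC code gets
   DW_EH_PE_pcrel | DW_EH_PE_sdata4, with DW_EH_PE_indirect added for global
   symbols; non-PIC code gets DW_EH_PE_udata4 under CM_SMALL (or CM_MEDIUM
   for code labels) and DW_EH_PE_absptr otherwise.  */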
21977 int
21978 asm_preferred_eh_data_format (int code, int global)
21979 {
21980 if (flag_pic)
21981 {
21982 int type = DW_EH_PE_sdata8;
21983 if (!TARGET_64BIT
21984 || ix86_cmodel == CM_SMALL_PIC
21985 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21986 type = DW_EH_PE_sdata4;
21987 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21988 }
21989 if (ix86_cmodel == CM_SMALL
21990 || (ix86_cmodel == CM_MEDIUM && code))
21991 return DW_EH_PE_udata4;
21992 return DW_EH_PE_absptr;
21993 }
21994 \f
 21995 /* Expand copysign from SIGN to the positive value ABS_VALUE,
 21996 storing the result in RESULT. If MASK is non-null, it shall be a mask
 21997 that masks out the sign bit. */
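/* In effect: RESULT = ABS_VALUE | (SIGN & sign-bit-only mask), where the
   sign-bit-only mask is either built here or derived as ~MASK.  */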
21998 static void
21999 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
22000 {
22001 enum machine_mode mode = GET_MODE (sign);
22002 rtx sgn = gen_reg_rtx (mode);
22003 if (mask == NULL_RTX)
22004 {
22005 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
22006 if (!VECTOR_MODE_P (mode))
22007 {
22008 /* We need to generate a scalar mode mask in this case. */
22009 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22010 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22011 mask = gen_reg_rtx (mode);
22012 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22013 }
22014 }
22015 else
22016 mask = gen_rtx_NOT (mode, mask);
22017 emit_insn (gen_rtx_SET (VOIDmode, sgn,
22018 gen_rtx_AND (mode, mask, sign)));
22019 emit_insn (gen_rtx_SET (VOIDmode, result,
22020 gen_rtx_IOR (mode, abs_value, sgn)));
22021 }
22022
22023 /* Expand fabs (OP0) and return a new rtx that holds the result. The
22024 mask for masking out the sign-bit is stored in *SMASK, if that is
22025 non-null. */
22026 static rtx
22027 ix86_expand_sse_fabs (rtx op0, rtx *smask)
22028 {
22029 enum machine_mode mode = GET_MODE (op0);
22030 rtx xa, mask;
22031
22032 xa = gen_reg_rtx (mode);
22033 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
22034 if (!VECTOR_MODE_P (mode))
22035 {
22036 /* We need to generate a scalar mode mask in this case. */
22037 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
22038 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
22039 mask = gen_reg_rtx (mode);
22040 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
22041 }
22042 emit_insn (gen_rtx_SET (VOIDmode, xa,
22043 gen_rtx_AND (mode, op0, mask)));
22044
22045 if (smask)
22046 *smask = mask;
22047
22048 return xa;
22049 }
22050
22051 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
22052 swapping the operands if SWAP_OPERANDS is true. The expanded
22053 code is a forward jump to a newly created label in case the
22054 comparison is true. The generated label rtx is returned. */
22055 static rtx
22056 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
22057 bool swap_operands)
22058 {
22059 rtx label, tmp;
22060
22061 if (swap_operands)
22062 {
22063 tmp = op0;
22064 op0 = op1;
22065 op1 = tmp;
22066 }
22067
22068 label = gen_label_rtx ();
22069 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
22070 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22071 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
22072 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
22073 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
22074 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
22075 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
22076 JUMP_LABEL (tmp) = label;
22077
22078 return label;
22079 }
22080
22081 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
22082 using comparison code CODE. Operands are swapped for the comparison if
22083 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
22084 static rtx
22085 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
22086 bool swap_operands)
22087 {
22088 enum machine_mode mode = GET_MODE (op0);
22089 rtx mask = gen_reg_rtx (mode);
22090
22091 if (swap_operands)
22092 {
22093 rtx tmp = op0;
22094 op0 = op1;
22095 op1 = tmp;
22096 }
22097
22098 if (mode == DFmode)
22099 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
22100 gen_rtx_fmt_ee (code, mode, op0, op1)));
22101 else
22102 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
22103 gen_rtx_fmt_ee (code, mode, op0, op1)));
22104
22105 return mask;
22106 }
22107
22108 /* Generate and return a rtx of mode MODE for 2**n where n is the number
22109 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
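/* E.g. for DFmode this is 2**52 = 4503599627370496.0; at or above that
   magnitude every representable value is already an integer, which is what
   makes the "x + TWO52 - TWO52" rounding trick in the expanders below work.  */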
22110 static rtx
22111 ix86_gen_TWO52 (enum machine_mode mode)
22112 {
22113 REAL_VALUE_TYPE TWO52r;
22114 rtx TWO52;
22115
22116 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
22117 TWO52 = const_double_from_real_value (TWO52r, mode);
22118 TWO52 = force_reg (mode, TWO52);
22119
22120 return TWO52;
22121 }
22122
22123 /* Expand SSE sequence for computing lround from OP1 storing
22124 into OP0. */
22125 void
22126 ix86_expand_lround (rtx op0, rtx op1)
22127 {
22128 /* C code for the stuff we're doing below:
22129 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
22130 return (long)tmp;
22131 */
22132 enum machine_mode mode = GET_MODE (op1);
22133 const struct real_format *fmt;
22134 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22135 rtx adj;
22136
22137 /* load nextafter (0.5, 0.0) */
22138 fmt = REAL_MODE_FORMAT (mode);
22139 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22140 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22141
22142 /* adj = copysign (0.5, op1) */
22143 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
22144 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
22145
22146 /* adj = op1 + adj */
22147 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
22148
22149 /* op0 = (imode)adj */
22150 expand_fix (op0, adj, 0);
22151 }
22152
 22153 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
 22154 into OP0. */
22155 void
22156 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
22157 {
22158 /* C code for the stuff we're doing below (for do_floor):
22159 xi = (long)op1;
22160 xi -= (double)xi > op1 ? 1 : 0;
22161 return xi;
22162 */
22163 enum machine_mode fmode = GET_MODE (op1);
22164 enum machine_mode imode = GET_MODE (op0);
22165 rtx ireg, freg, label, tmp;
22166
22167 /* reg = (long)op1 */
22168 ireg = gen_reg_rtx (imode);
22169 expand_fix (ireg, op1, 0);
22170
22171 /* freg = (double)reg */
22172 freg = gen_reg_rtx (fmode);
22173 expand_float (freg, ireg, 0);
22174
22175 /* ireg = (freg > op1) ? ireg - 1 : ireg */
22176 label = ix86_expand_sse_compare_and_jump (UNLE,
22177 freg, op1, !do_floor);
22178 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
22179 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
22180 emit_move_insn (ireg, tmp);
22181
22182 emit_label (label);
22183 LABEL_NUSES (label) = 1;
22184
22185 emit_move_insn (op0, ireg);
22186 }
22187
22188 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
22189 result in OPERAND0. */
22190 void
22191 ix86_expand_rint (rtx operand0, rtx operand1)
22192 {
22193 /* C code for the stuff we're doing below:
22194 xa = fabs (operand1);
22195 if (!isless (xa, 2**52))
22196 return operand1;
22197 xa = xa + 2**52 - 2**52;
22198 return copysign (xa, operand1);
22199 */
22200 enum machine_mode mode = GET_MODE (operand0);
22201 rtx res, xa, label, TWO52, mask;
22202
22203 res = gen_reg_rtx (mode);
22204 emit_move_insn (res, operand1);
22205
22206 /* xa = abs (operand1) */
22207 xa = ix86_expand_sse_fabs (res, &mask);
22208
22209 /* if (!isless (xa, TWO52)) goto label; */
22210 TWO52 = ix86_gen_TWO52 (mode);
22211 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22212
22213 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22214 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22215
22216 ix86_sse_copysign_to_positive (res, xa, res, mask);
22217
22218 emit_label (label);
22219 LABEL_NUSES (label) = 1;
22220
22221 emit_move_insn (operand0, res);
22222 }
22223
22224 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22225 into OPERAND0. */
22226 void
22227 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
22228 {
22229 /* C code for the stuff we expand below.
22230 double xa = fabs (x), x2;
22231 if (!isless (xa, TWO52))
22232 return x;
22233 xa = xa + TWO52 - TWO52;
22234 x2 = copysign (xa, x);
22235 Compensate. Floor:
22236 if (x2 > x)
22237 x2 -= 1;
22238 Compensate. Ceil:
22239 if (x2 < x)
22240 x2 -= -1;
22241 return x2;
22242 */
22243 enum machine_mode mode = GET_MODE (operand0);
22244 rtx xa, TWO52, tmp, label, one, res, mask;
22245
22246 TWO52 = ix86_gen_TWO52 (mode);
22247
22248 /* Temporary for holding the result, initialized to the input
22249 operand to ease control flow. */
22250 res = gen_reg_rtx (mode);
22251 emit_move_insn (res, operand1);
22252
22253 /* xa = abs (operand1) */
22254 xa = ix86_expand_sse_fabs (res, &mask);
22255
22256 /* if (!isless (xa, TWO52)) goto label; */
22257 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22258
22259 /* xa = xa + TWO52 - TWO52; */
22260 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22261 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
22262
22263 /* xa = copysign (xa, operand1) */
22264 ix86_sse_copysign_to_positive (xa, xa, res, mask);
22265
22266 /* generate 1.0 or -1.0 */
22267 one = force_reg (mode,
22268 const_double_from_real_value (do_floor
22269 ? dconst1 : dconstm1, mode));
22270
22271 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22272 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22273 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22274 gen_rtx_AND (mode, one, tmp)));
22275 /* We always need to subtract here to preserve signed zero. */
22276 tmp = expand_simple_binop (mode, MINUS,
22277 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22278 emit_move_insn (res, tmp);
22279
22280 emit_label (label);
22281 LABEL_NUSES (label) = 1;
22282
22283 emit_move_insn (operand0, res);
22284 }
22285
22286 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
22287 into OPERAND0. */
22288 void
22289 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
22290 {
22291 /* C code for the stuff we expand below.
22292 double xa = fabs (x), x2;
22293 if (!isless (xa, TWO52))
22294 return x;
22295 x2 = (double)(long)x;
22296 Compensate. Floor:
22297 if (x2 > x)
22298 x2 -= 1;
22299 Compensate. Ceil:
22300 if (x2 < x)
22301 x2 += 1;
22302 if (HONOR_SIGNED_ZEROS (mode))
22303 return copysign (x2, x);
22304 return x2;
22305 */
22306 enum machine_mode mode = GET_MODE (operand0);
22307 rtx xa, xi, TWO52, tmp, label, one, res, mask;
22308
22309 TWO52 = ix86_gen_TWO52 (mode);
22310
22311 /* Temporary for holding the result, initialized to the input
22312 operand to ease control flow. */
22313 res = gen_reg_rtx (mode);
22314 emit_move_insn (res, operand1);
22315
22316 /* xa = abs (operand1) */
22317 xa = ix86_expand_sse_fabs (res, &mask);
22318
22319 /* if (!isless (xa, TWO52)) goto label; */
22320 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22321
22322 /* xa = (double)(long)x */
22323 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22324 expand_fix (xi, res, 0);
22325 expand_float (xa, xi, 0);
22326
22327 /* generate 1.0 */
22328 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22329
22330 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
22331 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
22332 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22333 gen_rtx_AND (mode, one, tmp)));
22334 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
22335 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22336 emit_move_insn (res, tmp);
22337
22338 if (HONOR_SIGNED_ZEROS (mode))
22339 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22340
22341 emit_label (label);
22342 LABEL_NUSES (label) = 1;
22343
22344 emit_move_insn (operand0, res);
22345 }
22346
 22347 /* Expand SSE sequence for computing round from OPERAND1 storing
 22348 into OPERAND0. Sequence that works without relying on DImode truncation
 22349 via cvttsd2siq, which is only available on 64-bit targets. */
22350 void
22351 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
22352 {
22353 /* C code for the stuff we expand below.
22354 double xa = fabs (x), xa2, x2;
22355 if (!isless (xa, TWO52))
22356 return x;
22357 Using the absolute value and copying back sign makes
22358 -0.0 -> -0.0 correct.
22359 xa2 = xa + TWO52 - TWO52;
22360 Compensate.
22361 dxa = xa2 - xa;
22362 if (dxa <= -0.5)
22363 xa2 += 1;
22364 else if (dxa > 0.5)
22365 xa2 -= 1;
22366 x2 = copysign (xa2, x);
22367 return x2;
22368 */
22369 enum machine_mode mode = GET_MODE (operand0);
22370 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
22371
22372 TWO52 = ix86_gen_TWO52 (mode);
22373
22374 /* Temporary for holding the result, initialized to the input
22375 operand to ease control flow. */
22376 res = gen_reg_rtx (mode);
22377 emit_move_insn (res, operand1);
22378
22379 /* xa = abs (operand1) */
22380 xa = ix86_expand_sse_fabs (res, &mask);
22381
22382 /* if (!isless (xa, TWO52)) goto label; */
22383 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22384
22385 /* xa2 = xa + TWO52 - TWO52; */
22386 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22387 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
22388
22389 /* dxa = xa2 - xa; */
22390 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
22391
22392 /* generate 0.5, 1.0 and -0.5 */
22393 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
22394 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
22395 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
22396 0, OPTAB_DIRECT);
22397
22398 /* Compensate. */
22399 tmp = gen_reg_rtx (mode);
22400 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
22401 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
22402 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22403 gen_rtx_AND (mode, one, tmp)));
22404 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22405 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
22406 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
22407 emit_insn (gen_rtx_SET (VOIDmode, tmp,
22408 gen_rtx_AND (mode, one, tmp)));
22409 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
22410
22411 /* res = copysign (xa2, operand1) */
22412 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
22413
22414 emit_label (label);
22415 LABEL_NUSES (label) = 1;
22416
22417 emit_move_insn (operand0, res);
22418 }
22419
22420 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22421 into OPERAND0. */
22422 void
22423 ix86_expand_trunc (rtx operand0, rtx operand1)
22424 {
22425 /* C code for SSE variant we expand below.
22426 double xa = fabs (x), x2;
22427 if (!isless (xa, TWO52))
22428 return x;
22429 x2 = (double)(long)x;
22430 if (HONOR_SIGNED_ZEROS (mode))
22431 return copysign (x2, x);
22432 return x2;
22433 */
22434 enum machine_mode mode = GET_MODE (operand0);
22435 rtx xa, xi, TWO52, label, res, mask;
22436
22437 TWO52 = ix86_gen_TWO52 (mode);
22438
22439 /* Temporary for holding the result, initialized to the input
22440 operand to ease control flow. */
22441 res = gen_reg_rtx (mode);
22442 emit_move_insn (res, operand1);
22443
22444 /* xa = abs (operand1) */
22445 xa = ix86_expand_sse_fabs (res, &mask);
22446
22447 /* if (!isless (xa, TWO52)) goto label; */
22448 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22449
22450 /* x = (double)(long)x */
22451 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22452 expand_fix (xi, res, 0);
22453 expand_float (res, xi, 0);
22454
22455 if (HONOR_SIGNED_ZEROS (mode))
22456 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
22457
22458 emit_label (label);
22459 LABEL_NUSES (label) = 1;
22460
22461 emit_move_insn (operand0, res);
22462 }
22463
22464 /* Expand SSE sequence for computing trunc from OPERAND1 storing
22465 into OPERAND0. */
22466 void
22467 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
22468 {
22469 enum machine_mode mode = GET_MODE (operand0);
22470 rtx xa, mask, TWO52, label, one, res, smask, tmp;
22471
22472 /* C code for SSE variant we expand below.
 22473 double xa = fabs (x), xa2, x2;
22474 if (!isless (xa, TWO52))
22475 return x;
22476 xa2 = xa + TWO52 - TWO52;
22477 Compensate:
22478 if (xa2 > xa)
22479 xa2 -= 1.0;
22480 x2 = copysign (xa2, x);
22481 return x2;
22482 */
22483
22484 TWO52 = ix86_gen_TWO52 (mode);
22485
22486 /* Temporary for holding the result, initialized to the input
22487 operand to ease control flow. */
22488 res = gen_reg_rtx (mode);
22489 emit_move_insn (res, operand1);
22490
22491 /* xa = abs (operand1) */
22492 xa = ix86_expand_sse_fabs (res, &smask);
22493
22494 /* if (!isless (xa, TWO52)) goto label; */
22495 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22496
22497 /* res = xa + TWO52 - TWO52; */
22498 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
22499 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
22500 emit_move_insn (res, tmp);
22501
22502 /* generate 1.0 */
22503 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
22504
22505 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
22506 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
22507 emit_insn (gen_rtx_SET (VOIDmode, mask,
22508 gen_rtx_AND (mode, mask, one)));
22509 tmp = expand_simple_binop (mode, MINUS,
22510 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
22511 emit_move_insn (res, tmp);
22512
22513 /* res = copysign (res, operand1) */
22514 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
22515
22516 emit_label (label);
22517 LABEL_NUSES (label) = 1;
22518
22519 emit_move_insn (operand0, res);
22520 }
22521
22522 /* Expand SSE sequence for computing round from OPERAND1 storing
22523 into OPERAND0. */
22524 void
22525 ix86_expand_round (rtx operand0, rtx operand1)
22526 {
22527 /* C code for the stuff we're doing below:
22528 double xa = fabs (x);
22529 if (!isless (xa, TWO52))
22530 return x;
22531 xa = (double)(long)(xa + nextafter (0.5, 0.0));
22532 return copysign (xa, x);
22533 */
22534 enum machine_mode mode = GET_MODE (operand0);
22535 rtx res, TWO52, xa, label, xi, half, mask;
22536 const struct real_format *fmt;
22537 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
22538
22539 /* Temporary for holding the result, initialized to the input
22540 operand to ease control flow. */
22541 res = gen_reg_rtx (mode);
22542 emit_move_insn (res, operand1);
22543
22544 TWO52 = ix86_gen_TWO52 (mode);
22545 xa = ix86_expand_sse_fabs (res, &mask);
22546 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
22547
22548 /* load nextafter (0.5, 0.0) */
22549 fmt = REAL_MODE_FORMAT (mode);
22550 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
22551 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
22552
22553 /* xa = xa + 0.5 */
22554 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
22555 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
22556
22557 /* xa = (double)(int64_t)xa */
22558 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
22559 expand_fix (xi, xa, 0);
22560 expand_float (xa, xi, 0);
22561
22562 /* res = copysign (xa, operand1) */
22563 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
22564
22565 emit_label (label);
22566 LABEL_NUSES (label) = 1;
22567
22568 emit_move_insn (operand0, res);
22569 }
22570
22571 \f
22572 /* Table of valid machine attributes. */
22573 static const struct attribute_spec ix86_attribute_table[] =
22574 {
22575 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
22576 /* Stdcall attribute says callee is responsible for popping arguments
22577 if they are not variable. */
22578 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22579 /* Fastcall attribute says callee is responsible for popping arguments
22580 if they are not variable. */
22581 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22582 /* Cdecl attribute says the callee is a normal C declaration */
22583 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22584 /* Regparm attribute specifies how many integer arguments are to be
22585 passed in registers. */
22586 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
22587 /* Sseregparm attribute says we are using x86_64 calling conventions
22588 for FP arguments. */
22589 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22590 /* force_align_arg_pointer says this function realigns the stack at entry. */
22591 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
22592 false, true, true, ix86_handle_cconv_attribute },
22593 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22594 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
22595 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
22596 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
22597 #endif
22598 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22599 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22600 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22601 SUBTARGET_ATTRIBUTE_TABLE,
22602 #endif
22603 { NULL, 0, 0, false, false, false, NULL }
22604 };
22605
22606 /* Initialize the GCC target structure. */
22607 #undef TARGET_ATTRIBUTE_TABLE
22608 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22609 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22610 # undef TARGET_MERGE_DECL_ATTRIBUTES
22611 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22612 #endif
22613
22614 #undef TARGET_COMP_TYPE_ATTRIBUTES
22615 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22616
22617 #undef TARGET_INIT_BUILTINS
22618 #define TARGET_INIT_BUILTINS ix86_init_builtins
22619 #undef TARGET_EXPAND_BUILTIN
22620 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22621
22622 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22623 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22624 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22625 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22626
22627 #undef TARGET_ASM_FUNCTION_EPILOGUE
22628 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22629
22630 #undef TARGET_ENCODE_SECTION_INFO
22631 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22632 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22633 #else
22634 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22635 #endif
22636
22637 #undef TARGET_ASM_OPEN_PAREN
22638 #define TARGET_ASM_OPEN_PAREN ""
22639 #undef TARGET_ASM_CLOSE_PAREN
22640 #define TARGET_ASM_CLOSE_PAREN ""
22641
22642 #undef TARGET_ASM_ALIGNED_HI_OP
22643 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22644 #undef TARGET_ASM_ALIGNED_SI_OP
22645 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22646 #ifdef ASM_QUAD
22647 #undef TARGET_ASM_ALIGNED_DI_OP
22648 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22649 #endif
22650
22651 #undef TARGET_ASM_UNALIGNED_HI_OP
22652 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22653 #undef TARGET_ASM_UNALIGNED_SI_OP
22654 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22655 #undef TARGET_ASM_UNALIGNED_DI_OP
22656 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22657
22658 #undef TARGET_SCHED_ADJUST_COST
22659 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22660 #undef TARGET_SCHED_ISSUE_RATE
22661 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22664 ia32_multipass_dfa_lookahead
22665
22666 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22667 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22668
22669 #ifdef HAVE_AS_TLS
22670 #undef TARGET_HAVE_TLS
22671 #define TARGET_HAVE_TLS true
22672 #endif
22673 #undef TARGET_CANNOT_FORCE_CONST_MEM
22674 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22675 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22676 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22677
22678 #undef TARGET_DELEGITIMIZE_ADDRESS
22679 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22680
22681 #undef TARGET_MS_BITFIELD_LAYOUT_P
22682 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22683
22684 #if TARGET_MACHO
22685 #undef TARGET_BINDS_LOCAL_P
22686 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22687 #endif
22688 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22689 #undef TARGET_BINDS_LOCAL_P
22690 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22691 #endif
22692
22693 #undef TARGET_ASM_OUTPUT_MI_THUNK
22694 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22695 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22696 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22697
22698 #undef TARGET_ASM_FILE_START
22699 #define TARGET_ASM_FILE_START x86_file_start
22700
22701 #undef TARGET_DEFAULT_TARGET_FLAGS
22702 #define TARGET_DEFAULT_TARGET_FLAGS \
22703 (TARGET_DEFAULT \
22704 | TARGET_64BIT_DEFAULT \
22705 | TARGET_SUBTARGET_DEFAULT \
22706 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22707
22708 #undef TARGET_HANDLE_OPTION
22709 #define TARGET_HANDLE_OPTION ix86_handle_option
22710
22711 #undef TARGET_RTX_COSTS
22712 #define TARGET_RTX_COSTS ix86_rtx_costs
22713 #undef TARGET_ADDRESS_COST
22714 #define TARGET_ADDRESS_COST ix86_address_cost
22715
22716 #undef TARGET_FIXED_CONDITION_CODE_REGS
22717 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22718 #undef TARGET_CC_MODES_COMPATIBLE
22719 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22720
22721 #undef TARGET_MACHINE_DEPENDENT_REORG
22722 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22723
22724 #undef TARGET_BUILD_BUILTIN_VA_LIST
22725 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22726
22727 #undef TARGET_MD_ASM_CLOBBERS
22728 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22729
22730 #undef TARGET_PROMOTE_PROTOTYPES
22731 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22732 #undef TARGET_STRUCT_VALUE_RTX
22733 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22734 #undef TARGET_SETUP_INCOMING_VARARGS
22735 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22736 #undef TARGET_MUST_PASS_IN_STACK
22737 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22738 #undef TARGET_PASS_BY_REFERENCE
22739 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22740 #undef TARGET_INTERNAL_ARG_POINTER
22741 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22742 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22743 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22744 #undef TARGET_STRICT_ARGUMENT_NAMING
22745 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22746
22747 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22748 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22749
22750 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22751 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22752
22753 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22754 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22755
22756 #ifdef HAVE_AS_TLS
22757 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22758 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22759 #endif
22760
22761 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22762 #undef TARGET_INSERT_ATTRIBUTES
22763 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22764 #endif
22765
22766 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22767 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22768
22769 #undef TARGET_STACK_PROTECT_FAIL
22770 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22771
22772 #undef TARGET_FUNCTION_VALUE
22773 #define TARGET_FUNCTION_VALUE ix86_function_value
22774
22775 struct gcc_target targetm = TARGET_INITIALIZER;
22776 \f
22777 #include "gt-i386.h"