re PR target/31989 (Gcc miscompiles C/C++ on Linux/x86-64)
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
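/* Editorial sketch (not part of the original file): MODE_INDEX selects the
   per-mode slot in the mult_init[] and divide[] arrays of struct processor_costs,
   so the cost of, e.g., an SImode multiply for the active tuning is obtained
   roughly as

     int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];   /* slot 2 */

   The field names mult_init/divide are assumed from the processor_costs
   definition in i386.h; treat this as a usage sketch, not code from this file. */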
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
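/* Worked example (editorial note, using the scaling stated above): with
   COSTS_N_INSNS (N) == (N) * 4 and a 2-byte add, COSTS_N_BYTES (2) == 4 ==
   COSTS_N_INSNS (1), i.e. in size_cost below a 2-byte instruction weighs exactly
   as much as one add, and a 3-byte instruction (COSTS_N_BYTES (3) == 6) weighs
   1.5 adds on the same scale. */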
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
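/* Editorial sketch of how to read the stringop tables that end each cost
   structure below (the struct stringop_algs layout is assumed from i386.h):
   each entry is { alg_for_unknown_size, {{max_size, alg}, ...} }, where for a
   known block size the list is scanned in order and the first pair whose
   max_size covers the block (with -1 meaning "any larger size") picks the
   algorithm.  For instance

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: known blocks of at most 256 bytes use rep movsl, anything larger or of
   unknown size goes through the library call. */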
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea not
587 to limit the number of prefetches at all, as their execution also takes some
588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea not
660 to limit the number of prefetches at all, as their execution also takes some
661 time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers
 in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing a regression on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be a common subset of the supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
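/* Editorial sketch of how these masks are consumed (the helper name below is
   hypothetical, not part of this file): each X86_TUNE_* entry in the table that
   follows is a bitmask of processors, and a tuning is in effect when the bit of
   the processor currently being tuned for is set, roughly

     static inline bool
     ix86_tuning_enabled_p (unsigned int feature_mask)
     {
       return (feature_mask & (1u << ix86_tune)) != 0;
     }

   so combinations such as m_K6_GEODE simply OR several processor bits together. */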
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1030
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1032 on simulation results, but after P4 was made no performance benefit
1033 was observed with branch hints. They also increase the code size.
1034 As a result, icc never generates branch hints. */
1035 0,
1036
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1038 ~m_386,
1039
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1043
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1048
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on the Generic32 compilation setting as well. However,
1051 in the current implementation partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro based chips and conflicts with the partial reg
1055 dependencies used by Athlon/P4 based chips, it is better to leave it off
1056 for generic32 for now. */
1057 m_PPRO,
1058
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1061
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1064
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1067
1068 /* X86_TUNE_USE_MOV0 */
1069 m_K6,
1070
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1073
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1075 m_PENT4,
1076
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1078 m_PPRO,
1079
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1081 ~m_PENT,
1082
1083 /* X86_TUNE_READ_MODIFY */
1084 ~(m_PENT | m_PPRO),
1085
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1089
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1092
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1095
1096 /* X86_TUNE_QIMODE_MATH */
1097 ~0,
1098
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls was more effective. */
1103 ~m_PPRO,
1104
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1106 0,
1107
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1109 m_PPRO,
1110
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1113
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1117
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1120
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1124
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1129
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1132
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4 based chips that treat 128bit
1135 SSE registers as single units and K8 based chips that divide SSE
1136 registers into two 64bit halves. This knob promotes all store destinations
1137 to be 128bit to allow register renaming on 128bit SSE units, but usually
1138 results in one extra microop on 64bit SSE units. Experimental results
1139 show that disabling this option on P4 brings over a 20% SPECfp regression,
1140 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1143
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1145 m_AMDFAM10,
1146
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just the lower part of scalar values in the proper format, leaving the
1150 upper part undefined. */
1151 m_ATHLON_K8,
1152
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1155
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1158
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1161
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1164
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SHIFT1 */
1169 ~m_486,
1170
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1173
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1176
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1180
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1183
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1186
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1189
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1192
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1195
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1197 ~m_K8,
1198
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1200 m_K8 | m_GENERIC64,
1201
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1203 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1204 ~(m_386 | m_486),
1205
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1209
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1211 machines. */
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1213
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1215 than via a MOV. */
1216 m_PENT,
1217
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1220 m_PENT,
1221
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1225 m_K6,
1226 };
1227
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1232
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1234 ~m_386,
1235
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1237 ~(m_386 | m_486),
1238
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1240 ~m_386,
1241
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 ~m_386,
1244 };
1245
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1248
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1252
1253 static enum stringop_alg stringop_alg = no_stringop;
1254
1255 /* In case the average insn count for a single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1257 epilogue code. */
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1259
1260 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1264
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1267
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1269 {
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1274 /* FP registers */
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1277 /* arg pointer */
1278 NON_Q_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1282 SSE_REGS, SSE_REGS,
1283 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1284 MMX_REGS, MMX_REGS,
1285 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1288 SSE_REGS, SSE_REGS,
1289 };
1290
1291 /* The "default" register map used in 32bit mode. */
1292
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1294 {
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1302 };
1303
1304 static int const x86_64_int_parameter_registers[6] =
1305 {
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1308 };
1309
1310 static int const x86_64_ms_abi_int_parameter_registers[4] =
1311 {
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1314 };
1315
1316 static int const x86_64_int_return_registers[4] =
1317 {
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1319 };
1320
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1323 {
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1331 };
1332
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1377 numbers.
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1386 */
1387 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1388 {
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1396 };
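/* Illustrative sketch only (not part of the original file): the table above
   is typically consulted via the target's DBX_REGISTER_NUMBER macro; the
   helper below merely shows how a GCC hard register number indexes into it.
   The function name is made up for this example.  */
static inline int
svr4_dwarf_regno_example (int gcc_regno)
{
  /* Out-of-range registers have no DWARF number; unmapped entries are -1.  */
  if (gcc_regno < 0 || gcc_regno >= FIRST_PSEUDO_REGISTER)
    return -1;
  return svr4_dbx_register_map[gcc_regno];
}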
1397
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
1400
1401 rtx ix86_compare_op0 = NULL_RTX;
1402 rtx ix86_compare_op1 = NULL_RTX;
1403 rtx ix86_compare_emitted = NULL_RTX;
1404
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
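/* Worked example (illustrative note, not part of the original source):
   with the usual x86-64 values REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and
   UNITS_PER_WORD == 8, this comes to 6*8 + 8*16 = 176 bytes of register
   save area.  Those particular values are assumptions of this note, not
   something the macro itself guarantees.  */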
1407
1408 /* Define the structure for the machine field in struct function. */
1409
1410 struct stack_local_entry GTY(())
1411 {
1412 unsigned short mode;
1413 unsigned short n;
1414 rtx rtl;
1415 struct stack_local_entry *next;
1416 };
1417
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1420
1421 [arguments]
1422 <- ARG_POINTER
1423 saved pc
1424
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1427 [saved regs]
1428
1429 [padding1] \
1430 )
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1433 [frame] (
1434 )
1435 [padding2] /
1436 */
1437 struct ix86_frame
1438 {
1439 int nregs;
1440 int padding1;
1441 int va_arg_size;
1442 HOST_WIDE_INT frame;
1443 int padding2;
1444 int outgoing_arguments_size;
1445 int red_zone_size;
1446
1447 HOST_WIDE_INT to_allocate;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset;
1450 HOST_WIDE_INT hard_frame_pointer_offset;
1451 HOST_WIDE_INT stack_pointer_offset;
1452
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov;
1456 };
1457
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel;
1460 /* Asm dialect. */
1461 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1462 /* TLS dialects. */
1463 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1464
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath;
1467
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune;
1470
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch;
1473
1474 /* True if the SSE prefetch instruction is not a NOP. */
1475 int x86_prefetch_sse;
1476
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm;
1479
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer;
1482 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1483
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary;
1486
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost;
1489
1490 /* Variables which are this size or smaller are put in the data/bss
1491 sections; larger variables are put in the ldata/lbss sections. */
1492
1493 int ix86_section_threshold = 65536;
1494
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix[16];
1497 int internal_label_prefix_len;
1498
1499 /* Register class used for passing a given 64-bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of the SSESF and SSEDF classes, which are basically the SSE class, except that
1502 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1503
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (the upper half then contains only padding). */
1506 enum x86_64_reg_class
1507 {
1508 X86_64_NO_CLASS,
1509 X86_64_INTEGER_CLASS,
1510 X86_64_INTEGERSI_CLASS,
1511 X86_64_SSE_CLASS,
1512 X86_64_SSESF_CLASS,
1513 X86_64_SSEDF_CLASS,
1514 X86_64_SSEUP_CLASS,
1515 X86_64_X87_CLASS,
1516 X86_64_X87UP_CLASS,
1517 X86_64_COMPLEX_X87_CLASS,
1518 X86_64_MEMORY_CLASS
1519 };
1520 static const char * const x86_64_reg_class_name[] =
1521 {
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1524 };
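/* Illustrative example (not part of the original source), following the
   classification rules sketched above: a 16-byte aggregate such as

     struct { double d; int i; };

   would normally have its first eightbyte classified as SSEDF (the SSE
   class, moved in DFmode) and its second eightbyte as INTEGERSI (the
   INTEGER class, moved in SImode because its upper half is only padding).  */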
1525
1526 #define MAX_CLASSES 4
1527
1528 /* Table of constants used by fldpi, fldln2, etc.... */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1531
1532 \f
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1538 rtx, rtx, int);
1539
1540 \f
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1542 in memory. */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1545 #endif
1546
1547 /* Implement TARGET_HANDLE_OPTION. */
1548
1549 static bool
1550 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1551 {
1552 switch (code)
1553 {
1554 case OPT_m3dnow:
1555 if (!value)
1556 {
1557 target_flags &= ~MASK_3DNOW_A;
1558 target_flags_explicit |= MASK_3DNOW_A;
1559 }
1560 return true;
1561
1562 case OPT_mmmx:
1563 if (!value)
1564 {
1565 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1566 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1567 }
1568 return true;
1569
1570 case OPT_msse:
1571 if (!value)
1572 {
1573 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1574 | MASK_SSE4A);
1575 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1576 | MASK_SSE4A);
1577 }
1578 return true;
1579
1580 case OPT_msse2:
1581 if (!value)
1582 {
1583 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1584 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1585 }
1586 return true;
1587
1588 case OPT_msse3:
1589 if (!value)
1590 {
1591 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1592 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1593 }
1594 return true;
1595
1596 case OPT_mssse3:
1597 if (!value)
1598 {
1599 target_flags &= ~MASK_SSE4A;
1600 target_flags_explicit |= MASK_SSE4A;
1601 }
1602 return true;
1603
1604 default:
1605 return true;
1606 }
1607 }
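/* Example of the cascading behaviour implemented above (illustrative
   comment, not part of the original source): a command line such as

     gcc -msse3 -mno-sse2 ...

   clears MASK_SSE3, MASK_SSSE3 and MASK_SSE4A along with MASK_SSE2,
   because disabling one SSE level also disables every higher level.  */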
1608
1609 /* Sometimes certain combinations of command options do not make
1610 sense on a particular target machine. You can define a macro
1611 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1612 defined, is executed once just after all the command options have
1613 been parsed.
1614
1615 Don't use this macro to turn on various extra optimizations for
1616 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1617
1618 void
1619 override_options (void)
1620 {
1621 int i;
1622 int ix86_tune_defaulted = 0;
1623 unsigned int ix86_arch_mask, ix86_tune_mask;
1624
1625 /* Comes from final.c -- no real reason to change it. */
1626 #define MAX_CODE_ALIGN 16
1627
1628 static struct ptt
1629 {
1630 const struct processor_costs *cost; /* Processor costs */
1631 const int target_enable; /* Target flags to enable. */
1632 const int target_disable; /* Target flags to disable. */
1633 const int align_loop; /* Default alignments. */
1634 const int align_loop_max_skip;
1635 const int align_jump;
1636 const int align_jump_max_skip;
1637 const int align_func;
1638 }
1639 const processor_target_table[PROCESSOR_max] =
1640 {
1641 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1642 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1643 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1644 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1645 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1646 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1647 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1648 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1649 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1650 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1651 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1652 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1653 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1654 {&amdfam10_cost, 0, 0, 32, 24, 32, 7, 32}
1655 };
1656
1657 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1658 static struct pta
1659 {
1660 const char *const name; /* processor name or nickname. */
1661 const enum processor_type processor;
1662 const enum pta_flags
1663 {
1664 PTA_SSE = 1 << 0,
1665 PTA_SSE2 = 1 << 1,
1666 PTA_SSE3 = 1 << 2,
1667 PTA_MMX = 1 << 3,
1668 PTA_PREFETCH_SSE = 1 << 4,
1669 PTA_3DNOW = 1 << 5,
1670 PTA_3DNOW_A = 1 << 6,
1671 PTA_64BIT = 1 << 7,
1672 PTA_SSSE3 = 1 << 8,
1673 PTA_CX16 = 1 << 9,
1674 PTA_POPCNT = 1 << 10,
1675 PTA_ABM = 1 << 11,
1676 PTA_SSE4A = 1 << 12,
1677 PTA_NO_SAHF = 1 << 13
1678 } flags;
1679 }
1680 const processor_alias_table[] =
1681 {
1682 {"i386", PROCESSOR_I386, 0},
1683 {"i486", PROCESSOR_I486, 0},
1684 {"i586", PROCESSOR_PENTIUM, 0},
1685 {"pentium", PROCESSOR_PENTIUM, 0},
1686 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1687 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1688 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1689 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1690 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1691 {"i686", PROCESSOR_PENTIUMPRO, 0},
1692 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1693 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1694 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1695 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1696 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1697 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1698 | PTA_MMX | PTA_PREFETCH_SSE},
1699 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1700 | PTA_MMX | PTA_PREFETCH_SSE},
1701 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1702 | PTA_MMX | PTA_PREFETCH_SSE},
1703 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1704 | PTA_MMX | PTA_PREFETCH_SSE
1705 | PTA_CX16 | PTA_NO_SAHF},
1706 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1707 | PTA_64BIT | PTA_MMX
1708 | PTA_PREFETCH_SSE | PTA_CX16},
1709 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1710 | PTA_3DNOW_A},
1711 {"k6", PROCESSOR_K6, PTA_MMX},
1712 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1713 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1714 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1715 | PTA_3DNOW_A},
1716 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1717 | PTA_3DNOW | PTA_3DNOW_A},
1718 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1719 | PTA_3DNOW_A | PTA_SSE},
1720 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_3DNOW_A | PTA_SSE},
1722 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1723 | PTA_3DNOW_A | PTA_SSE},
1724 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1725 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1726 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1727 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1728 | PTA_NO_SAHF},
1729 {"k8-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1730 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1731 | PTA_SSE3 | PTA_NO_SAHF},
1732 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1733 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1734 | PTA_SSE2 | PTA_NO_SAHF},
1735 {"opteron-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1736 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1737 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1738 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1739 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1740 | PTA_SSE2 | PTA_NO_SAHF},
1741 {"athlon64-sse3", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1742 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1743 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
1744 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1745 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1746 | PTA_SSE2 | PTA_NO_SAHF},
1747 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1748 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1749 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1750 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1751 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1752 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1753 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1754 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1755 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1756 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1757 };
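/* Illustrative note (not part of the original source): this table is
   scanned by the -march= and -mtune= loops below.  For instance,
   -march=core2 selects PROCESSOR_CORE2 and, via its PTA_* flags, turns
   on MMX, SSE, SSE2, SSE3, SSSE3, SSE prefetch and CMPXCHG16B unless
   the user disabled them explicitly.  */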
1758
1759 int const pta_size = ARRAY_SIZE (processor_alias_table);
1760
1761 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1762 SUBTARGET_OVERRIDE_OPTIONS;
1763 #endif
1764
1765 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1766 SUBSUBTARGET_OVERRIDE_OPTIONS;
1767 #endif
1768
1769 /* -fPIC is the default for 64-bit Darwin (Mach-O). */
1770 if (TARGET_MACHO && TARGET_64BIT)
1771 flag_pic = 2;
1772
1773 /* Set the default values for switches whose default depends on TARGET_64BIT
1774 in case they weren't overwritten by command line options. */
1775 if (TARGET_64BIT)
1776 {
1777 /* Mach-O doesn't support omitting the frame pointer for now. */
1778 if (flag_omit_frame_pointer == 2)
1779 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1780 if (flag_asynchronous_unwind_tables == 2)
1781 flag_asynchronous_unwind_tables = 1;
1782 if (flag_pcc_struct_return == 2)
1783 flag_pcc_struct_return = 0;
1784 }
1785 else
1786 {
1787 if (flag_omit_frame_pointer == 2)
1788 flag_omit_frame_pointer = 0;
1789 if (flag_asynchronous_unwind_tables == 2)
1790 flag_asynchronous_unwind_tables = 0;
1791 if (flag_pcc_struct_return == 2)
1792 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1793 }
1794
1795 /* Need to check -mtune=generic first. */
1796 if (ix86_tune_string)
1797 {
1798 if (!strcmp (ix86_tune_string, "generic")
1799 || !strcmp (ix86_tune_string, "i686")
1800 /* As special support for cross compilers we read -mtune=native
1801 as -mtune=generic. With native compilers we won't see the
1802 -mtune=native, as it was changed by the driver. */
1803 || !strcmp (ix86_tune_string, "native"))
1804 {
1805 if (TARGET_64BIT)
1806 ix86_tune_string = "generic64";
1807 else
1808 ix86_tune_string = "generic32";
1809 }
1810 else if (!strncmp (ix86_tune_string, "generic", 7))
1811 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1812 }
1813 else
1814 {
1815 if (ix86_arch_string)
1816 ix86_tune_string = ix86_arch_string;
1817 if (!ix86_tune_string)
1818 {
1819 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1820 ix86_tune_defaulted = 1;
1821 }
1822
1823 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1824 need to use a sensible tune option. */
1825 if (!strcmp (ix86_tune_string, "generic")
1826 || !strcmp (ix86_tune_string, "x86-64")
1827 || !strcmp (ix86_tune_string, "i686"))
1828 {
1829 if (TARGET_64BIT)
1830 ix86_tune_string = "generic64";
1831 else
1832 ix86_tune_string = "generic32";
1833 }
1834 }
1835 if (ix86_stringop_string)
1836 {
1837 if (!strcmp (ix86_stringop_string, "rep_byte"))
1838 stringop_alg = rep_prefix_1_byte;
1839 else if (!strcmp (ix86_stringop_string, "libcall"))
1840 stringop_alg = libcall;
1841 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1842 stringop_alg = rep_prefix_4_byte;
1843 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1844 stringop_alg = rep_prefix_8_byte;
1845 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1846 stringop_alg = loop_1_byte;
1847 else if (!strcmp (ix86_stringop_string, "loop"))
1848 stringop_alg = loop;
1849 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1850 stringop_alg = unrolled_loop;
1851 else
1852 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1853 }
1854 if (!strcmp (ix86_tune_string, "x86-64"))
1855 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1856 "-mtune=generic instead as appropriate.");
1857
1858 if (!ix86_arch_string)
1859 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1860 if (!strcmp (ix86_arch_string, "generic"))
1861 error ("generic CPU can be used only for -mtune= switch");
1862 if (!strncmp (ix86_arch_string, "generic", 7))
1863 error ("bad value (%s) for -march= switch", ix86_arch_string);
1864
1865 if (ix86_cmodel_string != 0)
1866 {
1867 if (!strcmp (ix86_cmodel_string, "small"))
1868 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1869 else if (!strcmp (ix86_cmodel_string, "medium"))
1870 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1871 else if (!strcmp (ix86_cmodel_string, "large"))
1872 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1873 else if (flag_pic)
1874 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1875 else if (!strcmp (ix86_cmodel_string, "32"))
1876 ix86_cmodel = CM_32;
1877 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1878 ix86_cmodel = CM_KERNEL;
1879 else
1880 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1881 }
1882 else
1883 {
1884 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1885 use of rip-relative addressing. This eliminates fixups that
1886 would otherwise be needed if this object is to be placed in a
1887 DLL, and is essentially just as efficient as direct addressing. */
1888 if (TARGET_64BIT_MS_ABI)
1889 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1890 else if (TARGET_64BIT)
1891 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1892 else
1893 ix86_cmodel = CM_32;
1894 }
1895 if (ix86_asm_string != 0)
1896 {
1897 if (! TARGET_MACHO
1898 && !strcmp (ix86_asm_string, "intel"))
1899 ix86_asm_dialect = ASM_INTEL;
1900 else if (!strcmp (ix86_asm_string, "att"))
1901 ix86_asm_dialect = ASM_ATT;
1902 else
1903 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1904 }
1905 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1906 error ("code model %qs not supported in the %s bit mode",
1907 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1908 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1909 sorry ("%i-bit mode not compiled in",
1910 (target_flags & MASK_64BIT) ? 64 : 32);
1911
1912 for (i = 0; i < pta_size; i++)
1913 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1914 {
1915 ix86_arch = processor_alias_table[i].processor;
1916 /* Default cpu tuning to the architecture. */
1917 ix86_tune = ix86_arch;
1918 if (processor_alias_table[i].flags & PTA_MMX
1919 && !(target_flags_explicit & MASK_MMX))
1920 target_flags |= MASK_MMX;
1921 if (processor_alias_table[i].flags & PTA_3DNOW
1922 && !(target_flags_explicit & MASK_3DNOW))
1923 target_flags |= MASK_3DNOW;
1924 if (processor_alias_table[i].flags & PTA_3DNOW_A
1925 && !(target_flags_explicit & MASK_3DNOW_A))
1926 target_flags |= MASK_3DNOW_A;
1927 if (processor_alias_table[i].flags & PTA_SSE
1928 && !(target_flags_explicit & MASK_SSE))
1929 target_flags |= MASK_SSE;
1930 if (processor_alias_table[i].flags & PTA_SSE2
1931 && !(target_flags_explicit & MASK_SSE2))
1932 target_flags |= MASK_SSE2;
1933 if (processor_alias_table[i].flags & PTA_SSE3
1934 && !(target_flags_explicit & MASK_SSE3))
1935 target_flags |= MASK_SSE3;
1936 if (processor_alias_table[i].flags & PTA_SSSE3
1937 && !(target_flags_explicit & MASK_SSSE3))
1938 target_flags |= MASK_SSSE3;
1939 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1940 x86_prefetch_sse = true;
1941 if (processor_alias_table[i].flags & PTA_CX16)
1942 x86_cmpxchg16b = true;
1943 if (processor_alias_table[i].flags & PTA_POPCNT
1944 && !(target_flags_explicit & MASK_POPCNT))
1945 target_flags |= MASK_POPCNT;
1946 if (processor_alias_table[i].flags & PTA_ABM
1947 && !(target_flags_explicit & MASK_ABM))
1948 target_flags |= MASK_ABM;
1949 if (processor_alias_table[i].flags & PTA_SSE4A
1950 && !(target_flags_explicit & MASK_SSE4A))
1951 target_flags |= MASK_SSE4A;
1952 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1953 x86_sahf = true;
1954 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1955 error ("CPU you selected does not support x86-64 "
1956 "instruction set");
1957 break;
1958 }
1959
1960 if (i == pta_size)
1961 error ("bad value (%s) for -march= switch", ix86_arch_string);
1962
1963 ix86_arch_mask = 1u << ix86_arch;
1964 for (i = 0; i < X86_ARCH_LAST; ++i)
1965 ix86_arch_features[i] &= ix86_arch_mask;
1966
1967 for (i = 0; i < pta_size; i++)
1968 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1969 {
1970 ix86_tune = processor_alias_table[i].processor;
1971 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1972 {
1973 if (ix86_tune_defaulted)
1974 {
1975 ix86_tune_string = "x86-64";
1976 for (i = 0; i < pta_size; i++)
1977 if (! strcmp (ix86_tune_string,
1978 processor_alias_table[i].name))
1979 break;
1980 ix86_tune = processor_alias_table[i].processor;
1981 }
1982 else
1983 error ("CPU you selected does not support x86-64 "
1984 "instruction set");
1985 }
1986 /* Intel CPUs have always interpreted SSE prefetch instructions as
1987 NOPs; so, we can enable SSE prefetch instructions even when
1988 -mtune (rather than -march) points us to a processor that has them.
1989 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1990 higher processors. */
1991 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1992 x86_prefetch_sse = true;
1993 break;
1994 }
1995 if (i == pta_size)
1996 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1997
1998 ix86_tune_mask = 1u << ix86_tune;
1999 for (i = 0; i < X86_TUNE_LAST; ++i)
2000 ix86_tune_features[i] &= ix86_tune_mask;
2001
2002 if (optimize_size)
2003 ix86_cost = &size_cost;
2004 else
2005 ix86_cost = processor_target_table[ix86_tune].cost;
2006 target_flags |= processor_target_table[ix86_tune].target_enable;
2007 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2008
2009 /* Arrange to set up i386_stack_locals for all functions. */
2010 init_machine_status = ix86_init_machine_status;
2011
2012 /* Validate -mregparm= value. */
2013 if (ix86_regparm_string)
2014 {
2015 if (TARGET_64BIT)
2016 warning (0, "-mregparm is ignored in 64-bit mode");
2017 i = atoi (ix86_regparm_string);
2018 if (i < 0 || i > REGPARM_MAX)
2019 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2020 else
2021 ix86_regparm = i;
2022 }
2023 if (TARGET_64BIT)
2024 ix86_regparm = REGPARM_MAX;
2025
2026 /* If the user has provided any of the -malign-* options,
2027 warn and use that value only if -falign-* is not set.
2028 Remove this code in GCC 3.2 or later. */
2029 if (ix86_align_loops_string)
2030 {
2031 warning (0, "-malign-loops is obsolete, use -falign-loops");
2032 if (align_loops == 0)
2033 {
2034 i = atoi (ix86_align_loops_string);
2035 if (i < 0 || i > MAX_CODE_ALIGN)
2036 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2037 else
2038 align_loops = 1 << i;
2039 }
2040 }
2041
2042 if (ix86_align_jumps_string)
2043 {
2044 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2045 if (align_jumps == 0)
2046 {
2047 i = atoi (ix86_align_jumps_string);
2048 if (i < 0 || i > MAX_CODE_ALIGN)
2049 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2050 else
2051 align_jumps = 1 << i;
2052 }
2053 }
2054
2055 if (ix86_align_funcs_string)
2056 {
2057 warning (0, "-malign-functions is obsolete, use -falign-functions");
2058 if (align_functions == 0)
2059 {
2060 i = atoi (ix86_align_funcs_string);
2061 if (i < 0 || i > MAX_CODE_ALIGN)
2062 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2063 else
2064 align_functions = 1 << i;
2065 }
2066 }
2067
2068 /* Default align_* from the processor table. */
2069 if (align_loops == 0)
2070 {
2071 align_loops = processor_target_table[ix86_tune].align_loop;
2072 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2073 }
2074 if (align_jumps == 0)
2075 {
2076 align_jumps = processor_target_table[ix86_tune].align_jump;
2077 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2078 }
2079 if (align_functions == 0)
2080 {
2081 align_functions = processor_target_table[ix86_tune].align_func;
2082 }
2083
2084 /* Validate -mbranch-cost= value, or provide default. */
2085 ix86_branch_cost = ix86_cost->branch_cost;
2086 if (ix86_branch_cost_string)
2087 {
2088 i = atoi (ix86_branch_cost_string);
2089 if (i < 0 || i > 5)
2090 error ("-mbranch-cost=%d is not between 0 and 5", i);
2091 else
2092 ix86_branch_cost = i;
2093 }
2094 if (ix86_section_threshold_string)
2095 {
2096 i = atoi (ix86_section_threshold_string);
2097 if (i < 0)
2098 error ("-mlarge-data-threshold=%d is negative", i);
2099 else
2100 ix86_section_threshold = i;
2101 }
2102
2103 if (ix86_tls_dialect_string)
2104 {
2105 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2106 ix86_tls_dialect = TLS_DIALECT_GNU;
2107 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2108 ix86_tls_dialect = TLS_DIALECT_GNU2;
2109 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2110 ix86_tls_dialect = TLS_DIALECT_SUN;
2111 else
2112 error ("bad value (%s) for -mtls-dialect= switch",
2113 ix86_tls_dialect_string);
2114 }
2115
2116 if (ix87_precision_string)
2117 {
2118 i = atoi (ix87_precision_string);
2119 if (i != 32 && i != 64 && i != 80)
2120 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2121 }
2122
2123 /* Keep nonleaf frame pointers. */
2124 if (flag_omit_frame_pointer)
2125 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2126 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2127 flag_omit_frame_pointer = 1;
2128
2129 /* If we're doing fast math, we don't care about comparison order
2130 wrt NaNs. This lets us use a shorter comparison sequence. */
2131 if (flag_finite_math_only)
2132 target_flags &= ~MASK_IEEE_FP;
2133
2134 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2135 since the insns won't need emulation. */
2136 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2137 target_flags &= ~MASK_NO_FANCY_MATH_387;
2138
2139 /* Likewise, if the target doesn't have a 387, or we've specified
2140 software floating point, don't use 387 inline intrinsics. */
2141 if (!TARGET_80387)
2142 target_flags |= MASK_NO_FANCY_MATH_387;
2143
2144 /* Turn on SSE3 builtins for -mssse3. */
2145 if (TARGET_SSSE3)
2146 target_flags |= MASK_SSE3;
2147
2148 /* Turn on SSE3 builtins for -msse4a. */
2149 if (TARGET_SSE4A)
2150 target_flags |= MASK_SSE3;
2151
2152 /* Turn on SSE2 builtins for -msse3. */
2153 if (TARGET_SSE3)
2154 target_flags |= MASK_SSE2;
2155
2156 /* Turn on SSE builtins for -msse2. */
2157 if (TARGET_SSE2)
2158 target_flags |= MASK_SSE;
2159
2160 /* Turn on MMX builtins for -msse. */
2161 if (TARGET_SSE)
2162 {
2163 target_flags |= MASK_MMX & ~target_flags_explicit;
2164 x86_prefetch_sse = true;
2165 }
2166
2167 /* Turn on MMX builtins for 3Dnow. */
2168 if (TARGET_3DNOW)
2169 target_flags |= MASK_MMX;
2170
2171 /* Turn on POPCNT builtins for -mabm. */
2172 if (TARGET_ABM)
2173 target_flags |= MASK_POPCNT;
2174
2175 if (TARGET_64BIT)
2176 {
2177 if (TARGET_RTD)
2178 warning (0, "-mrtd is ignored in 64bit mode");
2179
2180 /* Enable by default the SSE and MMX builtins. Do allow the user to
2181 explicitly disable any of these. In particular, disabling SSE and
2182 MMX for kernel code is extremely useful. */
2183 target_flags
2184 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2185 & ~target_flags_explicit);
2186 }
2187 else
2188 {
2189 /* i386 ABI does not specify the red zone. It still makes sense to use it
2190 when the programmer takes care to keep the stack from being destroyed. */
2191 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2192 target_flags |= MASK_NO_RED_ZONE;
2193 }
2194
2195 /* Validate -mpreferred-stack-boundary= value, or provide default.
2196 The default of 128 bits is for Pentium III's SSE __m128. We can't
2197 lower it for optimize_size, because then object files compiled with
2198 -Os and -On could not be mixed. */
2199 ix86_preferred_stack_boundary = 128;
2200 if (ix86_preferred_stack_boundary_string)
2201 {
2202 i = atoi (ix86_preferred_stack_boundary_string);
2203 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2204 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2205 TARGET_64BIT ? 4 : 2);
2206 else
2207 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2208 }
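/* Worked example (illustrative comment only): -mpreferred-stack-boundary=4
   gives (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, i.e. 16-byte stack
   alignment, which matches the default chosen above.  */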
2209
2210 /* Accept -msseregparm only if at least SSE support is enabled. */
2211 if (TARGET_SSEREGPARM
2212 && ! TARGET_SSE)
2213 error ("-msseregparm used without SSE enabled");
2214
2215 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2216 if (ix86_fpmath_string != 0)
2217 {
2218 if (! strcmp (ix86_fpmath_string, "387"))
2219 ix86_fpmath = FPMATH_387;
2220 else if (! strcmp (ix86_fpmath_string, "sse"))
2221 {
2222 if (!TARGET_SSE)
2223 {
2224 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2225 ix86_fpmath = FPMATH_387;
2226 }
2227 else
2228 ix86_fpmath = FPMATH_SSE;
2229 }
2230 else if (! strcmp (ix86_fpmath_string, "387,sse")
2231 || ! strcmp (ix86_fpmath_string, "sse,387"))
2232 {
2233 if (!TARGET_SSE)
2234 {
2235 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2236 ix86_fpmath = FPMATH_387;
2237 }
2238 else if (!TARGET_80387)
2239 {
2240 warning (0, "387 instruction set disabled, using SSE arithmetics");
2241 ix86_fpmath = FPMATH_SSE;
2242 }
2243 else
2244 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2245 }
2246 else
2247 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2248 }
2249
2250 /* If the i387 is disabled, then do not return values in it. */
2251 if (!TARGET_80387)
2252 target_flags &= ~MASK_FLOAT_RETURNS;
2253
2254 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2255 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2256 && !optimize_size)
2257 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2258
2259 /* ??? Unwind info is not correct around the CFG unless either a frame
2260 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2261 unwind info generation to be aware of the CFG and propagating states
2262 around edges. */
2263 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2264 || flag_exceptions || flag_non_call_exceptions)
2265 && flag_omit_frame_pointer
2266 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2267 {
2268 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2269 warning (0, "unwind tables currently require either a frame pointer "
2270 "or -maccumulate-outgoing-args for correctness");
2271 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2272 }
2273
2274 /* For sane SSE instruction set generation we need the fcomi instruction.
2275 It is safe to enable all CMOVE instructions. */
2276 if (TARGET_SSE)
2277 TARGET_CMOVE = 1;
2278
2279 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2280 {
2281 char *p;
2282 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2283 p = strchr (internal_label_prefix, 'X');
2284 internal_label_prefix_len = p - internal_label_prefix;
2285 *p = '\0';
2286 }
2287
2288 /* When a scheduling description is not available, disable the scheduler pass
2289 so it won't slow down compilation or make x87 code slower. */
2290 if (!TARGET_SCHEDULE)
2291 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2292
2293 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2294 set_param_value ("simultaneous-prefetches",
2295 ix86_cost->simultaneous_prefetches);
2296 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2297 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2298 }
2299 \f
2300 /* Return true if this goes in large data/bss. */
2301
2302 static bool
2303 ix86_in_large_data_p (tree exp)
2304 {
2305 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2306 return false;
2307
2308 /* Functions are never large data. */
2309 if (TREE_CODE (exp) == FUNCTION_DECL)
2310 return false;
2311
2312 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2313 {
2314 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2315 if (strcmp (section, ".ldata") == 0
2316 || strcmp (section, ".lbss") == 0)
2317 return true;
2318 return false;
2319 }
2320 else
2321 {
2322 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2323
2324 /* If this is an incomplete type with size 0, then we can't put it
2325 in data because it might be too big when completed. */
2326 if (!size || size > ix86_section_threshold)
2327 return true;
2328 }
2329
2330 return false;
2331 }
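/* Illustrative example (not part of the original source): with
   -mcmodel=medium and the default -mlarge-data-threshold of 65536, a
   definition such as

     static char big_buffer[100000];

   exceeds the threshold and is therefore treated as large data, while an
   ordinary int variable is not.  */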
2332
2333 /* Switch to the appropriate section for output of DECL.
2334 DECL is either a `VAR_DECL' node or a constant of some sort.
2335 RELOC indicates whether forming the initial value of DECL requires
2336 link-time relocations. */
2337
2338 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2339 ATTRIBUTE_UNUSED;
2340
2341 static section *
2342 x86_64_elf_select_section (tree decl, int reloc,
2343 unsigned HOST_WIDE_INT align)
2344 {
2345 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2346 && ix86_in_large_data_p (decl))
2347 {
2348 const char *sname = NULL;
2349 unsigned int flags = SECTION_WRITE;
2350 switch (categorize_decl_for_section (decl, reloc))
2351 {
2352 case SECCAT_DATA:
2353 sname = ".ldata";
2354 break;
2355 case SECCAT_DATA_REL:
2356 sname = ".ldata.rel";
2357 break;
2358 case SECCAT_DATA_REL_LOCAL:
2359 sname = ".ldata.rel.local";
2360 break;
2361 case SECCAT_DATA_REL_RO:
2362 sname = ".ldata.rel.ro";
2363 break;
2364 case SECCAT_DATA_REL_RO_LOCAL:
2365 sname = ".ldata.rel.ro.local";
2366 break;
2367 case SECCAT_BSS:
2368 sname = ".lbss";
2369 flags |= SECTION_BSS;
2370 break;
2371 case SECCAT_RODATA:
2372 case SECCAT_RODATA_MERGE_STR:
2373 case SECCAT_RODATA_MERGE_STR_INIT:
2374 case SECCAT_RODATA_MERGE_CONST:
2375 sname = ".lrodata";
2376 flags = 0;
2377 break;
2378 case SECCAT_SRODATA:
2379 case SECCAT_SDATA:
2380 case SECCAT_SBSS:
2381 gcc_unreachable ();
2382 case SECCAT_TEXT:
2383 case SECCAT_TDATA:
2384 case SECCAT_TBSS:
2385 /* We don't split these for the medium model. Place them into
2386 default sections and hope for the best. */
2387 break;
2388 }
2389 if (sname)
2390 {
2391 /* We might get called with string constants, but get_named_section
2392 doesn't like them as they are not DECLs. Also, we need to set
2393 flags in that case. */
2394 if (!DECL_P (decl))
2395 return get_section (sname, flags, NULL);
2396 return get_named_section (decl, sname, reloc);
2397 }
2398 }
2399 return default_elf_select_section (decl, reloc, align);
2400 }
2401
2402 /* Build up a unique section name, expressed as a
2403 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2404 RELOC indicates whether the initial value of DECL requires
2405 link-time relocations. */
2406
2407 static void ATTRIBUTE_UNUSED
2408 x86_64_elf_unique_section (tree decl, int reloc)
2409 {
2410 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2411 && ix86_in_large_data_p (decl))
2412 {
2413 const char *prefix = NULL;
2414 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2415 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2416
2417 switch (categorize_decl_for_section (decl, reloc))
2418 {
2419 case SECCAT_DATA:
2420 case SECCAT_DATA_REL:
2421 case SECCAT_DATA_REL_LOCAL:
2422 case SECCAT_DATA_REL_RO:
2423 case SECCAT_DATA_REL_RO_LOCAL:
2424 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2425 break;
2426 case SECCAT_BSS:
2427 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2428 break;
2429 case SECCAT_RODATA:
2430 case SECCAT_RODATA_MERGE_STR:
2431 case SECCAT_RODATA_MERGE_STR_INIT:
2432 case SECCAT_RODATA_MERGE_CONST:
2433 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2434 break;
2435 case SECCAT_SRODATA:
2436 case SECCAT_SDATA:
2437 case SECCAT_SBSS:
2438 gcc_unreachable ();
2439 case SECCAT_TEXT:
2440 case SECCAT_TDATA:
2441 case SECCAT_TBSS:
2442 /* We don't split these for the medium model. Place them into
2443 default sections and hope for the best. */
2444 break;
2445 }
2446 if (prefix)
2447 {
2448 const char *name;
2449 size_t nlen, plen;
2450 char *string;
2451 plen = strlen (prefix);
2452
2453 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2454 name = targetm.strip_name_encoding (name);
2455 nlen = strlen (name);
2456
2457 string = alloca (nlen + plen + 1);
2458 memcpy (string, prefix, plen);
2459 memcpy (string + plen, name, nlen + 1);
2460
2461 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2462 return;
2463 }
2464 }
2465 default_unique_section (decl, reloc);
2466 }
2467
2468 #ifdef COMMON_ASM_OP
2469 /* This says how to output assembler code to declare an
2470 uninitialized external linkage data object.
2471
2472 For medium model x86-64 we need to use the .largecomm directive for
2473 large objects. */
2474 void
2475 x86_elf_aligned_common (FILE *file,
2476 const char *name, unsigned HOST_WIDE_INT size,
2477 int align)
2478 {
2479 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2480 && size > (unsigned int)ix86_section_threshold)
2481 fprintf (file, ".largecomm\t");
2482 else
2483 fprintf (file, "%s", COMMON_ASM_OP);
2484 assemble_name (file, name);
2485 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2486 size, align / BITS_PER_UNIT);
2487 }
2488 #endif
2489
2490 /* Utility function for targets to use in implementing
2491 ASM_OUTPUT_ALIGNED_BSS. */
2492
2493 void
2494 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2495 const char *name, unsigned HOST_WIDE_INT size,
2496 int align)
2497 {
2498 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2499 && size > (unsigned int)ix86_section_threshold)
2500 switch_to_section (get_named_section (decl, ".lbss", 0));
2501 else
2502 switch_to_section (bss_section);
2503 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2504 #ifdef ASM_DECLARE_OBJECT_NAME
2505 last_assemble_variable_decl = decl;
2506 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2507 #else
2508 /* The standard thing is just to output a label for the object. */
2509 ASM_OUTPUT_LABEL (file, name);
2510 #endif /* ASM_DECLARE_OBJECT_NAME */
2511 ASM_OUTPUT_SKIP (file, size ? size : 1);
2512 }
2513 \f
2514 void
2515 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2516 {
2517 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2518 make the problem with not enough registers even worse. */
2519 #ifdef INSN_SCHEDULING
2520 if (level > 1)
2521 flag_schedule_insns = 0;
2522 #endif
2523
2524 if (TARGET_MACHO)
2525 /* The Darwin libraries never set errno, so we might as well
2526 avoid calling them when that's the only reason we would. */
2527 flag_errno_math = 0;
2528
2529 /* The default values of these switches depend on TARGET_64BIT,
2530 which is not known at this moment. Mark these values with 2 and
2531 let the user override them. If there is no command line option
2532 specifying them, we will set the defaults in override_options. */
2533 if (optimize >= 1)
2534 flag_omit_frame_pointer = 2;
2535 flag_pcc_struct_return = 2;
2536 flag_asynchronous_unwind_tables = 2;
2537 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2538 SUBTARGET_OPTIMIZATION_OPTIONS;
2539 #endif
2540 }
2541 \f
2542 /* Decide whether we can make a sibling call to a function. DECL is the
2543 declaration of the function being targeted by the call and EXP is the
2544 CALL_EXPR representing the call. */
2545
2546 static bool
2547 ix86_function_ok_for_sibcall (tree decl, tree exp)
2548 {
2549 tree func;
2550 rtx a, b;
2551
2552 /* If we are generating position-independent code, we cannot sibcall
2553 optimize any indirect call, or a direct call to a global function,
2554 as the PLT requires %ebx be live. */
2555 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2556 return false;
2557
2558 if (decl)
2559 func = decl;
2560 else
2561 {
2562 func = TREE_TYPE (CALL_EXPR_FN (exp));
2563 if (POINTER_TYPE_P (func))
2564 func = TREE_TYPE (func);
2565 }
2566
2567 /* Check that the return value locations are the same. For example, if
2568 we are returning floats on the 80387 register stack, we cannot
2569 make a sibcall from a function that doesn't return a float to a
2570 function that does or, conversely, from a function that does return
2571 a float to a function that doesn't; the necessary stack adjustment
2572 would not be executed. This is also the place we notice
2573 differences in the return value ABI. Note that it is ok for one
2574 of the functions to have void return type as long as the return
2575 value of the other is passed in a register. */
2576 a = ix86_function_value (TREE_TYPE (exp), func, false);
2577 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2578 cfun->decl, false);
2579 if (STACK_REG_P (a) || STACK_REG_P (b))
2580 {
2581 if (!rtx_equal_p (a, b))
2582 return false;
2583 }
2584 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2585 ;
2586 else if (!rtx_equal_p (a, b))
2587 return false;
2588
2589 /* If this call is indirect, we'll need to be able to use a call-clobbered
2590 register for the address of the target function. Make sure that all
2591 such registers are not used for passing parameters. */
2592 if (!decl && !TARGET_64BIT)
2593 {
2594 tree type;
2595
2596 /* We're looking at the CALL_EXPR, we need the type of the function. */
2597 type = CALL_EXPR_FN (exp); /* pointer expression */
2598 type = TREE_TYPE (type); /* pointer type */
2599 type = TREE_TYPE (type); /* function type */
2600
2601 if (ix86_function_regparm (type, NULL) >= 3)
2602 {
2603 /* ??? Need to count the actual number of registers to be used,
2604 not the possible number of registers. Fix later. */
2605 return false;
2606 }
2607 }
2608
2609 /* Dllimport'd functions are also called indirectly. */
2610 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2611 && decl && DECL_DLLIMPORT_P (decl)
2612 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2613 return false;
2614
2615 /* If we force-aligned the stack, then sibcalling would unalign the
2616 stack, which may break the called function. */
2617 if (cfun->machine->force_align_arg_pointer)
2618 return false;
2619
2620 /* Otherwise okay. That also includes certain types of indirect calls. */
2621 return true;
2622 }
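/* Illustrative example (not part of the original source) of the return
   value check above: in 32-bit code with x87 return values,

     double callee (void);
     void caller (void) { callee (); }

   cannot be turned into a sibcall, because callee leaves its result on
   the 80387 register stack and caller, returning void, would not perform
   the required stack adjustment.  */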
2623
2624 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2625 calling convention attributes;
2626 arguments as in struct attribute_spec.handler. */
2627
2628 static tree
2629 ix86_handle_cconv_attribute (tree *node, tree name,
2630 tree args,
2631 int flags ATTRIBUTE_UNUSED,
2632 bool *no_add_attrs)
2633 {
2634 if (TREE_CODE (*node) != FUNCTION_TYPE
2635 && TREE_CODE (*node) != METHOD_TYPE
2636 && TREE_CODE (*node) != FIELD_DECL
2637 && TREE_CODE (*node) != TYPE_DECL)
2638 {
2639 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2640 IDENTIFIER_POINTER (name));
2641 *no_add_attrs = true;
2642 return NULL_TREE;
2643 }
2644
2645 /* Can combine regparm with all attributes but fastcall. */
2646 if (is_attribute_p ("regparm", name))
2647 {
2648 tree cst;
2649
2650 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2651 {
2652 error ("fastcall and regparm attributes are not compatible");
2653 }
2654
2655 cst = TREE_VALUE (args);
2656 if (TREE_CODE (cst) != INTEGER_CST)
2657 {
2658 warning (OPT_Wattributes,
2659 "%qs attribute requires an integer constant argument",
2660 IDENTIFIER_POINTER (name));
2661 *no_add_attrs = true;
2662 }
2663 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2664 {
2665 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2666 IDENTIFIER_POINTER (name), REGPARM_MAX);
2667 *no_add_attrs = true;
2668 }
2669
2670 if (!TARGET_64BIT
2671 && lookup_attribute (ix86_force_align_arg_pointer_string,
2672 TYPE_ATTRIBUTES (*node))
2673 && compare_tree_int (cst, REGPARM_MAX-1))
2674 {
2675 error ("%s functions limited to %d register parameters",
2676 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2677 }
2678
2679 return NULL_TREE;
2680 }
2681
2682 if (TARGET_64BIT)
2683 {
2684 /* Do not warn when emulating the MS ABI. */
2685 if (!TARGET_64BIT_MS_ABI)
2686 warning (OPT_Wattributes, "%qs attribute ignored",
2687 IDENTIFIER_POINTER (name));
2688 *no_add_attrs = true;
2689 return NULL_TREE;
2690 }
2691
2692 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2693 if (is_attribute_p ("fastcall", name))
2694 {
2695 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2696 {
2697 error ("fastcall and cdecl attributes are not compatible");
2698 }
2699 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2700 {
2701 error ("fastcall and stdcall attributes are not compatible");
2702 }
2703 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2704 {
2705 error ("fastcall and regparm attributes are not compatible");
2706 }
2707 }
2708
2709 /* Can combine stdcall with fastcall (redundant), regparm and
2710 sseregparm. */
2711 else if (is_attribute_p ("stdcall", name))
2712 {
2713 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2714 {
2715 error ("stdcall and cdecl attributes are not compatible");
2716 }
2717 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2718 {
2719 error ("stdcall and fastcall attributes are not compatible");
2720 }
2721 }
2722
2723 /* Can combine cdecl with regparm and sseregparm. */
2724 else if (is_attribute_p ("cdecl", name))
2725 {
2726 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2727 {
2728 error ("stdcall and cdecl attributes are not compatible");
2729 }
2730 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2731 {
2732 error ("fastcall and cdecl attributes are not compatible");
2733 }
2734 }
2735
2736 /* Can combine sseregparm with all attributes. */
2737
2738 return NULL_TREE;
2739 }
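/* Illustrative examples (not part of the original source) of the checks
   above, for 32-bit code:

     int __attribute__ ((fastcall, regparm (3))) f (int);   rejected with
         "fastcall and regparm attributes are not compatible"
     int __attribute__ ((stdcall, sseregparm)) g (float);   accepted

   sseregparm may be combined with any of the other calling convention
   attributes.  */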
2740
2741 /* Return 0 if the attributes for two types are incompatible, 1 if they
2742 are compatible, and 2 if they are nearly compatible (which causes a
2743 warning to be generated). */
2744
2745 static int
2746 ix86_comp_type_attributes (tree type1, tree type2)
2747 {
2748 /* Check for mismatch of non-default calling convention. */
2749 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2750
2751 if (TREE_CODE (type1) != FUNCTION_TYPE)
2752 return 1;
2753
2754 /* Check for mismatched fastcall/regparm types. */
2755 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2756 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2757 || (ix86_function_regparm (type1, NULL)
2758 != ix86_function_regparm (type2, NULL)))
2759 return 0;
2760
2761 /* Check for mismatched sseregparm types. */
2762 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2763 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2764 return 0;
2765
2766 /* Check for mismatched return types (cdecl vs stdcall). */
2767 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2768 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2769 return 0;
2770
2771 return 1;
2772 }
2773 \f
2774 /* Return the regparm value for a function with the indicated TYPE and DECL.
2775 DECL may be NULL when calling function indirectly
2776 or considering a libcall. */
2777
2778 static int
2779 ix86_function_regparm (tree type, tree decl)
2780 {
2781 tree attr;
2782 int regparm = ix86_regparm;
2783
2784 if (TARGET_64BIT)
2785 return regparm;
2786
2787 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2788 if (attr)
2789 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2790
2791 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2792 return 2;
2793
2794 /* Use register calling convention for local functions when possible. */
2795 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2796 && flag_unit_at_a_time && !profile_flag)
2797 {
2798 struct cgraph_local_info *i = cgraph_local_info (decl);
2799 if (i && i->local)
2800 {
2801 int local_regparm, globals = 0, regno;
2802 struct function *f;
2803
2804 /* Make sure no regparm register is taken by a
2805 global register variable. */
2806 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2807 if (global_regs[local_regparm])
2808 break;
2809
2810 /* We can't use regparm(3) for nested functions as these use
2811 the static chain pointer in the third argument. */
2812 if (local_regparm == 3
2813 && decl_function_context (decl)
2814 && !DECL_NO_STATIC_CHAIN (decl))
2815 local_regparm = 2;
2816
2817 /* If the function realigns its stack pointer, the prologue will
2818 clobber %ecx. If we've already generated code for the callee,
2819 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2820 scanning the attributes for the self-realigning property. */
2821 f = DECL_STRUCT_FUNCTION (decl);
2822 if (local_regparm == 3
2823 && (f ? !!f->machine->force_align_arg_pointer
2824 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2825 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2826 local_regparm = 2;
2827
2828 /* Each global register variable increases register pressure,
2829 so the more global reg vars there are, the less the regparm
2830 optimization can be used, unless requested by the user explicitly. */
2831 for (regno = 0; regno < 6; regno++)
2832 if (global_regs[regno])
2833 globals++;
2834 local_regparm
2835 = globals < local_regparm ? local_regparm - globals : 0;
2836
2837 if (local_regparm > regparm)
2838 regparm = local_regparm;
2839 }
2840 }
2841
2842 return regparm;
2843 }
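/* Illustrative example (not part of the original source): with
   -funit-at-a-time, a file-local function whose address is never taken,
   e.g.

     static int sum3 (int a, int b, int c) { return a + b + c; }

   is given up to regparm (3) automatically by the code above, so its
   arguments arrive in registers; nested functions, self-realigning
   functions and global register variables reduce that number.  */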
2844
2845 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2846 DFmode (2) arguments in SSE registers for a function with the
2847 indicated TYPE and DECL. DECL may be NULL when calling function
2848 indirectly or considering a libcall. Otherwise return 0. */
2849
2850 static int
2851 ix86_function_sseregparm (tree type, tree decl)
2852 {
2853 gcc_assert (!TARGET_64BIT);
2854
2855 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2856 by the sseregparm attribute. */
2857 if (TARGET_SSEREGPARM
2858 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2859 {
2860 if (!TARGET_SSE)
2861 {
2862 if (decl)
2863 error ("Calling %qD with attribute sseregparm without "
2864 "SSE/SSE2 enabled", decl);
2865 else
2866 error ("Calling %qT with attribute sseregparm without "
2867 "SSE/SSE2 enabled", type);
2868 return 0;
2869 }
2870
2871 return 2;
2872 }
2873
2874 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2875 (and DFmode for SSE2) arguments in SSE registers. */
2876 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2877 {
2878 struct cgraph_local_info *i = cgraph_local_info (decl);
2879 if (i && i->local)
2880 return TARGET_SSE2 ? 2 : 1;
2881 }
2882
2883 return 0;
2884 }
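/* Illustrative example (not part of the original source): in 32-bit code

     float __attribute__ ((sseregparm)) f (float x, float y);

   requests SFmode/DFmode argument passing in SSE registers; if SSE is not
   enabled, the function above reports an error instead.  */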
2885
2886 /* Return true if EAX is live at the start of the function. Used by
2887 ix86_expand_prologue to determine if we need special help before
2888 calling allocate_stack_worker. */
2889
2890 static bool
2891 ix86_eax_live_at_start_p (void)
2892 {
2893 /* Cheat. Don't bother working forward from ix86_function_regparm
2894 to the function type to whether an actual argument is located in
2895 eax. Instead just look at cfg info, which is still close enough
2896 to correct at this point. This gives false positives for broken
2897 functions that might use uninitialized data that happens to be
2898 allocated in eax, but who cares? */
2899 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2900 }
2901
2902 /* Return true if TYPE has a variable argument list. */
2903
2904 static bool
2905 type_has_variadic_args_p (tree type)
2906 {
2907 tree n, t = TYPE_ARG_TYPES (type);
2908
2909 if (t == NULL)
2910 return false;
2911
2912 while ((n = TREE_CHAIN (t)) != NULL)
2913 t = n;
2914
2915 return TREE_VALUE (t) != void_type_node;
2916 }
2917
2918 /* Value is the number of bytes of arguments automatically
2919 popped when returning from a subroutine call.
2920 FUNDECL is the declaration node of the function (as a tree),
2921 FUNTYPE is the data type of the function (as a tree),
2922 or for a library call it is an identifier node for the subroutine name.
2923 SIZE is the number of bytes of arguments passed on the stack.
2924
2925 On the 80386, the RTD insn may be used to pop them if the number
2926 of args is fixed, but if the number is variable then the caller
2927 must pop them all. RTD can't be used for library calls now
2928 because the library is compiled with the Unix compiler.
2929 Use of RTD is a selectable option, since it is incompatible with
2930 standard Unix calling sequences. If the option is not selected,
2931 the caller must always pop the args.
2932
2933 The attribute stdcall is equivalent to RTD on a per module basis. */
2934
2935 int
2936 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2937 {
2938 int rtd;
2939
2940 /* None of the 64-bit ABIs pop arguments. */
2941 if (TARGET_64BIT)
2942 return 0;
2943
2944 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2945
2946 /* Cdecl functions override -mrtd, and never pop the stack. */
2947 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2948 {
2949 /* Stdcall and fastcall functions will pop the stack if not
2950 variable args. */
2951 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2952 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2953 rtd = 1;
2954
2955 if (rtd && ! type_has_variadic_args_p (funtype))
2956 return size;
2957 }
2958
2959 /* Lose any fake structure return argument if it is passed on the stack. */
2960 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2961 && !KEEP_AGGREGATE_RETURN_POINTER)
2962 {
2963 int nregs = ix86_function_regparm (funtype, fundecl);
2964 if (nregs == 0)
2965 return GET_MODE_SIZE (Pmode);
2966 }
2967
2968 return 0;
2969 }
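/* Illustrative example (not part of the original source): for a 32-bit
   prototype such as

     int __attribute__ ((stdcall)) f (int a, int b);

   this function returns 8, so the callee pops its two argument words
   (e.g. with "ret $8"); a variadic or cdecl function returns 0 and the
   caller pops the arguments instead.  */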
2970 \f
2971 /* Argument support functions. */
2972
2973 /* Return true when register may be used to pass function parameters. */
2974 bool
2975 ix86_function_arg_regno_p (int regno)
2976 {
2977 int i;
2978 const int *parm_regs;
2979
2980 if (!TARGET_64BIT)
2981 {
2982 if (TARGET_MACHO)
2983 return (regno < REGPARM_MAX
2984 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2985 else
2986 return (regno < REGPARM_MAX
2987 || (TARGET_MMX && MMX_REGNO_P (regno)
2988 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2989 || (TARGET_SSE && SSE_REGNO_P (regno)
2990 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2991 }
2992
2993 if (TARGET_MACHO)
2994 {
2995 if (SSE_REGNO_P (regno) && TARGET_SSE)
2996 return true;
2997 }
2998 else
2999 {
3000 if (TARGET_SSE && SSE_REGNO_P (regno)
3001 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3002 return true;
3003 }
3004
3005 /* RAX is used as hidden argument to va_arg functions. */
3006 if (!TARGET_64BIT_MS_ABI && regno == 0)
3007 return true;
3008
3009 if (TARGET_64BIT_MS_ABI)
3010 parm_regs = x86_64_ms_abi_int_parameter_registers;
3011 else
3012 parm_regs = x86_64_int_parameter_registers;
3013 for (i = 0; i < REGPARM_MAX; i++)
3014 if (regno == parm_regs[i])
3015 return true;
3016 return false;
3017 }
3018
3019 /* Return if we do not know how to pass TYPE solely in registers. */
3020
3021 static bool
3022 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3023 {
3024 if (must_pass_in_stack_var_size_or_pad (mode, type))
3025 return true;
3026
3027 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3028 The layout_type routine is crafty and tries to trick us into passing
3029 currently unsupported vector types on the stack by using TImode. */
3030 return (!TARGET_64BIT && mode == TImode
3031 && type && TREE_CODE (type) != VECTOR_TYPE);
3032 }
3033
3034 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3035 for a call to a function whose data type is FNTYPE.
3036 For a library call, FNTYPE is 0. */
3037
3038 void
3039 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3040 tree fntype, /* tree ptr for function decl */
3041 rtx libname, /* SYMBOL_REF of library name or 0 */
3042 tree fndecl)
3043 {
3044 memset (cum, 0, sizeof (*cum));
3045
3046 /* Set up the number of registers to use for passing arguments. */
3047 cum->nregs = ix86_regparm;
3048 if (TARGET_SSE)
3049 cum->sse_nregs = SSE_REGPARM_MAX;
3050 if (TARGET_MMX)
3051 cum->mmx_nregs = MMX_REGPARM_MAX;
3052 cum->warn_sse = true;
3053 cum->warn_mmx = true;
3054 cum->maybe_vaarg = (fntype
3055 ? (!TYPE_ARG_TYPES (fntype)
3056 || type_has_variadic_args_p (fntype))
3057 : !libname);
3058
3059 if (!TARGET_64BIT)
3060 {
3061 /* If there are variable arguments, then we won't pass anything
3062 in registers in 32-bit mode. */
3063 if (cum->maybe_vaarg)
3064 {
3065 cum->nregs = 0;
3066 cum->sse_nregs = 0;
3067 cum->mmx_nregs = 0;
3068 cum->warn_sse = 0;
3069 cum->warn_mmx = 0;
3070 return;
3071 }
3072
3073 /* Use ecx and edx registers if function has fastcall attribute,
3074 else look for regparm information. */
3075 if (fntype)
3076 {
3077 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3078 {
3079 cum->nregs = 2;
3080 cum->fastcall = 1;
3081 }
3082 else
3083 cum->nregs = ix86_function_regparm (fntype, fndecl);
3084 }
3085
3086 /* Set up the number of SSE registers used for passing SFmode
3087 and DFmode arguments. Warn for mismatching ABI. */
3088 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3089 }
3090 }
3091
3092 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3093 But in the case of vector types, it is some vector mode.
3094
3095 When we have only some of our vector isa extensions enabled, then there
3096 are some modes for which vector_mode_supported_p is false. For these
3097 modes, the generic vector support in gcc will choose some non-vector mode
3098 in order to implement the type. By computing the natural mode, we'll
3099 select the proper ABI location for the operand and not depend on whatever
3100 the middle-end decides to do with these vector types. */
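
/* Illustration (example only, not compiler code): given

     typedef int v4si __attribute__ ((vector_size (16)));

   the middle-end may lay v4si out in some non-vector mode (e.g. TImode)
   when SSE is disabled, but type_natural_mode below still returns
   V4SImode, so the ABI classification of the argument does not change
   with the enabled vector ISA extensions.  */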
3101
3102 static enum machine_mode
3103 type_natural_mode (tree type)
3104 {
3105 enum machine_mode mode = TYPE_MODE (type);
3106
3107 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3108 {
3109 HOST_WIDE_INT size = int_size_in_bytes (type);
3110 if ((size == 8 || size == 16)
3111 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3112 && TYPE_VECTOR_SUBPARTS (type) > 1)
3113 {
3114 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3115
3116 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3117 mode = MIN_MODE_VECTOR_FLOAT;
3118 else
3119 mode = MIN_MODE_VECTOR_INT;
3120
3121 /* Get the mode which has this inner mode and number of units. */
3122 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3123 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3124 && GET_MODE_INNER (mode) == innermode)
3125 return mode;
3126
3127 gcc_unreachable ();
3128 }
3129 }
3130
3131 return mode;
3132 }
3133
3134 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3135 this may not agree with the mode that the type system has chosen for the
3136 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3137 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3138
3139 static rtx
3140 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3141 unsigned int regno)
3142 {
3143 rtx tmp;
3144
3145 if (orig_mode != BLKmode)
3146 tmp = gen_rtx_REG (orig_mode, regno);
3147 else
3148 {
3149 tmp = gen_rtx_REG (mode, regno);
3150 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3151 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3152 }
3153
3154 return tmp;
3155 }
3156
3157 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3158 The goal of this code is to classify each eightbyte (8-byte chunk) of an
3159 incoming argument by register class and assign registers accordingly. */
3160
3161 /* Return the union class of CLASS1 and CLASS2.
3162 See the x86-64 PS ABI for details. */
3163
3164 static enum x86_64_reg_class
3165 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3166 {
3167 /* Rule #1: If both classes are equal, this is the resulting class. */
3168 if (class1 == class2)
3169 return class1;
3170
3171 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3172 the other class. */
3173 if (class1 == X86_64_NO_CLASS)
3174 return class2;
3175 if (class2 == X86_64_NO_CLASS)
3176 return class1;
3177
3178 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3179 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3180 return X86_64_MEMORY_CLASS;
3181
3182 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3183 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3184 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3185 return X86_64_INTEGERSI_CLASS;
3186 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3187 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3188 return X86_64_INTEGER_CLASS;
3189
3190 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3191 MEMORY is used. */
3192 if (class1 == X86_64_X87_CLASS
3193 || class1 == X86_64_X87UP_CLASS
3194 || class1 == X86_64_COMPLEX_X87_CLASS
3195 || class2 == X86_64_X87_CLASS
3196 || class2 == X86_64_X87UP_CLASS
3197 || class2 == X86_64_COMPLEX_X87_CLASS)
3198 return X86_64_MEMORY_CLASS;
3199
3200 /* Rule #6: Otherwise class SSE is used. */
3201 return X86_64_SSE_CLASS;
3202 }
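
/* Worked example for merge_classes above (illustration only): for

     struct s { float f; int i; };

   both fields fall into the first eightbyte; the float contributes
   X86_64_SSESF_CLASS and the int X86_64_INTEGER_CLASS, which rule #4
   merges to X86_64_INTEGER_CLASS, so the whole 8-byte struct is passed
   in a single general purpose register.  */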
3203
3204 /* Classify the argument of type TYPE and mode MODE.
3205 CLASSES will be filled by the register class used to pass each word
3206 of the operand. The number of words is returned. In case the parameter
3207 should be passed in memory, 0 is returned. As a special case for zero
3208 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3209
3210 BIT_OFFSET is used internally for handling records; it specifies the
3211 offset of the argument in bits, modulo 256, to avoid overflow cases.
3212
3213 See the x86-64 PS ABI for details.
3214 */
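
/* A few worked examples (illustration only; these follow from the rules
   implemented below):

     struct p { double x, y; };       16 bytes -> { SSEDF, SSEDF }:
                                      passed in two SSE registers;
     struct q { long l; double d; };  16 bytes -> { INTEGER, SSEDF }:
                                      one integer and one SSE register;
     struct r { double a, b, c; };    24 bytes -> larger than 16 bytes,
                                      so 0 is returned and the argument
                                      is passed in memory.  */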
3215
3216 static int
3217 classify_argument (enum machine_mode mode, tree type,
3218 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3219 {
3220 HOST_WIDE_INT bytes =
3221 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3222 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3223
3224 /* Variable sized entities are always passed/returned in memory. */
3225 if (bytes < 0)
3226 return 0;
3227
3228 if (mode != VOIDmode
3229 && targetm.calls.must_pass_in_stack (mode, type))
3230 return 0;
3231
3232 if (type && AGGREGATE_TYPE_P (type))
3233 {
3234 int i;
3235 tree field;
3236 enum x86_64_reg_class subclasses[MAX_CLASSES];
3237
3238 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3239 if (bytes > 16)
3240 return 0;
3241
3242 for (i = 0; i < words; i++)
3243 classes[i] = X86_64_NO_CLASS;
3244
3245 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3246 signal the memory class, so handle this as a special case. */
3247 if (!words)
3248 {
3249 classes[0] = X86_64_NO_CLASS;
3250 return 1;
3251 }
3252
3253 /* Classify each field of record and merge classes. */
3254 switch (TREE_CODE (type))
3255 {
3256 case RECORD_TYPE:
3257 /* And now merge the fields of structure. */
3258 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3259 {
3260 if (TREE_CODE (field) == FIELD_DECL)
3261 {
3262 int num;
3263
3264 if (TREE_TYPE (field) == error_mark_node)
3265 continue;
3266
3267 /* Bitfields are always classified as integer. Handle them
3268 early, since later code would consider them to be
3269 misaligned integers. */
3270 if (DECL_BIT_FIELD (field))
3271 {
3272 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3273 i < ((int_bit_position (field) + (bit_offset % 64))
3274 + tree_low_cst (DECL_SIZE (field), 0)
3275 + 63) / 8 / 8; i++)
3276 classes[i] =
3277 merge_classes (X86_64_INTEGER_CLASS,
3278 classes[i]);
3279 }
3280 else
3281 {
3282 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3283 TREE_TYPE (field), subclasses,
3284 (int_bit_position (field)
3285 + bit_offset) % 256);
3286 if (!num)
3287 return 0;
3288 for (i = 0; i < num; i++)
3289 {
3290 int pos =
3291 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3292 classes[i + pos] =
3293 merge_classes (subclasses[i], classes[i + pos]);
3294 }
3295 }
3296 }
3297 }
3298 break;
3299
3300 case ARRAY_TYPE:
3301 /* Arrays are handled as small records. */
3302 {
3303 int num;
3304 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3305 TREE_TYPE (type), subclasses, bit_offset);
3306 if (!num)
3307 return 0;
3308
3309 /* The partial classes are now full classes. */
3310 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3311 subclasses[0] = X86_64_SSE_CLASS;
3312 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3313 subclasses[0] = X86_64_INTEGER_CLASS;
3314
3315 for (i = 0; i < words; i++)
3316 classes[i] = subclasses[i % num];
3317
3318 break;
3319 }
3320 case UNION_TYPE:
3321 case QUAL_UNION_TYPE:
3322 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3324 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3325 {
3326 if (TREE_CODE (field) == FIELD_DECL)
3327 {
3328 int num;
3329
3330 if (TREE_TYPE (field) == error_mark_node)
3331 continue;
3332
3333 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3334 TREE_TYPE (field), subclasses,
3335 bit_offset);
3336 if (!num)
3337 return 0;
3338 for (i = 0; i < num; i++)
3339 classes[i] = merge_classes (subclasses[i], classes[i]);
3340 }
3341 }
3342 break;
3343
3344 default:
3345 gcc_unreachable ();
3346 }
3347
3348 /* Final merger cleanup. */
3349 for (i = 0; i < words; i++)
3350 {
3351 /* If one class is MEMORY, everything should be passed in
3352 memory. */
3353 if (classes[i] == X86_64_MEMORY_CLASS)
3354 return 0;
3355
3356 /* The X86_64_SSEUP_CLASS should always be preceded by
3357 X86_64_SSE_CLASS. */
3358 if (classes[i] == X86_64_SSEUP_CLASS
3359 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3360 classes[i] = X86_64_SSE_CLASS;
3361
3362 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3363 if (classes[i] == X86_64_X87UP_CLASS
3364 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3365 classes[i] = X86_64_SSE_CLASS;
3366 }
3367 return words;
3368 }
3369
3370 /* Compute the alignment needed. We align all types to their natural
3371 boundaries, with the exception of XFmode, which is checked against 128 bits. */
3372 if (mode != VOIDmode && mode != BLKmode)
3373 {
3374 int mode_alignment = GET_MODE_BITSIZE (mode);
3375
3376 if (mode == XFmode)
3377 mode_alignment = 128;
3378 else if (mode == XCmode)
3379 mode_alignment = 256;
3380 if (COMPLEX_MODE_P (mode))
3381 mode_alignment /= 2;
3382 /* Misaligned fields are always returned in memory. */
3383 if (bit_offset % mode_alignment)
3384 return 0;
3385 }
3386
3387 /* For V1xx modes, just use the base mode. */
3388 if (VECTOR_MODE_P (mode)
3389 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3390 mode = GET_MODE_INNER (mode);
3391
3392 /* Classification of atomic types. */
3393 switch (mode)
3394 {
3395 case SDmode:
3396 case DDmode:
3397 classes[0] = X86_64_SSE_CLASS;
3398 return 1;
3399 case TDmode:
3400 classes[0] = X86_64_SSE_CLASS;
3401 classes[1] = X86_64_SSEUP_CLASS;
3402 return 2;
3403 case DImode:
3404 case SImode:
3405 case HImode:
3406 case QImode:
3407 case CSImode:
3408 case CHImode:
3409 case CQImode:
3410 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3411 classes[0] = X86_64_INTEGERSI_CLASS;
3412 else
3413 classes[0] = X86_64_INTEGER_CLASS;
3414 return 1;
3415 case CDImode:
3416 case TImode:
3417 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3418 return 2;
3419 case CTImode:
3420 return 0;
3421 case SFmode:
3422 if (!(bit_offset % 64))
3423 classes[0] = X86_64_SSESF_CLASS;
3424 else
3425 classes[0] = X86_64_SSE_CLASS;
3426 return 1;
3427 case DFmode:
3428 classes[0] = X86_64_SSEDF_CLASS;
3429 return 1;
3430 case XFmode:
3431 classes[0] = X86_64_X87_CLASS;
3432 classes[1] = X86_64_X87UP_CLASS;
3433 return 2;
3434 case TFmode:
3435 classes[0] = X86_64_SSE_CLASS;
3436 classes[1] = X86_64_SSEUP_CLASS;
3437 return 2;
3438 case SCmode:
3439 classes[0] = X86_64_SSE_CLASS;
3440 return 1;
3441 case DCmode:
3442 classes[0] = X86_64_SSEDF_CLASS;
3443 classes[1] = X86_64_SSEDF_CLASS;
3444 return 2;
3445 case XCmode:
3446 classes[0] = X86_64_COMPLEX_X87_CLASS;
3447 return 1;
3448 case TCmode:
3449 /* This mode is larger than 16 bytes. */
3450 return 0;
3451 case V4SFmode:
3452 case V4SImode:
3453 case V16QImode:
3454 case V8HImode:
3455 case V2DFmode:
3456 case V2DImode:
3457 classes[0] = X86_64_SSE_CLASS;
3458 classes[1] = X86_64_SSEUP_CLASS;
3459 return 2;
3460 case V2SFmode:
3461 case V2SImode:
3462 case V4HImode:
3463 case V8QImode:
3464 classes[0] = X86_64_SSE_CLASS;
3465 return 1;
3466 case BLKmode:
3467 case VOIDmode:
3468 return 0;
3469 default:
3470 gcc_assert (VECTOR_MODE_P (mode));
3471
3472 if (bytes > 16)
3473 return 0;
3474
3475 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3476
3477 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3478 classes[0] = X86_64_INTEGERSI_CLASS;
3479 else
3480 classes[0] = X86_64_INTEGER_CLASS;
3481 classes[1] = X86_64_INTEGER_CLASS;
3482 return 1 + (bytes > 8);
3483 }
3484 }
3485
3486 /* Examine the argument and set the number of registers required in each
3487 class. Return 0 iff the parameter should be passed in memory. */
3488 static int
3489 examine_argument (enum machine_mode mode, tree type, int in_return,
3490 int *int_nregs, int *sse_nregs)
3491 {
3492 enum x86_64_reg_class class[MAX_CLASSES];
3493 int n = classify_argument (mode, type, class, 0);
3494
3495 *int_nregs = 0;
3496 *sse_nregs = 0;
3497 if (!n)
3498 return 0;
3499 for (n--; n >= 0; n--)
3500 switch (class[n])
3501 {
3502 case X86_64_INTEGER_CLASS:
3503 case X86_64_INTEGERSI_CLASS:
3504 (*int_nregs)++;
3505 break;
3506 case X86_64_SSE_CLASS:
3507 case X86_64_SSESF_CLASS:
3508 case X86_64_SSEDF_CLASS:
3509 (*sse_nregs)++;
3510 break;
3511 case X86_64_NO_CLASS:
3512 case X86_64_SSEUP_CLASS:
3513 break;
3514 case X86_64_X87_CLASS:
3515 case X86_64_X87UP_CLASS:
3516 if (!in_return)
3517 return 0;
3518 break;
3519 case X86_64_COMPLEX_X87_CLASS:
3520 return in_return ? 2 : 0;
3521 case X86_64_MEMORY_CLASS:
3522 gcc_unreachable ();
3523 }
3524 return 1;
3525 }
3526
3527 /* Construct container for the argument used by GCC interface. See
3528 FUNCTION_ARG for the detailed description. */
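
/* Illustration (example only; the exact RTL is built below): for an
   argument of type struct { long l; double d; } with %rdi and %xmm0
   still free, the container is roughly

     (parallel [(expr_list (reg:DI di) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte travels in an integer register and the
   second in an SSE register, at byte offsets 0 and 8 of the argument.  */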
3529
3530 static rtx
3531 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3532 tree type, int in_return, int nintregs, int nsseregs,
3533 const int *intreg, int sse_regno)
3534 {
3535 /* The following variables hold the static issued_error state. */
3536 static bool issued_sse_arg_error;
3537 static bool issued_sse_ret_error;
3538 static bool issued_x87_ret_error;
3539
3540 enum machine_mode tmpmode;
3541 int bytes =
3542 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3543 enum x86_64_reg_class class[MAX_CLASSES];
3544 int n;
3545 int i;
3546 int nexps = 0;
3547 int needed_sseregs, needed_intregs;
3548 rtx exp[MAX_CLASSES];
3549 rtx ret;
3550
3551 n = classify_argument (mode, type, class, 0);
3552 if (!n)
3553 return NULL;
3554 if (!examine_argument (mode, type, in_return, &needed_intregs,
3555 &needed_sseregs))
3556 return NULL;
3557 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3558 return NULL;
3559
3560 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3561 some less clueful developer tries to use floating-point anyway. */
3562 if (needed_sseregs && !TARGET_SSE)
3563 {
3564 if (in_return)
3565 {
3566 if (!issued_sse_ret_error)
3567 {
3568 error ("SSE register return with SSE disabled");
3569 issued_sse_ret_error = true;
3570 }
3571 }
3572 else if (!issued_sse_arg_error)
3573 {
3574 error ("SSE register argument with SSE disabled");
3575 issued_sse_arg_error = true;
3576 }
3577 return NULL;
3578 }
3579
3580 /* Likewise, error if the ABI requires us to return values in the
3581 x87 registers and the user specified -mno-80387. */
3582 if (!TARGET_80387 && in_return)
3583 for (i = 0; i < n; i++)
3584 if (class[i] == X86_64_X87_CLASS
3585 || class[i] == X86_64_X87UP_CLASS
3586 || class[i] == X86_64_COMPLEX_X87_CLASS)
3587 {
3588 if (!issued_x87_ret_error)
3589 {
3590 error ("x87 register return with x87 disabled");
3591 issued_x87_ret_error = true;
3592 }
3593 return NULL;
3594 }
3595
3596 /* First construct simple cases. Avoid SCmode, since we want to use a
3597 single register to pass this type. */
3598 if (n == 1 && mode != SCmode)
3599 switch (class[0])
3600 {
3601 case X86_64_INTEGER_CLASS:
3602 case X86_64_INTEGERSI_CLASS:
3603 return gen_rtx_REG (mode, intreg[0]);
3604 case X86_64_SSE_CLASS:
3605 case X86_64_SSESF_CLASS:
3606 case X86_64_SSEDF_CLASS:
3607 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3608 case X86_64_X87_CLASS:
3609 case X86_64_COMPLEX_X87_CLASS:
3610 return gen_rtx_REG (mode, FIRST_STACK_REG);
3611 case X86_64_NO_CLASS:
3612 /* Zero sized array, struct or class. */
3613 return NULL;
3614 default:
3615 gcc_unreachable ();
3616 }
3617 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3618 && mode != BLKmode)
3619 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3620
3621 if (n == 2
3622 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3623 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3624 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3625 && class[1] == X86_64_INTEGER_CLASS
3626 && (mode == CDImode || mode == TImode || mode == TFmode)
3627 && intreg[0] + 1 == intreg[1])
3628 return gen_rtx_REG (mode, intreg[0]);
3629
3630 /* Otherwise figure out the entries of the PARALLEL. */
3631 for (i = 0; i < n; i++)
3632 {
3633 switch (class[i])
3634 {
3635 case X86_64_NO_CLASS:
3636 break;
3637 case X86_64_INTEGER_CLASS:
3638 case X86_64_INTEGERSI_CLASS:
3639 /* Merge TImodes on aligned occasions here too. */
3640 if (i * 8 + 8 > bytes)
3641 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3642 else if (class[i] == X86_64_INTEGERSI_CLASS)
3643 tmpmode = SImode;
3644 else
3645 tmpmode = DImode;
3646 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3647 if (tmpmode == BLKmode)
3648 tmpmode = DImode;
3649 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3650 gen_rtx_REG (tmpmode, *intreg),
3651 GEN_INT (i*8));
3652 intreg++;
3653 break;
3654 case X86_64_SSESF_CLASS:
3655 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3656 gen_rtx_REG (SFmode,
3657 SSE_REGNO (sse_regno)),
3658 GEN_INT (i*8));
3659 sse_regno++;
3660 break;
3661 case X86_64_SSEDF_CLASS:
3662 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3663 gen_rtx_REG (DFmode,
3664 SSE_REGNO (sse_regno)),
3665 GEN_INT (i*8));
3666 sse_regno++;
3667 break;
3668 case X86_64_SSE_CLASS:
3669 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3670 tmpmode = TImode;
3671 else
3672 tmpmode = DImode;
3673 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3674 gen_rtx_REG (tmpmode,
3675 SSE_REGNO (sse_regno)),
3676 GEN_INT (i*8));
3677 if (tmpmode == TImode)
3678 i++;
3679 sse_regno++;
3680 break;
3681 default:
3682 gcc_unreachable ();
3683 }
3684 }
3685
3686 /* Empty aligned struct, union or class. */
3687 if (nexps == 0)
3688 return NULL;
3689
3690 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3691 for (i = 0; i < nexps; i++)
3692 XVECEXP (ret, 0, i) = exp [i];
3693 return ret;
3694 }
3695
3696 /* Update the data in CUM to advance over an argument of mode MODE
3697 and data type TYPE. (TYPE is null for libcalls where that information
3698 may not be available.) */
3699
3700 static void
3701 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3702 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3703 {
3704 switch (mode)
3705 {
3706 default:
3707 break;
3708
3709 case BLKmode:
3710 if (bytes < 0)
3711 break;
3712 /* FALLTHRU */
3713
3714 case DImode:
3715 case SImode:
3716 case HImode:
3717 case QImode:
3718 cum->words += words;
3719 cum->nregs -= words;
3720 cum->regno += words;
3721
3722 if (cum->nregs <= 0)
3723 {
3724 cum->nregs = 0;
3725 cum->regno = 0;
3726 }
3727 break;
3728
3729 case DFmode:
3730 if (cum->float_in_sse < 2)
3731 break;
3732 case SFmode:
3733 if (cum->float_in_sse < 1)
3734 break;
3735 /* FALLTHRU */
3736
3737 case TImode:
3738 case V16QImode:
3739 case V8HImode:
3740 case V4SImode:
3741 case V2DImode:
3742 case V4SFmode:
3743 case V2DFmode:
3744 if (!type || !AGGREGATE_TYPE_P (type))
3745 {
3746 cum->sse_words += words;
3747 cum->sse_nregs -= 1;
3748 cum->sse_regno += 1;
3749 if (cum->sse_nregs <= 0)
3750 {
3751 cum->sse_nregs = 0;
3752 cum->sse_regno = 0;
3753 }
3754 }
3755 break;
3756
3757 case V8QImode:
3758 case V4HImode:
3759 case V2SImode:
3760 case V2SFmode:
3761 if (!type || !AGGREGATE_TYPE_P (type))
3762 {
3763 cum->mmx_words += words;
3764 cum->mmx_nregs -= 1;
3765 cum->mmx_regno += 1;
3766 if (cum->mmx_nregs <= 0)
3767 {
3768 cum->mmx_nregs = 0;
3769 cum->mmx_regno = 0;
3770 }
3771 }
3772 break;
3773 }
3774 }
3775
3776 static void
3777 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3778 tree type, HOST_WIDE_INT words)
3779 {
3780 int int_nregs, sse_nregs;
3781
3782 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3783 cum->words += words;
3784 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3785 {
3786 cum->nregs -= int_nregs;
3787 cum->sse_nregs -= sse_nregs;
3788 cum->regno += int_nregs;
3789 cum->sse_regno += sse_nregs;
3790 }
3791 else
3792 cum->words += words;
3793 }
3794
3795 static void
3796 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3797 HOST_WIDE_INT words)
3798 {
3799 /* Otherwise, this should be passed indirect. */
3800 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3801
3802 cum->words += words;
3803 if (cum->nregs > 0)
3804 {
3805 cum->nregs -= 1;
3806 cum->regno += 1;
3807 }
3808 }
3809
3810 void
3811 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3812 tree type, int named ATTRIBUTE_UNUSED)
3813 {
3814 HOST_WIDE_INT bytes, words;
3815
3816 if (mode == BLKmode)
3817 bytes = int_size_in_bytes (type);
3818 else
3819 bytes = GET_MODE_SIZE (mode);
3820 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3821
3822 if (type)
3823 mode = type_natural_mode (type);
3824
3825 if (TARGET_64BIT_MS_ABI)
3826 function_arg_advance_ms_64 (cum, bytes, words);
3827 else if (TARGET_64BIT)
3828 function_arg_advance_64 (cum, mode, type, words);
3829 else
3830 function_arg_advance_32 (cum, mode, type, bytes, words);
3831 }
3832
3833 /* Define where to put the arguments to a function.
3834 Value is zero to push the argument on the stack,
3835 or a hard register in which to store the argument.
3836
3837 MODE is the argument's machine mode.
3838 TYPE is the data type of the argument (as a tree).
3839 This is null for libcalls where that information may
3840 not be available.
3841 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3842 the preceding args and about the function being called.
3843 NAMED is nonzero if this argument is a named parameter
3844 (otherwise it is an extra parameter matching an ellipsis). */
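
/* Illustration for the 32-bit case handled below (example only): with

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   A is passed in %ecx, B in %edx and C on the stack, whereas a plain
   cdecl function passes all three on the stack and a regparm(3)
   function passes them in %eax, %edx and %ecx.  */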
3845
3846 static rtx
3847 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3848 enum machine_mode orig_mode, tree type,
3849 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3850 {
3851 static bool warnedsse, warnedmmx;
3852
3853 /* Avoid the AL settings for the Unix64 ABI. */
3854 if (mode == VOIDmode)
3855 return constm1_rtx;
3856
3857 switch (mode)
3858 {
3859 default:
3860 break;
3861
3862 case BLKmode:
3863 if (bytes < 0)
3864 break;
3865 /* FALLTHRU */
3866 case DImode:
3867 case SImode:
3868 case HImode:
3869 case QImode:
3870 if (words <= cum->nregs)
3871 {
3872 int regno = cum->regno;
3873
3874 /* Fastcall allocates the first two DWORD (SImode) or
3875 smaller arguments to ECX and EDX. */
3876 if (cum->fastcall)
3877 {
3878 if (mode == BLKmode || mode == DImode)
3879 break;
3880
3881 /* ECX, not EAX, is the first allocated register. */
3882 if (regno == 0)
3883 regno = 2;
3884 }
3885 return gen_rtx_REG (mode, regno);
3886 }
3887 break;
3888
3889 case DFmode:
3890 if (cum->float_in_sse < 2)
3891 break;
3892 case SFmode:
3893 if (cum->float_in_sse < 1)
3894 break;
3895 /* FALLTHRU */
3896 case TImode:
3897 case V16QImode:
3898 case V8HImode:
3899 case V4SImode:
3900 case V2DImode:
3901 case V4SFmode:
3902 case V2DFmode:
3903 if (!type || !AGGREGATE_TYPE_P (type))
3904 {
3905 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3906 {
3907 warnedsse = true;
3908 warning (0, "SSE vector argument without SSE enabled "
3909 "changes the ABI");
3910 }
3911 if (cum->sse_nregs)
3912 return gen_reg_or_parallel (mode, orig_mode,
3913 cum->sse_regno + FIRST_SSE_REG);
3914 }
3915 break;
3916
3917 case V8QImode:
3918 case V4HImode:
3919 case V2SImode:
3920 case V2SFmode:
3921 if (!type || !AGGREGATE_TYPE_P (type))
3922 {
3923 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3924 {
3925 warnedmmx = true;
3926 warning (0, "MMX vector argument without MMX enabled "
3927 "changes the ABI");
3928 }
3929 if (cum->mmx_nregs)
3930 return gen_reg_or_parallel (mode, orig_mode,
3931 cum->mmx_regno + FIRST_MMX_REG);
3932 }
3933 break;
3934 }
3935
3936 return NULL_RTX;
3937 }
3938
3939 static rtx
3940 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3941 enum machine_mode orig_mode, tree type)
3942 {
3943 /* Handle a hidden AL argument containing the number of SSE registers
3944 used, for varargs x86-64 functions. */
3945 if (mode == VOIDmode)
3946 return GEN_INT (cum->maybe_vaarg
3947 ? (cum->sse_nregs < 0
3948 ? SSE_REGPARM_MAX
3949 : cum->sse_regno)
3950 : -1);
3951
3952 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3953 cum->sse_nregs,
3954 &x86_64_int_parameter_registers [cum->regno],
3955 cum->sse_regno);
3956 }
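
/* Illustration of the hidden AL argument handled by function_arg_64
   above (example only): for a variadic call such as

     printf ("%f\n", 3.14);

   one SSE register (%xmm0) carries an FP argument, so the caller loads
   %al with 1 before the call; the callee's prologue uses that value as
   an upper bound on how many SSE registers it must dump into the
   register save area.  */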
3957
3958 static rtx
3959 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3960 enum machine_mode orig_mode, int named)
3961 {
3962 unsigned int regno;
3963
3964 /* Avoid the AL settings for the Unix64 ABI. */
3965 if (mode == VOIDmode)
3966 return constm1_rtx;
3967
3968 /* If we've run out of registers, it goes on the stack. */
3969 if (cum->nregs == 0)
3970 return NULL_RTX;
3971
3972 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3973
3974 /* Only floating point modes are passed in anything but integer regs. */
3975 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3976 {
3977 if (named)
3978 regno = cum->regno + FIRST_SSE_REG;
3979 else
3980 {
3981 rtx t1, t2;
3982
3983 /* Unnamed floating parameters are passed in both the
3984 SSE and integer registers. */
3985 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3986 t2 = gen_rtx_REG (mode, regno);
3987 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3988 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3989 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3990 }
3991 }
3992
3993 return gen_reg_or_parallel (mode, orig_mode, regno);
3994 }
3995
3996 rtx
3997 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3998 tree type, int named)
3999 {
4000 enum machine_mode mode = omode;
4001 HOST_WIDE_INT bytes, words;
4002
4003 if (mode == BLKmode)
4004 bytes = int_size_in_bytes (type);
4005 else
4006 bytes = GET_MODE_SIZE (mode);
4007 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4008
4009 /* To simplify the code below, represent vector types with a vector mode
4010 even if MMX/SSE are not active. */
4011 if (type && TREE_CODE (type) == VECTOR_TYPE)
4012 mode = type_natural_mode (type);
4013
4014 if (TARGET_64BIT_MS_ABI)
4015 return function_arg_ms_64 (cum, mode, omode, named);
4016 else if (TARGET_64BIT)
4017 return function_arg_64 (cum, mode, omode, type);
4018 else
4019 return function_arg_32 (cum, mode, omode, type, bytes, words);
4020 }
4021
4022 /* A C expression that indicates when an argument must be passed by
4023 reference. If nonzero for an argument, a copy of that argument is
4024 made in memory and a pointer to the argument is passed instead of
4025 the argument itself. The pointer is passed in whatever way is
4026 appropriate for passing a pointer to that type. */
4027
4028 static bool
4029 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4030 enum machine_mode mode ATTRIBUTE_UNUSED,
4031 tree type, bool named ATTRIBUTE_UNUSED)
4032 {
4033 if (TARGET_64BIT_MS_ABI)
4034 {
4035 if (type)
4036 {
4037 /* Arrays are passed by reference. */
4038 if (TREE_CODE (type) == ARRAY_TYPE)
4039 return true;
4040
4041 if (AGGREGATE_TYPE_P (type))
4042 {
4043 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4044 are passed by reference. */
4045 int el2 = exact_log2 (int_size_in_bytes (type));
4046 return !(el2 >= 0 && el2 <= 3);
4047 }
4048 }
4049
4050 /* __m128 is passed by reference. */
4051 /* ??? How to handle complex? For now treat them as structs,
4052 and pass them by reference if they're too large. */
4053 if (GET_MODE_SIZE (mode) > 8)
4054 return true;
4055 }
4056 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4057 return 1;
4058
4059 return 0;
4060 }
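
/* Worked example for the 64-bit MS ABI branch of ix86_pass_by_reference
   above (illustration only):

     struct { int a, b; }    8 bytes, a power of two  -> passed by value;
     struct { char c[3]; }   3 bytes, not 1/2/4/8     -> passed by reference;
     __m128                  16 bytes                 -> passed by reference.

   In the SysV x86-64 branch only variable-sized types take the
   by-reference path here.  */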
4061
4062 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4063 passing ABI. Only called if TARGET_SSE. */
4064 static bool
4065 contains_128bit_aligned_vector_p (tree type)
4066 {
4067 enum machine_mode mode = TYPE_MODE (type);
4068 if (SSE_REG_MODE_P (mode)
4069 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4070 return true;
4071 if (TYPE_ALIGN (type) < 128)
4072 return false;
4073
4074 if (AGGREGATE_TYPE_P (type))
4075 {
4076 /* Walk the aggregates recursively. */
4077 switch (TREE_CODE (type))
4078 {
4079 case RECORD_TYPE:
4080 case UNION_TYPE:
4081 case QUAL_UNION_TYPE:
4082 {
4083 tree field;
4084
4085 /* Walk all the structure fields. */
4086 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4087 {
4088 if (TREE_CODE (field) == FIELD_DECL
4089 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4090 return true;
4091 }
4092 break;
4093 }
4094
4095 case ARRAY_TYPE:
4096 /* Just for use if some languages pass arrays by value. */
4097 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4098 return true;
4099 break;
4100
4101 default:
4102 gcc_unreachable ();
4103 }
4104 }
4105 return false;
4106 }
4107
4108 /* Gives the alignment boundary, in bits, of an argument with the
4109 specified mode and type. */
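
/* Illustration (example only): on 32-bit targets with SSE enabled, a
   plain int or double argument stays at PARM_BOUNDARY, while an __m128
   argument, or an aggregate containing one, keeps its 128-bit
   alignment; on 64-bit targets the type's own alignment is used,
   raised to at least PARM_BOUNDARY and capped at 128 bits.  */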
4110
4111 int
4112 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4113 {
4114 int align;
4115 if (type)
4116 align = TYPE_ALIGN (type);
4117 else
4118 align = GET_MODE_ALIGNMENT (mode);
4119 if (align < PARM_BOUNDARY)
4120 align = PARM_BOUNDARY;
4121 if (!TARGET_64BIT)
4122 {
4123 /* The i386 ABI defines all arguments to be 4-byte aligned. We have to
4124 make an exception for SSE modes since these require 128-bit
4125 alignment.
4126
4127 The handling here differs from field_alignment. ICC aligns MMX
4128 arguments to 4-byte boundaries, while structure fields are aligned
4129 to 8-byte boundaries. */
4130 if (!TARGET_SSE)
4131 align = PARM_BOUNDARY;
4132 else if (!type)
4133 {
4134 if (!SSE_REG_MODE_P (mode))
4135 align = PARM_BOUNDARY;
4136 }
4137 else
4138 {
4139 if (!contains_128bit_aligned_vector_p (type))
4140 align = PARM_BOUNDARY;
4141 }
4142 }
4143 if (align > 128)
4144 align = 128;
4145 return align;
4146 }
4147
4148 /* Return true if N is a possible register number of a function value. */
4149
4150 bool
4151 ix86_function_value_regno_p (int regno)
4152 {
4153 switch (regno)
4154 {
4155 case 0:
4156 return true;
4157
4158 case FIRST_FLOAT_REG:
4159 if (TARGET_64BIT_MS_ABI)
4160 return false;
4161 return TARGET_FLOAT_RETURNS_IN_80387;
4162
4163 case FIRST_SSE_REG:
4164 return TARGET_SSE;
4165
4166 case FIRST_MMX_REG:
4167 if (TARGET_MACHO || TARGET_64BIT)
4168 return false;
4169 return TARGET_MMX;
4170 }
4171
4172 return false;
4173 }
4174
4175 /* Define how to find the value returned by a function.
4176 VALTYPE is the data type of the value (as a tree).
4177 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4178 otherwise, FUNC is 0. */
4179
4180 static rtx
4181 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4182 tree fntype, tree fn)
4183 {
4184 unsigned int regno;
4185
4186 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4187 we normally prevent this case when MMX is not available. However,
4188 some ABIs may require the result to be returned like DImode. */
4189 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4190 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4191
4192 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4193 we prevent this case when SSE is not available. However, some ABIs
4194 may require the result to be returned like integer TImode. */
4195 else if (mode == TImode
4196 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4197 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4198
4199 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4200 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4201 regno = FIRST_FLOAT_REG;
4202 else
4203 /* Most things go in %eax. */
4204 regno = 0;
4205
4206 /* Override FP return register with %xmm0 for local functions when
4207 SSE math is enabled or for functions with sseregparm attribute. */
4208 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4209 {
4210 int sse_level = ix86_function_sseregparm (fntype, fn);
4211 if ((sse_level >= 1 && mode == SFmode)
4212 || (sse_level == 2 && mode == DFmode))
4213 regno = FIRST_SSE_REG;
4214 }
4215
4216 return gen_rtx_REG (orig_mode, regno);
4217 }
4218
4219 static rtx
4220 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4221 tree valtype)
4222 {
4223 rtx ret;
4224
4225 /* Handle libcalls, which don't provide a type node. */
4226 if (valtype == NULL)
4227 {
4228 switch (mode)
4229 {
4230 case SFmode:
4231 case SCmode:
4232 case DFmode:
4233 case DCmode:
4234 case TFmode:
4235 case SDmode:
4236 case DDmode:
4237 case TDmode:
4238 return gen_rtx_REG (mode, FIRST_SSE_REG);
4239 case XFmode:
4240 case XCmode:
4241 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4242 case TCmode:
4243 return NULL;
4244 default:
4245 return gen_rtx_REG (mode, 0);
4246 }
4247 }
4248
4249 ret = construct_container (mode, orig_mode, valtype, 1,
4250 REGPARM_MAX, SSE_REGPARM_MAX,
4251 x86_64_int_return_registers, 0);
4252
4253 /* For zero-sized structures, construct_container returns NULL, but we
4254 need to keep the rest of the compiler happy by returning a meaningful value. */
4255 if (!ret)
4256 ret = gen_rtx_REG (orig_mode, 0);
4257
4258 return ret;
4259 }
4260
4261 static rtx
4262 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4263 {
4264 unsigned int regno = 0;
4265
4266 if (TARGET_SSE)
4267 {
4268 if (mode == SFmode || mode == DFmode)
4269 regno = FIRST_SSE_REG;
4270 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4271 regno = FIRST_SSE_REG;
4272 }
4273
4274 return gen_rtx_REG (orig_mode, regno);
4275 }
4276
4277 static rtx
4278 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4279 enum machine_mode orig_mode, enum machine_mode mode)
4280 {
4281 tree fn, fntype;
4282
4283 fn = NULL_TREE;
4284 if (fntype_or_decl && DECL_P (fntype_or_decl))
4285 fn = fntype_or_decl;
4286 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4287
4288 if (TARGET_64BIT_MS_ABI)
4289 return function_value_ms_64 (orig_mode, mode);
4290 else if (TARGET_64BIT)
4291 return function_value_64 (orig_mode, mode, valtype);
4292 else
4293 return function_value_32 (orig_mode, mode, fntype, fn);
4294 }
4295
4296 static rtx
4297 ix86_function_value (tree valtype, tree fntype_or_decl,
4298 bool outgoing ATTRIBUTE_UNUSED)
4299 {
4300 enum machine_mode mode, orig_mode;
4301
4302 orig_mode = TYPE_MODE (valtype);
4303 mode = type_natural_mode (valtype);
4304 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4305 }
4306
4307 rtx
4308 ix86_libcall_value (enum machine_mode mode)
4309 {
4310 return ix86_function_value_1 (NULL, NULL, mode, mode);
4311 }
4312
4313 /* Return true iff type is returned in memory. */
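
/* Illustration of the 32-bit rules below (example only):

     struct { int a, b, c, d; }  16 bytes, BLKmode -> returned in memory;
     __m64                       8-byte vector     -> %mm0, or memory
                                                      without MMX;
     __m128                      16-byte vector    -> %xmm0, or memory
                                                      without SSE;
     long double                 XFmode            -> returned in %st(0).  */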
4314
4315 static int
4316 return_in_memory_32 (tree type, enum machine_mode mode)
4317 {
4318 HOST_WIDE_INT size;
4319
4320 if (mode == BLKmode)
4321 return 1;
4322
4323 size = int_size_in_bytes (type);
4324
4325 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4326 return 0;
4327
4328 if (VECTOR_MODE_P (mode) || mode == TImode)
4329 {
4330 /* User-created vectors small enough to fit in EAX. */
4331 if (size < 8)
4332 return 0;
4333
4334 /* MMX/3dNow values are returned in MM0,
4335 except when it doesn't exist. */
4336 if (size == 8)
4337 return (TARGET_MMX ? 0 : 1);
4338
4339 /* SSE values are returned in XMM0, except when it doesn't exist. */
4340 if (size == 16)
4341 return (TARGET_SSE ? 0 : 1);
4342 }
4343
4344 if (mode == XFmode)
4345 return 0;
4346
4347 if (mode == TDmode)
4348 return 1;
4349
4350 if (size > 12)
4351 return 1;
4352 return 0;
4353 }
4354
4355 static int
4356 return_in_memory_64 (tree type, enum machine_mode mode)
4357 {
4358 int needed_intregs, needed_sseregs;
4359 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4360 }
4361
4362 static int
4363 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4364 {
4365 HOST_WIDE_INT size = int_size_in_bytes (type);
4366
4367 /* __m128 and friends are returned in xmm0. */
4368 if (size == 16 && VECTOR_MODE_P (mode))
4369 return 0;
4370
4371 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4372 return (size != 1 && size != 2 && size != 4 && size != 8);
4373 }
4374
4375 int
4376 ix86_return_in_memory (tree type)
4377 {
4378 enum machine_mode mode = type_natural_mode (type);
4379
4380 if (TARGET_64BIT_MS_ABI)
4381 return return_in_memory_ms_64 (type, mode);
4382 else if (TARGET_64BIT)
4383 return return_in_memory_64 (type, mode);
4384 else
4385 return return_in_memory_32 (type, mode);
4386 }
4387
4388 /* Return true iff TYPE is returned in memory. This version is used
4389 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4390 but differs notably in that when MMX is available, 8-byte vectors
4391 are returned in memory, rather than in MMX registers. */
4392
4393 int
4394 ix86_sol10_return_in_memory (tree type)
4395 {
4396 int size;
4397 enum machine_mode mode = type_natural_mode (type);
4398
4399 if (TARGET_64BIT)
4400 return return_in_memory_64 (type, mode);
4401
4402 if (mode == BLKmode)
4403 return 1;
4404
4405 size = int_size_in_bytes (type);
4406
4407 if (VECTOR_MODE_P (mode))
4408 {
4409 /* Return in memory only if MMX registers *are* available. This
4410 seems backwards, but it is consistent with the existing
4411 Solaris x86 ABI. */
4412 if (size == 8)
4413 return TARGET_MMX;
4414 if (size == 16)
4415 return !TARGET_SSE;
4416 }
4417 else if (mode == TImode)
4418 return !TARGET_SSE;
4419 else if (mode == XFmode)
4420 return 0;
4421
4422 return size > 12;
4423 }
4424
4425 /* When returning SSE vector types, we have a choice of either
4426 (1) being ABI incompatible with a -march switch, or
4427 (2) generating an error.
4428 Given no good solution, I think the safest thing is one warning.
4429 The user won't be able to use -Werror, but....
4430
4431 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4432 called in response to actually generating a caller or callee that
4433 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4434 via aggregate_value_p for general type probing from tree-ssa. */
4435
4436 static rtx
4437 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4438 {
4439 static bool warnedsse, warnedmmx;
4440
4441 if (!TARGET_64BIT && type)
4442 {
4443 /* Look at the return type of the function, not the function type. */
4444 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4445
4446 if (!TARGET_SSE && !warnedsse)
4447 {
4448 if (mode == TImode
4449 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4450 {
4451 warnedsse = true;
4452 warning (0, "SSE vector return without SSE enabled "
4453 "changes the ABI");
4454 }
4455 }
4456
4457 if (!TARGET_MMX && !warnedmmx)
4458 {
4459 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4460 {
4461 warnedmmx = true;
4462 warning (0, "MMX vector return without MMX enabled "
4463 "changes the ABI");
4464 }
4465 }
4466 }
4467
4468 return NULL;
4469 }
4470
4471 \f
4472 /* Create the va_list data type. */
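
/* On x86-64 the record built below matches the psABI va_list layout,
   roughly equivalent to the following C declaration (illustration only):

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag __builtin_va_list[1];  */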
4473
4474 static tree
4475 ix86_build_builtin_va_list (void)
4476 {
4477 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4478
4479 /* For i386 we use a plain pointer to the argument area. */
4480 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4481 return build_pointer_type (char_type_node);
4482
4483 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4484 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4485
4486 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4487 unsigned_type_node);
4488 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4489 unsigned_type_node);
4490 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4491 ptr_type_node);
4492 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4493 ptr_type_node);
4494
4495 va_list_gpr_counter_field = f_gpr;
4496 va_list_fpr_counter_field = f_fpr;
4497
4498 DECL_FIELD_CONTEXT (f_gpr) = record;
4499 DECL_FIELD_CONTEXT (f_fpr) = record;
4500 DECL_FIELD_CONTEXT (f_ovf) = record;
4501 DECL_FIELD_CONTEXT (f_sav) = record;
4502
4503 TREE_CHAIN (record) = type_decl;
4504 TYPE_NAME (record) = type_decl;
4505 TYPE_FIELDS (record) = f_gpr;
4506 TREE_CHAIN (f_gpr) = f_fpr;
4507 TREE_CHAIN (f_fpr) = f_ovf;
4508 TREE_CHAIN (f_ovf) = f_sav;
4509
4510 layout_type (record);
4511
4512 /* The correct type is an array type of one element. */
4513 return build_array_type (record, build_index_type (size_zero_node));
4514 }
4515
4516 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4517
4518 static void
4519 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4520 {
4521 rtx save_area, mem;
4522 rtx label;
4523 rtx label_ref;
4524 rtx tmp_reg;
4525 rtx nsse_reg;
4526 int set;
4527 int i;
4528
4529 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4530 return;
4531
4532 /* Indicate that we need to allocate stack space for the varargs save area. */
4533 ix86_save_varrargs_registers = 1;
4534 cfun->stack_alignment_needed = 128;
4535
4536 save_area = frame_pointer_rtx;
4537 set = get_varargs_alias_set ();
4538
4539 for (i = cum->regno;
4540 i < ix86_regparm
4541 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4542 i++)
4543 {
4544 mem = gen_rtx_MEM (Pmode,
4545 plus_constant (save_area, i * UNITS_PER_WORD));
4546 MEM_NOTRAP_P (mem) = 1;
4547 set_mem_alias_set (mem, set);
4548 emit_move_insn (mem, gen_rtx_REG (Pmode,
4549 x86_64_int_parameter_registers[i]));
4550 }
4551
4552 if (cum->sse_nregs && cfun->va_list_fpr_size)
4553 {
4554 /* Now emit code to save SSE registers. The AX parameter contains the
4555 number of SSE parameter registers used to call this function. We use
4556 the sse_prologue_save insn template, which produces a computed jump
4557 across the SSE saves. We need some preparation work to get this working. */
4558
4559 label = gen_label_rtx ();
4560 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4561
4562 /* Compute the address to jump to:
4563 label - eax*4 + nnamed_sse_arguments*4. */
4564 tmp_reg = gen_reg_rtx (Pmode);
4565 nsse_reg = gen_reg_rtx (Pmode);
4566 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4567 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4568 gen_rtx_MULT (Pmode, nsse_reg,
4569 GEN_INT (4))));
4570 if (cum->sse_regno)
4571 emit_move_insn
4572 (nsse_reg,
4573 gen_rtx_CONST (DImode,
4574 gen_rtx_PLUS (DImode,
4575 label_ref,
4576 GEN_INT (cum->sse_regno * 4))));
4577 else
4578 emit_move_insn (nsse_reg, label_ref);
4579 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4580
4581 /* Compute the address of the memory block we save into. We always use
4582 a pointer pointing 127 bytes after the first byte to store, which is
4583 needed to keep the instruction size limited to 4 bytes. */
4584 tmp_reg = gen_reg_rtx (Pmode);
4585 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4586 plus_constant (save_area,
4587 8 * REGPARM_MAX + 127)));
4588 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4589 MEM_NOTRAP_P (mem) = 1;
4590 set_mem_alias_set (mem, set);
4591 set_mem_align (mem, BITS_PER_WORD);
4592
4593 /* And finally do the dirty job! */
4594 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4595 GEN_INT (cum->sse_regno), label));
4596 }
4597 }
4598
4599 static void
4600 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4601 {
4602 int set = get_varargs_alias_set ();
4603 int i;
4604
4605 for (i = cum->regno; i < REGPARM_MAX; i++)
4606 {
4607 rtx reg, mem;
4608
4609 mem = gen_rtx_MEM (Pmode,
4610 plus_constant (virtual_incoming_args_rtx,
4611 i * UNITS_PER_WORD));
4612 MEM_NOTRAP_P (mem) = 1;
4613 set_mem_alias_set (mem, set);
4614
4615 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4616 emit_move_insn (mem, reg);
4617 }
4618 }
4619
4620 static void
4621 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4622 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4623 int no_rtl)
4624 {
4625 CUMULATIVE_ARGS next_cum;
4626 tree fntype;
4627 int stdarg_p;
4628
4629 /* This argument doesn't appear to be used anymore, which is good,
4630 because the old code here didn't suppress rtl generation. */
4631 gcc_assert (!no_rtl);
4632
4633 if (!TARGET_64BIT)
4634 return;
4635
4636 fntype = TREE_TYPE (current_function_decl);
4637 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4638 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4639 != void_type_node));
4640
4641 /* For varargs, we do not want to skip the dummy va_dcl argument.
4642 For stdargs, we do want to skip the last named argument. */
4643 next_cum = *cum;
4644 if (stdarg_p)
4645 function_arg_advance (&next_cum, mode, type, 1);
4646
4647 if (TARGET_64BIT_MS_ABI)
4648 setup_incoming_varargs_ms_64 (&next_cum);
4649 else
4650 setup_incoming_varargs_64 (&next_cum);
4651 }
4652
4653 /* Implement va_start. */
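
/* Illustration of the offsets computed below (example only): the
   register save area laid out by the prologue holds the six integer
   argument registers (6 * 8 = 48 bytes) followed by the SSE argument
   registers (16 bytes each).  So for

     void f (int a, double b, ...)

   va_start leaves gp_offset = 8 (one GP register already used by A),
   fp_offset = 48 + 16 = 64 (one SSE register used by B), and
   overflow_arg_area pointing at the first stack-passed argument.  */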
4654
4655 void
4656 ix86_va_start (tree valist, rtx nextarg)
4657 {
4658 HOST_WIDE_INT words, n_gpr, n_fpr;
4659 tree f_gpr, f_fpr, f_ovf, f_sav;
4660 tree gpr, fpr, ovf, sav, t;
4661 tree type;
4662
4663 /* Only the 64-bit target needs something special. */
4664 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4665 {
4666 std_expand_builtin_va_start (valist, nextarg);
4667 return;
4668 }
4669
4670 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4671 f_fpr = TREE_CHAIN (f_gpr);
4672 f_ovf = TREE_CHAIN (f_fpr);
4673 f_sav = TREE_CHAIN (f_ovf);
4674
4675 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4676 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4677 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4678 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4679 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4680
4681 /* Count number of gp and fp argument registers used. */
4682 words = current_function_args_info.words;
4683 n_gpr = current_function_args_info.regno;
4684 n_fpr = current_function_args_info.sse_regno;
4685
4686 if (cfun->va_list_gpr_size)
4687 {
4688 type = TREE_TYPE (gpr);
4689 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4690 build_int_cst (type, n_gpr * 8));
4691 TREE_SIDE_EFFECTS (t) = 1;
4692 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4693 }
4694
4695 if (cfun->va_list_fpr_size)
4696 {
4697 type = TREE_TYPE (fpr);
4698 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4699 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4700 TREE_SIDE_EFFECTS (t) = 1;
4701 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4702 }
4703
4704 /* Find the overflow area. */
4705 type = TREE_TYPE (ovf);
4706 t = make_tree (type, virtual_incoming_args_rtx);
4707 if (words != 0)
4708 t = build2 (PLUS_EXPR, type, t,
4709 build_int_cst (type, words * UNITS_PER_WORD));
4710 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4711 TREE_SIDE_EFFECTS (t) = 1;
4712 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4713
4714 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4715 {
4716 /* Find the register save area.
4717 The function prologue saves it right above the stack frame. */
4718 type = TREE_TYPE (sav);
4719 t = make_tree (type, frame_pointer_rtx);
4720 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4721 TREE_SIDE_EFFECTS (t) = 1;
4722 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4723 }
4724 }
4725
4726 /* Implement va_arg. */
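
/* Roughly, the code generated below for va_arg (ap, double) behaves like
   this pseudo C (illustration only, names simplified):

     if (ap->fp_offset >= 48 + 8 * 16)
       goto overflow;                      // no SSE save slot left
     addr = ap->reg_save_area + ap->fp_offset;
     ap->fp_offset += 16;
     goto done;
   overflow:
     addr = ap->overflow_arg_area;         // aligned first if required
     ap->overflow_arg_area += 8;
   done:
     result = *(double *) addr;  */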
4727
4728 static tree
4729 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4730 {
4731 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4732 tree f_gpr, f_fpr, f_ovf, f_sav;
4733 tree gpr, fpr, ovf, sav, t;
4734 int size, rsize;
4735 tree lab_false, lab_over = NULL_TREE;
4736 tree addr, t2;
4737 rtx container;
4738 int indirect_p = 0;
4739 tree ptrtype;
4740 enum machine_mode nat_mode;
4741
4742 /* Only the 64-bit target needs something special. */
4743 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4744 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4745
4746 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4747 f_fpr = TREE_CHAIN (f_gpr);
4748 f_ovf = TREE_CHAIN (f_fpr);
4749 f_sav = TREE_CHAIN (f_ovf);
4750
4751 valist = build_va_arg_indirect_ref (valist);
4752 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4753 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4754 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4755 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4756
4757 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4758 if (indirect_p)
4759 type = build_pointer_type (type);
4760 size = int_size_in_bytes (type);
4761 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4762
4763 nat_mode = type_natural_mode (type);
4764 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4765 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4766
4767 /* Pull the value out of the saved registers. */
4768
4769 addr = create_tmp_var (ptr_type_node, "addr");
4770 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4771
4772 if (container)
4773 {
4774 int needed_intregs, needed_sseregs;
4775 bool need_temp;
4776 tree int_addr, sse_addr;
4777
4778 lab_false = create_artificial_label ();
4779 lab_over = create_artificial_label ();
4780
4781 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4782
4783 need_temp = (!REG_P (container)
4784 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4785 || TYPE_ALIGN (type) > 128));
4786
4787 /* In case we are passing a structure, verify that it is a consecutive
4788 block in the register save area. If not, we need to do moves. */
4789 if (!need_temp && !REG_P (container))
4790 {
4791 /* Verify that all registers are strictly consecutive. */
4792 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4793 {
4794 int i;
4795
4796 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4797 {
4798 rtx slot = XVECEXP (container, 0, i);
4799 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4800 || INTVAL (XEXP (slot, 1)) != i * 16)
4801 need_temp = 1;
4802 }
4803 }
4804 else
4805 {
4806 int i;
4807
4808 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4809 {
4810 rtx slot = XVECEXP (container, 0, i);
4811 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4812 || INTVAL (XEXP (slot, 1)) != i * 8)
4813 need_temp = 1;
4814 }
4815 }
4816 }
4817 if (!need_temp)
4818 {
4819 int_addr = addr;
4820 sse_addr = addr;
4821 }
4822 else
4823 {
4824 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4825 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4826 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4827 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4828 }
4829
4830 /* First ensure that we fit completely in registers. */
4831 if (needed_intregs)
4832 {
4833 t = build_int_cst (TREE_TYPE (gpr),
4834 (REGPARM_MAX - needed_intregs + 1) * 8);
4835 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4836 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4837 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4838 gimplify_and_add (t, pre_p);
4839 }
4840 if (needed_sseregs)
4841 {
4842 t = build_int_cst (TREE_TYPE (fpr),
4843 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4844 + REGPARM_MAX * 8);
4845 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4846 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4847 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4848 gimplify_and_add (t, pre_p);
4849 }
4850
4851 /* Compute index to start of area used for integer regs. */
4852 if (needed_intregs)
4853 {
4854 /* int_addr = gpr + sav; */
4855 t = fold_convert (ptr_type_node, gpr);
4856 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4857 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4858 gimplify_and_add (t, pre_p);
4859 }
4860 if (needed_sseregs)
4861 {
4862 /* sse_addr = fpr + sav; */
4863 t = fold_convert (ptr_type_node, fpr);
4864 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4865 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4866 gimplify_and_add (t, pre_p);
4867 }
4868 if (need_temp)
4869 {
4870 int i;
4871 tree temp = create_tmp_var (type, "va_arg_tmp");
4872
4873 /* addr = &temp; */
4874 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4875 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4876 gimplify_and_add (t, pre_p);
4877
4878 for (i = 0; i < XVECLEN (container, 0); i++)
4879 {
4880 rtx slot = XVECEXP (container, 0, i);
4881 rtx reg = XEXP (slot, 0);
4882 enum machine_mode mode = GET_MODE (reg);
4883 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4884 tree addr_type = build_pointer_type (piece_type);
4885 tree src_addr, src;
4886 int src_offset;
4887 tree dest_addr, dest;
4888
4889 if (SSE_REGNO_P (REGNO (reg)))
4890 {
4891 src_addr = sse_addr;
4892 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4893 }
4894 else
4895 {
4896 src_addr = int_addr;
4897 src_offset = REGNO (reg) * 8;
4898 }
4899 src_addr = fold_convert (addr_type, src_addr);
4900 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4901 size_int (src_offset));
4902 src = build_va_arg_indirect_ref (src_addr);
4903
4904 dest_addr = fold_convert (addr_type, addr);
4905 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4906 size_int (INTVAL (XEXP (slot, 1))));
4907 dest = build_va_arg_indirect_ref (dest_addr);
4908
4909 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4910 gimplify_and_add (t, pre_p);
4911 }
4912 }
4913
4914 if (needed_intregs)
4915 {
4916 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4917 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4918 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4919 gimplify_and_add (t, pre_p);
4920 }
4921 if (needed_sseregs)
4922 {
4923 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4924 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4925 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4926 gimplify_and_add (t, pre_p);
4927 }
4928
4929 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4930 gimplify_and_add (t, pre_p);
4931
4932 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4933 append_to_statement_list (t, pre_p);
4934 }
4935
4936 /* ... otherwise out of the overflow area. */
4937
4938 /* Care for on-stack alignment if needed. */
4939 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4940 || integer_zerop (TYPE_SIZE (type)))
4941 t = ovf;
4942 else
4943 {
4944 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4945 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4946 build_int_cst (TREE_TYPE (ovf), align - 1));
4947 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4948 build_int_cst (TREE_TYPE (t), -align));
4949 }
4950 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4951
4952 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4953 gimplify_and_add (t2, pre_p);
4954
4955 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4956 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4957 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4958 gimplify_and_add (t, pre_p);
4959
4960 if (container)
4961 {
4962 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4963 append_to_statement_list (t, pre_p);
4964 }
4965
4966 ptrtype = build_pointer_type (type);
4967 addr = fold_convert (ptrtype, addr);
4968
4969 if (indirect_p)
4970 addr = build_va_arg_indirect_ref (addr);
4971 return build_va_arg_indirect_ref (addr);
4972 }
4973 \f
4974 /* Return nonzero if OPNUM's MEM should be matched
4975 in movabs* patterns. */
4976
4977 int
4978 ix86_check_movabs (rtx insn, int opnum)
4979 {
4980 rtx set, mem;
4981
4982 set = PATTERN (insn);
4983 if (GET_CODE (set) == PARALLEL)
4984 set = XVECEXP (set, 0, 0);
4985 gcc_assert (GET_CODE (set) == SET);
4986 mem = XEXP (set, opnum);
4987 while (GET_CODE (mem) == SUBREG)
4988 mem = SUBREG_REG (mem);
4989 gcc_assert (MEM_P (mem));
4990 return (volatile_ok || !MEM_VOLATILE_P (mem));
4991 }
4992 \f
4993 /* Initialize the table of extra 80387 mathematical constants. */
4994
4995 static void
4996 init_ext_80387_constants (void)
4997 {
4998 static const char * cst[5] =
4999 {
5000 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5001 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5002 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5003 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5004 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5005 };
5006 int i;
5007
5008 for (i = 0; i < 5; i++)
5009 {
5010 real_from_string (&ext_80387_constants_table[i], cst[i]);
5011 /* Ensure each constant is rounded to XFmode precision. */
5012 real_convert (&ext_80387_constants_table[i],
5013 XFmode, &ext_80387_constants_table[i]);
5014 }
5015
5016 ext_80387_constants_init = 1;
5017 }
5018
5019 /* Return an index (1..9) identifying the special instruction or sequence
5020 that loads constant X, 0 if there is none, or -1 if X is not an x87 floating-point CONST_DOUBLE. */
5021
5022 int
5023 standard_80387_constant_p (rtx x)
5024 {
5025 enum machine_mode mode = GET_MODE (x);
5026
5027 REAL_VALUE_TYPE r;
5028
5029 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5030 return -1;
5031
5032 if (x == CONST0_RTX (mode))
5033 return 1;
5034 if (x == CONST1_RTX (mode))
5035 return 2;
5036
5037 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5038
5039 /* For XFmode constants, try to find a special 80387 instruction when
5040 optimizing for size or on those CPUs that benefit from them. */
5041 if (mode == XFmode
5042 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5043 {
5044 int i;
5045
5046 if (! ext_80387_constants_init)
5047 init_ext_80387_constants ();
5048
5049 for (i = 0; i < 5; i++)
5050 if (real_identical (&r, &ext_80387_constants_table[i]))
5051 return i + 3;
5052 }
5053
5054 /* A load of the constant -0.0 or -1.0 will be split into an
5055 fldz;fchs or fld1;fchs sequence. */
5056 if (real_isnegzero (&r))
5057 return 8;
5058 if (real_identical (&r, &dconstm1))
5059 return 9;
5060
5061 return 0;
5062 }
5063
5064 /* Return the opcode of the special instruction to be used to load
5065 the constant X. */
5066
5067 const char *
5068 standard_80387_constant_opcode (rtx x)
5069 {
5070 switch (standard_80387_constant_p (x))
5071 {
5072 case 1:
5073 return "fldz";
5074 case 2:
5075 return "fld1";
5076 case 3:
5077 return "fldlg2";
5078 case 4:
5079 return "fldln2";
5080 case 5:
5081 return "fldl2e";
5082 case 6:
5083 return "fldl2t";
5084 case 7:
5085 return "fldpi";
5086 case 8:
5087 case 9:
5088 return "#";
5089 default:
5090 gcc_unreachable ();
5091 }
5092 }
5093
5094 /* Return the CONST_DOUBLE representing the 80387 constant that is
5095 loaded by the specified special instruction. The argument IDX
5096 matches the return value from standard_80387_constant_p. */
5097
5098 rtx
5099 standard_80387_constant_rtx (int idx)
5100 {
5101 int i;
5102
5103 if (! ext_80387_constants_init)
5104 init_ext_80387_constants ();
5105
5106 switch (idx)
5107 {
5108 case 3:
5109 case 4:
5110 case 5:
5111 case 6:
5112 case 7:
5113 i = idx - 3;
5114 break;
5115
5116 default:
5117 gcc_unreachable ();
5118 }
5119
5120 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5121 XFmode);
5122 }
5123
5124 /* Return 1 if MODE is a valid vector mode for SSE. */
5125 static int
5126 standard_sse_mode_p (enum machine_mode mode)
5127 {
5128 switch (mode)
5129 {
5130 case V16QImode:
5131 case V8HImode:
5132 case V4SImode:
5133 case V2DImode:
5134 case V4SFmode:
5135 case V2DFmode:
5136 return 1;
5137
5138 default:
5139 return 0;
5140 }
5141 }
5142
5143 /* Return nonzero if X is an FP constant we can load into an SSE register
5144 without using memory: 1 for zero, 2 for all ones (requires SSE2), or -1 for all ones when SSE2 is unavailable; return 0 otherwise. */
5145 int
5146 standard_sse_constant_p (rtx x)
5147 {
5148 enum machine_mode mode = GET_MODE (x);
5149
5150 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5151 return 1;
5152 if (vector_all_ones_operand (x, mode)
5153 && standard_sse_mode_p (mode))
5154 return TARGET_SSE2 ? 2 : -1;
5155
5156 return 0;
5157 }
5158
5159 /* Return the opcode of the special instruction to be used to load
5160 the constant X. */
5161
5162 const char *
5163 standard_sse_constant_opcode (rtx insn, rtx x)
5164 {
5165 switch (standard_sse_constant_p (x))
5166 {
5167 case 1:
5168 if (get_attr_mode (insn) == MODE_V4SF)
5169 return "xorps\t%0, %0";
5170 else if (get_attr_mode (insn) == MODE_V2DF)
5171 return "xorpd\t%0, %0";
5172 else
5173 return "pxor\t%0, %0";
5174 case 2:
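      /* pcmpeqd of a register with itself sets every bit, giving the
	 all-ones vector.  */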
5175 return "pcmpeqd\t%0, %0";
5176 }
5177 gcc_unreachable ();
5178 }
5179
5180 /* Return 1 if OP contains a symbol reference. */
5181
5182 int
5183 symbolic_reference_mentioned_p (rtx op)
5184 {
5185 const char *fmt;
5186 int i;
5187
5188 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5189 return 1;
5190
5191 fmt = GET_RTX_FORMAT (GET_CODE (op));
5192 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5193 {
5194 if (fmt[i] == 'E')
5195 {
5196 int j;
5197
5198 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5199 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5200 return 1;
5201 }
5202
5203 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5204 return 1;
5205 }
5206
5207 return 0;
5208 }
5209
5210 /* Return 1 if it is appropriate to emit `ret' instructions in the
5211 body of a function. Do this only if the epilogue is simple, needing a
5212 couple of insns. Prior to reloading, we can't tell how many registers
5213 must be saved, so return 0 then. Return 0 if there is no frame
5214 marker to de-allocate. */
5215
5216 int
5217 ix86_can_use_return_insn_p (void)
5218 {
5219 struct ix86_frame frame;
5220
5221 if (! reload_completed || frame_pointer_needed)
5222 return 0;
5223
5224 /* Don't allow more than 32768 bytes of arguments to be popped, since
5225 that's all we handle with a single return instruction. */
5226 if (current_function_pops_args
5227 && current_function_args_size >= 32768)
5228 return 0;
5229
5230 ix86_compute_frame_layout (&frame);
5231 return frame.to_allocate == 0 && frame.nregs == 0;
5232 }
5233 \f
5234 /* Value should be nonzero if functions must have frame pointers.
5235 Zero means the frame pointer need not be set up (and parms may
5236 be accessed via the stack pointer) in functions that seem suitable. */
5237
5238 int
5239 ix86_frame_pointer_required (void)
5240 {
5241 /* If we accessed previous frames, then the generated code expects
5242 to be able to access the saved ebp value in our frame. */
5243 if (cfun->machine->accesses_prev_frame)
5244 return 1;
5245
5246 /* Several x86 os'es need a frame pointer for other reasons,
5247 usually pertaining to setjmp. */
5248 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5249 return 1;
5250
5251 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5252 the frame pointer by default. Turn it back on now if we've not
5253 got a leaf function. */
5254 if (TARGET_OMIT_LEAF_FRAME_POINTER
5255 && (!current_function_is_leaf
5256 || ix86_current_function_calls_tls_descriptor))
5257 return 1;
5258
5259 if (current_function_profile)
5260 return 1;
5261
5262 return 0;
5263 }
5264
5265 /* Record that the current function accesses previous call frames. */
5266
5267 void
5268 ix86_setup_frame_addresses (void)
5269 {
5270 cfun->machine->accesses_prev_frame = 1;
5271 }
5272 \f
5273 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5274 # define USE_HIDDEN_LINKONCE 1
5275 #else
5276 # define USE_HIDDEN_LINKONCE 0
5277 #endif
5278
5279 static int pic_labels_used;
5280
5281 /* Fills in the label name that should be used for a pc thunk for
5282 the given register. */
5283
5284 static void
5285 get_pc_thunk_name (char name[32], unsigned int regno)
5286 {
5287 gcc_assert (!TARGET_64BIT);
5288
5289 if (USE_HIDDEN_LINKONCE)
5290 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5291 else
5292 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5293 }
5294
5295
5296 /* Emit the pc thunks used by -fpic code: for each register that needs one,
5297 generate a function that loads the register with its return address and returns. */
5298
5299 void
5300 ix86_file_end (void)
5301 {
5302 rtx xops[2];
5303 int regno;
5304
5305 for (regno = 0; regno < 8; ++regno)
5306 {
5307 char name[32];
5308
5309 if (! ((pic_labels_used >> regno) & 1))
5310 continue;
5311
5312 get_pc_thunk_name (name, regno);
5313
5314 #if TARGET_MACHO
5315 if (TARGET_MACHO)
5316 {
5317 switch_to_section (darwin_sections[text_coal_section]);
5318 fputs ("\t.weak_definition\t", asm_out_file);
5319 assemble_name (asm_out_file, name);
5320 fputs ("\n\t.private_extern\t", asm_out_file);
5321 assemble_name (asm_out_file, name);
5322 fputs ("\n", asm_out_file);
5323 ASM_OUTPUT_LABEL (asm_out_file, name);
5324 }
5325 else
5326 #endif
5327 if (USE_HIDDEN_LINKONCE)
5328 {
5329 tree decl;
5330
5331 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5332 error_mark_node);
5333 TREE_PUBLIC (decl) = 1;
5334 TREE_STATIC (decl) = 1;
5335 DECL_ONE_ONLY (decl) = 1;
5336
5337 (*targetm.asm_out.unique_section) (decl, 0);
5338 switch_to_section (get_named_section (decl, NULL, 0));
5339
5340 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5341 fputs ("\t.hidden\t", asm_out_file);
5342 assemble_name (asm_out_file, name);
5343 fputc ('\n', asm_out_file);
5344 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5345 }
5346 else
5347 {
5348 switch_to_section (text_section);
5349 ASM_OUTPUT_LABEL (asm_out_file, name);
5350 }
5351
5352 xops[0] = gen_rtx_REG (SImode, regno);
5353 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5354 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5355 output_asm_insn ("ret", xops);
5356 }
5357
5358 if (NEED_INDICATE_EXEC_STACK)
5359 file_end_indicate_exec_stack ();
5360 }
5361
5362 /* Emit code for the SET_GOT patterns. */
5363
5364 const char *
5365 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5366 {
5367 rtx xops[3];
5368
5369 xops[0] = dest;
5370
5371 if (TARGET_VXWORKS_RTP && flag_pic)
5372 {
5373 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5374 xops[2] = gen_rtx_MEM (Pmode,
5375 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5376 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5377
5378 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5379 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5380 an unadorned address. */
5381 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5382 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5383 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5384 return "";
5385 }
5386
5387 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5388
5389 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5390 {
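      /* Without the pc thunk, compute the pic base inline: for PIC, "call"
	 the immediately following label so that its address is pushed, then
	 pop it into the destination; without PIC, just move the label
	 address in directly.  The GOT offset is added further below.  */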
5391 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5392
5393 if (!flag_pic)
5394 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5395 else
5396 output_asm_insn ("call\t%a2", xops);
5397
5398 #if TARGET_MACHO
5399 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5400 is what will be referenced by the Mach-O PIC subsystem. */
5401 if (!label)
5402 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5403 #endif
5404
5405 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5406 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5407
5408 if (flag_pic)
5409 output_asm_insn ("pop{l}\t%0", xops);
5410 }
5411 else
5412 {
5413 char name[32];
5414 get_pc_thunk_name (name, REGNO (dest));
5415 pic_labels_used |= 1 << REGNO (dest);
5416
5417 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5418 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5419 output_asm_insn ("call\t%X2", xops);
5420 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5421 is what will be referenced by the Mach-O PIC subsystem. */
5422 #if TARGET_MACHO
5423 if (!label)
5424 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5425 else
5426 targetm.asm_out.internal_label (asm_out_file, "L",
5427 CODE_LABEL_NUMBER (label));
5428 #endif
5429 }
5430
5431 if (TARGET_MACHO)
5432 return "";
5433
5434 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5435 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5436 else
5437 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5438
5439 return "";
5440 }
5441
5442 /* Generate a "push" pattern for input ARG. */
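/* The generated RTL is (set (mem (pre_dec sp)) ARG), the form matched by
   the push insn patterns in i386.md.  */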
5443
5444 static rtx
5445 gen_push (rtx arg)
5446 {
5447 return gen_rtx_SET (VOIDmode,
5448 gen_rtx_MEM (Pmode,
5449 gen_rtx_PRE_DEC (Pmode,
5450 stack_pointer_rtx)),
5451 arg);
5452 }
5453
5454 /* Return the number of an unused call-clobbered register if one is available
5455 for the entire function, or INVALID_REGNUM if there is none. */
5456
5457 static unsigned int
5458 ix86_select_alt_pic_regnum (void)
5459 {
5460 if (current_function_is_leaf && !current_function_profile
5461 && !ix86_current_function_calls_tls_descriptor)
5462 {
5463 int i;
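      /* Hard registers 0..2 are %eax, %edx and %ecx, the call-clobbered
	 integer registers; an unused one can hold the pic pointer instead
	 of the default %ebx.  */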
5464 for (i = 2; i >= 0; --i)
5465 if (!regs_ever_live[i])
5466 return i;
5467 }
5468
5469 return INVALID_REGNUM;
5470 }
5471
5472 /* Return 1 if we need to save REGNO. */
5473 static int
5474 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5475 {
5476 if (pic_offset_table_rtx
5477 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5478 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5479 || current_function_profile
5480 || current_function_calls_eh_return
5481 || current_function_uses_const_pool))
5482 {
5483 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5484 return 0;
5485 return 1;
5486 }
5487
5488 if (current_function_calls_eh_return && maybe_eh_return)
5489 {
5490 unsigned i;
5491 for (i = 0; ; i++)
5492 {
5493 unsigned test = EH_RETURN_DATA_REGNO (i);
5494 if (test == INVALID_REGNUM)
5495 break;
5496 if (test == regno)
5497 return 1;
5498 }
5499 }
5500
5501 if (cfun->machine->force_align_arg_pointer
5502 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5503 return 1;
5504
5505 return (regs_ever_live[regno]
5506 && !call_used_regs[regno]
5507 && !fixed_regs[regno]
5508 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5509 }
5510
5511 /* Return number of registers to be saved on the stack. */
5512
5513 static int
5514 ix86_nsaved_regs (void)
5515 {
5516 int nregs = 0;
5517 int regno;
5518
5519 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5520 if (ix86_save_reg (regno, true))
5521 nregs++;
5522 return nregs;
5523 }
5524
5525 /* Return the offset between two registers, one to be eliminated, and the other
5526 its replacement, at the start of a routine. */
5527
5528 HOST_WIDE_INT
5529 ix86_initial_elimination_offset (int from, int to)
5530 {
5531 struct ix86_frame frame;
5532 ix86_compute_frame_layout (&frame);
5533
5534 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5535 return frame.hard_frame_pointer_offset;
5536 else if (from == FRAME_POINTER_REGNUM
5537 && to == HARD_FRAME_POINTER_REGNUM)
5538 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5539 else
5540 {
5541 gcc_assert (to == STACK_POINTER_REGNUM);
5542
5543 if (from == ARG_POINTER_REGNUM)
5544 return frame.stack_pointer_offset;
5545
5546 gcc_assert (from == FRAME_POINTER_REGNUM);
5547 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5548 }
5549 }
5550
5551 /* Fill the ix86_frame structure describing the frame of the function currently being compiled. */
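/* The frame laid out below looks like this, going from the incoming stack
   pointer towards lower addresses: return address, saved frame pointer (if
   needed), register save area, va-arg register save area, padding1 (to the
   required local alignment), local variables, outgoing argument area and
   padding2 (to the preferred stack boundary).  */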
5552
5553 static void
5554 ix86_compute_frame_layout (struct ix86_frame *frame)
5555 {
5556 HOST_WIDE_INT total_size;
5557 unsigned int stack_alignment_needed;
5558 HOST_WIDE_INT offset;
5559 unsigned int preferred_alignment;
5560 HOST_WIDE_INT size = get_frame_size ();
5561
5562 frame->nregs = ix86_nsaved_regs ();
5563 total_size = size;
5564
5565 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5566 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5567
5568 /* During reload iterations the number of registers saved can change.
5569 Recompute the value as needed. Do not recompute when the number of registers
5570 didn't change, as reload calls this function multiple times and does not
5571 expect the decision to change within a single iteration. */
5572 if (!optimize_size
5573 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5574 {
5575 int count = frame->nregs;
5576
5577 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5578 /* The fast prologue uses moves instead of pushes to save registers. This
5579 is significantly longer, but it also executes faster, as modern hardware
5580 can execute the moves in parallel but can't do that for push/pop.
5581
5582 Be careful about choosing which prologue to emit: when the function takes
5583 many instructions to execute we may use the slow version, as we do when the
5584 function is known to be outside a hot spot (known only with profile
5585 feedback). Weight the size of the function by the number of registers
5586 to save, as it is cheap to use one or two push instructions but very
5587 slow to use many of them. */
5588 if (count)
5589 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5590 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5591 || (flag_branch_probabilities
5592 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5593 cfun->machine->use_fast_prologue_epilogue = false;
5594 else
5595 cfun->machine->use_fast_prologue_epilogue
5596 = !expensive_function_p (count);
5597 }
5598 if (TARGET_PROLOGUE_USING_MOVE
5599 && cfun->machine->use_fast_prologue_epilogue)
5600 frame->save_regs_using_mov = true;
5601 else
5602 frame->save_regs_using_mov = false;
5603
5604
5605 /* Skip return address and saved base pointer. */
5606 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5607
5608 frame->hard_frame_pointer_offset = offset;
5609
5610 /* Do some sanity checking of stack_alignment_needed and
5611 preferred_alignment, since the i386 port is the only one using these
5612 features, which may break easily. */
5613
5614 gcc_assert (!size || stack_alignment_needed);
5615 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5616 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5617 gcc_assert (stack_alignment_needed
5618 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5619
5620 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5621 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5622
5623 /* Register save area */
5624 offset += frame->nregs * UNITS_PER_WORD;
5625
5626 /* Va-arg area */
5627 if (ix86_save_varrargs_registers)
5628 {
5629 offset += X86_64_VARARGS_SIZE;
5630 frame->va_arg_size = X86_64_VARARGS_SIZE;
5631 }
5632 else
5633 frame->va_arg_size = 0;
5634
5635 /* Align start of frame for local function. */
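  /* For example, an offset of 20 with a 16-byte stack_alignment_needed
     is rounded up to 32, giving padding1 == 12.  */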
5636 frame->padding1 = ((offset + stack_alignment_needed - 1)
5637 & -stack_alignment_needed) - offset;
5638
5639 offset += frame->padding1;
5640
5641 /* Frame pointer points here. */
5642 frame->frame_pointer_offset = offset;
5643
5644 offset += size;
5645
5646 /* Add the outgoing arguments area. It can be skipped if we eliminated
5647 all the function calls as dead code.
5648 Skipping is however impossible when the function calls alloca: the alloca
5649 expander assumes that the last current_function_outgoing_args_size bytes
5650 of the stack frame are unused. */
5651 if (ACCUMULATE_OUTGOING_ARGS
5652 && (!current_function_is_leaf || current_function_calls_alloca
5653 || ix86_current_function_calls_tls_descriptor))
5654 {
5655 offset += current_function_outgoing_args_size;
5656 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5657 }
5658 else
5659 frame->outgoing_arguments_size = 0;
5660
5661 /* Align stack boundary. Only needed if we're calling another function
5662 or using alloca. */
5663 if (!current_function_is_leaf || current_function_calls_alloca
5664 || ix86_current_function_calls_tls_descriptor)
5665 frame->padding2 = ((offset + preferred_alignment - 1)
5666 & -preferred_alignment) - offset;
5667 else
5668 frame->padding2 = 0;
5669
5670 offset += frame->padding2;
5671
5672 /* We've reached end of stack frame. */
5673 frame->stack_pointer_offset = offset;
5674
5675 /* Size prologue needs to allocate. */
5676 frame->to_allocate =
5677 (size + frame->padding1 + frame->padding2
5678 + frame->outgoing_arguments_size + frame->va_arg_size);
5679
5680 if ((!frame->to_allocate && frame->nregs <= 1)
5681 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5682 frame->save_regs_using_mov = false;
5683
5684 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5685 && current_function_is_leaf
5686 && !ix86_current_function_calls_tls_descriptor)
5687 {
5688 frame->red_zone_size = frame->to_allocate;
5689 if (frame->save_regs_using_mov)
5690 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5691 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5692 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5693 }
5694 else
5695 frame->red_zone_size = 0;
5696 frame->to_allocate -= frame->red_zone_size;
5697 frame->stack_pointer_offset -= frame->red_zone_size;
5698 #if 0
5699 fprintf (stderr, "\n");
5700 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5701 fprintf (stderr, "size: %ld\n", (long)size);
5702 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5703 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5704 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5705 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5706 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5707 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5708 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5709 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5710 (long)frame->hard_frame_pointer_offset);
5711 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5712 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5713 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5714 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5715 #endif
5716 }
5717
5718 /* Emit code to save registers in the prologue. */
5719
5720 static void
5721 ix86_emit_save_regs (void)
5722 {
5723 unsigned int regno;
5724 rtx insn;
5725
5726 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5727 if (ix86_save_reg (regno, true))
5728 {
5729 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5730 RTX_FRAME_RELATED_P (insn) = 1;
5731 }
5732 }
5733
5734 /* Emit code to save registers using MOV insns. The first register
5735 is saved at POINTER + OFFSET. */
5736 static void
5737 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5738 {
5739 unsigned int regno;
5740 rtx insn;
5741
5742 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5743 if (ix86_save_reg (regno, true))
5744 {
5745 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5746 Pmode, offset),
5747 gen_rtx_REG (Pmode, regno));
5748 RTX_FRAME_RELATED_P (insn) = 1;
5749 offset += UNITS_PER_WORD;
5750 }
5751 }
5752
5753 /* Expand prologue or epilogue stack adjustment.
5754 The pattern exists to put a dependency on all ebp-based memory accesses.
5755 STYLE should be negative if instructions should be marked as frame related,
5756 zero if the %r11 register is live and cannot be freely used, and positive
5757 otherwise. */
5758
5759 static void
5760 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5761 {
5762 rtx insn;
5763
5764 if (! TARGET_64BIT)
5765 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5766 else if (x86_64_immediate_operand (offset, DImode))
5767 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5768 else
5769 {
5770 rtx r11;
5771 /* r11 is used by indirect sibcall return as well, set before the
5772 epilogue and used after the epilogue. ATM indirect sibcall
5773 shouldn't be used together with huge frame sizes in one
5774 function because of the frame_size check in sibcall.c. */
5775 gcc_assert (style);
5776 r11 = gen_rtx_REG (DImode, R11_REG);
5777 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5778 if (style < 0)
5779 RTX_FRAME_RELATED_P (insn) = 1;
5780 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5781 offset));
5782 }
5783 if (style < 0)
5784 RTX_FRAME_RELATED_P (insn) = 1;
5785 }
5786
5787 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5788
5789 static rtx
5790 ix86_internal_arg_pointer (void)
5791 {
5792 bool has_force_align_arg_pointer =
5793 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5794 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5795 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5796 && DECL_NAME (current_function_decl)
5797 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5798 && DECL_FILE_SCOPE_P (current_function_decl))
5799 || ix86_force_align_arg_pointer
5800 || has_force_align_arg_pointer)
5801 {
5802 /* Nested functions can't realign the stack due to a register
5803 conflict. */
5804 if (DECL_CONTEXT (current_function_decl)
5805 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5806 {
5807 if (ix86_force_align_arg_pointer)
5808 warning (0, "-mstackrealign ignored for nested functions");
5809 if (has_force_align_arg_pointer)
5810 error ("%s not supported for nested functions",
5811 ix86_force_align_arg_pointer_string);
5812 return virtual_incoming_args_rtx;
5813 }
5814 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5815 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5816 }
5817 else
5818 return virtual_incoming_args_rtx;
5819 }
5820
5821 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5822 This is called from dwarf2out.c to emit call frame instructions
5823 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5824 static void
5825 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5826 {
5827 rtx unspec = SET_SRC (pattern);
5828 gcc_assert (GET_CODE (unspec) == UNSPEC);
5829
5830 switch (index)
5831 {
5832 case UNSPEC_REG_SAVE:
5833 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5834 SET_DEST (pattern));
5835 break;
5836 case UNSPEC_DEF_CFA:
5837 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5838 INTVAL (XVECEXP (unspec, 0, 0)));
5839 break;
5840 default:
5841 gcc_unreachable ();
5842 }
5843 }
5844
5845 /* Expand the prologue into a bunch of separate insns. */
5846
5847 void
5848 ix86_expand_prologue (void)
5849 {
5850 rtx insn;
5851 bool pic_reg_used;
5852 struct ix86_frame frame;
5853 HOST_WIDE_INT allocate;
5854
5855 ix86_compute_frame_layout (&frame);
5856
5857 if (cfun->machine->force_align_arg_pointer)
5858 {
5859 rtx x, y;
5860
5861 /* Grab the argument pointer. */
5862 x = plus_constant (stack_pointer_rtx, 4);
5863 y = cfun->machine->force_align_arg_pointer;
5864 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5865 RTX_FRAME_RELATED_P (insn) = 1;
5866
5867 /* The unwind info consists of two parts: install the fafp as the cfa,
5868 and record the fafp as the "save register" of the stack pointer.
5869 The latter is there so that the unwinder can see where it
5870 should restore the stack pointer across the and insn. */
5871 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5872 x = gen_rtx_SET (VOIDmode, y, x);
5873 RTX_FRAME_RELATED_P (x) = 1;
5874 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5875 UNSPEC_REG_SAVE);
5876 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5877 RTX_FRAME_RELATED_P (y) = 1;
5878 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5879 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5880 REG_NOTES (insn) = x;
5881
5882 /* Align the stack. */
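      /* ANDing the stack pointer with -16 clears its low four bits,
	 forcing 16-byte alignment.  */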
5883 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5884 GEN_INT (-16)));
5885
5886 /* And here we cheat like madmen with the unwind info. We force the
5887 cfa register back to sp+4, which is exactly what it was at the
5888 start of the function. Re-pushing the return address results in
5889 the return at the same spot relative to the cfa, and thus is
5890 correct wrt the unwind info. */
5891 x = cfun->machine->force_align_arg_pointer;
5892 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5893 insn = emit_insn (gen_push (x));
5894 RTX_FRAME_RELATED_P (insn) = 1;
5895
5896 x = GEN_INT (4);
5897 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5898 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5899 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5900 REG_NOTES (insn) = x;
5901 }
5902
5903 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5904 slower on all targets. Also sdb doesn't like it. */
5905
5906 if (frame_pointer_needed)
5907 {
5908 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5909 RTX_FRAME_RELATED_P (insn) = 1;
5910
5911 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5912 RTX_FRAME_RELATED_P (insn) = 1;
5913 }
5914
5915 allocate = frame.to_allocate;
5916
5917 if (!frame.save_regs_using_mov)
5918 ix86_emit_save_regs ();
5919 else
5920 allocate += frame.nregs * UNITS_PER_WORD;
5921
5922 /* When using the red zone we may start saving registers before allocating
5923 the stack frame, saving one cycle of the prologue. */
5924 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5925 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5926 : stack_pointer_rtx,
5927 -frame.nregs * UNITS_PER_WORD);
5928
5929 if (allocate == 0)
5930 ;
5931 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5932 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5933 GEN_INT (-allocate), -1);
5934 else
5935 {
5936 /* Only valid for Win32 targets (and the 64-bit MS ABI). */
5937 rtx eax = gen_rtx_REG (Pmode, 0);
5938 bool eax_live;
5939 rtx t;
5940
5941 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5942
5943 if (TARGET_64BIT_MS_ABI)
5944 eax_live = false;
5945 else
5946 eax_live = ix86_eax_live_at_start_p ();
5947
5948 if (eax_live)
5949 {
5950 emit_insn (gen_push (eax));
5951 allocate -= UNITS_PER_WORD;
5952 }
5953
5954 emit_move_insn (eax, GEN_INT (allocate));
5955
5956 if (TARGET_64BIT)
5957 insn = gen_allocate_stack_worker_64 (eax);
5958 else
5959 insn = gen_allocate_stack_worker_32 (eax);
5960 insn = emit_insn (insn);
5961 RTX_FRAME_RELATED_P (insn) = 1;
5962 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5963 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5964 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5965 t, REG_NOTES (insn));
5966
5967 if (eax_live)
5968 {
5969 if (frame_pointer_needed)
5970 t = plus_constant (hard_frame_pointer_rtx,
5971 allocate
5972 - frame.to_allocate
5973 - frame.nregs * UNITS_PER_WORD);
5974 else
5975 t = plus_constant (stack_pointer_rtx, allocate);
5976 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
5977 }
5978 }
5979
5980 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5981 {
5982 if (!frame_pointer_needed || !frame.to_allocate)
5983 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5984 else
5985 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5986 -frame.nregs * UNITS_PER_WORD);
5987 }
5988
5989 pic_reg_used = false;
5990 if (pic_offset_table_rtx
5991 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5992 || current_function_profile))
5993 {
5994 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5995
5996 if (alt_pic_reg_used != INVALID_REGNUM)
5997 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5998
5999 pic_reg_used = true;
6000 }
6001
6002 if (pic_reg_used)
6003 {
6004 if (TARGET_64BIT)
6005 {
6006 if (ix86_cmodel == CM_LARGE_PIC)
6007 {
6008 rtx tmp_reg = gen_rtx_REG (DImode,
6009 FIRST_REX_INT_REG + 3 /* R11 */);
6010 rtx label = gen_label_rtx ();
6011 emit_label (label);
6012 LABEL_PRESERVE_P (label) = 1;
6013 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6014 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6015 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6016 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6017 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6018 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6019 pic_offset_table_rtx, tmp_reg));
6020 }
6021 else
6022 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6023 }
6024 else
6025 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6026
6027 /* Even with accurate pre-reload life analysis, we can wind up
6028 deleting all references to the pic register after reload.
6029 Consider the case where cross-jumping unifies the two sides of a branch
6030 controlled by a comparison against the only read from a global; in
6031 that case, allow the set_got to be deleted, though we're
6032 too late to do anything about the ebx save in the prologue. */
6033 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6034 }
6035
6036 /* Prevent function calls from being scheduled before the call to mcount.
6037 In the pic_reg_used case, make sure that the got load isn't deleted. */
6038 if (current_function_profile)
6039 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6040 }
6041
6042 /* Emit code to restore saved registers using MOV insns. First register
6043 is restored from POINTER + OFFSET. */
6044 static void
6045 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6046 int maybe_eh_return)
6047 {
6048 int regno;
6049 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6050
6051 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6052 if (ix86_save_reg (regno, maybe_eh_return))
6053 {
6054 /* Ensure that adjust_address won't be forced to produce a pointer
6055 outside the range allowed by the x86-64 instruction set. */
6056 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6057 {
6058 rtx r11;
6059
6060 r11 = gen_rtx_REG (DImode, R11_REG);
6061 emit_move_insn (r11, GEN_INT (offset));
6062 emit_insn (gen_adddi3 (r11, r11, pointer));
6063 base_address = gen_rtx_MEM (Pmode, r11);
6064 offset = 0;
6065 }
6066 emit_move_insn (gen_rtx_REG (Pmode, regno),
6067 adjust_address (base_address, Pmode, offset));
6068 offset += UNITS_PER_WORD;
6069 }
6070 }
6071
6072 /* Restore function stack, frame, and registers. */
6073
6074 void
6075 ix86_expand_epilogue (int style)
6076 {
6077 int regno;
6078 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6079 struct ix86_frame frame;
6080 HOST_WIDE_INT offset;
6081
6082 ix86_compute_frame_layout (&frame);
6083
6084 /* Calculate start of saved registers relative to ebp. Special care
6085 must be taken for the normal return case of a function using
6086 eh_return: the eax and edx registers are marked as saved, but not
6087 restored along this path. */
6088 offset = frame.nregs;
6089 if (current_function_calls_eh_return && style != 2)
6090 offset -= 2;
6091 offset *= -UNITS_PER_WORD;
6092
6093 /* If we're only restoring one register and sp is not valid then
6094 use a move instruction to restore the register, since it's
6095 less work than reloading sp and popping the register.
6096
6097 The default code results in a stack adjustment using an add/lea instruction,
6098 while this code results in a LEAVE instruction (or its discrete equivalent),
6099 so it is profitable in some other cases as well, especially when there
6100 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6101 and there is exactly one register to pop. This heuristic may need some
6102 tuning in the future. */
6103 if ((!sp_valid && frame.nregs <= 1)
6104 || (TARGET_EPILOGUE_USING_MOVE
6105 && cfun->machine->use_fast_prologue_epilogue
6106 && (frame.nregs > 1 || frame.to_allocate))
6107 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6108 || (frame_pointer_needed && TARGET_USE_LEAVE
6109 && cfun->machine->use_fast_prologue_epilogue
6110 && frame.nregs == 1)
6111 || current_function_calls_eh_return)
6112 {
6113 /* Restore registers. We can use ebp or esp to address the memory
6114 locations. If both are available, default to ebp, since offsets
6115 are known to be small. The only exception is esp pointing directly to the
6116 end of the block of saved registers, where we may simplify the addressing
6117 mode. */
6118
6119 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6120 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6121 frame.to_allocate, style == 2);
6122 else
6123 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6124 offset, style == 2);
6125
6126 /* eh_return epilogues need %ecx added to the stack pointer. */
6127 if (style == 2)
6128 {
6129 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6130
6131 if (frame_pointer_needed)
6132 {
6133 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6134 tmp = plus_constant (tmp, UNITS_PER_WORD);
6135 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6136
6137 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6138 emit_move_insn (hard_frame_pointer_rtx, tmp);
6139
6140 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6141 const0_rtx, style);
6142 }
6143 else
6144 {
6145 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6146 tmp = plus_constant (tmp, (frame.to_allocate
6147 + frame.nregs * UNITS_PER_WORD));
6148 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6149 }
6150 }
6151 else if (!frame_pointer_needed)
6152 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6153 GEN_INT (frame.to_allocate
6154 + frame.nregs * UNITS_PER_WORD),
6155 style);
6156 /* If not an i386, mov & pop is faster than "leave". */
6157 else if (TARGET_USE_LEAVE || optimize_size
6158 || !cfun->machine->use_fast_prologue_epilogue)
6159 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6160 else
6161 {
6162 pro_epilogue_adjust_stack (stack_pointer_rtx,
6163 hard_frame_pointer_rtx,
6164 const0_rtx, style);
6165 if (TARGET_64BIT)
6166 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6167 else
6168 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6169 }
6170 }
6171 else
6172 {
6173 /* First step is to deallocate the stack frame so that we can
6174 pop the registers. */
6175 if (!sp_valid)
6176 {
6177 gcc_assert (frame_pointer_needed);
6178 pro_epilogue_adjust_stack (stack_pointer_rtx,
6179 hard_frame_pointer_rtx,
6180 GEN_INT (offset), style);
6181 }
6182 else if (frame.to_allocate)
6183 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6184 GEN_INT (frame.to_allocate), style);
6185
6186 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6187 if (ix86_save_reg (regno, false))
6188 {
6189 if (TARGET_64BIT)
6190 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6191 else
6192 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6193 }
6194 if (frame_pointer_needed)
6195 {
6196 /* Leave results in shorter dependency chains on CPUs that are
6197 able to grok it fast. */
6198 if (TARGET_USE_LEAVE)
6199 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6200 else if (TARGET_64BIT)
6201 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6202 else
6203 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6204 }
6205 }
6206
6207 if (cfun->machine->force_align_arg_pointer)
6208 {
6209 emit_insn (gen_addsi3 (stack_pointer_rtx,
6210 cfun->machine->force_align_arg_pointer,
6211 GEN_INT (-4)));
6212 }
6213
6214 /* Sibcall epilogues don't want a return instruction. */
6215 if (style == 0)
6216 return;
6217
6218 if (current_function_pops_args && current_function_args_size)
6219 {
6220 rtx popc = GEN_INT (current_function_pops_args);
6221
6222 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6223 return address, do an explicit add, and jump indirectly to the
6224 caller. */
6225
6226 if (current_function_pops_args >= 65536)
6227 {
6228 rtx ecx = gen_rtx_REG (SImode, 2);
6229
6230 /* There is no "pascal" calling convention in any 64bit ABI. */
6231 gcc_assert (!TARGET_64BIT);
6232
6233 emit_insn (gen_popsi1 (ecx));
6234 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6235 emit_jump_insn (gen_return_indirect_internal (ecx));
6236 }
6237 else
6238 emit_jump_insn (gen_return_pop_internal (popc));
6239 }
6240 else
6241 emit_jump_insn (gen_return_internal ());
6242 }
6243
6244 /* Reset from the function's potential modifications. */
6245
6246 static void
6247 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6248 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6249 {
6250 if (pic_offset_table_rtx)
6251 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6252 #if TARGET_MACHO
6253 /* Mach-O doesn't support labels at the end of objects, so if
6254 it looks like we might want one, insert a NOP. */
6255 {
6256 rtx insn = get_last_insn ();
6257 while (insn
6258 && NOTE_P (insn)
6259 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6260 insn = PREV_INSN (insn);
6261 if (insn
6262 && (LABEL_P (insn)
6263 || (NOTE_P (insn)
6264 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6265 fputs ("\tnop\n", file);
6266 }
6267 #endif
6268
6269 }
6270 \f
6271 /* Extract the parts of an RTL expression that is a valid memory address
6272 for an instruction. Return 0 if the structure of the address is
6273 grossly off. Return -1 if the address contains ASHIFT, so it is not
6274 strictly valid, but is still used for computing the length of an lea instruction. */
6275
6276 int
6277 ix86_decompose_address (rtx addr, struct ix86_address *out)
6278 {
6279 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6280 rtx base_reg, index_reg;
6281 HOST_WIDE_INT scale = 1;
6282 rtx scale_rtx = NULL_RTX;
6283 int retval = 1;
6284 enum ix86_address_seg seg = SEG_DEFAULT;
6285
6286 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6287 base = addr;
6288 else if (GET_CODE (addr) == PLUS)
6289 {
6290 rtx addends[4], op;
6291 int n = 0, i;
6292
6293 op = addr;
6294 do
6295 {
6296 if (n >= 4)
6297 return 0;
6298 addends[n++] = XEXP (op, 1);
6299 op = XEXP (op, 0);
6300 }
6301 while (GET_CODE (op) == PLUS);
6302 if (n >= 4)
6303 return 0;
6304 addends[n] = op;
6305
6306 for (i = n; i >= 0; --i)
6307 {
6308 op = addends[i];
6309 switch (GET_CODE (op))
6310 {
6311 case MULT:
6312 if (index)
6313 return 0;
6314 index = XEXP (op, 0);
6315 scale_rtx = XEXP (op, 1);
6316 break;
6317
6318 case UNSPEC:
6319 if (XINT (op, 1) == UNSPEC_TP
6320 && TARGET_TLS_DIRECT_SEG_REFS
6321 && seg == SEG_DEFAULT)
6322 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6323 else
6324 return 0;
6325 break;
6326
6327 case REG:
6328 case SUBREG:
6329 if (!base)
6330 base = op;
6331 else if (!index)
6332 index = op;
6333 else
6334 return 0;
6335 break;
6336
6337 case CONST:
6338 case CONST_INT:
6339 case SYMBOL_REF:
6340 case LABEL_REF:
6341 if (disp)
6342 return 0;
6343 disp = op;
6344 break;
6345
6346 default:
6347 return 0;
6348 }
6349 }
6350 }
6351 else if (GET_CODE (addr) == MULT)
6352 {
6353 index = XEXP (addr, 0); /* index*scale */
6354 scale_rtx = XEXP (addr, 1);
6355 }
6356 else if (GET_CODE (addr) == ASHIFT)
6357 {
6358 rtx tmp;
6359
6360 /* We're called for lea too, which implements ashift on occasion. */
6361 index = XEXP (addr, 0);
6362 tmp = XEXP (addr, 1);
6363 if (!CONST_INT_P (tmp))
6364 return 0;
6365 scale = INTVAL (tmp);
6366 if ((unsigned HOST_WIDE_INT) scale > 3)
6367 return 0;
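      /* Convert the shift count into a multiplier, e.g. (ashift index 3)
	 is index*8.  */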
6368 scale = 1 << scale;
6369 retval = -1;
6370 }
6371 else
6372 disp = addr; /* displacement */
6373
6374 /* Extract the integral value of scale. */
6375 if (scale_rtx)
6376 {
6377 if (!CONST_INT_P (scale_rtx))
6378 return 0;
6379 scale = INTVAL (scale_rtx);
6380 }
6381
6382 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6383 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6384
6385 /* Allow the arg pointer and stack pointer as an index if there is no scaling. */
6386 if (base_reg && index_reg && scale == 1
6387 && (index_reg == arg_pointer_rtx
6388 || index_reg == frame_pointer_rtx
6389 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6390 {
6391 rtx tmp;
6392 tmp = base, base = index, index = tmp;
6393 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6394 }
6395
6396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6397 if ((base_reg == hard_frame_pointer_rtx
6398 || base_reg == frame_pointer_rtx
6399 || base_reg == arg_pointer_rtx) && !disp)
6400 disp = const0_rtx;
6401
6402 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6403 Avoid this by transforming it to [%esi+0]. */
6404 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6405 && base_reg && !index_reg && !disp
6406 && REG_P (base_reg)
6407 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6408 disp = const0_rtx;
6409
6410 /* Special case: encode reg+reg instead of reg*2. */
6411 if (!base && index && scale && scale == 2)
6412 base = index, base_reg = index_reg, scale = 1;
6413
6414 /* Special case: scaling cannot be encoded without base or displacement. */
6415 if (!base && !disp && index && scale != 1)
6416 disp = const0_rtx;
6417
6418 out->base = base;
6419 out->index = index;
6420 out->disp = disp;
6421 out->scale = scale;
6422 out->seg = seg;
6423
6424 return retval;
6425 }
6426 \f
6427 /* Return cost of the memory address x.
6428 For i386, it is better to use a complex address than let gcc copy
6429 the address into a reg and make a new pseudo. But not if the address
6430 requires two regs - that would mean more pseudos with longer
6431 lifetimes. */
6432 static int
6433 ix86_address_cost (rtx x)
6434 {
6435 struct ix86_address parts;
6436 int cost = 1;
6437 int ok = ix86_decompose_address (x, &parts);
6438
6439 gcc_assert (ok);
6440
6441 if (parts.base && GET_CODE (parts.base) == SUBREG)
6442 parts.base = SUBREG_REG (parts.base);
6443 if (parts.index && GET_CODE (parts.index) == SUBREG)
6444 parts.index = SUBREG_REG (parts.index);
6445
6446 /* More complex memory references are better. */
6447 if (parts.disp && parts.disp != const0_rtx)
6448 cost--;
6449 if (parts.seg != SEG_DEFAULT)
6450 cost--;
6451
6452 /* Attempt to minimize number of registers in the address. */
6453 if ((parts.base
6454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6455 || (parts.index
6456 && (!REG_P (parts.index)
6457 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6458 cost++;
6459
6460 if (parts.base
6461 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6462 && parts.index
6463 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6464 && parts.base != parts.index)
6465 cost++;
6466
6467 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6468 since its predecode logic can't detect the length of such instructions
6469 and decoding degenerates to the vector decoder. Increase the cost of such
6470 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6471 to split such addresses or even to refuse them altogether.
6472
6473 The following addressing modes are affected:
6474 [base+scale*index]
6475 [scale*index+disp]
6476 [base+index]
6477
6478 The first and last cases may be avoidable by explicitly coding a zero
6479 displacement in the address, but I don't have an AMD K6 machine handy
6480 to check this theory. */
6481
6482 if (TARGET_K6
6483 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6484 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6485 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6486 cost += 10;
6487
6488 return cost;
6489 }
6490 \f
6491 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6492 this is used to form addresses to local data when -fPIC is in
6493 use. */
6494
6495 static bool
6496 darwin_local_data_pic (rtx disp)
6497 {
6498 if (GET_CODE (disp) == MINUS)
6499 {
6500 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6501 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6502 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6503 {
6504 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6505 if (! strcmp (sym_name, "<pic base>"))
6506 return true;
6507 }
6508 }
6509
6510 return false;
6511 }
6512
6513 /* Determine if a given RTX is a valid constant. We already know this
6514 satisfies CONSTANT_P. */
6515
6516 bool
6517 legitimate_constant_p (rtx x)
6518 {
6519 switch (GET_CODE (x))
6520 {
6521 case CONST:
6522 x = XEXP (x, 0);
6523
6524 if (GET_CODE (x) == PLUS)
6525 {
6526 if (!CONST_INT_P (XEXP (x, 1)))
6527 return false;
6528 x = XEXP (x, 0);
6529 }
6530
6531 if (TARGET_MACHO && darwin_local_data_pic (x))
6532 return true;
6533
6534 /* Only some unspecs are valid as "constants". */
6535 if (GET_CODE (x) == UNSPEC)
6536 switch (XINT (x, 1))
6537 {
6538 case UNSPEC_GOT:
6539 case UNSPEC_GOTOFF:
6540 case UNSPEC_PLTOFF:
6541 return TARGET_64BIT;
6542 case UNSPEC_TPOFF:
6543 case UNSPEC_NTPOFF:
6544 x = XVECEXP (x, 0, 0);
6545 return (GET_CODE (x) == SYMBOL_REF
6546 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6547 case UNSPEC_DTPOFF:
6548 x = XVECEXP (x, 0, 0);
6549 return (GET_CODE (x) == SYMBOL_REF
6550 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6551 default:
6552 return false;
6553 }
6554
6555 /* We must have drilled down to a symbol. */
6556 if (GET_CODE (x) == LABEL_REF)
6557 return true;
6558 if (GET_CODE (x) != SYMBOL_REF)
6559 return false;
6560 /* FALLTHRU */
6561
6562 case SYMBOL_REF:
6563 /* TLS symbols are never valid. */
6564 if (SYMBOL_REF_TLS_MODEL (x))
6565 return false;
6566
6567 /* DLLIMPORT symbols are never valid. */
6568 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6569 && SYMBOL_REF_DLLIMPORT_P (x))
6570 return false;
6571 break;
6572
6573 case CONST_DOUBLE:
6574 if (GET_MODE (x) == TImode
6575 && x != CONST0_RTX (TImode)
6576 && !TARGET_64BIT)
6577 return false;
6578 break;
6579
6580 case CONST_VECTOR:
6581 if (x == CONST0_RTX (GET_MODE (x)))
6582 return true;
6583 return false;
6584
6585 default:
6586 break;
6587 }
6588
6589 /* Otherwise we handle everything else in the move patterns. */
6590 return true;
6591 }
6592
6593 /* Determine if it's legal to put X into the constant pool. This
6594 is not possible for the address of thread-local symbols, which
6595 is checked above. */
6596
6597 static bool
6598 ix86_cannot_force_const_mem (rtx x)
6599 {
6600 /* We can always put integral constants and vectors in memory. */
6601 switch (GET_CODE (x))
6602 {
6603 case CONST_INT:
6604 case CONST_DOUBLE:
6605 case CONST_VECTOR:
6606 return false;
6607
6608 default:
6609 break;
6610 }
6611 return !legitimate_constant_p (x);
6612 }
6613
6614 /* Determine if a given RTX is a valid constant address. */
6615
6616 bool
6617 constant_address_p (rtx x)
6618 {
6619 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6620 }
6621
6622 /* Nonzero if the constant value X is a legitimate general operand
6623 when generating PIC code. It is given that flag_pic is on and
6624 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6625
6626 bool
6627 legitimate_pic_operand_p (rtx x)
6628 {
6629 rtx inner;
6630
6631 switch (GET_CODE (x))
6632 {
6633 case CONST:
6634 inner = XEXP (x, 0);
6635 if (GET_CODE (inner) == PLUS
6636 && CONST_INT_P (XEXP (inner, 1)))
6637 inner = XEXP (inner, 0);
6638
6639 /* Only some unspecs are valid as "constants". */
6640 if (GET_CODE (inner) == UNSPEC)
6641 switch (XINT (inner, 1))
6642 {
6643 case UNSPEC_GOT:
6644 case UNSPEC_GOTOFF:
6645 case UNSPEC_PLTOFF:
6646 return TARGET_64BIT;
6647 case UNSPEC_TPOFF:
6648 x = XVECEXP (inner, 0, 0);
6649 return (GET_CODE (x) == SYMBOL_REF
6650 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6651 default:
6652 return false;
6653 }
6654 /* FALLTHRU */
6655
6656 case SYMBOL_REF:
6657 case LABEL_REF:
6658 return legitimate_pic_address_disp_p (x);
6659
6660 default:
6661 return true;
6662 }
6663 }
6664
6665 /* Determine if a given CONST RTX is a valid memory displacement
6666 in PIC mode. */
6667
6668 int
6669 legitimate_pic_address_disp_p (rtx disp)
6670 {
6671 bool saw_plus;
6672
6673 /* In 64bit mode we can allow direct addresses of symbols and labels
6674 when they are not dynamic symbols. */
6675 if (TARGET_64BIT)
6676 {
6677 rtx op0 = disp, op1;
6678
6679 switch (GET_CODE (disp))
6680 {
6681 case LABEL_REF:
6682 return true;
6683
6684 case CONST:
6685 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6686 break;
6687 op0 = XEXP (XEXP (disp, 0), 0);
6688 op1 = XEXP (XEXP (disp, 0), 1);
6689 if (!CONST_INT_P (op1)
6690 || INTVAL (op1) >= 16*1024*1024
6691 || INTVAL (op1) < -16*1024*1024)
6692 break;
6693 if (GET_CODE (op0) == LABEL_REF)
6694 return true;
6695 if (GET_CODE (op0) != SYMBOL_REF)
6696 break;
6697 /* FALLTHRU */
6698
6699 case SYMBOL_REF:
6700 /* TLS references should always be enclosed in UNSPEC. */
6701 if (SYMBOL_REF_TLS_MODEL (op0))
6702 return false;
6703 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6704 && ix86_cmodel != CM_LARGE_PIC)
6705 return true;
6706 break;
6707
6708 default:
6709 break;
6710 }
6711 }
6712 if (GET_CODE (disp) != CONST)
6713 return 0;
6714 disp = XEXP (disp, 0);
6715
6716 if (TARGET_64BIT)
6717 {
6718 /* It is not safe to allow PLUS expressions here; refusing them limits the
6719 allowed offsets into the GOT, which we should not need anyway. */
6720 if (GET_CODE (disp) != UNSPEC
6721 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6722 && XINT (disp, 1) != UNSPEC_GOTOFF
6723 && XINT (disp, 1) != UNSPEC_PLTOFF))
6724 return 0;
6725
6726 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6727 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6728 return 0;
6729 return 1;
6730 }
6731
6732 saw_plus = false;
6733 if (GET_CODE (disp) == PLUS)
6734 {
6735 if (!CONST_INT_P (XEXP (disp, 1)))
6736 return 0;
6737 disp = XEXP (disp, 0);
6738 saw_plus = true;
6739 }
6740
6741 if (TARGET_MACHO && darwin_local_data_pic (disp))
6742 return 1;
6743
6744 if (GET_CODE (disp) != UNSPEC)
6745 return 0;
6746
6747 switch (XINT (disp, 1))
6748 {
6749 case UNSPEC_GOT:
6750 if (saw_plus)
6751 return false;
6752 /* We need to check for both symbols and labels because VxWorks loads
6753 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6754 details. */
6755 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6756 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6757 case UNSPEC_GOTOFF:
6758 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6759 While the ABI also specifies a 32bit relocation, we don't produce it in
6760 the small PIC model at all. */
6761 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6762 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6763 && !TARGET_64BIT)
6764 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6765 return false;
6766 case UNSPEC_GOTTPOFF:
6767 case UNSPEC_GOTNTPOFF:
6768 case UNSPEC_INDNTPOFF:
6769 if (saw_plus)
6770 return false;
6771 disp = XVECEXP (disp, 0, 0);
6772 return (GET_CODE (disp) == SYMBOL_REF
6773 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6774 case UNSPEC_NTPOFF:
6775 disp = XVECEXP (disp, 0, 0);
6776 return (GET_CODE (disp) == SYMBOL_REF
6777 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6778 case UNSPEC_DTPOFF:
6779 disp = XVECEXP (disp, 0, 0);
6780 return (GET_CODE (disp) == SYMBOL_REF
6781 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6782 }
6783
6784 return 0;
6785 }
6786
6787 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6788 memory address for an instruction. The MODE argument is the machine mode
6789 for the MEM expression that wants to use this address.
6790
6791 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6792 convert common non-canonical forms to canonical form so that they will
6793 be recognized. */
6794
6795 int
6796 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6797 rtx addr, int strict)
6798 {
6799 struct ix86_address parts;
6800 rtx base, index, disp;
6801 HOST_WIDE_INT scale;
6802 const char *reason = NULL;
6803 rtx reason_rtx = NULL_RTX;
6804
6805 if (ix86_decompose_address (addr, &parts) <= 0)
6806 {
6807 reason = "decomposition failed";
6808 goto report_error;
6809 }
6810
6811 base = parts.base;
6812 index = parts.index;
6813 disp = parts.disp;
6814 scale = parts.scale;
6815
6816 /* Validate base register.
6817
6818 Don't allow SUBREG's that span more than a word here. It can lead to spill
6819 failures when the base is one word out of a two word structure, which is
6820 represented internally as a DImode int. */
6821
6822 if (base)
6823 {
6824 rtx reg;
6825 reason_rtx = base;
6826
6827 if (REG_P (base))
6828 reg = base;
6829 else if (GET_CODE (base) == SUBREG
6830 && REG_P (SUBREG_REG (base))
6831 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6832 <= UNITS_PER_WORD)
6833 reg = SUBREG_REG (base);
6834 else
6835 {
6836 reason = "base is not a register";
6837 goto report_error;
6838 }
6839
6840 if (GET_MODE (base) != Pmode)
6841 {
6842 reason = "base is not in Pmode";
6843 goto report_error;
6844 }
6845
6846 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6847 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6848 {
6849 reason = "base is not valid";
6850 goto report_error;
6851 }
6852 }
6853
6854 /* Validate index register.
6855
6856 Don't allow SUBREG's that span more than a word here -- same as above. */
6857
6858 if (index)
6859 {
6860 rtx reg;
6861 reason_rtx = index;
6862
6863 if (REG_P (index))
6864 reg = index;
6865 else if (GET_CODE (index) == SUBREG
6866 && REG_P (SUBREG_REG (index))
6867 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6868 <= UNITS_PER_WORD)
6869 reg = SUBREG_REG (index);
6870 else
6871 {
6872 reason = "index is not a register";
6873 goto report_error;
6874 }
6875
6876 if (GET_MODE (index) != Pmode)
6877 {
6878 reason = "index is not in Pmode";
6879 goto report_error;
6880 }
6881
6882 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6883 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6884 {
6885 reason = "index is not valid";
6886 goto report_error;
6887 }
6888 }
6889
6890 /* Validate scale factor. */
6891 if (scale != 1)
6892 {
6893 reason_rtx = GEN_INT (scale);
6894 if (!index)
6895 {
6896 reason = "scale without index";
6897 goto report_error;
6898 }
6899
6900 if (scale != 2 && scale != 4 && scale != 8)
6901 {
6902 reason = "scale is not a valid multiplier";
6903 goto report_error;
6904 }
6905 }
6906
6907 /* Validate displacement. */
6908 if (disp)
6909 {
6910 reason_rtx = disp;
6911
6912 if (GET_CODE (disp) == CONST
6913 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6914 switch (XINT (XEXP (disp, 0), 1))
6915 {
6916 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
6917 used. While the ABI also specifies 32bit relocations, we don't produce
6918 them at all and use IP relative addressing instead. */
6919 case UNSPEC_GOT:
6920 case UNSPEC_GOTOFF:
6921 gcc_assert (flag_pic);
6922 if (!TARGET_64BIT)
6923 goto is_legitimate_pic;
6924 reason = "64bit address unspec";
6925 goto report_error;
6926
6927 case UNSPEC_GOTPCREL:
6928 gcc_assert (flag_pic);
6929 goto is_legitimate_pic;
6930
6931 case UNSPEC_GOTTPOFF:
6932 case UNSPEC_GOTNTPOFF:
6933 case UNSPEC_INDNTPOFF:
6934 case UNSPEC_NTPOFF:
6935 case UNSPEC_DTPOFF:
6936 break;
6937
6938 default:
6939 reason = "invalid address unspec";
6940 goto report_error;
6941 }
6942
6943 else if (SYMBOLIC_CONST (disp)
6944 && (flag_pic
6945 || (TARGET_MACHO
6946 #if TARGET_MACHO
6947 && MACHOPIC_INDIRECT
6948 && !machopic_operand_p (disp)
6949 #endif
6950 )))
6951 {
6952
6953 is_legitimate_pic:
6954 if (TARGET_64BIT && (index || base))
6955 {
6956 /* foo@dtpoff(%rX) is ok. */
6957 if (GET_CODE (disp) != CONST
6958 || GET_CODE (XEXP (disp, 0)) != PLUS
6959 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6960 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6961 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6962 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6963 {
6964 reason = "non-constant pic memory reference";
6965 goto report_error;
6966 }
6967 }
6968 else if (! legitimate_pic_address_disp_p (disp))
6969 {
6970 reason = "displacement is an invalid pic construct";
6971 goto report_error;
6972 }
6973
6974 /* This code used to verify that a symbolic pic displacement
6975 includes the pic_offset_table_rtx register.
6976
6977 While this is a good idea, unfortunately these constructs may
6978 be created by the "adds using lea" optimization for incorrect
6979 code like:
6980
6981 int a;
6982 int foo(int i)
6983 {
6984 return *(&a+i);
6985 }
6986
6987 This code is nonsensical, but results in addressing the
6988 GOT table with a pic_offset_table_rtx base. We can't
6989 just refuse it easily, since it gets matched by the
6990 "addsi3" pattern, which later gets split to lea when the
6991 output register differs from the input. While this
6992 could be handled by a separate addsi pattern for this case
6993 that never results in lea, disabling this test seems to be
6994 the easier and correct fix for the crash. */
6995 }
6996 else if (GET_CODE (disp) != LABEL_REF
6997 && !CONST_INT_P (disp)
6998 && (GET_CODE (disp) != CONST
6999 || !legitimate_constant_p (disp))
7000 && (GET_CODE (disp) != SYMBOL_REF
7001 || !legitimate_constant_p (disp)))
7002 {
7003 reason = "displacement is not constant";
7004 goto report_error;
7005 }
7006 else if (TARGET_64BIT
7007 && !x86_64_immediate_operand (disp, VOIDmode))
7008 {
7009 reason = "displacement is out of range";
7010 goto report_error;
7011 }
7012 }
7013
7014 /* Everything looks valid. */
7015 return TRUE;
7016
7017 report_error:
7018 return FALSE;
7019 }
7020 \f
7021 /* Return a unique alias set for the GOT. */
7022
7023 static HOST_WIDE_INT
7024 ix86_GOT_alias_set (void)
7025 {
7026 static HOST_WIDE_INT set = -1;
7027 if (set == -1)
7028 set = new_alias_set ();
7029 return set;
7030 }
7031
7032 /* Return a legitimate reference for ORIG (an address) using the
7033 register REG. If REG is 0, a new pseudo is generated.
7034
7035 There are two types of references that must be handled:
7036
7037 1. Global data references must load the address from the GOT, via
7038 the PIC reg. An insn is emitted to do this load, and the reg is
7039 returned.
7040
7041 2. Static data references, constant pool addresses, and code labels
7042 compute the address as an offset from the GOT, whose base is in
7043 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7044 differentiate them from global data objects. The returned
7045 address is the PIC reg + an unspec constant.
7046
7047 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7048 reg also appears in the address. */
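/* Illustrative sketch (standard ELF small PIC model assumed): a global
   symbol is loaded through the GOT as
   (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT)))),
   while a local symbol becomes the direct sum
   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF))).
   These are exactly the forms constructed further down.  */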
7049
7050 static rtx
7051 legitimize_pic_address (rtx orig, rtx reg)
7052 {
7053 rtx addr = orig;
7054 rtx new = orig;
7055 rtx base;
7056
7057 #if TARGET_MACHO
7058 if (TARGET_MACHO && !TARGET_64BIT)
7059 {
7060 if (reg == 0)
7061 reg = gen_reg_rtx (Pmode);
7062 /* Use the generic Mach-O PIC machinery. */
7063 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7064 }
7065 #endif
7066
7067 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7068 new = addr;
7069 else if (TARGET_64BIT
7070 && ix86_cmodel != CM_SMALL_PIC
7071 && gotoff_operand (addr, Pmode))
7072 {
7073 rtx tmpreg;
7074 /* This symbol may be referenced via a displacement from the PIC
7075 base address (@GOTOFF). */
7076
7077 if (reload_in_progress)
7078 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7079 if (GET_CODE (addr) == CONST)
7080 addr = XEXP (addr, 0);
7081 if (GET_CODE (addr) == PLUS)
7082 {
7083 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7084 UNSPEC_GOTOFF);
7085 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7086 }
7087 else
7088 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7089 new = gen_rtx_CONST (Pmode, new);
7090 if (!reg)
7091 tmpreg = gen_reg_rtx (Pmode);
7092 else
7093 tmpreg = reg;
7094 emit_move_insn (tmpreg, new);
7095
7096 if (reg != 0)
7097 {
7098 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7099 tmpreg, 1, OPTAB_DIRECT);
7100 new = reg;
7101 }
7102 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7103 }
7104 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7105 {
7106 /* This symbol may be referenced via a displacement from the PIC
7107 base address (@GOTOFF). */
7108
7109 if (reload_in_progress)
7110 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7111 if (GET_CODE (addr) == CONST)
7112 addr = XEXP (addr, 0);
7113 if (GET_CODE (addr) == PLUS)
7114 {
7115 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7116 UNSPEC_GOTOFF);
7117 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7118 }
7119 else
7120 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7121 new = gen_rtx_CONST (Pmode, new);
7122 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7123
7124 if (reg != 0)
7125 {
7126 emit_move_insn (reg, new);
7127 new = reg;
7128 }
7129 }
7130 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7131 /* We can't use @GOTOFF for text labels on VxWorks;
7132 see gotoff_operand. */
7133 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7134 {
7135 /* Given that we've already handled dllimport variables separately
7136 in legitimize_address, and all other variables should satisfy
7137 legitimate_pic_address_disp_p, we should never arrive here. */
7138 gcc_assert (!TARGET_64BIT_MS_ABI);
7139
7140 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7141 {
7142 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7143 new = gen_rtx_CONST (Pmode, new);
7144 new = gen_const_mem (Pmode, new);
7145 set_mem_alias_set (new, ix86_GOT_alias_set ());
7146
7147 if (reg == 0)
7148 reg = gen_reg_rtx (Pmode);
7149 /* Use gen_movsi directly; otherwise the address is loaded
7150 into a register for CSE. We don't want to CSE these addresses;
7151 instead we CSE addresses from the GOT table, so skip this. */
7152 emit_insn (gen_movsi (reg, new));
7153 new = reg;
7154 }
7155 else
7156 {
7157 /* This symbol must be referenced via a load from the
7158 Global Offset Table (@GOT). */
7159
7160 if (reload_in_progress)
7161 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7162 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7163 new = gen_rtx_CONST (Pmode, new);
7164 if (TARGET_64BIT)
7165 new = force_reg (Pmode, new);
7166 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7167 new = gen_const_mem (Pmode, new);
7168 set_mem_alias_set (new, ix86_GOT_alias_set ());
7169
7170 if (reg == 0)
7171 reg = gen_reg_rtx (Pmode);
7172 emit_move_insn (reg, new);
7173 new = reg;
7174 }
7175 }
7176 else
7177 {
7178 if (CONST_INT_P (addr)
7179 && !x86_64_immediate_operand (addr, VOIDmode))
7180 {
7181 if (reg)
7182 {
7183 emit_move_insn (reg, addr);
7184 new = reg;
7185 }
7186 else
7187 new = force_reg (Pmode, addr);
7188 }
7189 else if (GET_CODE (addr) == CONST)
7190 {
7191 addr = XEXP (addr, 0);
7192
7193 /* We must match stuff we generated before. Assume the only
7194 unspecs that can get here are ours; not that we could do
7195 anything with them anyway.... */
7196 if (GET_CODE (addr) == UNSPEC
7197 || (GET_CODE (addr) == PLUS
7198 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7199 return orig;
7200 gcc_assert (GET_CODE (addr) == PLUS);
7201 }
7202 if (GET_CODE (addr) == PLUS)
7203 {
7204 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7205
7206 /* Check first to see if this is a constant offset from a @GOTOFF
7207 symbol reference. */
7208 if (gotoff_operand (op0, Pmode)
7209 && CONST_INT_P (op1))
7210 {
7211 if (!TARGET_64BIT)
7212 {
7213 if (reload_in_progress)
7214 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7215 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7216 UNSPEC_GOTOFF);
7217 new = gen_rtx_PLUS (Pmode, new, op1);
7218 new = gen_rtx_CONST (Pmode, new);
7219 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7220
7221 if (reg != 0)
7222 {
7223 emit_move_insn (reg, new);
7224 new = reg;
7225 }
7226 }
7227 else
7228 {
7229 if (INTVAL (op1) < -16*1024*1024
7230 || INTVAL (op1) >= 16*1024*1024)
7231 {
7232 if (!x86_64_immediate_operand (op1, Pmode))
7233 op1 = force_reg (Pmode, op1);
7234 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7235 }
7236 }
7237 }
7238 else
7239 {
7240 base = legitimize_pic_address (XEXP (addr, 0), reg);
7241 new = legitimize_pic_address (XEXP (addr, 1),
7242 base == reg ? NULL_RTX : reg);
7243
7244 if (CONST_INT_P (new))
7245 new = plus_constant (base, INTVAL (new));
7246 else
7247 {
7248 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7249 {
7250 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7251 new = XEXP (new, 1);
7252 }
7253 new = gen_rtx_PLUS (Pmode, base, new);
7254 }
7255 }
7256 }
7257 }
7258 return new;
7259 }
7260 \f
7261 /* Load the thread pointer. If TO_REG is true, force it into a register. */
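/* Sketch of the representation (GNU/Linux-style TLS assumed): the thread
   pointer is modelled as (unspec [const0_rtx] UNSPEC_TP), which the insn
   patterns materialize from the %gs (32-bit) or %fs (64-bit) segment base.  */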
7262
7263 static rtx
7264 get_thread_pointer (int to_reg)
7265 {
7266 rtx tp, reg, insn;
7267
7268 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7269 if (!to_reg)
7270 return tp;
7271
7272 reg = gen_reg_rtx (Pmode);
7273 insn = gen_rtx_SET (VOIDmode, reg, tp);
7274 insn = emit_insn (insn);
7275
7276 return reg;
7277 }
7278
7279 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7280 false if we expect this to be used for a memory address and true if
7281 we expect to load the address into a register. */
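/* Rough examples of the code produced (GNU TLS on ELF assumed; the exact
   sequences depend on the flags handled below):
     local exec, 32-bit:   movl %gs:0, %eax ; leal x@ntpoff(%eax), %eax
     initial exec, 64-bit: movq x@gottpoff(%rip), %rax ; movq %fs:(%rax), %rax
   The global/local dynamic models instead go through __tls_get_addr
   (or TLS descriptors with GNU2 TLS).  */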
7282
7283 static rtx
7284 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7285 {
7286 rtx dest, base, off, pic, tp;
7287 int type;
7288
7289 switch (model)
7290 {
7291 case TLS_MODEL_GLOBAL_DYNAMIC:
7292 dest = gen_reg_rtx (Pmode);
7293 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7294
7295 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7296 {
7297 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7298
7299 start_sequence ();
7300 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7301 insns = get_insns ();
7302 end_sequence ();
7303
7304 CONST_OR_PURE_CALL_P (insns) = 1;
7305 emit_libcall_block (insns, dest, rax, x);
7306 }
7307 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7308 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7309 else
7310 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7311
7312 if (TARGET_GNU2_TLS)
7313 {
7314 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7315
7316 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7317 }
7318 break;
7319
7320 case TLS_MODEL_LOCAL_DYNAMIC:
7321 base = gen_reg_rtx (Pmode);
7322 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7323
7324 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7325 {
7326 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7327
7328 start_sequence ();
7329 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7330 insns = get_insns ();
7331 end_sequence ();
7332
7333 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7334 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7335 CONST_OR_PURE_CALL_P (insns) = 1;
7336 emit_libcall_block (insns, base, rax, note);
7337 }
7338 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7339 emit_insn (gen_tls_local_dynamic_base_64 (base));
7340 else
7341 emit_insn (gen_tls_local_dynamic_base_32 (base));
7342
7343 if (TARGET_GNU2_TLS)
7344 {
7345 rtx x = ix86_tls_module_base ();
7346
7347 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7348 gen_rtx_MINUS (Pmode, x, tp));
7349 }
7350
7351 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7352 off = gen_rtx_CONST (Pmode, off);
7353
7354 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7355
7356 if (TARGET_GNU2_TLS)
7357 {
7358 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7359
7360 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7361 }
7362
7363 break;
7364
7365 case TLS_MODEL_INITIAL_EXEC:
7366 if (TARGET_64BIT)
7367 {
7368 pic = NULL;
7369 type = UNSPEC_GOTNTPOFF;
7370 }
7371 else if (flag_pic)
7372 {
7373 if (reload_in_progress)
7374 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7375 pic = pic_offset_table_rtx;
7376 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7377 }
7378 else if (!TARGET_ANY_GNU_TLS)
7379 {
7380 pic = gen_reg_rtx (Pmode);
7381 emit_insn (gen_set_got (pic));
7382 type = UNSPEC_GOTTPOFF;
7383 }
7384 else
7385 {
7386 pic = NULL;
7387 type = UNSPEC_INDNTPOFF;
7388 }
7389
7390 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7391 off = gen_rtx_CONST (Pmode, off);
7392 if (pic)
7393 off = gen_rtx_PLUS (Pmode, pic, off);
7394 off = gen_const_mem (Pmode, off);
7395 set_mem_alias_set (off, ix86_GOT_alias_set ());
7396
7397 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7398 {
7399 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7400 off = force_reg (Pmode, off);
7401 return gen_rtx_PLUS (Pmode, base, off);
7402 }
7403 else
7404 {
7405 base = get_thread_pointer (true);
7406 dest = gen_reg_rtx (Pmode);
7407 emit_insn (gen_subsi3 (dest, base, off));
7408 }
7409 break;
7410
7411 case TLS_MODEL_LOCAL_EXEC:
7412 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7413 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7414 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7415 off = gen_rtx_CONST (Pmode, off);
7416
7417 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7418 {
7419 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7420 return gen_rtx_PLUS (Pmode, base, off);
7421 }
7422 else
7423 {
7424 base = get_thread_pointer (true);
7425 dest = gen_reg_rtx (Pmode);
7426 emit_insn (gen_subsi3 (dest, base, off));
7427 }
7428 break;
7429
7430 default:
7431 gcc_unreachable ();
7432 }
7433
7434 return dest;
7435 }
7436
7437 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7438 to symbol DECL. */
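/* Illustrative example (MinGW-style naming assumed): a dllimported
   variable "foo" is referenced through the import pointer "__imp__foo"
   ("__imp_foo" for fastcall symbols), so the returned decl's RTL is a
   MEM of that symbol, as built below.  */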
7439
7440 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7441 htab_t dllimport_map;
7442
7443 static tree
7444 get_dllimport_decl (tree decl)
7445 {
7446 struct tree_map *h, in;
7447 void **loc;
7448 const char *name;
7449 const char *prefix;
7450 size_t namelen, prefixlen;
7451 char *imp_name;
7452 tree to;
7453 rtx rtl;
7454
7455 if (!dllimport_map)
7456 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7457
7458 in.hash = htab_hash_pointer (decl);
7459 in.base.from = decl;
7460 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7461 h = *loc;
7462 if (h)
7463 return h->to;
7464
7465 *loc = h = ggc_alloc (sizeof (struct tree_map));
7466 h->hash = in.hash;
7467 h->base.from = decl;
7468 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7469 DECL_ARTIFICIAL (to) = 1;
7470 DECL_IGNORED_P (to) = 1;
7471 DECL_EXTERNAL (to) = 1;
7472 TREE_READONLY (to) = 1;
7473
7474 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7475 name = targetm.strip_name_encoding (name);
7476 if (name[0] == FASTCALL_PREFIX)
7477 {
7478 name++;
7479 prefix = "*__imp_";
7480 }
7481 else
7482 prefix = "*__imp__";
7483
7484 namelen = strlen (name);
7485 prefixlen = strlen (prefix);
7486 imp_name = alloca (namelen + prefixlen + 1);
7487 memcpy (imp_name, prefix, prefixlen);
7488 memcpy (imp_name + prefixlen, name, namelen + 1);
7489
7490 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7491 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7492 SET_SYMBOL_REF_DECL (rtl, to);
7493 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7494
7495 rtl = gen_const_mem (Pmode, rtl);
7496 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7497
7498 SET_DECL_RTL (to, rtl);
7499
7500 return to;
7501 }
7502
7503 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7504 true if we require the result be a register. */
7505
7506 static rtx
7507 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7508 {
7509 tree imp_decl;
7510 rtx x;
7511
7512 gcc_assert (SYMBOL_REF_DECL (symbol));
7513 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7514
7515 x = DECL_RTL (imp_decl);
7516 if (want_reg)
7517 x = force_reg (Pmode, x);
7518 return x;
7519 }
7520
7521 /* Try machine-dependent ways of modifying an illegitimate address
7522 to be legitimate. If we find one, return the new, valid address.
7523 This macro is used in only one place: `memory_address' in explow.c.
7524
7525 OLDX is the address as it was before break_out_memory_refs was called.
7526 In some cases it is useful to look at this to decide what needs to be done.
7527
7528 MODE and WIN are passed so that this macro can use
7529 GO_IF_LEGITIMATE_ADDRESS.
7530
7531 It is always safe for this macro to do nothing. It exists to recognize
7532 opportunities to optimize the output.
7533
7534 For the 80386, we handle X+REG by loading X into a register R and
7535 using R+REG. R will go in a general reg and indexing will be used.
7536 However, if REG is a broken-out memory address or multiplication,
7537 nothing needs to be done because REG can certainly go in a general reg.
7538
7539 When -fpic is used, special handling is needed for symbolic references.
7540 See comments by legitimize_pic_address in i386.c for details. */
7541
7542 rtx
7543 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7544 {
7545 int changed = 0;
7546 unsigned log;
7547
7548 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7549 if (log)
7550 return legitimize_tls_address (x, log, false);
7551 if (GET_CODE (x) == CONST
7552 && GET_CODE (XEXP (x, 0)) == PLUS
7553 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7554 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7555 {
7556 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7557 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7558 }
7559
7560 if (flag_pic && SYMBOLIC_CONST (x))
7561 return legitimize_pic_address (x, 0);
7562
7563 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7564 {
7565 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7566 return legitimize_dllimport_symbol (x, true);
7567 if (GET_CODE (x) == CONST
7568 && GET_CODE (XEXP (x, 0)) == PLUS
7569 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7570 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7571 {
7572 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7573 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7574 }
7575 }
7576
7577 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
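/* For example, (ashift (reg) (const_int 3)) becomes
   (mult (reg) (const_int 8)), which ix86_decompose_address can then
   treat as an index scaled by 8.  */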
7578 if (GET_CODE (x) == ASHIFT
7579 && CONST_INT_P (XEXP (x, 1))
7580 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7581 {
7582 changed = 1;
7583 log = INTVAL (XEXP (x, 1));
7584 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7585 GEN_INT (1 << log));
7586 }
7587
7588 if (GET_CODE (x) == PLUS)
7589 {
7590 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7591
7592 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7593 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7594 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7595 {
7596 changed = 1;
7597 log = INTVAL (XEXP (XEXP (x, 0), 1));
7598 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7599 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7600 GEN_INT (1 << log));
7601 }
7602
7603 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7604 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7605 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7606 {
7607 changed = 1;
7608 log = INTVAL (XEXP (XEXP (x, 1), 1));
7609 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7610 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7611 GEN_INT (1 << log));
7612 }
7613
7614 /* Put multiply first if it isn't already. */
7615 if (GET_CODE (XEXP (x, 1)) == MULT)
7616 {
7617 rtx tmp = XEXP (x, 0);
7618 XEXP (x, 0) = XEXP (x, 1);
7619 XEXP (x, 1) = tmp;
7620 changed = 1;
7621 }
7622
7623 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7624 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7625 created by virtual register instantiation, register elimination, and
7626 similar optimizations. */
7627 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7628 {
7629 changed = 1;
7630 x = gen_rtx_PLUS (Pmode,
7631 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7632 XEXP (XEXP (x, 1), 0)),
7633 XEXP (XEXP (x, 1), 1));
7634 }
7635
7636 /* Canonicalize
7637 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7638 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7639 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7640 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7641 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7642 && CONSTANT_P (XEXP (x, 1)))
7643 {
7644 rtx constant;
7645 rtx other = NULL_RTX;
7646
7647 if (CONST_INT_P (XEXP (x, 1)))
7648 {
7649 constant = XEXP (x, 1);
7650 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7651 }
7652 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7653 {
7654 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7655 other = XEXP (x, 1);
7656 }
7657 else
7658 constant = 0;
7659
7660 if (constant)
7661 {
7662 changed = 1;
7663 x = gen_rtx_PLUS (Pmode,
7664 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7665 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7666 plus_constant (other, INTVAL (constant)));
7667 }
7668 }
7669
7670 if (changed && legitimate_address_p (mode, x, FALSE))
7671 return x;
7672
7673 if (GET_CODE (XEXP (x, 0)) == MULT)
7674 {
7675 changed = 1;
7676 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7677 }
7678
7679 if (GET_CODE (XEXP (x, 1)) == MULT)
7680 {
7681 changed = 1;
7682 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7683 }
7684
7685 if (changed
7686 && REG_P (XEXP (x, 1))
7687 && REG_P (XEXP (x, 0)))
7688 return x;
7689
7690 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7691 {
7692 changed = 1;
7693 x = legitimize_pic_address (x, 0);
7694 }
7695
7696 if (changed && legitimate_address_p (mode, x, FALSE))
7697 return x;
7698
7699 if (REG_P (XEXP (x, 0)))
7700 {
7701 rtx temp = gen_reg_rtx (Pmode);
7702 rtx val = force_operand (XEXP (x, 1), temp);
7703 if (val != temp)
7704 emit_move_insn (temp, val);
7705
7706 XEXP (x, 1) = temp;
7707 return x;
7708 }
7709
7710 else if (REG_P (XEXP (x, 1)))
7711 {
7712 rtx temp = gen_reg_rtx (Pmode);
7713 rtx val = force_operand (XEXP (x, 0), temp);
7714 if (val != temp)
7715 emit_move_insn (temp, val);
7716
7717 XEXP (x, 0) = temp;
7718 return x;
7719 }
7720 }
7721
7722 return x;
7723 }
7724 \f
7725 /* Print an integer constant expression in assembler syntax. Addition
7726 and subtraction are the only arithmetic that may appear in these
7727 expressions. FILE is the stdio stream to write to, X is the rtx, and
7728 CODE is the operand print code from the output string. */
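/* Illustrative output: for
   (const (plus (unspec [foo] UNSPEC_GOTOFF) (const_int 4)))
   this prints "4+foo@GOTOFF", with the integer constant first as some
   assemblers require (see the PLUS case below).  */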
7729
7730 static void
7731 output_pic_addr_const (FILE *file, rtx x, int code)
7732 {
7733 char buf[256];
7734
7735 switch (GET_CODE (x))
7736 {
7737 case PC:
7738 gcc_assert (flag_pic);
7739 putc ('.', file);
7740 break;
7741
7742 case SYMBOL_REF:
7743 if (! TARGET_MACHO || TARGET_64BIT)
7744 output_addr_const (file, x);
7745 else
7746 {
7747 const char *name = XSTR (x, 0);
7748
7749 /* Mark the decl as referenced so that cgraph will
7750 output the function. */
7751 if (SYMBOL_REF_DECL (x))
7752 mark_decl_referenced (SYMBOL_REF_DECL (x));
7753
7754 #if TARGET_MACHO
7755 if (MACHOPIC_INDIRECT
7756 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7757 name = machopic_indirection_name (x, /*stub_p=*/true);
7758 #endif
7759 assemble_name (file, name);
7760 }
7761 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7762 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7763 fputs ("@PLT", file);
7764 break;
7765
7766 case LABEL_REF:
7767 x = XEXP (x, 0);
7768 /* FALLTHRU */
7769 case CODE_LABEL:
7770 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7771 assemble_name (asm_out_file, buf);
7772 break;
7773
7774 case CONST_INT:
7775 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7776 break;
7777
7778 case CONST:
7779 /* This used to output parentheses around the expression,
7780 but that does not work on the 386 (either ATT or BSD assembler). */
7781 output_pic_addr_const (file, XEXP (x, 0), code);
7782 break;
7783
7784 case CONST_DOUBLE:
7785 if (GET_MODE (x) == VOIDmode)
7786 {
7787 /* We can use %d if the number is <32 bits and positive. */
7788 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7789 fprintf (file, "0x%lx%08lx",
7790 (unsigned long) CONST_DOUBLE_HIGH (x),
7791 (unsigned long) CONST_DOUBLE_LOW (x));
7792 else
7793 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7794 }
7795 else
7796 /* We can't handle floating point constants;
7797 PRINT_OPERAND must handle them. */
7798 output_operand_lossage ("floating constant misused");
7799 break;
7800
7801 case PLUS:
7802 /* Some assemblers need integer constants to appear first. */
7803 if (CONST_INT_P (XEXP (x, 0)))
7804 {
7805 output_pic_addr_const (file, XEXP (x, 0), code);
7806 putc ('+', file);
7807 output_pic_addr_const (file, XEXP (x, 1), code);
7808 }
7809 else
7810 {
7811 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7812 output_pic_addr_const (file, XEXP (x, 1), code);
7813 putc ('+', file);
7814 output_pic_addr_const (file, XEXP (x, 0), code);
7815 }
7816 break;
7817
7818 case MINUS:
7819 if (!TARGET_MACHO)
7820 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7821 output_pic_addr_const (file, XEXP (x, 0), code);
7822 putc ('-', file);
7823 output_pic_addr_const (file, XEXP (x, 1), code);
7824 if (!TARGET_MACHO)
7825 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7826 break;
7827
7828 case UNSPEC:
7829 gcc_assert (XVECLEN (x, 0) == 1);
7830 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7831 switch (XINT (x, 1))
7832 {
7833 case UNSPEC_GOT:
7834 fputs ("@GOT", file);
7835 break;
7836 case UNSPEC_GOTOFF:
7837 fputs ("@GOTOFF", file);
7838 break;
7839 case UNSPEC_PLTOFF:
7840 fputs ("@PLTOFF", file);
7841 break;
7842 case UNSPEC_GOTPCREL:
7843 fputs ("@GOTPCREL(%rip)", file);
7844 break;
7845 case UNSPEC_GOTTPOFF:
7846 /* FIXME: This might be @TPOFF in Sun ld too. */
7847 fputs ("@GOTTPOFF", file);
7848 break;
7849 case UNSPEC_TPOFF:
7850 fputs ("@TPOFF", file);
7851 break;
7852 case UNSPEC_NTPOFF:
7853 if (TARGET_64BIT)
7854 fputs ("@TPOFF", file);
7855 else
7856 fputs ("@NTPOFF", file);
7857 break;
7858 case UNSPEC_DTPOFF:
7859 fputs ("@DTPOFF", file);
7860 break;
7861 case UNSPEC_GOTNTPOFF:
7862 if (TARGET_64BIT)
7863 fputs ("@GOTTPOFF(%rip)", file);
7864 else
7865 fputs ("@GOTNTPOFF", file);
7866 break;
7867 case UNSPEC_INDNTPOFF:
7868 fputs ("@INDNTPOFF", file);
7869 break;
7870 default:
7871 output_operand_lossage ("invalid UNSPEC as operand");
7872 break;
7873 }
7874 break;
7875
7876 default:
7877 output_operand_lossage ("invalid expression as operand");
7878 }
7879 }
7880
7881 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7882 We need to emit DTP-relative relocations. */
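/* For example, for a 4-byte entry this emits something like
   ".long foo@DTPOFF", and for an 8-byte entry ".long foo@DTPOFF, 0"
   (the exact directive comes from ASM_LONG).  */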
7883
7884 static void ATTRIBUTE_UNUSED
7885 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7886 {
7887 fputs (ASM_LONG, file);
7888 output_addr_const (file, x);
7889 fputs ("@DTPOFF", file);
7890 switch (size)
7891 {
7892 case 4:
7893 break;
7894 case 8:
7895 fputs (", 0", file);
7896 break;
7897 default:
7898 gcc_unreachable ();
7899 }
7900 }
7901
7902 /* In the name of slightly smaller debug output, and to cater to
7903 general assembler lossage, recognize PIC+GOTOFF and turn it back
7904 into a direct symbol reference.
7905
7906 On Darwin, this is necessary to avoid a crash, because Darwin
7907 has a different PIC label for each routine but the DWARF debugging
7908 information is not associated with any particular routine, so it's
7909 necessary to remove references to the PIC label from RTL stored by
7910 the DWARF output code. */
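/* Illustrative example: (plus (reg ebx) (const (unspec [foo] UNSPEC_GOTOFF)))
   appearing outside a MEM is turned back into plain "foo"; a GOT load,
   (mem (plus (reg ebx) (const (unspec [foo] UNSPEC_GOT)))), likewise
   delegitimizes to "foo".  */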
7911
7912 static rtx
7913 ix86_delegitimize_address (rtx orig_x)
7914 {
7915 rtx x = orig_x;
7916 /* reg_addend is NULL or a multiple of some register. */
7917 rtx reg_addend = NULL_RTX;
7918 /* const_addend is NULL or a const_int. */
7919 rtx const_addend = NULL_RTX;
7920 /* This is the result, or NULL. */
7921 rtx result = NULL_RTX;
7922
7923 if (MEM_P (x))
7924 x = XEXP (x, 0);
7925
7926 if (TARGET_64BIT)
7927 {
7928 if (GET_CODE (x) != CONST
7929 || GET_CODE (XEXP (x, 0)) != UNSPEC
7930 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7931 || !MEM_P (orig_x))
7932 return orig_x;
7933 return XVECEXP (XEXP (x, 0), 0, 0);
7934 }
7935
7936 if (GET_CODE (x) != PLUS
7937 || GET_CODE (XEXP (x, 1)) != CONST)
7938 return orig_x;
7939
7940 if (REG_P (XEXP (x, 0))
7941 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7942 /* %ebx + GOT/GOTOFF */
7943 ;
7944 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7945 {
7946 /* %ebx + %reg * scale + GOT/GOTOFF */
7947 reg_addend = XEXP (x, 0);
7948 if (REG_P (XEXP (reg_addend, 0))
7949 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7950 reg_addend = XEXP (reg_addend, 1);
7951 else if (REG_P (XEXP (reg_addend, 1))
7952 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7953 reg_addend = XEXP (reg_addend, 0);
7954 else
7955 return orig_x;
7956 if (!REG_P (reg_addend)
7957 && GET_CODE (reg_addend) != MULT
7958 && GET_CODE (reg_addend) != ASHIFT)
7959 return orig_x;
7960 }
7961 else
7962 return orig_x;
7963
7964 x = XEXP (XEXP (x, 1), 0);
7965 if (GET_CODE (x) == PLUS
7966 && CONST_INT_P (XEXP (x, 1)))
7967 {
7968 const_addend = XEXP (x, 1);
7969 x = XEXP (x, 0);
7970 }
7971
7972 if (GET_CODE (x) == UNSPEC
7973 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7974 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7975 result = XVECEXP (x, 0, 0);
7976
7977 if (TARGET_MACHO && darwin_local_data_pic (x)
7978 && !MEM_P (orig_x))
7979 result = XEXP (x, 0);
7980
7981 if (! result)
7982 return orig_x;
7983
7984 if (const_addend)
7985 result = gen_rtx_PLUS (Pmode, result, const_addend);
7986 if (reg_addend)
7987 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7988 return result;
7989 }
7990
7991 /* If X is a machine specific address (i.e. a symbol or label being
7992 referenced as a displacement from the GOT implemented using an
7993 UNSPEC), then return the base term. Otherwise return X. */
7994
7995 rtx
7996 ix86_find_base_term (rtx x)
7997 {
7998 rtx term;
7999
8000 if (TARGET_64BIT)
8001 {
8002 if (GET_CODE (x) != CONST)
8003 return x;
8004 term = XEXP (x, 0);
8005 if (GET_CODE (term) == PLUS
8006 && (CONST_INT_P (XEXP (term, 1))
8007 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8008 term = XEXP (term, 0);
8009 if (GET_CODE (term) != UNSPEC
8010 || XINT (term, 1) != UNSPEC_GOTPCREL)
8011 return x;
8012
8013 term = XVECEXP (term, 0, 0);
8014
8015 if (GET_CODE (term) != SYMBOL_REF
8016 && GET_CODE (term) != LABEL_REF)
8017 return x;
8018
8019 return term;
8020 }
8021
8022 term = ix86_delegitimize_address (x);
8023
8024 if (GET_CODE (term) != SYMBOL_REF
8025 && GET_CODE (term) != LABEL_REF)
8026 return x;
8027
8028 return term;
8029 }
8030 \f
8031 static void
8032 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8033 int fp, FILE *file)
8034 {
8035 const char *suffix;
8036
8037 if (mode == CCFPmode || mode == CCFPUmode)
8038 {
8039 enum rtx_code second_code, bypass_code;
8040 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8041 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8042 code = ix86_fp_compare_code_to_integer (code);
8043 mode = CCmode;
8044 }
8045 if (reverse)
8046 code = reverse_condition (code);
8047
8048 switch (code)
8049 {
8050 case EQ:
8051 suffix = "e";
8052 break;
8053 case NE:
8054 suffix = "ne";
8055 break;
8056 case GT:
8057 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8058 suffix = "g";
8059 break;
8060 case GTU:
8061 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8062 Those same assemblers have the same but opposite lossage on cmov. */
8063 gcc_assert (mode == CCmode);
8064 suffix = fp ? "nbe" : "a";
8065 break;
8066 case LT:
8067 switch (mode)
8068 {
8069 case CCNOmode:
8070 case CCGOCmode:
8071 suffix = "s";
8072 break;
8073
8074 case CCmode:
8075 case CCGCmode:
8076 suffix = "l";
8077 break;
8078
8079 default:
8080 gcc_unreachable ();
8081 }
8082 break;
8083 case LTU:
8084 gcc_assert (mode == CCmode);
8085 suffix = "b";
8086 break;
8087 case GE:
8088 switch (mode)
8089 {
8090 case CCNOmode:
8091 case CCGOCmode:
8092 suffix = "ns";
8093 break;
8094
8095 case CCmode:
8096 case CCGCmode:
8097 suffix = "ge";
8098 break;
8099
8100 default:
8101 gcc_unreachable ();
8102 }
8103 break;
8104 case GEU:
8105 /* ??? As above. */
8106 gcc_assert (mode == CCmode);
8107 suffix = fp ? "nb" : "ae";
8108 break;
8109 case LE:
8110 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8111 suffix = "le";
8112 break;
8113 case LEU:
8114 gcc_assert (mode == CCmode);
8115 suffix = "be";
8116 break;
8117 case UNORDERED:
8118 suffix = fp ? "u" : "p";
8119 break;
8120 case ORDERED:
8121 suffix = fp ? "nu" : "np";
8122 break;
8123 default:
8124 gcc_unreachable ();
8125 }
8126 fputs (suffix, file);
8127 }
8128
8129 /* Print the name of register X to FILE based on its machine mode and number.
8130 If CODE is 'w', pretend the mode is HImode.
8131 If CODE is 'b', pretend the mode is QImode.
8132 If CODE is 'k', pretend the mode is SImode.
8133 If CODE is 'q', pretend the mode is DImode.
8134 If CODE is 'h', pretend the reg is the 'high' byte register.
8135 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
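/* For example, with operand (reg:SI ax) in AT&T syntax:
   code 'b' prints %al, 'w' prints %ax, 'k' prints %eax,
   'q' prints %rax (64-bit only) and 'h' prints %ah.  */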
8136
8137 void
8138 print_reg (rtx x, int code, FILE *file)
8139 {
8140 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8141 && REGNO (x) != FRAME_POINTER_REGNUM
8142 && REGNO (x) != FLAGS_REG
8143 && REGNO (x) != FPSR_REG
8144 && REGNO (x) != FPCR_REG);
8145
8146 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8147 putc ('%', file);
8148
8149 if (code == 'w' || MMX_REG_P (x))
8150 code = 2;
8151 else if (code == 'b')
8152 code = 1;
8153 else if (code == 'k')
8154 code = 4;
8155 else if (code == 'q')
8156 code = 8;
8157 else if (code == 'y')
8158 code = 3;
8159 else if (code == 'h')
8160 code = 0;
8161 else
8162 code = GET_MODE_SIZE (GET_MODE (x));
8163
8164 /* Irritatingly, the AMD extended registers use a different naming convention
8165 from the normal registers. */
8166 if (REX_INT_REG_P (x))
8167 {
8168 gcc_assert (TARGET_64BIT);
8169 switch (code)
8170 {
8171 case 0:
8172 error ("extended registers have no high halves");
8173 break;
8174 case 1:
8175 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8176 break;
8177 case 2:
8178 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8179 break;
8180 case 4:
8181 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8182 break;
8183 case 8:
8184 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8185 break;
8186 default:
8187 error ("unsupported operand size for extended register");
8188 break;
8189 }
8190 return;
8191 }
8192 switch (code)
8193 {
8194 case 3:
8195 if (STACK_TOP_P (x))
8196 {
8197 fputs ("st(0)", file);
8198 break;
8199 }
8200 /* FALLTHRU */
8201 case 8:
8202 case 4:
8203 case 12:
8204 if (! ANY_FP_REG_P (x))
8205 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8206 /* FALLTHRU */
8207 case 16:
8208 case 2:
8209 normal:
8210 fputs (hi_reg_name[REGNO (x)], file);
8211 break;
8212 case 1:
8213 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8214 goto normal;
8215 fputs (qi_reg_name[REGNO (x)], file);
8216 break;
8217 case 0:
8218 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8219 goto normal;
8220 fputs (qi_high_reg_name[REGNO (x)], file);
8221 break;
8222 default:
8223 gcc_unreachable ();
8224 }
8225 }
8226
8227 /* Locate some local-dynamic symbol still in use by this function
8228 so that we can print its name in some tls_local_dynamic_base
8229 pattern. */
8230
8231 static int
8232 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8233 {
8234 rtx x = *px;
8235
8236 if (GET_CODE (x) == SYMBOL_REF
8237 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8238 {
8239 cfun->machine->some_ld_name = XSTR (x, 0);
8240 return 1;
8241 }
8242
8243 return 0;
8244 }
8245
8246 static const char *
8247 get_some_local_dynamic_name (void)
8248 {
8249 rtx insn;
8250
8251 if (cfun->machine->some_ld_name)
8252 return cfun->machine->some_ld_name;
8253
8254 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8255 if (INSN_P (insn)
8256 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8257 return cfun->machine->some_ld_name;
8258
8259 gcc_unreachable ();
8260 }
8261
8262 /* Meaning of CODE:
8263 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8264 C -- print opcode suffix for set/cmov insn.
8265 c -- like C, but print reversed condition
8266 F,f -- likewise, but for floating-point.
8267 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8268 otherwise nothing
8269 R -- print the prefix for register names.
8270 z -- print the opcode suffix for the size of the current operand.
8271 * -- print a star (in certain assembler syntax)
8272 A -- print an absolute memory reference.
8273 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8274 s -- print a shift double count, followed by the assembler's argument
8275 delimiter.
8276 b -- print the QImode name of the register for the indicated operand.
8277 %b0 would print %al if operands[0] is reg 0.
8278 w -- likewise, print the HImode name of the register.
8279 k -- likewise, print the SImode name of the register.
8280 q -- likewise, print the DImode name of the register.
8281 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8282 y -- print "st(0)" instead of "st" as a register.
8283 D -- print condition for SSE cmp instruction.
8284 P -- if PIC, print an @PLT suffix.
8285 X -- don't print any sort of PIC '@' suffix for a symbol.
8286 & -- print some in-use local-dynamic symbol name.
8287 H -- print a memory address offset by 8; used for sse high-parts
8288 */
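/* For example, an insn template such as "add%z0\t{%1, %0|%0, %1}" gets
   its size suffix from operand 0 via the 'z' code, yielding "addl" for
   an SImode operand, while "%b0" prints the QImode register name.
   (A hypothetical template, shown only to illustrate the codes above.)  */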
8289
8290 void
8291 print_operand (FILE *file, rtx x, int code)
8292 {
8293 if (code)
8294 {
8295 switch (code)
8296 {
8297 case '*':
8298 if (ASSEMBLER_DIALECT == ASM_ATT)
8299 putc ('*', file);
8300 return;
8301
8302 case '&':
8303 assemble_name (file, get_some_local_dynamic_name ());
8304 return;
8305
8306 case 'A':
8307 switch (ASSEMBLER_DIALECT)
8308 {
8309 case ASM_ATT:
8310 putc ('*', file);
8311 break;
8312
8313 case ASM_INTEL:
8314 /* Intel syntax. For absolute addresses, registers should not
8315 be surrounded by brackets. */
8316 if (!REG_P (x))
8317 {
8318 putc ('[', file);
8319 PRINT_OPERAND (file, x, 0);
8320 putc (']', file);
8321 return;
8322 }
8323 break;
8324
8325 default:
8326 gcc_unreachable ();
8327 }
8328
8329 PRINT_OPERAND (file, x, 0);
8330 return;
8331
8332
8333 case 'L':
8334 if (ASSEMBLER_DIALECT == ASM_ATT)
8335 putc ('l', file);
8336 return;
8337
8338 case 'W':
8339 if (ASSEMBLER_DIALECT == ASM_ATT)
8340 putc ('w', file);
8341 return;
8342
8343 case 'B':
8344 if (ASSEMBLER_DIALECT == ASM_ATT)
8345 putc ('b', file);
8346 return;
8347
8348 case 'Q':
8349 if (ASSEMBLER_DIALECT == ASM_ATT)
8350 putc ('l', file);
8351 return;
8352
8353 case 'S':
8354 if (ASSEMBLER_DIALECT == ASM_ATT)
8355 putc ('s', file);
8356 return;
8357
8358 case 'T':
8359 if (ASSEMBLER_DIALECT == ASM_ATT)
8360 putc ('t', file);
8361 return;
8362
8363 case 'z':
8364 /* 387 opcodes don't get size suffixes if the operands are
8365 registers. */
8366 if (STACK_REG_P (x))
8367 return;
8368
8369 /* Likewise if using Intel opcodes. */
8370 if (ASSEMBLER_DIALECT == ASM_INTEL)
8371 return;
8372
8373 /* This is the size of op from size of operand. */
8374 switch (GET_MODE_SIZE (GET_MODE (x)))
8375 {
8376 case 1:
8377 putc ('b', file);
8378 return;
8379
8380 case 2:
8381 if (MEM_P (x))
8382 {
8383 #ifdef HAVE_GAS_FILDS_FISTS
8384 putc ('s', file);
8385 #endif
8386 return;
8387 }
8388 else
8389 putc ('w', file);
8390 return;
8391
8392 case 4:
8393 if (GET_MODE (x) == SFmode)
8394 {
8395 putc ('s', file);
8396 return;
8397 }
8398 else
8399 putc ('l', file);
8400 return;
8401
8402 case 12:
8403 case 16:
8404 putc ('t', file);
8405 return;
8406
8407 case 8:
8408 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8409 {
8410 #ifdef GAS_MNEMONICS
8411 putc ('q', file);
8412 #else
8413 putc ('l', file);
8414 putc ('l', file);
8415 #endif
8416 }
8417 else
8418 putc ('l', file);
8419 return;
8420
8421 default:
8422 gcc_unreachable ();
8423 }
8424
8425 case 'b':
8426 case 'w':
8427 case 'k':
8428 case 'q':
8429 case 'h':
8430 case 'y':
8431 case 'X':
8432 case 'P':
8433 break;
8434
8435 case 's':
8436 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8437 {
8438 PRINT_OPERAND (file, x, 0);
8439 putc (',', file);
8440 }
8441 return;
8442
8443 case 'D':
8444 /* Little bit of braindamage here. The SSE compare instructions
8445 use completely different names for the comparisons than the
8446 fp conditional moves do. */
8447 switch (GET_CODE (x))
8448 {
8449 case EQ:
8450 case UNEQ:
8451 fputs ("eq", file);
8452 break;
8453 case LT:
8454 case UNLT:
8455 fputs ("lt", file);
8456 break;
8457 case LE:
8458 case UNLE:
8459 fputs ("le", file);
8460 break;
8461 case UNORDERED:
8462 fputs ("unord", file);
8463 break;
8464 case NE:
8465 case LTGT:
8466 fputs ("neq", file);
8467 break;
8468 case UNGE:
8469 case GE:
8470 fputs ("nlt", file);
8471 break;
8472 case UNGT:
8473 case GT:
8474 fputs ("nle", file);
8475 break;
8476 case ORDERED:
8477 fputs ("ord", file);
8478 break;
8479 default:
8480 gcc_unreachable ();
8481 }
8482 return;
8483 case 'O':
8484 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8485 if (ASSEMBLER_DIALECT == ASM_ATT)
8486 {
8487 switch (GET_MODE (x))
8488 {
8489 case HImode: putc ('w', file); break;
8490 case SImode:
8491 case SFmode: putc ('l', file); break;
8492 case DImode:
8493 case DFmode: putc ('q', file); break;
8494 default: gcc_unreachable ();
8495 }
8496 putc ('.', file);
8497 }
8498 #endif
8499 return;
8500 case 'C':
8501 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8502 return;
8503 case 'F':
8504 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8505 if (ASSEMBLER_DIALECT == ASM_ATT)
8506 putc ('.', file);
8507 #endif
8508 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8509 return;
8510
8511 /* Like above, but reverse condition */
8512 case 'c':
8513 /* Check to see if argument to %c is really a constant
8514 and not a condition code which needs to be reversed. */
8515 if (!COMPARISON_P (x))
8516 {
8517 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8518 return;
8519 }
8520 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8521 return;
8522 case 'f':
8523 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8524 if (ASSEMBLER_DIALECT == ASM_ATT)
8525 putc ('.', file);
8526 #endif
8527 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8528 return;
8529
8530 case 'H':
8531 /* It doesn't actually matter what mode we use here, as we're
8532 only going to use this for printing. */
8533 x = adjust_address_nv (x, DImode, 8);
8534 break;
8535
8536 case '+':
8537 {
8538 rtx x;
8539
8540 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8541 return;
8542
8543 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8544 if (x)
8545 {
8546 int pred_val = INTVAL (XEXP (x, 0));
8547
8548 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8549 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8550 {
8551 int taken = pred_val > REG_BR_PROB_BASE / 2;
8552 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8553
8554 /* Emit hints only in the case the default branch prediction
8555 heuristics would fail. */
8556 if (taken != cputaken)
8557 {
8558 /* We use 3e (DS) prefix for taken branches and
8559 2e (CS) prefix for not taken branches. */
8560 if (taken)
8561 fputs ("ds ; ", file);
8562 else
8563 fputs ("cs ; ", file);
8564 }
8565 }
8566 }
8567 return;
8568 }
8569 default:
8570 output_operand_lossage ("invalid operand code '%c'", code);
8571 }
8572 }
8573
8574 if (REG_P (x))
8575 print_reg (x, code, file);
8576
8577 else if (MEM_P (x))
8578 {
8579 /* No `byte ptr' prefix for call instructions. */
8580 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8581 {
8582 const char * size;
8583 switch (GET_MODE_SIZE (GET_MODE (x)))
8584 {
8585 case 1: size = "BYTE"; break;
8586 case 2: size = "WORD"; break;
8587 case 4: size = "DWORD"; break;
8588 case 8: size = "QWORD"; break;
8589 case 12: size = "XWORD"; break;
8590 case 16: size = "XMMWORD"; break;
8591 default:
8592 gcc_unreachable ();
8593 }
8594
8595 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8596 if (code == 'b')
8597 size = "BYTE";
8598 else if (code == 'w')
8599 size = "WORD";
8600 else if (code == 'k')
8601 size = "DWORD";
8602
8603 fputs (size, file);
8604 fputs (" PTR ", file);
8605 }
8606
8607 x = XEXP (x, 0);
8608 /* Avoid (%rip) for call operands. */
8609 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8610 && !CONST_INT_P (x))
8611 output_addr_const (file, x);
8612 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8613 output_operand_lossage ("invalid constraints for operand");
8614 else
8615 output_address (x);
8616 }
8617
8618 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8619 {
8620 REAL_VALUE_TYPE r;
8621 long l;
8622
8623 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8624 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8625
8626 if (ASSEMBLER_DIALECT == ASM_ATT)
8627 putc ('$', file);
8628 fprintf (file, "0x%08lx", l);
8629 }
8630
8631 /* These float cases don't actually occur as immediate operands. */
8632 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8633 {
8634 char dstr[30];
8635
8636 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8637 fprintf (file, "%s", dstr);
8638 }
8639
8640 else if (GET_CODE (x) == CONST_DOUBLE
8641 && GET_MODE (x) == XFmode)
8642 {
8643 char dstr[30];
8644
8645 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8646 fprintf (file, "%s", dstr);
8647 }
8648
8649 else
8650 {
8651 /* We have patterns that allow zero sets of memory, for instance.
8652 In 64-bit mode, we should probably support all 8-byte vectors,
8653 since we can in fact encode that into an immediate. */
8654 if (GET_CODE (x) == CONST_VECTOR)
8655 {
8656 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8657 x = const0_rtx;
8658 }
8659
8660 if (code != 'P')
8661 {
8662 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8663 {
8664 if (ASSEMBLER_DIALECT == ASM_ATT)
8665 putc ('$', file);
8666 }
8667 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8668 || GET_CODE (x) == LABEL_REF)
8669 {
8670 if (ASSEMBLER_DIALECT == ASM_ATT)
8671 putc ('$', file);
8672 else
8673 fputs ("OFFSET FLAT:", file);
8674 }
8675 }
8676 if (CONST_INT_P (x))
8677 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8678 else if (flag_pic)
8679 output_pic_addr_const (file, x, code);
8680 else
8681 output_addr_const (file, x);
8682 }
8683 }
8684 \f
8685 /* Print a memory operand whose address is ADDR. */
8686
8687 void
8688 print_operand_address (FILE *file, rtx addr)
8689 {
8690 struct ix86_address parts;
8691 rtx base, index, disp;
8692 int scale;
8693 int ok = ix86_decompose_address (addr, &parts);
8694
8695 gcc_assert (ok);
8696
8697 base = parts.base;
8698 index = parts.index;
8699 disp = parts.disp;
8700 scale = parts.scale;
8701
8702 switch (parts.seg)
8703 {
8704 case SEG_DEFAULT:
8705 break;
8706 case SEG_FS:
8707 case SEG_GS:
8708 if (USER_LABEL_PREFIX[0] == 0)
8709 putc ('%', file);
8710 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8711 break;
8712 default:
8713 gcc_unreachable ();
8714 }
8715
8716 if (!base && !index)
8717 {
8718 /* A displacement-only address requires special attention. */
8719
8720 if (CONST_INT_P (disp))
8721 {
8722 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8723 {
8724 if (USER_LABEL_PREFIX[0] == 0)
8725 putc ('%', file);
8726 fputs ("ds:", file);
8727 }
8728 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8729 }
8730 else if (flag_pic)
8731 output_pic_addr_const (file, disp, 0);
8732 else
8733 output_addr_const (file, disp);
8734
8735 /* Use the one byte shorter RIP relative addressing for 64bit mode. */
8736 if (TARGET_64BIT)
8737 {
8738 if (GET_CODE (disp) == CONST
8739 && GET_CODE (XEXP (disp, 0)) == PLUS
8740 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8741 disp = XEXP (XEXP (disp, 0), 0);
8742 if (GET_CODE (disp) == LABEL_REF
8743 || (GET_CODE (disp) == SYMBOL_REF
8744 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8745 fputs ("(%rip)", file);
8746 }
8747 }
8748 else
8749 {
8750 if (ASSEMBLER_DIALECT == ASM_ATT)
8751 {
8752 if (disp)
8753 {
8754 if (flag_pic)
8755 output_pic_addr_const (file, disp, 0);
8756 else if (GET_CODE (disp) == LABEL_REF)
8757 output_asm_label (disp);
8758 else
8759 output_addr_const (file, disp);
8760 }
8761
8762 putc ('(', file);
8763 if (base)
8764 print_reg (base, 0, file);
8765 if (index)
8766 {
8767 putc (',', file);
8768 print_reg (index, 0, file);
8769 if (scale != 1)
8770 fprintf (file, ",%d", scale);
8771 }
8772 putc (')', file);
8773 }
8774 else
8775 {
8776 rtx offset = NULL_RTX;
8777
8778 if (disp)
8779 {
8780 /* Pull out the offset of a symbol; print any symbol itself. */
8781 if (GET_CODE (disp) == CONST
8782 && GET_CODE (XEXP (disp, 0)) == PLUS
8783 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8784 {
8785 offset = XEXP (XEXP (disp, 0), 1);
8786 disp = gen_rtx_CONST (VOIDmode,
8787 XEXP (XEXP (disp, 0), 0));
8788 }
8789
8790 if (flag_pic)
8791 output_pic_addr_const (file, disp, 0);
8792 else if (GET_CODE (disp) == LABEL_REF)
8793 output_asm_label (disp);
8794 else if (CONST_INT_P (disp))
8795 offset = disp;
8796 else
8797 output_addr_const (file, disp);
8798 }
8799
8800 putc ('[', file);
8801 if (base)
8802 {
8803 print_reg (base, 0, file);
8804 if (offset)
8805 {
8806 if (INTVAL (offset) >= 0)
8807 putc ('+', file);
8808 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8809 }
8810 }
8811 else if (offset)
8812 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8813 else
8814 putc ('0', file);
8815
8816 if (index)
8817 {
8818 putc ('+', file);
8819 print_reg (index, 0, file);
8820 if (scale != 1)
8821 fprintf (file, "*%d", scale);
8822 }
8823 putc (']', file);
8824 }
8825 }
8826 }
8827
8828 bool
8829 output_addr_const_extra (FILE *file, rtx x)
8830 {
8831 rtx op;
8832
8833 if (GET_CODE (x) != UNSPEC)
8834 return false;
8835
8836 op = XVECEXP (x, 0, 0);
8837 switch (XINT (x, 1))
8838 {
8839 case UNSPEC_GOTTPOFF:
8840 output_addr_const (file, op);
8841 /* FIXME: This might be @TPOFF in Sun ld. */
8842 fputs ("@GOTTPOFF", file);
8843 break;
8844 case UNSPEC_TPOFF:
8845 output_addr_const (file, op);
8846 fputs ("@TPOFF", file);
8847 break;
8848 case UNSPEC_NTPOFF:
8849 output_addr_const (file, op);
8850 if (TARGET_64BIT)
8851 fputs ("@TPOFF", file);
8852 else
8853 fputs ("@NTPOFF", file);
8854 break;
8855 case UNSPEC_DTPOFF:
8856 output_addr_const (file, op);
8857 fputs ("@DTPOFF", file);
8858 break;
8859 case UNSPEC_GOTNTPOFF:
8860 output_addr_const (file, op);
8861 if (TARGET_64BIT)
8862 fputs ("@GOTTPOFF(%rip)", file);
8863 else
8864 fputs ("@GOTNTPOFF", file);
8865 break;
8866 case UNSPEC_INDNTPOFF:
8867 output_addr_const (file, op);
8868 fputs ("@INDNTPOFF", file);
8869 break;
8870
8871 default:
8872 return false;
8873 }
8874
8875 return true;
8876 }
8877 \f
8878 /* Split one or more DImode RTL references into pairs of SImode
8879 references. The RTL can be REG, offsettable MEM, integer constant, or
8880 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8881 split and "num" is its length. lo_half and hi_half are output arrays
8882 that parallel "operands". */
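/* For example, a DImode MEM at address X is split into SImode MEMs at
   X and X+4, and a DImode REG into its low and high SImode subwords,
   exactly as done below.  */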
8883
8884 void
8885 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8886 {
8887 while (num--)
8888 {
8889 rtx op = operands[num];
8890
8891 /* simplify_subreg refuses to split volatile memory addresses,
8892 but we still have to handle them. */
8893 if (MEM_P (op))
8894 {
8895 lo_half[num] = adjust_address (op, SImode, 0);
8896 hi_half[num] = adjust_address (op, SImode, 4);
8897 }
8898 else
8899 {
8900 lo_half[num] = simplify_gen_subreg (SImode, op,
8901 GET_MODE (op) == VOIDmode
8902 ? DImode : GET_MODE (op), 0);
8903 hi_half[num] = simplify_gen_subreg (SImode, op,
8904 GET_MODE (op) == VOIDmode
8905 ? DImode : GET_MODE (op), 4);
8906 }
8907 }
8908 }
8909 /* Split one or more TImode RTL references into pairs of DImode
8910 references. The RTL can be REG, offsettable MEM, integer constant, or
8911 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8912 split and "num" is its length. lo_half and hi_half are output arrays
8913 that parallel "operands". */
8914
8915 void
8916 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8917 {
8918 while (num--)
8919 {
8920 rtx op = operands[num];
8921
8922 /* simplify_subreg refuses to split volatile memory addresses, but we
8923 still have to handle them. */
8924 if (MEM_P (op))
8925 {
8926 lo_half[num] = adjust_address (op, DImode, 0);
8927 hi_half[num] = adjust_address (op, DImode, 8);
8928 }
8929 else
8930 {
8931 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8932 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8933 }
8934 }
8935 }
8936 \f
8937 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8938 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8939 is the expression of the binary operation. The output may either be
8940 emitted here, or returned to the caller, like all output_* functions.
8941
8942 There is no guarantee that the operands are the same mode, as they
8943 might be within FLOAT or FLOAT_EXTEND expressions. */
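/* For instance, an SFmode SSE add returns "addss\t{%2, %0|%0, %2}",
   while the x87 case picks an fadd/fsub/fmul/fdiv variant (possibly a
   popping or reversed form) based on which operand is st(0) and which
   operands die, as worked out below.  */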
8944
8945 #ifndef SYSV386_COMPAT
8946 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8947 wants to fix the assemblers because that causes incompatibility
8948 with gcc. No-one wants to fix gcc because that causes
8949 incompatibility with assemblers... You can use the option of
8950 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8951 #define SYSV386_COMPAT 1
8952 #endif
8953
8954 const char *
8955 output_387_binary_op (rtx insn, rtx *operands)
8956 {
8957 static char buf[30];
8958 const char *p;
8959 const char *ssep;
8960 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8961
8962 #ifdef ENABLE_CHECKING
8963 /* Even if we do not want to check the inputs, this documents the input
8964 constraints, which helps in understanding the following code. */
8965 if (STACK_REG_P (operands[0])
8966 && ((REG_P (operands[1])
8967 && REGNO (operands[0]) == REGNO (operands[1])
8968 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8969 || (REG_P (operands[2])
8970 && REGNO (operands[0]) == REGNO (operands[2])
8971 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8972 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8973 ; /* ok */
8974 else
8975 gcc_assert (is_sse);
8976 #endif
8977
8978 switch (GET_CODE (operands[3]))
8979 {
8980 case PLUS:
8981 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8982 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8983 p = "fiadd";
8984 else
8985 p = "fadd";
8986 ssep = "add";
8987 break;
8988
8989 case MINUS:
8990 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8991 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8992 p = "fisub";
8993 else
8994 p = "fsub";
8995 ssep = "sub";
8996 break;
8997
8998 case MULT:
8999 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9000 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9001 p = "fimul";
9002 else
9003 p = "fmul";
9004 ssep = "mul";
9005 break;
9006
9007 case DIV:
9008 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9009 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9010 p = "fidiv";
9011 else
9012 p = "fdiv";
9013 ssep = "div";
9014 break;
9015
9016 default:
9017 gcc_unreachable ();
9018 }
9019
9020 if (is_sse)
9021 {
9022 strcpy (buf, ssep);
9023 if (GET_MODE (operands[0]) == SFmode)
9024 strcat (buf, "ss\t{%2, %0|%0, %2}");
9025 else
9026 strcat (buf, "sd\t{%2, %0|%0, %2}");
9027 return buf;
9028 }
9029 strcpy (buf, p);
9030
9031 switch (GET_CODE (operands[3]))
9032 {
9033 case MULT:
9034 case PLUS:
9035 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9036 {
9037 rtx temp = operands[2];
9038 operands[2] = operands[1];
9039 operands[1] = temp;
9040 }
9041
9042 /* Now we know operands[0] == operands[1]. */
9043
9044 if (MEM_P (operands[2]))
9045 {
9046 p = "%z2\t%2";
9047 break;
9048 }
9049
9050 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9051 {
9052 if (STACK_TOP_P (operands[0]))
9053 /* How is it that we are storing to a dead operand[2]?
9054 Well, presumably operands[1] is dead too. We can't
9055 store the result to st(0) as st(0) gets popped on this
9056 instruction. Instead store to operands[2] (which I
9057 think has to be st(1)). st(1) will be popped later.
9058 gcc <= 2.8.1 didn't have this check and generated
9059 assembly code that the Unixware assembler rejected. */
9060 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9061 else
9062 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9063 break;
9064 }
9065
9066 if (STACK_TOP_P (operands[0]))
9067 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9068 else
9069 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9070 break;
9071
9072 case MINUS:
9073 case DIV:
9074 if (MEM_P (operands[1]))
9075 {
9076 p = "r%z1\t%1";
9077 break;
9078 }
9079
9080 if (MEM_P (operands[2]))
9081 {
9082 p = "%z2\t%2";
9083 break;
9084 }
9085
9086 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9087 {
9088 #if SYSV386_COMPAT
9089 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9090 derived assemblers, confusingly reverse the direction of
9091 the operation for fsub{r} and fdiv{r} when the
9092 destination register is not st(0). The Intel assembler
9093 doesn't have this brain damage. Read !SYSV386_COMPAT to
9094 figure out what the hardware really does. */
9095 if (STACK_TOP_P (operands[0]))
9096 p = "{p\t%0, %2|rp\t%2, %0}";
9097 else
9098 p = "{rp\t%2, %0|p\t%0, %2}";
9099 #else
9100 if (STACK_TOP_P (operands[0]))
9101 /* As above for fmul/fadd, we can't store to st(0). */
9102 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9103 else
9104 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9105 #endif
9106 break;
9107 }
9108
9109 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9110 {
9111 #if SYSV386_COMPAT
9112 if (STACK_TOP_P (operands[0]))
9113 p = "{rp\t%0, %1|p\t%1, %0}";
9114 else
9115 p = "{p\t%1, %0|rp\t%0, %1}";
9116 #else
9117 if (STACK_TOP_P (operands[0]))
9118 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9119 else
9120 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9121 #endif
9122 break;
9123 }
9124
9125 if (STACK_TOP_P (operands[0]))
9126 {
9127 if (STACK_TOP_P (operands[1]))
9128 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9129 else
9130 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9131 break;
9132 }
9133 else if (STACK_TOP_P (operands[1]))
9134 {
9135 #if SYSV386_COMPAT
9136 p = "{\t%1, %0|r\t%0, %1}";
9137 #else
9138 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9139 #endif
9140 }
9141 else
9142 {
9143 #if SYSV386_COMPAT
9144 p = "{r\t%2, %0|\t%0, %2}";
9145 #else
9146 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9147 #endif
9148 }
9149 break;
9150
9151 default:
9152 gcc_unreachable ();
9153 }
9154
9155 strcat (buf, p);
9156 return buf;
9157 }
9158
9159 /* Return needed mode for entity in optimize_mode_switching pass. */
9160
9161 int
9162 ix86_mode_needed (int entity, rtx insn)
9163 {
9164 enum attr_i387_cw mode;
9165
9166 /* The mode UNINITIALIZED is used to store the control word after a
9167 function call or ASM pattern. The mode ANY specifies that the function
9168 has no requirements on the control word and makes no changes in the
9169 bits we are interested in. */
9170
9171 if (CALL_P (insn)
9172 || (NONJUMP_INSN_P (insn)
9173 && (asm_noperands (PATTERN (insn)) >= 0
9174 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9175 return I387_CW_UNINITIALIZED;
9176
9177 if (recog_memoized (insn) < 0)
9178 return I387_CW_ANY;
9179
9180 mode = get_attr_i387_cw (insn);
9181
9182 switch (entity)
9183 {
9184 case I387_TRUNC:
9185 if (mode == I387_CW_TRUNC)
9186 return mode;
9187 break;
9188
9189 case I387_FLOOR:
9190 if (mode == I387_CW_FLOOR)
9191 return mode;
9192 break;
9193
9194 case I387_CEIL:
9195 if (mode == I387_CW_CEIL)
9196 return mode;
9197 break;
9198
9199 case I387_MASK_PM:
9200 if (mode == I387_CW_MASK_PM)
9201 return mode;
9202 break;
9203
9204 default:
9205 gcc_unreachable ();
9206 }
9207
9208 return I387_CW_ANY;
9209 }
9210
9211 /* Output code to initialize control word copies used by trunc?f?i and
9212 rounding patterns. MODE selects the rounding or precision-mask setting;
9213 the adjusted control word is stored in the corresponding stack slot. */
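
/* For reference: in the i387 control word, bits 10-11 form the rounding
   control field (00 = to nearest, 01 = toward -inf, 10 = toward +inf,
   11 = toward zero) and bit 5 masks the precision exception; hence the
   0x0400, 0x0800, 0x0c00 and 0x0020 constants used below.  */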
9214
9215 void
9216 emit_i387_cw_initialization (int mode)
9217 {
9218 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9219 rtx new_mode;
9220
9221 int slot;
9222
9223 rtx reg = gen_reg_rtx (HImode);
9224
9225 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9226 emit_move_insn (reg, copy_rtx (stored_mode));
9227
9228 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9229 {
9230 switch (mode)
9231 {
9232 case I387_CW_TRUNC:
9233 /* round toward zero (truncate) */
9234 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9235 slot = SLOT_CW_TRUNC;
9236 break;
9237
9238 case I387_CW_FLOOR:
9239 /* round down toward -oo */
9240 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9241 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9242 slot = SLOT_CW_FLOOR;
9243 break;
9244
9245 case I387_CW_CEIL:
9246 /* round up toward +oo */
9247 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9248 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9249 slot = SLOT_CW_CEIL;
9250 break;
9251
9252 case I387_CW_MASK_PM:
9253 /* mask precision exception for nearbyint() */
9254 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9255 slot = SLOT_CW_MASK_PM;
9256 break;
9257
9258 default:
9259 gcc_unreachable ();
9260 }
9261 }
9262 else
9263 {
9264 switch (mode)
9265 {
9266 case I387_CW_TRUNC:
9267 /* round toward zero (truncate) */
9268 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9269 slot = SLOT_CW_TRUNC;
9270 break;
9271
9272 case I387_CW_FLOOR:
9273 /* round down toward -oo */
9274 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9275 slot = SLOT_CW_FLOOR;
9276 break;
9277
9278 case I387_CW_CEIL:
9279 /* round up toward +oo */
9280 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9281 slot = SLOT_CW_CEIL;
9282 break;
9283
9284 case I387_CW_MASK_PM:
9285 /* mask precision exception for nearbyint() */
9286 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9287 slot = SLOT_CW_MASK_PM;
9288 break;
9289
9290 default:
9291 gcc_unreachable ();
9292 }
9293 }
9294
9295 gcc_assert (slot < MAX_386_STACK_LOCALS);
9296
9297 new_mode = assign_386_stack_local (HImode, slot);
9298 emit_move_insn (new_mode, reg);
9299 }
9300
9301 /* Output code for INSN to convert a float to a signed int. OPERANDS
9302 are the insn operands. The output may be [HSD]Imode and the input
9303 operand may be [SDX]Fmode. */
9304
9305 const char *
9306 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9307 {
9308 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9309 int dimode_p = GET_MODE (operands[0]) == DImode;
9310 int round_mode = get_attr_i387_cw (insn);
9311
9312 /* Jump through a hoop or two for DImode, since the hardware has no
9313 non-popping instruction. We used to do this a different way, but
9314 that was somewhat fragile and broke with post-reload splitters. */
9315 if ((dimode_p || fisttp) && !stack_top_dies)
9316 output_asm_insn ("fld\t%y1", operands);
9317
9318 gcc_assert (STACK_TOP_P (operands[1]));
9319 gcc_assert (MEM_P (operands[0]));
9320 gcc_assert (GET_MODE (operands[1]) != TFmode);
9321
9322 if (fisttp)
9323 output_asm_insn ("fisttp%z0\t%0", operands);
9324 else
9325 {
9326 if (round_mode != I387_CW_ANY)
9327 output_asm_insn ("fldcw\t%3", operands);
9328 if (stack_top_dies || dimode_p)
9329 output_asm_insn ("fistp%z0\t%0", operands);
9330 else
9331 output_asm_insn ("fist%z0\t%0", operands);
9332 if (round_mode != I387_CW_ANY)
9333 output_asm_insn ("fldcw\t%2", operands);
9334 }
9335
9336 return "";
9337 }
9338
9339 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9340 have the values zero or one, indicates the ffreep insn's operand
9341 from the OPERANDS array. */
9342
9343 static const char *
9344 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9345 {
9346 if (TARGET_USE_FFREEP)
9347 #if HAVE_AS_IX86_FFREEP
9348 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9349 #else
9350 {
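/* ffreep %st(i) encodes as the two bytes 0xdf, 0xc0 + i.  When the
   assembler does not know the mnemonic, emit the bytes directly: patching
   the digit below turns the template into e.g. ".word 0xc3df", which the
   little-endian .word directive emits as 0xdf 0xc3, i.e. ffreep %st(3).  */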
9351 static char retval[] = ".word\t0xc_df";
9352 int regno = REGNO (operands[opno]);
9353
9354 gcc_assert (FP_REGNO_P (regno));
9355
9356 retval[9] = '0' + (regno - FIRST_STACK_REG);
9357 return retval;
9358 }
9359 #endif
9360
9361 return opno ? "fstp\t%y1" : "fstp\t%y0";
9362 }
9363
9364
9365 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9366 should be used. UNORDERED_P is true when fucom should be used. */
9367
9368 const char *
9369 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9370 {
9371 int stack_top_dies;
9372 rtx cmp_op0, cmp_op1;
9373 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9374
9375 if (eflags_p)
9376 {
9377 cmp_op0 = operands[0];
9378 cmp_op1 = operands[1];
9379 }
9380 else
9381 {
9382 cmp_op0 = operands[1];
9383 cmp_op1 = operands[2];
9384 }
9385
9386 if (is_sse)
9387 {
9388 if (GET_MODE (operands[0]) == SFmode)
9389 if (unordered_p)
9390 return "ucomiss\t{%1, %0|%0, %1}";
9391 else
9392 return "comiss\t{%1, %0|%0, %1}";
9393 else
9394 if (unordered_p)
9395 return "ucomisd\t{%1, %0|%0, %1}";
9396 else
9397 return "comisd\t{%1, %0|%0, %1}";
9398 }
9399
9400 gcc_assert (STACK_TOP_P (cmp_op0));
9401
9402 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9403
9404 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9405 {
9406 if (stack_top_dies)
9407 {
9408 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9409 return output_387_ffreep (operands, 1);
9410 }
9411 else
9412 return "ftst\n\tfnstsw\t%0";
9413 }
9414
9415 if (STACK_REG_P (cmp_op1)
9416 && stack_top_dies
9417 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9418 && REGNO (cmp_op1) != FIRST_STACK_REG)
9419 {
9420 /* If the top of the 387 stack dies, and the other operand
9421 is also a stack register that dies, then this must be a
9422 `fcompp' float compare. */
9423
9424 if (eflags_p)
9425 {
9426 /* There is no double popping fcomi variant. Fortunately,
9427 eflags is immune from the fstp's cc clobbering. */
9428 if (unordered_p)
9429 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9430 else
9431 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9432 return output_387_ffreep (operands, 0);
9433 }
9434 else
9435 {
9436 if (unordered_p)
9437 return "fucompp\n\tfnstsw\t%0";
9438 else
9439 return "fcompp\n\tfnstsw\t%0";
9440 }
9441 }
9442 else
9443 {
9444 /* The table index is encoded as eflags_p << 3 | intmode << 2 | unordered_p << 1 | stack_top_dies. */
9445
9446 static const char * const alt[16] =
9447 {
9448 "fcom%z2\t%y2\n\tfnstsw\t%0",
9449 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9450 "fucom%z2\t%y2\n\tfnstsw\t%0",
9451 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9452
9453 "ficom%z2\t%y2\n\tfnstsw\t%0",
9454 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9455 NULL,
9456 NULL,
9457
9458 "fcomi\t{%y1, %0|%0, %y1}",
9459 "fcomip\t{%y1, %0|%0, %y1}",
9460 "fucomi\t{%y1, %0|%0, %y1}",
9461 "fucomip\t{%y1, %0|%0, %y1}",
9462
9463 NULL,
9464 NULL,
9465 NULL,
9466 NULL
9467 };
9468
9469 int mask;
9470 const char *ret;
9471
9472 mask = eflags_p << 3;
9473 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9474 mask |= unordered_p << 1;
9475 mask |= stack_top_dies;
9476
9477 gcc_assert (mask < 16);
9478 ret = alt[mask];
9479 gcc_assert (ret);
9480
9481 return ret;
9482 }
9483 }
9484
9485 void
9486 ix86_output_addr_vec_elt (FILE *file, int value)
9487 {
9488 const char *directive = ASM_LONG;
9489
9490 #ifdef ASM_QUAD
9491 if (TARGET_64BIT)
9492 directive = ASM_QUAD;
9493 #else
9494 gcc_assert (!TARGET_64BIT);
9495 #endif
9496
9497 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9498 }
9499
9500 void
9501 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9502 {
9503 const char *directive = ASM_LONG;
9504
9505 #ifdef ASM_QUAD
9506 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9507 directive = ASM_QUAD;
9508 #else
9509 gcc_assert (!TARGET_64BIT);
9510 #endif
9511 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9512 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9513 fprintf (file, "%s%s%d-%s%d\n",
9514 directive, LPREFIX, value, LPREFIX, rel);
9515 else if (HAVE_AS_GOTOFF_IN_DATA)
9516 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9517 #if TARGET_MACHO
9518 else if (TARGET_MACHO)
9519 {
9520 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9521 machopic_output_function_base_name (file);
9522 fprintf(file, "\n");
9523 }
9524 #endif
9525 else
9526 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9527 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9528 }
9529 \f
9530 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9531 for the target. */
9532
9533 void
9534 ix86_expand_clear (rtx dest)
9535 {
9536 rtx tmp;
9537
9538 /* We play register width games, which are only valid after reload. */
9539 gcc_assert (reload_completed);
9540
9541 /* Avoid HImode and its attendant prefix byte. */
9542 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9543 dest = gen_rtx_REG (SImode, REGNO (dest));
9544 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9545
9546 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9547 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9548 {
9549 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9550 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9551 }
9552
9553 emit_insn (tmp);
9554 }
9555
9556 /* X is an unchanging MEM. If it is a constant pool reference, return
9557 the constant pool rtx, else NULL. */
9558
9559 rtx
9560 maybe_get_pool_constant (rtx x)
9561 {
9562 x = ix86_delegitimize_address (XEXP (x, 0));
9563
9564 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9565 return get_pool_constant (x);
9566
9567 return NULL_RTX;
9568 }
9569
9570 void
9571 ix86_expand_move (enum machine_mode mode, rtx operands[])
9572 {
9573 int strict = (reload_in_progress || reload_completed);
9574 rtx op0, op1;
9575 enum tls_model model;
9576
9577 op0 = operands[0];
9578 op1 = operands[1];
9579
9580 if (GET_CODE (op1) == SYMBOL_REF)
9581 {
9582 model = SYMBOL_REF_TLS_MODEL (op1);
9583 if (model)
9584 {
9585 op1 = legitimize_tls_address (op1, model, true);
9586 op1 = force_operand (op1, op0);
9587 if (op1 == op0)
9588 return;
9589 }
9590 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9591 && SYMBOL_REF_DLLIMPORT_P (op1))
9592 op1 = legitimize_dllimport_symbol (op1, false);
9593 }
9594 else if (GET_CODE (op1) == CONST
9595 && GET_CODE (XEXP (op1, 0)) == PLUS
9596 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9597 {
9598 rtx addend = XEXP (XEXP (op1, 0), 1);
9599 rtx symbol = XEXP (XEXP (op1, 0), 0);
9600 rtx tmp = NULL;
9601
9602 model = SYMBOL_REF_TLS_MODEL (symbol);
9603 if (model)
9604 tmp = legitimize_tls_address (symbol, model, true);
9605 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9606 && SYMBOL_REF_DLLIMPORT_P (symbol))
9607 tmp = legitimize_dllimport_symbol (symbol, true);
9608
9609 if (tmp)
9610 {
9611 tmp = force_operand (tmp, NULL);
9612 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9613 op0, 1, OPTAB_DIRECT);
9614 if (tmp == op0)
9615 return;
9616 }
9617 }
9618
9619 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9620 {
9621 if (TARGET_MACHO && !TARGET_64BIT)
9622 {
9623 #if TARGET_MACHO
9624 if (MACHOPIC_PURE)
9625 {
9626 rtx temp = ((reload_in_progress
9627 || ((op0 && REG_P (op0))
9628 && mode == Pmode))
9629 ? op0 : gen_reg_rtx (Pmode));
9630 op1 = machopic_indirect_data_reference (op1, temp);
9631 op1 = machopic_legitimize_pic_address (op1, mode,
9632 temp == op1 ? 0 : temp);
9633 }
9634 else if (MACHOPIC_INDIRECT)
9635 op1 = machopic_indirect_data_reference (op1, 0);
9636 if (op0 == op1)
9637 return;
9638 #endif
9639 }
9640 else
9641 {
9642 if (MEM_P (op0))
9643 op1 = force_reg (Pmode, op1);
9644 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9645 {
9646 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9647 op1 = legitimize_pic_address (op1, reg);
9648 if (op0 == op1)
9649 return;
9650 }
9651 }
9652 }
9653 else
9654 {
9655 if (MEM_P (op0)
9656 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9657 || !push_operand (op0, mode))
9658 && MEM_P (op1))
9659 op1 = force_reg (mode, op1);
9660
9661 if (push_operand (op0, mode)
9662 && ! general_no_elim_operand (op1, mode))
9663 op1 = copy_to_mode_reg (mode, op1);
9664
9665 /* Force large constants in 64-bit compilation into a register
9666 to get them CSEed. */
9667 if (TARGET_64BIT && mode == DImode
9668 && immediate_operand (op1, mode)
9669 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9670 && !register_operand (op0, mode)
9671 && optimize && !reload_completed && !reload_in_progress)
9672 op1 = copy_to_mode_reg (mode, op1);
9673
9674 if (FLOAT_MODE_P (mode))
9675 {
9676 /* If we are loading a floating point constant to a register,
9677 force the value to memory now, since we'll get better code
9678 out of the back end. */
9679
9680 if (strict)
9681 ;
9682 else if (GET_CODE (op1) == CONST_DOUBLE)
9683 {
9684 op1 = validize_mem (force_const_mem (mode, op1));
9685 if (!register_operand (op0, mode))
9686 {
9687 rtx temp = gen_reg_rtx (mode);
9688 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9689 emit_move_insn (op0, temp);
9690 return;
9691 }
9692 }
9693 }
9694 }
9695
9696 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9697 }
9698
9699 void
9700 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9701 {
9702 rtx op0 = operands[0], op1 = operands[1];
9703
9704 /* Force constants other than zero into memory. We do not know how
9705 the instructions used to build constants modify the upper 64 bits
9706 of the register; once we have that information, we may be able
9707 to handle some of them more efficiently. */
9708 if ((reload_in_progress | reload_completed) == 0
9709 && register_operand (op0, mode)
9710 && CONSTANT_P (op1)
9711 && standard_sse_constant_p (op1) <= 0)
9712 op1 = validize_mem (force_const_mem (mode, op1));
9713
9714 /* Make operand1 a register if it isn't already. */
9715 if (!no_new_pseudos
9716 && !register_operand (op0, mode)
9717 && !register_operand (op1, mode))
9718 {
9719 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9720 return;
9721 }
9722
9723 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9724 }
9725
9726 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9727 straight to ix86_expand_vector_move. */
9728 /* Code generation for scalar reg-reg moves of single and double precision data:
9729 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9730 movaps reg, reg
9731 else
9732 movss reg, reg
9733 if (x86_sse_partial_reg_dependency == true)
9734 movapd reg, reg
9735 else
9736 movsd reg, reg
9737
9738 Code generation for scalar loads of double precision data:
9739 if (x86_sse_split_regs == true)
9740 movlpd mem, reg (gas syntax)
9741 else
9742 movsd mem, reg
9743
9744 Code generation for unaligned packed loads of single precision data
9745 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9746 if (x86_sse_unaligned_move_optimal)
9747 movups mem, reg
9748
9749 if (x86_sse_partial_reg_dependency == true)
9750 {
9751 xorps reg, reg
9752 movlps mem, reg
9753 movhps mem+8, reg
9754 }
9755 else
9756 {
9757 movlps mem, reg
9758 movhps mem+8, reg
9759 }
9760
9761 Code generation for unaligned packed loads of double precision data
9762 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9763 if (x86_sse_unaligned_move_optimal)
9764 movupd mem, reg
9765
9766 if (x86_sse_split_regs == true)
9767 {
9768 movlpd mem, reg
9769 movhpd mem+8, reg
9770 }
9771 else
9772 {
9773 movsd mem, reg
9774 movhpd mem+8, reg
9775 }
9776 */
9777
9778 void
9779 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9780 {
9781 rtx op0, op1, m;
9782
9783 op0 = operands[0];
9784 op1 = operands[1];
9785
9786 if (MEM_P (op1))
9787 {
9788 /* If we're optimizing for size, movups is the smallest. */
9789 if (optimize_size)
9790 {
9791 op0 = gen_lowpart (V4SFmode, op0);
9792 op1 = gen_lowpart (V4SFmode, op1);
9793 emit_insn (gen_sse_movups (op0, op1));
9794 return;
9795 }
9796
9797 /* ??? If we have typed data, then it would appear that using
9798 movdqu is the only way to get unaligned data loaded with
9799 integer type. */
9800 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9801 {
9802 op0 = gen_lowpart (V16QImode, op0);
9803 op1 = gen_lowpart (V16QImode, op1);
9804 emit_insn (gen_sse2_movdqu (op0, op1));
9805 return;
9806 }
9807
9808 if (TARGET_SSE2 && mode == V2DFmode)
9809 {
9810 rtx zero;
9811
9812 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9813 {
9814 op0 = gen_lowpart (V2DFmode, op0);
9815 op1 = gen_lowpart (V2DFmode, op1);
9816 emit_insn (gen_sse2_movupd (op0, op1));
9817 return;
9818 }
9819
9820 /* When SSE registers are split into halves, we can avoid
9821 writing to the top half twice. */
9822 if (TARGET_SSE_SPLIT_REGS)
9823 {
9824 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9825 zero = op0;
9826 }
9827 else
9828 {
9829 /* ??? Not sure about the best option for the Intel chips.
9830 The following would seem to satisfy; the register is
9831 entirely cleared, breaking the dependency chain. We
9832 then store to the upper half, with a dependency depth
9833 of one. A rumor has it that Intel recommends two movsd
9834 followed by an unpacklpd, but this is unconfirmed. And
9835 given that the dependency depth of the unpacklpd would
9836 still be one, I'm not sure why this would be better. */
9837 zero = CONST0_RTX (V2DFmode);
9838 }
9839
9840 m = adjust_address (op1, DFmode, 0);
9841 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9842 m = adjust_address (op1, DFmode, 8);
9843 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9844 }
9845 else
9846 {
9847 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9848 {
9849 op0 = gen_lowpart (V4SFmode, op0);
9850 op1 = gen_lowpart (V4SFmode, op1);
9851 emit_insn (gen_sse_movups (op0, op1));
9852 return;
9853 }
9854
9855 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9856 emit_move_insn (op0, CONST0_RTX (mode));
9857 else
9858 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9859
9860 if (mode != V4SFmode)
9861 op0 = gen_lowpart (V4SFmode, op0);
9862 m = adjust_address (op1, V2SFmode, 0);
9863 emit_insn (gen_sse_loadlps (op0, op0, m));
9864 m = adjust_address (op1, V2SFmode, 8);
9865 emit_insn (gen_sse_loadhps (op0, op0, m));
9866 }
9867 }
9868 else if (MEM_P (op0))
9869 {
9870 /* If we're optimizing for size, movups is the smallest. */
9871 if (optimize_size)
9872 {
9873 op0 = gen_lowpart (V4SFmode, op0);
9874 op1 = gen_lowpart (V4SFmode, op1);
9875 emit_insn (gen_sse_movups (op0, op1));
9876 return;
9877 }
9878
9879 /* ??? Similar to above, only less clear because of quote
9880 typeless stores unquote. */
9881 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9882 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9883 {
9884 op0 = gen_lowpart (V16QImode, op0);
9885 op1 = gen_lowpart (V16QImode, op1);
9886 emit_insn (gen_sse2_movdqu (op0, op1));
9887 return;
9888 }
9889
9890 if (TARGET_SSE2 && mode == V2DFmode)
9891 {
9892 m = adjust_address (op0, DFmode, 0);
9893 emit_insn (gen_sse2_storelpd (m, op1));
9894 m = adjust_address (op0, DFmode, 8);
9895 emit_insn (gen_sse2_storehpd (m, op1));
9896 }
9897 else
9898 {
9899 if (mode != V4SFmode)
9900 op1 = gen_lowpart (V4SFmode, op1);
9901 m = adjust_address (op0, V2SFmode, 0);
9902 emit_insn (gen_sse_storelps (m, op1));
9903 m = adjust_address (op0, V2SFmode, 8);
9904 emit_insn (gen_sse_storehps (m, op1));
9905 }
9906 }
9907 else
9908 gcc_unreachable ();
9909 }
9910
9911 /* Expand a push in MODE. This is some mode for which we do not support
9912 proper push instructions, at least from the registers that we expect
9913 the value to live in. */
9914
9915 void
9916 ix86_expand_push (enum machine_mode mode, rtx x)
9917 {
9918 rtx tmp;
9919
9920 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9921 GEN_INT (-GET_MODE_SIZE (mode)),
9922 stack_pointer_rtx, 1, OPTAB_DIRECT);
9923 if (tmp != stack_pointer_rtx)
9924 emit_move_insn (stack_pointer_rtx, tmp);
9925
9926 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9927 emit_move_insn (tmp, x);
9928 }
9929
9930 /* Helper function of ix86_fixup_binary_operands to canonicalize
9931 operand order. Returns true if the operands should be swapped. */
9932
9933 static bool
9934 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9935 rtx operands[])
9936 {
9937 rtx dst = operands[0];
9938 rtx src1 = operands[1];
9939 rtx src2 = operands[2];
9940
9941 /* If the operation is not commutative, we can't do anything. */
9942 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9943 return false;
9944
9945 /* Highest priority is that src1 should match dst. */
9946 if (rtx_equal_p (dst, src1))
9947 return false;
9948 if (rtx_equal_p (dst, src2))
9949 return true;
9950
9951 /* Next highest priority is that immediate constants come second. */
9952 if (immediate_operand (src2, mode))
9953 return false;
9954 if (immediate_operand (src1, mode))
9955 return true;
9956
9957 /* Lowest priority is that memory references should come second. */
9958 if (MEM_P (src2))
9959 return false;
9960 if (MEM_P (src1))
9961 return true;
9962
9963 return false;
9964 }
9965
9966
9967 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9968 destination to use for the operation. If different from the true
9969 destination in operands[0], a copy operation will be required. */
9970
9971 rtx
9972 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9973 rtx operands[])
9974 {
9975 rtx dst = operands[0];
9976 rtx src1 = operands[1];
9977 rtx src2 = operands[2];
9978
9979 /* Canonicalize operand order. */
9980 if (ix86_swap_binary_operands_p (code, mode, operands))
9981 {
9982 rtx temp = src1;
9983 src1 = src2;
9984 src2 = temp;
9985 }
9986
9987 /* Both source operands cannot be in memory. */
9988 if (MEM_P (src1) && MEM_P (src2))
9989 {
9990 /* Optimization: Only read from memory once. */
9991 if (rtx_equal_p (src1, src2))
9992 {
9993 src2 = force_reg (mode, src2);
9994 src1 = src2;
9995 }
9996 else
9997 src2 = force_reg (mode, src2);
9998 }
9999
10000 /* If the destination is memory, and we do not have matching source
10001 operands, do things in registers. */
10002 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10003 dst = gen_reg_rtx (mode);
10004
10005 /* Source 1 cannot be a constant. */
10006 if (CONSTANT_P (src1))
10007 src1 = force_reg (mode, src1);
10008
10009 /* Source 1 cannot be a non-matching memory. */
10010 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10011 src1 = force_reg (mode, src1);
10012
10013 operands[1] = src1;
10014 operands[2] = src2;
10015 return dst;
10016 }
10017
10018 /* Similarly, but assume that the destination has already been
10019 set up properly. */
10020
10021 void
10022 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10023 enum machine_mode mode, rtx operands[])
10024 {
10025 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10026 gcc_assert (dst == operands[0]);
10027 }
10028
10029 /* Attempt to expand a binary operator. Make the expansion closer to the
10030 actual machine, than just general_operand, which will allow 3 separate
10031 memory references (one output, two input) in a single insn. */
10032
10033 void
10034 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10035 rtx operands[])
10036 {
10037 rtx src1, src2, dst, op, clob;
10038
10039 dst = ix86_fixup_binary_operands (code, mode, operands);
10040 src1 = operands[1];
10041 src2 = operands[2];
10042
10043 /* Emit the instruction. */
10044
10045 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10046 if (reload_in_progress)
10047 {
10048 /* Reload doesn't know about the flags register, and doesn't know that
10049 it doesn't want to clobber it. We can only do this with PLUS. */
10050 gcc_assert (code == PLUS);
10051 emit_insn (op);
10052 }
10053 else
10054 {
10055 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10056 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10057 }
10058
10059 /* Fix up the destination if needed. */
10060 if (dst != operands[0])
10061 emit_move_insn (operands[0], dst);
10062 }
10063
10064 /* Return TRUE or FALSE depending on whether the binary operator meets the
10065 appropriate constraints. */
10066
10067 int
10068 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10069 rtx operands[3])
10070 {
10071 rtx dst = operands[0];
10072 rtx src1 = operands[1];
10073 rtx src2 = operands[2];
10074
10075 /* Both source operands cannot be in memory. */
10076 if (MEM_P (src1) && MEM_P (src2))
10077 return 0;
10078
10079 /* Canonicalize operand order for commutative operators. */
10080 if (ix86_swap_binary_operands_p (code, mode, operands))
10081 {
10082 rtx temp = src1;
10083 src1 = src2;
10084 src2 = temp;
10085 }
10086
10087 /* If the destination is memory, we must have a matching source operand. */
10088 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10089 return 0;
10090
10091 /* Source 1 cannot be a constant. */
10092 if (CONSTANT_P (src1))
10093 return 0;
10094
10095 /* Source 1 cannot be a non-matching memory. */
10096 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10097 return 0;
10098
10099 return 1;
10100 }
10101
10102 /* Attempt to expand a unary operator. Make the expansion closer to the
10103 actual machine, than just general_operand, which will allow 2 separate
10104 memory references (one output, one input) in a single insn. */
10105
10106 void
10107 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10108 rtx operands[])
10109 {
10110 int matching_memory;
10111 rtx src, dst, op, clob;
10112
10113 dst = operands[0];
10114 src = operands[1];
10115
10116 /* If the destination is memory, and we do not have matching source
10117 operands, do things in registers. */
10118 matching_memory = 0;
10119 if (MEM_P (dst))
10120 {
10121 if (rtx_equal_p (dst, src))
10122 matching_memory = 1;
10123 else
10124 dst = gen_reg_rtx (mode);
10125 }
10126
10127 /* When the source operand is memory, the destination must match. */
10128 if (MEM_P (src) && !matching_memory)
10129 src = force_reg (mode, src);
10130
10131 /* Emit the instruction. */
10132
10133 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10134 if (reload_in_progress || code == NOT)
10135 {
10136 /* Reload doesn't know about the flags register, and doesn't know that
10137 it doesn't want to clobber it. */
10138 gcc_assert (code == NOT);
10139 emit_insn (op);
10140 }
10141 else
10142 {
10143 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10144 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10145 }
10146
10147 /* Fix up the destination if needed. */
10148 if (dst != operands[0])
10149 emit_move_insn (operands[0], dst);
10150 }
10151
10152 /* Return TRUE or FALSE depending on whether the unary operator meets the
10153 appropriate constraints. */
10154
10155 int
10156 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10157 enum machine_mode mode ATTRIBUTE_UNUSED,
10158 rtx operands[2] ATTRIBUTE_UNUSED)
10159 {
10160 /* If one of the operands is memory, source and destination must match. */
10161 if ((MEM_P (operands[0])
10162 || MEM_P (operands[1]))
10163 && ! rtx_equal_p (operands[0], operands[1]))
10164 return FALSE;
10165 return TRUE;
10166 }
10167
10168 /* Post-reload splitter for converting an SF or DFmode value in an
10169 SSE register into an unsigned SImode. */
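/* In outline: values >= 2^31 cannot be converted directly by the signed
   cvttps2dq/cvttpd2dq instructions, so we compare against 2^31,
   conditionally subtract 2^31 before the signed conversion, and then set
   the 2^31 bit of the result again via the final XOR with the comparison
   mask shifted into bit 31.  */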
10170
10171 void
10172 ix86_split_convert_uns_si_sse (rtx operands[])
10173 {
10174 enum machine_mode vecmode;
10175 rtx value, large, zero_or_two31, input, two31, x;
10176
10177 large = operands[1];
10178 zero_or_two31 = operands[2];
10179 input = operands[3];
10180 two31 = operands[4];
10181 vecmode = GET_MODE (large);
10182 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10183
10184 /* Load up the value into the low element. We must ensure that the other
10185 elements are valid floats -- zero is the easiest such value. */
10186 if (MEM_P (input))
10187 {
10188 if (vecmode == V4SFmode)
10189 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10190 else
10191 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10192 }
10193 else
10194 {
10195 input = gen_rtx_REG (vecmode, REGNO (input));
10196 emit_move_insn (value, CONST0_RTX (vecmode));
10197 if (vecmode == V4SFmode)
10198 emit_insn (gen_sse_movss (value, value, input));
10199 else
10200 emit_insn (gen_sse2_movsd (value, value, input));
10201 }
10202
10203 emit_move_insn (large, two31);
10204 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10205
10206 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10207 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10208
10209 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10210 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10211
10212 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10213 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10214
10215 large = gen_rtx_REG (V4SImode, REGNO (large));
10216 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10217
10218 x = gen_rtx_REG (V4SImode, REGNO (value));
10219 if (vecmode == V4SFmode)
10220 emit_insn (gen_sse2_cvttps2dq (x, value));
10221 else
10222 emit_insn (gen_sse2_cvttpd2dq (x, value));
10223 value = x;
10224
10225 emit_insn (gen_xorv4si3 (value, value, large));
10226 }
10227
10228 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10229 Expects the 64-bit DImode to be supplied in a pair of integral
10230 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10231 -mfpmath=sse, !optimize_size only. */
10232
10233 void
10234 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10235 {
10236 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10237 rtx int_xmm, fp_xmm;
10238 rtx biases, exponents;
10239 rtx x;
10240
10241 int_xmm = gen_reg_rtx (V4SImode);
10242 if (TARGET_INTER_UNIT_MOVES)
10243 emit_insn (gen_movdi_to_sse (int_xmm, input));
10244 else if (TARGET_SSE_SPLIT_REGS)
10245 {
10246 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10247 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10248 }
10249 else
10250 {
10251 x = gen_reg_rtx (V2DImode);
10252 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10253 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10254 }
10255
10256 x = gen_rtx_CONST_VECTOR (V4SImode,
10257 gen_rtvec (4, GEN_INT (0x43300000UL),
10258 GEN_INT (0x45300000UL),
10259 const0_rtx, const0_rtx));
10260 exponents = validize_mem (force_const_mem (V4SImode, x));
10261
10262 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10263 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10264
10265 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10266 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10267 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10268 (0x1.0p84 + double(fp_value_hi_xmm)).
10269 Note these exponents differ by 32. */
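
/* Worked example: for the input 0x0000000100000002 (hi = 1, lo = 2) the
   two doubles are 0x1.0p52 + 2 and 0x1.0p84 + 1 * 0x1.0p32; after the
   bias subtraction below they become 2.0 and 4294967296.0, and their sum
   is exactly 4294967298.0, the original unsigned value.  */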
10270
10271 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10272
10273 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10274 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10275 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10276 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10277 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10278 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10279 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10280 biases = validize_mem (force_const_mem (V2DFmode, biases));
10281 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10282
10283 /* Add the upper and lower DFmode values together. */
10284 if (TARGET_SSE3)
10285 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10286 else
10287 {
10288 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10289 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10290 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10291 }
10292
10293 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10294 }
10295
10296 /* Convert an unsigned SImode value into a DFmode. Only currently used
10297 for SSE, but applicable anywhere. */
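
/* The trick used below: adding -2^31 in SImode wraps the unsigned input
   into the signed range, the signed int->double conversion is then exact,
   and adding 2^31.0 back as a double restores the intended value.  */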
10298
10299 void
10300 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10301 {
10302 REAL_VALUE_TYPE TWO31r;
10303 rtx x, fp;
10304
10305 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10306 NULL, 1, OPTAB_DIRECT);
10307
10308 fp = gen_reg_rtx (DFmode);
10309 emit_insn (gen_floatsidf2 (fp, x));
10310
10311 real_ldexp (&TWO31r, &dconst1, 31);
10312 x = const_double_from_real_value (TWO31r, DFmode);
10313
10314 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10315 if (x != target)
10316 emit_move_insn (target, x);
10317 }
10318
10319 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10320 32-bit mode; otherwise we have a direct convert instruction. */
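
/* Computed below as (double) (signed) hi * 2^32 + (double) (unsigned) lo:
   the high half carries the sign, the low half is treated as unsigned,
   and both partial values are exact, so only the final addition rounds.  */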
10321
10322 void
10323 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10324 {
10325 REAL_VALUE_TYPE TWO32r;
10326 rtx fp_lo, fp_hi, x;
10327
10328 fp_lo = gen_reg_rtx (DFmode);
10329 fp_hi = gen_reg_rtx (DFmode);
10330
10331 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10332
10333 real_ldexp (&TWO32r, &dconst1, 32);
10334 x = const_double_from_real_value (TWO32r, DFmode);
10335 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10336
10337 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10338
10339 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10340 0, OPTAB_DIRECT);
10341 if (x != target)
10342 emit_move_insn (target, x);
10343 }
10344
10345 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10346 For x86_32, -mfpmath=sse, !optimize_size only. */
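/* SFmode cannot represent every 32-bit integer exactly, so the input is
   split into 16-bit halves: each half converts to SFmode exactly, the
   high half is scaled by 2^16 (also exact), and only the final addition
   can round.  */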
10347 void
10348 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10349 {
10350 REAL_VALUE_TYPE ONE16r;
10351 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10352
10353 real_ldexp (&ONE16r, &dconst1, 16);
10354 x = const_double_from_real_value (ONE16r, SFmode);
10355 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10356 NULL, 0, OPTAB_DIRECT);
10357 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10358 NULL, 0, OPTAB_DIRECT);
10359 fp_hi = gen_reg_rtx (SFmode);
10360 fp_lo = gen_reg_rtx (SFmode);
10361 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10362 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10363 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10364 0, OPTAB_DIRECT);
10365 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10366 0, OPTAB_DIRECT);
10367 if (!rtx_equal_p (target, fp_hi))
10368 emit_move_insn (target, fp_hi);
10369 }
10370
10371 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10372 then replicate the value for all elements of the vector
10373 register. */
10374
10375 rtx
10376 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10377 {
10378 rtvec v;
10379 switch (mode)
10380 {
10381 case SFmode:
10382 if (vect)
10383 v = gen_rtvec (4, value, value, value, value);
10384 else
10385 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10386 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10387 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10388
10389 case DFmode:
10390 if (vect)
10391 v = gen_rtvec (2, value, value);
10392 else
10393 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10394 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10395
10396 default:
10397 gcc_unreachable ();
10398 }
10399 }
10400
10401 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10402 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10403 true, then replicate the mask for all elements of the vector register.
10404 If INVERT is true, then create a mask excluding the sign bit. */
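
/* Concretely, each masked element is 0x80000000 for SFmode or
   0x8000000000000000 for DFmode (their complements when INVERT is true),
   built as a V4SF or V2DF constant and forced into a register.  */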
10405
10406 rtx
10407 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10408 {
10409 enum machine_mode vec_mode;
10410 HOST_WIDE_INT hi, lo;
10411 int shift = 63;
10412 rtx v;
10413 rtx mask;
10414
10415 /* Find the sign bit, sign extended to 2*HWI. */
10416 if (mode == SFmode)
10417 lo = 0x80000000, hi = lo < 0;
10418 else if (HOST_BITS_PER_WIDE_INT >= 64)
10419 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10420 else
10421 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10422
10423 if (invert)
10424 lo = ~lo, hi = ~hi;
10425
10426 /* Force this value into the low part of a fp vector constant. */
10427 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10428 mask = gen_lowpart (mode, mask);
10429
10430 v = ix86_build_const_vector (mode, vect, mask);
10431 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10432 return force_reg (vec_mode, v);
10433 }
10434
10435 /* Generate code for floating point ABS or NEG. */
10436
10437 void
10438 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10439 rtx operands[])
10440 {
10441 rtx mask, set, use, clob, dst, src;
10442 bool matching_memory;
10443 bool use_sse = false;
10444 bool vector_mode = VECTOR_MODE_P (mode);
10445 enum machine_mode elt_mode = mode;
10446
10447 if (vector_mode)
10448 {
10449 elt_mode = GET_MODE_INNER (mode);
10450 use_sse = true;
10451 }
10452 else if (TARGET_SSE_MATH)
10453 use_sse = SSE_FLOAT_MODE_P (mode);
10454
10455 /* NEG and ABS performed with SSE use bitwise mask operations.
10456 Create the appropriate mask now. */
10457 if (use_sse)
10458 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10459 else
10460 mask = NULL_RTX;
10461
10462 dst = operands[0];
10463 src = operands[1];
10464
10465 /* If the destination is memory, and we don't have matching source
10466 operands or we're using the x87, do things in registers. */
10467 matching_memory = false;
10468 if (MEM_P (dst))
10469 {
10470 if (use_sse && rtx_equal_p (dst, src))
10471 matching_memory = true;
10472 else
10473 dst = gen_reg_rtx (mode);
10474 }
10475 if (MEM_P (src) && !matching_memory)
10476 src = force_reg (mode, src);
10477
10478 if (vector_mode)
10479 {
10480 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10481 set = gen_rtx_SET (VOIDmode, dst, set);
10482 emit_insn (set);
10483 }
10484 else
10485 {
10486 set = gen_rtx_fmt_e (code, mode, src);
10487 set = gen_rtx_SET (VOIDmode, dst, set);
10488 if (mask)
10489 {
10490 use = gen_rtx_USE (VOIDmode, mask);
10491 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10492 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10493 gen_rtvec (3, set, use, clob)));
10494 }
10495 else
10496 emit_insn (set);
10497 }
10498
10499 if (dst != operands[0])
10500 emit_move_insn (operands[0], dst);
10501 }
10502
10503 /* Expand a copysign operation. Special case operand 0 being a constant. */
10504
10505 void
10506 ix86_expand_copysign (rtx operands[])
10507 {
10508 enum machine_mode mode, vmode;
10509 rtx dest, op0, op1, mask, nmask;
10510
10511 dest = operands[0];
10512 op0 = operands[1];
10513 op1 = operands[2];
10514
10515 mode = GET_MODE (dest);
10516 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10517
10518 if (GET_CODE (op0) == CONST_DOUBLE)
10519 {
10520 rtvec v;
10521
10522 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10523 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10524
10525 if (op0 == CONST0_RTX (mode))
10526 op0 = CONST0_RTX (vmode);
10527 else
10528 {
10529 if (mode == SFmode)
10530 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10531 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10532 else
10533 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10534 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10535 }
10536
10537 mask = ix86_build_signbit_mask (mode, 0, 0);
10538
10539 if (mode == SFmode)
10540 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10541 else
10542 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10543 }
10544 else
10545 {
10546 nmask = ix86_build_signbit_mask (mode, 0, 1);
10547 mask = ix86_build_signbit_mask (mode, 0, 0);
10548
10549 if (mode == SFmode)
10550 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10551 else
10552 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10553 }
10554 }
10555
10556 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10557 be a constant, and so has already been expanded into a vector constant. */
10558
10559 void
10560 ix86_split_copysign_const (rtx operands[])
10561 {
10562 enum machine_mode mode, vmode;
10563 rtx dest, op0, op1, mask, x;
10564
10565 dest = operands[0];
10566 op0 = operands[1];
10567 op1 = operands[2];
10568 mask = operands[3];
10569
10570 mode = GET_MODE (dest);
10571 vmode = GET_MODE (mask);
10572
10573 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10574 x = gen_rtx_AND (vmode, dest, mask);
10575 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10576
10577 if (op0 != CONST0_RTX (vmode))
10578 {
10579 x = gen_rtx_IOR (vmode, dest, op0);
10580 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10581 }
10582 }
10583
10584 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10585 so we have to do two masks. */
10586
10587 void
10588 ix86_split_copysign_var (rtx operands[])
10589 {
10590 enum machine_mode mode, vmode;
10591 rtx dest, scratch, op0, op1, mask, nmask, x;
10592
10593 dest = operands[0];
10594 scratch = operands[1];
10595 op0 = operands[2];
10596 op1 = operands[3];
10597 nmask = operands[4];
10598 mask = operands[5];
10599
10600 mode = GET_MODE (dest);
10601 vmode = GET_MODE (mask);
10602
10603 if (rtx_equal_p (op0, op1))
10604 {
10605 /* Shouldn't happen often (it's useless, obviously), but when it does
10606 we'd generate incorrect code if we continue below. */
10607 emit_move_insn (dest, op0);
10608 return;
10609 }
10610
10611 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10612 {
10613 gcc_assert (REGNO (op1) == REGNO (scratch));
10614
10615 x = gen_rtx_AND (vmode, scratch, mask);
10616 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10617
10618 dest = mask;
10619 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10620 x = gen_rtx_NOT (vmode, dest);
10621 x = gen_rtx_AND (vmode, x, op0);
10622 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10623 }
10624 else
10625 {
10626 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10627 {
10628 x = gen_rtx_AND (vmode, scratch, mask);
10629 }
10630 else /* alternative 2,4 */
10631 {
10632 gcc_assert (REGNO (mask) == REGNO (scratch));
10633 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10634 x = gen_rtx_AND (vmode, scratch, op1);
10635 }
10636 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10637
10638 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10639 {
10640 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10641 x = gen_rtx_AND (vmode, dest, nmask);
10642 }
10643 else /* alternative 3,4 */
10644 {
10645 gcc_assert (REGNO (nmask) == REGNO (dest));
10646 dest = nmask;
10647 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10648 x = gen_rtx_AND (vmode, dest, op0);
10649 }
10650 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10651 }
10652
10653 x = gen_rtx_IOR (vmode, dest, scratch);
10654 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10655 }
10656
10657 /* Return TRUE or FALSE depending on whether the first SET in INSN
10658 has source and destination with matching CC modes, and whether the
10659 CC mode is at least as constrained as REQ_MODE. */
10660
10661 int
10662 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10663 {
10664 rtx set;
10665 enum machine_mode set_mode;
10666
10667 set = PATTERN (insn);
10668 if (GET_CODE (set) == PARALLEL)
10669 set = XVECEXP (set, 0, 0);
10670 gcc_assert (GET_CODE (set) == SET);
10671 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10672
10673 set_mode = GET_MODE (SET_DEST (set));
10674 switch (set_mode)
10675 {
10676 case CCNOmode:
10677 if (req_mode != CCNOmode
10678 && (req_mode != CCmode
10679 || XEXP (SET_SRC (set), 1) != const0_rtx))
10680 return 0;
10681 break;
10682 case CCmode:
10683 if (req_mode == CCGCmode)
10684 return 0;
10685 /* FALLTHRU */
10686 case CCGCmode:
10687 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10688 return 0;
10689 /* FALLTHRU */
10690 case CCGOCmode:
10691 if (req_mode == CCZmode)
10692 return 0;
10693 /* FALLTHRU */
10694 case CCZmode:
10695 break;
10696
10697 default:
10698 gcc_unreachable ();
10699 }
10700
10701 return (GET_MODE (SET_SRC (set)) == set_mode);
10702 }
10703
10704 /* Generate insn patterns to do an integer compare of OPERANDS. */
10705
10706 static rtx
10707 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10708 {
10709 enum machine_mode cmpmode;
10710 rtx tmp, flags;
10711
10712 cmpmode = SELECT_CC_MODE (code, op0, op1);
10713 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10714
10715 /* This is very simple, but making the interface the same as in the
10716 FP case makes the rest of the code easier. */
10717 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10718 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10719
10720 /* Return the test that should be put into the flags user, i.e.
10721 the bcc, scc, or cmov instruction. */
10722 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10723 }
10724
10725 /* Figure out whether to use ordered or unordered fp comparisons.
10726 Return the appropriate mode to use. */
10727
10728 enum machine_mode
10729 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10730 {
10731 /* ??? In order to make all comparisons reversible, we do all comparisons
10732 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10733 all forms of trapping and nontrapping comparisons, we can make inequality
10734 comparisons trapping again, since it results in better code when using
10735 FCOM based compares. */
10736 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10737 }
10738
10739 enum machine_mode
10740 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10741 {
10742 enum machine_mode mode = GET_MODE (op0);
10743
10744 if (SCALAR_FLOAT_MODE_P (mode))
10745 {
10746 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10747 return ix86_fp_compare_mode (code);
10748 }
10749
10750 switch (code)
10751 {
10752 /* Only zero flag is needed. */
10753 case EQ: /* ZF=0 */
10754 case NE: /* ZF!=0 */
10755 return CCZmode;
10756 /* Codes needing carry flag. */
10757 case GEU: /* CF=0 */
10758 case GTU: /* CF=0 & ZF=0 */
10759 case LTU: /* CF=1 */
10760 case LEU: /* CF=1 | ZF=1 */
10761 return CCmode;
10762 /* Codes possibly doable only with sign flag when
10763 comparing against zero. */
10764 case GE: /* SF=OF or SF=0 */
10765 case LT: /* SF<>OF or SF=1 */
10766 if (op1 == const0_rtx)
10767 return CCGOCmode;
10768 else
10769 /* For other cases Carry flag is not required. */
10770 return CCGCmode;
10771 /* Codes doable only with the sign flag when comparing
10772 against zero, but for which we lack a jump instruction,
10773 so we need to use relational tests that also involve the
10774 overflow flag, which thus needs to be zero. */
10775 case GT: /* ZF=0 & SF=OF */
10776 case LE: /* ZF=1 | SF<>OF */
10777 if (op1 == const0_rtx)
10778 return CCNOmode;
10779 else
10780 return CCGCmode;
10781 /* The strcmp pattern does a (use flags), and combine may ask us for the
10782 proper mode. */
10783 case USE:
10784 return CCmode;
10785 default:
10786 gcc_unreachable ();
10787 }
10788 }
10789
10790 /* Return the fixed registers used for condition codes. */
10791
10792 static bool
10793 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10794 {
10795 *p1 = FLAGS_REG;
10796 *p2 = FPSR_REG;
10797 return true;
10798 }
10799
10800 /* If two condition code modes are compatible, return a condition code
10801 mode which is compatible with both. Otherwise, return
10802 VOIDmode. */
10803
10804 static enum machine_mode
10805 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10806 {
10807 if (m1 == m2)
10808 return m1;
10809
10810 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10811 return VOIDmode;
10812
10813 if ((m1 == CCGCmode && m2 == CCGOCmode)
10814 || (m1 == CCGOCmode && m2 == CCGCmode))
10815 return CCGCmode;
10816
10817 switch (m1)
10818 {
10819 default:
10820 gcc_unreachable ();
10821
10822 case CCmode:
10823 case CCGCmode:
10824 case CCGOCmode:
10825 case CCNOmode:
10826 case CCZmode:
10827 switch (m2)
10828 {
10829 default:
10830 return VOIDmode;
10831
10832 case CCmode:
10833 case CCGCmode:
10834 case CCGOCmode:
10835 case CCNOmode:
10836 case CCZmode:
10837 return CCmode;
10838 }
10839
10840 case CCFPmode:
10841 case CCFPUmode:
10842 /* These are only compatible with themselves, which we already
10843 checked above. */
10844 return VOIDmode;
10845 }
10846 }
10847
10848 /* Split comparison code CODE into comparisons we can do using branch
10849 instructions. BYPASS_CODE is the comparison code for the branch that
10850 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10851 is not required, its code is set to UNKNOWN.
10852 We never require more than two branches. */
10853
10854 void
10855 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10856 enum rtx_code *first_code,
10857 enum rtx_code *second_code)
10858 {
10859 *first_code = code;
10860 *bypass_code = UNKNOWN;
10861 *second_code = UNKNOWN;
10862
10863 /* The fcomi comparison sets flags as follows:
10864
10865 cmp ZF PF CF
10866 > 0 0 0
10867 < 0 0 1
10868 = 1 0 0
10869 un 1 1 1 */
10870
10871 switch (code)
10872 {
10873 case GT: /* GTU - CF=0 & ZF=0 */
10874 case GE: /* GEU - CF=0 */
10875 case ORDERED: /* PF=0 */
10876 case UNORDERED: /* PF=1 */
10877 case UNEQ: /* EQ - ZF=1 */
10878 case UNLT: /* LTU - CF=1 */
10879 case UNLE: /* LEU - CF=1 | ZF=1 */
10880 case LTGT: /* EQ - ZF=0 */
10881 break;
10882 case LT: /* LTU - CF=1 - fails on unordered */
10883 *first_code = UNLT;
10884 *bypass_code = UNORDERED;
10885 break;
10886 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10887 *first_code = UNLE;
10888 *bypass_code = UNORDERED;
10889 break;
10890 case EQ: /* EQ - ZF=1 - fails on unordered */
10891 *first_code = UNEQ;
10892 *bypass_code = UNORDERED;
10893 break;
10894 case NE: /* NE - ZF=0 - fails on unordered */
10895 *first_code = LTGT;
10896 *second_code = UNORDERED;
10897 break;
10898 case UNGE: /* GEU - CF=0 - fails on unordered */
10899 *first_code = GE;
10900 *second_code = UNORDERED;
10901 break;
10902 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10903 *first_code = GT;
10904 *second_code = UNORDERED;
10905 break;
10906 default:
10907 gcc_unreachable ();
10908 }
10909 if (!TARGET_IEEE_FP)
10910 {
10911 *second_code = UNKNOWN;
10912 *bypass_code = UNKNOWN;
10913 }
10914 }
10915
10916 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10917 All following functions use the number of instructions as the cost metric.
10918 In the future this should be tweaked to compute bytes for optimize_size and
10919 take into account the performance of various instructions on various CPUs. */
10920 static int
10921 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10922 {
10923 if (!TARGET_IEEE_FP)
10924 return 4;
10925 /* The cost of code output by ix86_expand_fp_compare. */
10926 switch (code)
10927 {
10928 case UNLE:
10929 case UNLT:
10930 case LTGT:
10931 case GT:
10932 case GE:
10933 case UNORDERED:
10934 case ORDERED:
10935 case UNEQ:
10936 return 4;
10937 break;
10938 case LT:
10939 case NE:
10940 case EQ:
10941 case UNGE:
10942 return 5;
10943 break;
10944 case LE:
10945 case UNGT:
10946 return 6;
10947 break;
10948 default:
10949 gcc_unreachable ();
10950 }
10951 }
10952
10953 /* Return cost of comparison done using fcomi operation.
10954 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10955 static int
10956 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10957 {
10958 enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not supported -
     this prevents gcc from using it.  */
10961 if (!TARGET_CMOVE)
10962 return 1024;
10963 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10964 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10965 }
10966
10967 /* Return cost of comparison done using sahf operation.
10968 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10969 static int
10970 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10971 {
10972 enum rtx_code bypass_code, first_code, second_code;
  /* Return an arbitrarily high cost when the instruction is not preferred -
     this prevents gcc from using it.  */
10975 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10976 return 1024;
10977 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10978 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10979 }
10980
10981 /* Compute cost of the comparison done using any method.
10982 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10983 static int
10984 ix86_fp_comparison_cost (enum rtx_code code)
10985 {
10986 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10987 int min;
10988
10989 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10990 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10991
10992 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10993 if (min > sahf_cost)
10994 min = sahf_cost;
10995 if (min > fcomi_cost)
10996 min = fcomi_cost;
10997 return min;
10998 }
10999
11000 /* Return true if we should use an FCOMI instruction for this
11001 fp comparison. */
11002
11003 int
11004 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11005 {
11006 enum rtx_code swapped_code = swap_condition (code);
11007
11008 return ((ix86_fp_comparison_cost (code)
11009 == ix86_fp_comparison_fcomi_cost (code))
11010 || (ix86_fp_comparison_cost (swapped_code)
11011 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11012 }
11013
11014 /* Swap, force into registers, or otherwise massage the two operands
11015 to a fp comparison. The operands are updated in place; the new
11016 comparison code is returned. */
11017
11018 static enum rtx_code
11019 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11020 {
11021 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11022 rtx op0 = *pop0, op1 = *pop1;
11023 enum machine_mode op_mode = GET_MODE (op0);
11024 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11025
11026 /* All of the unordered compare instructions only work on registers.
11027 The same is true of the fcomi compare instructions. The XFmode
11028 compare instructions require registers except when comparing
11029 against zero or when converting operand 1 from fixed point to
11030 floating point. */
11031
11032 if (!is_sse
11033 && (fpcmp_mode == CCFPUmode
11034 || (op_mode == XFmode
11035 && ! (standard_80387_constant_p (op0) == 1
11036 || standard_80387_constant_p (op1) == 1)
11037 && GET_CODE (op1) != FLOAT)
11038 || ix86_use_fcomi_compare (code)))
11039 {
11040 op0 = force_reg (op_mode, op0);
11041 op1 = force_reg (op_mode, op1);
11042 }
11043 else
11044 {
11045 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11046 things around if they appear profitable, otherwise force op0
11047 into a register. */
11048
11049 if (standard_80387_constant_p (op0) == 0
11050 || (MEM_P (op0)
11051 && ! (standard_80387_constant_p (op1) == 0
11052 || MEM_P (op1))))
11053 {
11054 rtx tmp;
11055 tmp = op0, op0 = op1, op1 = tmp;
11056 code = swap_condition (code);
11057 }
11058
11059 if (!REG_P (op0))
11060 op0 = force_reg (op_mode, op0);
11061
11062 if (CONSTANT_P (op1))
11063 {
11064 int tmp = standard_80387_constant_p (op1);
11065 if (tmp == 0)
11066 op1 = validize_mem (force_const_mem (op_mode, op1));
11067 else if (tmp == 1)
11068 {
11069 if (TARGET_CMOVE)
11070 op1 = force_reg (op_mode, op1);
11071 }
11072 else
11073 op1 = force_reg (op_mode, op1);
11074 }
11075 }
11076
11077 /* Try to rearrange the comparison to make it cheaper. */
11078 if (ix86_fp_comparison_cost (code)
11079 > ix86_fp_comparison_cost (swap_condition (code))
11080 && (REG_P (op1) || !no_new_pseudos))
11081 {
11082 rtx tmp;
11083 tmp = op0, op0 = op1, op1 = tmp;
11084 code = swap_condition (code);
11085 if (!REG_P (op0))
11086 op0 = force_reg (op_mode, op0);
11087 }
11088
11089 *pop0 = op0;
11090 *pop1 = op1;
11091 return code;
11092 }
11093
/* Convert the comparison codes we use to represent FP comparisons into the
   integer codes that will result in a proper branch.  Return UNKNOWN if no
   such code is available.  */
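/* The mapping works because fcomi (and fnstsw + sahf) set CF and ZF the way
   an unsigned integer comparison would, with PF flagging unordered operands;
   hence e.g. GT is tested with the "above" (GTU) condition and UNLT with the
   "below" (LTU) condition.  */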
11097
11098 enum rtx_code
11099 ix86_fp_compare_code_to_integer (enum rtx_code code)
11100 {
11101 switch (code)
11102 {
11103 case GT:
11104 return GTU;
11105 case GE:
11106 return GEU;
11107 case ORDERED:
11108 case UNORDERED:
11109 return code;
11110 break;
11111 case UNEQ:
11112 return EQ;
11113 break;
11114 case UNLT:
11115 return LTU;
11116 break;
11117 case UNLE:
11118 return LEU;
11119 break;
11120 case LTGT:
11121 return NE;
11122 break;
11123 default:
11124 return UNKNOWN;
11125 }
11126 }
11127
11128 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11129
11130 static rtx
11131 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11132 rtx *second_test, rtx *bypass_test)
11133 {
11134 enum machine_mode fpcmp_mode, intcmp_mode;
11135 rtx tmp, tmp2;
11136 int cost = ix86_fp_comparison_cost (code);
11137 enum rtx_code bypass_code, first_code, second_code;
11138
11139 fpcmp_mode = ix86_fp_compare_mode (code);
11140 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11141
11142 if (second_test)
11143 *second_test = NULL_RTX;
11144 if (bypass_test)
11145 *bypass_test = NULL_RTX;
11146
11147 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11148
11149 /* Do fcomi/sahf based test when profitable. */
11150 if ((TARGET_CMOVE || TARGET_SAHF)
11151 && (bypass_code == UNKNOWN || bypass_test)
11152 && (second_code == UNKNOWN || second_test)
11153 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11154 {
11155 if (TARGET_CMOVE)
11156 {
11157 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11158 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11159 tmp);
11160 emit_insn (tmp);
11161 }
11162 else
11163 {
11164 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11165 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11166 if (!scratch)
11167 scratch = gen_reg_rtx (HImode);
11168 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11169 emit_insn (gen_x86_sahf_1 (scratch));
11170 }
11171
11172 /* The FP codes work out to act like unsigned. */
11173 intcmp_mode = fpcmp_mode;
11174 code = first_code;
11175 if (bypass_code != UNKNOWN)
11176 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11177 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11178 const0_rtx);
11179 if (second_code != UNKNOWN)
11180 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11181 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11182 const0_rtx);
11183 }
11184 else
11185 {
      /* Reg-stack pops clobber the FP status word, so we have to emit the
	 fnstsw first.  */
11187 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11188 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11189 if (!scratch)
11190 scratch = gen_reg_rtx (HImode);
11191 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11192
      /* In the unordered case, we have to check C2 for NaNs, which
	 doesn't happen to work out to anything nice combination-wise.
	 So do some bit twiddling on the value we've got in AH to come
	 up with an appropriate set of condition codes.  */
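      /* After the fnstsw above the high byte of SCRATCH (i.e. AH) holds the
	 x87 condition bits: C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in
	 bit 6 (0x40); the 0x45 masks below therefore select C3|C2|C0.  For a
	 compare these play the roles of CF, PF and ZF in the fcomi table
	 above.  */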
11197
11198 intcmp_mode = CCNOmode;
11199 switch (code)
11200 {
11201 case GT:
11202 case UNGT:
11203 if (code == GT || !TARGET_IEEE_FP)
11204 {
11205 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11206 code = EQ;
11207 }
11208 else
11209 {
11210 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11211 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11212 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11213 intcmp_mode = CCmode;
11214 code = GEU;
11215 }
11216 break;
11217 case LT:
11218 case UNLT:
11219 if (code == LT && TARGET_IEEE_FP)
11220 {
11221 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11222 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11223 intcmp_mode = CCmode;
11224 code = EQ;
11225 }
11226 else
11227 {
11228 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11229 code = NE;
11230 }
11231 break;
11232 case GE:
11233 case UNGE:
11234 if (code == GE || !TARGET_IEEE_FP)
11235 {
11236 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11237 code = EQ;
11238 }
11239 else
11240 {
11241 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11242 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11243 GEN_INT (0x01)));
11244 code = NE;
11245 }
11246 break;
11247 case LE:
11248 case UNLE:
11249 if (code == LE && TARGET_IEEE_FP)
11250 {
11251 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11252 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11253 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11254 intcmp_mode = CCmode;
11255 code = LTU;
11256 }
11257 else
11258 {
11259 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11260 code = NE;
11261 }
11262 break;
11263 case EQ:
11264 case UNEQ:
11265 if (code == EQ && TARGET_IEEE_FP)
11266 {
11267 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11268 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11269 intcmp_mode = CCmode;
11270 code = EQ;
11271 }
11272 else
11273 {
11274 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11275 code = NE;
11276 break;
11277 }
11278 break;
11279 case NE:
11280 case LTGT:
11281 if (code == NE && TARGET_IEEE_FP)
11282 {
11283 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11284 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11285 GEN_INT (0x40)));
11286 code = NE;
11287 }
11288 else
11289 {
11290 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11291 code = EQ;
11292 }
11293 break;
11294
11295 case UNORDERED:
11296 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11297 code = NE;
11298 break;
11299 case ORDERED:
11300 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11301 code = EQ;
11302 break;
11303
11304 default:
11305 gcc_unreachable ();
11306 }
11307 }
11308
11309 /* Return the test that should be put into the flags user, i.e.
11310 the bcc, scc, or cmov instruction. */
11311 return gen_rtx_fmt_ee (code, VOIDmode,
11312 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11313 const0_rtx);
11314 }
11315
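/* Expand the comparison of ix86_compare_op0 with ix86_compare_op1 using
   comparison code CODE (or reuse the flags in ix86_compare_emitted if it is
   set), and return an rtx suitable as the condition of a jump, set or cmov.
   *SECOND_TEST and *BYPASS_TEST are filled in for IEEE floating point
   comparisons that need more than one flags test; see
   ix86_expand_fp_compare.  */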
11316 rtx
11317 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11318 {
11319 rtx op0, op1, ret;
11320 op0 = ix86_compare_op0;
11321 op1 = ix86_compare_op1;
11322
11323 if (second_test)
11324 *second_test = NULL_RTX;
11325 if (bypass_test)
11326 *bypass_test = NULL_RTX;
11327
11328 if (ix86_compare_emitted)
11329 {
11330 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11331 ix86_compare_emitted = NULL_RTX;
11332 }
11333 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11334 {
11335 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11336 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11337 second_test, bypass_test);
11338 }
11339 else
11340 ret = ix86_expand_int_compare (code, op0, op1);
11341
11342 return ret;
11343 }
11344
11345 /* Return true if the CODE will result in nontrivial jump sequence. */
11346 bool
11347 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11348 {
11349 enum rtx_code bypass_code, first_code, second_code;
11350 if (!TARGET_CMOVE)
11351 return true;
11352 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11353 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11354 }
11355
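/* Expand a conditional branch that compares ix86_compare_op0 with
   ix86_compare_op1 using comparison code CODE and jumps to LABEL when the
   condition is true.  Double-word integer comparisons are split into
   multiple compare+branch sequences; FP comparisons may need an extra
   bypass or second branch as computed by ix86_fp_comparison_codes.  */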
11356 void
11357 ix86_expand_branch (enum rtx_code code, rtx label)
11358 {
11359 rtx tmp;
11360
11361 /* If we have emitted a compare insn, go straight to simple.
11362 ix86_expand_compare won't emit anything if ix86_compare_emitted
     is non-NULL.  */
11364 if (ix86_compare_emitted)
11365 goto simple;
11366
11367 switch (GET_MODE (ix86_compare_op0))
11368 {
11369 case QImode:
11370 case HImode:
11371 case SImode:
11372 simple:
11373 tmp = ix86_expand_compare (code, NULL, NULL);
11374 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11375 gen_rtx_LABEL_REF (VOIDmode, label),
11376 pc_rtx);
11377 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11378 return;
11379
11380 case SFmode:
11381 case DFmode:
11382 case XFmode:
11383 {
11384 rtvec vec;
11385 int use_fcomi;
11386 enum rtx_code bypass_code, first_code, second_code;
11387
11388 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11389 &ix86_compare_op1);
11390
11391 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11392
	/* Check whether we will use the natural sequence with one jump.  If
	   so, we can expand the jump early.  Otherwise delay expansion by
	   creating a compound insn so as not to confuse the optimizers.  */
11396 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11397 && TARGET_CMOVE)
11398 {
11399 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11400 gen_rtx_LABEL_REF (VOIDmode, label),
11401 pc_rtx, NULL_RTX, NULL_RTX);
11402 }
11403 else
11404 {
11405 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11406 ix86_compare_op0, ix86_compare_op1);
11407 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11408 gen_rtx_LABEL_REF (VOIDmode, label),
11409 pc_rtx);
11410 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11411
11412 use_fcomi = ix86_use_fcomi_compare (code);
11413 vec = rtvec_alloc (3 + !use_fcomi);
11414 RTVEC_ELT (vec, 0) = tmp;
11415 RTVEC_ELT (vec, 1)
11416 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11417 RTVEC_ELT (vec, 2)
11418 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11419 if (! use_fcomi)
11420 RTVEC_ELT (vec, 3)
11421 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11422
11423 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11424 }
11425 return;
11426 }
11427
11428 case DImode:
11429 if (TARGET_64BIT)
11430 goto simple;
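      /* FALLTHRU */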
11431 case TImode:
      /* Expand a double-word (DImode/TImode) branch into multiple
	 compare+branch sequences.  */
11433 {
11434 rtx lo[2], hi[2], label2;
11435 enum rtx_code code1, code2, code3;
11436 enum machine_mode submode;
11437
11438 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11439 {
11440 tmp = ix86_compare_op0;
11441 ix86_compare_op0 = ix86_compare_op1;
11442 ix86_compare_op1 = tmp;
11443 code = swap_condition (code);
11444 }
11445 if (GET_MODE (ix86_compare_op0) == DImode)
11446 {
11447 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11448 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11449 submode = SImode;
11450 }
11451 else
11452 {
11453 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11454 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11455 submode = DImode;
11456 }
11457
11458 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11459 avoid two branches. This costs one extra insn, so disable when
11460 optimizing for size. */
11461
11462 if ((code == EQ || code == NE)
11463 && (!optimize_size
11464 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11465 {
11466 rtx xor0, xor1;
11467
11468 xor1 = hi[0];
11469 if (hi[1] != const0_rtx)
11470 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11471 NULL_RTX, 0, OPTAB_WIDEN);
11472
11473 xor0 = lo[0];
11474 if (lo[1] != const0_rtx)
11475 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11476 NULL_RTX, 0, OPTAB_WIDEN);
11477
11478 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11479 NULL_RTX, 0, OPTAB_WIDEN);
11480
11481 ix86_compare_op0 = tmp;
11482 ix86_compare_op1 = const0_rtx;
11483 ix86_expand_branch (code, label);
11484 return;
11485 }
11486
	/* Otherwise, if we are doing a less-than or greater-or-equal-than
	   comparison, op1 is a constant, and the low word is zero, then we
	   can just examine the high word.  */
11490
11491 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11492 switch (code)
11493 {
11494 case LT: case LTU: case GE: case GEU:
11495 ix86_compare_op0 = hi[0];
11496 ix86_compare_op1 = hi[1];
11497 ix86_expand_branch (code, label);
11498 return;
11499 default:
11500 break;
11501 }
11502
11503 /* Otherwise, we need two or three jumps. */
11504
11505 label2 = gen_label_rtx ();
11506
11507 code1 = code;
11508 code2 = swap_condition (code);
11509 code3 = unsigned_condition (code);
11510
11511 switch (code)
11512 {
11513 case LT: case GT: case LTU: case GTU:
11514 break;
11515
11516 case LE: code1 = LT; code2 = GT; break;
11517 case GE: code1 = GT; code2 = LT; break;
11518 case LEU: code1 = LTU; code2 = GTU; break;
11519 case GEU: code1 = GTU; code2 = LTU; break;
11520
11521 case EQ: code1 = UNKNOWN; code2 = NE; break;
11522 case NE: code2 = UNKNOWN; break;
11523
11524 default:
11525 gcc_unreachable ();
11526 }
11527
11528 /*
11529 * a < b =>
11530 * if (hi(a) < hi(b)) goto true;
11531 * if (hi(a) > hi(b)) goto false;
11532 * if (lo(a) < lo(b)) goto true;
11533 * false:
11534 */
11535
11536 ix86_compare_op0 = hi[0];
11537 ix86_compare_op1 = hi[1];
11538
11539 if (code1 != UNKNOWN)
11540 ix86_expand_branch (code1, label);
11541 if (code2 != UNKNOWN)
11542 ix86_expand_branch (code2, label2);
11543
11544 ix86_compare_op0 = lo[0];
11545 ix86_compare_op1 = lo[1];
11546 ix86_expand_branch (code3, label);
11547
11548 if (code2 != UNKNOWN)
11549 emit_label (label2);
11550 return;
11551 }
11552
11553 default:
11554 gcc_unreachable ();
11555 }
11556 }
11557
11558 /* Split branch based on floating point condition. */
11559 void
11560 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11561 rtx target1, rtx target2, rtx tmp, rtx pushed)
11562 {
11563 rtx second, bypass;
11564 rtx label = NULL_RTX;
11565 rtx condition;
11566 int bypass_probability = -1, second_probability = -1, probability = -1;
11567 rtx i;
11568
11569 if (target2 != pc_rtx)
11570 {
11571 rtx tmp = target2;
11572 code = reverse_condition_maybe_unordered (code);
11573 target2 = target1;
11574 target1 = tmp;
11575 }
11576
11577 condition = ix86_expand_fp_compare (code, op1, op2,
11578 tmp, &second, &bypass);
11579
11580 /* Remove pushed operand from stack. */
11581 if (pushed)
11582 ix86_free_from_memory (GET_MODE (pushed));
11583
11584 if (split_branch_probability >= 0)
11585 {
      /* Distribute the probabilities across the jumps.
	 Assume that BYPASS and SECOND always test
	 for UNORDERED.  */
11589 probability = split_branch_probability;
11590
      /* A value of 1 is low enough that there is no need for the probability
	 to be updated.  Later we may run some experiments and see
	 whether unordered values are more frequent in practice.  */
11594 if (bypass)
11595 bypass_probability = 1;
11596 if (second)
11597 second_probability = 1;
11598 }
11599 if (bypass != NULL_RTX)
11600 {
11601 label = gen_label_rtx ();
11602 i = emit_jump_insn (gen_rtx_SET
11603 (VOIDmode, pc_rtx,
11604 gen_rtx_IF_THEN_ELSE (VOIDmode,
11605 bypass,
11606 gen_rtx_LABEL_REF (VOIDmode,
11607 label),
11608 pc_rtx)));
11609 if (bypass_probability >= 0)
11610 REG_NOTES (i)
11611 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11612 GEN_INT (bypass_probability),
11613 REG_NOTES (i));
11614 }
11615 i = emit_jump_insn (gen_rtx_SET
11616 (VOIDmode, pc_rtx,
11617 gen_rtx_IF_THEN_ELSE (VOIDmode,
11618 condition, target1, target2)));
11619 if (probability >= 0)
11620 REG_NOTES (i)
11621 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11622 GEN_INT (probability),
11623 REG_NOTES (i));
11624 if (second != NULL_RTX)
11625 {
11626 i = emit_jump_insn (gen_rtx_SET
11627 (VOIDmode, pc_rtx,
11628 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11629 target2)));
11630 if (second_probability >= 0)
11631 REG_NOTES (i)
11632 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11633 GEN_INT (second_probability),
11634 REG_NOTES (i));
11635 }
11636 if (label != NULL_RTX)
11637 emit_label (label);
11638 }
11639
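/* Expand a setcc: store into the QImode register DEST the truth value of
   comparing ix86_compare_op0 with ix86_compare_op1 using comparison code
   CODE.  When an IEEE FP comparison requires two flag tests, the two QImode
   results are combined with an AND (bypass test) or an OR (second test).
   Return 1 for DONE, 0 for FAIL.  */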
11640 int
11641 ix86_expand_setcc (enum rtx_code code, rtx dest)
11642 {
11643 rtx ret, tmp, tmpreg, equiv;
11644 rtx second_test, bypass_test;
11645
11646 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11647 return 0; /* FAIL */
11648
11649 gcc_assert (GET_MODE (dest) == QImode);
11650
11651 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11652 PUT_MODE (ret, QImode);
11653
11654 tmp = dest;
11655 tmpreg = dest;
11656
11657 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11658 if (bypass_test || second_test)
11659 {
11660 rtx test = second_test;
11661 int bypass = 0;
11662 rtx tmp2 = gen_reg_rtx (QImode);
11663 if (bypass_test)
11664 {
11665 gcc_assert (!second_test);
11666 test = bypass_test;
11667 bypass = 1;
11668 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11669 }
11670 PUT_MODE (test, QImode);
11671 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11672
11673 if (bypass)
11674 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11675 else
11676 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11677 }
11678
11679 /* Attach a REG_EQUAL note describing the comparison result. */
11680 if (ix86_compare_op0 && ix86_compare_op1)
11681 {
11682 equiv = simplify_gen_relational (code, QImode,
11683 GET_MODE (ix86_compare_op0),
11684 ix86_compare_op0, ix86_compare_op1);
11685 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11686 }
11687
11688 return 1; /* DONE */
11689 }
11690
/* Expand a comparison setting or clearing the carry flag.  Return true when
   successful and set *POP to the comparison for the operation.  */
11693 static bool
11694 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11695 {
11696 enum machine_mode mode =
11697 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11698
  /* Do not handle double-word (DImode, or TImode on 64-bit) compares;
     those go through a special path.  */
11701 if (mode == (TARGET_64BIT ? TImode : DImode))
11702 return false;
11703
11704 if (SCALAR_FLOAT_MODE_P (mode))
11705 {
11706 rtx second_test = NULL, bypass_test = NULL;
11707 rtx compare_op, compare_seq;
11708
11709 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11710
11711 /* Shortcut: following common codes never translate
11712 into carry flag compares. */
11713 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11714 || code == ORDERED || code == UNORDERED)
11715 return false;
11716
11717 /* These comparisons require zero flag; swap operands so they won't. */
11718 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11719 && !TARGET_IEEE_FP)
11720 {
11721 rtx tmp = op0;
11722 op0 = op1;
11723 op1 = tmp;
11724 code = swap_condition (code);
11725 }
11726
      /* Try to expand the comparison and verify that we end up with a
	 carry-flag based comparison.  This fails to be true only when we
	 decide to expand the comparison using arithmetic, which is not a
	 common scenario.  */
11730 start_sequence ();
11731 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11732 &second_test, &bypass_test);
11733 compare_seq = get_insns ();
11734 end_sequence ();
11735
11736 if (second_test || bypass_test)
11737 return false;
11738 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11739 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11740 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11741 else
11742 code = GET_CODE (compare_op);
11743 if (code != LTU && code != GEU)
11744 return false;
11745 emit_insn (compare_seq);
11746 *pop = compare_op;
11747 return true;
11748 }
11749 if (!INTEGRAL_MODE_P (mode))
11750 return false;
11751 switch (code)
11752 {
11753 case LTU:
11754 case GEU:
11755 break;
11756
11757 /* Convert a==0 into (unsigned)a<1. */
11758 case EQ:
11759 case NE:
11760 if (op1 != const0_rtx)
11761 return false;
11762 op1 = const1_rtx;
11763 code = (code == EQ ? LTU : GEU);
11764 break;
11765
11766 /* Convert a>b into b<a or a>=b-1. */
11767 case GTU:
11768 case LEU:
11769 if (CONST_INT_P (op1))
11770 {
11771 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We could still swap the operands, but
	     that would force loading the constant into a register.  */
11774 if (op1 == const0_rtx
11775 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11776 return false;
11777 code = (code == GTU ? GEU : LTU);
11778 }
11779 else
11780 {
11781 rtx tmp = op1;
11782 op1 = op0;
11783 op0 = tmp;
11784 code = (code == GTU ? LTU : GEU);
11785 }
11786 break;
11787
11788 /* Convert a>=0 into (unsigned)a<0x80000000. */
11789 case LT:
11790 case GE:
11791 if (mode == DImode || op1 != const0_rtx)
11792 return false;
11793 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11794 code = (code == LT ? GEU : LTU);
11795 break;
11796 case LE:
11797 case GT:
11798 if (mode == DImode || op1 != constm1_rtx)
11799 return false;
11800 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11801 code = (code == LE ? GEU : LTU);
11802 break;
11803
11804 default:
11805 return false;
11806 }
  /* Swapping operands may cause the constant to appear as the first operand.  */
11808 if (!nonimmediate_operand (op0, VOIDmode))
11809 {
11810 if (no_new_pseudos)
11811 return false;
11812 op0 = force_reg (mode, op0);
11813 }
11814 ix86_compare_op0 = op0;
11815 ix86_compare_op1 = op1;
11816 *pop = ix86_expand_compare (code, NULL, NULL);
11817 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11818 return true;
11819 }
11820
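/* Expand an integer conditional move:
   operands[0] = (comparison in operands[1], i.e. ix86_compare_op0 against
   ix86_compare_op1) ? operands[2] : operands[3].  Constant arms are handled
   with the sbb/setcc/lea sequences sketched in the comments below; the
   general case falls back to cmov.  Return 1 for DONE, 0 for FAIL.  */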
11821 int
11822 ix86_expand_int_movcc (rtx operands[])
11823 {
11824 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11825 rtx compare_seq, compare_op;
11826 rtx second_test, bypass_test;
11827 enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
11829
11830 start_sequence ();
11831 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11832 compare_seq = get_insns ();
11833 end_sequence ();
11834
11835 compare_code = GET_CODE (compare_op);
11836
11837 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11838 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11839 sign_bit_compare_p = true;
11840
11841 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11842 HImode insns, we'd be swallowed in word prefix ops. */
11843
11844 if ((mode != HImode || TARGET_FAST_PREFIX)
11845 && (mode != (TARGET_64BIT ? TImode : DImode))
11846 && CONST_INT_P (operands[2])
11847 && CONST_INT_P (operands[3]))
11848 {
11849 rtx out = operands[0];
11850 HOST_WIDE_INT ct = INTVAL (operands[2]);
11851 HOST_WIDE_INT cf = INTVAL (operands[3]);
11852 HOST_WIDE_INT diff;
11853
11854 diff = ct - cf;
      /* Sign-bit compares are better done using shifts than using sbb.  */
11857 if (sign_bit_compare_p
11858 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11859 ix86_compare_op1, &compare_op))
11860 {
11861 /* Detect overlap between destination and compare sources. */
11862 rtx tmp = out;
11863
11864 if (!sign_bit_compare_p)
11865 {
11866 bool fpcmp = false;
11867
11868 compare_code = GET_CODE (compare_op);
11869
11870 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11871 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11872 {
11873 fpcmp = true;
11874 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11875 }
11876
	      /* To simplify the rest of the code, restrict to the GEU case.  */
11878 if (compare_code == LTU)
11879 {
11880 HOST_WIDE_INT tmp = ct;
11881 ct = cf;
11882 cf = tmp;
11883 compare_code = reverse_condition (compare_code);
11884 code = reverse_condition (code);
11885 }
11886 else
11887 {
11888 if (fpcmp)
11889 PUT_CODE (compare_op,
11890 reverse_condition_maybe_unordered
11891 (GET_CODE (compare_op)));
11892 else
11893 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11894 }
11895 diff = ct - cf;
11896
11897 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11898 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11899 tmp = gen_reg_rtx (mode);
11900
11901 if (mode == DImode)
11902 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11903 else
11904 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11905 }
11906 else
11907 {
11908 if (code == GT || code == GE)
11909 code = reverse_condition (code);
11910 else
11911 {
11912 HOST_WIDE_INT tmp = ct;
11913 ct = cf;
11914 cf = tmp;
11915 diff = ct - cf;
11916 }
11917 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11918 ix86_compare_op1, VOIDmode, 0, -1);
11919 }
11920
11921 if (diff == 1)
11922 {
11923 /*
11924 * cmpl op0,op1
11925 * sbbl dest,dest
11926 * [addl dest, ct]
11927 *
11928 * Size 5 - 8.
11929 */
11930 if (ct)
11931 tmp = expand_simple_binop (mode, PLUS,
11932 tmp, GEN_INT (ct),
11933 copy_rtx (tmp), 1, OPTAB_DIRECT);
11934 }
11935 else if (cf == -1)
11936 {
11937 /*
11938 * cmpl op0,op1
11939 * sbbl dest,dest
11940 * orl $ct, dest
11941 *
11942 * Size 8.
11943 */
11944 tmp = expand_simple_binop (mode, IOR,
11945 tmp, GEN_INT (ct),
11946 copy_rtx (tmp), 1, OPTAB_DIRECT);
11947 }
11948 else if (diff == -1 && ct)
11949 {
11950 /*
11951 * cmpl op0,op1
11952 * sbbl dest,dest
11953 * notl dest
11954 * [addl dest, cf]
11955 *
11956 * Size 8 - 11.
11957 */
11958 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11959 if (cf)
11960 tmp = expand_simple_binop (mode, PLUS,
11961 copy_rtx (tmp), GEN_INT (cf),
11962 copy_rtx (tmp), 1, OPTAB_DIRECT);
11963 }
11964 else
11965 {
11966 /*
11967 * cmpl op0,op1
11968 * sbbl dest,dest
11969 * [notl dest]
11970 * andl cf - ct, dest
11971 * [addl dest, ct]
11972 *
11973 * Size 8 - 11.
11974 */
11975
11976 if (cf == 0)
11977 {
11978 cf = ct;
11979 ct = 0;
11980 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11981 }
11982
11983 tmp = expand_simple_binop (mode, AND,
11984 copy_rtx (tmp),
11985 gen_int_mode (cf - ct, mode),
11986 copy_rtx (tmp), 1, OPTAB_DIRECT);
11987 if (ct)
11988 tmp = expand_simple_binop (mode, PLUS,
11989 copy_rtx (tmp), GEN_INT (ct),
11990 copy_rtx (tmp), 1, OPTAB_DIRECT);
11991 }
11992
11993 if (!rtx_equal_p (tmp, out))
11994 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11995
11996 return 1; /* DONE */
11997 }
11998
11999 if (diff < 0)
12000 {
12001 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12002
12003 HOST_WIDE_INT tmp;
12004 tmp = ct, ct = cf, cf = tmp;
12005 diff = -diff;
12006
12007 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12008 {
12009 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12010
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition into a trapping one), however on i386 we currently
		 emit all comparisons unordered.  */
12015 compare_code = reverse_condition_maybe_unordered (compare_code);
12016 code = reverse_condition_maybe_unordered (code);
12017 }
12018 else
12019 {
12020 compare_code = reverse_condition (compare_code);
12021 code = reverse_condition (code);
12022 }
12023 }
12024
12025 compare_code = UNKNOWN;
12026 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12027 && CONST_INT_P (ix86_compare_op1))
12028 {
12029 if (ix86_compare_op1 == const0_rtx
12030 && (code == LT || code == GE))
12031 compare_code = code;
12032 else if (ix86_compare_op1 == constm1_rtx)
12033 {
12034 if (code == LE)
12035 compare_code = LT;
12036 else if (code == GT)
12037 compare_code = GE;
12038 }
12039 }
12040
12041 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12042 if (compare_code != UNKNOWN
12043 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12044 && (cf == -1 || ct == -1))
12045 {
	      /* If the lea code below could be used, only optimize
		 if it results in a 2-insn sequence.  */
12048
12049 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12050 || diff == 3 || diff == 5 || diff == 9)
12051 || (compare_code == LT && ct == -1)
12052 || (compare_code == GE && cf == -1))
12053 {
12054 /*
12055 * notl op1 (if necessary)
12056 * sarl $31, op1
12057 * orl cf, op1
12058 */
12059 if (ct != -1)
12060 {
12061 cf = ct;
12062 ct = -1;
12063 code = reverse_condition (code);
12064 }
12065
12066 out = emit_store_flag (out, code, ix86_compare_op0,
12067 ix86_compare_op1, VOIDmode, 0, -1);
12068
12069 out = expand_simple_binop (mode, IOR,
12070 out, GEN_INT (cf),
12071 out, 1, OPTAB_DIRECT);
12072 if (out != operands[0])
12073 emit_move_insn (operands[0], out);
12074
12075 return 1; /* DONE */
12076 }
12077 }
12078
12079
12080 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12081 || diff == 3 || diff == 5 || diff == 9)
12082 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12083 && (mode != DImode
12084 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12085 {
12086 /*
12087 * xorl dest,dest
12088 * cmpl op1,op2
12089 * setcc dest
12090 * lea cf(dest*(ct-cf)),dest
12091 *
12092 * Size 14.
12093 *
12094 * This also catches the degenerate setcc-only case.
12095 */
12096
12097 rtx tmp;
12098 int nops;
12099
12100 out = emit_store_flag (out, code, ix86_compare_op0,
12101 ix86_compare_op1, VOIDmode, 0, 1);
12102
12103 nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     to get the arithmetic done in the proper mode to match.  */
12106 if (diff == 1)
12107 tmp = copy_rtx (out);
12108 else
12109 {
12110 rtx out1;
12111 out1 = copy_rtx (out);
12112 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12113 nops++;
12114 if (diff & 1)
12115 {
12116 tmp = gen_rtx_PLUS (mode, tmp, out1);
12117 nops++;
12118 }
12119 }
12120 if (cf != 0)
12121 {
12122 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12123 nops++;
12124 }
12125 if (!rtx_equal_p (tmp, out))
12126 {
12127 if (nops == 1)
12128 out = force_operand (tmp, copy_rtx (out));
12129 else
12130 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12131 }
12132 if (!rtx_equal_p (out, operands[0]))
12133 emit_move_insn (operands[0], copy_rtx (out));
12134
12135 return 1; /* DONE */
12136 }
12137
12138 /*
12139 * General case: Jumpful:
12140 * xorl dest,dest cmpl op1, op2
12141 * cmpl op1, op2 movl ct, dest
12142 * setcc dest jcc 1f
12143 * decl dest movl cf, dest
12144 * andl (cf-ct),dest 1:
12145 * addl ct,dest
12146 *
12147 * Size 20. Size 14.
12148 *
12149 * This is reasonably steep, but branch mispredict costs are
12150 * high on modern cpus, so consider failing only if optimizing
12151 * for space.
12152 */
12153
12154 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12155 && BRANCH_COST >= 2)
12156 {
12157 if (cf == 0)
12158 {
12159 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12160
12161 cf = ct;
12162 ct = 0;
12163
12164 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12165 {
12166 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12167
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition into a trapping one), however on i386 we currently
		 emit all comparisons unordered.  */
12172 code = reverse_condition_maybe_unordered (code);
12173 }
12174 else
12175 {
12176 code = reverse_condition (code);
12177 if (compare_code != UNKNOWN)
12178 compare_code = reverse_condition (compare_code);
12179 }
12180 }
12181
12182 if (compare_code != UNKNOWN)
12183 {
12184 /* notl op1 (if needed)
12185 sarl $31, op1
12186 andl (cf-ct), op1
12187 addl ct, op1
12188
12189 For x < 0 (resp. x <= -1) there will be no notl,
12190 so if possible swap the constants to get rid of the
12191 complement.
12192 True/false will be -1/0 while code below (store flag
12193 followed by decrement) is 0/-1, so the constants need
12194 to be exchanged once more. */
12195
12196 if (compare_code == GE || !cf)
12197 {
12198 code = reverse_condition (code);
12199 compare_code = LT;
12200 }
12201 else
12202 {
12203 HOST_WIDE_INT tmp = cf;
12204 cf = ct;
12205 ct = tmp;
12206 }
12207
12208 out = emit_store_flag (out, code, ix86_compare_op0,
12209 ix86_compare_op1, VOIDmode, 0, -1);
12210 }
12211 else
12212 {
12213 out = emit_store_flag (out, code, ix86_compare_op0,
12214 ix86_compare_op1, VOIDmode, 0, 1);
12215
12216 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12217 copy_rtx (out), 1, OPTAB_DIRECT);
12218 }
12219
12220 out = expand_simple_binop (mode, AND, copy_rtx (out),
12221 gen_int_mode (cf - ct, mode),
12222 copy_rtx (out), 1, OPTAB_DIRECT);
12223 if (ct)
12224 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12225 copy_rtx (out), 1, OPTAB_DIRECT);
12226 if (!rtx_equal_p (out, operands[0]))
12227 emit_move_insn (operands[0], copy_rtx (out));
12228
12229 return 1; /* DONE */
12230 }
12231 }
12232
12233 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12234 {
12235 /* Try a few things more with specific constants and a variable. */
12236
12237 optab op;
12238 rtx var, orig_out, out, tmp;
12239
12240 if (BRANCH_COST <= 2)
12241 return 0; /* FAIL */
12242
      /* If one of the two operands is an interesting constant, load -1 or 0
	 conditionally using the sequences above and then mask the variable in
	 with a logical operation.  */
12245
12246 if (CONST_INT_P (operands[2]))
12247 {
12248 var = operands[3];
12249 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12250 operands[3] = constm1_rtx, op = and_optab;
12251 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12252 operands[3] = const0_rtx, op = ior_optab;
12253 else
12254 return 0; /* FAIL */
12255 }
12256 else if (CONST_INT_P (operands[3]))
12257 {
12258 var = operands[2];
12259 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12260 operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12262 operands[2] = const0_rtx, op = ior_optab;
12263 else
12264 return 0; /* FAIL */
12265 }
12266 else
12267 return 0; /* FAIL */
12268
12269 orig_out = operands[0];
12270 tmp = gen_reg_rtx (mode);
12271 operands[0] = tmp;
12272
12273 /* Recurse to get the constant loaded. */
12274 if (ix86_expand_int_movcc (operands) == 0)
12275 return 0; /* FAIL */
12276
12277 /* Mask in the interesting variable. */
12278 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12279 OPTAB_WIDEN);
12280 if (!rtx_equal_p (out, orig_out))
12281 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12282
12283 return 1; /* DONE */
12284 }
12285
12286 /*
12287 * For comparison with above,
12288 *
12289 * movl cf,dest
12290 * movl ct,tmp
12291 * cmpl op1,op2
12292 * cmovcc tmp,dest
12293 *
12294 * Size 15.
12295 */
12296
12297 if (! nonimmediate_operand (operands[2], mode))
12298 operands[2] = force_reg (mode, operands[2]);
12299 if (! nonimmediate_operand (operands[3], mode))
12300 operands[3] = force_reg (mode, operands[3]);
12301
12302 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12303 {
12304 rtx tmp = gen_reg_rtx (mode);
12305 emit_move_insn (tmp, operands[3]);
12306 operands[3] = tmp;
12307 }
12308 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12309 {
12310 rtx tmp = gen_reg_rtx (mode);
12311 emit_move_insn (tmp, operands[2]);
12312 operands[2] = tmp;
12313 }
12314
12315 if (! register_operand (operands[2], VOIDmode)
12316 && (mode == QImode
12317 || ! register_operand (operands[3], VOIDmode)))
12318 operands[2] = force_reg (mode, operands[2]);
12319
12320 if (mode == QImode
12321 && ! register_operand (operands[3], VOIDmode))
12322 operands[3] = force_reg (mode, operands[3]);
12323
12324 emit_insn (compare_seq);
12325 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12326 gen_rtx_IF_THEN_ELSE (mode,
12327 compare_op, operands[2],
12328 operands[3])));
12329 if (bypass_test)
12330 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12331 gen_rtx_IF_THEN_ELSE (mode,
12332 bypass_test,
12333 copy_rtx (operands[3]),
12334 copy_rtx (operands[0]))));
12335 if (second_test)
12336 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12337 gen_rtx_IF_THEN_ELSE (mode,
12338 second_test,
12339 copy_rtx (operands[2]),
12340 copy_rtx (operands[0]))));
12341
12342 return 1; /* DONE */
12343 }
12344
12345 /* Swap, force into registers, or otherwise massage the two operands
12346 to an sse comparison with a mask result. Thus we differ a bit from
12347 ix86_prepare_fp_compare_args which expects to produce a flags result.
12348
12349 The DEST operand exists to help determine whether to commute commutative
12350 operators. The POP0/POP1 operands are updated in place. The new
12351 comparison code is returned, or UNKNOWN if not implementable. */
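/* Note that the SSE compare instructions provide only the eq, lt, le and
   unord predicates and their negations, so ge, gt, unle and unlt (and, for
   the commutative codes, the case where the destination matches the second
   operand) are handled by swapping the operands below.  */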
12352
12353 static enum rtx_code
12354 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12355 rtx *pop0, rtx *pop1)
12356 {
12357 rtx tmp;
12358
12359 switch (code)
12360 {
12361 case LTGT:
12362 case UNEQ:
12363 /* We have no LTGT as an operator. We could implement it with
12364 NE & ORDERED, but this requires an extra temporary. It's
12365 not clear that it's worth it. */
12366 return UNKNOWN;
12367
12368 case LT:
12369 case LE:
12370 case UNGT:
12371 case UNGE:
12372 /* These are supported directly. */
12373 break;
12374
12375 case EQ:
12376 case NE:
12377 case UNORDERED:
12378 case ORDERED:
12379 /* For commutative operators, try to canonicalize the destination
12380 operand to be first in the comparison - this helps reload to
12381 avoid extra moves. */
12382 if (!dest || !rtx_equal_p (dest, *pop1))
12383 break;
12384 /* FALLTHRU */
12385
12386 case GE:
12387 case GT:
12388 case UNLE:
12389 case UNLT:
12390 /* These are not supported directly. Swap the comparison operands
12391 to transform into something that is supported. */
12392 tmp = *pop0;
12393 *pop0 = *pop1;
12394 *pop1 = tmp;
12395 code = swap_condition (code);
12396 break;
12397
12398 default:
12399 gcc_unreachable ();
12400 }
12401
12402 return code;
12403 }
12404
12405 /* Detect conditional moves that exactly match min/max operational
12406 semantics. Note that this is IEEE safe, as long as we don't
12407 interchange the operands.
12408
12409 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12410 and TRUE if the operation is successful and instructions are emitted. */
12411
12412 static bool
12413 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12414 rtx cmp_op1, rtx if_true, rtx if_false)
12415 {
12416 enum machine_mode mode;
12417 bool is_min;
12418 rtx tmp;
12419
12420 if (code == LT)
12421 ;
12422 else if (code == UNGE)
12423 {
12424 tmp = if_true;
12425 if_true = if_false;
12426 if_false = tmp;
12427 }
12428 else
12429 return false;
12430
12431 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12432 is_min = true;
12433 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12434 is_min = false;
12435 else
12436 return false;
12437
12438 mode = GET_MODE (dest);
12439
12440 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12441 but MODE may be a vector mode and thus not appropriate. */
12442 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12443 {
12444 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12445 rtvec v;
12446
12447 if_true = force_reg (mode, if_true);
12448 v = gen_rtvec (2, if_true, if_false);
12449 tmp = gen_rtx_UNSPEC (mode, v, u);
12450 }
12451 else
12452 {
12453 code = is_min ? SMIN : SMAX;
12454 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12455 }
12456
12457 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12458 return true;
12459 }
12460
12461 /* Expand an sse vector comparison. Return the register with the result. */
12462
12463 static rtx
12464 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12465 rtx op_true, rtx op_false)
12466 {
12467 enum machine_mode mode = GET_MODE (dest);
12468 rtx x;
12469
12470 cmp_op0 = force_reg (mode, cmp_op0);
12471 if (!nonimmediate_operand (cmp_op1, mode))
12472 cmp_op1 = force_reg (mode, cmp_op1);
12473
12474 if (optimize
12475 || reg_overlap_mentioned_p (dest, op_true)
12476 || reg_overlap_mentioned_p (dest, op_false))
12477 dest = gen_reg_rtx (mode);
12478
12479 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12480 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12481
12482 return dest;
12483 }
12484
12485 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12486 operations. This is used for both scalar and vector conditional moves. */
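/* The expansion below computes the bitwise select

	dest = (cmp & op_true) | (~cmp & op_false)

   relying on CMP being an all-ones or all-zeros mask in each element, with
   the obvious simplifications when one of the arms is zero.  */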
12487
12488 static void
12489 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12490 {
12491 enum machine_mode mode = GET_MODE (dest);
12492 rtx t2, t3, x;
12493
12494 if (op_false == CONST0_RTX (mode))
12495 {
12496 op_true = force_reg (mode, op_true);
12497 x = gen_rtx_AND (mode, cmp, op_true);
12498 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12499 }
12500 else if (op_true == CONST0_RTX (mode))
12501 {
12502 op_false = force_reg (mode, op_false);
12503 x = gen_rtx_NOT (mode, cmp);
12504 x = gen_rtx_AND (mode, x, op_false);
12505 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12506 }
12507 else
12508 {
12509 op_true = force_reg (mode, op_true);
12510 op_false = force_reg (mode, op_false);
12511
12512 t2 = gen_reg_rtx (mode);
12513 if (optimize)
12514 t3 = gen_reg_rtx (mode);
12515 else
12516 t3 = dest;
12517
12518 x = gen_rtx_AND (mode, op_true, cmp);
12519 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12520
12521 x = gen_rtx_NOT (mode, cmp);
12522 x = gen_rtx_AND (mode, x, op_false);
12523 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12524
12525 x = gen_rtx_IOR (mode, t3, t2);
12526 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12527 }
12528 }
12529
12530 /* Expand a floating-point conditional move. Return true if successful. */
12531
12532 int
12533 ix86_expand_fp_movcc (rtx operands[])
12534 {
12535 enum machine_mode mode = GET_MODE (operands[0]);
12536 enum rtx_code code = GET_CODE (operands[1]);
12537 rtx tmp, compare_op, second_test, bypass_test;
12538
12539 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12540 {
12541 enum machine_mode cmode;
12542
12543 /* Since we've no cmove for sse registers, don't force bad register
12544 allocation just to gain access to it. Deny movcc when the
12545 comparison mode doesn't match the move mode. */
12546 cmode = GET_MODE (ix86_compare_op0);
12547 if (cmode == VOIDmode)
12548 cmode = GET_MODE (ix86_compare_op1);
12549 if (cmode != mode)
12550 return 0;
12551
12552 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12553 &ix86_compare_op0,
12554 &ix86_compare_op1);
12555 if (code == UNKNOWN)
12556 return 0;
12557
12558 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12559 ix86_compare_op1, operands[2],
12560 operands[3]))
12561 return 1;
12562
12563 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12564 ix86_compare_op1, operands[2], operands[3]);
12565 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12566 return 1;
12567 }
12568
12572 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12573
12574 /* The floating point conditional move instructions don't directly
12575 support signed integer comparisons. */
12576
12577 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12578 {
12579 gcc_assert (!second_test && !bypass_test);
12580 tmp = gen_reg_rtx (QImode);
12581 ix86_expand_setcc (code, tmp);
12582 code = NE;
12583 ix86_compare_op0 = tmp;
12584 ix86_compare_op1 = const0_rtx;
12585 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12586 }
12587 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12588 {
12589 tmp = gen_reg_rtx (mode);
12590 emit_move_insn (tmp, operands[3]);
12591 operands[3] = tmp;
12592 }
12593 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12594 {
12595 tmp = gen_reg_rtx (mode);
12596 emit_move_insn (tmp, operands[2]);
12597 operands[2] = tmp;
12598 }
12599
12600 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12601 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12602 operands[2], operands[3])));
12603 if (bypass_test)
12604 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12605 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12606 operands[3], operands[0])));
12607 if (second_test)
12608 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12609 gen_rtx_IF_THEN_ELSE (mode, second_test,
12610 operands[2], operands[0])));
12611
12612 return 1;
12613 }
12614
12615 /* Expand a floating-point vector conditional move; a vcond operation
12616 rather than a movcc operation. */
12617
12618 bool
12619 ix86_expand_fp_vcond (rtx operands[])
12620 {
12621 enum rtx_code code = GET_CODE (operands[3]);
12622 rtx cmp;
12623
12624 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12625 &operands[4], &operands[5]);
12626 if (code == UNKNOWN)
12627 return false;
12628
12629 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12630 operands[5], operands[1], operands[2]))
12631 return true;
12632
12633 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12634 operands[1], operands[2]);
12635 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12636 return true;
12637 }
12638
12639 /* Expand a signed integral vector conditional move. */
12640
12641 bool
12642 ix86_expand_int_vcond (rtx operands[])
12643 {
12644 enum machine_mode mode = GET_MODE (operands[0]);
12645 enum rtx_code code = GET_CODE (operands[3]);
12646 bool negate = false;
12647 rtx x, cop0, cop1;
12648
12649 cop0 = operands[4];
12650 cop1 = operands[5];
12651
12652 /* Canonicalize the comparison to EQ, GT, GTU. */
12653 switch (code)
12654 {
12655 case EQ:
12656 case GT:
12657 case GTU:
12658 break;
12659
12660 case NE:
12661 case LE:
12662 case LEU:
12663 code = reverse_condition (code);
12664 negate = true;
12665 break;
12666
12667 case GE:
12668 case GEU:
12669 code = reverse_condition (code);
12670 negate = true;
12671 /* FALLTHRU */
12672
12673 case LT:
12674 case LTU:
12675 code = swap_condition (code);
12676 x = cop0, cop0 = cop1, cop1 = x;
12677 break;
12678
12679 default:
12680 gcc_unreachable ();
12681 }
12682
12683 /* Unsigned parallel compare is not supported by the hardware. Play some
12684 tricks to turn this into a signed comparison against 0. */
12685 if (code == GTU)
12686 {
12687 cop0 = force_reg (mode, cop0);
12688
12689 switch (mode)
12690 {
12691 case V4SImode:
12692 {
12693 rtx t1, t2, mask;
12694
12695 /* Perform a parallel modulo subtraction. */
12696 t1 = gen_reg_rtx (mode);
12697 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12698
12699 /* Extract the original sign bit of op0. */
12700 mask = GEN_INT (-0x80000000);
12701 mask = gen_rtx_CONST_VECTOR (mode,
12702 gen_rtvec (4, mask, mask, mask, mask));
12703 mask = force_reg (mode, mask);
12704 t2 = gen_reg_rtx (mode);
12705 emit_insn (gen_andv4si3 (t2, cop0, mask));
12706
12707 /* XOR it back into the result of the subtraction. This results
12708 in the sign bit set iff we saw unsigned underflow. */
12709 x = gen_reg_rtx (mode);
12710 emit_insn (gen_xorv4si3 (x, t1, t2));
12711
12712 code = GT;
12713 }
12714 break;
12715
12716 case V16QImode:
12717 case V8HImode:
12718 /* Perform a parallel unsigned saturating subtraction. */
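	  /* cop0 >u cop1 exactly when the saturating difference
	     cop0 -us cop1 is nonzero, hence the EQ test against zero
	     below with NEGATE inverted.  */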
12719 x = gen_reg_rtx (mode);
12720 emit_insn (gen_rtx_SET (VOIDmode, x,
12721 gen_rtx_US_MINUS (mode, cop0, cop1)));
12722
12723 code = EQ;
12724 negate = !negate;
12725 break;
12726
12727 default:
12728 gcc_unreachable ();
12729 }
12730
12731 cop0 = x;
12732 cop1 = CONST0_RTX (mode);
12733 }
12734
12735 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12736 operands[1+negate], operands[2-negate]);
12737
12738 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12739 operands[2-negate]);
12740 return true;
12741 }
12742
12743 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12744 true if we should do zero extension, else sign extension. HIGH_P is
12745 true if we want the N/2 high elements, else the low elements. */
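/* For zero extension the interleave source SE is simply a zero vector.  For
   sign extension SE is a mask that is all ones exactly in the lanes where
   operands[1] is negative (computed below as the vector compare
   0 > operands[1]), so interleaving the value with SE yields the
   sign-extended wider elements.  */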
12746
12747 void
12748 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12749 {
12750 enum machine_mode imode = GET_MODE (operands[1]);
12751 rtx (*unpack)(rtx, rtx, rtx);
12752 rtx se, dest;
12753
12754 switch (imode)
12755 {
12756 case V16QImode:
12757 if (high_p)
12758 unpack = gen_vec_interleave_highv16qi;
12759 else
12760 unpack = gen_vec_interleave_lowv16qi;
12761 break;
12762 case V8HImode:
12763 if (high_p)
12764 unpack = gen_vec_interleave_highv8hi;
12765 else
12766 unpack = gen_vec_interleave_lowv8hi;
12767 break;
12768 case V4SImode:
12769 if (high_p)
12770 unpack = gen_vec_interleave_highv4si;
12771 else
12772 unpack = gen_vec_interleave_lowv4si;
12773 break;
12774 default:
12775 gcc_unreachable ();
12776 }
12777
12778 dest = gen_lowpart (imode, operands[0]);
12779
12780 if (unsigned_p)
12781 se = force_reg (imode, CONST0_RTX (imode));
12782 else
12783 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12784 operands[1], pc_rtx, pc_rtx);
12785
12786 emit_insn (unpack (dest, operands[1], se));
12787 }
12788
/* Expand a conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
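/* For example, with unsigned operands "x += (a < b)" can be emitted as a
   compare of A against B followed by an adc of zero into X, so the carry
   produced by the compare is added directly into the destination; the
   decrement cases use sbb in the same way.  */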
12792 int
12793 ix86_expand_int_addcc (rtx operands[])
12794 {
12795 enum rtx_code code = GET_CODE (operands[1]);
12796 rtx compare_op;
12797 rtx val = const0_rtx;
12798 bool fpcmp = false;
12799 enum machine_mode mode = GET_MODE (operands[0]);
12800
12801 if (operands[3] != const1_rtx
12802 && operands[3] != constm1_rtx)
12803 return 0;
12804 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12805 ix86_compare_op1, &compare_op))
12806 return 0;
12807 code = GET_CODE (compare_op);
12808
12809 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12810 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12811 {
12812 fpcmp = true;
12813 code = ix86_fp_compare_code_to_integer (code);
12814 }
12815
12816 if (code != LTU)
12817 {
12818 val = constm1_rtx;
12819 if (fpcmp)
12820 PUT_CODE (compare_op,
12821 reverse_condition_maybe_unordered
12822 (GET_CODE (compare_op)));
12823 else
12824 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12825 }
12826 PUT_MODE (compare_op, mode);
12827
12828 /* Construct either adc or sbb insn. */
12829 if ((code == LTU) == (operands[3] == constm1_rtx))
12830 {
12831 switch (GET_MODE (operands[0]))
12832 {
12833 case QImode:
12834 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12835 break;
12836 case HImode:
12837 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12838 break;
12839 case SImode:
12840 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12841 break;
12842 case DImode:
12843 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12844 break;
12845 default:
12846 gcc_unreachable ();
12847 }
12848 }
12849 else
12850 {
12851 switch (GET_MODE (operands[0]))
12852 {
12853 case QImode:
12854 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12855 break;
12856 case HImode:
12857 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12858 break;
12859 case SImode:
12860 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12861 break;
12862 case DImode:
12863 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12864 break;
12865 default:
12866 gcc_unreachable ();
12867 }
12868 }
12869 return 1; /* DONE */
12870 }
12871
12872
/* Split operands 0 and 1 into SImode parts.  Similar to split_di, but
   works for floating-point parameters and non-offsettable memories.
   For pushes, it returns just stack offsets; the values will be saved
   in the right order.  At most three parts are generated.  */
12877
12878 static int
12879 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12880 {
12881 int size;
12882
12883 if (!TARGET_64BIT)
12884 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12885 else
12886 size = (GET_MODE_SIZE (mode) + 4) / 8;
12887
12888 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12889 gcc_assert (size >= 2 && size <= 3);
12890
  /* Optimize constant pool references to immediates.  This is used by fp
     moves, which force all constants to memory to allow combining.  */
12893 if (MEM_P (operand) && MEM_READONLY_P (operand))
12894 {
12895 rtx tmp = maybe_get_pool_constant (operand);
12896 if (tmp)
12897 operand = tmp;
12898 }
12899
12900 if (MEM_P (operand) && !offsettable_memref_p (operand))
12901 {
12902 /* The only non-offsettable memories we handle are pushes.  */
12903 int ok = push_operand (operand, VOIDmode);
12904
12905 gcc_assert (ok);
12906
12907 operand = copy_rtx (operand);
12908 PUT_MODE (operand, Pmode);
12909 parts[0] = parts[1] = parts[2] = operand;
12910 return size;
12911 }
12912
12913 if (GET_CODE (operand) == CONST_VECTOR)
12914 {
12915 enum machine_mode imode = int_mode_for_mode (mode);
12916 /* Caution: if we looked through a constant pool memory above,
12917 the operand may actually have a different mode now. That's
12918 ok, since we want to pun this all the way back to an integer. */
12919 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12920 gcc_assert (operand != NULL);
12921 mode = imode;
12922 }
12923
12924 if (!TARGET_64BIT)
12925 {
12926 if (mode == DImode)
12927 split_di (&operand, 1, &parts[0], &parts[1]);
12928 else
12929 {
12930 if (REG_P (operand))
12931 {
12932 gcc_assert (reload_completed);
12933 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12934 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12935 if (size == 3)
12936 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12937 }
12938 else if (offsettable_memref_p (operand))
12939 {
12940 operand = adjust_address (operand, SImode, 0);
12941 parts[0] = operand;
12942 parts[1] = adjust_address (operand, SImode, 4);
12943 if (size == 3)
12944 parts[2] = adjust_address (operand, SImode, 8);
12945 }
12946 else if (GET_CODE (operand) == CONST_DOUBLE)
12947 {
12948 REAL_VALUE_TYPE r;
12949 long l[4];
12950
12951 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12952 switch (mode)
12953 {
12954 case XFmode:
12955 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12956 parts[2] = gen_int_mode (l[2], SImode);
12957 break;
12958 case DFmode:
12959 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12960 break;
12961 default:
12962 gcc_unreachable ();
12963 }
12964 parts[1] = gen_int_mode (l[1], SImode);
12965 parts[0] = gen_int_mode (l[0], SImode);
12966 }
12967 else
12968 gcc_unreachable ();
12969 }
12970 }
12971 else
12972 {
12973 if (mode == TImode)
12974 split_ti (&operand, 1, &parts[0], &parts[1]);
12975 if (mode == XFmode || mode == TFmode)
12976 {
12977 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12978 if (REG_P (operand))
12979 {
12980 gcc_assert (reload_completed);
12981 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12982 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12983 }
12984 else if (offsettable_memref_p (operand))
12985 {
12986 operand = adjust_address (operand, DImode, 0);
12987 parts[0] = operand;
12988 parts[1] = adjust_address (operand, upper_mode, 8);
12989 }
12990 else if (GET_CODE (operand) == CONST_DOUBLE)
12991 {
12992 REAL_VALUE_TYPE r;
12993 long l[4];
12994
12995 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12996 real_to_target (l, &r, mode);
12997
12998 /* Do not use a shift by 32, to avoid a warning on 32-bit systems.  */
12999 if (HOST_BITS_PER_WIDE_INT >= 64)
13000 parts[0]
13001 = gen_int_mode
13002 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13003 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13004 DImode);
13005 else
13006 parts[0] = immed_double_const (l[0], l[1], DImode);
13007
13008 if (upper_mode == SImode)
13009 parts[1] = gen_int_mode (l[2], SImode);
13010 else if (HOST_BITS_PER_WIDE_INT >= 64)
13011 parts[1]
13012 = gen_int_mode
13013 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13014 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13015 DImode);
13016 else
13017 parts[1] = immed_double_const (l[2], l[3], DImode);
13018 }
13019 else
13020 gcc_unreachable ();
13021 }
13022 }
13023
13024 return size;
13025 }
13026
13027 /* Emit insns to perform a move or push of DI, DF, and XF values.
13028    All required insns are emitted by this function itself.  Operands 2-4
13029    are filled with the parts of the destination and operands 5-7 with the
13030    parts of the source, in the correct copy order.  */
13031
13032 void
13033 ix86_split_long_move (rtx operands[])
13034 {
13035 rtx part[2][3];
13036 int nparts;
13037 int push = 0;
13038 int collisions = 0;
13039 enum machine_mode mode = GET_MODE (operands[0]);
13040
13041 /* The DFmode expanders may ask us to move a double.
13042    For a 64-bit target this is a single move.  By hiding that fact
13043    here we simplify the i386.md splitters.  */
13044 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13045 {
13046 /* Optimize constant pool references to immediates.  This is used by
13047    fp moves, which force all constants to memory to allow combining.  */
13048
13049 if (MEM_P (operands[1])
13050 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13051 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13052 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13053 if (push_operand (operands[0], VOIDmode))
13054 {
13055 operands[0] = copy_rtx (operands[0]);
13056 PUT_MODE (operands[0], Pmode);
13057 }
13058 else
13059 operands[0] = gen_lowpart (DImode, operands[0]);
13060 operands[1] = gen_lowpart (DImode, operands[1]);
13061 emit_move_insn (operands[0], operands[1]);
13062 return;
13063 }
13064
13065 /* The only non-offsettable memory we handle is a push.  */
13066 if (push_operand (operands[0], VOIDmode))
13067 push = 1;
13068 else
13069 gcc_assert (!MEM_P (operands[0])
13070 || offsettable_memref_p (operands[0]));
13071
13072 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13073 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13074
13075 /* When emitting a push, take care of source operands on the stack.  */
13076 if (push && MEM_P (operands[1])
13077 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13078 {
13079 if (nparts == 3)
13080 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13081 XEXP (part[1][2], 0));
13082 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13083 XEXP (part[1][1], 0));
13084 }
13085
13086 /* We need to do the copy in the right order in case an address register
13087    of the source overlaps the destination.  */
13088 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13089 {
13090 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13091 collisions++;
13092 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13093 collisions++;
13094 if (nparts == 3
13095 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13096 collisions++;
13097
13098 /* Collision in the middle part can be handled by reordering. */
13099 if (collisions == 1 && nparts == 3
13100 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13101 {
13102 rtx tmp;
13103 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13104 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13105 }
13106
13107 /* If there are more collisions, we can't handle them by reordering.
13108    Do an lea to the last part and use only one colliding move.  */
13109 else if (collisions > 1)
13110 {
13111 rtx base;
13112
13113 collisions = 1;
13114
13115 base = part[0][nparts - 1];
13116
13117 /* Handle the case when the last part isn't valid for lea.
13118    This happens in 64-bit mode when storing the 12-byte XFmode.  */
13119 if (GET_MODE (base) != Pmode)
13120 base = gen_rtx_REG (Pmode, REGNO (base));
13121
13122 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13123 part[1][0] = replace_equiv_address (part[1][0], base);
13124 part[1][1] = replace_equiv_address (part[1][1],
13125 plus_constant (base, UNITS_PER_WORD));
13126 if (nparts == 3)
13127 part[1][2] = replace_equiv_address (part[1][2],
13128 plus_constant (base, 8));
13129 }
13130 }
13131
13132 if (push)
13133 {
13134 if (!TARGET_64BIT)
13135 {
13136 if (nparts == 3)
13137 {
13138 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13139 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13140 emit_move_insn (part[0][2], part[1][2]);
13141 }
13142 }
13143 else
13144 {
13145 /* In 64-bit mode we don't have a 32-bit push available.  In case this is
13146    a register, that is OK - we will just use the larger counterpart.  We also
13147    retype the memory - this comes from an attempt to avoid the REX prefix on
13148    moves of the second half of a TFmode value.  */
13149 if (GET_MODE (part[1][1]) == SImode)
13150 {
13151 switch (GET_CODE (part[1][1]))
13152 {
13153 case MEM:
13154 part[1][1] = adjust_address (part[1][1], DImode, 0);
13155 break;
13156
13157 case REG:
13158 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13159 break;
13160
13161 default:
13162 gcc_unreachable ();
13163 }
13164
13165 if (GET_MODE (part[1][0]) == SImode)
13166 part[1][0] = part[1][1];
13167 }
13168 }
13169 emit_move_insn (part[0][1], part[1][1]);
13170 emit_move_insn (part[0][0], part[1][0]);
13171 return;
13172 }
13173
13174 /* Choose the correct order so we do not overwrite the source before it is copied.  */
13175 if ((REG_P (part[0][0])
13176 && REG_P (part[1][1])
13177 && (REGNO (part[0][0]) == REGNO (part[1][1])
13178 || (nparts == 3
13179 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13180 || (collisions > 0
13181 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13182 {
13183 if (nparts == 3)
13184 {
13185 operands[2] = part[0][2];
13186 operands[3] = part[0][1];
13187 operands[4] = part[0][0];
13188 operands[5] = part[1][2];
13189 operands[6] = part[1][1];
13190 operands[7] = part[1][0];
13191 }
13192 else
13193 {
13194 operands[2] = part[0][1];
13195 operands[3] = part[0][0];
13196 operands[5] = part[1][1];
13197 operands[6] = part[1][0];
13198 }
13199 }
13200 else
13201 {
13202 if (nparts == 3)
13203 {
13204 operands[2] = part[0][0];
13205 operands[3] = part[0][1];
13206 operands[4] = part[0][2];
13207 operands[5] = part[1][0];
13208 operands[6] = part[1][1];
13209 operands[7] = part[1][2];
13210 }
13211 else
13212 {
13213 operands[2] = part[0][0];
13214 operands[3] = part[0][1];
13215 operands[5] = part[1][0];
13216 operands[6] = part[1][1];
13217 }
13218 }
13219
13220 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13221 if (optimize_size)
13222 {
13223 if (CONST_INT_P (operands[5])
13224 && operands[5] != const0_rtx
13225 && REG_P (operands[2]))
13226 {
13227 if (CONST_INT_P (operands[6])
13228 && INTVAL (operands[6]) == INTVAL (operands[5]))
13229 operands[6] = operands[2];
13230
13231 if (nparts == 3
13232 && CONST_INT_P (operands[7])
13233 && INTVAL (operands[7]) == INTVAL (operands[5]))
13234 operands[7] = operands[2];
13235 }
13236
13237 if (nparts == 3
13238 && CONST_INT_P (operands[6])
13239 && operands[6] != const0_rtx
13240 && REG_P (operands[3])
13241 && CONST_INT_P (operands[7])
13242 && INTVAL (operands[7]) == INTVAL (operands[6]))
13243 operands[7] = operands[3];
13244 }
13245
13246 emit_move_insn (operands[2], operands[5]);
13247 emit_move_insn (operands[3], operands[6]);
13248 if (nparts == 3)
13249 emit_move_insn (operands[4], operands[7]);
13250
13251 return;
13252 }
13253
13254 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13255 left shift by a constant, either using a single shift or
13256 a sequence of add instructions. */
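/* Roughly (the actual decision is driven by ix86_cost): a shift by 1
   becomes a single "add reg, reg"; a small constant shift may become a
   short chain of such adds when COUNT * add cost does not exceed the
   constant-shift cost and we are not optimizing for size; otherwise a
   plain shift by the constant is emitted.  */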
13257
13258 static void
13259 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13260 {
13261 if (count == 1)
13262 {
13263 emit_insn ((mode == DImode
13264 ? gen_addsi3
13265 : gen_adddi3) (operand, operand, operand));
13266 }
13267 else if (!optimize_size
13268 && count * ix86_cost->add <= ix86_cost->shift_const)
13269 {
13270 int i;
13271 for (i=0; i<count; i++)
13272 {
13273 emit_insn ((mode == DImode
13274 ? gen_addsi3
13275 : gen_adddi3) (operand, operand, operand));
13276 }
13277 }
13278 else
13279 emit_insn ((mode == DImode
13280 ? gen_ashlsi3
13281 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13282 }
13283
13284 void
13285 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13286 {
13287 rtx low[2], high[2];
13288 int count;
13289 const int single_width = mode == DImode ? 32 : 64;
13290
13291 if (CONST_INT_P (operands[2]))
13292 {
13293 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13294 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13295
13296 if (count >= single_width)
13297 {
13298 emit_move_insn (high[0], low[1]);
13299 emit_move_insn (low[0], const0_rtx);
13300
13301 if (count > single_width)
13302 ix86_expand_ashl_const (high[0], count - single_width, mode);
13303 }
13304 else
13305 {
13306 if (!rtx_equal_p (operands[0], operands[1]))
13307 emit_move_insn (operands[0], operands[1]);
13308 emit_insn ((mode == DImode
13309 ? gen_x86_shld_1
13310 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13311 ix86_expand_ashl_const (low[0], count, mode);
13312 }
13313 return;
13314 }
13315
13316 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13317
13318 if (operands[1] == const1_rtx)
13319 {
13320 /* Assuming we've chosen QImode-capable registers, 1 << N
13321    can be done with two 32/64-bit shifts, no branches, no cmoves.  */
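/* A sketch of the intended DImode sequence (illustrative register choice
   only):

	xorl	%eax, %eax
	xorl	%edx, %edx
	testb	$32, %cl
	sete	%al		# %eax = ((count & 32) == 0)
	setne	%dl		# %edx = ((count & 32) != 0)
	sall	%cl, %eax
	sall	%cl, %edx

   where the two final shifts are emitted by the common code further below.  */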
13322 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13323 {
13324 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13325
13326 ix86_expand_clear (low[0]);
13327 ix86_expand_clear (high[0]);
13328 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13329
13330 d = gen_lowpart (QImode, low[0]);
13331 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13332 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13333 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13334
13335 d = gen_lowpart (QImode, high[0]);
13336 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13337 s = gen_rtx_NE (QImode, flags, const0_rtx);
13338 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13339 }
13340
13341 /* Otherwise, we can get the same results by manually performing
13342 a bit extract operation on bit 5/6, and then performing the two
13343 shifts. The two methods of getting 0/1 into low/high are exactly
13344 the same size. Avoiding the shift in the bit extract case helps
13345 pentium4 a bit; no one else seems to care much either way. */
13346 else
13347 {
13348 rtx x;
13349
13350 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13351 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13352 else
13353 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13354 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13355
13356 emit_insn ((mode == DImode
13357 ? gen_lshrsi3
13358 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13359 emit_insn ((mode == DImode
13360 ? gen_andsi3
13361 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13362 emit_move_insn (low[0], high[0]);
13363 emit_insn ((mode == DImode
13364 ? gen_xorsi3
13365 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13366 }
13367
13368 emit_insn ((mode == DImode
13369 ? gen_ashlsi3
13370 : gen_ashldi3) (low[0], low[0], operands[2]));
13371 emit_insn ((mode == DImode
13372 ? gen_ashlsi3
13373 : gen_ashldi3) (high[0], high[0], operands[2]));
13374 return;
13375 }
13376
13377 if (operands[1] == constm1_rtx)
13378 {
13379 /* For -1 << N, we can avoid the shld instruction, because we
13380 know that we're shifting 0...31/63 ones into a -1. */
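/* E.g. (illustrative): for DImode and a runtime count of 5 the result is
   0xffffffffffffffe0; the high word stays -1 and only the low word needs
   the real shift, which the common code below performs.  */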
13381 emit_move_insn (low[0], constm1_rtx);
13382 if (optimize_size)
13383 emit_move_insn (high[0], low[0]);
13384 else
13385 emit_move_insn (high[0], constm1_rtx);
13386 }
13387 else
13388 {
13389 if (!rtx_equal_p (operands[0], operands[1]))
13390 emit_move_insn (operands[0], operands[1]);
13391
13392 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13393 emit_insn ((mode == DImode
13394 ? gen_x86_shld_1
13395 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13396 }
13397
13398 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13399
13400 if (TARGET_CMOVE && scratch)
13401 {
13402 ix86_expand_clear (scratch);
13403 emit_insn ((mode == DImode
13404 ? gen_x86_shift_adj_1
13405 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13406 }
13407 else
13408 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13409 }
13410
13411 void
13412 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13413 {
13414 rtx low[2], high[2];
13415 int count;
13416 const int single_width = mode == DImode ? 32 : 64;
13417
13418 if (CONST_INT_P (operands[2]))
13419 {
13420 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13421 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13422
13423 if (count == single_width * 2 - 1)
13424 {
13425 emit_move_insn (high[0], high[1]);
13426 emit_insn ((mode == DImode
13427 ? gen_ashrsi3
13428 : gen_ashrdi3) (high[0], high[0],
13429 GEN_INT (single_width - 1)));
13430 emit_move_insn (low[0], high[0]);
13431
13432 }
13433 else if (count >= single_width)
13434 {
13435 emit_move_insn (low[0], high[1]);
13436 emit_move_insn (high[0], low[0]);
13437 emit_insn ((mode == DImode
13438 ? gen_ashrsi3
13439 : gen_ashrdi3) (high[0], high[0],
13440 GEN_INT (single_width - 1)));
13441 if (count > single_width)
13442 emit_insn ((mode == DImode
13443 ? gen_ashrsi3
13444 : gen_ashrdi3) (low[0], low[0],
13445 GEN_INT (count - single_width)));
13446 }
13447 else
13448 {
13449 if (!rtx_equal_p (operands[0], operands[1]))
13450 emit_move_insn (operands[0], operands[1]);
13451 emit_insn ((mode == DImode
13452 ? gen_x86_shrd_1
13453 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13454 emit_insn ((mode == DImode
13455 ? gen_ashrsi3
13456 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13457 }
13458 }
13459 else
13460 {
13461 if (!rtx_equal_p (operands[0], operands[1]))
13462 emit_move_insn (operands[0], operands[1]);
13463
13464 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13465
13466 emit_insn ((mode == DImode
13467 ? gen_x86_shrd_1
13468 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13469 emit_insn ((mode == DImode
13470 ? gen_ashrsi3
13471 : gen_ashrdi3) (high[0], high[0], operands[2]));
13472
13473 if (TARGET_CMOVE && scratch)
13474 {
13475 emit_move_insn (scratch, high[0]);
13476 emit_insn ((mode == DImode
13477 ? gen_ashrsi3
13478 : gen_ashrdi3) (scratch, scratch,
13479 GEN_INT (single_width - 1)));
13480 emit_insn ((mode == DImode
13481 ? gen_x86_shift_adj_1
13482 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13483 scratch));
13484 }
13485 else
13486 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13487 }
13488 }
13489
13490 void
13491 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13492 {
13493 rtx low[2], high[2];
13494 int count;
13495 const int single_width = mode == DImode ? 32 : 64;
13496
13497 if (CONST_INT_P (operands[2]))
13498 {
13499 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13500 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13501
13502 if (count >= single_width)
13503 {
13504 emit_move_insn (low[0], high[1]);
13505 ix86_expand_clear (high[0]);
13506
13507 if (count > single_width)
13508 emit_insn ((mode == DImode
13509 ? gen_lshrsi3
13510 : gen_lshrdi3) (low[0], low[0],
13511 GEN_INT (count - single_width)));
13512 }
13513 else
13514 {
13515 if (!rtx_equal_p (operands[0], operands[1]))
13516 emit_move_insn (operands[0], operands[1]);
13517 emit_insn ((mode == DImode
13518 ? gen_x86_shrd_1
13519 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13520 emit_insn ((mode == DImode
13521 ? gen_lshrsi3
13522 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13523 }
13524 }
13525 else
13526 {
13527 if (!rtx_equal_p (operands[0], operands[1]))
13528 emit_move_insn (operands[0], operands[1]);
13529
13530 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13531
13532 emit_insn ((mode == DImode
13533 ? gen_x86_shrd_1
13534 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13535 emit_insn ((mode == DImode
13536 ? gen_lshrsi3
13537 : gen_lshrdi3) (high[0], high[0], operands[2]));
13538
13539 /* Heh. By reversing the arguments, we can reuse this pattern. */
13540 if (TARGET_CMOVE && scratch)
13541 {
13542 ix86_expand_clear (scratch);
13543 emit_insn ((mode == DImode
13544 ? gen_x86_shift_adj_1
13545 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13546 scratch));
13547 }
13548 else
13549 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13550 }
13551 }
13552
13553 /* Predict the just-emitted jump instruction to be taken with probability PROB.  */
13554 static void
13555 predict_jump (int prob)
13556 {
13557 rtx insn = get_last_insn ();
13558 gcc_assert (JUMP_P (insn));
13559 REG_NOTES (insn)
13560 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13561 GEN_INT (prob),
13562 REG_NOTES (insn));
13563 }
13564
13565 /* Helper function for the string operations below.  Test whether VARIABLE
13566    is aligned to VALUE bytes.  If so, jump to the label.  */
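/* A minimal sketch of the expansion, assuming a 32-bit VARIABLE (names
   illustrative):

	andl	$VALUE, %tmp
	je	.Lskip		# bit clear: chunk not needed

   the caller emits the move or store for the chunk and then the label.  */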
13567 static rtx
13568 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13569 {
13570 rtx label = gen_label_rtx ();
13571 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13572 if (GET_MODE (variable) == DImode)
13573 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13574 else
13575 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13576 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13577 1, label);
13578 if (epilogue)
13579 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13580 else
13581 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13582 return label;
13583 }
13584
13585 /* Adjust COUNTER by the VALUE. */
13586 static void
13587 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13588 {
13589 if (GET_MODE (countreg) == DImode)
13590 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13591 else
13592 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13593 }
13594
13595 /* Zero-extend the possibly SImode EXP to a Pmode register.  */
13596 rtx
13597 ix86_zero_extend_to_Pmode (rtx exp)
13598 {
13599 rtx r;
13600 if (GET_MODE (exp) == VOIDmode)
13601 return force_reg (Pmode, exp);
13602 if (GET_MODE (exp) == Pmode)
13603 return copy_to_mode_reg (Pmode, exp);
13604 r = gen_reg_rtx (Pmode);
13605 emit_insn (gen_zero_extendsidi2 (r, exp));
13606 return r;
13607 }
13608
13609 /* Divide COUNTREG by SCALE. */
13610 static rtx
13611 scale_counter (rtx countreg, int scale)
13612 {
13613 rtx sc;
13614 rtx piece_size_mask;
13615
13616 if (scale == 1)
13617 return countreg;
13618 if (CONST_INT_P (countreg))
13619 return GEN_INT (INTVAL (countreg) / scale);
13620 gcc_assert (REG_P (countreg));
13621
13622 piece_size_mask = GEN_INT (scale - 1);
13623 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13624 GEN_INT (exact_log2 (scale)),
13625 NULL, 1, OPTAB_DIRECT);
13626 return sc;
13627 }
13628
13629 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13630 DImode for constant loop counts. */
13631
13632 static enum machine_mode
13633 counter_mode (rtx count_exp)
13634 {
13635 if (GET_MODE (count_exp) != VOIDmode)
13636 return GET_MODE (count_exp);
13637 if (GET_CODE (count_exp) != CONST_INT)
13638 return Pmode;
13639 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13640 return DImode;
13641 return SImode;
13642 }
13643
13644 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13645    to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13646    overall size is COUNT, specified in bytes.  When SRCPTR is NULL, output the
13647    equivalent loop to set memory to VALUE (supposed to be in MODE).
13648
13649    The size is rounded down to a whole multiple of the chunk size moved at once.
13650    SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info.  */
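/* Schematically (a sketch only; unrolling, mode conversions and branch
   predictions are omitted) the emitted code has the shape

	size = count & ~(piece_size - 1);
	if (size == 0) goto out;	   (only when piece_size == 1)
	iter = 0;
    top:
	copy (or set) one piece at dest + iter [and src + iter];
	iter += piece_size;
	if (iter < size) goto top;
	dest += iter;  [src += iter;]
    out:

   where piece_size is GET_MODE_SIZE (mode) * unroll.  */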
13651
13652
13653 static void
13654 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13655 rtx destptr, rtx srcptr, rtx value,
13656 rtx count, enum machine_mode mode, int unroll,
13657 int expected_size)
13658 {
13659 rtx out_label, top_label, iter, tmp;
13660 enum machine_mode iter_mode = counter_mode (count);
13661 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13662 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13663 rtx size;
13664 rtx x_addr;
13665 rtx y_addr;
13666 int i;
13667
13668 top_label = gen_label_rtx ();
13669 out_label = gen_label_rtx ();
13670 iter = gen_reg_rtx (iter_mode);
13671
13672 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13673 NULL, 1, OPTAB_DIRECT);
13674 /* Those two should combine. */
13675 if (piece_size == const1_rtx)
13676 {
13677 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13678 true, out_label);
13679 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13680 }
13681 emit_move_insn (iter, const0_rtx);
13682
13683 emit_label (top_label);
13684
13685 tmp = convert_modes (Pmode, iter_mode, iter, true);
13686 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13687 destmem = change_address (destmem, mode, x_addr);
13688
13689 if (srcmem)
13690 {
13691 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13692 srcmem = change_address (srcmem, mode, y_addr);
13693
13694 /* When unrolling for chips that reorder memory reads and writes,
13695    we can save registers by using a single temporary.
13696    Also, using 4 temporaries is overkill in 32-bit mode.  */
13697 if (!TARGET_64BIT && 0)
13698 {
13699 for (i = 0; i < unroll; i++)
13700 {
13701 if (i)
13702 {
13703 destmem =
13704 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13705 srcmem =
13706 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13707 }
13708 emit_move_insn (destmem, srcmem);
13709 }
13710 }
13711 else
13712 {
13713 rtx tmpreg[4];
13714 gcc_assert (unroll <= 4);
13715 for (i = 0; i < unroll; i++)
13716 {
13717 tmpreg[i] = gen_reg_rtx (mode);
13718 if (i)
13719 {
13720 srcmem =
13721 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13722 }
13723 emit_move_insn (tmpreg[i], srcmem);
13724 }
13725 for (i = 0; i < unroll; i++)
13726 {
13727 if (i)
13728 {
13729 destmem =
13730 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13731 }
13732 emit_move_insn (destmem, tmpreg[i]);
13733 }
13734 }
13735 }
13736 else
13737 for (i = 0; i < unroll; i++)
13738 {
13739 if (i)
13740 destmem =
13741 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13742 emit_move_insn (destmem, value);
13743 }
13744
13745 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13746 true, OPTAB_LIB_WIDEN);
13747 if (tmp != iter)
13748 emit_move_insn (iter, tmp);
13749
13750 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13751 true, top_label);
13752 if (expected_size != -1)
13753 {
13754 expected_size /= GET_MODE_SIZE (mode) * unroll;
13755 if (expected_size == 0)
13756 predict_jump (0);
13757 else if (expected_size > REG_BR_PROB_BASE)
13758 predict_jump (REG_BR_PROB_BASE - 1);
13759 else
13760 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13761 }
13762 else
13763 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13764 iter = ix86_zero_extend_to_Pmode (iter);
13765 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13766 true, OPTAB_LIB_WIDEN);
13767 if (tmp != destptr)
13768 emit_move_insn (destptr, tmp);
13769 if (srcptr)
13770 {
13771 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13772 true, OPTAB_LIB_WIDEN);
13773 if (tmp != srcptr)
13774 emit_move_insn (srcptr, tmp);
13775 }
13776 emit_label (out_label);
13777 }
13778
13779 /* Output a "rep; mov" instruction.
13780    Arguments have the same meaning as for the previous function.  */
13781 static void
13782 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13783 rtx destptr, rtx srcptr,
13784 rtx count,
13785 enum machine_mode mode)
13786 {
13787 rtx destexp;
13788 rtx srcexp;
13789 rtx countreg;
13790
13791 /* If the size is known, it is shorter to use rep movs. */
13792 if (mode == QImode && CONST_INT_P (count)
13793 && !(INTVAL (count) & 3))
13794 mode = SImode;
13795
13796 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13797 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13798 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13799 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13800 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13801 if (mode != QImode)
13802 {
13803 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13804 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13805 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13806 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13807 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13808 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13809 }
13810 else
13811 {
13812 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13813 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13814 }
13815 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13816 destexp, srcexp));
13817 }
13818
13819 /* Output a "rep; stos" instruction.
13820    Arguments have the same meaning as for the previous function.  */
13821 static void
13822 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13823 rtx count,
13824 enum machine_mode mode)
13825 {
13826 rtx destexp;
13827 rtx countreg;
13828
13829 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13830 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13831 value = force_reg (mode, gen_lowpart (mode, value));
13832 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13833 if (mode != QImode)
13834 {
13835 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13836 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13837 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13838 }
13839 else
13840 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13841 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13842 }
13843
13844 static void
13845 emit_strmov (rtx destmem, rtx srcmem,
13846 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13847 {
13848 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13849 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13850 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13851 }
13852
13853 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13854 static void
13855 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13856 rtx destptr, rtx srcptr, rtx count, int max_size)
13857 {
13858 rtx src, dest;
13859 if (CONST_INT_P (count))
13860 {
13861 HOST_WIDE_INT countval = INTVAL (count);
13862 int offset = 0;
13863
13864 if ((countval & 0x10) && max_size > 16)
13865 {
13866 if (TARGET_64BIT)
13867 {
13868 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13869 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13870 }
13871 else
13872 gcc_unreachable ();
13873 offset += 16;
13874 }
13875 if ((countval & 0x08) && max_size > 8)
13876 {
13877 if (TARGET_64BIT)
13878 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13879 else
13880 {
13881 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13882 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13883 }
13884 offset += 8;
13885 }
13886 if ((countval & 0x04) && max_size > 4)
13887 {
13888 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13889 offset += 4;
13890 }
13891 if ((countval & 0x02) && max_size > 2)
13892 {
13893 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13894 offset += 2;
13895 }
13896 if ((countval & 0x01) && max_size > 1)
13897 {
13898 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13899 offset += 1;
13900 }
13901 return;
13902 }
13903 if (max_size > 8)
13904 {
13905 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13906 count, 1, OPTAB_DIRECT);
13907 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13908 count, QImode, 1, 4);
13909 return;
13910 }
13911
13912 /* When single stringop insns are available, we can cheaply increase the
13913    dest and src pointers.  Otherwise we save code size by maintaining an
13914    offset (zero is readily available from the preceding rep operation) and
13915    using x86 addressing modes.  */
13916 if (TARGET_SINGLE_STRINGOP)
13917 {
13918 if (max_size > 4)
13919 {
13920 rtx label = ix86_expand_aligntest (count, 4, true);
13921 src = change_address (srcmem, SImode, srcptr);
13922 dest = change_address (destmem, SImode, destptr);
13923 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13924 emit_label (label);
13925 LABEL_NUSES (label) = 1;
13926 }
13927 if (max_size > 2)
13928 {
13929 rtx label = ix86_expand_aligntest (count, 2, true);
13930 src = change_address (srcmem, HImode, srcptr);
13931 dest = change_address (destmem, HImode, destptr);
13932 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13933 emit_label (label);
13934 LABEL_NUSES (label) = 1;
13935 }
13936 if (max_size > 1)
13937 {
13938 rtx label = ix86_expand_aligntest (count, 1, true);
13939 src = change_address (srcmem, QImode, srcptr);
13940 dest = change_address (destmem, QImode, destptr);
13941 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13942 emit_label (label);
13943 LABEL_NUSES (label) = 1;
13944 }
13945 }
13946 else
13947 {
13948 rtx offset = force_reg (Pmode, const0_rtx);
13949 rtx tmp;
13950
13951 if (max_size > 4)
13952 {
13953 rtx label = ix86_expand_aligntest (count, 4, true);
13954 src = change_address (srcmem, SImode, srcptr);
13955 dest = change_address (destmem, SImode, destptr);
13956 emit_move_insn (dest, src);
13957 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13958 true, OPTAB_LIB_WIDEN);
13959 if (tmp != offset)
13960 emit_move_insn (offset, tmp);
13961 emit_label (label);
13962 LABEL_NUSES (label) = 1;
13963 }
13964 if (max_size > 2)
13965 {
13966 rtx label = ix86_expand_aligntest (count, 2, true);
13967 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13968 src = change_address (srcmem, HImode, tmp);
13969 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13970 dest = change_address (destmem, HImode, tmp);
13971 emit_move_insn (dest, src);
13972 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13973 true, OPTAB_LIB_WIDEN);
13974 if (tmp != offset)
13975 emit_move_insn (offset, tmp);
13976 emit_label (label);
13977 LABEL_NUSES (label) = 1;
13978 }
13979 if (max_size > 1)
13980 {
13981 rtx label = ix86_expand_aligntest (count, 1, true);
13982 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13983 src = change_address (srcmem, QImode, tmp);
13984 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13985 dest = change_address (destmem, QImode, tmp);
13986 emit_move_insn (dest, src);
13987 emit_label (label);
13988 LABEL_NUSES (label) = 1;
13989 }
13990 }
13991 }
13992
13993 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
13994 static void
13995 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13996 rtx count, int max_size)
13997 {
13998 count =
13999 expand_simple_binop (counter_mode (count), AND, count,
14000 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14001 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14002 gen_lowpart (QImode, value), count, QImode,
14003 1, max_size / 2);
14004 }
14005
14006 /* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
14007 static void
14008 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14009 {
14010 rtx dest;
14011
14012 if (CONST_INT_P (count))
14013 {
14014 HOST_WIDE_INT countval = INTVAL (count);
14015 int offset = 0;
14016
14017 if ((countval & 0x10) && max_size > 16)
14018 {
14019 if (TARGET_64BIT)
14020 {
14021 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14022 emit_insn (gen_strset (destptr, dest, value));
14023 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14024 emit_insn (gen_strset (destptr, dest, value));
14025 }
14026 else
14027 gcc_unreachable ();
14028 offset += 16;
14029 }
14030 if ((countval & 0x08) && max_size > 8)
14031 {
14032 if (TARGET_64BIT)
14033 {
14034 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14035 emit_insn (gen_strset (destptr, dest, value));
14036 }
14037 else
14038 {
14039 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14040 emit_insn (gen_strset (destptr, dest, value));
14041 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14042 emit_insn (gen_strset (destptr, dest, value));
14043 }
14044 offset += 8;
14045 }
14046 if ((countval & 0x04) && max_size > 4)
14047 {
14048 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14049 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14050 offset += 4;
14051 }
14052 if ((countval & 0x02) && max_size > 2)
14053 {
14054 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14055 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14056 offset += 2;
14057 }
14058 if ((countval & 0x01) && max_size > 1)
14059 {
14060 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14061 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14062 offset += 1;
14063 }
14064 return;
14065 }
14066 if (max_size > 32)
14067 {
14068 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14069 return;
14070 }
14071 if (max_size > 16)
14072 {
14073 rtx label = ix86_expand_aligntest (count, 16, true);
14074 if (TARGET_64BIT)
14075 {
14076 dest = change_address (destmem, DImode, destptr);
14077 emit_insn (gen_strset (destptr, dest, value));
14078 emit_insn (gen_strset (destptr, dest, value));
14079 }
14080 else
14081 {
14082 dest = change_address (destmem, SImode, destptr);
14083 emit_insn (gen_strset (destptr, dest, value));
14084 emit_insn (gen_strset (destptr, dest, value));
14085 emit_insn (gen_strset (destptr, dest, value));
14086 emit_insn (gen_strset (destptr, dest, value));
14087 }
14088 emit_label (label);
14089 LABEL_NUSES (label) = 1;
14090 }
14091 if (max_size > 8)
14092 {
14093 rtx label = ix86_expand_aligntest (count, 8, true);
14094 if (TARGET_64BIT)
14095 {
14096 dest = change_address (destmem, DImode, destptr);
14097 emit_insn (gen_strset (destptr, dest, value));
14098 }
14099 else
14100 {
14101 dest = change_address (destmem, SImode, destptr);
14102 emit_insn (gen_strset (destptr, dest, value));
14103 emit_insn (gen_strset (destptr, dest, value));
14104 }
14105 emit_label (label);
14106 LABEL_NUSES (label) = 1;
14107 }
14108 if (max_size > 4)
14109 {
14110 rtx label = ix86_expand_aligntest (count, 4, true);
14111 dest = change_address (destmem, SImode, destptr);
14112 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14113 emit_label (label);
14114 LABEL_NUSES (label) = 1;
14115 }
14116 if (max_size > 2)
14117 {
14118 rtx label = ix86_expand_aligntest (count, 2, true);
14119 dest = change_address (destmem, HImode, destptr);
14120 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14121 emit_label (label);
14122 LABEL_NUSES (label) = 1;
14123 }
14124 if (max_size > 1)
14125 {
14126 rtx label = ix86_expand_aligntest (count, 1, true);
14127 dest = change_address (destmem, QImode, destptr);
14128 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14129 emit_label (label);
14130 LABEL_NUSES (label) = 1;
14131 }
14132 }
14133
14134 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
14135    to DESIRED_ALIGNMENT.  */
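/* For instance (illustrative only): with ALIGN == 1 and DESIRED_ALIGNMENT == 8
   this tests bit 0 of the destination and copies one byte if it is set, then
   bit 1 and copies a halfword, then bit 2 and copies a word, decrementing
   COUNT at each step, so at most 7 bytes are consumed before the main loop.  */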
14136 static void
14137 expand_movmem_prologue (rtx destmem, rtx srcmem,
14138 rtx destptr, rtx srcptr, rtx count,
14139 int align, int desired_alignment)
14140 {
14141 if (align <= 1 && desired_alignment > 1)
14142 {
14143 rtx label = ix86_expand_aligntest (destptr, 1, false);
14144 srcmem = change_address (srcmem, QImode, srcptr);
14145 destmem = change_address (destmem, QImode, destptr);
14146 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14147 ix86_adjust_counter (count, 1);
14148 emit_label (label);
14149 LABEL_NUSES (label) = 1;
14150 }
14151 if (align <= 2 && desired_alignment > 2)
14152 {
14153 rtx label = ix86_expand_aligntest (destptr, 2, false);
14154 srcmem = change_address (srcmem, HImode, srcptr);
14155 destmem = change_address (destmem, HImode, destptr);
14156 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14157 ix86_adjust_counter (count, 2);
14158 emit_label (label);
14159 LABEL_NUSES (label) = 1;
14160 }
14161 if (align <= 4 && desired_alignment > 4)
14162 {
14163 rtx label = ix86_expand_aligntest (destptr, 4, false);
14164 srcmem = change_address (srcmem, SImode, srcptr);
14165 destmem = change_address (destmem, SImode, destptr);
14166 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14167 ix86_adjust_counter (count, 4);
14168 emit_label (label);
14169 LABEL_NUSES (label) = 1;
14170 }
14171 gcc_assert (desired_alignment <= 8);
14172 }
14173
14174 /* Store enough into DEST to align DEST, known to be aligned by ALIGN, to
14175    DESIRED_ALIGNMENT.  */
14176 static void
14177 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14178 int align, int desired_alignment)
14179 {
14180 if (align <= 1 && desired_alignment > 1)
14181 {
14182 rtx label = ix86_expand_aligntest (destptr, 1, false);
14183 destmem = change_address (destmem, QImode, destptr);
14184 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14185 ix86_adjust_counter (count, 1);
14186 emit_label (label);
14187 LABEL_NUSES (label) = 1;
14188 }
14189 if (align <= 2 && desired_alignment > 2)
14190 {
14191 rtx label = ix86_expand_aligntest (destptr, 2, false);
14192 destmem = change_address (destmem, HImode, destptr);
14193 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14194 ix86_adjust_counter (count, 2);
14195 emit_label (label);
14196 LABEL_NUSES (label) = 1;
14197 }
14198 if (align <= 4 && desired_alignment > 4)
14199 {
14200 rtx label = ix86_expand_aligntest (destptr, 4, false);
14201 destmem = change_address (destmem, SImode, destptr);
14202 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14203 ix86_adjust_counter (count, 4);
14204 emit_label (label);
14205 LABEL_NUSES (label) = 1;
14206 }
14207 gcc_assert (desired_alignment <= 8);
14208 }
14209
14210 /* Given COUNT and EXPECTED_SIZE, decide on code generation for the string operation.  */
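/* For example (an illustration only; the real tables live in ix86_cost):
   if the table for the current tuning were

	{{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}

   then an EXPECTED_SIZE of 100 would select the loop, 4000 would select
   rep_prefix_4_byte, and anything larger would fall back to the libcall
   unless one of the TARGET_INLINE_*_STRINGOPS flags forces an inline
   choice.  */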
14211 static enum stringop_alg
14212 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14213 int *dynamic_check)
14214 {
14215 const struct stringop_algs * algs;
14216
14217 *dynamic_check = -1;
14218 if (memset)
14219 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14220 else
14221 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14222 if (stringop_alg != no_stringop)
14223 return stringop_alg;
14224 /* rep; movq or rep; movl is the smallest variant. */
14225 else if (optimize_size)
14226 {
14227 if (!count || (count & 3))
14228 return rep_prefix_1_byte;
14229 else
14230 return rep_prefix_4_byte;
14231 }
14232 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14233 */
14234 else if (expected_size != -1 && expected_size < 4)
14235 return loop_1_byte;
14236 else if (expected_size != -1)
14237 {
14238 unsigned int i;
14239 enum stringop_alg alg = libcall;
14240 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14241 {
14242 gcc_assert (algs->size[i].max);
14243 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14244 {
14245 if (algs->size[i].alg != libcall)
14246 alg = algs->size[i].alg;
14247 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14248    the last non-libcall inline algorithm.  */
14249 if (TARGET_INLINE_ALL_STRINGOPS)
14250 {
14251 /* When the current size is best copied by a libcall,
14252    but we are still forced to inline, run the heuristic below
14253    that will pick code for medium-sized blocks.  */
14254 if (alg != libcall)
14255 return alg;
14256 break;
14257 }
14258 else
14259 return algs->size[i].alg;
14260 }
14261 }
14262 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14263 }
14264 /* When asked to inline the call anyway, try to pick a meaningful choice.
14265    We look for the maximal size of block that is faster to copy by hand and
14266    take blocks of at most that size, guessing that the average size will
14267    be roughly half of the block.
14268
14269    If this turns out to be bad, we might simply specify the preferred
14270    choice in ix86_costs.  */
14271 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14272 && algs->unknown_size == libcall)
14273 {
14274 int max = -1;
14275 enum stringop_alg alg;
14276 int i;
14277
14278 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14279 if (algs->size[i].alg != libcall && algs->size[i].alg)
14280 max = algs->size[i].max;
14281 if (max == -1)
14282 max = 4096;
14283 alg = decide_alg (count, max / 2, memset, dynamic_check);
14284 gcc_assert (*dynamic_check == -1);
14285 gcc_assert (alg != libcall);
14286 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14287 *dynamic_check = max;
14288 return alg;
14289 }
14290 return algs->unknown_size;
14291 }
14292
14293 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14294 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14295 static int
14296 decide_alignment (int align,
14297 enum stringop_alg alg,
14298 int expected_size)
14299 {
14300 int desired_align = 0;
14301 switch (alg)
14302 {
14303 case no_stringop:
14304 gcc_unreachable ();
14305 case loop:
14306 case unrolled_loop:
14307 desired_align = GET_MODE_SIZE (Pmode);
14308 break;
14309 case rep_prefix_8_byte:
14310 desired_align = 8;
14311 break;
14312 case rep_prefix_4_byte:
14313 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14314    copying a whole cache line at once.  */
14315 if (TARGET_PENTIUMPRO)
14316 desired_align = 8;
14317 else
14318 desired_align = 4;
14319 break;
14320 case rep_prefix_1_byte:
14321 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14322    copying a whole cache line at once.  */
14323 if (TARGET_PENTIUMPRO)
14324 desired_align = 8;
14325 else
14326 desired_align = 1;
14327 break;
14328 case loop_1_byte:
14329 desired_align = 1;
14330 break;
14331 case libcall:
14332 return 0;
14333 }
14334
14335 if (optimize_size)
14336 desired_align = 1;
14337 if (desired_align < align)
14338 desired_align = align;
14339 if (expected_size != -1 && expected_size < 4)
14340 desired_align = align;
14341 return desired_align;
14342 }
14343
14344 /* Return the smallest power of 2 greater than VAL. */
14345 static int
14346 smallest_pow2_greater_than (int val)
14347 {
14348 int ret = 1;
14349 while (ret <= val)
14350 ret <<= 1;
14351 return ret;
14352 }
14353
14354 /* Expand string move (memcpy) operation.  Use i386 string operations when
14355    profitable.  ix86_expand_setmem contains similar code.  The code depends upon
14356    architecture, block size and alignment, but always has the same
14357    overall structure:
14358
14359    1) Prologue guard: a conditional that jumps up to the epilogues for small
14360       blocks that can be handled by the epilogue alone.  This is faster but
14361       also needed for correctness, since the prologue assumes the block is
14362       larger than the desired alignment.
14363
14364       An optional dynamic check for size, and a libcall for large blocks,
14365       is emitted here too, with -minline-stringops-dynamically.
14366
14367    2) Prologue: copy the first few bytes in order to get the destination
14368       aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
14369       DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14370       We emit either a jump tree on power-of-two sized blocks, or a byte loop.
14371
14372    3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14373       with the specified algorithm.
14374
14375    4) Epilogue: code copying the tail of the block that is too small to be
14376       handled by the main body (or up to the size guarded by the prologue guard).  */
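/* A concrete (purely illustrative) example: for a memcpy of 100 bytes with
   unknown alignment on a tuning that selects rep_prefix_4_byte, the expander
   emits a small-size guard branching to the epilogue, a prologue aligning the
   destination to 4 bytes, a "rep movsl" for the bulk, and an epilogue copying
   the remaining count & 3 bytes.  The thresholds come from ix86_cost and the
   flags consulted by decide_alg above.  */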
14377
14378 int
14379 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14380 rtx expected_align_exp, rtx expected_size_exp)
14381 {
14382 rtx destreg;
14383 rtx srcreg;
14384 rtx label = NULL;
14385 rtx tmp;
14386 rtx jump_around_label = NULL;
14387 HOST_WIDE_INT align = 1;
14388 unsigned HOST_WIDE_INT count = 0;
14389 HOST_WIDE_INT expected_size = -1;
14390 int size_needed = 0, epilogue_size_needed;
14391 int desired_align = 0;
14392 enum stringop_alg alg;
14393 int dynamic_check;
14394
14395 if (CONST_INT_P (align_exp))
14396 align = INTVAL (align_exp);
14397 /* i386 can do misaligned access at a reasonably increased cost.  */
14398 if (CONST_INT_P (expected_align_exp)
14399 && INTVAL (expected_align_exp) > align)
14400 align = INTVAL (expected_align_exp);
14401 if (CONST_INT_P (count_exp))
14402 count = expected_size = INTVAL (count_exp);
14403 if (CONST_INT_P (expected_size_exp) && count == 0)
14404 expected_size = INTVAL (expected_size_exp);
14405
14406 /* Step 0: Decide on preferred algorithm, desired alignment and
14407 size of chunks to be copied by main loop. */
14408
14409 alg = decide_alg (count, expected_size, false, &dynamic_check);
14410 desired_align = decide_alignment (align, alg, expected_size);
14411
14412 if (!TARGET_ALIGN_STRINGOPS)
14413 align = desired_align;
14414
14415 if (alg == libcall)
14416 return 0;
14417 gcc_assert (alg != no_stringop);
14418 if (!count)
14419 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14420 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14421 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14422 switch (alg)
14423 {
14424 case libcall:
14425 case no_stringop:
14426 gcc_unreachable ();
14427 case loop:
14428 size_needed = GET_MODE_SIZE (Pmode);
14429 break;
14430 case unrolled_loop:
14431 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14432 break;
14433 case rep_prefix_8_byte:
14434 size_needed = 8;
14435 break;
14436 case rep_prefix_4_byte:
14437 size_needed = 4;
14438 break;
14439 case rep_prefix_1_byte:
14440 case loop_1_byte:
14441 size_needed = 1;
14442 break;
14443 }
14444
14445 epilogue_size_needed = size_needed;
14446
14447 /* Step 1: Prologue guard. */
14448
14449 /* Alignment code needs count to be in a register.  */
14450 if (CONST_INT_P (count_exp) && desired_align > align)
14451 {
14452 enum machine_mode mode = SImode;
14453 if (TARGET_64BIT && (count & ~0xffffffff))
14454 mode = DImode;
14455 count_exp = force_reg (mode, count_exp);
14456 }
14457 gcc_assert (desired_align >= 1 && align >= 1);
14458
14459 /* Ensure that the alignment prologue won't copy past the end of the block.  */
14460 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14461 {
14462 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14463 /* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14464    Make sure EPILOGUE_SIZE_NEEDED is a power of 2.  */
14465 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14466
14467 label = gen_label_rtx ();
14468 emit_cmp_and_jump_insns (count_exp,
14469 GEN_INT (epilogue_size_needed),
14470 LTU, 0, counter_mode (count_exp), 1, label);
14471 if (GET_CODE (count_exp) == CONST_INT)
14472 ;
14473 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14474 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14475 else
14476 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14477 }
14478 /* Emit code to decide at runtime whether a library call or inline code should
14479    be used.  */
14480 if (dynamic_check != -1)
14481 {
14482 rtx hot_label = gen_label_rtx ();
14483 jump_around_label = gen_label_rtx ();
14484 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14485 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14486 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14487 emit_block_move_via_libcall (dst, src, count_exp, false);
14488 emit_jump (jump_around_label);
14489 emit_label (hot_label);
14490 }
14491
14492 /* Step 2: Alignment prologue. */
14493
14494 if (desired_align > align)
14495 {
14496 /* Except for the first move in the epilogue, we no longer know
14497    the constant offset in the aliasing info.  It doesn't seem worth
14498    the pain to maintain it for the first move, so throw away
14499    the info early.  */
14500 src = change_address (src, BLKmode, srcreg);
14501 dst = change_address (dst, BLKmode, destreg);
14502 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14503 desired_align);
14504 }
14505 if (label && size_needed == 1)
14506 {
14507 emit_label (label);
14508 LABEL_NUSES (label) = 1;
14509 label = NULL;
14510 }
14511
14512 /* Step 3: Main loop. */
14513
14514 switch (alg)
14515 {
14516 case libcall:
14517 case no_stringop:
14518 gcc_unreachable ();
14519 case loop_1_byte:
14520 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14521 count_exp, QImode, 1, expected_size);
14522 break;
14523 case loop:
14524 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14525 count_exp, Pmode, 1, expected_size);
14526 break;
14527 case unrolled_loop:
14528 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14529    registers for 4 temporaries anyway.  */
14530 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14531 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14532 expected_size);
14533 break;
14534 case rep_prefix_8_byte:
14535 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14536 DImode);
14537 break;
14538 case rep_prefix_4_byte:
14539 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14540 SImode);
14541 break;
14542 case rep_prefix_1_byte:
14543 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14544 QImode);
14545 break;
14546 }
14547 /* Properly adjust the offsets of the src and dest memory for aliasing.  */
14548 if (CONST_INT_P (count_exp))
14549 {
14550 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14551 (count / size_needed) * size_needed);
14552 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14553 (count / size_needed) * size_needed);
14554 }
14555 else
14556 {
14557 src = change_address (src, BLKmode, srcreg);
14558 dst = change_address (dst, BLKmode, destreg);
14559 }
14560
14561 /* Step 4: Epilogue to copy the remaining bytes. */
14562
14563 if (label)
14564 {
14565 /* When the main loop is done, COUNT_EXP might hold the original count,
14566    while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14567    The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14568    bytes.  Compensate if needed.  */
14569
14570 if (size_needed < epilogue_size_needed)
14571 {
14572 tmp =
14573 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14574 GEN_INT (size_needed - 1), count_exp, 1,
14575 OPTAB_DIRECT);
14576 if (tmp != count_exp)
14577 emit_move_insn (count_exp, tmp);
14578 }
14579 emit_label (label);
14580 LABEL_NUSES (label) = 1;
14581 }
14582
14583 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14584 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14585 epilogue_size_needed);
14586 if (jump_around_label)
14587 emit_label (jump_around_label);
14588 return 1;
14589 }
14590
14591 /* Helper function for memset.  For a QImode value 0xXY produce
14592    0xXYXYXYXY of the width specified by MODE.  This is essentially
14593    a * 0x01010101, but we can do slightly better than
14594    synth_mult by unwinding the sequence by hand on CPUs with
14595    a slow multiply.  */
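/* Worked example (illustrative): promoting val = 0xAB to SImode by the shift
   sequence gives 0xAB -> 0xABAB -> 0xABABABAB, i.e.

	reg  = 0xAB;
	reg |= reg << 8;	   now 0xABAB
	reg |= reg << 16;	   now 0xABABABAB

   (the first step may instead use the insv patterns when partial register
   writes are cheap), with one more "reg |= reg << 32" step for DImode.  */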
14596 static rtx
14597 promote_duplicated_reg (enum machine_mode mode, rtx val)
14598 {
14599 enum machine_mode valmode = GET_MODE (val);
14600 rtx tmp;
14601 int nops = mode == DImode ? 3 : 2;
14602
14603 gcc_assert (mode == SImode || mode == DImode);
14604 if (val == const0_rtx)
14605 return copy_to_mode_reg (mode, const0_rtx);
14606 if (CONST_INT_P (val))
14607 {
14608 HOST_WIDE_INT v = INTVAL (val) & 255;
14609
14610 v |= v << 8;
14611 v |= v << 16;
14612 if (mode == DImode)
14613 v |= (v << 16) << 16;
14614 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14615 }
14616
14617 if (valmode == VOIDmode)
14618 valmode = QImode;
14619 if (valmode != QImode)
14620 val = gen_lowpart (QImode, val);
14621 if (mode == QImode)
14622 return val;
14623 if (!TARGET_PARTIAL_REG_STALL)
14624 nops--;
14625 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14626 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14627 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14628 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14629 {
14630 rtx reg = convert_modes (mode, QImode, val, true);
14631 tmp = promote_duplicated_reg (mode, const1_rtx);
14632 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14633 OPTAB_DIRECT);
14634 }
14635 else
14636 {
14637 rtx reg = convert_modes (mode, QImode, val, true);
14638
14639 if (!TARGET_PARTIAL_REG_STALL)
14640 if (mode == SImode)
14641 emit_insn (gen_movsi_insv_1 (reg, reg));
14642 else
14643 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14644 else
14645 {
14646 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14647 NULL, 1, OPTAB_DIRECT);
14648 reg =
14649 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14650 }
14651 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14652 NULL, 1, OPTAB_DIRECT);
14653 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14654 if (mode == SImode)
14655 return reg;
14656 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14657 NULL, 1, OPTAB_DIRECT);
14658 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14659 return reg;
14660 }
14661 }
14662
14663 /* Duplicate the value VAL using promote_duplicated_reg into the maximal size
14664 needed by the main loop copying SIZE_NEEDED chunks and by the prologue
14665 raising the alignment from ALIGN to DESIRED_ALIGN. */
14666 static rtx
14667 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14668 {
14669 rtx promoted_val;
14670
14671 if (TARGET_64BIT
14672 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14673 promoted_val = promote_duplicated_reg (DImode, val);
14674 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14675 promoted_val = promote_duplicated_reg (SImode, val);
14676 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14677 promoted_val = promote_duplicated_reg (HImode, val);
14678 else
14679 promoted_val = val;
14680
14681 return promoted_val;
14682 }
14683
14684 /* Expand string clear operation (bzero). Use i386 string operations when
14685 profitable. See expand_movmem comment for explanation of individual
14686 steps performed. */
14687 int
14688 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14689 rtx expected_align_exp, rtx expected_size_exp)
14690 {
14691 rtx destreg;
14692 rtx label = NULL;
14693 rtx tmp;
14694 rtx jump_around_label = NULL;
14695 HOST_WIDE_INT align = 1;
14696 unsigned HOST_WIDE_INT count = 0;
14697 HOST_WIDE_INT expected_size = -1;
14698 int size_needed = 0, epilogue_size_needed;
14699 int desired_align = 0;
14700 enum stringop_alg alg;
14701 rtx promoted_val = NULL;
14702 bool force_loopy_epilogue = false;
14703 int dynamic_check;
14704
14705 if (CONST_INT_P (align_exp))
14706 align = INTVAL (align_exp);
14707 /* i386 can do misaligned access at a reasonable increase in cost. */
14708 if (CONST_INT_P (expected_align_exp)
14709 && INTVAL (expected_align_exp) > align)
14710 align = INTVAL (expected_align_exp);
14711 if (CONST_INT_P (count_exp))
14712 count = expected_size = INTVAL (count_exp);
14713 if (CONST_INT_P (expected_size_exp) && count == 0)
14714 expected_size = INTVAL (expected_size_exp);
14715
14716 /* Step 0: Decide on preferred algorithm, desired alignment and
14717 size of chunks to be copied by main loop. */
14718
14719 alg = decide_alg (count, expected_size, true, &dynamic_check);
14720 desired_align = decide_alignment (align, alg, expected_size);
14721
14722 if (!TARGET_ALIGN_STRINGOPS)
14723 align = desired_align;
14724
14725 if (alg == libcall)
14726 return 0;
14727 gcc_assert (alg != no_stringop);
14728 if (!count)
14729 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14730 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14731 switch (alg)
14732 {
14733 case libcall:
14734 case no_stringop:
14735 gcc_unreachable ();
14736 case loop:
14737 size_needed = GET_MODE_SIZE (Pmode);
14738 break;
14739 case unrolled_loop:
14740 size_needed = GET_MODE_SIZE (Pmode) * 4;
14741 break;
14742 case rep_prefix_8_byte:
14743 size_needed = 8;
14744 break;
14745 case rep_prefix_4_byte:
14746 size_needed = 4;
14747 break;
14748 case rep_prefix_1_byte:
14749 case loop_1_byte:
14750 size_needed = 1;
14751 break;
14752 }
14753 epilogue_size_needed = size_needed;
14754
14755 /* Step 1: Prologue guard. */
14756
14757 /* Alignment code needs count to be in register. */
14758 if (CONST_INT_P (count_exp) && desired_align > align)
14759 {
14760 enum machine_mode mode = SImode;
14761 if (TARGET_64BIT && (count & ~0xffffffff))
14762 mode = DImode;
14763 count_exp = force_reg (mode, count_exp);
14764 }
14765 /* Do the cheap promotion to allow better CSE across the
14766 main loop and epilogue (i.e. one load of the big constant in
14767 front of all the code). */
14768 if (CONST_INT_P (val_exp))
14769 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14770 desired_align, align);
14771 /* Ensure that alignment prologue won't copy past end of block. */
14772 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14773 {
14774 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14775 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14776 Make sure it is power of 2. */
14777 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14778
14779 /* To improve performance of small blocks, we jump around the VAL
14780 promotion. This means that if the promoted VAL is not constant,
14781 we might not use it in the epilogue and have to fall back to the
14782 byte loop variant. */
14783 if (epilogue_size_needed > 2 && !promoted_val)
14784 force_loopy_epilogue = true;
14785 label = gen_label_rtx ();
14786 emit_cmp_and_jump_insns (count_exp,
14787 GEN_INT (epilogue_size_needed),
14788 LTU, 0, counter_mode (count_exp), 1, label);
14789 if (GET_CODE (count_exp) == CONST_INT)
14790 ;
14791 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14792 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14793 else
14794 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14795 }
14796 if (dynamic_check != -1)
14797 {
14798 rtx hot_label = gen_label_rtx ();
14799 jump_around_label = gen_label_rtx ();
14800 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14801 LEU, 0, counter_mode (count_exp), 1, hot_label);
14802 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14803 set_storage_via_libcall (dst, count_exp, val_exp, false);
14804 emit_jump (jump_around_label);
14805 emit_label (hot_label);
14806 }
14807
14808 /* Step 2: Alignment prologue. */
14809
14810 /* Do the expensive promotion once we branched off the small blocks. */
14811 if (!promoted_val)
14812 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14813 desired_align, align);
14814 gcc_assert (desired_align >= 1 && align >= 1);
14815
14816 if (desired_align > align)
14817 {
14818 /* Except for the first move in the epilogue, we no longer know
14819 the constant offset in the aliasing info. It does not seem worth
14820 the pain to maintain it for the first move, so throw away
14821 the info early. */
14822 dst = change_address (dst, BLKmode, destreg);
14823 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14824 desired_align);
14825 }
14826 if (label && size_needed == 1)
14827 {
14828 emit_label (label);
14829 LABEL_NUSES (label) = 1;
14830 label = NULL;
14831 }
14832
14833 /* Step 3: Main loop. */
14834
14835 switch (alg)
14836 {
14837 case libcall:
14838 case no_stringop:
14839 gcc_unreachable ();
14840 case loop_1_byte:
14841 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14842 count_exp, QImode, 1, expected_size);
14843 break;
14844 case loop:
14845 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14846 count_exp, Pmode, 1, expected_size);
14847 break;
14848 case unrolled_loop:
14849 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14850 count_exp, Pmode, 4, expected_size);
14851 break;
14852 case rep_prefix_8_byte:
14853 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14854 DImode);
14855 break;
14856 case rep_prefix_4_byte:
14857 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14858 SImode);
14859 break;
14860 case rep_prefix_1_byte:
14861 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14862 QImode);
14863 break;
14864 }
14865 /* Properly adjust the offset of the destination memory for aliasing. */
14866 if (CONST_INT_P (count_exp))
14867 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14868 (count / size_needed) * size_needed);
14869 else
14870 dst = change_address (dst, BLKmode, destreg);
14871
14872 /* Step 4: Epilogue to copy the remaining bytes. */
14873
14874 if (label)
14875 {
14876 /* When the main loop is done, COUNT_EXP might still hold the original
14877 count, while we want to copy only COUNT_EXP mod SIZE_NEEDED bytes.
14878 Epilogue code will actually copy COUNT_EXP mod EPILOGUE_SIZE_NEEDED
14879 bytes. Compensate if needed. */
14880
14881 if (size_needed < desired_align - align)
14882 {
14883 tmp =
14884 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14885 GEN_INT (size_needed - 1), count_exp, 1,
14886 OPTAB_DIRECT);
14887 size_needed = desired_align - align + 1;
14888 if (tmp != count_exp)
14889 emit_move_insn (count_exp, tmp);
14890 }
14891 emit_label (label);
14892 LABEL_NUSES (label) = 1;
14893 }
14894 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14895 {
14896 if (force_loopy_epilogue)
14897 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14898 size_needed);
14899 else
14900 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14901 size_needed);
14902 }
14903 if (jump_around_label)
14904 emit_label (jump_around_label);
14905 return 1;
14906 }
14907
14908 /* Expand the appropriate insns for doing strlen if not just doing
14909 repnz; scasb
14910
14911 out = result, initialized with the start address
14912 align_rtx = alignment of the address.
14913 scratch = scratch register, initialized with the start address when
14914 not aligned, otherwise undefined
14915
14916 This is just the body. It needs the initializations mentioned above and
14917 some address computing at the end. These things are done in i386.md. */
14918
14919 static void
14920 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14921 {
14922 int align;
14923 rtx tmp;
14924 rtx align_2_label = NULL_RTX;
14925 rtx align_3_label = NULL_RTX;
14926 rtx align_4_label = gen_label_rtx ();
14927 rtx end_0_label = gen_label_rtx ();
14928 rtx mem;
14929 rtx tmpreg = gen_reg_rtx (SImode);
14930 rtx scratch = gen_reg_rtx (SImode);
14931 rtx cmp;
14932
14933 align = 0;
14934 if (CONST_INT_P (align_rtx))
14935 align = INTVAL (align_rtx);
14936
14937 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14938
14939 /* Is there a known alignment and is it less than 4? */
14940 if (align < 4)
14941 {
14942 rtx scratch1 = gen_reg_rtx (Pmode);
14943 emit_move_insn (scratch1, out);
14944 /* Is there a known alignment and is it not 2? */
14945 if (align != 2)
14946 {
14947 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14948 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14949
14950 /* Leave just the 3 lower bits. */
14951 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14952 NULL_RTX, 0, OPTAB_WIDEN);
14953
14954 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14955 Pmode, 1, align_4_label);
14956 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14957 Pmode, 1, align_2_label);
14958 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14959 Pmode, 1, align_3_label);
14960 }
14961 else
14962 {
14963 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14964 check whether the pointer is aligned to a 4-byte boundary. */
14965
14966 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14967 NULL_RTX, 0, OPTAB_WIDEN);
14968
14969 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14970 Pmode, 1, align_4_label);
14971 }
14972
14973 mem = change_address (src, QImode, out);
14974
14975 /* Now compare the bytes. */
14976
14977 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14978 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14979 QImode, 1, end_0_label);
14980
14981 /* Increment the address. */
14982 if (TARGET_64BIT)
14983 emit_insn (gen_adddi3 (out, out, const1_rtx));
14984 else
14985 emit_insn (gen_addsi3 (out, out, const1_rtx));
14986
14987 /* Not needed with an alignment of 2 */
14988 if (align != 2)
14989 {
14990 emit_label (align_2_label);
14991
14992 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14993 end_0_label);
14994
14995 if (TARGET_64BIT)
14996 emit_insn (gen_adddi3 (out, out, const1_rtx));
14997 else
14998 emit_insn (gen_addsi3 (out, out, const1_rtx));
14999
15000 emit_label (align_3_label);
15001 }
15002
15003 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15004 end_0_label);
15005
15006 if (TARGET_64BIT)
15007 emit_insn (gen_adddi3 (out, out, const1_rtx));
15008 else
15009 emit_insn (gen_addsi3 (out, out, const1_rtx));
15010 }
15011
15012 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15013 align this loop; doing so only makes the program larger and does not
15014 help to speed it up. */
15015 emit_label (align_4_label);
15016
15017 mem = change_address (src, SImode, out);
15018 emit_move_insn (scratch, mem);
15019 if (TARGET_64BIT)
15020 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15021 else
15022 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15023
15024 /* This formula yields a nonzero result iff one of the bytes is zero.
15025 This saves three branches inside the loop and many cycles. */
15026
15027 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15028 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15029 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15030 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15031 gen_int_mode (0x80808080, SImode)));
15032 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15033 align_4_label);
15034
15035 if (TARGET_CMOVE)
15036 {
15037 rtx reg = gen_reg_rtx (SImode);
15038 rtx reg2 = gen_reg_rtx (Pmode);
15039 emit_move_insn (reg, tmpreg);
15040 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15041
15042 /* If zero is not in the first two bytes, move two bytes forward. */
15043 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15044 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15045 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15046 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15047 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15048 reg,
15049 tmpreg)));
15050 /* Emit lea manually to avoid clobbering of flags. */
15051 emit_insn (gen_rtx_SET (SImode, reg2,
15052 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15053
15054 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15055 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15056 emit_insn (gen_rtx_SET (VOIDmode, out,
15057 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15058 reg2,
15059 out)));
15060
15061 }
15062 else
15063 {
15064 rtx end_2_label = gen_label_rtx ();
15065 /* Is zero in the first two bytes? */
15066
15067 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15068 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15069 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15070 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15071 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15072 pc_rtx);
15073 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15074 JUMP_LABEL (tmp) = end_2_label;
15075
15076 /* Not in the first two. Move two bytes forward. */
15077 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15078 if (TARGET_64BIT)
15079 emit_insn (gen_adddi3 (out, out, const2_rtx));
15080 else
15081 emit_insn (gen_addsi3 (out, out, const2_rtx));
15082
15083 emit_label (end_2_label);
15084
15085 }
15086
15087 /* Avoid branch in fixing the byte. */
15088 tmpreg = gen_lowpart (QImode, tmpreg);
15089 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15090 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15091 if (TARGET_64BIT)
15092 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15093 else
15094 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15095
15096 emit_label (end_0_label);
15097 }
15098
15099 /* Expand strlen. */
15100
15101 int
15102 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15103 {
15104 rtx addr, scratch1, scratch2, scratch3, scratch4;
15105
15106 /* The generic case of the strlen expander is long. Avoid
15107 expanding it unless TARGET_INLINE_ALL_STRINGOPS. */
15108
15109 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15110 && !TARGET_INLINE_ALL_STRINGOPS
15111 && !optimize_size
15112 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15113 return 0;
15114
15115 addr = force_reg (Pmode, XEXP (src, 0));
15116 scratch1 = gen_reg_rtx (Pmode);
15117
15118 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15119 && !optimize_size)
15120 {
15121 /* Well it seems that some optimizer does not combine a call like
15122 foo(strlen(bar), strlen(bar));
15123 when the move and the subtraction are done here. It does calculate
15124 the length just once when these instructions are done inside of
15125 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15126 often used and I use one fewer register for the lifetime of
15127 output_strlen_unroll() this is better. */
15128
15129 emit_move_insn (out, addr);
15130
15131 ix86_expand_strlensi_unroll_1 (out, src, align);
15132
15133 /* strlensi_unroll_1 returns the address of the zero at the end of
15134 the string, like memchr(), so compute the length by subtracting
15135 the start address. */
15136 if (TARGET_64BIT)
15137 emit_insn (gen_subdi3 (out, out, addr));
15138 else
15139 emit_insn (gen_subsi3 (out, out, addr));
15140 }
15141 else
15142 {
15143 rtx unspec;
15144 scratch2 = gen_reg_rtx (Pmode);
15145 scratch3 = gen_reg_rtx (Pmode);
15146 scratch4 = force_reg (Pmode, constm1_rtx);
15147
15148 emit_move_insn (scratch3, addr);
15149 eoschar = force_reg (QImode, eoschar);
15150
15151 src = replace_equiv_address_nv (src, scratch3);
15152
15153 /* If .md starts supporting :P, this can be done in .md. */
15154 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15155 scratch4), UNSPEC_SCAS);
15156 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15157 if (TARGET_64BIT)
15158 {
15159 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15160 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15161 }
15162 else
15163 {
15164 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15165 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15166 }
15167 }
15168 return 1;
15169 }
15170
15171 /* For a given symbol (function), construct code to compute the address of its
15172 PLT entry in the large x86-64 PIC model. */
15173 rtx
15174 construct_plt_address (rtx symbol)
15175 {
15176 rtx tmp = gen_reg_rtx (Pmode);
15177 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15178
15179 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15180 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15181
15182 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15183 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15184 return tmp;
15185 }
15186
15187 void
15188 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15189 rtx callarg2 ATTRIBUTE_UNUSED,
15190 rtx pop, int sibcall)
15191 {
15192 rtx use = NULL, call;
15193
15194 if (pop == const0_rtx)
15195 pop = NULL;
15196 gcc_assert (!TARGET_64BIT || !pop);
15197
15198 if (TARGET_MACHO && !TARGET_64BIT)
15199 {
15200 #if TARGET_MACHO
15201 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15202 fnaddr = machopic_indirect_call_target (fnaddr);
15203 #endif
15204 }
15205 else
15206 {
15207 /* Static functions and indirect calls don't need the pic register. */
15208 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15209 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15210 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15211 use_reg (&use, pic_offset_table_rtx);
15212 }
15213
15214 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15215 {
15216 rtx al = gen_rtx_REG (QImode, 0);
15217 emit_move_insn (al, callarg2);
15218 use_reg (&use, al);
15219 }
15220
15221 if (ix86_cmodel == CM_LARGE_PIC
15222 && GET_CODE (fnaddr) == MEM
15223 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15224 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15225 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15226 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15227 {
15228 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15229 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15230 }
15231 if (sibcall && TARGET_64BIT
15232 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15233 {
15234 rtx addr;
15235 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15236 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15237 emit_move_insn (fnaddr, addr);
15238 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15239 }
15240
15241 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15242 if (retval)
15243 call = gen_rtx_SET (VOIDmode, retval, call);
15244 if (pop)
15245 {
15246 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15247 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15248 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15249 }
15250
15251 call = emit_call_insn (call);
15252 if (use)
15253 CALL_INSN_FUNCTION_USAGE (call) = use;
15254 }
15255
15256 \f
15257 /* Clear stack slot assignments remembered from previous functions.
15258 This is called from INIT_EXPANDERS once before RTL is emitted for each
15259 function. */
15260
15261 static struct machine_function *
15262 ix86_init_machine_status (void)
15263 {
15264 struct machine_function *f;
15265
15266 f = ggc_alloc_cleared (sizeof (struct machine_function));
15267 f->use_fast_prologue_epilogue_nregs = -1;
15268 f->tls_descriptor_call_expanded_p = 0;
15269
15270 return f;
15271 }
15272
15273 /* Return a MEM corresponding to a stack slot with mode MODE.
15274 Allocate a new slot if necessary.
15275
15276 The RTL for a function can have several slots available: N is
15277 which slot to use. */
15278
15279 rtx
15280 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15281 {
15282 struct stack_local_entry *s;
15283
15284 gcc_assert (n < MAX_386_STACK_LOCALS);
15285
15286 for (s = ix86_stack_locals; s; s = s->next)
15287 if (s->mode == mode && s->n == n)
15288 return copy_rtx (s->rtl);
15289
15290 s = (struct stack_local_entry *)
15291 ggc_alloc (sizeof (struct stack_local_entry));
15292 s->n = n;
15293 s->mode = mode;
15294 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15295
15296 s->next = ix86_stack_locals;
15297 ix86_stack_locals = s;
15298 return s->rtl;
15299 }
15300
15301 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15302
15303 static GTY(()) rtx ix86_tls_symbol;
15304 rtx
15305 ix86_tls_get_addr (void)
15306 {
15307
15308 if (!ix86_tls_symbol)
15309 {
15310 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15311 (TARGET_ANY_GNU_TLS
15312 && !TARGET_64BIT)
15313 ? "___tls_get_addr"
15314 : "__tls_get_addr");
15315 }
15316
15317 return ix86_tls_symbol;
15318 }
15319
15320 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15321
15322 static GTY(()) rtx ix86_tls_module_base_symbol;
15323 rtx
15324 ix86_tls_module_base (void)
15325 {
15326
15327 if (!ix86_tls_module_base_symbol)
15328 {
15329 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15330 "_TLS_MODULE_BASE_");
15331 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15332 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15333 }
15334
15335 return ix86_tls_module_base_symbol;
15336 }
15337 \f
15338 /* Calculate the length of the memory address in the instruction
15339 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15340
15341 int
15342 memory_address_length (rtx addr)
15343 {
15344 struct ix86_address parts;
15345 rtx base, index, disp;
15346 int len;
15347 int ok;
15348
15349 if (GET_CODE (addr) == PRE_DEC
15350 || GET_CODE (addr) == POST_INC
15351 || GET_CODE (addr) == PRE_MODIFY
15352 || GET_CODE (addr) == POST_MODIFY)
15353 return 0;
15354
15355 ok = ix86_decompose_address (addr, &parts);
15356 gcc_assert (ok);
15357
15358 if (parts.base && GET_CODE (parts.base) == SUBREG)
15359 parts.base = SUBREG_REG (parts.base);
15360 if (parts.index && GET_CODE (parts.index) == SUBREG)
15361 parts.index = SUBREG_REG (parts.index);
15362
15363 base = parts.base;
15364 index = parts.index;
15365 disp = parts.disp;
15366 len = 0;
15367
15368 /* Rule of thumb:
15369 - esp as the base always wants an index,
15370 - ebp as the base always wants a displacement. */
15371
15372 /* Register Indirect. */
15373 if (base && !index && !disp)
15374 {
15375 /* esp (for its index) and ebp (for its displacement) need
15376 the two-byte modrm form. */
15377 if (addr == stack_pointer_rtx
15378 || addr == arg_pointer_rtx
15379 || addr == frame_pointer_rtx
15380 || addr == hard_frame_pointer_rtx)
15381 len = 1;
15382 }
15383
15384 /* Direct Addressing. */
15385 else if (disp && !base && !index)
15386 len = 4;
15387
15388 else
15389 {
15390 /* Find the length of the displacement constant. */
15391 if (disp)
15392 {
15393 if (base && satisfies_constraint_K (disp))
15394 len = 1;
15395 else
15396 len = 4;
15397 }
15398 /* ebp always wants a displacement. */
15399 else if (base == hard_frame_pointer_rtx)
15400 len = 1;
15401
15402 /* An index requires the two-byte modrm form.... */
15403 if (index
15404 /* ...like esp, which always wants an index. */
15405 || base == stack_pointer_rtx
15406 || base == arg_pointer_rtx
15407 || base == frame_pointer_rtx)
15408 len += 1;
15409 }
15410
15411 return len;
15412 }
15413
15414 /* Compute the default value for the "length_immediate" attribute. When
15415 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15416 int
15417 ix86_attr_length_immediate_default (rtx insn, int shortform)
15418 {
15419 int len = 0;
15420 int i;
15421 extract_insn_cached (insn);
15422 for (i = recog_data.n_operands - 1; i >= 0; --i)
15423 if (CONSTANT_P (recog_data.operand[i]))
15424 {
15425 gcc_assert (!len);
15426 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15427 len = 1;
15428 else
15429 {
15430 switch (get_attr_mode (insn))
15431 {
15432 case MODE_QI:
15433 len+=1;
15434 break;
15435 case MODE_HI:
15436 len+=2;
15437 break;
15438 case MODE_SI:
15439 len+=4;
15440 break;
15441 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15442 case MODE_DI:
15443 len+=4;
15444 break;
15445 default:
15446 fatal_insn ("unknown insn mode", insn);
15447 }
15448 }
15449 }
15450 return len;
15451 }
15452 /* Compute default value for "length_address" attribute. */
15453 int
15454 ix86_attr_length_address_default (rtx insn)
15455 {
15456 int i;
15457
15458 if (get_attr_type (insn) == TYPE_LEA)
15459 {
15460 rtx set = PATTERN (insn);
15461
15462 if (GET_CODE (set) == PARALLEL)
15463 set = XVECEXP (set, 0, 0);
15464
15465 gcc_assert (GET_CODE (set) == SET);
15466
15467 return memory_address_length (SET_SRC (set));
15468 }
15469
15470 extract_insn_cached (insn);
15471 for (i = recog_data.n_operands - 1; i >= 0; --i)
15472 if (MEM_P (recog_data.operand[i]))
15473 {
15474 return memory_address_length (XEXP (recog_data.operand[i], 0));
15475 break;
15476 }
15477 return 0;
15478 }
15479 \f
15480 /* Return the maximum number of instructions a cpu can issue. */
15481
15482 static int
15483 ix86_issue_rate (void)
15484 {
15485 switch (ix86_tune)
15486 {
15487 case PROCESSOR_PENTIUM:
15488 case PROCESSOR_K6:
15489 return 2;
15490
15491 case PROCESSOR_PENTIUMPRO:
15492 case PROCESSOR_PENTIUM4:
15493 case PROCESSOR_ATHLON:
15494 case PROCESSOR_K8:
15495 case PROCESSOR_AMDFAM10:
15496 case PROCESSOR_NOCONA:
15497 case PROCESSOR_GENERIC32:
15498 case PROCESSOR_GENERIC64:
15499 return 3;
15500
15501 case PROCESSOR_CORE2:
15502 return 4;
15503
15504 default:
15505 return 1;
15506 }
15507 }
15508
15509 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15510 by DEP_INSN and nothing else set by DEP_INSN. */
15511
15512 static int
15513 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15514 {
15515 rtx set, set2;
15516
15517 /* Simplify the test for uninteresting insns. */
15518 if (insn_type != TYPE_SETCC
15519 && insn_type != TYPE_ICMOV
15520 && insn_type != TYPE_FCMOV
15521 && insn_type != TYPE_IBR)
15522 return 0;
15523
15524 if ((set = single_set (dep_insn)) != 0)
15525 {
15526 set = SET_DEST (set);
15527 set2 = NULL_RTX;
15528 }
15529 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15530 && XVECLEN (PATTERN (dep_insn), 0) == 2
15531 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15532 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15533 {
15534 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15535 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15536 }
15537 else
15538 return 0;
15539
15540 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15541 return 0;
15542
15543 /* This test is true if the dependent insn reads the flags but
15544 not any other potentially set register. */
15545 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15546 return 0;
15547
15548 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15549 return 0;
15550
15551 return 1;
15552 }
15553
15554 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15555 address with operands set by DEP_INSN. */
15556
15557 static int
15558 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15559 {
15560 rtx addr;
15561
15562 if (insn_type == TYPE_LEA
15563 && TARGET_PENTIUM)
15564 {
15565 addr = PATTERN (insn);
15566
15567 if (GET_CODE (addr) == PARALLEL)
15568 addr = XVECEXP (addr, 0, 0);
15569
15570 gcc_assert (GET_CODE (addr) == SET);
15571
15572 addr = SET_SRC (addr);
15573 }
15574 else
15575 {
15576 int i;
15577 extract_insn_cached (insn);
15578 for (i = recog_data.n_operands - 1; i >= 0; --i)
15579 if (MEM_P (recog_data.operand[i]))
15580 {
15581 addr = XEXP (recog_data.operand[i], 0);
15582 goto found;
15583 }
15584 return 0;
15585 found:;
15586 }
15587
15588 return modified_in_p (addr, dep_insn);
15589 }
15590
15591 static int
15592 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15593 {
15594 enum attr_type insn_type, dep_insn_type;
15595 enum attr_memory memory;
15596 rtx set, set2;
15597 int dep_insn_code_number;
15598
15599 /* Anti and output dependencies have zero cost on all CPUs. */
15600 if (REG_NOTE_KIND (link) != 0)
15601 return 0;
15602
15603 dep_insn_code_number = recog_memoized (dep_insn);
15604
15605 /* If we can't recognize the insns, we can't really do anything. */
15606 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15607 return cost;
15608
15609 insn_type = get_attr_type (insn);
15610 dep_insn_type = get_attr_type (dep_insn);
15611
15612 switch (ix86_tune)
15613 {
15614 case PROCESSOR_PENTIUM:
15615 /* Address Generation Interlock adds a cycle of latency. */
15616 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15617 cost += 1;
15618
15619 /* ??? Compares pair with jump/setcc. */
15620 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15621 cost = 0;
15622
15623 /* Floating point stores require the value to be ready one cycle earlier. */
15624 if (insn_type == TYPE_FMOV
15625 && get_attr_memory (insn) == MEMORY_STORE
15626 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15627 cost += 1;
15628 break;
15629
15630 case PROCESSOR_PENTIUMPRO:
15631 memory = get_attr_memory (insn);
15632
15633 /* INT->FP conversion is expensive. */
15634 if (get_attr_fp_int_src (dep_insn))
15635 cost += 5;
15636
15637 /* There is one cycle extra latency between an FP op and a store. */
15638 if (insn_type == TYPE_FMOV
15639 && (set = single_set (dep_insn)) != NULL_RTX
15640 && (set2 = single_set (insn)) != NULL_RTX
15641 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15642 && MEM_P (SET_DEST (set2)))
15643 cost += 1;
15644
15645 /* Model the ability of the reorder buffer to hide the latency of a load
15646 by executing it in parallel with the previous instruction when the
15647 previous instruction is not needed to compute the address. */
15648 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15649 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15650 {
15651 /* Claim moves to take one cycle, as the core can issue one load
15652 at a time and the next load can start a cycle later. */
15653 if (dep_insn_type == TYPE_IMOV
15654 || dep_insn_type == TYPE_FMOV)
15655 cost = 1;
15656 else if (cost > 1)
15657 cost--;
15658 }
15659 break;
15660
15661 case PROCESSOR_K6:
15662 memory = get_attr_memory (insn);
15663
15664 /* The esp dependency is resolved before the instruction is really
15665 finished. */
15666 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15667 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15668 return 1;
15669
15670 /* INT->FP conversion is expensive. */
15671 if (get_attr_fp_int_src (dep_insn))
15672 cost += 5;
15673
15674 /* Model the ability of the reorder buffer to hide the latency of a load
15675 by executing it in parallel with the previous instruction when the
15676 previous instruction is not needed to compute the address. */
15677 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15678 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15679 {
15680 /* Claim moves to take one cycle, as the core can issue one load
15681 at a time and the next load can start a cycle later. */
15682 if (dep_insn_type == TYPE_IMOV
15683 || dep_insn_type == TYPE_FMOV)
15684 cost = 1;
15685 else if (cost > 2)
15686 cost -= 2;
15687 else
15688 cost = 1;
15689 }
15690 break;
15691
15692 case PROCESSOR_ATHLON:
15693 case PROCESSOR_K8:
15694 case PROCESSOR_AMDFAM10:
15695 case PROCESSOR_GENERIC32:
15696 case PROCESSOR_GENERIC64:
15697 memory = get_attr_memory (insn);
15698
15699 /* Model the ability of the reorder buffer to hide the latency of a load
15700 by executing it in parallel with the previous instruction when the
15701 previous instruction is not needed to compute the address. */
15702 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15703 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15704 {
15705 enum attr_unit unit = get_attr_unit (insn);
15706 int loadcost = 3;
15707
15708 /* Because of the difference between the length of the integer and
15709 floating-point unit pipeline preparation stages, the memory operands
15710 for floating point are cheaper.
15711
15712 ??? For Athlon the difference is most probably 2. */
15713 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15714 loadcost = 3;
15715 else
15716 loadcost = TARGET_ATHLON ? 2 : 0;
15717
15718 if (cost >= loadcost)
15719 cost -= loadcost;
15720 else
15721 cost = 0;
15722 }
15723
15724 default:
15725 break;
15726 }
15727
15728 return cost;
15729 }
15730
15731 /* How many alternative schedules to try. This should be as wide as the
15732 scheduling freedom in the DFA, but no wider. Making this value too
15733 large results in extra work for the scheduler. */
15734
15735 static int
15736 ia32_multipass_dfa_lookahead (void)
15737 {
15738 if (ix86_tune == PROCESSOR_PENTIUM)
15739 return 2;
15740
15741 if (ix86_tune == PROCESSOR_PENTIUMPRO
15742 || ix86_tune == PROCESSOR_K6)
15743 return 1;
15744
15745 else
15746 return 0;
15747 }
15748
15749 \f
15750 /* Compute the alignment given to a constant that is being placed in memory.
15751 EXP is the constant and ALIGN is the alignment that the object would
15752 ordinarily have.
15753 The value of this function is used instead of that alignment to align
15754 the object. */
15755
15756 int
15757 ix86_constant_alignment (tree exp, int align)
15758 {
15759 if (TREE_CODE (exp) == REAL_CST)
15760 {
15761 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15762 return 64;
15763 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15764 return 128;
15765 }
15766 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15767 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15768 return BITS_PER_WORD;
15769
15770 return align;
15771 }
15772
15773 /* Compute the alignment for a static variable.
15774 TYPE is the data type, and ALIGN is the alignment that
15775 the object would ordinarily have. The value of this function is used
15776 instead of that alignment to align the object. */
15777
15778 int
15779 ix86_data_alignment (tree type, int align)
15780 {
15781 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15782
15783 if (AGGREGATE_TYPE_P (type)
15784 && TYPE_SIZE (type)
15785 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15786 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15787 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15788 && align < max_align)
15789 align = max_align;
15790
15791 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15792 to a 16-byte boundary. */
15793 if (TARGET_64BIT)
15794 {
15795 if (AGGREGATE_TYPE_P (type)
15796 && TYPE_SIZE (type)
15797 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15798 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15799 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15800 return 128;
15801 }
15802
15803 if (TREE_CODE (type) == ARRAY_TYPE)
15804 {
15805 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15806 return 64;
15807 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15808 return 128;
15809 }
15810 else if (TREE_CODE (type) == COMPLEX_TYPE)
15811 {
15812
15813 if (TYPE_MODE (type) == DCmode && align < 64)
15814 return 64;
15815 if (TYPE_MODE (type) == XCmode && align < 128)
15816 return 128;
15817 }
15818 else if ((TREE_CODE (type) == RECORD_TYPE
15819 || TREE_CODE (type) == UNION_TYPE
15820 || TREE_CODE (type) == QUAL_UNION_TYPE)
15821 && TYPE_FIELDS (type))
15822 {
15823 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15824 return 64;
15825 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15826 return 128;
15827 }
15828 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15829 || TREE_CODE (type) == INTEGER_TYPE)
15830 {
15831 if (TYPE_MODE (type) == DFmode && align < 64)
15832 return 64;
15833 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15834 return 128;
15835 }
15836
15837 return align;
15838 }
15839
15840 /* Compute the alignment for a local variable.
15841 TYPE is the data type, and ALIGN is the alignment that
15842 the object would ordinarily have. The value of this macro is used
15843 instead of that alignment to align the object. */
15844
15845 int
15846 ix86_local_alignment (tree type, int align)
15847 {
15848 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15849 to a 16-byte boundary. */
15850 if (TARGET_64BIT)
15851 {
15852 if (AGGREGATE_TYPE_P (type)
15853 && TYPE_SIZE (type)
15854 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15855 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15856 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15857 return 128;
15858 }
15859 if (TREE_CODE (type) == ARRAY_TYPE)
15860 {
15861 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15862 return 64;
15863 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15864 return 128;
15865 }
15866 else if (TREE_CODE (type) == COMPLEX_TYPE)
15867 {
15868 if (TYPE_MODE (type) == DCmode && align < 64)
15869 return 64;
15870 if (TYPE_MODE (type) == XCmode && align < 128)
15871 return 128;
15872 }
15873 else if ((TREE_CODE (type) == RECORD_TYPE
15874 || TREE_CODE (type) == UNION_TYPE
15875 || TREE_CODE (type) == QUAL_UNION_TYPE)
15876 && TYPE_FIELDS (type))
15877 {
15878 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15879 return 64;
15880 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15881 return 128;
15882 }
15883 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15884 || TREE_CODE (type) == INTEGER_TYPE)
15885 {
15886
15887 if (TYPE_MODE (type) == DFmode && align < 64)
15888 return 64;
15889 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15890 return 128;
15891 }
15892 return align;
15893 }
15894 \f
15895 /* Emit RTL insns to initialize the variable parts of a trampoline.
15896 FNADDR is an RTX for the address of the function's pure code.
15897 CXT is an RTX for the static chain value for the function. */
15898 void
15899 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15900 {
15901 if (!TARGET_64BIT)
15902 {
15903 /* Compute offset from the end of the jmp to the target function. */
15904 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15905 plus_constant (tramp, 10),
15906 NULL_RTX, 1, OPTAB_DIRECT);
15907 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15908 gen_int_mode (0xb9, QImode));
15909 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15910 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15911 gen_int_mode (0xe9, QImode));
15912 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15913 }
15914 else
15915 {
15916 int offset = 0;
15917 /* Try to load the address using the shorter movl instead of movabs.
15918 We may want to support movq for kernel mode, but the kernel does not
15919 use trampolines at the moment. */
15920 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15921 {
15922 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15923 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15924 gen_int_mode (0xbb41, HImode));
15925 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15926 gen_lowpart (SImode, fnaddr));
15927 offset += 6;
15928 }
15929 else
15930 {
15931 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15932 gen_int_mode (0xbb49, HImode));
15933 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15934 fnaddr);
15935 offset += 10;
15936 }
15937 /* Load static chain using movabs to r10. */
15938 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15939 gen_int_mode (0xba49, HImode));
15940 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15941 cxt);
15942 offset += 10;
15943 /* Jump to r11. */
15944 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15945 gen_int_mode (0xff49, HImode));
15946 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15947 gen_int_mode (0xe3, QImode));
15948 offset += 3;
15949 gcc_assert (offset <= TRAMPOLINE_SIZE);
15950 }
15951
15952 #ifdef ENABLE_EXECUTE_STACK
15953 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15954 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15955 #endif
15956 }
15957 \f
15958 /* Codes for all the SSE/MMX builtins. */
15959 enum ix86_builtins
15960 {
15961 IX86_BUILTIN_ADDPS,
15962 IX86_BUILTIN_ADDSS,
15963 IX86_BUILTIN_DIVPS,
15964 IX86_BUILTIN_DIVSS,
15965 IX86_BUILTIN_MULPS,
15966 IX86_BUILTIN_MULSS,
15967 IX86_BUILTIN_SUBPS,
15968 IX86_BUILTIN_SUBSS,
15969
15970 IX86_BUILTIN_CMPEQPS,
15971 IX86_BUILTIN_CMPLTPS,
15972 IX86_BUILTIN_CMPLEPS,
15973 IX86_BUILTIN_CMPGTPS,
15974 IX86_BUILTIN_CMPGEPS,
15975 IX86_BUILTIN_CMPNEQPS,
15976 IX86_BUILTIN_CMPNLTPS,
15977 IX86_BUILTIN_CMPNLEPS,
15978 IX86_BUILTIN_CMPNGTPS,
15979 IX86_BUILTIN_CMPNGEPS,
15980 IX86_BUILTIN_CMPORDPS,
15981 IX86_BUILTIN_CMPUNORDPS,
15982 IX86_BUILTIN_CMPEQSS,
15983 IX86_BUILTIN_CMPLTSS,
15984 IX86_BUILTIN_CMPLESS,
15985 IX86_BUILTIN_CMPNEQSS,
15986 IX86_BUILTIN_CMPNLTSS,
15987 IX86_BUILTIN_CMPNLESS,
15988 IX86_BUILTIN_CMPNGTSS,
15989 IX86_BUILTIN_CMPNGESS,
15990 IX86_BUILTIN_CMPORDSS,
15991 IX86_BUILTIN_CMPUNORDSS,
15992
15993 IX86_BUILTIN_COMIEQSS,
15994 IX86_BUILTIN_COMILTSS,
15995 IX86_BUILTIN_COMILESS,
15996 IX86_BUILTIN_COMIGTSS,
15997 IX86_BUILTIN_COMIGESS,
15998 IX86_BUILTIN_COMINEQSS,
15999 IX86_BUILTIN_UCOMIEQSS,
16000 IX86_BUILTIN_UCOMILTSS,
16001 IX86_BUILTIN_UCOMILESS,
16002 IX86_BUILTIN_UCOMIGTSS,
16003 IX86_BUILTIN_UCOMIGESS,
16004 IX86_BUILTIN_UCOMINEQSS,
16005
16006 IX86_BUILTIN_CVTPI2PS,
16007 IX86_BUILTIN_CVTPS2PI,
16008 IX86_BUILTIN_CVTSI2SS,
16009 IX86_BUILTIN_CVTSI642SS,
16010 IX86_BUILTIN_CVTSS2SI,
16011 IX86_BUILTIN_CVTSS2SI64,
16012 IX86_BUILTIN_CVTTPS2PI,
16013 IX86_BUILTIN_CVTTSS2SI,
16014 IX86_BUILTIN_CVTTSS2SI64,
16015
16016 IX86_BUILTIN_MAXPS,
16017 IX86_BUILTIN_MAXSS,
16018 IX86_BUILTIN_MINPS,
16019 IX86_BUILTIN_MINSS,
16020
16021 IX86_BUILTIN_LOADUPS,
16022 IX86_BUILTIN_STOREUPS,
16023 IX86_BUILTIN_MOVSS,
16024
16025 IX86_BUILTIN_MOVHLPS,
16026 IX86_BUILTIN_MOVLHPS,
16027 IX86_BUILTIN_LOADHPS,
16028 IX86_BUILTIN_LOADLPS,
16029 IX86_BUILTIN_STOREHPS,
16030 IX86_BUILTIN_STORELPS,
16031
16032 IX86_BUILTIN_MASKMOVQ,
16033 IX86_BUILTIN_MOVMSKPS,
16034 IX86_BUILTIN_PMOVMSKB,
16035
16036 IX86_BUILTIN_MOVNTPS,
16037 IX86_BUILTIN_MOVNTQ,
16038
16039 IX86_BUILTIN_LOADDQU,
16040 IX86_BUILTIN_STOREDQU,
16041
16042 IX86_BUILTIN_PACKSSWB,
16043 IX86_BUILTIN_PACKSSDW,
16044 IX86_BUILTIN_PACKUSWB,
16045
16046 IX86_BUILTIN_PADDB,
16047 IX86_BUILTIN_PADDW,
16048 IX86_BUILTIN_PADDD,
16049 IX86_BUILTIN_PADDQ,
16050 IX86_BUILTIN_PADDSB,
16051 IX86_BUILTIN_PADDSW,
16052 IX86_BUILTIN_PADDUSB,
16053 IX86_BUILTIN_PADDUSW,
16054 IX86_BUILTIN_PSUBB,
16055 IX86_BUILTIN_PSUBW,
16056 IX86_BUILTIN_PSUBD,
16057 IX86_BUILTIN_PSUBQ,
16058 IX86_BUILTIN_PSUBSB,
16059 IX86_BUILTIN_PSUBSW,
16060 IX86_BUILTIN_PSUBUSB,
16061 IX86_BUILTIN_PSUBUSW,
16062
16063 IX86_BUILTIN_PAND,
16064 IX86_BUILTIN_PANDN,
16065 IX86_BUILTIN_POR,
16066 IX86_BUILTIN_PXOR,
16067
16068 IX86_BUILTIN_PAVGB,
16069 IX86_BUILTIN_PAVGW,
16070
16071 IX86_BUILTIN_PCMPEQB,
16072 IX86_BUILTIN_PCMPEQW,
16073 IX86_BUILTIN_PCMPEQD,
16074 IX86_BUILTIN_PCMPGTB,
16075 IX86_BUILTIN_PCMPGTW,
16076 IX86_BUILTIN_PCMPGTD,
16077
16078 IX86_BUILTIN_PMADDWD,
16079
16080 IX86_BUILTIN_PMAXSW,
16081 IX86_BUILTIN_PMAXUB,
16082 IX86_BUILTIN_PMINSW,
16083 IX86_BUILTIN_PMINUB,
16084
16085 IX86_BUILTIN_PMULHUW,
16086 IX86_BUILTIN_PMULHW,
16087 IX86_BUILTIN_PMULLW,
16088
16089 IX86_BUILTIN_PSADBW,
16090 IX86_BUILTIN_PSHUFW,
16091
16092 IX86_BUILTIN_PSLLW,
16093 IX86_BUILTIN_PSLLD,
16094 IX86_BUILTIN_PSLLQ,
16095 IX86_BUILTIN_PSRAW,
16096 IX86_BUILTIN_PSRAD,
16097 IX86_BUILTIN_PSRLW,
16098 IX86_BUILTIN_PSRLD,
16099 IX86_BUILTIN_PSRLQ,
16100 IX86_BUILTIN_PSLLWI,
16101 IX86_BUILTIN_PSLLDI,
16102 IX86_BUILTIN_PSLLQI,
16103 IX86_BUILTIN_PSRAWI,
16104 IX86_BUILTIN_PSRADI,
16105 IX86_BUILTIN_PSRLWI,
16106 IX86_BUILTIN_PSRLDI,
16107 IX86_BUILTIN_PSRLQI,
16108
16109 IX86_BUILTIN_PUNPCKHBW,
16110 IX86_BUILTIN_PUNPCKHWD,
16111 IX86_BUILTIN_PUNPCKHDQ,
16112 IX86_BUILTIN_PUNPCKLBW,
16113 IX86_BUILTIN_PUNPCKLWD,
16114 IX86_BUILTIN_PUNPCKLDQ,
16115
16116 IX86_BUILTIN_SHUFPS,
16117
16118 IX86_BUILTIN_RCPPS,
16119 IX86_BUILTIN_RCPSS,
16120 IX86_BUILTIN_RSQRTPS,
16121 IX86_BUILTIN_RSQRTSS,
16122 IX86_BUILTIN_SQRTPS,
16123 IX86_BUILTIN_SQRTSS,
16124
16125 IX86_BUILTIN_UNPCKHPS,
16126 IX86_BUILTIN_UNPCKLPS,
16127
16128 IX86_BUILTIN_ANDPS,
16129 IX86_BUILTIN_ANDNPS,
16130 IX86_BUILTIN_ORPS,
16131 IX86_BUILTIN_XORPS,
16132
16133 IX86_BUILTIN_EMMS,
16134 IX86_BUILTIN_LDMXCSR,
16135 IX86_BUILTIN_STMXCSR,
16136 IX86_BUILTIN_SFENCE,
16137
16138 /* 3DNow! Original */
16139 IX86_BUILTIN_FEMMS,
16140 IX86_BUILTIN_PAVGUSB,
16141 IX86_BUILTIN_PF2ID,
16142 IX86_BUILTIN_PFACC,
16143 IX86_BUILTIN_PFADD,
16144 IX86_BUILTIN_PFCMPEQ,
16145 IX86_BUILTIN_PFCMPGE,
16146 IX86_BUILTIN_PFCMPGT,
16147 IX86_BUILTIN_PFMAX,
16148 IX86_BUILTIN_PFMIN,
16149 IX86_BUILTIN_PFMUL,
16150 IX86_BUILTIN_PFRCP,
16151 IX86_BUILTIN_PFRCPIT1,
16152 IX86_BUILTIN_PFRCPIT2,
16153 IX86_BUILTIN_PFRSQIT1,
16154 IX86_BUILTIN_PFRSQRT,
16155 IX86_BUILTIN_PFSUB,
16156 IX86_BUILTIN_PFSUBR,
16157 IX86_BUILTIN_PI2FD,
16158 IX86_BUILTIN_PMULHRW,
16159
16160 /* 3DNow! Athlon Extensions */
16161 IX86_BUILTIN_PF2IW,
16162 IX86_BUILTIN_PFNACC,
16163 IX86_BUILTIN_PFPNACC,
16164 IX86_BUILTIN_PI2FW,
16165 IX86_BUILTIN_PSWAPDSI,
16166 IX86_BUILTIN_PSWAPDSF,
16167
16168 /* SSE2 */
16169 IX86_BUILTIN_ADDPD,
16170 IX86_BUILTIN_ADDSD,
16171 IX86_BUILTIN_DIVPD,
16172 IX86_BUILTIN_DIVSD,
16173 IX86_BUILTIN_MULPD,
16174 IX86_BUILTIN_MULSD,
16175 IX86_BUILTIN_SUBPD,
16176 IX86_BUILTIN_SUBSD,
16177
16178 IX86_BUILTIN_CMPEQPD,
16179 IX86_BUILTIN_CMPLTPD,
16180 IX86_BUILTIN_CMPLEPD,
16181 IX86_BUILTIN_CMPGTPD,
16182 IX86_BUILTIN_CMPGEPD,
16183 IX86_BUILTIN_CMPNEQPD,
16184 IX86_BUILTIN_CMPNLTPD,
16185 IX86_BUILTIN_CMPNLEPD,
16186 IX86_BUILTIN_CMPNGTPD,
16187 IX86_BUILTIN_CMPNGEPD,
16188 IX86_BUILTIN_CMPORDPD,
16189 IX86_BUILTIN_CMPUNORDPD,
16190 IX86_BUILTIN_CMPEQSD,
16191 IX86_BUILTIN_CMPLTSD,
16192 IX86_BUILTIN_CMPLESD,
16193 IX86_BUILTIN_CMPNEQSD,
16194 IX86_BUILTIN_CMPNLTSD,
16195 IX86_BUILTIN_CMPNLESD,
16196 IX86_BUILTIN_CMPORDSD,
16197 IX86_BUILTIN_CMPUNORDSD,
16198
16199 IX86_BUILTIN_COMIEQSD,
16200 IX86_BUILTIN_COMILTSD,
16201 IX86_BUILTIN_COMILESD,
16202 IX86_BUILTIN_COMIGTSD,
16203 IX86_BUILTIN_COMIGESD,
16204 IX86_BUILTIN_COMINEQSD,
16205 IX86_BUILTIN_UCOMIEQSD,
16206 IX86_BUILTIN_UCOMILTSD,
16207 IX86_BUILTIN_UCOMILESD,
16208 IX86_BUILTIN_UCOMIGTSD,
16209 IX86_BUILTIN_UCOMIGESD,
16210 IX86_BUILTIN_UCOMINEQSD,
16211
16212 IX86_BUILTIN_MAXPD,
16213 IX86_BUILTIN_MAXSD,
16214 IX86_BUILTIN_MINPD,
16215 IX86_BUILTIN_MINSD,
16216
16217 IX86_BUILTIN_ANDPD,
16218 IX86_BUILTIN_ANDNPD,
16219 IX86_BUILTIN_ORPD,
16220 IX86_BUILTIN_XORPD,
16221
16222 IX86_BUILTIN_SQRTPD,
16223 IX86_BUILTIN_SQRTSD,
16224
16225 IX86_BUILTIN_UNPCKHPD,
16226 IX86_BUILTIN_UNPCKLPD,
16227
16228 IX86_BUILTIN_SHUFPD,
16229
16230 IX86_BUILTIN_LOADUPD,
16231 IX86_BUILTIN_STOREUPD,
16232 IX86_BUILTIN_MOVSD,
16233
16234 IX86_BUILTIN_LOADHPD,
16235 IX86_BUILTIN_LOADLPD,
16236
16237 IX86_BUILTIN_CVTDQ2PD,
16238 IX86_BUILTIN_CVTDQ2PS,
16239
16240 IX86_BUILTIN_CVTPD2DQ,
16241 IX86_BUILTIN_CVTPD2PI,
16242 IX86_BUILTIN_CVTPD2PS,
16243 IX86_BUILTIN_CVTTPD2DQ,
16244 IX86_BUILTIN_CVTTPD2PI,
16245
16246 IX86_BUILTIN_CVTPI2PD,
16247 IX86_BUILTIN_CVTSI2SD,
16248 IX86_BUILTIN_CVTSI642SD,
16249
16250 IX86_BUILTIN_CVTSD2SI,
16251 IX86_BUILTIN_CVTSD2SI64,
16252 IX86_BUILTIN_CVTSD2SS,
16253 IX86_BUILTIN_CVTSS2SD,
16254 IX86_BUILTIN_CVTTSD2SI,
16255 IX86_BUILTIN_CVTTSD2SI64,
16256
16257 IX86_BUILTIN_CVTPS2DQ,
16258 IX86_BUILTIN_CVTPS2PD,
16259 IX86_BUILTIN_CVTTPS2DQ,
16260
16261 IX86_BUILTIN_MOVNTI,
16262 IX86_BUILTIN_MOVNTPD,
16263 IX86_BUILTIN_MOVNTDQ,
16264
16265 /* SSE2 MMX */
16266 IX86_BUILTIN_MASKMOVDQU,
16267 IX86_BUILTIN_MOVMSKPD,
16268 IX86_BUILTIN_PMOVMSKB128,
16269
16270 IX86_BUILTIN_PACKSSWB128,
16271 IX86_BUILTIN_PACKSSDW128,
16272 IX86_BUILTIN_PACKUSWB128,
16273
16274 IX86_BUILTIN_PADDB128,
16275 IX86_BUILTIN_PADDW128,
16276 IX86_BUILTIN_PADDD128,
16277 IX86_BUILTIN_PADDQ128,
16278 IX86_BUILTIN_PADDSB128,
16279 IX86_BUILTIN_PADDSW128,
16280 IX86_BUILTIN_PADDUSB128,
16281 IX86_BUILTIN_PADDUSW128,
16282 IX86_BUILTIN_PSUBB128,
16283 IX86_BUILTIN_PSUBW128,
16284 IX86_BUILTIN_PSUBD128,
16285 IX86_BUILTIN_PSUBQ128,
16286 IX86_BUILTIN_PSUBSB128,
16287 IX86_BUILTIN_PSUBSW128,
16288 IX86_BUILTIN_PSUBUSB128,
16289 IX86_BUILTIN_PSUBUSW128,
16290
16291 IX86_BUILTIN_PAND128,
16292 IX86_BUILTIN_PANDN128,
16293 IX86_BUILTIN_POR128,
16294 IX86_BUILTIN_PXOR128,
16295
16296 IX86_BUILTIN_PAVGB128,
16297 IX86_BUILTIN_PAVGW128,
16298
16299 IX86_BUILTIN_PCMPEQB128,
16300 IX86_BUILTIN_PCMPEQW128,
16301 IX86_BUILTIN_PCMPEQD128,
16302 IX86_BUILTIN_PCMPGTB128,
16303 IX86_BUILTIN_PCMPGTW128,
16304 IX86_BUILTIN_PCMPGTD128,
16305
16306 IX86_BUILTIN_PMADDWD128,
16307
16308 IX86_BUILTIN_PMAXSW128,
16309 IX86_BUILTIN_PMAXUB128,
16310 IX86_BUILTIN_PMINSW128,
16311 IX86_BUILTIN_PMINUB128,
16312
16313 IX86_BUILTIN_PMULUDQ,
16314 IX86_BUILTIN_PMULUDQ128,
16315 IX86_BUILTIN_PMULHUW128,
16316 IX86_BUILTIN_PMULHW128,
16317 IX86_BUILTIN_PMULLW128,
16318
16319 IX86_BUILTIN_PSADBW128,
16320 IX86_BUILTIN_PSHUFHW,
16321 IX86_BUILTIN_PSHUFLW,
16322 IX86_BUILTIN_PSHUFD,
16323
16324 IX86_BUILTIN_PSLLDQI128,
16325 IX86_BUILTIN_PSLLWI128,
16326 IX86_BUILTIN_PSLLDI128,
16327 IX86_BUILTIN_PSLLQI128,
16328 IX86_BUILTIN_PSRAWI128,
16329 IX86_BUILTIN_PSRADI128,
16330 IX86_BUILTIN_PSRLDQI128,
16331 IX86_BUILTIN_PSRLWI128,
16332 IX86_BUILTIN_PSRLDI128,
16333 IX86_BUILTIN_PSRLQI128,
16334
16335 IX86_BUILTIN_PSLLDQ128,
16336 IX86_BUILTIN_PSLLW128,
16337 IX86_BUILTIN_PSLLD128,
16338 IX86_BUILTIN_PSLLQ128,
16339 IX86_BUILTIN_PSRAW128,
16340 IX86_BUILTIN_PSRAD128,
16341 IX86_BUILTIN_PSRLW128,
16342 IX86_BUILTIN_PSRLD128,
16343 IX86_BUILTIN_PSRLQ128,
16344
16345 IX86_BUILTIN_PUNPCKHBW128,
16346 IX86_BUILTIN_PUNPCKHWD128,
16347 IX86_BUILTIN_PUNPCKHDQ128,
16348 IX86_BUILTIN_PUNPCKHQDQ128,
16349 IX86_BUILTIN_PUNPCKLBW128,
16350 IX86_BUILTIN_PUNPCKLWD128,
16351 IX86_BUILTIN_PUNPCKLDQ128,
16352 IX86_BUILTIN_PUNPCKLQDQ128,
16353
16354 IX86_BUILTIN_CLFLUSH,
16355 IX86_BUILTIN_MFENCE,
16356 IX86_BUILTIN_LFENCE,
16357
16358 /* Prescott New Instructions. */
16359 IX86_BUILTIN_ADDSUBPS,
16360 IX86_BUILTIN_HADDPS,
16361 IX86_BUILTIN_HSUBPS,
16362 IX86_BUILTIN_MOVSHDUP,
16363 IX86_BUILTIN_MOVSLDUP,
16364 IX86_BUILTIN_ADDSUBPD,
16365 IX86_BUILTIN_HADDPD,
16366 IX86_BUILTIN_HSUBPD,
16367 IX86_BUILTIN_LDDQU,
16368
16369 IX86_BUILTIN_MONITOR,
16370 IX86_BUILTIN_MWAIT,
16371
16372 /* SSSE3. */
16373 IX86_BUILTIN_PHADDW,
16374 IX86_BUILTIN_PHADDD,
16375 IX86_BUILTIN_PHADDSW,
16376 IX86_BUILTIN_PHSUBW,
16377 IX86_BUILTIN_PHSUBD,
16378 IX86_BUILTIN_PHSUBSW,
16379 IX86_BUILTIN_PMADDUBSW,
16380 IX86_BUILTIN_PMULHRSW,
16381 IX86_BUILTIN_PSHUFB,
16382 IX86_BUILTIN_PSIGNB,
16383 IX86_BUILTIN_PSIGNW,
16384 IX86_BUILTIN_PSIGND,
16385 IX86_BUILTIN_PALIGNR,
16386 IX86_BUILTIN_PABSB,
16387 IX86_BUILTIN_PABSW,
16388 IX86_BUILTIN_PABSD,
16389
16390 IX86_BUILTIN_PHADDW128,
16391 IX86_BUILTIN_PHADDD128,
16392 IX86_BUILTIN_PHADDSW128,
16393 IX86_BUILTIN_PHSUBW128,
16394 IX86_BUILTIN_PHSUBD128,
16395 IX86_BUILTIN_PHSUBSW128,
16396 IX86_BUILTIN_PMADDUBSW128,
16397 IX86_BUILTIN_PMULHRSW128,
16398 IX86_BUILTIN_PSHUFB128,
16399 IX86_BUILTIN_PSIGNB128,
16400 IX86_BUILTIN_PSIGNW128,
16401 IX86_BUILTIN_PSIGND128,
16402 IX86_BUILTIN_PALIGNR128,
16403 IX86_BUILTIN_PABSB128,
16404 IX86_BUILTIN_PABSW128,
16405 IX86_BUILTIN_PABSD128,
16406
16407 /* AMDFAM10 - SSE4A New Instructions. */
16408 IX86_BUILTIN_MOVNTSD,
16409 IX86_BUILTIN_MOVNTSS,
16410 IX86_BUILTIN_EXTRQI,
16411 IX86_BUILTIN_EXTRQ,
16412 IX86_BUILTIN_INSERTQI,
16413 IX86_BUILTIN_INSERTQ,
16414
16415 IX86_BUILTIN_VEC_INIT_V2SI,
16416 IX86_BUILTIN_VEC_INIT_V4HI,
16417 IX86_BUILTIN_VEC_INIT_V8QI,
16418 IX86_BUILTIN_VEC_EXT_V2DF,
16419 IX86_BUILTIN_VEC_EXT_V2DI,
16420 IX86_BUILTIN_VEC_EXT_V4SF,
16421 IX86_BUILTIN_VEC_EXT_V4SI,
16422 IX86_BUILTIN_VEC_EXT_V8HI,
16423 IX86_BUILTIN_VEC_EXT_V2SI,
16424 IX86_BUILTIN_VEC_EXT_V4HI,
16425 IX86_BUILTIN_VEC_SET_V8HI,
16426 IX86_BUILTIN_VEC_SET_V4HI,
16427
16428 IX86_BUILTIN_MAX
16429 };
16430
16431 /* Table for the ix86 builtin decls. */
16432 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16433
16434 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16435 * only if the target_flags include one of MASK. Stores the function decl
16436 * in the ix86_builtins array.
16437 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16438
16439 static inline tree
16440 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16441 {
16442 tree decl = NULL_TREE;
16443
16444 if (mask & target_flags
16445 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16446 {
16447 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16448 NULL, NULL_TREE);
16449 ix86_builtins[(int) code] = decl;
16450 }
16451
16452 return decl;
16453 }
16454
16455 /* Like def_builtin, but also marks the function decl "const". */
16456
16457 static inline tree
16458 def_builtin_const (int mask, const char *name, tree type,
16459 enum ix86_builtins code)
16460 {
16461 tree decl = def_builtin (mask, name, type, code);
16462 if (decl)
16463 TREE_READONLY (decl) = 1;
16464 return decl;
16465 }
16466
16467 /* Bits for builtin_description.flag. */
16468
16469 /* Set when we don't support the comparison natively, and should
16470 swap_comparison in order to support it. */
16471 #define BUILTIN_DESC_SWAP_OPERANDS 1
16472
16473 struct builtin_description
16474 {
16475 const unsigned int mask;
16476 const enum insn_code icode;
16477 const char *const name;
16478 const enum ix86_builtins code;
16479 const enum rtx_code comparison;
16480 const unsigned int flag;
16481 };
16482
16483 static const struct builtin_description bdesc_comi[] =
16484 {
16485 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16486 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16487 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16488 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16489 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16490 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16491 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16492 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16493 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16494 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16495 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16496 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16497 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16498 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16499 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16500 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16501 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16502 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16503 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16504 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16505 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16506 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16507 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16508 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16509 };
16510
16511 static const struct builtin_description bdesc_2arg[] =
16512 {
16513 /* SSE */
16514 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16515 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16516 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16517 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16518 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16519 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16520 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16521 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16522
16523 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16524 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16525 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16526 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16527 BUILTIN_DESC_SWAP_OPERANDS },
16528 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16529 BUILTIN_DESC_SWAP_OPERANDS },
16530 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16531 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16532 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16533 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16534 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16535 BUILTIN_DESC_SWAP_OPERANDS },
16536 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16537 BUILTIN_DESC_SWAP_OPERANDS },
16538 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16539 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16540 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16541 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16542 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16543 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16544 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16545 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16546 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16547 BUILTIN_DESC_SWAP_OPERANDS },
16548 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16549 BUILTIN_DESC_SWAP_OPERANDS },
16550 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16551
16552 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16553 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16554 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16555 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16556
16557 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16558 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16559 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16560 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16561
16562 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16563 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16564 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16565 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16566 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16567
16568 /* MMX */
16569 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16570 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16571 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16572 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16573 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16574 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16575 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16577
16578 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16579 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16580 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16581 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16582 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16583 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16584 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16585 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16586
16587 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16588 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16589 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16590
16591 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16592 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16593 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16594 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16595
16596 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16597 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16598
16599 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16600 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16601 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16602 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16603 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16604 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16605
16606 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16607 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16608 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16609 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16610
16611 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16612 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16613 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16614 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16615 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16616 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16617
16618 /* Special. */
16619 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16620 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16621 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16622
16623 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16624 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16625 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16626
16627 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16628 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16629 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16630 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16631 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16632 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16633
16634 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16635 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16636 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16637 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16638 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16639 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16640
16641 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16642 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16643 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16644 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16645
16646 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16647 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16648
16649 /* SSE2 */
16650 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16651 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16652 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16653 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16654 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16655 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16656 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16657 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16658
16659 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16660 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16661 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16662 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16663 BUILTIN_DESC_SWAP_OPERANDS },
16664 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16665 BUILTIN_DESC_SWAP_OPERANDS },
16666 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16667 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16668 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16669 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16670 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16671 BUILTIN_DESC_SWAP_OPERANDS },
16672 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16673 BUILTIN_DESC_SWAP_OPERANDS },
16674 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16675 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16676 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16677 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16678 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16679 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16680 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16681 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16682 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16683
16684 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16685 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16686 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16687 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16688
16689 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16690 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16691 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16692 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16693
16694 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16695 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16696 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16697
16698 /* SSE2 MMX */
16699 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16700 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16701 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16702 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16703 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16704 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16705 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16706 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16707
16708 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16709 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16710 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16711 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16712 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16713 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16714 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16715 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16716
16717 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16718 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16719
16720 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16721 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16722 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16723 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16724
16725 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16726 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16727
16728 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16729 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16730 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16731 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16732 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16733 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16734
16735 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16736 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16737 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16738 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16739
16740 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16741 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16742 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16743 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16744 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16745 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16746 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16747 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16748
16749 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16750 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16751 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16752
16753 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16754 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16755
16756 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16757 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16758
16759 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16760 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16761 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16762
16763 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16764 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16765 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16766
16767 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16768 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16769
16770 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16771
16772 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16773 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16774 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16775 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16776
16777 /* SSE3 */
16778 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16779 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16780 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16781 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16782 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16783 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16784
16785 /* SSSE3 */
16786 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16787 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16788 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16789 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16790 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16791 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16792 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16793 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16794 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16795 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16796 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16797 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16798 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16799 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16800 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16801 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16802 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16803 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16804 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16805 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16806 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16807 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16808 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16809 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16810 };
16811
16812 static const struct builtin_description bdesc_1arg[] =
16813 {
16814 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16815 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16816
16817 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16818 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16819 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16820
16821 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16822 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16823 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16824 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16825 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16826 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16827
16828 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16829 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16830
16831 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16832
16833 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16834 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16835
16836 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16837 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16838 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16839 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16840 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16841
16842 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16843
16844 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16845 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16846 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16847 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16848
16849 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16850 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16851 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16852
16853 /* SSE3 */
16854 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16855 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16856
16857 /* SSSE3 */
16858 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16859 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16860 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16861 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16862 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16863 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16864 };
16865
16866 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16867    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
16868    builtins are defined.  */
16869 static void
16870 ix86_init_mmx_sse_builtins (void)
16871 {
16872 const struct builtin_description * d;
16873 size_t i;
16874
16875 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16876 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16877 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16878 tree V2DI_type_node
16879 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16880 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16881 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16882 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16883 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16884 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16885 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16886
16887 tree pchar_type_node = build_pointer_type (char_type_node);
16888 tree pcchar_type_node = build_pointer_type (
16889 build_type_variant (char_type_node, 1, 0));
16890 tree pfloat_type_node = build_pointer_type (float_type_node);
16891 tree pcfloat_type_node = build_pointer_type (
16892 build_type_variant (float_type_node, 1, 0));
16893 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16894 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16895 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16896
16897 /* Comparisons. */
16898 tree int_ftype_v4sf_v4sf
16899 = build_function_type_list (integer_type_node,
16900 V4SF_type_node, V4SF_type_node, NULL_TREE);
16901 tree v4si_ftype_v4sf_v4sf
16902 = build_function_type_list (V4SI_type_node,
16903 V4SF_type_node, V4SF_type_node, NULL_TREE);
16904 /* MMX/SSE/integer conversions. */
16905 tree int_ftype_v4sf
16906 = build_function_type_list (integer_type_node,
16907 V4SF_type_node, NULL_TREE);
16908 tree int64_ftype_v4sf
16909 = build_function_type_list (long_long_integer_type_node,
16910 V4SF_type_node, NULL_TREE);
16911 tree int_ftype_v8qi
16912 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16913 tree v4sf_ftype_v4sf_int
16914 = build_function_type_list (V4SF_type_node,
16915 V4SF_type_node, integer_type_node, NULL_TREE);
16916 tree v4sf_ftype_v4sf_int64
16917 = build_function_type_list (V4SF_type_node,
16918 V4SF_type_node, long_long_integer_type_node,
16919 NULL_TREE);
16920 tree v4sf_ftype_v4sf_v2si
16921 = build_function_type_list (V4SF_type_node,
16922 V4SF_type_node, V2SI_type_node, NULL_TREE);
16923
16924 /* Miscellaneous. */
16925 tree v8qi_ftype_v4hi_v4hi
16926 = build_function_type_list (V8QI_type_node,
16927 V4HI_type_node, V4HI_type_node, NULL_TREE);
16928 tree v4hi_ftype_v2si_v2si
16929 = build_function_type_list (V4HI_type_node,
16930 V2SI_type_node, V2SI_type_node, NULL_TREE);
16931 tree v4sf_ftype_v4sf_v4sf_int
16932 = build_function_type_list (V4SF_type_node,
16933 V4SF_type_node, V4SF_type_node,
16934 integer_type_node, NULL_TREE);
16935 tree v2si_ftype_v4hi_v4hi
16936 = build_function_type_list (V2SI_type_node,
16937 V4HI_type_node, V4HI_type_node, NULL_TREE);
16938 tree v4hi_ftype_v4hi_int
16939 = build_function_type_list (V4HI_type_node,
16940 V4HI_type_node, integer_type_node, NULL_TREE);
16941 tree v4hi_ftype_v4hi_di
16942 = build_function_type_list (V4HI_type_node,
16943 V4HI_type_node, long_long_unsigned_type_node,
16944 NULL_TREE);
16945 tree v2si_ftype_v2si_di
16946 = build_function_type_list (V2SI_type_node,
16947 V2SI_type_node, long_long_unsigned_type_node,
16948 NULL_TREE);
16949 tree void_ftype_void
16950 = build_function_type (void_type_node, void_list_node);
16951 tree void_ftype_unsigned
16952 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16953 tree void_ftype_unsigned_unsigned
16954 = build_function_type_list (void_type_node, unsigned_type_node,
16955 unsigned_type_node, NULL_TREE);
16956 tree void_ftype_pcvoid_unsigned_unsigned
16957 = build_function_type_list (void_type_node, const_ptr_type_node,
16958 unsigned_type_node, unsigned_type_node,
16959 NULL_TREE);
16960 tree unsigned_ftype_void
16961 = build_function_type (unsigned_type_node, void_list_node);
16962 tree v2si_ftype_v4sf
16963 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16964 /* Loads/stores. */
16965 tree void_ftype_v8qi_v8qi_pchar
16966 = build_function_type_list (void_type_node,
16967 V8QI_type_node, V8QI_type_node,
16968 pchar_type_node, NULL_TREE);
16969 tree v4sf_ftype_pcfloat
16970 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16971 /* @@@ the type is bogus */
16972 tree v4sf_ftype_v4sf_pv2si
16973 = build_function_type_list (V4SF_type_node,
16974 V4SF_type_node, pv2si_type_node, NULL_TREE);
16975 tree void_ftype_pv2si_v4sf
16976 = build_function_type_list (void_type_node,
16977 pv2si_type_node, V4SF_type_node, NULL_TREE);
16978 tree void_ftype_pfloat_v4sf
16979 = build_function_type_list (void_type_node,
16980 pfloat_type_node, V4SF_type_node, NULL_TREE);
16981 tree void_ftype_pdi_di
16982 = build_function_type_list (void_type_node,
16983 pdi_type_node, long_long_unsigned_type_node,
16984 NULL_TREE);
16985 tree void_ftype_pv2di_v2di
16986 = build_function_type_list (void_type_node,
16987 pv2di_type_node, V2DI_type_node, NULL_TREE);
16988 /* Normal vector unops. */
16989 tree v4sf_ftype_v4sf
16990 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16991 tree v16qi_ftype_v16qi
16992 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16993 tree v8hi_ftype_v8hi
16994 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16995 tree v4si_ftype_v4si
16996 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16997 tree v8qi_ftype_v8qi
16998 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16999 tree v4hi_ftype_v4hi
17000 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17001
17002 /* Normal vector binops. */
17003 tree v4sf_ftype_v4sf_v4sf
17004 = build_function_type_list (V4SF_type_node,
17005 V4SF_type_node, V4SF_type_node, NULL_TREE);
17006 tree v8qi_ftype_v8qi_v8qi
17007 = build_function_type_list (V8QI_type_node,
17008 V8QI_type_node, V8QI_type_node, NULL_TREE);
17009 tree v4hi_ftype_v4hi_v4hi
17010 = build_function_type_list (V4HI_type_node,
17011 V4HI_type_node, V4HI_type_node, NULL_TREE);
17012 tree v2si_ftype_v2si_v2si
17013 = build_function_type_list (V2SI_type_node,
17014 V2SI_type_node, V2SI_type_node, NULL_TREE);
17015 tree di_ftype_di_di
17016 = build_function_type_list (long_long_unsigned_type_node,
17017 long_long_unsigned_type_node,
17018 long_long_unsigned_type_node, NULL_TREE);
17019
17020 tree di_ftype_di_di_int
17021 = build_function_type_list (long_long_unsigned_type_node,
17022 long_long_unsigned_type_node,
17023 long_long_unsigned_type_node,
17024 integer_type_node, NULL_TREE);
17025
17026 tree v2si_ftype_v2sf
17027 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17028 tree v2sf_ftype_v2si
17029 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17030 tree v2si_ftype_v2si
17031 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17032 tree v2sf_ftype_v2sf
17033 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17034 tree v2sf_ftype_v2sf_v2sf
17035 = build_function_type_list (V2SF_type_node,
17036 V2SF_type_node, V2SF_type_node, NULL_TREE);
17037 tree v2si_ftype_v2sf_v2sf
17038 = build_function_type_list (V2SI_type_node,
17039 V2SF_type_node, V2SF_type_node, NULL_TREE);
17040 tree pint_type_node = build_pointer_type (integer_type_node);
17041 tree pdouble_type_node = build_pointer_type (double_type_node);
17042 tree pcdouble_type_node = build_pointer_type (
17043 build_type_variant (double_type_node, 1, 0));
17044 tree int_ftype_v2df_v2df
17045 = build_function_type_list (integer_type_node,
17046 V2DF_type_node, V2DF_type_node, NULL_TREE);
17047
17048 tree void_ftype_pcvoid
17049 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17050 tree v4sf_ftype_v4si
17051 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17052 tree v4si_ftype_v4sf
17053 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17054 tree v2df_ftype_v4si
17055 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17056 tree v4si_ftype_v2df
17057 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17058 tree v2si_ftype_v2df
17059 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17060 tree v4sf_ftype_v2df
17061 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17062 tree v2df_ftype_v2si
17063 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17064 tree v2df_ftype_v4sf
17065 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17066 tree int_ftype_v2df
17067 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17068 tree int64_ftype_v2df
17069 = build_function_type_list (long_long_integer_type_node,
17070 V2DF_type_node, NULL_TREE);
17071 tree v2df_ftype_v2df_int
17072 = build_function_type_list (V2DF_type_node,
17073 V2DF_type_node, integer_type_node, NULL_TREE);
17074 tree v2df_ftype_v2df_int64
17075 = build_function_type_list (V2DF_type_node,
17076 V2DF_type_node, long_long_integer_type_node,
17077 NULL_TREE);
17078 tree v4sf_ftype_v4sf_v2df
17079 = build_function_type_list (V4SF_type_node,
17080 V4SF_type_node, V2DF_type_node, NULL_TREE);
17081 tree v2df_ftype_v2df_v4sf
17082 = build_function_type_list (V2DF_type_node,
17083 V2DF_type_node, V4SF_type_node, NULL_TREE);
17084 tree v2df_ftype_v2df_v2df_int
17085 = build_function_type_list (V2DF_type_node,
17086 V2DF_type_node, V2DF_type_node,
17087 integer_type_node,
17088 NULL_TREE);
17089 tree v2df_ftype_v2df_pcdouble
17090 = build_function_type_list (V2DF_type_node,
17091 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17092 tree void_ftype_pdouble_v2df
17093 = build_function_type_list (void_type_node,
17094 pdouble_type_node, V2DF_type_node, NULL_TREE);
17095 tree void_ftype_pint_int
17096 = build_function_type_list (void_type_node,
17097 pint_type_node, integer_type_node, NULL_TREE);
17098 tree void_ftype_v16qi_v16qi_pchar
17099 = build_function_type_list (void_type_node,
17100 V16QI_type_node, V16QI_type_node,
17101 pchar_type_node, NULL_TREE);
17102 tree v2df_ftype_pcdouble
17103 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17104 tree v2df_ftype_v2df_v2df
17105 = build_function_type_list (V2DF_type_node,
17106 V2DF_type_node, V2DF_type_node, NULL_TREE);
17107 tree v16qi_ftype_v16qi_v16qi
17108 = build_function_type_list (V16QI_type_node,
17109 V16QI_type_node, V16QI_type_node, NULL_TREE);
17110 tree v8hi_ftype_v8hi_v8hi
17111 = build_function_type_list (V8HI_type_node,
17112 V8HI_type_node, V8HI_type_node, NULL_TREE);
17113 tree v4si_ftype_v4si_v4si
17114 = build_function_type_list (V4SI_type_node,
17115 V4SI_type_node, V4SI_type_node, NULL_TREE);
17116 tree v2di_ftype_v2di_v2di
17117 = build_function_type_list (V2DI_type_node,
17118 V2DI_type_node, V2DI_type_node, NULL_TREE);
17119 tree v2di_ftype_v2df_v2df
17120 = build_function_type_list (V2DI_type_node,
17121 V2DF_type_node, V2DF_type_node, NULL_TREE);
17122 tree v2df_ftype_v2df
17123 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17124 tree v2di_ftype_v2di_int
17125 = build_function_type_list (V2DI_type_node,
17126 V2DI_type_node, integer_type_node, NULL_TREE);
17127 tree v2di_ftype_v2di_v2di_int
17128 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17129 V2DI_type_node, integer_type_node, NULL_TREE);
17130 tree v4si_ftype_v4si_int
17131 = build_function_type_list (V4SI_type_node,
17132 V4SI_type_node, integer_type_node, NULL_TREE);
17133 tree v8hi_ftype_v8hi_int
17134 = build_function_type_list (V8HI_type_node,
17135 V8HI_type_node, integer_type_node, NULL_TREE);
17136 tree v4si_ftype_v8hi_v8hi
17137 = build_function_type_list (V4SI_type_node,
17138 V8HI_type_node, V8HI_type_node, NULL_TREE);
17139 tree di_ftype_v8qi_v8qi
17140 = build_function_type_list (long_long_unsigned_type_node,
17141 V8QI_type_node, V8QI_type_node, NULL_TREE);
17142 tree di_ftype_v2si_v2si
17143 = build_function_type_list (long_long_unsigned_type_node,
17144 V2SI_type_node, V2SI_type_node, NULL_TREE);
17145 tree v2di_ftype_v16qi_v16qi
17146 = build_function_type_list (V2DI_type_node,
17147 V16QI_type_node, V16QI_type_node, NULL_TREE);
17148 tree v2di_ftype_v4si_v4si
17149 = build_function_type_list (V2DI_type_node,
17150 V4SI_type_node, V4SI_type_node, NULL_TREE);
17151 tree int_ftype_v16qi
17152 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17153 tree v16qi_ftype_pcchar
17154 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17155 tree void_ftype_pchar_v16qi
17156 = build_function_type_list (void_type_node,
17157 pchar_type_node, V16QI_type_node, NULL_TREE);
17158
17159 tree v2di_ftype_v2di_unsigned_unsigned
17160 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17161 unsigned_type_node, unsigned_type_node,
17162 NULL_TREE);
17163 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17164 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17165 unsigned_type_node, unsigned_type_node,
17166 NULL_TREE);
17167 tree v2di_ftype_v2di_v16qi
17168 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17169 NULL_TREE);
17170
17171 tree float80_type;
17172 tree float128_type;
17173 tree ftype;
17174
17175 /* The __float80 type. */
17176 if (TYPE_MODE (long_double_type_node) == XFmode)
17177 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17178 "__float80");
17179 else
17180 {
17181 /* The __float80 type. */
17182 float80_type = make_node (REAL_TYPE);
17183 TYPE_PRECISION (float80_type) = 80;
17184 layout_type (float80_type);
17185 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17186 }
17187
17188 if (TARGET_64BIT)
17189 {
17190 float128_type = make_node (REAL_TYPE);
17191 TYPE_PRECISION (float128_type) = 128;
17192 layout_type (float128_type);
17193 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17194 }
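
/* Both type names are visible to user code once registered; a minimal use,
   assuming an x86-64 target so that __float128 is available, is

     __float80  ext  = 1.0L;
     __float128 quad = 2.0;

   On 32-bit targets only __float80 is registered by the code above.  */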
17195
17196 /* Add all builtins that are more or less simple operations on two
17197 operands. */
17198 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17199 {
17200 /* Use one of the operands; the target can have a different mode for
17201 mask-generating compares. */
17202 enum machine_mode mode;
17203 tree type;
17204
17205 if (d->name == 0)
17206 continue;
17207 mode = insn_data[d->icode].operand[1].mode;
17208
17209 switch (mode)
17210 {
17211 case V16QImode:
17212 type = v16qi_ftype_v16qi_v16qi;
17213 break;
17214 case V8HImode:
17215 type = v8hi_ftype_v8hi_v8hi;
17216 break;
17217 case V4SImode:
17218 type = v4si_ftype_v4si_v4si;
17219 break;
17220 case V2DImode:
17221 type = v2di_ftype_v2di_v2di;
17222 break;
17223 case V2DFmode:
17224 type = v2df_ftype_v2df_v2df;
17225 break;
17226 case V4SFmode:
17227 type = v4sf_ftype_v4sf_v4sf;
17228 break;
17229 case V8QImode:
17230 type = v8qi_ftype_v8qi_v8qi;
17231 break;
17232 case V4HImode:
17233 type = v4hi_ftype_v4hi_v4hi;
17234 break;
17235 case V2SImode:
17236 type = v2si_ftype_v2si_v2si;
17237 break;
17238 case DImode:
17239 type = di_ftype_di_di;
17240 break;
17241
17242 default:
17243 gcc_unreachable ();
17244 }
17245
17246 /* Override for comparisons. */
17247 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17248 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17249 type = v4si_ftype_v4sf_v4sf;
17250
17251 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17252 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17253 type = v2di_ftype_v2df_v2df;
17254
17255 def_builtin (d->mask, d->name, type, d->code);
17256 }
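
/* As a concrete case, CODE_FOR_addv4sf3 has V4SFmode operands, so the loop
   above registers __builtin_ia32_addps with type v4sf_ftype_v4sf_v4sf,
   while the sse_maskcmpv4sf3 rows are overridden to v4si_ftype_v4sf_v4sf
   because their result is a bit mask rather than a float vector.  */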
17257
17258 /* Add all builtins that are more or less simple operations on 1 operand. */
17259 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17260 {
17261 enum machine_mode mode;
17262 tree type;
17263
17264 if (d->name == 0)
17265 continue;
17266 mode = insn_data[d->icode].operand[1].mode;
17267
17268 switch (mode)
17269 {
17270 case V16QImode:
17271 type = v16qi_ftype_v16qi;
17272 break;
17273 case V8HImode:
17274 type = v8hi_ftype_v8hi;
17275 break;
17276 case V4SImode:
17277 type = v4si_ftype_v4si;
17278 break;
17279 case V2DFmode:
17280 type = v2df_ftype_v2df;
17281 break;
17282 case V4SFmode:
17283 type = v4sf_ftype_v4sf;
17284 break;
17285 case V8QImode:
17286 type = v8qi_ftype_v8qi;
17287 break;
17288 case V4HImode:
17289 type = v4hi_ftype_v4hi;
17290 break;
17291 case V2SImode:
17292 type = v2si_ftype_v2si;
17293 break;
17294
17295 default:
17296 gcc_unreachable ();
17297 }
17298
17299 def_builtin (d->mask, d->name, type, d->code);
17300 }
17301
17302 /* Add the remaining MMX insns with somewhat more complicated types. */
17303 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17304 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17305 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17306 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17307
17308 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17309 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17310 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17311
17312 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17313 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17314
17315 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17316 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17317
17318 /* comi/ucomi insns. */
17319 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17320 if (d->mask == MASK_SSE2)
17321 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17322 else
17323 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17324
17325 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17326 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17327 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17328
17329 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17330 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17331 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17332 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17333 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17334 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17335 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17336 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17337 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17338 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17339 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17340
17341 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17342
17343 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17344 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17345
17346 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17347 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17348 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17349 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17350
17351 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17352 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17353 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17354 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17355
17356 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17357
17358 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17359
17360 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17361 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17362 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17363 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17364 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17365 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17366
17367 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17368
17369 /* Original 3DNow! */
17370 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17371 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17372 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17373 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17374 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17375 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17376 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17377 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17378 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17379 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17380 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17381 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17382 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17383 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17384 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17385 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17386 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17387 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17388 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17389 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17390
17391 /* 3DNow! extension as used in the Athlon CPU. */
17392 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17393 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17394 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17395 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17396 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17397 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17398
17399 /* SSE2 */
17400 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17401
17402 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17403 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17404
17405 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17406 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17407
17408 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17409 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17410 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17411 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17412 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17413
17414 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17415 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17416 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17417 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17418
17419 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17420 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17421
17422 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17423
17424 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17425 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17426
17427 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17428 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17429 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17430 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17431 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17432
17433 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17434
17435 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17436 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17437 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17438 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17439
17440 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17441 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17442 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17443
17444 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17445 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17446 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17447 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17448
17449 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17450 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17451 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17452
17453 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17454 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17455
17456 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17457 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17458
17459 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17460 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17461 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17462 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17463 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17464 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17465 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17466
17467 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17468 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17469 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17470 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17471 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17472 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17473 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17474
17475 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17476 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17477 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17478 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17479
17480 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17481
17482 /* Prescott New Instructions. */
17483 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17484 void_ftype_pcvoid_unsigned_unsigned,
17485 IX86_BUILTIN_MONITOR);
17486 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17487 void_ftype_unsigned_unsigned,
17488 IX86_BUILTIN_MWAIT);
17489 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17490 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17491
17492 /* SSSE3. */
17493 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17494 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17495 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17496 IX86_BUILTIN_PALIGNR);
17497
17498 	  /* AMDFAM10 SSE4A new built-ins.  */
17499 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17500 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17501 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17502 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17503 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17504 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17505 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17506 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17507 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17508 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17509 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17510 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17511
17512 /* Access to the vec_init patterns. */
17513 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17514 integer_type_node, NULL_TREE);
17515 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17516 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17517
17518 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17519 short_integer_type_node,
17520 short_integer_type_node,
17521 short_integer_type_node, NULL_TREE);
17522 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17523 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17524
17525 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17526 char_type_node, char_type_node,
17527 char_type_node, char_type_node,
17528 char_type_node, char_type_node,
17529 char_type_node, NULL_TREE);
17530 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17531 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17532
17533 /* Access to the vec_extract patterns. */
17534 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17535 integer_type_node, NULL_TREE);
17536 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17537 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17538
17539 ftype = build_function_type_list (long_long_integer_type_node,
17540 V2DI_type_node, integer_type_node,
17541 NULL_TREE);
17542 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17543 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17544
17545 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17546 integer_type_node, NULL_TREE);
17547 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17548 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17549
17550 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17551 integer_type_node, NULL_TREE);
17552 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17553 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17554
17555 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17556 integer_type_node, NULL_TREE);
17557 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17558 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17559
17560 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17561 integer_type_node, NULL_TREE);
17562 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17563 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17564
17565 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17566 integer_type_node, NULL_TREE);
17567 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17568 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17569
17570 /* Access to the vec_set patterns. */
17571 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17572 intHI_type_node,
17573 integer_type_node, NULL_TREE);
17574 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17575 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17576
17577 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17578 intHI_type_node,
17579 integer_type_node, NULL_TREE);
17580 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17581 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17582 }
17583
17584 static void
17585 ix86_init_builtins (void)
17586 {
17587 if (TARGET_MMX)
17588 ix86_init_mmx_sse_builtins ();
17589 }
17590
17591 /* Errors in the source file can cause expand_expr to return const0_rtx
17592 where we expect a vector. To avoid crashing, use one of the vector
17593 clear instructions. */
17594 static rtx
17595 safe_vector_operand (rtx x, enum machine_mode mode)
17596 {
17597 if (x == const0_rtx)
17598 x = CONST0_RTX (mode);
17599 return x;
17600 }
17601
17602 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17603
17604 static rtx
17605 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17606 {
17607 rtx pat, xops[3];
17608 tree arg0 = CALL_EXPR_ARG (exp, 0);
17609 tree arg1 = CALL_EXPR_ARG (exp, 1);
17610 rtx op0 = expand_normal (arg0);
17611 rtx op1 = expand_normal (arg1);
17612 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17613 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17614 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17615
17616 if (VECTOR_MODE_P (mode0))
17617 op0 = safe_vector_operand (op0, mode0);
17618 if (VECTOR_MODE_P (mode1))
17619 op1 = safe_vector_operand (op1, mode1);
17620
17621 if (optimize || !target
17622 || GET_MODE (target) != tmode
17623 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17624 target = gen_reg_rtx (tmode);
17625
17626 if (GET_MODE (op1) == SImode && mode1 == TImode)
17627 {
17628 rtx x = gen_reg_rtx (V4SImode);
17629 emit_insn (gen_sse2_loadd (x, op1));
17630 op1 = gen_lowpart (TImode, x);
17631 }
17632
17633 /* The insn must want input operands in the same modes as the
17634 result. */
17635 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17636 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17637
17638 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17639 op0 = copy_to_mode_reg (mode0, op0);
17640 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17641 op1 = copy_to_mode_reg (mode1, op1);
17642
17643 /* ??? Using ix86_fixup_binary_operands is problematic when
17644 we've got mismatched modes. Fake it. */
17645
17646 xops[0] = target;
17647 xops[1] = op0;
17648 xops[2] = op1;
17649
17650 if (tmode == mode0 && tmode == mode1)
17651 {
17652 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17653 op0 = xops[1];
17654 op1 = xops[2];
17655 }
17656 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17657 {
17658 op0 = force_reg (mode0, op0);
17659 op1 = force_reg (mode1, op1);
17660 target = gen_reg_rtx (tmode);
17661 }
17662
17663 pat = GEN_FCN (icode) (target, op0, op1);
17664 if (! pat)
17665 return 0;
17666 emit_insn (pat);
17667 return target;
17668 }
17669
17670 /* Subroutine of ix86_expand_builtin to take care of stores. */
17671
17672 static rtx
17673 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17674 {
17675 rtx pat;
17676 tree arg0 = CALL_EXPR_ARG (exp, 0);
17677 tree arg1 = CALL_EXPR_ARG (exp, 1);
17678 rtx op0 = expand_normal (arg0);
17679 rtx op1 = expand_normal (arg1);
17680 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17681 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17682
17683 if (VECTOR_MODE_P (mode1))
17684 op1 = safe_vector_operand (op1, mode1);
17685
17686 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17687 op1 = copy_to_mode_reg (mode1, op1);
17688
17689 pat = GEN_FCN (icode) (op0, op1);
17690 if (pat)
17691 emit_insn (pat);
17692 return 0;
17693 }
17694
17695 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17696
17697 static rtx
17698 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17699 rtx target, int do_load)
17700 {
17701 rtx pat;
17702 tree arg0 = CALL_EXPR_ARG (exp, 0);
17703 rtx op0 = expand_normal (arg0);
17704 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17705 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17706
17707 if (optimize || !target
17708 || GET_MODE (target) != tmode
17709 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17710 target = gen_reg_rtx (tmode);
17711 if (do_load)
17712 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17713 else
17714 {
17715 if (VECTOR_MODE_P (mode0))
17716 op0 = safe_vector_operand (op0, mode0);
17717
17718 if ((optimize && !register_operand (op0, mode0))
17719 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17720 op0 = copy_to_mode_reg (mode0, op0);
17721 }
17722
17723 pat = GEN_FCN (icode) (target, op0);
17724 if (! pat)
17725 return 0;
17726 emit_insn (pat);
17727 return target;
17728 }
17729
17730 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17731 sqrtss, rsqrtss, rcpss. */
17732
17733 static rtx
17734 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17735 {
17736 rtx pat;
17737 tree arg0 = CALL_EXPR_ARG (exp, 0);
17738 rtx op1, op0 = expand_normal (arg0);
17739 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17740 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17741
17742 if (optimize || !target
17743 || GET_MODE (target) != tmode
17744 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17745 target = gen_reg_rtx (tmode);
17746
17747 if (VECTOR_MODE_P (mode0))
17748 op0 = safe_vector_operand (op0, mode0);
17749
17750 if ((optimize && !register_operand (op0, mode0))
17751 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17752 op0 = copy_to_mode_reg (mode0, op0);
17753
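  /* These vm* scalar patterns take two vector inputs: the operation is
     applied to the low element of operand 1 and the remaining elements of
     the result are taken from operand 2, so passing the same register for
     both gives the usual sqrtss/rsqrtss/rcpss behaviour (upper elements
     copied from the input).  */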
17754 op1 = op0;
17755 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17756 op1 = copy_to_mode_reg (mode0, op1);
17757
17758 pat = GEN_FCN (icode) (target, op0, op1);
17759 if (! pat)
17760 return 0;
17761 emit_insn (pat);
17762 return target;
17763 }
17764
17765 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17766
17767 static rtx
17768 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17769 rtx target)
17770 {
17771 rtx pat;
17772 tree arg0 = CALL_EXPR_ARG (exp, 0);
17773 tree arg1 = CALL_EXPR_ARG (exp, 1);
17774 rtx op0 = expand_normal (arg0);
17775 rtx op1 = expand_normal (arg1);
17776 rtx op2;
17777 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17778 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17779 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17780 enum rtx_code comparison = d->comparison;
17781
17782 if (VECTOR_MODE_P (mode0))
17783 op0 = safe_vector_operand (op0, mode0);
17784 if (VECTOR_MODE_P (mode1))
17785 op1 = safe_vector_operand (op1, mode1);
17786
17787 /* Swap operands if we have a comparison that isn't available in
17788 hardware. */
17789 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17790 {
17791 rtx tmp = gen_reg_rtx (mode1);
17792 emit_move_insn (tmp, op1);
17793 op1 = op0;
17794 op0 = tmp;
17795 }
17796
17797 if (optimize || !target
17798 || GET_MODE (target) != tmode
17799 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17800 target = gen_reg_rtx (tmode);
17801
17802 if ((optimize && !register_operand (op0, mode0))
17803 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17804 op0 = copy_to_mode_reg (mode0, op0);
17805 if ((optimize && !register_operand (op1, mode1))
17806 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17807 op1 = copy_to_mode_reg (mode1, op1);
17808
17809 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17810 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17811 if (! pat)
17812 return 0;
17813 emit_insn (pat);
17814 return target;
17815 }
17816
17817 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17818
17819 static rtx
17820 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17821 rtx target)
17822 {
17823 rtx pat;
17824 tree arg0 = CALL_EXPR_ARG (exp, 0);
17825 tree arg1 = CALL_EXPR_ARG (exp, 1);
17826 rtx op0 = expand_normal (arg0);
17827 rtx op1 = expand_normal (arg1);
17828 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17829 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17830 enum rtx_code comparison = d->comparison;
17831
17832 if (VECTOR_MODE_P (mode0))
17833 op0 = safe_vector_operand (op0, mode0);
17834 if (VECTOR_MODE_P (mode1))
17835 op1 = safe_vector_operand (op1, mode1);
17836
17837 /* Swap operands if we have a comparison that isn't available in
17838 hardware. */
17839 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17840 {
17841 rtx tmp = op1;
17842 op1 = op0;
17843 op0 = tmp;
17844 }
17845
17846 target = gen_reg_rtx (SImode);
17847 emit_move_insn (target, const0_rtx);
17848 target = gen_rtx_SUBREG (QImode, target, 0);
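  /* TARGET is now a QImode view of a zeroed SImode pseudo; the comparison
     result computed below is written into its low byte via STRICT_LOW_PART,
     and the enclosing SImode register is what gets returned.  */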
17849
17850 if ((optimize && !register_operand (op0, mode0))
17851 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17852 op0 = copy_to_mode_reg (mode0, op0);
17853 if ((optimize && !register_operand (op1, mode1))
17854 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17855 op1 = copy_to_mode_reg (mode1, op1);
17856
17857 pat = GEN_FCN (d->icode) (op0, op1);
17858 if (! pat)
17859 return 0;
17860 emit_insn (pat);
17861 emit_insn (gen_rtx_SET (VOIDmode,
17862 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17863 gen_rtx_fmt_ee (comparison, QImode,
17864 SET_DEST (pat),
17865 const0_rtx)));
17866
17867 return SUBREG_REG (target);
17868 }
17869
17870 /* Return the integer constant in ARG. Constrain it to be in the range
17871 of the subparts of VEC_TYPE; issue an error if not. */
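/* For example, __builtin_ia32_vec_ext_v4sf (x, 4) is rejected here, since a
   V4SF vector only has elements 0..3.  */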
17872
17873 static int
17874 get_element_number (tree vec_type, tree arg)
17875 {
17876 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17877
17878 if (!host_integerp (arg, 1)
17879 || (elt = tree_low_cst (arg, 1), elt > max))
17880 {
17881 error ("selector must be an integer constant in the range 0..%wi", max);
17882 return 0;
17883 }
17884
17885 return elt;
17886 }
17887
17888 /* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
17889 ix86_expand_vector_init. We DO have language-level syntax for this, in
17890 the form of (type){ init-list }. Except that since we can't place emms
17891 instructions from inside the compiler, we can't allow the use of MMX
17892 registers unless the user explicitly asks for it. So we do *not* define
17893 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17894 we have builtins invoked by mmintrin.h that give us license to emit
17895 these sorts of instructions. */
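/* For example (a sketch of how the corresponding wrapper in mmintrin.h is
   expected to look -- the authoritative definition lives in that header, not
   here):

     extern __inline __m64
     _mm_set_pi32 (int __i1, int __i0)
     {
       return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
     }

   so user code can build MMX vectors without the compiler ever synthesizing
   MMX moves on its own.  */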
17896
17897 static rtx
17898 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17899 {
17900 enum machine_mode tmode = TYPE_MODE (type);
17901 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17902 int i, n_elt = GET_MODE_NUNITS (tmode);
17903 rtvec v = rtvec_alloc (n_elt);
17904
17905 gcc_assert (VECTOR_MODE_P (tmode));
17906 gcc_assert (call_expr_nargs (exp) == n_elt);
17907
17908 for (i = 0; i < n_elt; ++i)
17909 {
17910 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17911 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17912 }
17913
17914 if (!target || !register_operand (target, tmode))
17915 target = gen_reg_rtx (tmode);
17916
17917 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17918 return target;
17919 }
17920
17921 /* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
17922 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17923 had a language-level syntax for referencing vector elements. */
17924
17925 static rtx
17926 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17927 {
17928 enum machine_mode tmode, mode0;
17929 tree arg0, arg1;
17930 int elt;
17931 rtx op0;
17932
17933 arg0 = CALL_EXPR_ARG (exp, 0);
17934 arg1 = CALL_EXPR_ARG (exp, 1);
17935
17936 op0 = expand_normal (arg0);
17937 elt = get_element_number (TREE_TYPE (arg0), arg1);
17938
17939 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17940 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17941 gcc_assert (VECTOR_MODE_P (mode0));
17942
17943 op0 = force_reg (mode0, op0);
17944
17945 if (optimize || !target || !register_operand (target, tmode))
17946 target = gen_reg_rtx (tmode);
17947
17948 ix86_expand_vector_extract (true, target, op0, elt);
17949
17950 return target;
17951 }
17952
17953 /* A subroutine of ix86_expand_builtin.  These builtins are wrappers around
17954 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17955 a language-level syntax for referencing vector elements. */
17956
17957 static rtx
17958 ix86_expand_vec_set_builtin (tree exp)
17959 {
17960 enum machine_mode tmode, mode1;
17961 tree arg0, arg1, arg2;
17962 int elt;
17963 rtx op0, op1, target;
17964
17965 arg0 = CALL_EXPR_ARG (exp, 0);
17966 arg1 = CALL_EXPR_ARG (exp, 1);
17967 arg2 = CALL_EXPR_ARG (exp, 2);
17968
17969 tmode = TYPE_MODE (TREE_TYPE (arg0));
17970 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17971 gcc_assert (VECTOR_MODE_P (tmode));
17972
17973 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17974 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17975 elt = get_element_number (TREE_TYPE (arg0), arg2);
17976
17977 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17978 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17979
17980 op0 = force_reg (tmode, op0);
17981 op1 = force_reg (mode1, op1);
17982
17983 /* OP0 is the source of these builtin functions and shouldn't be
17984 modified.  Create a copy, use it, and return it as the target.  */
17985 target = gen_reg_rtx (tmode);
17986 emit_move_insn (target, op0);
17987 ix86_expand_vector_set (true, target, op1, elt);
17988
17989 return target;
17990 }
17991
17992 /* Expand an expression EXP that calls a built-in function,
17993 with result going to TARGET if that's convenient
17994 (and in mode MODE if that's convenient).
17995 SUBTARGET may be used as the target for computing one of EXP's operands.
17996 IGNORE is nonzero if the value is to be ignored. */
17997
17998 static rtx
17999 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
18000 enum machine_mode mode ATTRIBUTE_UNUSED,
18001 int ignore ATTRIBUTE_UNUSED)
18002 {
18003 const struct builtin_description *d;
18004 size_t i;
18005 enum insn_code icode;
18006 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
18007 tree arg0, arg1, arg2, arg3;
18008 rtx op0, op1, op2, op3, pat;
18009 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
18010 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
18011
18012 switch (fcode)
18013 {
18014 case IX86_BUILTIN_EMMS:
18015 emit_insn (gen_mmx_emms ());
18016 return 0;
18017
18018 case IX86_BUILTIN_SFENCE:
18019 emit_insn (gen_sse_sfence ());
18020 return 0;
18021
18022 case IX86_BUILTIN_MASKMOVQ:
18023 case IX86_BUILTIN_MASKMOVDQU:
18024 icode = (fcode == IX86_BUILTIN_MASKMOVQ
18025 ? CODE_FOR_mmx_maskmovq
18026 : CODE_FOR_sse2_maskmovdqu);
18027 /* Note the arg order is different from the operand order. */
18028 arg1 = CALL_EXPR_ARG (exp, 0);
18029 arg2 = CALL_EXPR_ARG (exp, 1);
18030 arg0 = CALL_EXPR_ARG (exp, 2);
18031 op0 = expand_normal (arg0);
18032 op1 = expand_normal (arg1);
18033 op2 = expand_normal (arg2);
18034 mode0 = insn_data[icode].operand[0].mode;
18035 mode1 = insn_data[icode].operand[1].mode;
18036 mode2 = insn_data[icode].operand[2].mode;
18037
18038 op0 = force_reg (Pmode, op0);
18039 op0 = gen_rtx_MEM (mode1, op0);
18040
18041 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
18042 op0 = copy_to_mode_reg (mode0, op0);
18043 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
18044 op1 = copy_to_mode_reg (mode1, op1);
18045 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
18046 op2 = copy_to_mode_reg (mode2, op2);
18047 pat = GEN_FCN (icode) (op0, op1, op2);
18048 if (! pat)
18049 return 0;
18050 emit_insn (pat);
18051 return 0;
18052
18053 case IX86_BUILTIN_SQRTSS:
18054 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
18055 case IX86_BUILTIN_RSQRTSS:
18056 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18057 case IX86_BUILTIN_RCPSS:
18058 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18059
18060 case IX86_BUILTIN_LOADUPS:
18061 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18062
18063 case IX86_BUILTIN_STOREUPS:
18064 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18065
18066 case IX86_BUILTIN_LOADHPS:
18067 case IX86_BUILTIN_LOADLPS:
18068 case IX86_BUILTIN_LOADHPD:
18069 case IX86_BUILTIN_LOADLPD:
18070 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18071 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18072 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18073 : CODE_FOR_sse2_loadlpd);
18074 arg0 = CALL_EXPR_ARG (exp, 0);
18075 arg1 = CALL_EXPR_ARG (exp, 1);
18076 op0 = expand_normal (arg0);
18077 op1 = expand_normal (arg1);
18078 tmode = insn_data[icode].operand[0].mode;
18079 mode0 = insn_data[icode].operand[1].mode;
18080 mode1 = insn_data[icode].operand[2].mode;
18081
18082 op0 = force_reg (mode0, op0);
18083 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18084 if (optimize || target == 0
18085 || GET_MODE (target) != tmode
18086 || !register_operand (target, tmode))
18087 target = gen_reg_rtx (tmode);
18088 pat = GEN_FCN (icode) (target, op0, op1);
18089 if (! pat)
18090 return 0;
18091 emit_insn (pat);
18092 return target;
18093
18094 case IX86_BUILTIN_STOREHPS:
18095 case IX86_BUILTIN_STORELPS:
18096 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18097 : CODE_FOR_sse_storelps);
18098 arg0 = CALL_EXPR_ARG (exp, 0);
18099 arg1 = CALL_EXPR_ARG (exp, 1);
18100 op0 = expand_normal (arg0);
18101 op1 = expand_normal (arg1);
18102 mode0 = insn_data[icode].operand[0].mode;
18103 mode1 = insn_data[icode].operand[1].mode;
18104
18105 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18106 op1 = force_reg (mode1, op1);
18107
18108 pat = GEN_FCN (icode) (op0, op1);
18109 if (! pat)
18110 return 0;
18111 emit_insn (pat);
18112 return const0_rtx;
18113
18114 case IX86_BUILTIN_MOVNTPS:
18115 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18116 case IX86_BUILTIN_MOVNTQ:
18117 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18118
18119 case IX86_BUILTIN_LDMXCSR:
18120 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18121 target = assign_386_stack_local (SImode, SLOT_TEMP);
18122 emit_move_insn (target, op0);
18123 emit_insn (gen_sse_ldmxcsr (target));
18124 return 0;
18125
18126 case IX86_BUILTIN_STMXCSR:
18127 target = assign_386_stack_local (SImode, SLOT_TEMP);
18128 emit_insn (gen_sse_stmxcsr (target));
18129 return copy_to_mode_reg (SImode, target);
18130
18131 case IX86_BUILTIN_SHUFPS:
18132 case IX86_BUILTIN_SHUFPD:
18133 icode = (fcode == IX86_BUILTIN_SHUFPS
18134 ? CODE_FOR_sse_shufps
18135 : CODE_FOR_sse2_shufpd);
18136 arg0 = CALL_EXPR_ARG (exp, 0);
18137 arg1 = CALL_EXPR_ARG (exp, 1);
18138 arg2 = CALL_EXPR_ARG (exp, 2);
18139 op0 = expand_normal (arg0);
18140 op1 = expand_normal (arg1);
18141 op2 = expand_normal (arg2);
18142 tmode = insn_data[icode].operand[0].mode;
18143 mode0 = insn_data[icode].operand[1].mode;
18144 mode1 = insn_data[icode].operand[2].mode;
18145 mode2 = insn_data[icode].operand[3].mode;
18146
18147 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18148 op0 = copy_to_mode_reg (mode0, op0);
18149 if ((optimize && !register_operand (op1, mode1))
18150 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18151 op1 = copy_to_mode_reg (mode1, op1);
18152 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18153 {
18154 /* @@@ better error message */
18155 error ("mask must be an immediate");
18156 return gen_reg_rtx (tmode);
18157 }
18158 if (optimize || target == 0
18159 || GET_MODE (target) != tmode
18160 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18161 target = gen_reg_rtx (tmode);
18162 pat = GEN_FCN (icode) (target, op0, op1, op2);
18163 if (! pat)
18164 return 0;
18165 emit_insn (pat);
18166 return target;
18167
18168 case IX86_BUILTIN_PSHUFW:
18169 case IX86_BUILTIN_PSHUFD:
18170 case IX86_BUILTIN_PSHUFHW:
18171 case IX86_BUILTIN_PSHUFLW:
18172 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18173 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18174 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18175 : CODE_FOR_mmx_pshufw);
18176 arg0 = CALL_EXPR_ARG (exp, 0);
18177 arg1 = CALL_EXPR_ARG (exp, 1);
18178 op0 = expand_normal (arg0);
18179 op1 = expand_normal (arg1);
18180 tmode = insn_data[icode].operand[0].mode;
18181 mode1 = insn_data[icode].operand[1].mode;
18182 mode2 = insn_data[icode].operand[2].mode;
18183
18184 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18185 op0 = copy_to_mode_reg (mode1, op0);
18186 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18187 {
18188 /* @@@ better error message */
18189 error ("mask must be an immediate");
18190 return const0_rtx;
18191 }
18192 if (target == 0
18193 || GET_MODE (target) != tmode
18194 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18195 target = gen_reg_rtx (tmode);
18196 pat = GEN_FCN (icode) (target, op0, op1);
18197 if (! pat)
18198 return 0;
18199 emit_insn (pat);
18200 return target;
18201
18202 case IX86_BUILTIN_PSLLWI128:
18203 icode = CODE_FOR_ashlv8hi3;
18204 goto do_pshifti;
18205 case IX86_BUILTIN_PSLLDI128:
18206 icode = CODE_FOR_ashlv4si3;
18207 goto do_pshifti;
18208 case IX86_BUILTIN_PSLLQI128:
18209 icode = CODE_FOR_ashlv2di3;
18210 goto do_pshifti;
18211 case IX86_BUILTIN_PSRAWI128:
18212 icode = CODE_FOR_ashrv8hi3;
18213 goto do_pshifti;
18214 case IX86_BUILTIN_PSRADI128:
18215 icode = CODE_FOR_ashrv4si3;
18216 goto do_pshifti;
18217 case IX86_BUILTIN_PSRLWI128:
18218 icode = CODE_FOR_lshrv8hi3;
18219 goto do_pshifti;
18220 case IX86_BUILTIN_PSRLDI128:
18221 icode = CODE_FOR_lshrv4si3;
18222 goto do_pshifti;
18223 case IX86_BUILTIN_PSRLQI128:
18224 icode = CODE_FOR_lshrv2di3;
18225 goto do_pshifti;
18226 do_pshifti:
18227 arg0 = CALL_EXPR_ARG (exp, 0);
18228 arg1 = CALL_EXPR_ARG (exp, 1);
18229 op0 = expand_normal (arg0);
18230 op1 = expand_normal (arg1);
18231
18232 if (!CONST_INT_P (op1))
18233 {
18234 error ("shift must be an immediate");
18235 return const0_rtx;
18236 }
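      /* Counts outside 0..255 are clamped to 255, which, like any other
	 over-sized count, shifts the element out completely (or fills it
	 with sign bits for the arithmetic right shifts).  */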
18237 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18238 op1 = GEN_INT (255);
18239
18240 tmode = insn_data[icode].operand[0].mode;
18241 mode1 = insn_data[icode].operand[1].mode;
18242 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18243 op0 = copy_to_reg (op0);
18244
18245 target = gen_reg_rtx (tmode);
18246 pat = GEN_FCN (icode) (target, op0, op1);
18247 if (!pat)
18248 return 0;
18249 emit_insn (pat);
18250 return target;
18251
18252 case IX86_BUILTIN_PSLLW128:
18253 icode = CODE_FOR_ashlv8hi3;
18254 goto do_pshift;
18255 case IX86_BUILTIN_PSLLD128:
18256 icode = CODE_FOR_ashlv4si3;
18257 goto do_pshift;
18258 case IX86_BUILTIN_PSLLQ128:
18259 icode = CODE_FOR_ashlv2di3;
18260 goto do_pshift;
18261 case IX86_BUILTIN_PSRAW128:
18262 icode = CODE_FOR_ashrv8hi3;
18263 goto do_pshift;
18264 case IX86_BUILTIN_PSRAD128:
18265 icode = CODE_FOR_ashrv4si3;
18266 goto do_pshift;
18267 case IX86_BUILTIN_PSRLW128:
18268 icode = CODE_FOR_lshrv8hi3;
18269 goto do_pshift;
18270 case IX86_BUILTIN_PSRLD128:
18271 icode = CODE_FOR_lshrv4si3;
18272 goto do_pshift;
18273 case IX86_BUILTIN_PSRLQ128:
18274 icode = CODE_FOR_lshrv2di3;
18275 goto do_pshift;
18276 do_pshift:
18277 arg0 = CALL_EXPR_ARG (exp, 0);
18278 arg1 = CALL_EXPR_ARG (exp, 1);
18279 op0 = expand_normal (arg0);
18280 op1 = expand_normal (arg1);
18281
18282 tmode = insn_data[icode].operand[0].mode;
18283 mode1 = insn_data[icode].operand[1].mode;
18284
18285 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18286 op0 = copy_to_reg (op0);
18287
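      /* The count operand of these SSE2 shift patterns is a TImode value,
	 so reinterpret the vector count argument as TImode before checking
	 it against the operand predicate.  */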
18288 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18289 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18290 op1 = copy_to_reg (op1);
18291
18292 target = gen_reg_rtx (tmode);
18293 pat = GEN_FCN (icode) (target, op0, op1);
18294 if (!pat)
18295 return 0;
18296 emit_insn (pat);
18297 return target;
18298
18299 case IX86_BUILTIN_PSLLDQI128:
18300 case IX86_BUILTIN_PSRLDQI128:
18301 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18302 : CODE_FOR_sse2_lshrti3);
18303 arg0 = CALL_EXPR_ARG (exp, 0);
18304 arg1 = CALL_EXPR_ARG (exp, 1);
18305 op0 = expand_normal (arg0);
18306 op1 = expand_normal (arg1);
18307 tmode = insn_data[icode].operand[0].mode;
18308 mode1 = insn_data[icode].operand[1].mode;
18309 mode2 = insn_data[icode].operand[2].mode;
18310
18311 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18312 {
18313 op0 = copy_to_reg (op0);
18314 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18315 }
18316 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18317 {
18318 error ("shift must be an immediate");
18319 return const0_rtx;
18320 }
18321 target = gen_reg_rtx (V2DImode);
18322 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18323 op0, op1);
18324 if (! pat)
18325 return 0;
18326 emit_insn (pat);
18327 return target;
18328
18329 case IX86_BUILTIN_FEMMS:
18330 emit_insn (gen_mmx_femms ());
18331 return NULL_RTX;
18332
18333 case IX86_BUILTIN_PAVGUSB:
18334 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18335
18336 case IX86_BUILTIN_PF2ID:
18337 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18338
18339 case IX86_BUILTIN_PFACC:
18340 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18341
18342 case IX86_BUILTIN_PFADD:
18343 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18344
18345 case IX86_BUILTIN_PFCMPEQ:
18346 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18347
18348 case IX86_BUILTIN_PFCMPGE:
18349 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18350
18351 case IX86_BUILTIN_PFCMPGT:
18352 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18353
18354 case IX86_BUILTIN_PFMAX:
18355 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18356
18357 case IX86_BUILTIN_PFMIN:
18358 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18359
18360 case IX86_BUILTIN_PFMUL:
18361 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18362
18363 case IX86_BUILTIN_PFRCP:
18364 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18365
18366 case IX86_BUILTIN_PFRCPIT1:
18367 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18368
18369 case IX86_BUILTIN_PFRCPIT2:
18370 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18371
18372 case IX86_BUILTIN_PFRSQIT1:
18373 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18374
18375 case IX86_BUILTIN_PFRSQRT:
18376 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18377
18378 case IX86_BUILTIN_PFSUB:
18379 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18380
18381 case IX86_BUILTIN_PFSUBR:
18382 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18383
18384 case IX86_BUILTIN_PI2FD:
18385 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18386
18387 case IX86_BUILTIN_PMULHRW:
18388 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18389
18390 case IX86_BUILTIN_PF2IW:
18391 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18392
18393 case IX86_BUILTIN_PFNACC:
18394 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18395
18396 case IX86_BUILTIN_PFPNACC:
18397 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18398
18399 case IX86_BUILTIN_PI2FW:
18400 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18401
18402 case IX86_BUILTIN_PSWAPDSI:
18403 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18404
18405 case IX86_BUILTIN_PSWAPDSF:
18406 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18407
18408 case IX86_BUILTIN_SQRTSD:
18409 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18410 case IX86_BUILTIN_LOADUPD:
18411 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18412 case IX86_BUILTIN_STOREUPD:
18413 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18414
18415 case IX86_BUILTIN_MFENCE:
18416 emit_insn (gen_sse2_mfence ());
18417 return 0;
18418 case IX86_BUILTIN_LFENCE:
18419 emit_insn (gen_sse2_lfence ());
18420 return 0;
18421
18422 case IX86_BUILTIN_CLFLUSH:
18423 arg0 = CALL_EXPR_ARG (exp, 0);
18424 op0 = expand_normal (arg0);
18425 icode = CODE_FOR_sse2_clflush;
18426 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18427 op0 = copy_to_mode_reg (Pmode, op0);
18428
18429 emit_insn (gen_sse2_clflush (op0));
18430 return 0;
18431
18432 case IX86_BUILTIN_MOVNTPD:
18433 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18434 case IX86_BUILTIN_MOVNTDQ:
18435 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18436 case IX86_BUILTIN_MOVNTI:
18437 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18438
18439 case IX86_BUILTIN_LOADDQU:
18440 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18441 case IX86_BUILTIN_STOREDQU:
18442 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18443
18444 case IX86_BUILTIN_MONITOR:
18445 arg0 = CALL_EXPR_ARG (exp, 0);
18446 arg1 = CALL_EXPR_ARG (exp, 1);
18447 arg2 = CALL_EXPR_ARG (exp, 2);
18448 op0 = expand_normal (arg0);
18449 op1 = expand_normal (arg1);
18450 op2 = expand_normal (arg2);
18451 if (!REG_P (op0))
18452 op0 = copy_to_mode_reg (Pmode, op0);
18453 if (!REG_P (op1))
18454 op1 = copy_to_mode_reg (SImode, op1);
18455 if (!REG_P (op2))
18456 op2 = copy_to_mode_reg (SImode, op2);
18457 if (!TARGET_64BIT)
18458 emit_insn (gen_sse3_monitor (op0, op1, op2));
18459 else
18460 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18461 return 0;
18462
18463 case IX86_BUILTIN_MWAIT:
18464 arg0 = CALL_EXPR_ARG (exp, 0);
18465 arg1 = CALL_EXPR_ARG (exp, 1);
18466 op0 = expand_normal (arg0);
18467 op1 = expand_normal (arg1);
18468 if (!REG_P (op0))
18469 op0 = copy_to_mode_reg (SImode, op0);
18470 if (!REG_P (op1))
18471 op1 = copy_to_mode_reg (SImode, op1);
18472 emit_insn (gen_sse3_mwait (op0, op1));
18473 return 0;
18474
18475 case IX86_BUILTIN_LDDQU:
18476 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18477 target, 1);
18478
18479 case IX86_BUILTIN_PALIGNR:
18480 case IX86_BUILTIN_PALIGNR128:
18481 if (fcode == IX86_BUILTIN_PALIGNR)
18482 {
18483 icode = CODE_FOR_ssse3_palignrdi;
18484 mode = DImode;
18485 }
18486 else
18487 {
18488 icode = CODE_FOR_ssse3_palignrti;
18489 mode = V2DImode;
18490 }
18491 arg0 = CALL_EXPR_ARG (exp, 0);
18492 arg1 = CALL_EXPR_ARG (exp, 1);
18493 arg2 = CALL_EXPR_ARG (exp, 2);
18494 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18495 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18496 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18497 tmode = insn_data[icode].operand[0].mode;
18498 mode1 = insn_data[icode].operand[1].mode;
18499 mode2 = insn_data[icode].operand[2].mode;
18500 mode3 = insn_data[icode].operand[3].mode;
18501
18502 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18503 {
18504 op0 = copy_to_reg (op0);
18505 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18506 }
18507 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18508 {
18509 op1 = copy_to_reg (op1);
18510 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18511 }
18512 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18513 {
18514 error ("shift must be an immediate");
18515 return const0_rtx;
18516 }
18517 target = gen_reg_rtx (mode);
18518 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18519 op0, op1, op2);
18520 if (! pat)
18521 return 0;
18522 emit_insn (pat);
18523 return target;
18524
18525 case IX86_BUILTIN_MOVNTSD:
18526 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18527
18528 case IX86_BUILTIN_MOVNTSS:
18529 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18530
18531 case IX86_BUILTIN_INSERTQ:
18532 case IX86_BUILTIN_EXTRQ:
18533 icode = (fcode == IX86_BUILTIN_EXTRQ
18534 ? CODE_FOR_sse4a_extrq
18535 : CODE_FOR_sse4a_insertq);
18536 arg0 = CALL_EXPR_ARG (exp, 0);
18537 arg1 = CALL_EXPR_ARG (exp, 1);
18538 op0 = expand_normal (arg0);
18539 op1 = expand_normal (arg1);
18540 tmode = insn_data[icode].operand[0].mode;
18541 mode1 = insn_data[icode].operand[1].mode;
18542 mode2 = insn_data[icode].operand[2].mode;
18543 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18544 op0 = copy_to_mode_reg (mode1, op0);
18545 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18546 op1 = copy_to_mode_reg (mode2, op1);
18547 if (optimize || target == 0
18548 || GET_MODE (target) != tmode
18549 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18550 target = gen_reg_rtx (tmode);
18551 pat = GEN_FCN (icode) (target, op0, op1);
18552 if (! pat)
18553 return NULL_RTX;
18554 emit_insn (pat);
18555 return target;
18556
18557 case IX86_BUILTIN_EXTRQI:
18558 icode = CODE_FOR_sse4a_extrqi;
18559 arg0 = CALL_EXPR_ARG (exp, 0);
18560 arg1 = CALL_EXPR_ARG (exp, 1);
18561 arg2 = CALL_EXPR_ARG (exp, 2);
18562 op0 = expand_normal (arg0);
18563 op1 = expand_normal (arg1);
18564 op2 = expand_normal (arg2);
18565 tmode = insn_data[icode].operand[0].mode;
18566 mode1 = insn_data[icode].operand[1].mode;
18567 mode2 = insn_data[icode].operand[2].mode;
18568 mode3 = insn_data[icode].operand[3].mode;
18569 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18570 op0 = copy_to_mode_reg (mode1, op0);
18571 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18572 {
18573 error ("index mask must be an immediate");
18574 return gen_reg_rtx (tmode);
18575 }
18576 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18577 {
18578 error ("length mask must be an immediate");
18579 return gen_reg_rtx (tmode);
18580 }
18581 if (optimize || target == 0
18582 || GET_MODE (target) != tmode
18583 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18584 target = gen_reg_rtx (tmode);
18585 pat = GEN_FCN (icode) (target, op0, op1, op2);
18586 if (! pat)
18587 return NULL_RTX;
18588 emit_insn (pat);
18589 return target;
18590
18591 case IX86_BUILTIN_INSERTQI:
18592 icode = CODE_FOR_sse4a_insertqi;
18593 arg0 = CALL_EXPR_ARG (exp, 0);
18594 arg1 = CALL_EXPR_ARG (exp, 1);
18595 arg2 = CALL_EXPR_ARG (exp, 2);
18596 arg3 = CALL_EXPR_ARG (exp, 3);
18597 op0 = expand_normal (arg0);
18598 op1 = expand_normal (arg1);
18599 op2 = expand_normal (arg2);
18600 op3 = expand_normal (arg3);
18601 tmode = insn_data[icode].operand[0].mode;
18602 mode1 = insn_data[icode].operand[1].mode;
18603 mode2 = insn_data[icode].operand[2].mode;
18604 mode3 = insn_data[icode].operand[3].mode;
18605 mode4 = insn_data[icode].operand[4].mode;
18606
18607 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18608 op0 = copy_to_mode_reg (mode1, op0);
18609
18610 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18611 op1 = copy_to_mode_reg (mode2, op1);
18612
18613 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18614 {
18615 error ("index mask must be an immediate");
18616 return gen_reg_rtx (tmode);
18617 }
18618 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18619 {
18620 error ("length mask must be an immediate");
18621 return gen_reg_rtx (tmode);
18622 }
18623 if (optimize || target == 0
18624 || GET_MODE (target) != tmode
18625 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18626 target = gen_reg_rtx (tmode);
18627 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18628 if (! pat)
18629 return NULL_RTX;
18630 emit_insn (pat);
18631 return target;
18632
18633 case IX86_BUILTIN_VEC_INIT_V2SI:
18634 case IX86_BUILTIN_VEC_INIT_V4HI:
18635 case IX86_BUILTIN_VEC_INIT_V8QI:
18636 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18637
18638 case IX86_BUILTIN_VEC_EXT_V2DF:
18639 case IX86_BUILTIN_VEC_EXT_V2DI:
18640 case IX86_BUILTIN_VEC_EXT_V4SF:
18641 case IX86_BUILTIN_VEC_EXT_V4SI:
18642 case IX86_BUILTIN_VEC_EXT_V8HI:
18643 case IX86_BUILTIN_VEC_EXT_V2SI:
18644 case IX86_BUILTIN_VEC_EXT_V4HI:
18645 return ix86_expand_vec_ext_builtin (exp, target);
18646
18647 case IX86_BUILTIN_VEC_SET_V8HI:
18648 case IX86_BUILTIN_VEC_SET_V4HI:
18649 return ix86_expand_vec_set_builtin (exp);
18650
18651 default:
18652 break;
18653 }
18654
18655 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18656 if (d->code == fcode)
18657 {
18658 /* Compares are treated specially. */
18659 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18660 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18661 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18662 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18663 return ix86_expand_sse_compare (d, exp, target);
18664
18665 return ix86_expand_binop_builtin (d->icode, exp, target);
18666 }
18667
18668 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18669 if (d->code == fcode)
18670 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18671
18672 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18673 if (d->code == fcode)
18674 return ix86_expand_sse_comi (d, exp, target);
18675
18676 gcc_unreachable ();
18677 }
18678
18679 /* Returns a function decl for a vectorized version of the builtin function
18680 with builtin function code FN, result vector type TYPE_OUT and input vector
18681 type TYPE_IN, or NULL_TREE if it is not available.  */
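/* For example, when the vectorizer processes a loop calling sqrt on doubles
   it asks for BUILT_IN_SQRT with a V2DF result and gets back the decl of
   __builtin_ia32_sqrtpd, which it then calls on whole vectors.  */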
18682
18683 static tree
18684 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18685 tree type_in)
18686 {
18687 enum machine_mode in_mode, out_mode;
18688 int in_n, out_n;
18689
18690 if (TREE_CODE (type_out) != VECTOR_TYPE
18691 || TREE_CODE (type_in) != VECTOR_TYPE)
18692 return NULL_TREE;
18693
18694 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18695 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18696 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18697 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18698
18699 switch (fn)
18700 {
18701 case BUILT_IN_SQRT:
18702 if (out_mode == DFmode && out_n == 2
18703 && in_mode == DFmode && in_n == 2)
18704 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18705 return NULL_TREE;
18706
18707 case BUILT_IN_SQRTF:
18708 if (out_mode == SFmode && out_n == 4
18709 && in_mode == SFmode && in_n == 4)
18710 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18711 return NULL_TREE;
18712
18713 case BUILT_IN_LRINTF:
18714 if (out_mode == SImode && out_n == 4
18715 && in_mode == SFmode && in_n == 4)
18716 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18717 return NULL_TREE;
18718
18719 default:
18720 ;
18721 }
18722
18723 return NULL_TREE;
18724 }
18725
18726 /* Returns a decl of a function that implements conversion of the
18727 input vector of type TYPE, or NULL_TREE if it is not available. */
18728
18729 static tree
18730 ix86_builtin_conversion (enum tree_code code, tree type)
18731 {
18732 if (TREE_CODE (type) != VECTOR_TYPE)
18733 return NULL_TREE;
18734
18735 switch (code)
18736 {
18737 case FLOAT_EXPR:
18738 switch (TYPE_MODE (type))
18739 {
18740 case V4SImode:
18741 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18742 default:
18743 return NULL_TREE;
18744 }
18745
18746 case FIX_TRUNC_EXPR:
18747 switch (TYPE_MODE (type))
18748 {
18749 case V4SFmode:
18750 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18751 default:
18752 return NULL_TREE;
18753 }
18754 default:
18755 return NULL_TREE;
18756
18757 }
18758 }
18759
18760 /* Store OPERAND to memory after reload is completed.  This means
18761 that we can't easily use assign_stack_local. */
18762 rtx
18763 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18764 {
18765 rtx result;
18766
18767 gcc_assert (reload_completed);
18768 if (TARGET_RED_ZONE)
18769 {
18770 result = gen_rtx_MEM (mode,
18771 gen_rtx_PLUS (Pmode,
18772 stack_pointer_rtx,
18773 GEN_INT (-RED_ZONE_SIZE)));
18774 emit_move_insn (result, operand);
18775 }
18776 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18777 {
18778 switch (mode)
18779 {
18780 case HImode:
18781 case SImode:
18782 operand = gen_lowpart (DImode, operand);
18783 /* FALLTHRU */
18784 case DImode:
18785 emit_insn (
18786 gen_rtx_SET (VOIDmode,
18787 gen_rtx_MEM (DImode,
18788 gen_rtx_PRE_DEC (DImode,
18789 stack_pointer_rtx)),
18790 operand));
18791 break;
18792 default:
18793 gcc_unreachable ();
18794 }
18795 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18796 }
18797 else
18798 {
18799 switch (mode)
18800 {
18801 case DImode:
18802 {
18803 rtx operands[2];
18804 split_di (&operand, 1, operands, operands + 1);
18805 emit_insn (
18806 gen_rtx_SET (VOIDmode,
18807 gen_rtx_MEM (SImode,
18808 gen_rtx_PRE_DEC (Pmode,
18809 stack_pointer_rtx)),
18810 operands[1]));
18811 emit_insn (
18812 gen_rtx_SET (VOIDmode,
18813 gen_rtx_MEM (SImode,
18814 gen_rtx_PRE_DEC (Pmode,
18815 stack_pointer_rtx)),
18816 operands[0]));
18817 }
18818 break;
18819 case HImode:
18820 /* Store HImodes as SImodes. */
18821 operand = gen_lowpart (SImode, operand);
18822 /* FALLTHRU */
18823 case SImode:
18824 emit_insn (
18825 gen_rtx_SET (VOIDmode,
18826 gen_rtx_MEM (GET_MODE (operand),
18827 gen_rtx_PRE_DEC (SImode,
18828 stack_pointer_rtx)),
18829 operand));
18830 break;
18831 default:
18832 gcc_unreachable ();
18833 }
18834 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18835 }
18836 return result;
18837 }
18838
18839 /* Free the operand previously stored to memory.  */
18840 void
18841 ix86_free_from_memory (enum machine_mode mode)
18842 {
18843 if (!TARGET_RED_ZONE)
18844 {
18845 int size;
18846
18847 if (mode == DImode || TARGET_64BIT)
18848 size = 8;
18849 else
18850 size = 4;
18851 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18852 to a pop or add instruction if registers are available.  */
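      /* Presumably LEA is chosen rather than an explicit add because it does
	 not clobber the flags register, so it can be emitted even when the
	 condition codes are live.  */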
18853 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18854 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18855 GEN_INT (size))));
18856 }
18857 }
18858
18859 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18860 QImode must go into class Q_REGS.
18861 Narrow ALL_REGS to GENERAL_REGS.  This allows movsf and
18862 movdf to do mem-to-mem moves through integer regs. */
18863 enum reg_class
18864 ix86_preferred_reload_class (rtx x, enum reg_class class)
18865 {
18866 enum machine_mode mode = GET_MODE (x);
18867
18868 /* We're only allowed to return a subclass of CLASS. Many of the
18869 following checks fail for NO_REGS, so eliminate that early. */
18870 if (class == NO_REGS)
18871 return NO_REGS;
18872
18873 /* All classes can load zeros. */
18874 if (x == CONST0_RTX (mode))
18875 return class;
18876
18877 /* Force constants into memory if we are loading a (nonzero) constant into
18878 an MMX or SSE register. This is because there are no MMX/SSE instructions
18879 to load from a constant. */
18880 if (CONSTANT_P (x)
18881 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18882 return NO_REGS;
18883
18884 /* Prefer SSE regs only, if we can use them for math. */
18885 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18886 return SSE_CLASS_P (class) ? class : NO_REGS;
18887
18888 /* Floating-point constants need more complex checks. */
18889 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18890 {
18891 /* General regs can load everything. */
18892 if (reg_class_subset_p (class, GENERAL_REGS))
18893 return class;
18894
18895 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18896 zero above. We only want to wind up preferring 80387 registers if
18897 we plan on doing computation with them. */
18898 if (TARGET_80387
18899 && standard_80387_constant_p (x))
18900 {
18901 /* Limit class to non-sse. */
18902 if (class == FLOAT_SSE_REGS)
18903 return FLOAT_REGS;
18904 if (class == FP_TOP_SSE_REGS)
18905 return FP_TOP_REG;
18906 if (class == FP_SECOND_SSE_REGS)
18907 return FP_SECOND_REG;
18908 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18909 return class;
18910 }
18911
18912 return NO_REGS;
18913 }
18914
18915 /* Generally when we see PLUS here, it's the function invariant
18916 (plus soft-fp const_int), which can only be computed into general
18917 regs. */
18918 if (GET_CODE (x) == PLUS)
18919 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18920
18921 /* QImode constants are easy to load, but non-constant QImode data
18922 must go into Q_REGS. */
18923 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18924 {
18925 if (reg_class_subset_p (class, Q_REGS))
18926 return class;
18927 if (reg_class_subset_p (Q_REGS, class))
18928 return Q_REGS;
18929 return NO_REGS;
18930 }
18931
18932 return class;
18933 }
18934
18935 /* Discourage putting floating-point values in SSE registers unless
18936 SSE math is being used, and likewise for the 387 registers. */
18937 enum reg_class
18938 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18939 {
18940 enum machine_mode mode = GET_MODE (x);
18941
18942 /* Restrict the output reload class to the register bank that we are doing
18943 math on. If we would like not to return a subset of CLASS, reject this
18944 alternative: if reload cannot do this, it will still use its choice. */
18946 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18947 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18948
18949 if (X87_FLOAT_MODE_P (mode))
18950 {
18951 if (class == FP_TOP_SSE_REGS)
18952 return FP_TOP_REG;
18953 else if (class == FP_SECOND_SSE_REGS)
18954 return FP_SECOND_REG;
18955 else
18956 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18957 }
18958
18959 return class;
18960 }
18961
18962 /* If we are copying between general and FP registers, we need a memory
18963 location. The same is true for SSE and MMX registers.
18964
18965 The macro can't work reliably when one of the CLASSES is a class containing
18966 registers from multiple units (SSE, MMX, integer). We avoid this by never
18967 combining those units in single alternative in the machine description.
18968 Ensure that this constraint holds to avoid unexpected surprises.
18969
18970 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18971 enforce these sanity checks. */
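/* For example, without SSE2 there is no instruction that copies directly
   between an SSE register and a general register, so such a copy must be
   staged through a stack slot; the checks below likewise force memory for
   SSE<->integer moves wider than a word even when inter-unit moves are
   enabled.  */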
18972
18973 int
18974 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18975 enum machine_mode mode, int strict)
18976 {
18977 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18978 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18979 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18980 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18981 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18982 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18983 {
18984 gcc_assert (!strict);
18985 return true;
18986 }
18987
18988 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18989 return true;
18990
18991 /* ??? This is a lie.  We do have moves between mmx/general and between
18992 mmx/sse2. But by saying we need secondary memory we discourage the
18993 register allocator from using the mmx registers unless needed. */
18994 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18995 return true;
18996
18997 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18998 {
18999 /* SSE1 doesn't have any direct moves from other classes. */
19000 if (!TARGET_SSE2)
19001 return true;
19002
19003 /* If the target says that inter-unit moves are more expensive
19004 than moving through memory, then don't generate them. */
19005 if (!TARGET_INTER_UNIT_MOVES)
19006 return true;
19007
19008 /* Between SSE and general, we have moves no larger than word size. */
19009 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
19010 return true;
19011 }
19012
19013 return false;
19014 }
19015
19016 /* Return true if the registers in CLASS cannot represent the change from
19017 modes FROM to TO. */
19018
19019 bool
19020 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
19021 enum reg_class class)
19022 {
19023 if (from == to)
19024 return false;
19025
19026 /* x87 registers can't do subreg at all, as all values are reformatted
19027 to extended precision. */
19028 if (MAYBE_FLOAT_CLASS_P (class))
19029 return true;
19030
19031 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
19032 {
19033 /* Vector registers do not support QI or HImode loads. If we don't
19034 disallow a change to these modes, reload will assume it's ok to
19035 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
19036 the vec_dupv4hi pattern. */
19037 if (GET_MODE_SIZE (from) < 4)
19038 return true;
19039
19040 /* Vector registers do not support subreg with nonzero offsets, which
19041 are otherwise valid for integer registers. Since we can't see
19042 whether we have a nonzero offset from here, prohibit all
19043 nonparadoxical subregs changing size. */
19044 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
19045 return true;
19046 }
19047
19048 return false;
19049 }
19050
19051 /* Return the cost of moving data from a register in class CLASS1 to
19052 one in class CLASS2.
19053
19054 It is not required that the cost always equal 2 when FROM is the same as TO;
19055 on some machines it is expensive to move between registers if they are not
19056 general registers. */
19057
19058 int
19059 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19060 enum reg_class class2)
19061 {
19062 /* In case we require secondary memory, compute cost of the store followed
19063 by load. In order to avoid bad register allocation choices, we need
19064 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19065
19066 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19067 {
19068 int cost = 1;
19069
19070 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19071 MEMORY_MOVE_COST (mode, class1, 1));
19072 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19073 MEMORY_MOVE_COST (mode, class2, 1));
19074
19075 /* When copying from a general purpose register we may emit multiple
19076 stores followed by a single load, causing a memory size mismatch stall.
19077 Count this as an arbitrarily high cost of 20. */
19078 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19079 cost += 20;
19080
19081 /* In the case of FP/MMX moves, the registers actually overlap, and we
19082 have to switch modes in order to treat them differently. */
19083 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19084 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19085 cost += 20;
19086
19087 return cost;
19088 }
19089
19090 /* Moves between SSE/MMX and integer unit are expensive. */
19091 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19092 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19093 return ix86_cost->mmxsse_to_integer;
19094 if (MAYBE_FLOAT_CLASS_P (class1))
19095 return ix86_cost->fp_move;
19096 if (MAYBE_SSE_CLASS_P (class1))
19097 return ix86_cost->sse_move;
19098 if (MAYBE_MMX_CLASS_P (class1))
19099 return ix86_cost->mmx_move;
19100 return 2;
19101 }
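
/* Worked example (editor's sketch; the cost-table values below are
   hypothetical, not taken from any real tuning table): if
   ix86_secondary_memory_needed is true for (GENERAL_REGS, SSE_REGS, SImode),
   the move is priced as a store followed by a load:

       cost = 1 + MAX (MEMORY_MOVE_COST (SImode, GENERAL_REGS, 0),
                       MEMORY_MOVE_COST (SImode, GENERAL_REGS, 1))
                + MAX (MEMORY_MOVE_COST (SImode, SSE_REGS, 0),
                       MEMORY_MOVE_COST (SImode, SSE_REGS, 1))

   e.g. 1 + 6 + 6 = 13, whereas a permitted direct move would simply cost
   ix86_cost->mmxsse_to_integer.  */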
19102
19103 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19104
19105 bool
19106 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19107 {
19108 /* Flags, and only flags, can hold CCmode values. */
19109 if (CC_REGNO_P (regno))
19110 return GET_MODE_CLASS (mode) == MODE_CC;
19111 if (GET_MODE_CLASS (mode) == MODE_CC
19112 || GET_MODE_CLASS (mode) == MODE_RANDOM
19113 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19114 return 0;
19115 if (FP_REGNO_P (regno))
19116 return VALID_FP_MODE_P (mode);
19117 if (SSE_REGNO_P (regno))
19118 {
19119 /* We implement the move patterns for all vector modes into and
19120 out of SSE registers, even when no operation instructions
19121 are available. */
19122 return (VALID_SSE_REG_MODE (mode)
19123 || VALID_SSE2_REG_MODE (mode)
19124 || VALID_MMX_REG_MODE (mode)
19125 || VALID_MMX_REG_MODE_3DNOW (mode));
19126 }
19127 if (MMX_REGNO_P (regno))
19128 {
19129 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19130 so if the register is available at all, then we can move data of
19131 the given mode into or out of it. */
19132 return (VALID_MMX_REG_MODE (mode)
19133 || VALID_MMX_REG_MODE_3DNOW (mode));
19134 }
19135
19136 if (mode == QImode)
19137 {
19138 /* Take care with QImode values - they can live in non-QI regs,
19139 but then they do cause partial register stalls. */
19140 if (regno < 4 || TARGET_64BIT)
19141 return 1;
19142 if (!TARGET_PARTIAL_REG_STALL)
19143 return 1;
19144 return reload_in_progress || reload_completed;
19145 }
19146 /* We handle both integers and floats in the general purpose registers. */
19147 else if (VALID_INT_MODE_P (mode))
19148 return 1;
19149 else if (VALID_FP_MODE_P (mode))
19150 return 1;
19151 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19152 on to use that value in smaller contexts, this can easily force a
19153 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19154 supporting DImode, allow it. */
19155 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19156 return 1;
19157
19158 return 0;
19159 }
19160
19161 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19162 tieable integer mode. */
19163
19164 static bool
19165 ix86_tieable_integer_mode_p (enum machine_mode mode)
19166 {
19167 switch (mode)
19168 {
19169 case HImode:
19170 case SImode:
19171 return true;
19172
19173 case QImode:
19174 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19175
19176 case DImode:
19177 return TARGET_64BIT;
19178
19179 default:
19180 return false;
19181 }
19182 }
19183
19184 /* Return true if MODE1 is accessible in a register that can hold MODE2
19185 without copying. That is, all register classes that can hold MODE2
19186 can also hold MODE1. */
19187
19188 bool
19189 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19190 {
19191 if (mode1 == mode2)
19192 return true;
19193
19194 if (ix86_tieable_integer_mode_p (mode1)
19195 && ix86_tieable_integer_mode_p (mode2))
19196 return true;
19197
19198 /* MODE2 being XFmode implies fp stack or general regs, which means we
19199 can tie any smaller floating point modes to it. Note that we do not
19200 tie this with TFmode. */
19201 if (mode2 == XFmode)
19202 return mode1 == SFmode || mode1 == DFmode;
19203
19204 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19205 that we can tie it with SFmode. */
19206 if (mode2 == DFmode)
19207 return mode1 == SFmode;
19208
19209 /* If MODE2 is only appropriate for an SSE register, then tie with
19210 any other mode acceptable to SSE registers. */
19211 if (GET_MODE_SIZE (mode2) == 16
19212 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19213 return (GET_MODE_SIZE (mode1) == 16
19214 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19215
19216 /* If MODE2 is appropriate for an MMX register, then tie
19217 with any other mode acceptable to MMX registers. */
19218 if (GET_MODE_SIZE (mode2) == 8
19219 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19220 return (GET_MODE_SIZE (mode1) == 8
19221 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19222
19223 return false;
19224 }
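
/* Examples (editor's note): ix86_modes_tieable_p (SImode, HImode) is true
   on all targets and (DImode, SImode) only with TARGET_64BIT, since both
   operands must be tieable integer modes; (SFmode, DFmode) is true because
   every class that can hold DFmode can also hold SFmode, while the reverse
   query (DFmode, SFmode) falls through all the checks and returns false.  */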
19225
19226 /* Return the cost of moving data of mode M between a
19227 register and memory. A value of 2 is the default; this cost is
19228 relative to those in `REGISTER_MOVE_COST'.
19229
19230 If moving between registers and memory is more expensive than
19231 between two registers, you should define this macro to express the
19232 relative cost.
19233
19234 Also model the increased cost of moving QImode registers in non
19235 Q_REGS classes.
19236 */
19237 int
19238 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19239 {
19240 if (FLOAT_CLASS_P (class))
19241 {
19242 int index;
19243 switch (mode)
19244 {
19245 case SFmode:
19246 index = 0;
19247 break;
19248 case DFmode:
19249 index = 1;
19250 break;
19251 case XFmode:
19252 index = 2;
19253 break;
19254 default:
19255 return 100;
19256 }
19257 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19258 }
19259 if (SSE_CLASS_P (class))
19260 {
19261 int index;
19262 switch (GET_MODE_SIZE (mode))
19263 {
19264 case 4:
19265 index = 0;
19266 break;
19267 case 8:
19268 index = 1;
19269 break;
19270 case 16:
19271 index = 2;
19272 break;
19273 default:
19274 return 100;
19275 }
19276 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19277 }
19278 if (MMX_CLASS_P (class))
19279 {
19280 int index;
19281 switch (GET_MODE_SIZE (mode))
19282 {
19283 case 4:
19284 index = 0;
19285 break;
19286 case 8:
19287 index = 1;
19288 break;
19289 default:
19290 return 100;
19291 }
19292 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19293 }
19294 switch (GET_MODE_SIZE (mode))
19295 {
19296 case 1:
19297 if (in)
19298 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19299 : ix86_cost->movzbl_load);
19300 else
19301 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19302 : ix86_cost->int_store[0] + 4);
19303 break;
19304 case 2:
19305 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19306 default:
19307 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19308 if (mode == TFmode)
19309 mode = XFmode;
19310 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19311 * (((int) GET_MODE_SIZE (mode)
19312 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19313 }
19314 }
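
/* Worked example (editor's sketch): on a 32-bit target a DImode load from
   GENERAL_REGS falls into the default case and costs ix86_cost->int_load[2]
   scaled by the number of word-sized moves, i.e.
   int_load[2] * ((8 + 3) / 4) = int_load[2] * 2.  A QImode store to a
   non-Q_REGS class is charged int_store[0] + 4, reflecting the extra cost of
   spilling a value that has no byte-addressable register.  */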
19315
19316 /* Compute a (partial) cost for rtx X. Return true if the complete
19317 cost has been computed, and false if subexpressions should be
19318 scanned. In either case, *TOTAL contains the cost result. */
19319
19320 static bool
19321 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19322 {
19323 enum machine_mode mode = GET_MODE (x);
19324
19325 switch (code)
19326 {
19327 case CONST_INT:
19328 case CONST:
19329 case LABEL_REF:
19330 case SYMBOL_REF:
19331 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19332 *total = 3;
19333 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19334 *total = 2;
19335 else if (flag_pic && SYMBOLIC_CONST (x)
19336 && (!TARGET_64BIT
19337 || (GET_CODE (x) != LABEL_REF
19338 && (GET_CODE (x) != SYMBOL_REF
19339 || !SYMBOL_REF_LOCAL_P (x)))))
19340 *total = 1;
19341 else
19342 *total = 0;
19343 return true;
19344
19345 case CONST_DOUBLE:
19346 if (mode == VOIDmode)
19347 *total = 0;
19348 else
19349 switch (standard_80387_constant_p (x))
19350 {
19351 case 1: /* 0.0 */
19352 *total = 1;
19353 break;
19354 default: /* Other constants */
19355 *total = 2;
19356 break;
19357 case 0:
19358 case -1:
19359 /* Start with (MEM (SYMBOL_REF)), since that's where
19360 it'll probably end up. Add a penalty for size. */
19361 *total = (COSTS_N_INSNS (1)
19362 + (flag_pic != 0 && !TARGET_64BIT)
19363 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19364 break;
19365 }
19366 return true;
19367
19368 case ZERO_EXTEND:
19369 /* The zero extension is often completely free on x86_64, so make
19370 it as cheap as possible. */
19371 if (TARGET_64BIT && mode == DImode
19372 && GET_MODE (XEXP (x, 0)) == SImode)
19373 *total = 1;
19374 else if (TARGET_ZERO_EXTEND_WITH_AND)
19375 *total = ix86_cost->add;
19376 else
19377 *total = ix86_cost->movzx;
19378 return false;
19379
19380 case SIGN_EXTEND:
19381 *total = ix86_cost->movsx;
19382 return false;
19383
19384 case ASHIFT:
19385 if (CONST_INT_P (XEXP (x, 1))
19386 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19387 {
19388 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19389 if (value == 1)
19390 {
19391 *total = ix86_cost->add;
19392 return false;
19393 }
19394 if ((value == 2 || value == 3)
19395 && ix86_cost->lea <= ix86_cost->shift_const)
19396 {
19397 *total = ix86_cost->lea;
19398 return false;
19399 }
19400 }
19401 /* FALLTHRU */
19402
19403 case ROTATE:
19404 case ASHIFTRT:
19405 case LSHIFTRT:
19406 case ROTATERT:
19407 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19408 {
19409 if (CONST_INT_P (XEXP (x, 1)))
19410 {
19411 if (INTVAL (XEXP (x, 1)) > 32)
19412 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19413 else
19414 *total = ix86_cost->shift_const * 2;
19415 }
19416 else
19417 {
19418 if (GET_CODE (XEXP (x, 1)) == AND)
19419 *total = ix86_cost->shift_var * 2;
19420 else
19421 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19422 }
19423 }
19424 else
19425 {
19426 if (CONST_INT_P (XEXP (x, 1)))
19427 *total = ix86_cost->shift_const;
19428 else
19429 *total = ix86_cost->shift_var;
19430 }
19431 return false;
19432
19433 case MULT:
19434 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19435 {
19436 /* ??? SSE scalar cost should be used here. */
19437 *total = ix86_cost->fmul;
19438 return false;
19439 }
19440 else if (X87_FLOAT_MODE_P (mode))
19441 {
19442 *total = ix86_cost->fmul;
19443 return false;
19444 }
19445 else if (FLOAT_MODE_P (mode))
19446 {
19447 /* ??? SSE vector cost should be used here. */
19448 *total = ix86_cost->fmul;
19449 return false;
19450 }
19451 else
19452 {
19453 rtx op0 = XEXP (x, 0);
19454 rtx op1 = XEXP (x, 1);
19455 int nbits;
19456 if (CONST_INT_P (XEXP (x, 1)))
19457 {
19458 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19459 for (nbits = 0; value != 0; value &= value - 1)
19460 nbits++;
19461 }
19462 else
19463 /* This is arbitrary. */
19464 nbits = 7;
19465
19466 /* Compute costs correctly for widening multiplication. */
19467 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19468 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19469 == GET_MODE_SIZE (mode))
19470 {
19471 int is_mulwiden = 0;
19472 enum machine_mode inner_mode = GET_MODE (op0);
19473
19474 if (GET_CODE (op0) == GET_CODE (op1))
19475 is_mulwiden = 1, op1 = XEXP (op1, 0);
19476 else if (CONST_INT_P (op1))
19477 {
19478 if (GET_CODE (op0) == SIGN_EXTEND)
19479 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19480 == INTVAL (op1);
19481 else
19482 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19483 }
19484
19485 if (is_mulwiden)
19486 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19487 }
19488
19489 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19490 + nbits * ix86_cost->mult_bit
19491 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19492
19493 return true;
19494 }
19495
19496 case DIV:
19497 case UDIV:
19498 case MOD:
19499 case UMOD:
19500 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19501 /* ??? SSE cost should be used here. */
19502 *total = ix86_cost->fdiv;
19503 else if (X87_FLOAT_MODE_P (mode))
19504 *total = ix86_cost->fdiv;
19505 else if (FLOAT_MODE_P (mode))
19506 /* ??? SSE vector cost should be used here. */
19507 *total = ix86_cost->fdiv;
19508 else
19509 *total = ix86_cost->divide[MODE_INDEX (mode)];
19510 return false;
19511
19512 case PLUS:
19513 if (GET_MODE_CLASS (mode) == MODE_INT
19514 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19515 {
19516 if (GET_CODE (XEXP (x, 0)) == PLUS
19517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19518 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19519 && CONSTANT_P (XEXP (x, 1)))
19520 {
19521 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19522 if (val == 2 || val == 4 || val == 8)
19523 {
19524 *total = ix86_cost->lea;
19525 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19526 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19527 outer_code);
19528 *total += rtx_cost (XEXP (x, 1), outer_code);
19529 return true;
19530 }
19531 }
19532 else if (GET_CODE (XEXP (x, 0)) == MULT
19533 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19534 {
19535 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19536 if (val == 2 || val == 4 || val == 8)
19537 {
19538 *total = ix86_cost->lea;
19539 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19540 *total += rtx_cost (XEXP (x, 1), outer_code);
19541 return true;
19542 }
19543 }
19544 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19545 {
19546 *total = ix86_cost->lea;
19547 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19548 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19549 *total += rtx_cost (XEXP (x, 1), outer_code);
19550 return true;
19551 }
19552 }
19553 /* FALLTHRU */
19554
19555 case MINUS:
19556 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19557 {
19558 /* ??? SSE cost should be used here. */
19559 *total = ix86_cost->fadd;
19560 return false;
19561 }
19562 else if (X87_FLOAT_MODE_P (mode))
19563 {
19564 *total = ix86_cost->fadd;
19565 return false;
19566 }
19567 else if (FLOAT_MODE_P (mode))
19568 {
19569 /* ??? SSE vector cost should be used here. */
19570 *total = ix86_cost->fadd;
19571 return false;
19572 }
19573 /* FALLTHRU */
19574
19575 case AND:
19576 case IOR:
19577 case XOR:
19578 if (!TARGET_64BIT && mode == DImode)
19579 {
19580 *total = (ix86_cost->add * 2
19581 + (rtx_cost (XEXP (x, 0), outer_code)
19582 << (GET_MODE (XEXP (x, 0)) != DImode))
19583 + (rtx_cost (XEXP (x, 1), outer_code)
19584 << (GET_MODE (XEXP (x, 1)) != DImode)));
19585 return true;
19586 }
19587 /* FALLTHRU */
19588
19589 case NEG:
19590 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19591 {
19592 /* ??? SSE cost should be used here. */
19593 *total = ix86_cost->fchs;
19594 return false;
19595 }
19596 else if (X87_FLOAT_MODE_P (mode))
19597 {
19598 *total = ix86_cost->fchs;
19599 return false;
19600 }
19601 else if (FLOAT_MODE_P (mode))
19602 {
19603 /* ??? SSE vector cost should be used here. */
19604 *total = ix86_cost->fchs;
19605 return false;
19606 }
19607 /* FALLTHRU */
19608
19609 case NOT:
19610 if (!TARGET_64BIT && mode == DImode)
19611 *total = ix86_cost->add * 2;
19612 else
19613 *total = ix86_cost->add;
19614 return false;
19615
19616 case COMPARE:
19617 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19618 && XEXP (XEXP (x, 0), 1) == const1_rtx
19619 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19620 && XEXP (x, 1) == const0_rtx)
19621 {
19622 /* This kind of construct is implemented using test[bwl].
19623 Treat it as if we had an AND. */
19624 *total = (ix86_cost->add
19625 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19626 + rtx_cost (const1_rtx, outer_code));
19627 return true;
19628 }
19629 return false;
19630
19631 case FLOAT_EXTEND:
19632 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19633 *total = 0;
19634 return false;
19635
19636 case ABS:
19637 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19638 /* ??? SSE cost should be used here. */
19639 *total = ix86_cost->fabs;
19640 else if (X87_FLOAT_MODE_P (mode))
19641 *total = ix86_cost->fabs;
19642 else if (FLOAT_MODE_P (mode))
19643 /* ??? SSE vector cost should be used here. */
19644 *total = ix86_cost->fabs;
19645 return false;
19646
19647 case SQRT:
19648 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19649 /* ??? SSE cost should be used here. */
19650 *total = ix86_cost->fsqrt;
19651 else if (X87_FLOAT_MODE_P (mode))
19652 *total = ix86_cost->fsqrt;
19653 else if (FLOAT_MODE_P (mode))
19654 /* ??? SSE vector cost should be used here. */
19655 *total = ix86_cost->fsqrt;
19656 return false;
19657
19658 case UNSPEC:
19659 if (XINT (x, 1) == UNSPEC_TP)
19660 *total = 0;
19661 return false;
19662
19663 default:
19664 return false;
19665 }
19666 }
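
/* Worked example (editor's sketch): for (mult:SI (reg:SI 60) (const_int 10))
   the loop in the MULT case above counts the bits set in 10 (binary 1010),
   so nbits = 2 and the multiply is priced as
   mult_init[MODE_INDEX (SImode)] + 2 * mult_bit plus the operand costs; a
   non-constant multiplier uses the arbitrary nbits = 7.  */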
19667
19668 #if TARGET_MACHO
19669
19670 static int current_machopic_label_num;
19671
19672 /* Given a symbol name and its associated stub, write out the
19673 definition of the stub. */
19674
19675 void
19676 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19677 {
19678 unsigned int length;
19679 char *binder_name, *symbol_name, lazy_ptr_name[32];
19680 int label = ++current_machopic_label_num;
19681
19682 /* For 64-bit we shouldn't get here. */
19683 gcc_assert (!TARGET_64BIT);
19684
19685 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19686 symb = (*targetm.strip_name_encoding) (symb);
19687
19688 length = strlen (stub);
19689 binder_name = alloca (length + 32);
19690 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19691
19692 length = strlen (symb);
19693 symbol_name = alloca (length + 32);
19694 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19695
19696 sprintf (lazy_ptr_name, "L%d$lz", label);
19697
19698 if (MACHOPIC_PURE)
19699 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19700 else
19701 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19702
19703 fprintf (file, "%s:\n", stub);
19704 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19705
19706 if (MACHOPIC_PURE)
19707 {
19708 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19709 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19710 fprintf (file, "\tjmp\t*%%edx\n");
19711 }
19712 else
19713 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19714
19715 fprintf (file, "%s:\n", binder_name);
19716
19717 if (MACHOPIC_PURE)
19718 {
19719 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19720 fprintf (file, "\tpushl\t%%eax\n");
19721 }
19722 else
19723 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19724
19725 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19726
19727 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19728 fprintf (file, "%s:\n", lazy_ptr_name);
19729 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19730 fprintf (file, "\t.long %s\n", binder_name);
19731 }
19732
19733 void
19734 darwin_x86_file_end (void)
19735 {
19736 darwin_file_end ();
19737 ix86_file_end ();
19738 }
19739 #endif /* TARGET_MACHO */
19740
19741 /* Order the registers for register allocator. */
19742
19743 void
19744 x86_order_regs_for_local_alloc (void)
19745 {
19746 int pos = 0;
19747 int i;
19748
19749 /* First allocate the local general purpose registers. */
19750 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19751 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19752 reg_alloc_order [pos++] = i;
19753
19754 /* Global general purpose registers. */
19755 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19756 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19757 reg_alloc_order [pos++] = i;
19758
19759 /* x87 registers come first in case we are doing FP math
19760 using them. */
19761 if (!TARGET_SSE_MATH)
19762 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19763 reg_alloc_order [pos++] = i;
19764
19765 /* SSE registers. */
19766 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19767 reg_alloc_order [pos++] = i;
19768 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19769 reg_alloc_order [pos++] = i;
19770
19771 /* x87 registers. */
19772 if (TARGET_SSE_MATH)
19773 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19774 reg_alloc_order [pos++] = i;
19775
19776 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19777 reg_alloc_order [pos++] = i;
19778
19779 /* Initialize the rest of the array, as we do not allocate some registers
19780 at all. */
19781 while (pos < FIRST_PSEUDO_REGISTER)
19782 reg_alloc_order [pos++] = 0;
19783 }
19784
19785 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19786 struct attribute_spec.handler. */
19787 static tree
19788 ix86_handle_struct_attribute (tree *node, tree name,
19789 tree args ATTRIBUTE_UNUSED,
19790 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19791 {
19792 tree *type = NULL;
19793 if (DECL_P (*node))
19794 {
19795 if (TREE_CODE (*node) == TYPE_DECL)
19796 type = &TREE_TYPE (*node);
19797 }
19798 else
19799 type = node;
19800
19801 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19802 || TREE_CODE (*type) == UNION_TYPE)))
19803 {
19804 warning (OPT_Wattributes, "%qs attribute ignored",
19805 IDENTIFIER_POINTER (name));
19806 *no_add_attrs = true;
19807 }
19808
19809 else if ((is_attribute_p ("ms_struct", name)
19810 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19811 || ((is_attribute_p ("gcc_struct", name)
19812 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19813 {
19814 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19815 IDENTIFIER_POINTER (name));
19816 *no_add_attrs = true;
19817 }
19818
19819 return NULL_TREE;
19820 }
19821
19822 static bool
19823 ix86_ms_bitfield_layout_p (tree record_type)
19824 {
19825 return (TARGET_MS_BITFIELD_LAYOUT &&
19826 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19827 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19828 }
19829
19830 /* Returns an expression indicating where the this parameter is
19831 located on entry to the FUNCTION. */
19832
19833 static rtx
19834 x86_this_parameter (tree function)
19835 {
19836 tree type = TREE_TYPE (function);
19837 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19838
19839 if (TARGET_64BIT)
19840 {
19841 const int *parm_regs;
19842
19843 if (TARGET_64BIT_MS_ABI)
19844 parm_regs = x86_64_ms_abi_int_parameter_registers;
19845 else
19846 parm_regs = x86_64_int_parameter_registers;
19847 return gen_rtx_REG (DImode, parm_regs[aggr]);
19848 }
19849
19850 if (ix86_function_regparm (type, function) > 0
19851 && !type_has_variadic_args_p (type))
19852 {
19853 int regno = 0;
19854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19855 regno = 2;
19856 return gen_rtx_REG (SImode, regno);
19857 }
19858
19859 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
19860 }
19861
19862 /* Determine whether x86_output_mi_thunk can succeed. */
19863
19864 static bool
19865 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19866 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19867 HOST_WIDE_INT vcall_offset, tree function)
19868 {
19869 /* 64-bit can handle anything. */
19870 if (TARGET_64BIT)
19871 return true;
19872
19873 /* For 32-bit, everything's fine if we have one free register. */
19874 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19875 return true;
19876
19877 /* Need a free register for vcall_offset. */
19878 if (vcall_offset)
19879 return false;
19880
19881 /* Need a free register for GOT references. */
19882 if (flag_pic && !(*targetm.binds_local_p) (function))
19883 return false;
19884
19885 /* Otherwise ok. */
19886 return true;
19887 }
19888
19889 /* Output the assembler code for a thunk function. THUNK_DECL is the
19890 declaration for the thunk function itself, FUNCTION is the decl for
19891 the target function. DELTA is an immediate constant offset to be
19892 added to THIS. If VCALL_OFFSET is nonzero, the word at
19893 *(*this + vcall_offset) should be added to THIS. */
19894
19895 static void
19896 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19897 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19898 HOST_WIDE_INT vcall_offset, tree function)
19899 {
19900 rtx xops[3];
19901 rtx this = x86_this_parameter (function);
19902 rtx this_reg, tmp;
19903
19904 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19905 pull it in now and let DELTA benefit. */
19906 if (REG_P (this))
19907 this_reg = this;
19908 else if (vcall_offset)
19909 {
19910 /* Put the this parameter into %eax. */
19911 xops[0] = this;
19912 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19913 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19914 }
19915 else
19916 this_reg = NULL_RTX;
19917
19918 /* Adjust the this parameter by a fixed constant. */
19919 if (delta)
19920 {
19921 xops[0] = GEN_INT (delta);
19922 xops[1] = this_reg ? this_reg : this;
19923 if (TARGET_64BIT)
19924 {
19925 if (!x86_64_general_operand (xops[0], DImode))
19926 {
19927 tmp = gen_rtx_REG (DImode, R10_REG);
19928 xops[1] = tmp;
19929 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19930 xops[0] = tmp;
19931 xops[1] = this;
19932 }
19933 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19934 }
19935 else
19936 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19937 }
19938
19939 /* Adjust the this parameter by a value stored in the vtable. */
19940 if (vcall_offset)
19941 {
19942 if (TARGET_64BIT)
19943 tmp = gen_rtx_REG (DImode, R10_REG);
19944 else
19945 {
19946 int tmp_regno = 2 /* ECX */;
19947 if (lookup_attribute ("fastcall",
19948 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19949 tmp_regno = 0 /* EAX */;
19950 tmp = gen_rtx_REG (SImode, tmp_regno);
19951 }
19952
19953 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19954 xops[1] = tmp;
19955 if (TARGET_64BIT)
19956 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19957 else
19958 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19959
19960 /* Adjust the this parameter. */
19961 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19962 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19963 {
19964 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19965 xops[0] = GEN_INT (vcall_offset);
19966 xops[1] = tmp2;
19967 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19968 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19969 }
19970 xops[1] = this_reg;
19971 if (TARGET_64BIT)
19972 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19973 else
19974 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19975 }
19976
19977 /* If necessary, drop THIS back to its stack slot. */
19978 if (this_reg && this_reg != this)
19979 {
19980 xops[0] = this_reg;
19981 xops[1] = this;
19982 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19983 }
19984
19985 xops[0] = XEXP (DECL_RTL (function), 0);
19986 if (TARGET_64BIT)
19987 {
19988 if (!flag_pic || (*targetm.binds_local_p) (function))
19989 output_asm_insn ("jmp\t%P0", xops);
19990 /* All thunks should be in the same object as their target,
19991 and thus binds_local_p should be true. */
19992 else if (TARGET_64BIT_MS_ABI)
19993 gcc_unreachable ();
19994 else
19995 {
19996 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19997 tmp = gen_rtx_CONST (Pmode, tmp);
19998 tmp = gen_rtx_MEM (QImode, tmp);
19999 xops[0] = tmp;
20000 output_asm_insn ("jmp\t%A0", xops);
20001 }
20002 }
20003 else
20004 {
20005 if (!flag_pic || (*targetm.binds_local_p) (function))
20006 output_asm_insn ("jmp\t%P0", xops);
20007 else
20008 #if TARGET_MACHO
20009 if (TARGET_MACHO)
20010 {
20011 rtx sym_ref = XEXP (DECL_RTL (function), 0);
20012 tmp = (gen_rtx_SYMBOL_REF
20013 (Pmode,
20014 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
20015 tmp = gen_rtx_MEM (QImode, tmp);
20016 xops[0] = tmp;
20017 output_asm_insn ("jmp\t%0", xops);
20018 }
20019 else
20020 #endif /* TARGET_MACHO */
20021 {
20022 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
20023 output_set_got (tmp, NULL_RTX);
20024
20025 xops[1] = tmp;
20026 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
20027 output_asm_insn ("jmp\t{*}%1", xops);
20028 }
20029 }
20030 }
20031
20032 static void
20033 x86_file_start (void)
20034 {
20035 default_file_start ();
20036 #if TARGET_MACHO
20037 darwin_file_start ();
20038 #endif
20039 if (X86_FILE_START_VERSION_DIRECTIVE)
20040 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
20041 if (X86_FILE_START_FLTUSED)
20042 fputs ("\t.global\t__fltused\n", asm_out_file);
20043 if (ix86_asm_dialect == ASM_INTEL)
20044 fputs ("\t.intel_syntax\n", asm_out_file);
20045 }
20046
20047 int
20048 x86_field_alignment (tree field, int computed)
20049 {
20050 enum machine_mode mode;
20051 tree type = TREE_TYPE (field);
20052
20053 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
20054 return computed;
20055 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20056 ? get_inner_array_type (type) : type);
20057 if (mode == DFmode || mode == DCmode
20058 || GET_MODE_CLASS (mode) == MODE_INT
20059 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20060 return MIN (32, computed);
20061 return computed;
20062 }
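
/* Example (editor's note): on 32-bit x86 without -malign-double, a struct
   field of type double (natural alignment 64 bits) is capped at
   MIN (32, computed) = 32 bits, matching the traditional ia32 struct layout;
   with TARGET_64BIT or TARGET_ALIGN_DOUBLE the computed alignment is
   returned unchanged.  */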
20063
20064 /* Output assembler code to FILE to increment profiler label # LABELNO
20065 for profiling a function entry. */
20066 void
20067 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20068 {
20069 if (TARGET_64BIT)
20070 {
20071 #ifndef NO_PROFILE_COUNTERS
20072 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20073 #endif
20074
20075 if (!TARGET_64BIT_MS_ABI && flag_pic)
20076 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20077 else
20078 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20079 }
20080 else if (flag_pic)
20081 {
20082 #ifndef NO_PROFILE_COUNTERS
20083 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20084 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20085 #endif
20086 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20087 }
20088 else
20089 {
20090 #ifndef NO_PROFILE_COUNTERS
20091 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20092 PROFILE_COUNT_REGISTER);
20093 #endif
20094 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20095 }
20096 }
20097
20098 /* We don't have exact information about the insn sizes, but we may assume
20099 quite safely that we are informed about all 1 byte insns and memory
20100 address sizes. This is enough to eliminate unnecessary padding in
20101 99% of cases. */
20102
20103 static int
20104 min_insn_size (rtx insn)
20105 {
20106 int l = 0;
20107
20108 if (!INSN_P (insn) || !active_insn_p (insn))
20109 return 0;
20110
20111 /* Discard alignments we've emitted and jump instructions. */
20112 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20113 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20114 return 0;
20115 if (JUMP_P (insn)
20116 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20117 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20118 return 0;
20119
20120 /* Important case - calls are always 5 bytes.
20121 It is common to have many calls in a row. */
20122 if (CALL_P (insn)
20123 && symbolic_reference_mentioned_p (PATTERN (insn))
20124 && !SIBLING_CALL_P (insn))
20125 return 5;
20126 if (get_attr_length (insn) <= 1)
20127 return 1;
20128
20129 /* For normal instructions we may rely on the sizes of addresses
20130 and the presence of symbol to require 4 bytes of encoding.
20131 This is not the case for jumps where references are PC relative. */
20132 if (!JUMP_P (insn))
20133 {
20134 l = get_attr_length_address (insn);
20135 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20136 l = 4;
20137 }
20138 if (l)
20139 return 1+l;
20140 else
20141 return 2;
20142 }
20143
20144 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20145 window. */
20146
20147 static void
20148 ix86_avoid_jump_misspredicts (void)
20149 {
20150 rtx insn, start = get_insns ();
20151 int nbytes = 0, njumps = 0;
20152 int isjump = 0;
20153
20154 /* Look for all minimal intervals of instructions containing 4 jumps.
20155 The intervals are bounded by START and INSN. NBYTES is the total
20156 size of instructions in the interval including INSN and not including
20157 START. When the NBYTES is smaller than 16 bytes, it is possible
20158 that the end of START and INSN ends up in the same 16byte page.
20159
20160 The smallest offset in the page INSN can start is the case where START
20161 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
20162 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
20163 */
20164 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20165 {
20166
20167 nbytes += min_insn_size (insn);
20168 if (dump_file)
20169 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20170 INSN_UID (insn), min_insn_size (insn));
20171 if ((JUMP_P (insn)
20172 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20173 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20174 || CALL_P (insn))
20175 njumps++;
20176 else
20177 continue;
20178
20179 while (njumps > 3)
20180 {
20181 start = NEXT_INSN (start);
20182 if ((JUMP_P (start)
20183 && GET_CODE (PATTERN (start)) != ADDR_VEC
20184 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20185 || CALL_P (start))
20186 njumps--, isjump = 1;
20187 else
20188 isjump = 0;
20189 nbytes -= min_insn_size (start);
20190 }
20191 gcc_assert (njumps >= 0);
20192 if (dump_file)
20193 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20194 INSN_UID (start), INSN_UID (insn), nbytes);
20195
20196 if (njumps == 3 && isjump && nbytes < 16)
20197 {
20198 int padsize = 15 - nbytes + min_insn_size (insn);
20199
20200 if (dump_file)
20201 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20202 INSN_UID (insn), padsize);
20203 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
20204 }
20205 }
20206 }
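
/* Worked example (editor's sketch): when a fourth jump is seen, the window
   is trimmed back until only three jumps remain; if the insn dropped from
   the front was itself a jump (isjump) and the remaining interval is still
   under 16 bytes, the pass emits an alignment of
   padsize = 15 - nbytes + min_insn_size (insn) bytes before the incoming
   jump.  With nbytes = 7 and a 1-byte jump that is a 9-byte pad, enough to
   push the fourth jump out of the 16-byte fetch window shared by the
   previous three.  */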
20207
20208 /* AMD Athlon works faster
20209 when RET is not the destination of a conditional jump or directly preceded
20210 by another jump instruction. We avoid the penalty by inserting a NOP just
20211 before the RET instructions in such cases. */
20212 static void
20213 ix86_pad_returns (void)
20214 {
20215 edge e;
20216 edge_iterator ei;
20217
20218 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20219 {
20220 basic_block bb = e->src;
20221 rtx ret = BB_END (bb);
20222 rtx prev;
20223 bool replace = false;
20224
20225 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20226 || !maybe_hot_bb_p (bb))
20227 continue;
20228 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20229 if (active_insn_p (prev) || LABEL_P (prev))
20230 break;
20231 if (prev && LABEL_P (prev))
20232 {
20233 edge e;
20234 edge_iterator ei;
20235
20236 FOR_EACH_EDGE (e, ei, bb->preds)
20237 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20238 && !(e->flags & EDGE_FALLTHRU))
20239 replace = true;
20240 }
20241 if (!replace)
20242 {
20243 prev = prev_active_insn (ret);
20244 if (prev
20245 && ((JUMP_P (prev) && any_condjump_p (prev))
20246 || CALL_P (prev)))
20247 replace = true;
20248 /* Empty functions get a branch mispredict even when the jump destination
20249 is not visible to us. */
20250 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20251 replace = true;
20252 }
20253 if (replace)
20254 {
20255 emit_insn_before (gen_return_internal_long (), ret);
20256 delete_insn (ret);
20257 }
20258 }
20259 }
20260
20261 /* Implement machine specific optimizations. We implement padding of returns
20262 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
20263 static void
20264 ix86_reorg (void)
20265 {
20266 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20267 ix86_pad_returns ();
20268 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20269 ix86_avoid_jump_misspredicts ();
20270 }
20271
20272 /* Return nonzero when a QImode register that must be represented via a REX
20273 prefix is used. */
20274 bool
20275 x86_extended_QIreg_mentioned_p (rtx insn)
20276 {
20277 int i;
20278 extract_insn_cached (insn);
20279 for (i = 0; i < recog_data.n_operands; i++)
20280 if (REG_P (recog_data.operand[i])
20281 && REGNO (recog_data.operand[i]) >= 4)
20282 return true;
20283 return false;
20284 }
20285
20286 /* Return nonzero when P points to a register encoded via a REX prefix.
20287 Called via for_each_rtx. */
20288 static int
20289 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20290 {
20291 unsigned int regno;
20292 if (!REG_P (*p))
20293 return 0;
20294 regno = REGNO (*p);
20295 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20296 }
20297
20298 /* Return true when INSN mentions a register that must be encoded using a REX
20299 prefix. */
20300 bool
20301 x86_extended_reg_mentioned_p (rtx insn)
20302 {
20303 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20304 }
20305
20306 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20307 optabs would emit if we didn't have TFmode patterns. */
20308
20309 void
20310 x86_emit_floatuns (rtx operands[2])
20311 {
20312 rtx neglab, donelab, i0, i1, f0, in, out;
20313 enum machine_mode mode, inmode;
20314
20315 inmode = GET_MODE (operands[1]);
20316 gcc_assert (inmode == SImode || inmode == DImode);
20317
20318 out = operands[0];
20319 in = force_reg (inmode, operands[1]);
20320 mode = GET_MODE (out);
20321 neglab = gen_label_rtx ();
20322 donelab = gen_label_rtx ();
20323 f0 = gen_reg_rtx (mode);
20324
20325 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20326
20327 expand_float (out, in, 0);
20328
20329 emit_jump_insn (gen_jump (donelab));
20330 emit_barrier ();
20331
20332 emit_label (neglab);
20333
20334 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20335 1, OPTAB_DIRECT);
20336 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20337 1, OPTAB_DIRECT);
20338 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20339
20340 expand_float (f0, i0, 0);
20341
20342 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20343
20344 emit_label (donelab);
20345 }
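
/* Editor's sketch (not part of the original file, never compiled): the
   scalar equivalent of the sequence x86_emit_floatuns expands, shown for a
   64-bit unsigned input converted to double.  The helper name is made up
   for illustration only.  */
#if 0
static double
floatuns_sketch (unsigned long long u)
{
  /* Non-negative as a signed value: a plain signed conversion is exact.  */
  if ((long long) u >= 0)
    return (double) (long long) u;

  /* Otherwise halve the value, folding the discarded low bit back in so
     that rounding is preserved, convert as signed, then double.  */
  unsigned long long half = (u >> 1) | (u & 1);
  return (double) (long long) half * 2.0;
}
#endif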
20346 \f
20347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20348 with all elements equal to VAR. Return true if successful. */
20349
20350 static bool
20351 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20352 rtx target, rtx val)
20353 {
20354 enum machine_mode smode, wsmode, wvmode;
20355 rtx x;
20356
20357 switch (mode)
20358 {
20359 case V2SImode:
20360 case V2SFmode:
20361 if (!mmx_ok)
20362 return false;
20363 /* FALLTHRU */
20364
20365 case V2DFmode:
20366 case V2DImode:
20367 case V4SFmode:
20368 case V4SImode:
20369 val = force_reg (GET_MODE_INNER (mode), val);
20370 x = gen_rtx_VEC_DUPLICATE (mode, val);
20371 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20372 return true;
20373
20374 case V4HImode:
20375 if (!mmx_ok)
20376 return false;
20377 if (TARGET_SSE || TARGET_3DNOW_A)
20378 {
20379 val = gen_lowpart (SImode, val);
20380 x = gen_rtx_TRUNCATE (HImode, val);
20381 x = gen_rtx_VEC_DUPLICATE (mode, x);
20382 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20383 return true;
20384 }
20385 else
20386 {
20387 smode = HImode;
20388 wsmode = SImode;
20389 wvmode = V2SImode;
20390 goto widen;
20391 }
20392
20393 case V8QImode:
20394 if (!mmx_ok)
20395 return false;
20396 smode = QImode;
20397 wsmode = HImode;
20398 wvmode = V4HImode;
20399 goto widen;
20400 case V8HImode:
20401 if (TARGET_SSE2)
20402 {
20403 rtx tmp1, tmp2;
20404 /* Extend HImode to SImode using a paradoxical SUBREG. */
20405 tmp1 = gen_reg_rtx (SImode);
20406 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20407 /* Insert the SImode value as low element of V4SImode vector. */
20408 tmp2 = gen_reg_rtx (V4SImode);
20409 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20410 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20411 CONST0_RTX (V4SImode),
20412 const1_rtx);
20413 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20414 /* Cast the V4SImode vector back to a V8HImode vector. */
20415 tmp1 = gen_reg_rtx (V8HImode);
20416 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20417 /* Duplicate the low short through the whole low SImode word. */
20418 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20419 /* Cast the V8HImode vector back to a V4SImode vector. */
20420 tmp2 = gen_reg_rtx (V4SImode);
20421 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20422 /* Replicate the low element of the V4SImode vector. */
20423 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20424 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20425 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20426 return true;
20427 }
20428 smode = HImode;
20429 wsmode = SImode;
20430 wvmode = V4SImode;
20431 goto widen;
20432 case V16QImode:
20433 if (TARGET_SSE2)
20434 {
20435 rtx tmp1, tmp2;
20436 /* Extend QImode to SImode using a paradoxical SUBREG. */
20437 tmp1 = gen_reg_rtx (SImode);
20438 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20439 /* Insert the SImode value as low element of V4SImode vector. */
20440 tmp2 = gen_reg_rtx (V4SImode);
20441 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20442 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20443 CONST0_RTX (V4SImode),
20444 const1_rtx);
20445 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20446 /* Cast the V4SImode vector back to a V16QImode vector. */
20447 tmp1 = gen_reg_rtx (V16QImode);
20448 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20449 /* Duplicate the low byte through the whole low SImode word. */
20450 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20451 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20452 /* Cast the V16QImode vector back to a V4SImode vector. */
20453 tmp2 = gen_reg_rtx (V4SImode);
20454 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20455 /* Replicate the low element of the V4SImode vector. */
20456 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20457 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20458 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20459 return true;
20460 }
20461 smode = QImode;
20462 wsmode = HImode;
20463 wvmode = V8HImode;
20464 goto widen;
20465 widen:
20466 /* Replicate the value once into the next wider mode and recurse. */
20467 val = convert_modes (wsmode, smode, val, true);
20468 x = expand_simple_binop (wsmode, ASHIFT, val,
20469 GEN_INT (GET_MODE_BITSIZE (smode)),
20470 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20471 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20472
20473 x = gen_reg_rtx (wvmode);
20474 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20475 gcc_unreachable ();
20476 emit_move_insn (target, gen_lowpart (mode, x));
20477 return true;
20478
20479 default:
20480 return false;
20481 }
20482 }
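
/* Editor's sketch (not part of the original file, never compiled): the
   scalar "widen" step used above, shown for QImode -> HImode.  Applying it
   repeatedly turns 0x41 into 0x4141 and then 0x41414141, which is finally
   broadcast in the wider vector mode.  The helper name is made up for
   illustration only.  */
#if 0
static unsigned short
widen_byte_sketch (unsigned char b)
{
  unsigned short v = b;
  /* val | (val << GET_MODE_BITSIZE (smode)), for smode == QImode.  */
  return (unsigned short) (v | (v << 8));
}
#endif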
20483
20484 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20485 whose ONE_VAR element is VAR, and other elements are zero. Return true
20486 if successful. */
20487
20488 static bool
20489 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20490 rtx target, rtx var, int one_var)
20491 {
20492 enum machine_mode vsimode;
20493 rtx new_target;
20494 rtx x, tmp;
20495
20496 switch (mode)
20497 {
20498 case V2SFmode:
20499 case V2SImode:
20500 if (!mmx_ok)
20501 return false;
20502 /* FALLTHRU */
20503
20504 case V2DFmode:
20505 case V2DImode:
20506 if (one_var != 0)
20507 return false;
20508 var = force_reg (GET_MODE_INNER (mode), var);
20509 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20510 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20511 return true;
20512
20513 case V4SFmode:
20514 case V4SImode:
20515 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20516 new_target = gen_reg_rtx (mode);
20517 else
20518 new_target = target;
20519 var = force_reg (GET_MODE_INNER (mode), var);
20520 x = gen_rtx_VEC_DUPLICATE (mode, var);
20521 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20522 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20523 if (one_var != 0)
20524 {
20525 /* We need to shuffle the value to the correct position, so
20526 create a new pseudo to store the intermediate result. */
20527
20528 /* With SSE2, we can use the integer shuffle insns. */
20529 if (mode != V4SFmode && TARGET_SSE2)
20530 {
20531 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20532 GEN_INT (1),
20533 GEN_INT (one_var == 1 ? 0 : 1),
20534 GEN_INT (one_var == 2 ? 0 : 1),
20535 GEN_INT (one_var == 3 ? 0 : 1)));
20536 if (target != new_target)
20537 emit_move_insn (target, new_target);
20538 return true;
20539 }
20540
20541 /* Otherwise convert the intermediate result to V4SFmode and
20542 use the SSE1 shuffle instructions. */
20543 if (mode != V4SFmode)
20544 {
20545 tmp = gen_reg_rtx (V4SFmode);
20546 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20547 }
20548 else
20549 tmp = new_target;
20550
20551 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20552 GEN_INT (1),
20553 GEN_INT (one_var == 1 ? 0 : 1),
20554 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20555 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20556
20557 if (mode != V4SFmode)
20558 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20559 else if (tmp != target)
20560 emit_move_insn (target, tmp);
20561 }
20562 else if (target != new_target)
20563 emit_move_insn (target, new_target);
20564 return true;
20565
20566 case V8HImode:
20567 case V16QImode:
20568 vsimode = V4SImode;
20569 goto widen;
20570 case V4HImode:
20571 case V8QImode:
20572 if (!mmx_ok)
20573 return false;
20574 vsimode = V2SImode;
20575 goto widen;
20576 widen:
20577 if (one_var != 0)
20578 return false;
20579
20580 /* Zero extend the variable element to SImode and recurse. */
20581 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20582
20583 x = gen_reg_rtx (vsimode);
20584 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20585 var, one_var))
20586 gcc_unreachable ();
20587
20588 emit_move_insn (target, gen_lowpart (mode, x));
20589 return true;
20590
20591 default:
20592 return false;
20593 }
20594 }
20595
20596 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20597 consisting of the values in VALS. It is known that all elements
20598 except ONE_VAR are constants. Return true if successful. */
20599
20600 static bool
20601 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20602 rtx target, rtx vals, int one_var)
20603 {
20604 rtx var = XVECEXP (vals, 0, one_var);
20605 enum machine_mode wmode;
20606 rtx const_vec, x;
20607
20608 const_vec = copy_rtx (vals);
20609 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20610 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20611
20612 switch (mode)
20613 {
20614 case V2DFmode:
20615 case V2DImode:
20616 case V2SFmode:
20617 case V2SImode:
20618 /* For the two element vectors, it's just as easy to use
20619 the general case. */
20620 return false;
20621
20622 case V4SFmode:
20623 case V4SImode:
20624 case V8HImode:
20625 case V4HImode:
20626 break;
20627
20628 case V16QImode:
20629 wmode = V8HImode;
20630 goto widen;
20631 case V8QImode:
20632 wmode = V4HImode;
20633 goto widen;
20634 widen:
20635 /* There's no way to set one QImode entry easily. Combine
20636 the variable value with its adjacent constant value, and
20637 promote to an HImode set. */
20638 x = XVECEXP (vals, 0, one_var ^ 1);
20639 if (one_var & 1)
20640 {
20641 var = convert_modes (HImode, QImode, var, true);
20642 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20643 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20644 x = GEN_INT (INTVAL (x) & 0xff);
20645 }
20646 else
20647 {
20648 var = convert_modes (HImode, QImode, var, true);
20649 x = gen_int_mode (INTVAL (x) << 8, HImode);
20650 }
20651 if (x != const0_rtx)
20652 var = expand_simple_binop (HImode, IOR, var, x, var,
20653 1, OPTAB_LIB_WIDEN);
20654
20655 x = gen_reg_rtx (wmode);
20656 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20657 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20658
20659 emit_move_insn (target, gen_lowpart (mode, x));
20660 return true;
20661
20662 default:
20663 return false;
20664 }
20665
20666 emit_move_insn (target, const_vec);
20667 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20668 return true;
20669 }
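
/* Editor's sketch (not part of the original file, never compiled): how the
   V8QI/V16QI "widen" path above folds the single variable byte into an
   HImode element together with its constant neighbour (x86 vectors are
   little-endian, so element 0 is the low byte).  The helper name is made up
   for illustration only.  */
#if 0
static unsigned short
combine_pair_sketch (unsigned char var, unsigned char neighbour, int one_var)
{
  if (one_var & 1)
    /* Variable byte is the odd element: it lands in the high half.  */
    return (unsigned short) ((var << 8) | neighbour);
  /* Variable byte is the even element: it stays in the low half.  */
  return (unsigned short) ((neighbour << 8) | var);
}
#endif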
20670
20671 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20672 all values variable, and none identical. */
20673
20674 static void
20675 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20676 rtx target, rtx vals)
20677 {
20678 enum machine_mode half_mode = GET_MODE_INNER (mode);
20679 rtx op0 = NULL, op1 = NULL;
20680 bool use_vec_concat = false;
20681
20682 switch (mode)
20683 {
20684 case V2SFmode:
20685 case V2SImode:
20686 if (!mmx_ok && !TARGET_SSE)
20687 break;
20688 /* FALLTHRU */
20689
20690 case V2DFmode:
20691 case V2DImode:
20692 /* For the two element vectors, we always implement VEC_CONCAT. */
20693 op0 = XVECEXP (vals, 0, 0);
20694 op1 = XVECEXP (vals, 0, 1);
20695 use_vec_concat = true;
20696 break;
20697
20698 case V4SFmode:
20699 half_mode = V2SFmode;
20700 goto half;
20701 case V4SImode:
20702 half_mode = V2SImode;
20703 goto half;
20704 half:
20705 {
20706 rtvec v;
20707
20708 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20709 Recurse to load the two halves. */
20710
20711 op0 = gen_reg_rtx (half_mode);
20712 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20713 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20714
20715 op1 = gen_reg_rtx (half_mode);
20716 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20717 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20718
20719 use_vec_concat = true;
20720 }
20721 break;
20722
20723 case V8HImode:
20724 case V16QImode:
20725 case V4HImode:
20726 case V8QImode:
20727 break;
20728
20729 default:
20730 gcc_unreachable ();
20731 }
20732
20733 if (use_vec_concat)
20734 {
20735 if (!register_operand (op0, half_mode))
20736 op0 = force_reg (half_mode, op0);
20737 if (!register_operand (op1, half_mode))
20738 op1 = force_reg (half_mode, op1);
20739
20740 emit_insn (gen_rtx_SET (VOIDmode, target,
20741 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20742 }
20743 else
20744 {
20745 int i, j, n_elts, n_words, n_elt_per_word;
20746 enum machine_mode inner_mode;
20747 rtx words[4], shift;
20748
20749 inner_mode = GET_MODE_INNER (mode);
20750 n_elts = GET_MODE_NUNITS (mode);
20751 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20752 n_elt_per_word = n_elts / n_words;
20753 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20754
20755 for (i = 0; i < n_words; ++i)
20756 {
20757 rtx word = NULL_RTX;
20758
20759 for (j = 0; j < n_elt_per_word; ++j)
20760 {
20761 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20762 elt = convert_modes (word_mode, inner_mode, elt, true);
20763
20764 if (j == 0)
20765 word = elt;
20766 else
20767 {
20768 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20769 word, 1, OPTAB_LIB_WIDEN);
20770 word = expand_simple_binop (word_mode, IOR, word, elt,
20771 word, 1, OPTAB_LIB_WIDEN);
20772 }
20773 }
20774
20775 words[i] = word;
20776 }
20777
20778 if (n_words == 1)
20779 emit_move_insn (target, gen_lowpart (mode, words[0]));
20780 else if (n_words == 2)
20781 {
20782 rtx tmp = gen_reg_rtx (mode);
20783 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20784 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20785 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20786 emit_move_insn (target, tmp);
20787 }
20788 else if (n_words == 4)
20789 {
20790 rtx tmp = gen_reg_rtx (V4SImode);
20791 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20792 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20793 emit_move_insn (target, gen_lowpart (mode, tmp));
20794 }
20795 else
20796 gcc_unreachable ();
20797 }
20798 }
20799
20800 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20801 instructions unless MMX_OK is true. */
20802
20803 void
20804 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20805 {
20806 enum machine_mode mode = GET_MODE (target);
20807 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20808 int n_elts = GET_MODE_NUNITS (mode);
20809 int n_var = 0, one_var = -1;
20810 bool all_same = true, all_const_zero = true;
20811 int i;
20812 rtx x;
20813
20814 for (i = 0; i < n_elts; ++i)
20815 {
20816 x = XVECEXP (vals, 0, i);
20817 if (!CONSTANT_P (x))
20818 n_var++, one_var = i;
20819 else if (x != CONST0_RTX (inner_mode))
20820 all_const_zero = false;
20821 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20822 all_same = false;
20823 }
20824
20825 /* Constants are best loaded from the constant pool. */
20826 if (n_var == 0)
20827 {
20828 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20829 return;
20830 }
20831
20832 /* If all values are identical, broadcast the value. */
20833 if (all_same
20834 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20835 XVECEXP (vals, 0, 0)))
20836 return;
20837
20838 /* Values where only one field is non-constant are best loaded from
20839 the pool and overwritten via move later. */
20840 if (n_var == 1)
20841 {
20842 if (all_const_zero
20843 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20844 XVECEXP (vals, 0, one_var),
20845 one_var))
20846 return;
20847
20848 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20849 return;
20850 }
20851
20852 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20853 }
20854
20855 void
20856 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20857 {
20858 enum machine_mode mode = GET_MODE (target);
20859 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20860 bool use_vec_merge = false;
20861 rtx tmp;
20862
20863 switch (mode)
20864 {
20865 case V2SFmode:
20866 case V2SImode:
20867 if (mmx_ok)
20868 {
20869 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20870 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20871 if (elt == 0)
20872 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20873 else
20874 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20875 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20876 return;
20877 }
20878 break;
20879
20880 case V2DFmode:
20881 case V2DImode:
20882 {
20883 rtx op0, op1;
20884
20885 /* For the two element vectors, we implement a VEC_CONCAT with
20886 the extraction of the other element. */
20887
20888 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20889 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20890
20891 if (elt == 0)
20892 op0 = val, op1 = tmp;
20893 else
20894 op0 = tmp, op1 = val;
20895
20896 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20897 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20898 }
20899 return;
20900
20901 case V4SFmode:
20902 switch (elt)
20903 {
20904 case 0:
20905 use_vec_merge = true;
20906 break;
20907
20908 case 1:
20909 /* tmp = target = A B C D */
20910 tmp = copy_to_reg (target);
20911 /* target = A A B B */
20912 emit_insn (gen_sse_unpcklps (target, target, target));
20913 /* target = X A B B */
20914 ix86_expand_vector_set (false, target, val, 0);
20915 /* target = A X C D */
20916 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20917 GEN_INT (1), GEN_INT (0),
20918 GEN_INT (2+4), GEN_INT (3+4)));
20919 return;
20920
20921 case 2:
20922 /* tmp = target = A B C D */
20923 tmp = copy_to_reg (target);
20924 /* tmp = X B C D */
20925 ix86_expand_vector_set (false, tmp, val, 0);
20926 /* target = A B X D */
20927 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20928 GEN_INT (0), GEN_INT (1),
20929 GEN_INT (0+4), GEN_INT (3+4)));
20930 return;
20931
20932 case 3:
20933 /* tmp = target = A B C D */
20934 tmp = copy_to_reg (target);
20935 /* tmp = X B C D */
20936 ix86_expand_vector_set (false, tmp, val, 0);
20937 /* target = A B X D */
20938 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20939 GEN_INT (0), GEN_INT (1),
20940 GEN_INT (2+4), GEN_INT (0+4)));
20941 return;
20942
20943 default:
20944 gcc_unreachable ();
20945 }
20946 break;
20947
20948 case V4SImode:
20949 /* Element 0 handled by vec_merge below. */
20950 if (elt == 0)
20951 {
20952 use_vec_merge = true;
20953 break;
20954 }
20955
20956 if (TARGET_SSE2)
20957 {
20958 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20959 store into element 0, then shuffle them back. */
20960
20961 rtx order[4];
20962
20963 order[0] = GEN_INT (elt);
20964 order[1] = const1_rtx;
20965 order[2] = const2_rtx;
20966 order[3] = GEN_INT (3);
20967 order[elt] = const0_rtx;
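/* ORDER is a transposition that swaps lane 0 with lane ELT and leaves
   the other lanes alone, so applying the same pshufd before and after
   the element store undoes itself; in between, the lane being set
   temporarily lives in lane 0.  */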
20968
20969 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20970 order[1], order[2], order[3]));
20971
20972 ix86_expand_vector_set (false, target, val, 0);
20973
20974 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20975 order[1], order[2], order[3]));
20976 }
20977 else
20978 {
20979 /* For SSE1, we have to reuse the V4SF code. */
20980 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20981 gen_lowpart (SFmode, val), elt);
20982 }
20983 return;
20984
20985 case V8HImode:
20986 use_vec_merge = TARGET_SSE2;
20987 break;
20988 case V4HImode:
20989 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20990 break;
20991
20992 case V16QImode:
20993 case V8QImode:
20994 default:
20995 break;
20996 }
20997
20998 if (use_vec_merge)
20999 {
21000 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
21001 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
21002 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21003 }
21004 else
21005 {
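/* Generic fallback: spill the vector to a stack temporary, overwrite
   the selected element in memory, then reload the whole vector.  */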
21006 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21007
21008 emit_move_insn (mem, target);
21009
21010 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21011 emit_move_insn (tmp, val);
21012
21013 emit_move_insn (target, mem);
21014 }
21015 }
21016
21017 void
21018 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
21019 {
21020 enum machine_mode mode = GET_MODE (vec);
21021 enum machine_mode inner_mode = GET_MODE_INNER (mode);
21022 bool use_vec_extr = false;
21023 rtx tmp;
21024
21025 switch (mode)
21026 {
21027 case V2SImode:
21028 case V2SFmode:
21029 if (!mmx_ok)
21030 break;
21031 /* FALLTHRU */
21032
21033 case V2DFmode:
21034 case V2DImode:
21035 use_vec_extr = true;
21036 break;
21037
21038 case V4SFmode:
21039 switch (elt)
21040 {
21041 case 0:
21042 tmp = vec;
21043 break;
21044
21045 case 1:
21046 case 3:
21047 tmp = gen_reg_rtx (mode);
21048 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
21049 GEN_INT (elt), GEN_INT (elt),
21050 GEN_INT (elt+4), GEN_INT (elt+4)));
21051 break;
21052
21053 case 2:
21054 tmp = gen_reg_rtx (mode);
21055 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21056 break;
21057
21058 default:
21059 gcc_unreachable ();
21060 }
21061 vec = tmp;
21062 use_vec_extr = true;
21063 elt = 0;
21064 break;
21065
21066 case V4SImode:
21067 if (TARGET_SSE2)
21068 {
21069 switch (elt)
21070 {
21071 case 0:
21072 tmp = vec;
21073 break;
21074
21075 case 1:
21076 case 3:
21077 tmp = gen_reg_rtx (mode);
21078 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21079 GEN_INT (elt), GEN_INT (elt),
21080 GEN_INT (elt), GEN_INT (elt)));
21081 break;
21082
21083 case 2:
21084 tmp = gen_reg_rtx (mode);
21085 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21086 break;
21087
21088 default:
21089 gcc_unreachable ();
21090 }
21091 vec = tmp;
21092 use_vec_extr = true;
21093 elt = 0;
21094 }
21095 else
21096 {
21097 /* For SSE1, we have to reuse the V4SF code. */
21098 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21099 gen_lowpart (V4SFmode, vec), elt);
21100 return;
21101 }
21102 break;
21103
21104 case V8HImode:
21105 use_vec_extr = TARGET_SSE2;
21106 break;
21107 case V4HImode:
21108 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21109 break;
21110
21111 case V16QImode:
21112 case V8QImode:
21113 /* ??? Could extract the appropriate HImode element and shift. */
21114 default:
21115 break;
21116 }
21117
21118 if (use_vec_extr)
21119 {
21120 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21121 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21122
21123 /* Let the rtl optimizers know about the zero extension performed. */
21124 if (inner_mode == HImode)
21125 {
21126 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21127 target = gen_lowpart (SImode, target);
21128 }
21129
21130 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21131 }
21132 else
21133 {
21134 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21135
21136 emit_move_insn (mem, vec);
21137
21138 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21139 emit_move_insn (target, tmp);
21140 }
21141 }
21142
21143 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21144 pattern to reduce; DEST is the destination; IN is the input vector. */
21145
21146 void
21147 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21148 {
21149 rtx tmp1, tmp2, tmp3;
21150
21151 tmp1 = gen_reg_rtx (V4SFmode);
21152 tmp2 = gen_reg_rtx (V4SFmode);
21153 tmp3 = gen_reg_rtx (V4SFmode);
21154
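/* Assuming IN = { a, b, c, d }: movhlps copies the high pair over the
   low pair, the first FN combines the two halves pairwise, the shuffle
   broadcasts the second partial result, and the last FN leaves the
   complete reduction (for the commutative operations this is used
   with) in element 0 of DEST.  */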
21155 emit_insn (gen_sse_movhlps (tmp1, in, in));
21156 emit_insn (fn (tmp2, tmp1, in));
21157
21158 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21159 GEN_INT (1), GEN_INT (1),
21160 GEN_INT (1+4), GEN_INT (1+4)));
21161 emit_insn (fn (dest, tmp2, tmp3));
21162 }
21163 \f
21164 /* Target hook for scalar_mode_supported_p. */
21165 static bool
21166 ix86_scalar_mode_supported_p (enum machine_mode mode)
21167 {
21168 if (DECIMAL_FLOAT_MODE_P (mode))
21169 return true;
21170 else if (mode == TFmode)
21171 return TARGET_64BIT;
21172 else
21173 return default_scalar_mode_supported_p (mode);
21174 }
21175
21176 /* Implements target hook vector_mode_supported_p. */
21177 static bool
21178 ix86_vector_mode_supported_p (enum machine_mode mode)
21179 {
21180 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21181 return true;
21182 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21183 return true;
21184 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21185 return true;
21186 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21187 return true;
21188 return false;
21189 }
21190
21191 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21192
21193 We do this in the new i386 backend to maintain source compatibility
21194 with the old cc0-based compiler. */
21195
21196 static tree
21197 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21198 tree inputs ATTRIBUTE_UNUSED,
21199 tree clobbers)
21200 {
21201 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21202 clobbers);
21203 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21204 clobbers);
21205 return clobbers;
21206 }
21207
21208 /* Implements the target hook targetm.asm.encode_section_info.  This
21209 is not used by NetWare. */
21210
21211 static void ATTRIBUTE_UNUSED
21212 ix86_encode_section_info (tree decl, rtx rtl, int first)
21213 {
21214 default_encode_section_info (decl, rtl, first);
21215
21216 if (TREE_CODE (decl) == VAR_DECL
21217 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21218 && ix86_in_large_data_p (decl))
21219 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21220 }
21221
21222 /* Worker function for REVERSE_CONDITION. */
21223
21224 enum rtx_code
21225 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21226 {
21227 return (mode != CCFPmode && mode != CCFPUmode
21228 ? reverse_condition (code)
21229 : reverse_condition_maybe_unordered (code));
21230 }
21231
21232 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21233 to OPERANDS[0]. */
21234
21235 const char *
21236 output_387_reg_move (rtx insn, rtx *operands)
21237 {
21238 if (REG_P (operands[0]))
21239 {
21240 if (REG_P (operands[1])
21241 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21242 {
21243 if (REGNO (operands[0]) == FIRST_STACK_REG)
21244 return output_387_ffreep (operands, 0);
21245 return "fstp\t%y0";
21246 }
21247 if (STACK_TOP_P (operands[0]))
21248 return "fld%z1\t%y1";
21249 return "fst\t%y0";
21250 }
21251 else if (MEM_P (operands[0]))
21252 {
21253 gcc_assert (REG_P (operands[1]));
21254 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21255 return "fstp%z0\t%y0";
21256 else
21257 {
21258 /* There is no non-popping store to memory for XFmode.
21259 So if we need one, follow the store with a load. */
21260 if (GET_MODE (operands[0]) == XFmode)
21261 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21262 else
21263 return "fst%z0\t%y0";
21264 }
21265 }
21266 else
21267 gcc_unreachable ();
21268 }
21269
21270 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21271 FP status register is set. */
21272
21273 void
21274 ix86_emit_fp_unordered_jump (rtx label)
21275 {
21276 rtx reg = gen_reg_rtx (HImode);
21277 rtx temp;
21278
21279 emit_insn (gen_x86_fnstsw_1 (reg));
21280
21281 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21282 {
21283 emit_insn (gen_x86_sahf_1 (reg));
21284
21285 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21286 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21287 }
21288 else
21289 {
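/* fnstsw has left the FP status word in REG; C2 is bit 10 of that
   word, i.e. bit 2 of the high byte, hence the test of the extended
   (high) QImode part against 0x04.  */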
21290 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21291
21292 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21293 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21294 }
21295
21296 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21297 gen_rtx_LABEL_REF (VOIDmode, label),
21298 pc_rtx);
21299 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21300
21301 emit_jump_insn (temp);
21302 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21303 }
21304
21305 /* Output code to perform a log1p XFmode calculation. */
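/* The i387 fyl2xp1 instruction is documented only for |x| up to about
   1 - sqrt(2)/2 (0.29289321...), which is the threshold tested below;
   larger magnitudes take the fallback path that applies fyl2x to
   1 + x.  In both cases the fldln2 constant converts the base-2
   logarithm into a natural logarithm.  */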
21306
21307 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21308 {
21309 rtx label1 = gen_label_rtx ();
21310 rtx label2 = gen_label_rtx ();
21311
21312 rtx tmp = gen_reg_rtx (XFmode);
21313 rtx tmp2 = gen_reg_rtx (XFmode);
21314
21315 emit_insn (gen_absxf2 (tmp, op1));
21316 emit_insn (gen_cmpxf (tmp,
21317 CONST_DOUBLE_FROM_REAL_VALUE (
21318 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21319 XFmode)));
21320 emit_jump_insn (gen_bge (label1));
21321
21322 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21323 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21324 emit_jump (label2);
21325
21326 emit_label (label1);
21327 emit_move_insn (tmp, CONST1_RTX (XFmode));
21328 emit_insn (gen_addxf3 (tmp, op1, tmp));
21329 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21330 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21331
21332 emit_label (label2);
21333 }
21334
21335 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21336
21337 static void ATTRIBUTE_UNUSED
21338 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21339 tree decl)
21340 {
21341 /* With Binutils 2.15, the "@unwind" marker must be specified on
21342 every occurrence of the ".eh_frame" section, not just the first
21343 one. */
21344 if (TARGET_64BIT
21345 && strcmp (name, ".eh_frame") == 0)
21346 {
21347 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21348 flags & SECTION_WRITE ? "aw" : "a");
21349 return;
21350 }
21351 default_elf_asm_named_section (name, flags, decl);
21352 }
21353
21354 /* Return the mangling of TYPE if it is an extended fundamental type. */
21355
21356 static const char *
21357 ix86_mangle_fundamental_type (tree type)
21358 {
21359 switch (TYPE_MODE (type))
21360 {
21361 case TFmode:
21362 /* __float128 is "g". */
21363 return "g";
21364 case XFmode:
21365 /* "long double" or __float80 is "e". */
21366 return "e";
21367 default:
21368 return NULL;
21369 }
21370 }
21371
21372 /* For 32-bit code we can save PIC register setup by using
21373 __stack_chk_fail_local hidden function instead of calling
21374 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
21375 register, so it is better to call __stack_chk_fail directly. */
21376
21377 static tree
21378 ix86_stack_protect_fail (void)
21379 {
21380 return TARGET_64BIT
21381 ? default_external_stack_protect_fail ()
21382 : default_hidden_stack_protect_fail ();
21383 }
21384
21385 /* Select a format to encode pointers in exception handling data. CODE
21386 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21387 true if the symbol may be affected by dynamic relocations.
21388
21389 ??? All x86 object file formats are capable of representing this.
21390 After all, the relocation needed is the same as for the call insn.
21391 Whether or not a particular assembler allows us to enter such, I
21392 guess we'll have to see. */
21393 int
21394 asm_preferred_eh_data_format (int code, int global)
21395 {
21396 if (flag_pic)
21397 {
21398 int type = DW_EH_PE_sdata8;
21399 if (!TARGET_64BIT
21400 || ix86_cmodel == CM_SMALL_PIC
21401 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21402 type = DW_EH_PE_sdata4;
21403 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21404 }
21405 if (ix86_cmodel == CM_SMALL
21406 || (ix86_cmodel == CM_MEDIUM && code))
21407 return DW_EH_PE_udata4;
21408 return DW_EH_PE_absptr;
21409 }
21410 \f
21411 /* Copy the sign of SIGN onto the non-negative value ABS_VALUE,
21412 storing the result in RESULT.  If MASK is non-null, it is the mask
21413 that was used to clear the sign bit of ABS_VALUE. */
21414 static void
21415 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21416 {
21417 enum machine_mode mode = GET_MODE (sign);
21418 rtx sgn = gen_reg_rtx (mode);
21419 if (mask == NULL_RTX)
21420 {
21421 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21422 if (!VECTOR_MODE_P (mode))
21423 {
21424 /* We need to generate a scalar mode mask in this case. */
21425 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21426 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21427 mask = gen_reg_rtx (mode);
21428 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21429 }
21430 }
21431 else
21432 mask = gen_rtx_NOT (mode, mask);
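/* SGN isolates the sign bit of SIGN (a caller-supplied MASK is the
   complement of the sign-bit mask, hence the NOT above); OR-ing SGN
   into ABS_VALUE yields copysign (ABS_VALUE, SIGN).  */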
21433 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21434 gen_rtx_AND (mode, mask, sign)));
21435 emit_insn (gen_rtx_SET (VOIDmode, result,
21436 gen_rtx_IOR (mode, abs_value, sgn)));
21437 }
21438
21439 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21440 mask for masking out the sign-bit is stored in *SMASK, if that is
21441 non-null. */
21442 static rtx
21443 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21444 {
21445 enum machine_mode mode = GET_MODE (op0);
21446 rtx xa, mask;
21447
21448 xa = gen_reg_rtx (mode);
21449 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21450 if (!VECTOR_MODE_P (mode))
21451 {
21452 /* We need to generate a scalar mode mask in this case. */
21453 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21454 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21455 mask = gen_reg_rtx (mode);
21456 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21457 }
21458 emit_insn (gen_rtx_SET (VOIDmode, xa,
21459 gen_rtx_AND (mode, op0, mask)));
21460
21461 if (smask)
21462 *smask = mask;
21463
21464 return xa;
21465 }
21466
21467 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21468 swapping the operands if SWAP_OPERANDS is true. The expanded
21469 code is a forward jump to a newly created label in case the
21470 comparison is true. The generated label rtx is returned. */
21471 static rtx
21472 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21473 bool swap_operands)
21474 {
21475 rtx label, tmp;
21476
21477 if (swap_operands)
21478 {
21479 tmp = op0;
21480 op0 = op1;
21481 op1 = tmp;
21482 }
21483
21484 label = gen_label_rtx ();
21485 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21486 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21487 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21488 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21489 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21490 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21491 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21492 JUMP_LABEL (tmp) = label;
21493
21494 return label;
21495 }
21496
21497 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21498 using comparison code CODE. Operands are swapped for the comparison if
21499 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21500 static rtx
21501 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21502 bool swap_operands)
21503 {
21504 enum machine_mode mode = GET_MODE (op0);
21505 rtx mask = gen_reg_rtx (mode);
21506
21507 if (swap_operands)
21508 {
21509 rtx tmp = op0;
21510 op0 = op1;
21511 op1 = tmp;
21512 }
21513
21514 if (mode == DFmode)
21515 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21516 gen_rtx_fmt_ee (code, mode, op0, op1)));
21517 else
21518 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21519 gen_rtx_fmt_ee (code, mode, op0, op1)));
21520
21521 return mask;
21522 }
21523
21524 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21525 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21526 static rtx
21527 ix86_gen_TWO52 (enum machine_mode mode)
21528 {
21529 REAL_VALUE_TYPE TWO52r;
21530 rtx TWO52;
21531
21532 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21533 TWO52 = const_double_from_real_value (TWO52r, mode);
21534 TWO52 = force_reg (mode, TWO52);
21535
21536 return TWO52;
21537 }
21538
21539 /* Expand SSE sequence for computing lround from OP1 storing
21540 into OP0. */
21541 void
21542 ix86_expand_lround (rtx op0, rtx op1)
21543 {
21544 /* C code for the stuff we're doing below:
21545 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
21546 return (long)tmp;
21547 */
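/* nextafter (0.5, 0.0) is used instead of 0.5 so that rounding in the
   addition itself cannot push a value just below a halfway point (for
   instance the largest double smaller than 0.5) up to the next
   integer.  */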
21548 enum machine_mode mode = GET_MODE (op1);
21549 const struct real_format *fmt;
21550 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21551 rtx adj;
21552
21553 /* load nextafter (0.5, 0.0) */
21554 fmt = REAL_MODE_FORMAT (mode);
21555 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21556 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21557
21558 /* adj = copysign (0.5, op1) */
21559 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21560 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21561
21562 /* adj = op1 + adj */
21563 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21564
21565 /* op0 = (imode)adj */
21566 expand_fix (op0, adj, 0);
21567 }
21568
21569 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
21570 DO_FLOOR) from OP1, storing the result into OP0. */
21571 void
21572 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21573 {
21574 /* C code for the stuff we're doing below (for do_floor):
21575 xi = (long)op1;
21576 xi -= (double)xi > op1 ? 1 : 0;
21577 return xi;
21578 */
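/* For the ceil case the comparison and the adjustment are mirrored:
   xi += (double)xi < op1 ? 1 : 0.  */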
21579 enum machine_mode fmode = GET_MODE (op1);
21580 enum machine_mode imode = GET_MODE (op0);
21581 rtx ireg, freg, label, tmp;
21582
21583 /* reg = (long)op1 */
21584 ireg = gen_reg_rtx (imode);
21585 expand_fix (ireg, op1, 0);
21586
21587 /* freg = (double)reg */
21588 freg = gen_reg_rtx (fmode);
21589 expand_float (freg, ireg, 0);
21590
21591 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21592 label = ix86_expand_sse_compare_and_jump (UNLE,
21593 freg, op1, !do_floor);
21594 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21595 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21596 emit_move_insn (ireg, tmp);
21597
21598 emit_label (label);
21599 LABEL_NUSES (label) = 1;
21600
21601 emit_move_insn (op0, ireg);
21602 }
21603
21604 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21605 result in OPERAND0. */
21606 void
21607 ix86_expand_rint (rtx operand0, rtx operand1)
21608 {
21609 /* C code for the stuff we're doing below:
21610 xa = fabs (operand1);
21611 if (!isless (xa, 2**52))
21612 return operand1;
21613 xa = xa + 2**52 - 2**52;
21614 return copysign (xa, operand1);
21615 */
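/* Adding and then subtracting 2**52 (2**23 for SFmode) rounds XA to an
   integer in the current rounding mode: at that magnitude every
   representable value is already integral, so the addition rounds away
   the fractional bits and the subtraction leaves the rounded value.  */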
21616 enum machine_mode mode = GET_MODE (operand0);
21617 rtx res, xa, label, TWO52, mask;
21618
21619 res = gen_reg_rtx (mode);
21620 emit_move_insn (res, operand1);
21621
21622 /* xa = abs (operand1) */
21623 xa = ix86_expand_sse_fabs (res, &mask);
21624
21625 /* if (!isless (xa, TWO52)) goto label; */
21626 TWO52 = ix86_gen_TWO52 (mode);
21627 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21628
21629 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21630 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21631
21632 ix86_sse_copysign_to_positive (res, xa, res, mask);
21633
21634 emit_label (label);
21635 LABEL_NUSES (label) = 1;
21636
21637 emit_move_insn (operand0, res);
21638 }
21639
21640 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21641 into OPERAND0. */
21642 void
21643 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21644 {
21645 /* C code for the stuff we expand below.
21646 double xa = fabs (x), x2;
21647 if (!isless (xa, TWO52))
21648 return x;
21649 xa = xa + TWO52 - TWO52;
21650 x2 = copysign (xa, x);
21651 Compensate. Floor:
21652 if (x2 > x)
21653 x2 -= 1;
21654 Compensate. Ceil:
21655 if (x2 < x)
21656 x2 -= -1;
21657 return x2;
21658 */
21659 enum machine_mode mode = GET_MODE (operand0);
21660 rtx xa, TWO52, tmp, label, one, res, mask;
21661
21662 TWO52 = ix86_gen_TWO52 (mode);
21663
21664 /* Temporary for holding the result, initialized to the input
21665 operand to ease control flow. */
21666 res = gen_reg_rtx (mode);
21667 emit_move_insn (res, operand1);
21668
21669 /* xa = abs (operand1) */
21670 xa = ix86_expand_sse_fabs (res, &mask);
21671
21672 /* if (!isless (xa, TWO52)) goto label; */
21673 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21674
21675 /* xa = xa + TWO52 - TWO52; */
21676 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21677 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21678
21679 /* xa = copysign (xa, operand1) */
21680 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21681
21682 /* generate 1.0 or -1.0 */
21683 one = force_reg (mode,
21684 const_double_from_real_value (do_floor
21685 ? dconst1 : dconstm1, mode));
21686
21687 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21688 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21689 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21690 gen_rtx_AND (mode, one, tmp)));
21691 /* We always need to subtract here to preserve signed zero. */
21692 tmp = expand_simple_binop (mode, MINUS,
21693 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21694 emit_move_insn (res, tmp);
21695
21696 emit_label (label);
21697 LABEL_NUSES (label) = 1;
21698
21699 emit_move_insn (operand0, res);
21700 }
21701
21702 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21703 into OPERAND0. */
21704 void
21705 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21706 {
21707 /* C code for the stuff we expand below.
21708 double xa = fabs (x), x2;
21709 if (!isless (xa, TWO52))
21710 return x;
21711 x2 = (double)(long)x;
21712 Compensate. Floor:
21713 if (x2 > x)
21714 x2 -= 1;
21715 Compensate. Ceil:
21716 if (x2 < x)
21717 x2 += 1;
21718 if (HONOR_SIGNED_ZEROS (mode))
21719 return copysign (x2, x);
21720 return x2;
21721 */
21722 enum machine_mode mode = GET_MODE (operand0);
21723 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21724
21725 TWO52 = ix86_gen_TWO52 (mode);
21726
21727 /* Temporary for holding the result, initialized to the input
21728 operand to ease control flow. */
21729 res = gen_reg_rtx (mode);
21730 emit_move_insn (res, operand1);
21731
21732 /* xa = abs (operand1) */
21733 xa = ix86_expand_sse_fabs (res, &mask);
21734
21735 /* if (!isless (xa, TWO52)) goto label; */
21736 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21737
21738 /* xa = (double)(long)x */
21739 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21740 expand_fix (xi, res, 0);
21741 expand_float (xa, xi, 0);
21742
21743 /* generate 1.0 */
21744 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21745
21746 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21747 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21748 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21749 gen_rtx_AND (mode, one, tmp)));
21750 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21751 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21752 emit_move_insn (res, tmp);
21753
21754 if (HONOR_SIGNED_ZEROS (mode))
21755 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21756
21757 emit_label (label);
21758 LABEL_NUSES (label) = 1;
21759
21760 emit_move_insn (operand0, res);
21761 }
21762
21763 /* Expand SSE sequence for computing round from OPERAND1 storing
21764 into OPERAND0.  This sequence works without relying on DImode truncation
21765 via cvttsd2siq, which is only available on 64-bit targets. */
21766 void
21767 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21768 {
21769 /* C code for the stuff we expand below.
21770 double xa = fabs (x), xa2, x2;
21771 if (!isless (xa, TWO52))
21772 return x;
21773 Using the absolute value and copying back sign makes
21774 -0.0 -> -0.0 correct.
21775 xa2 = xa + TWO52 - TWO52;
21776 Compensate.
21777 dxa = xa2 - xa;
21778 if (dxa <= -0.5)
21779 xa2 += 1;
21780 else if (dxa > 0.5)
21781 xa2 -= 1;
21782 x2 = copysign (xa2, x);
21783 return x2;
21784 */
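/* Under the default round-to-nearest mode dxa ends up in [-0.5, 0.5];
   the two masked adjustments below push the halfway cases away from
   zero, matching the semantics of round().  */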
21785 enum machine_mode mode = GET_MODE (operand0);
21786 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21787
21788 TWO52 = ix86_gen_TWO52 (mode);
21789
21790 /* Temporary for holding the result, initialized to the input
21791 operand to ease control flow. */
21792 res = gen_reg_rtx (mode);
21793 emit_move_insn (res, operand1);
21794
21795 /* xa = abs (operand1) */
21796 xa = ix86_expand_sse_fabs (res, &mask);
21797
21798 /* if (!isless (xa, TWO52)) goto label; */
21799 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21800
21801 /* xa2 = xa + TWO52 - TWO52; */
21802 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21803 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21804
21805 /* dxa = xa2 - xa; */
21806 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21807
21808 /* generate 0.5, 1.0 and -0.5 */
21809 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21810 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21811 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21812 0, OPTAB_DIRECT);
21813
21814 /* Compensate. */
21815 tmp = gen_reg_rtx (mode);
21816 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21817 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21818 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21819 gen_rtx_AND (mode, one, tmp)));
21820 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21821 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21822 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21823 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21824 gen_rtx_AND (mode, one, tmp)));
21825 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21826
21827 /* res = copysign (xa2, operand1) */
21828 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21829
21830 emit_label (label);
21831 LABEL_NUSES (label) = 1;
21832
21833 emit_move_insn (operand0, res);
21834 }
21835
21836 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21837 into OPERAND0. */
21838 void
21839 ix86_expand_trunc (rtx operand0, rtx operand1)
21840 {
21841 /* C code for SSE variant we expand below.
21842 double xa = fabs (x), x2;
21843 if (!isless (xa, TWO52))
21844 return x;
21845 x2 = (double)(long)x;
21846 if (HONOR_SIGNED_ZEROS (mode))
21847 return copysign (x2, x);
21848 return x2;
21849 */
21850 enum machine_mode mode = GET_MODE (operand0);
21851 rtx xa, xi, TWO52, label, res, mask;
21852
21853 TWO52 = ix86_gen_TWO52 (mode);
21854
21855 /* Temporary for holding the result, initialized to the input
21856 operand to ease control flow. */
21857 res = gen_reg_rtx (mode);
21858 emit_move_insn (res, operand1);
21859
21860 /* xa = abs (operand1) */
21861 xa = ix86_expand_sse_fabs (res, &mask);
21862
21863 /* if (!isless (xa, TWO52)) goto label; */
21864 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21865
21866 /* x = (double)(long)x */
21867 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21868 expand_fix (xi, res, 0);
21869 expand_float (res, xi, 0);
21870
21871 if (HONOR_SIGNED_ZEROS (mode))
21872 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21873
21874 emit_label (label);
21875 LABEL_NUSES (label) = 1;
21876
21877 emit_move_insn (operand0, res);
21878 }
21879
21880 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21881 into OPERAND0. */
21882 void
21883 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21884 {
21885 enum machine_mode mode = GET_MODE (operand0);
21886 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21887
21888 /* C code for SSE variant we expand below.
21889 double xa = fabs (x), xa2, x2;
21890 if (!isless (xa, TWO52))
21891 return x;
21892 xa2 = xa + TWO52 - TWO52;
21893 Compensate:
21894 if (xa2 > xa)
21895 xa2 -= 1.0;
21896 x2 = copysign (xa2, x);
21897 return x2;
21898 */
21899
21900 TWO52 = ix86_gen_TWO52 (mode);
21901
21902 /* Temporary for holding the result, initialized to the input
21903 operand to ease control flow. */
21904 res = gen_reg_rtx (mode);
21905 emit_move_insn (res, operand1);
21906
21907 /* xa = abs (operand1) */
21908 xa = ix86_expand_sse_fabs (res, &smask);
21909
21910 /* if (!isless (xa, TWO52)) goto label; */
21911 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21912
21913 /* res = xa + TWO52 - TWO52; */
21914 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21915 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21916 emit_move_insn (res, tmp);
21917
21918 /* generate 1.0 */
21919 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21920
21921 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21922 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21923 emit_insn (gen_rtx_SET (VOIDmode, mask,
21924 gen_rtx_AND (mode, mask, one)));
21925 tmp = expand_simple_binop (mode, MINUS,
21926 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21927 emit_move_insn (res, tmp);
21928
21929 /* res = copysign (res, operand1) */
21930 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21931
21932 emit_label (label);
21933 LABEL_NUSES (label) = 1;
21934
21935 emit_move_insn (operand0, res);
21936 }
21937
21938 /* Expand SSE sequence for computing round from OPERAND1 storing
21939 into OPERAND0. */
21940 void
21941 ix86_expand_round (rtx operand0, rtx operand1)
21942 {
21943 /* C code for the stuff we're doing below:
21944 double xa = fabs (x);
21945 if (!isless (xa, TWO52))
21946 return x;
21947 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21948 return copysign (xa, x);
21949 */
21950 enum machine_mode mode = GET_MODE (operand0);
21951 rtx res, TWO52, xa, label, xi, half, mask;
21952 const struct real_format *fmt;
21953 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21954
21955 /* Temporary for holding the result, initialized to the input
21956 operand to ease control flow. */
21957 res = gen_reg_rtx (mode);
21958 emit_move_insn (res, operand1);
21959
21960 TWO52 = ix86_gen_TWO52 (mode);
21961 xa = ix86_expand_sse_fabs (res, &mask);
21962 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21963
21964 /* load nextafter (0.5, 0.0) */
21965 fmt = REAL_MODE_FORMAT (mode);
21966 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21967 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21968
21969 /* xa = xa + 0.5 */
21970 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21971 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21972
21973 /* xa = (double)(int64_t)xa */
21974 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21975 expand_fix (xi, xa, 0);
21976 expand_float (xa, xi, 0);
21977
21978 /* res = copysign (xa, operand1) */
21979 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21980
21981 emit_label (label);
21982 LABEL_NUSES (label) = 1;
21983
21984 emit_move_insn (operand0, res);
21985 }
21986
21987 \f
21988 /* Table of valid machine attributes. */
21989 static const struct attribute_spec ix86_attribute_table[] =
21990 {
21991 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
21992 /* Stdcall attribute says callee is responsible for popping arguments
21993 if they are not variable. */
21994 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21995 /* Fastcall attribute says callee is responsible for popping arguments
21996 if they are not variable. */
21997 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21998 /* Cdecl attribute says the callee is a normal C declaration */
21999 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22000 /* Regparm attribute specifies how many integer arguments are to be
22001 passed in registers. */
22002 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
22003 /* Sseregparm attribute says we are using x86_64 calling conventions
22004 for FP arguments. */
22005 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
22006 /* force_align_arg_pointer says this function realigns the stack at entry. */
22007 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
22008 false, true, true, ix86_handle_cconv_attribute },
22009 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22010 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
22011 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
22012 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
22013 #endif
22014 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22015 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
22016 #ifdef SUBTARGET_ATTRIBUTE_TABLE
22017 SUBTARGET_ATTRIBUTE_TABLE,
22018 #endif
22019 { NULL, 0, 0, false, false, false, NULL }
22020 };
22021
22022 /* Initialize the GCC target structure. */
22023 #undef TARGET_ATTRIBUTE_TABLE
22024 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
22025 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22026 # undef TARGET_MERGE_DECL_ATTRIBUTES
22027 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
22028 #endif
22029
22030 #undef TARGET_COMP_TYPE_ATTRIBUTES
22031 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
22032
22033 #undef TARGET_INIT_BUILTINS
22034 #define TARGET_INIT_BUILTINS ix86_init_builtins
22035 #undef TARGET_EXPAND_BUILTIN
22036 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
22037
22038 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
22039 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
22040 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
22041 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
22042
22043 #undef TARGET_ASM_FUNCTION_EPILOGUE
22044 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
22045
22046 #undef TARGET_ENCODE_SECTION_INFO
22047 #ifndef SUBTARGET_ENCODE_SECTION_INFO
22048 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
22049 #else
22050 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
22051 #endif
22052
22053 #undef TARGET_ASM_OPEN_PAREN
22054 #define TARGET_ASM_OPEN_PAREN ""
22055 #undef TARGET_ASM_CLOSE_PAREN
22056 #define TARGET_ASM_CLOSE_PAREN ""
22057
22058 #undef TARGET_ASM_ALIGNED_HI_OP
22059 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22060 #undef TARGET_ASM_ALIGNED_SI_OP
22061 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22062 #ifdef ASM_QUAD
22063 #undef TARGET_ASM_ALIGNED_DI_OP
22064 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22065 #endif
22066
22067 #undef TARGET_ASM_UNALIGNED_HI_OP
22068 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22069 #undef TARGET_ASM_UNALIGNED_SI_OP
22070 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22071 #undef TARGET_ASM_UNALIGNED_DI_OP
22072 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22073
22074 #undef TARGET_SCHED_ADJUST_COST
22075 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22076 #undef TARGET_SCHED_ISSUE_RATE
22077 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22078 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22079 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22080 ia32_multipass_dfa_lookahead
22081
22082 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22083 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22084
22085 #ifdef HAVE_AS_TLS
22086 #undef TARGET_HAVE_TLS
22087 #define TARGET_HAVE_TLS true
22088 #endif
22089 #undef TARGET_CANNOT_FORCE_CONST_MEM
22090 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22091 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22092 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22093
22094 #undef TARGET_DELEGITIMIZE_ADDRESS
22095 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22096
22097 #undef TARGET_MS_BITFIELD_LAYOUT_P
22098 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22099
22100 #if TARGET_MACHO
22101 #undef TARGET_BINDS_LOCAL_P
22102 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22103 #endif
22104 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22105 #undef TARGET_BINDS_LOCAL_P
22106 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22107 #endif
22108
22109 #undef TARGET_ASM_OUTPUT_MI_THUNK
22110 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22111 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22112 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22113
22114 #undef TARGET_ASM_FILE_START
22115 #define TARGET_ASM_FILE_START x86_file_start
22116
22117 #undef TARGET_DEFAULT_TARGET_FLAGS
22118 #define TARGET_DEFAULT_TARGET_FLAGS \
22119 (TARGET_DEFAULT \
22120 | TARGET_64BIT_DEFAULT \
22121 | TARGET_SUBTARGET_DEFAULT \
22122 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22123
22124 #undef TARGET_HANDLE_OPTION
22125 #define TARGET_HANDLE_OPTION ix86_handle_option
22126
22127 #undef TARGET_RTX_COSTS
22128 #define TARGET_RTX_COSTS ix86_rtx_costs
22129 #undef TARGET_ADDRESS_COST
22130 #define TARGET_ADDRESS_COST ix86_address_cost
22131
22132 #undef TARGET_FIXED_CONDITION_CODE_REGS
22133 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22134 #undef TARGET_CC_MODES_COMPATIBLE
22135 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22136
22137 #undef TARGET_MACHINE_DEPENDENT_REORG
22138 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22139
22140 #undef TARGET_BUILD_BUILTIN_VA_LIST
22141 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22142
22143 #undef TARGET_MD_ASM_CLOBBERS
22144 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22145
22146 #undef TARGET_PROMOTE_PROTOTYPES
22147 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22148 #undef TARGET_STRUCT_VALUE_RTX
22149 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22150 #undef TARGET_SETUP_INCOMING_VARARGS
22151 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22152 #undef TARGET_MUST_PASS_IN_STACK
22153 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22154 #undef TARGET_PASS_BY_REFERENCE
22155 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22156 #undef TARGET_INTERNAL_ARG_POINTER
22157 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22158 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22159 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22160 #undef TARGET_STRICT_ARGUMENT_NAMING
22161 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22162
22163 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22164 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22165
22166 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22167 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22168
22169 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22170 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22171
22172 #ifdef HAVE_AS_TLS
22173 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22174 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22175 #endif
22176
22177 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22178 #undef TARGET_INSERT_ATTRIBUTES
22179 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22180 #endif
22181
22182 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22183 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22184
22185 #undef TARGET_STACK_PROTECT_FAIL
22186 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22187
22188 #undef TARGET_FUNCTION_VALUE
22189 #define TARGET_FUNCTION_VALUE ix86_function_value
22190
22191 struct gcc_target targetm = TARGET_INITIALIZER;
22192 \f
22193 #include "gt-i386.h"