i386.h (override_options): Conditionally disable x86_sahf for 64bit targets only.
[gcc.git] / gcc / config / i386 / i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
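/* For example, MODE_INDEX (SImode) is 2, selecting the SImode column of the
   five-entry {QI, HI, SI, DI, other} multiply and divide cost arrays below;
   any mode other than QI/HI/SI/DImode falls into the trailing "other" slot.  */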
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
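/* With the assumption above, COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1),
   so a two-byte add in the size table costs the same as a one-insn add in
   the speed tables.  */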
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
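/* A do-nothing stringop strategy: always fall back to a libcall, whatever the
   block size.  In the cost tables below it presumably fills the memcpy/memset
   slot for the word size a CPU is never tuned for (e.g. the 64-bit slot of
   32-bit-only chips, or the 32-bit slot of generic64_cost).  */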
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches completely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches completely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro-based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1031
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1033 on simulation results, but after the P4 was made no performance benefit
1034 was observed with branch hints; they also increase the code size.
1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC32,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls for the Generic32 compilation setting as well. However,
1052 in the current implementation partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro-based chips and conflicts with the partial reg
1056 dependencies used by Athlon/P4-based chips, it is better to leave it off
1057 for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4-based chips that treat 128bit
1136 SSE registers as single units and K8-based chips that divide SSE
1137 registers into two 64bit halves. This knob promotes all store destinations
1138 to 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra micro-op on 64bit SSE units. Experimental results
1140 show that disabling this option on P4 brings over a 20% SPECfp regression,
1141 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just the lower part of scalar values in the proper format, leaving
1151 the upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1196 };
1197
1198 /* Feature tests against the various architecture variations. */
1199 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1200 /* X86_ARCH_CMOVE */
1201 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1202
1203 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1204 ~m_386,
1205
1206 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1207 ~(m_386 | m_486),
1208
1209 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1210 ~m_386,
1211
1212 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1213 ~m_386,
1214 };
1215
1216 static const unsigned int x86_accumulate_outgoing_args
1217 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1218
1219 static const unsigned int x86_arch_always_fancy_math_387
1220 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1221 | m_NOCONA | m_CORE2 | m_GENERIC;
1222
1223 static enum stringop_alg stringop_alg = no_stringop;
1224
1225 /* If the average insn count for a single function invocation is
1226 lower than this constant, emit fast (but longer) prologue and
1227 epilogue code. */
1228 #define FAST_PROLOGUE_INSN_COUNT 20
1229
1230 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1231 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1232 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1233 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1234
1235 /* Array of the smallest class containing reg number REGNO, indexed by
1236 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1237
1238 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1239 {
1240 /* ax, dx, cx, bx */
1241 AREG, DREG, CREG, BREG,
1242 /* si, di, bp, sp */
1243 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1244 /* FP registers */
1245 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1246 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1247 /* arg pointer */
1248 NON_Q_REGS,
1249 /* flags, fpsr, fpcr, frame */
1250 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1251 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1252 SSE_REGS, SSE_REGS,
1253 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1254 MMX_REGS, MMX_REGS,
1255 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1256 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1257 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1258 SSE_REGS, SSE_REGS,
1259 };
1260
1261 /* The "default" register map used in 32bit mode. */
1262
1263 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1264 {
1265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1272 };
1273
1274 static int const x86_64_int_parameter_registers[6] =
1275 {
1276 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1277 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1278 };
1279
1280 static int const x86_64_int_return_registers[4] =
1281 {
1282 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1283 };
1284
1285 /* The "default" register map used in 64bit mode. */
1286 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1287 {
1288 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1289 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1290 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1291 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1292 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1293 8,9,10,11,12,13,14,15, /* extended integer registers */
1294 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1295 };
1296
1297 /* Define the register numbers to be used in Dwarf debugging information.
1298 The SVR4 reference port C compiler uses the following register numbers
1299 in its Dwarf output code:
1300 0 for %eax (gcc regno = 0)
1301 1 for %ecx (gcc regno = 2)
1302 2 for %edx (gcc regno = 1)
1303 3 for %ebx (gcc regno = 3)
1304 4 for %esp (gcc regno = 7)
1305 5 for %ebp (gcc regno = 6)
1306 6 for %esi (gcc regno = 4)
1307 7 for %edi (gcc regno = 5)
1308 The following three DWARF register numbers are never generated by
1309 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1310 believes these numbers have these meanings.
1311 8 for %eip (no gcc equivalent)
1312 9 for %eflags (gcc regno = 17)
1313 10 for %trapno (no gcc equivalent)
1314 It is not at all clear how we should number the FP stack registers
1315 for the x86 architecture. If the version of SDB on x86/svr4 were
1316 a bit less brain dead with respect to floating-point then we would
1317 have a precedent to follow with respect to DWARF register numbers
1318 for x86 FP registers, but the SDB on x86/svr4 is so completely
1319 broken with respect to FP registers that it is hardly worth thinking
1320 of it as something to strive for compatibility with.
1321 The version of x86/svr4 SDB I have at the moment does (partially)
1322 seem to believe that DWARF register number 11 is associated with
1323 the x86 register %st(0), but that's about all. Higher DWARF
1324 register numbers don't seem to be associated with anything in
1325 particular, and even for DWARF regno 11, SDB only seems to under-
1326 stand that it should say that a variable lives in %st(0) (when
1327 asked via an `=' command) if we said it was in DWARF regno 11,
1328 but SDB still prints garbage when asked for the value of the
1329 variable in question (via a `/' command).
1330 (Also note that the labels SDB prints for various FP stack regs
1331 when doing an `x' command are all wrong.)
1332 Note that these problems generally don't affect the native SVR4
1333 C compiler because it doesn't allow the use of -O with -g and
1334 because when it is *not* optimizing, it allocates a memory
1335 location for each floating-point variable, and the memory
1336 location is what gets described in the DWARF AT_location
1337 attribute for the variable in question.
1338 Regardless of the severe mental illness of the x86/svr4 SDB, we
1339 do something sensible here and we use the following DWARF
1340 register numbers. Note that these are all stack-top-relative
1341 numbers.
1342 11 for %st(0) (gcc regno = 8)
1343 12 for %st(1) (gcc regno = 9)
1344 13 for %st(2) (gcc regno = 10)
1345 14 for %st(3) (gcc regno = 11)
1346 15 for %st(4) (gcc regno = 12)
1347 16 for %st(5) (gcc regno = 13)
1348 17 for %st(6) (gcc regno = 14)
1349 18 for %st(7) (gcc regno = 15)
1350 */
1351 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1352 {
1353 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1354 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1355 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1360 };
1361
1362 /* Test and compare insns in i386.md store the information needed to
1363 generate branch and scc insns here. */
1364
1365 rtx ix86_compare_op0 = NULL_RTX;
1366 rtx ix86_compare_op1 = NULL_RTX;
1367 rtx ix86_compare_emitted = NULL_RTX;
1368
1369 /* Size of the register save area. */
1370 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
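/* Assuming the usual 64-bit values (REGPARM_MAX == 6, UNITS_PER_WORD == 8,
   SSE_REGPARM_MAX == 8), this is 6*8 + 8*16 = 176 bytes, the register save
   area the x86-64 psABI specifies for varargs functions.  */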
1371
1372 /* Define the structure for the machine field in struct function. */
1373
1374 struct stack_local_entry GTY(())
1375 {
1376 unsigned short mode;
1377 unsigned short n;
1378 rtx rtl;
1379 struct stack_local_entry *next;
1380 };
1381
1382 /* Structure describing stack frame layout.
1383 Stack grows downward:
1384
1385 [arguments]
1386 <- ARG_POINTER
1387 saved pc
1388
1389 saved frame pointer if frame_pointer_needed
1390 <- HARD_FRAME_POINTER
1391 [saved regs]
1392
1393 [padding1] \
1394 )
1395 [va_arg registers] (
1396 > to_allocate <- FRAME_POINTER
1397 [frame] (
1398 )
1399 [padding2] /
1400 */
1401 struct ix86_frame
1402 {
1403 int nregs;
1404 int padding1;
1405 int va_arg_size;
1406 HOST_WIDE_INT frame;
1407 int padding2;
1408 int outgoing_arguments_size;
1409 int red_zone_size;
1410
1411 HOST_WIDE_INT to_allocate;
1412 /* The offsets relative to ARG_POINTER. */
1413 HOST_WIDE_INT frame_pointer_offset;
1414 HOST_WIDE_INT hard_frame_pointer_offset;
1415 HOST_WIDE_INT stack_pointer_offset;
1416
1417 /* When save_regs_using_mov is set, emit prologue using
1418 move instead of push instructions. */
1419 bool save_regs_using_mov;
1420 };
1421
1422 /* Code model option. */
1423 enum cmodel ix86_cmodel;
1424 /* Asm dialect. */
1425 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1426 /* TLS dialects. */
1427 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1428
1429 /* Which unit we are generating floating point math for. */
1430 enum fpmath_unit ix86_fpmath;
1431
1432 /* Which cpu are we scheduling for. */
1433 enum processor_type ix86_tune;
1434
1435 /* Which instruction set architecture to use. */
1436 enum processor_type ix86_arch;
1437
1438 /* true if sse prefetch instruction is not NOOP. */
1439 int x86_prefetch_sse;
1440
1441 /* true if cmpxchg16b is supported. */
1442 int x86_cmpxchg16b;
1443
1444 /* true if sahf is supported. Early Intel CPUs with Intel 64 lacked the
1445 LAHF and SAHF instructions that AMD64 supports; Intel added them with
1446 the Pentium 4 G1 stepping in December 2005. */
1447 int x86_sahf;
1448
1449 /* ix86_regparm_string as a number */
1450 static int ix86_regparm;
1451
1452 /* -mstackrealign option */
1453 extern int ix86_force_align_arg_pointer;
1454 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1455
1456 /* Preferred alignment for stack boundary in bits. */
1457 unsigned int ix86_preferred_stack_boundary;
1458
1459 /* Values 1-5: see jump.c */
1460 int ix86_branch_cost;
1461
1462 /* Variables larger than this size are put in the ldata/lbss sections
1463 instead of the normal data/bss sections (medium code model only). */
1464
1465 int ix86_section_threshold = 65536;
1466
1467 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1468 char internal_label_prefix[16];
1469 int internal_label_prefix_len;
1470 \f
1471 static bool ix86_handle_option (size_t, const char *, int);
1472 static void output_pic_addr_const (FILE *, rtx, int);
1473 static void put_condition_code (enum rtx_code, enum machine_mode,
1474 int, int, FILE *);
1475 static const char *get_some_local_dynamic_name (void);
1476 static int get_some_local_dynamic_name_1 (rtx *, void *);
1477 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1478 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1479 rtx *);
1480 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1481 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1482 enum machine_mode);
1483 static rtx get_thread_pointer (int);
1484 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1485 static void get_pc_thunk_name (char [32], unsigned int);
1486 static rtx gen_push (rtx);
1487 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1488 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1489 static struct machine_function * ix86_init_machine_status (void);
1490 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1491 static int ix86_nsaved_regs (void);
1492 static void ix86_emit_save_regs (void);
1493 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1494 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1495 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1496 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1497 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1498 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1499 static int ix86_issue_rate (void);
1500 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1501 static int ia32_multipass_dfa_lookahead (void);
1502 static void ix86_init_mmx_sse_builtins (void);
1503 static rtx x86_this_parameter (tree);
1504 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1505 HOST_WIDE_INT, tree);
1506 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1507 static void x86_file_start (void);
1508 static void ix86_reorg (void);
1509 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1510 static tree ix86_build_builtin_va_list (void);
1511 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1512 tree, int *, int);
1513 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1514 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1515 static bool ix86_vector_mode_supported_p (enum machine_mode);
1516
1517 static int ix86_address_cost (rtx);
1518 static bool ix86_cannot_force_const_mem (rtx);
1519 static rtx ix86_delegitimize_address (rtx);
1520
1521 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1522
1523 struct builtin_description;
1524 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1525 tree, rtx);
1526 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1527 tree, rtx);
1528 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1529 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1530 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1531 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1532 static rtx safe_vector_operand (rtx, enum machine_mode);
1533 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1534 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1535 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1536 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1537 static int ix86_fp_comparison_cost (enum rtx_code code);
1538 static unsigned int ix86_select_alt_pic_regnum (void);
1539 static int ix86_save_reg (unsigned int, int);
1540 static void ix86_compute_frame_layout (struct ix86_frame *);
1541 static int ix86_comp_type_attributes (tree, tree);
1542 static int ix86_function_regparm (tree, tree);
1543 const struct attribute_spec ix86_attribute_table[];
1544 static bool ix86_function_ok_for_sibcall (tree, tree);
1545 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1546 static int ix86_value_regno (enum machine_mode, tree, tree);
1547 static bool contains_128bit_aligned_vector_p (tree);
1548 static rtx ix86_struct_value_rtx (tree, int);
1549 static bool ix86_ms_bitfield_layout_p (tree);
1550 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1551 static int extended_reg_mentioned_1 (rtx *, void *);
1552 static bool ix86_rtx_costs (rtx, int, int, int *);
1553 static int min_insn_size (rtx);
1554 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1555 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1556 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1557 tree, bool);
1558 static void ix86_init_builtins (void);
1559 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1560 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1561 static tree ix86_builtin_conversion (enum tree_code, tree);
1562 static const char *ix86_mangle_fundamental_type (tree);
1563 static tree ix86_stack_protect_fail (void);
1564 static rtx ix86_internal_arg_pointer (void);
1565 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1566 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1567 rtx, rtx, int);
1568
1569 /* This function is only used on Solaris. */
1570 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1571 ATTRIBUTE_UNUSED;
1572
1573 /* Register class used for passing given 64bit part of the argument.
1574 These represent classes as documented by the PS ABI, with the exception
1575 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1576 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1577
1578 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1579 whenever possible (the upper half then contains only padding).
1580 */
1581 enum x86_64_reg_class
1582 {
1583 X86_64_NO_CLASS,
1584 X86_64_INTEGER_CLASS,
1585 X86_64_INTEGERSI_CLASS,
1586 X86_64_SSE_CLASS,
1587 X86_64_SSESF_CLASS,
1588 X86_64_SSEDF_CLASS,
1589 X86_64_SSEUP_CLASS,
1590 X86_64_X87_CLASS,
1591 X86_64_X87UP_CLASS,
1592 X86_64_COMPLEX_X87_CLASS,
1593 X86_64_MEMORY_CLASS
1594 };
1595 static const char * const x86_64_reg_class_name[] = {
1596 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1597 "sseup", "x87", "x87up", "cplx87", "no"
1598 };
1599
1600 #define MAX_CLASSES 4
1601
1602 /* Table of constants used by fldpi, fldln2, etc.... */
1603 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1604 static bool ext_80387_constants_init = 0;
1605 static void init_ext_80387_constants (void);
1606 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1607 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1608 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1609 static section *x86_64_elf_select_section (tree decl, int reloc,
1610 unsigned HOST_WIDE_INT align)
1611 ATTRIBUTE_UNUSED;
1612 \f
1613 /* Initialize the GCC target structure. */
1614 #undef TARGET_ATTRIBUTE_TABLE
1615 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1616 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1617 # undef TARGET_MERGE_DECL_ATTRIBUTES
1618 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1619 #endif
1620
1621 #undef TARGET_COMP_TYPE_ATTRIBUTES
1622 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1623
1624 #undef TARGET_INIT_BUILTINS
1625 #define TARGET_INIT_BUILTINS ix86_init_builtins
1626 #undef TARGET_EXPAND_BUILTIN
1627 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1628
1629 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1630 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1631 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1632 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1633
1634 #undef TARGET_ASM_FUNCTION_EPILOGUE
1635 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1636
1637 #undef TARGET_ENCODE_SECTION_INFO
1638 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1639 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1640 #else
1641 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1642 #endif
1643
1644 #undef TARGET_ASM_OPEN_PAREN
1645 #define TARGET_ASM_OPEN_PAREN ""
1646 #undef TARGET_ASM_CLOSE_PAREN
1647 #define TARGET_ASM_CLOSE_PAREN ""
1648
1649 #undef TARGET_ASM_ALIGNED_HI_OP
1650 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1651 #undef TARGET_ASM_ALIGNED_SI_OP
1652 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1653 #ifdef ASM_QUAD
1654 #undef TARGET_ASM_ALIGNED_DI_OP
1655 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1656 #endif
1657
1658 #undef TARGET_ASM_UNALIGNED_HI_OP
1659 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1660 #undef TARGET_ASM_UNALIGNED_SI_OP
1661 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1662 #undef TARGET_ASM_UNALIGNED_DI_OP
1663 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1664
1665 #undef TARGET_SCHED_ADJUST_COST
1666 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1667 #undef TARGET_SCHED_ISSUE_RATE
1668 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1671 ia32_multipass_dfa_lookahead
1672
1673 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1674 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1675
1676 #ifdef HAVE_AS_TLS
1677 #undef TARGET_HAVE_TLS
1678 #define TARGET_HAVE_TLS true
1679 #endif
1680 #undef TARGET_CANNOT_FORCE_CONST_MEM
1681 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1682 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1683 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1684
1685 #undef TARGET_DELEGITIMIZE_ADDRESS
1686 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1687
1688 #undef TARGET_MS_BITFIELD_LAYOUT_P
1689 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1690
1691 #if TARGET_MACHO
1692 #undef TARGET_BINDS_LOCAL_P
1693 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1694 #endif
1695
1696 #undef TARGET_ASM_OUTPUT_MI_THUNK
1697 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1698 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1699 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1700
1701 #undef TARGET_ASM_FILE_START
1702 #define TARGET_ASM_FILE_START x86_file_start
1703
1704 #undef TARGET_DEFAULT_TARGET_FLAGS
1705 #define TARGET_DEFAULT_TARGET_FLAGS \
1706 (TARGET_DEFAULT \
1707 | TARGET_64BIT_DEFAULT \
1708 | TARGET_SUBTARGET_DEFAULT \
1709 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1710
1711 #undef TARGET_HANDLE_OPTION
1712 #define TARGET_HANDLE_OPTION ix86_handle_option
1713
1714 #undef TARGET_RTX_COSTS
1715 #define TARGET_RTX_COSTS ix86_rtx_costs
1716 #undef TARGET_ADDRESS_COST
1717 #define TARGET_ADDRESS_COST ix86_address_cost
1718
1719 #undef TARGET_FIXED_CONDITION_CODE_REGS
1720 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1721 #undef TARGET_CC_MODES_COMPATIBLE
1722 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1723
1724 #undef TARGET_MACHINE_DEPENDENT_REORG
1725 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1726
1727 #undef TARGET_BUILD_BUILTIN_VA_LIST
1728 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1729
1730 #undef TARGET_MD_ASM_CLOBBERS
1731 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1732
1733 #undef TARGET_PROMOTE_PROTOTYPES
1734 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1735 #undef TARGET_STRUCT_VALUE_RTX
1736 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1737 #undef TARGET_SETUP_INCOMING_VARARGS
1738 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1739 #undef TARGET_MUST_PASS_IN_STACK
1740 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1741 #undef TARGET_PASS_BY_REFERENCE
1742 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1743 #undef TARGET_INTERNAL_ARG_POINTER
1744 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1745 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1746 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1747
1748 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1749 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1750
1751 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1752 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1753
1754 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1755 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1756
1757 #ifdef HAVE_AS_TLS
1758 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1759 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1760 #endif
1761
1762 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1763 #undef TARGET_INSERT_ATTRIBUTES
1764 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1765 #endif
1766
1767 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1768 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1769
1770 #undef TARGET_STACK_PROTECT_FAIL
1771 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1772
1773 #undef TARGET_FUNCTION_VALUE
1774 #define TARGET_FUNCTION_VALUE ix86_function_value
1775
1776 struct gcc_target targetm = TARGET_INITIALIZER;
1777
1778 \f
1779 /* The svr4 ABI for the i386 says that records and unions are returned
1780 in memory. */
1781 #ifndef DEFAULT_PCC_STRUCT_RETURN
1782 #define DEFAULT_PCC_STRUCT_RETURN 1
1783 #endif
1784
1785 /* Implement TARGET_HANDLE_OPTION. */
1786
1787 static bool
1788 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1789 {
1790 switch (code)
1791 {
1792 case OPT_m3dnow:
1793 if (!value)
1794 {
1795 target_flags &= ~MASK_3DNOW_A;
1796 target_flags_explicit |= MASK_3DNOW_A;
1797 }
1798 return true;
1799
1800 case OPT_mmmx:
1801 if (!value)
1802 {
1803 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1804 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1805 }
1806 return true;
1807
1808 case OPT_msse:
1809 if (!value)
1810 {
1811 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1812 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1813 }
1814 return true;
1815
1816 case OPT_msse2:
1817 if (!value)
1818 {
1819 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1820 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1821 }
1822 return true;
1823
1824 case OPT_msse3:
1825 if (!value)
1826 {
1827 target_flags &= ~MASK_SSE4A;
1828 target_flags_explicit |= MASK_SSE4A;
1829 }
1830 return true;
1831
1832 default:
1833 return true;
1834 }
1835 }
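/* A sketch of the intended interaction of the cascade above with -march
   (hypothetical command line, not an exhaustive description of option
   handling): with

       gcc -mno-sse -march=k8 ...

   the OPT_msse case clears MASK_SSE2, MASK_SSE3 and MASK_SSE4A and marks
   them as explicitly set, so the -march=k8 processing in override_options
   will not turn SSE or SSE2 back on behind the user's back.  */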
1836
1837 /* Sometimes certain combinations of command options do not make
1838 sense on a particular target machine. You can define a macro
1839 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1840 defined, is executed once just after all the command options have
1841 been parsed.
1842
1843 Don't use this macro to turn on various extra optimizations for
1844 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1845
1846 void
1847 override_options (void)
1848 {
1849 int i;
1850 int ix86_tune_defaulted = 0;
1851 unsigned int ix86_arch_mask, ix86_tune_mask;
1852
1853 /* Comes from final.c -- no real reason to change it. */
1854 #define MAX_CODE_ALIGN 16
1855
1856 static struct ptt
1857 {
1858 const struct processor_costs *cost; /* Processor costs */
1859 const int target_enable; /* Target flags to enable. */
1860 const int target_disable; /* Target flags to disable. */
1861 const int align_loop; /* Default alignments. */
1862 const int align_loop_max_skip;
1863 const int align_jump;
1864 const int align_jump_max_skip;
1865 const int align_func;
1866 }
1867 const processor_target_table[PROCESSOR_max] =
1868 {
1869 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1870 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1871 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1872 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1873 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1874 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1875 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1876 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1877 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1878 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1879 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1880 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1881 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1882 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1883 };
1884
1885 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1886 static struct pta
1887 {
1888 const char *const name; /* processor name or nickname. */
1889 const enum processor_type processor;
1890 const enum pta_flags
1891 {
1892 PTA_SSE = 1 << 0,
1893 PTA_SSE2 = 1 << 1,
1894 PTA_SSE3 = 1 << 2,
1895 PTA_MMX = 1 << 3,
1896 PTA_PREFETCH_SSE = 1 << 4,
1897 PTA_3DNOW = 1 << 5,
1898 PTA_3DNOW_A = 1 << 6,
1899 PTA_64BIT = 1 << 7,
1900 PTA_SSSE3 = 1 << 8,
1901 PTA_CX16 = 1 << 9,
1902 PTA_POPCNT = 1 << 10,
1903 PTA_ABM = 1 << 11,
1904 PTA_SSE4A = 1 << 12,
1905 PTA_NO_SAHF = 1 << 13
1906 } flags;
1907 }
1908 const processor_alias_table[] =
1909 {
1910 {"i386", PROCESSOR_I386, 0},
1911 {"i486", PROCESSOR_I486, 0},
1912 {"i586", PROCESSOR_PENTIUM, 0},
1913 {"pentium", PROCESSOR_PENTIUM, 0},
1914 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1915 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1916 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1917 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1918 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1919 {"i686", PROCESSOR_PENTIUMPRO, 0},
1920 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1921 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1922 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1923 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1924 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1925 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1926 | PTA_MMX | PTA_PREFETCH_SSE},
1927 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1928 | PTA_MMX | PTA_PREFETCH_SSE},
1929 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1930 | PTA_MMX | PTA_PREFETCH_SSE},
1931 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1932 | PTA_MMX | PTA_PREFETCH_SSE
1933 | PTA_CX16 | PTA_NO_SAHF},
1934 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1935 | PTA_64BIT | PTA_MMX
1936 | PTA_PREFETCH_SSE | PTA_CX16},
1937 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1938 | PTA_3DNOW_A},
1939 {"k6", PROCESSOR_K6, PTA_MMX},
1940 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1941 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1942 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1943 | PTA_3DNOW_A},
1944 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1945 | PTA_3DNOW | PTA_3DNOW_A},
1946 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1947 | PTA_3DNOW_A | PTA_SSE},
1948 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1949 | PTA_3DNOW_A | PTA_SSE},
1950 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1951 | PTA_3DNOW_A | PTA_SSE},
1952 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1953 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1954 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1955 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1956 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1957 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1958 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1959 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1960 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1961 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1962 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1963 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1964 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1965 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1966 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1967 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1968 };
1969
1970 int const pta_size = ARRAY_SIZE (processor_alias_table);
1971
1972 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1973 SUBTARGET_OVERRIDE_OPTIONS;
1974 #endif
1975
1976 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1977 SUBSUBTARGET_OVERRIDE_OPTIONS;
1978 #endif
1979
1980 /* -fPIC is the default for x86_64 Darwin. */
1981 if (TARGET_MACHO && TARGET_64BIT)
1982 flag_pic = 2;
1983
1984 /* Set the default values for switches whose default depends on TARGET_64BIT
1985 in case they weren't overwritten by command line options. */
1986 if (TARGET_64BIT)
1987 {
1988 /* Mach-O doesn't support omitting the frame pointer for now. */
1989 if (flag_omit_frame_pointer == 2)
1990 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1991 if (flag_asynchronous_unwind_tables == 2)
1992 flag_asynchronous_unwind_tables = 1;
1993 if (flag_pcc_struct_return == 2)
1994 flag_pcc_struct_return = 0;
1995 }
1996 else
1997 {
1998 if (flag_omit_frame_pointer == 2)
1999 flag_omit_frame_pointer = 0;
2000 if (flag_asynchronous_unwind_tables == 2)
2001 flag_asynchronous_unwind_tables = 0;
2002 if (flag_pcc_struct_return == 2)
2003 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2004 }
2005
2006 /* Need to check -mtune=generic first. */
2007 if (ix86_tune_string)
2008 {
2009 if (!strcmp (ix86_tune_string, "generic")
2010 || !strcmp (ix86_tune_string, "i686")
2011 /* As special support for cross compilers we read -mtune=native
2012 as -mtune=generic. With native compilers we won't see the
2013 -mtune=native, as it was changed by the driver. */
2014 || !strcmp (ix86_tune_string, "native"))
2015 {
2016 if (TARGET_64BIT)
2017 ix86_tune_string = "generic64";
2018 else
2019 ix86_tune_string = "generic32";
2020 }
2021 else if (!strncmp (ix86_tune_string, "generic", 7))
2022 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2023 }
2024 else
2025 {
2026 if (ix86_arch_string)
2027 ix86_tune_string = ix86_arch_string;
2028 if (!ix86_tune_string)
2029 {
2030 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2031 ix86_tune_defaulted = 1;
2032 }
2033
2034 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2035 need to use a sensible tune option. */
2036 if (!strcmp (ix86_tune_string, "generic")
2037 || !strcmp (ix86_tune_string, "x86-64")
2038 || !strcmp (ix86_tune_string, "i686"))
2039 {
2040 if (TARGET_64BIT)
2041 ix86_tune_string = "generic64";
2042 else
2043 ix86_tune_string = "generic32";
2044 }
2045 }
2046 if (ix86_stringop_string)
2047 {
2048 if (!strcmp (ix86_stringop_string, "rep_byte"))
2049 stringop_alg = rep_prefix_1_byte;
2050 else if (!strcmp (ix86_stringop_string, "libcall"))
2051 stringop_alg = libcall;
2052 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2053 stringop_alg = rep_prefix_4_byte;
2054 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2055 stringop_alg = rep_prefix_8_byte;
2056 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2057 stringop_alg = loop_1_byte;
2058 else if (!strcmp (ix86_stringop_string, "loop"))
2059 stringop_alg = loop;
2060 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2061 stringop_alg = unrolled_loop;
2062 else
2063 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2064 }
2065 if (!strcmp (ix86_tune_string, "x86-64"))
2066 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2067 "-mtune=generic instead as appropriate.");
2068
2069 if (!ix86_arch_string)
2070 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2071 if (!strcmp (ix86_arch_string, "generic"))
2072 error ("generic CPU can be used only for -mtune= switch");
2073 if (!strncmp (ix86_arch_string, "generic", 7))
2074 error ("bad value (%s) for -march= switch", ix86_arch_string);
2075
2076 if (ix86_cmodel_string != 0)
2077 {
2078 if (!strcmp (ix86_cmodel_string, "small"))
2079 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2080 else if (!strcmp (ix86_cmodel_string, "medium"))
2081 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2082 else if (!strcmp (ix86_cmodel_string, "large"))
2083 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2084 else if (flag_pic)
2085 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2086 else if (!strcmp (ix86_cmodel_string, "32"))
2087 ix86_cmodel = CM_32;
2088 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2089 ix86_cmodel = CM_KERNEL;
2090 else
2091 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2092 }
2093 else
2094 {
2095 ix86_cmodel = CM_32;
2096 if (TARGET_64BIT)
2097 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2098 }
2099 if (ix86_asm_string != 0)
2100 {
2101 if (! TARGET_MACHO
2102 && !strcmp (ix86_asm_string, "intel"))
2103 ix86_asm_dialect = ASM_INTEL;
2104 else if (!strcmp (ix86_asm_string, "att"))
2105 ix86_asm_dialect = ASM_ATT;
2106 else
2107 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2108 }
2109 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2110 error ("code model %qs not supported in the %s bit mode",
2111 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2112 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2113 sorry ("%i-bit mode not compiled in",
2114 (target_flags & MASK_64BIT) ? 64 : 32);
2115
2116 for (i = 0; i < pta_size; i++)
2117 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2118 {
2119 ix86_arch = processor_alias_table[i].processor;
2120 /* Default cpu tuning to the architecture. */
2121 ix86_tune = ix86_arch;
2122 if (processor_alias_table[i].flags & PTA_MMX
2123 && !(target_flags_explicit & MASK_MMX))
2124 target_flags |= MASK_MMX;
2125 if (processor_alias_table[i].flags & PTA_3DNOW
2126 && !(target_flags_explicit & MASK_3DNOW))
2127 target_flags |= MASK_3DNOW;
2128 if (processor_alias_table[i].flags & PTA_3DNOW_A
2129 && !(target_flags_explicit & MASK_3DNOW_A))
2130 target_flags |= MASK_3DNOW_A;
2131 if (processor_alias_table[i].flags & PTA_SSE
2132 && !(target_flags_explicit & MASK_SSE))
2133 target_flags |= MASK_SSE;
2134 if (processor_alias_table[i].flags & PTA_SSE2
2135 && !(target_flags_explicit & MASK_SSE2))
2136 target_flags |= MASK_SSE2;
2137 if (processor_alias_table[i].flags & PTA_SSE3
2138 && !(target_flags_explicit & MASK_SSE3))
2139 target_flags |= MASK_SSE3;
2140 if (processor_alias_table[i].flags & PTA_SSSE3
2141 && !(target_flags_explicit & MASK_SSSE3))
2142 target_flags |= MASK_SSSE3;
2143 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2144 x86_prefetch_sse = true;
2145 if (processor_alias_table[i].flags & PTA_CX16)
2146 x86_cmpxchg16b = true;
2147 if (processor_alias_table[i].flags & PTA_POPCNT
2148 && !(target_flags_explicit & MASK_POPCNT))
2149 target_flags |= MASK_POPCNT;
2150 if (processor_alias_table[i].flags & PTA_ABM
2151 && !(target_flags_explicit & MASK_ABM))
2152 target_flags |= MASK_ABM;
2153 if (processor_alias_table[i].flags & PTA_SSE4A
2154 && !(target_flags_explicit & MASK_SSE4A))
2155 target_flags |= MASK_SSE4A;
2156 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2157 x86_sahf = true;
2158 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2159 error ("CPU you selected does not support x86-64 "
2160 "instruction set");
2161 break;
2162 }
2163
2164 if (i == pta_size)
2165 error ("bad value (%s) for -march= switch", ix86_arch_string);
2166
2167 ix86_arch_mask = 1u << ix86_arch;
2168 for (i = 0; i < X86_ARCH_LAST; ++i)
2169 ix86_arch_features[i] &= ix86_arch_mask;
2170
2171 for (i = 0; i < pta_size; i++)
2172 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2173 {
2174 ix86_tune = processor_alias_table[i].processor;
2175 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2176 {
2177 if (ix86_tune_defaulted)
2178 {
2179 ix86_tune_string = "x86-64";
2180 for (i = 0; i < pta_size; i++)
2181 if (! strcmp (ix86_tune_string,
2182 processor_alias_table[i].name))
2183 break;
2184 ix86_tune = processor_alias_table[i].processor;
2185 }
2186 else
2187 error ("CPU you selected does not support x86-64 "
2188 "instruction set");
2189 }
2190 /* Intel CPUs have always interpreted SSE prefetch instructions as
2191 NOPs; so, we can enable SSE prefetch instructions even when
2192 -mtune (rather than -march) points us to a processor that has them.
2193 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2194 higher processors. */
2195 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2196 x86_prefetch_sse = true;
2197 break;
2198 }
2199 if (i == pta_size)
2200 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2201
2202 ix86_tune_mask = 1u << ix86_tune;
2203 for (i = 0; i < X86_TUNE_LAST; ++i)
2204 ix86_tune_features[i] &= ix86_tune_mask;
2205
2206 if (optimize_size)
2207 ix86_cost = &size_cost;
2208 else
2209 ix86_cost = processor_target_table[ix86_tune].cost;
2210 target_flags |= processor_target_table[ix86_tune].target_enable;
2211 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2212
2213 /* Arrange to set up i386_stack_locals for all functions. */
2214 init_machine_status = ix86_init_machine_status;
2215
2216 /* Validate -mregparm= value. */
2217 if (ix86_regparm_string)
2218 {
2219 i = atoi (ix86_regparm_string);
2220 if (i < 0 || i > REGPARM_MAX)
2221 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2222 else
2223 ix86_regparm = i;
2224 }
2225 else
2226 if (TARGET_64BIT)
2227 ix86_regparm = REGPARM_MAX;
2228
2229 /* If the user has provided any of the -malign-* options,
2230 warn and use that value only if -falign-* is not set.
2231 Remove this code in GCC 3.2 or later. */
2232 if (ix86_align_loops_string)
2233 {
2234 warning (0, "-malign-loops is obsolete, use -falign-loops");
2235 if (align_loops == 0)
2236 {
2237 i = atoi (ix86_align_loops_string);
2238 if (i < 0 || i > MAX_CODE_ALIGN)
2239 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2240 else
2241 align_loops = 1 << i;
2242 }
2243 }
2244
2245 if (ix86_align_jumps_string)
2246 {
2247 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2248 if (align_jumps == 0)
2249 {
2250 i = atoi (ix86_align_jumps_string);
2251 if (i < 0 || i > MAX_CODE_ALIGN)
2252 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2253 else
2254 align_jumps = 1 << i;
2255 }
2256 }
2257
2258 if (ix86_align_funcs_string)
2259 {
2260 warning (0, "-malign-functions is obsolete, use -falign-functions");
2261 if (align_functions == 0)
2262 {
2263 i = atoi (ix86_align_funcs_string);
2264 if (i < 0 || i > MAX_CODE_ALIGN)
2265 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2266 else
2267 align_functions = 1 << i;
2268 }
2269 }
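/* Worked example of the obsolete -malign-* translation above: a value of 4
   means an alignment of 1 << 4 = 16 bytes, i.e. -malign-functions=4 is the
   equivalent of -falign-functions=16 (and likewise for loops and jumps).  */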
2270
2271 /* Default align_* from the processor table. */
2272 if (align_loops == 0)
2273 {
2274 align_loops = processor_target_table[ix86_tune].align_loop;
2275 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2276 }
2277 if (align_jumps == 0)
2278 {
2279 align_jumps = processor_target_table[ix86_tune].align_jump;
2280 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2281 }
2282 if (align_functions == 0)
2283 {
2284 align_functions = processor_target_table[ix86_tune].align_func;
2285 }
2286
2287 /* Validate -mbranch-cost= value, or provide default. */
2288 ix86_branch_cost = ix86_cost->branch_cost;
2289 if (ix86_branch_cost_string)
2290 {
2291 i = atoi (ix86_branch_cost_string);
2292 if (i < 0 || i > 5)
2293 error ("-mbranch-cost=%d is not between 0 and 5", i);
2294 else
2295 ix86_branch_cost = i;
2296 }
2297 if (ix86_section_threshold_string)
2298 {
2299 i = atoi (ix86_section_threshold_string);
2300 if (i < 0)
2301 error ("-mlarge-data-threshold=%d is negative", i);
2302 else
2303 ix86_section_threshold = i;
2304 }
2305
2306 if (ix86_tls_dialect_string)
2307 {
2308 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2309 ix86_tls_dialect = TLS_DIALECT_GNU;
2310 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2311 ix86_tls_dialect = TLS_DIALECT_GNU2;
2312 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2313 ix86_tls_dialect = TLS_DIALECT_SUN;
2314 else
2315 error ("bad value (%s) for -mtls-dialect= switch",
2316 ix86_tls_dialect_string);
2317 }
2318
2319 /* Keep nonleaf frame pointers. */
2320 if (flag_omit_frame_pointer)
2321 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2322 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2323 flag_omit_frame_pointer = 1;
2324
2325 /* If we're doing fast math, we don't care about comparison order
2326 wrt NaNs. This lets us use a shorter comparison sequence. */
2327 if (flag_finite_math_only)
2328 target_flags &= ~MASK_IEEE_FP;
2329
2330 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2331 since the insns won't need emulation. */
2332 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2333 target_flags &= ~MASK_NO_FANCY_MATH_387;
2334
2335 /* Likewise, if the target doesn't have a 387, or we've specified
2336 software floating point, don't use 387 inline intrinsics. */
2337 if (!TARGET_80387)
2338 target_flags |= MASK_NO_FANCY_MATH_387;
2339
2340 /* Turn on SSE3 builtins for -mssse3. */
2341 if (TARGET_SSSE3)
2342 target_flags |= MASK_SSE3;
2343
2344 /* Turn on SSE3 builtins for -msse4a. */
2345 if (TARGET_SSE4A)
2346 target_flags |= MASK_SSE3;
2347
2348 /* Turn on SSE2 builtins for -msse3. */
2349 if (TARGET_SSE3)
2350 target_flags |= MASK_SSE2;
2351
2352 /* Turn on SSE builtins for -msse2. */
2353 if (TARGET_SSE2)
2354 target_flags |= MASK_SSE;
2355
2356 /* Turn on MMX builtins for -msse. */
2357 if (TARGET_SSE)
2358 {
2359 target_flags |= MASK_MMX & ~target_flags_explicit;
2360 x86_prefetch_sse = true;
2361 }
2362
2363 /* Turn on MMX builtins for 3Dnow. */
2364 if (TARGET_3DNOW)
2365 target_flags |= MASK_MMX;
2366
2367 /* Turn on POPCNT builtins for -mabm. */
2368 if (TARGET_ABM)
2369 target_flags |= MASK_POPCNT;
2370
2371 if (TARGET_64BIT)
2372 {
2373 if (TARGET_ALIGN_DOUBLE)
2374 error ("-malign-double makes no sense in the 64bit mode");
2375 if (TARGET_RTD)
2376 error ("-mrtd calling convention not supported in the 64bit mode");
2377
2378 /* Enable by default the SSE and MMX builtins. Do allow the user to
2379 explicitly disable any of these. In particular, disabling SSE and
2380 MMX for kernel code is extremely useful. */
2381 target_flags
2382 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2383 & ~target_flags_explicit);
2384 }
2385 else
2386 {
2387 /* The i386 ABI does not specify a red zone. It still makes sense to use
2388 one when the programmer takes care to keep the stack from being destroyed. */
2389 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2390 target_flags |= MASK_NO_RED_ZONE;
2391 }
2392
2393 /* Validate -mpreferred-stack-boundary= value, or provide default.
2394 The default of 128 bits is for Pentium III's SSE __m128. We can't
2395 lower it for optimize_size, because then object files compiled with
2396 -Os and -O<n> could not be mixed. */
2397 ix86_preferred_stack_boundary = 128;
2398 if (ix86_preferred_stack_boundary_string)
2399 {
2400 i = atoi (ix86_preferred_stack_boundary_string);
2401 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2402 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2403 TARGET_64BIT ? 4 : 2);
2404 else
2405 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2406 }
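/* Worked example of the computation above: -mpreferred-stack-boundary=4
   gives (1 << 4) * BITS_PER_UNIT = 128 bits (16 bytes), which is also the
   default; the 64-bit lower bound of 4 reflects the 16-byte stack alignment
   the x86-64 psABI requires.  */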
2407
2408 /* Accept -msseregparm only if at least SSE support is enabled. */
2409 if (TARGET_SSEREGPARM
2410 && ! TARGET_SSE)
2411 error ("-msseregparm used without SSE enabled");
2412
2413 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2414 if (ix86_fpmath_string != 0)
2415 {
2416 if (! strcmp (ix86_fpmath_string, "387"))
2417 ix86_fpmath = FPMATH_387;
2418 else if (! strcmp (ix86_fpmath_string, "sse"))
2419 {
2420 if (!TARGET_SSE)
2421 {
2422 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2423 ix86_fpmath = FPMATH_387;
2424 }
2425 else
2426 ix86_fpmath = FPMATH_SSE;
2427 }
2428 else if (! strcmp (ix86_fpmath_string, "387,sse")
2429 || ! strcmp (ix86_fpmath_string, "sse,387"))
2430 {
2431 if (!TARGET_SSE)
2432 {
2433 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2434 ix86_fpmath = FPMATH_387;
2435 }
2436 else if (!TARGET_80387)
2437 {
2438 warning (0, "387 instruction set disabled, using SSE arithmetics");
2439 ix86_fpmath = FPMATH_SSE;
2440 }
2441 else
2442 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2443 }
2444 else
2445 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2446 }
2447
2448 /* If the i387 is disabled, then do not return values in it. */
2449 if (!TARGET_80387)
2450 target_flags &= ~MASK_FLOAT_RETURNS;
2451
2452 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2453 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2454 && !optimize_size)
2455 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2456
2457 /* ??? Unwind info is not correct around the CFG unless either a frame
2458 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2459 unwind info generation to be aware of the CFG and propagating states
2460 around edges. */
2461 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2462 || flag_exceptions || flag_non_call_exceptions)
2463 && flag_omit_frame_pointer
2464 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2465 {
2466 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2467 warning (0, "unwind tables currently require either a frame pointer "
2468 "or -maccumulate-outgoing-args for correctness");
2469 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2470 }
2471
2472 /* For sane SSE instruction set generation we need the fcomi instruction.
2473 It is safe to enable all CMOVE instructions. */
2474 if (TARGET_SSE)
2475 TARGET_CMOVE = 1;
2476
2477 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2478 {
2479 char *p;
2480 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2481 p = strchr (internal_label_prefix, 'X');
2482 internal_label_prefix_len = p - internal_label_prefix;
2483 *p = '\0';
2484 }
2485
2486 /* When no scheduling description is available, disable the scheduler passes
2487 so they won't slow down the compilation and make x87 code slower. */
2488 if (!TARGET_SCHEDULE)
2489 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2490
2491 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2492 set_param_value ("simultaneous-prefetches",
2493 ix86_cost->simultaneous_prefetches);
2494 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2495 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2496 }
2497 \f
2498 /* Switch to the appropriate section for output of DECL.
2499 DECL is either a `VAR_DECL' node or a constant of some sort.
2500 RELOC indicates whether forming the initial value of DECL requires
2501 link-time relocations. */
2502
2503 static section *
2504 x86_64_elf_select_section (tree decl, int reloc,
2505 unsigned HOST_WIDE_INT align)
2506 {
2507 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2508 && ix86_in_large_data_p (decl))
2509 {
2510 const char *sname = NULL;
2511 unsigned int flags = SECTION_WRITE;
2512 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2513 {
2514 case SECCAT_DATA:
2515 sname = ".ldata";
2516 break;
2517 case SECCAT_DATA_REL:
2518 sname = ".ldata.rel";
2519 break;
2520 case SECCAT_DATA_REL_LOCAL:
2521 sname = ".ldata.rel.local";
2522 break;
2523 case SECCAT_DATA_REL_RO:
2524 sname = ".ldata.rel.ro";
2525 break;
2526 case SECCAT_DATA_REL_RO_LOCAL:
2527 sname = ".ldata.rel.ro.local";
2528 break;
2529 case SECCAT_BSS:
2530 sname = ".lbss";
2531 flags |= SECTION_BSS;
2532 break;
2533 case SECCAT_RODATA:
2534 case SECCAT_RODATA_MERGE_STR:
2535 case SECCAT_RODATA_MERGE_STR_INIT:
2536 case SECCAT_RODATA_MERGE_CONST:
2537 sname = ".lrodata";
2538 flags = 0;
2539 break;
2540 case SECCAT_SRODATA:
2541 case SECCAT_SDATA:
2542 case SECCAT_SBSS:
2543 gcc_unreachable ();
2544 case SECCAT_TEXT:
2545 case SECCAT_TDATA:
2546 case SECCAT_TBSS:
2547 /* We don't split these for the medium model. Place them into
2548 default sections and hope for the best. */
2549 break;
2550 }
2551 if (sname)
2552 {
2553 /* We might get called with string constants, but get_named_section
2554 doesn't like them as they are not DECLs. Also, we need to set
2555 flags in that case. */
2556 if (!DECL_P (decl))
2557 return get_section (sname, flags, NULL);
2558 return get_named_section (decl, sname, reloc);
2559 }
2560 }
2561 return default_elf_select_section (decl, reloc, align);
2562 }
2563
2564 /* Build up a unique section name, expressed as a
2565 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2566 RELOC indicates whether the initial value of EXP requires
2567 link-time relocations. */
2568
2569 static void
2570 x86_64_elf_unique_section (tree decl, int reloc)
2571 {
2572 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2573 && ix86_in_large_data_p (decl))
2574 {
2575 const char *prefix = NULL;
2576 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2577 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2578
2579 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2580 {
2581 case SECCAT_DATA:
2582 case SECCAT_DATA_REL:
2583 case SECCAT_DATA_REL_LOCAL:
2584 case SECCAT_DATA_REL_RO:
2585 case SECCAT_DATA_REL_RO_LOCAL:
2586 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2587 break;
2588 case SECCAT_BSS:
2589 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2590 break;
2591 case SECCAT_RODATA:
2592 case SECCAT_RODATA_MERGE_STR:
2593 case SECCAT_RODATA_MERGE_STR_INIT:
2594 case SECCAT_RODATA_MERGE_CONST:
2595 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2596 break;
2597 case SECCAT_SRODATA:
2598 case SECCAT_SDATA:
2599 case SECCAT_SBSS:
2600 gcc_unreachable ();
2601 case SECCAT_TEXT:
2602 case SECCAT_TDATA:
2603 case SECCAT_TBSS:
2604 /* We don't split these for the medium model. Place them into
2605 default sections and hope for the best. */
2606 break;
2607 }
2608 if (prefix)
2609 {
2610 const char *name;
2611 size_t nlen, plen;
2612 char *string;
2613 plen = strlen (prefix);
2614
2615 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2616 name = targetm.strip_name_encoding (name);
2617 nlen = strlen (name);
2618
2619 string = alloca (nlen + plen + 1);
2620 memcpy (string, prefix, plen);
2621 memcpy (string + plen, name, nlen + 1);
2622
2623 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2624 return;
2625 }
2626 }
2627 default_unique_section (decl, reloc);
2628 }
2629
2630 #ifdef COMMON_ASM_OP
2631 /* This says how to output assembler code to declare an
2632 uninitialized external linkage data object.
2633
2634 For medium model x86-64 we need to use the .largecomm directive for
2635 large objects. */
2636 void
2637 x86_elf_aligned_common (FILE *file,
2638 const char *name, unsigned HOST_WIDE_INT size,
2639 int align)
2640 {
2641 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2642 && size > (unsigned int)ix86_section_threshold)
2643 fprintf (file, ".largecomm\t");
2644 else
2645 fprintf (file, "%s", COMMON_ASM_OP);
2646 assemble_name (file, name);
2647 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2648 size, align / BITS_PER_UNIT);
2649 }
2650 #endif
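/* For illustration (a sketch; the exact alignment depends on DATA_ALIGNMENT
   and friends): with -mcmodel=medium and the default 65536-byte threshold,
   a common object bigger than the threshold, say

       char big_buffer[100000];

   is emitted by the function above roughly as

       .largecomm	big_buffer,100000,32

   while smaller commons keep using the ordinary COMMON_ASM_OP (.comm on
   ELF) directive.  */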
2651 /* Utility function for targets to use in implementing
2652 ASM_OUTPUT_ALIGNED_BSS. */
2653
2654 void
2655 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2656 const char *name, unsigned HOST_WIDE_INT size,
2657 int align)
2658 {
2659 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2660 && size > (unsigned int)ix86_section_threshold)
2661 switch_to_section (get_named_section (decl, ".lbss", 0));
2662 else
2663 switch_to_section (bss_section);
2664 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2665 #ifdef ASM_DECLARE_OBJECT_NAME
2666 last_assemble_variable_decl = decl;
2667 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2668 #else
2669 /* The standard thing is just to output a label for the object. */
2670 ASM_OUTPUT_LABEL (file, name);
2671 #endif /* ASM_DECLARE_OBJECT_NAME */
2672 ASM_OUTPUT_SKIP (file, size ? size : 1);
2673 }
2674 \f
2675 void
2676 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2677 {
2678 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2679 make the problem with not enough registers even worse. */
2680 #ifdef INSN_SCHEDULING
2681 if (level > 1)
2682 flag_schedule_insns = 0;
2683 #endif
2684
2685 if (TARGET_MACHO)
2686 /* The Darwin libraries never set errno, so we might as well
2687 avoid calling them when that's the only reason we would. */
2688 flag_errno_math = 0;
2689
2690 /* The default values of these switches depend on TARGET_64BIT, which is
2691 not known at this moment. Mark these values with 2 and let the user
2692 override them. If no command line option specifies them, we will set
2693 the defaults in override_options. */
2694 if (optimize >= 1)
2695 flag_omit_frame_pointer = 2;
2696 flag_pcc_struct_return = 2;
2697 flag_asynchronous_unwind_tables = 2;
2698 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2699 SUBTARGET_OPTIMIZATION_OPTIONS;
2700 #endif
2701 }
2702 \f
2703 /* Table of valid machine attributes. */
2704 const struct attribute_spec ix86_attribute_table[] =
2705 {
2706 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2707 /* Stdcall attribute says callee is responsible for popping arguments
2708 if they are not variable. */
2709 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2710 /* Fastcall attribute says callee is responsible for popping arguments
2711 if they are not variable. */
2712 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2713 /* Cdecl attribute says the callee is a normal C declaration */
2714 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2715 /* Regparm attribute specifies how many integer arguments are to be
2716 passed in registers. */
2717 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2718 /* Sseregparm attribute says we are using x86_64 calling conventions
2719 for FP arguments. */
2720 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2721 /* force_align_arg_pointer says this function realigns the stack at entry. */
2722 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2723 false, true, true, ix86_handle_cconv_attribute },
2724 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2725 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2726 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2727 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2728 #endif
2729 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2730 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2731 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2732 SUBTARGET_ATTRIBUTE_TABLE,
2733 #endif
2734 { NULL, 0, 0, false, false, false, NULL }
2735 };
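/* A brief, hypothetical usage sketch of the calling-convention attributes
   in the table above (the precise combination rules live in
   ix86_handle_cconv_attribute below; these all describe 32-bit conventions):

       int   __attribute__ ((regparm (3))) add3 (int a, int b, int c);
       int   __attribute__ ((fastcall))    poke (int handle);
       int   __attribute__ ((stdcall))     win_api_like (int arg);
       float __attribute__ ((sseregparm))  fdot (float x, float y);

   regparm passes up to the given number of integer arguments in registers,
   fastcall passes the first two integer arguments in %ecx and %edx, stdcall
   makes the callee pop its arguments, and sseregparm passes FP arguments in
   SSE registers as the x86_64 conventions do.  */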
2736
2737 /* Decide whether we can make a sibling call to a function. DECL is the
2738 declaration of the function being targeted by the call and EXP is the
2739 CALL_EXPR representing the call. */
2740
2741 static bool
2742 ix86_function_ok_for_sibcall (tree decl, tree exp)
2743 {
2744 tree func;
2745 rtx a, b;
2746
2747 /* If we are generating position-independent code, we cannot sibcall
2748 optimize any indirect call, or a direct call to a global function,
2749 as the PLT requires %ebx be live. */
2750 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2751 return false;
2752
2753 if (decl)
2754 func = decl;
2755 else
2756 {
2757 func = TREE_TYPE (CALL_EXPR_FN (exp));
2758 if (POINTER_TYPE_P (func))
2759 func = TREE_TYPE (func);
2760 }
2761
2762 /* Check that the return value locations are the same. For example,
2763 if we are returning floats on the 80387 register stack, we cannot
2764 make a sibcall from a function that doesn't return a float to a
2765 function that does or, conversely, from a function that does return
2766 a float to a function that doesn't; the necessary stack adjustment
2767 would not be executed. This is also the place we notice
2768 differences in the return value ABI. Note that it is ok for one
2769 of the functions to have void return type as long as the return
2770 value of the other is passed in a register. */
2771 a = ix86_function_value (TREE_TYPE (exp), func, false);
2772 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2773 cfun->decl, false);
2774 if (STACK_REG_P (a) || STACK_REG_P (b))
2775 {
2776 if (!rtx_equal_p (a, b))
2777 return false;
2778 }
2779 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2780 ;
2781 else if (!rtx_equal_p (a, b))
2782 return false;
2783
2784 /* If this call is indirect, we'll need to be able to use a call-clobbered
2785 register for the address of the target function. Make sure that all
2786 such registers are not used for passing parameters. */
2787 if (!decl && !TARGET_64BIT)
2788 {
2789 tree type;
2790
2791 /* We're looking at the CALL_EXPR, we need the type of the function. */
2792 type = CALL_EXPR_FN (exp); /* pointer expression */
2793 type = TREE_TYPE (type); /* pointer type */
2794 type = TREE_TYPE (type); /* function type */
2795
2796 if (ix86_function_regparm (type, NULL) >= 3)
2797 {
2798 /* ??? Need to count the actual number of registers to be used,
2799 not the possible number of registers. Fix later. */
2800 return false;
2801 }
2802 }
2803
2804 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2805 /* Dllimport'd functions are also called indirectly. */
2806 if (decl && DECL_DLLIMPORT_P (decl)
2807 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2808 return false;
2809 #endif
2810
2811 /* If we force-aligned the stack, then sibcalling would unalign the
2812 stack, which may break the called function. */
2813 if (cfun->machine->force_align_arg_pointer)
2814 return false;
2815
2816 /* Otherwise okay. That also includes certain types of indirect calls. */
2817 return true;
2818 }
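/* Illustrative example of the PIC restriction checked first above (a
   hypothetical translation unit): with -m32 -fPIC,

       extern int bar (int);
       int foo (int x) { return bar (x); }

   foo cannot tail-call bar as a sibcall, because the call goes through the
   PLT and the PLT code expects %ebx to hold the GOT pointer; if bar were
   static (or otherwise bound locally) the sibcall would still be
   considered.  */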
2819
2820 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2821 calling convention attributes;
2822 arguments as in struct attribute_spec.handler. */
2823
2824 static tree
2825 ix86_handle_cconv_attribute (tree *node, tree name,
2826 tree args,
2827 int flags ATTRIBUTE_UNUSED,
2828 bool *no_add_attrs)
2829 {
2830 if (TREE_CODE (*node) != FUNCTION_TYPE
2831 && TREE_CODE (*node) != METHOD_TYPE
2832 && TREE_CODE (*node) != FIELD_DECL
2833 && TREE_CODE (*node) != TYPE_DECL)
2834 {
2835 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2836 IDENTIFIER_POINTER (name));
2837 *no_add_attrs = true;
2838 return NULL_TREE;
2839 }
2840
2841 /* Can combine regparm with all attributes but fastcall. */
2842 if (is_attribute_p ("regparm", name))
2843 {
2844 tree cst;
2845
2846 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2847 {
2848 error ("fastcall and regparm attributes are not compatible");
2849 }
2850
2851 cst = TREE_VALUE (args);
2852 if (TREE_CODE (cst) != INTEGER_CST)
2853 {
2854 warning (OPT_Wattributes,
2855 "%qs attribute requires an integer constant argument",
2856 IDENTIFIER_POINTER (name));
2857 *no_add_attrs = true;
2858 }
2859 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2860 {
2861 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2862 IDENTIFIER_POINTER (name), REGPARM_MAX);
2863 *no_add_attrs = true;
2864 }
2865
2866 if (!TARGET_64BIT
2867 && lookup_attribute (ix86_force_align_arg_pointer_string,
2868 TYPE_ATTRIBUTES (*node))
2869 && compare_tree_int (cst, REGPARM_MAX-1))
2870 {
2871 error ("%s functions limited to %d register parameters",
2872 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2873 }
2874
2875 return NULL_TREE;
2876 }
2877
2878 if (TARGET_64BIT)
2879 {
2880 warning (OPT_Wattributes, "%qs attribute ignored",
2881 IDENTIFIER_POINTER (name));
2882 *no_add_attrs = true;
2883 return NULL_TREE;
2884 }
2885
2886 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2887 if (is_attribute_p ("fastcall", name))
2888 {
2889 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2890 {
2891 error ("fastcall and cdecl attributes are not compatible");
2892 }
2893 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2894 {
2895 error ("fastcall and stdcall attributes are not compatible");
2896 }
2897 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2898 {
2899 error ("fastcall and regparm attributes are not compatible");
2900 }
2901 }
2902
2903 /* Can combine stdcall with fastcall (redundant), regparm and
2904 sseregparm. */
2905 else if (is_attribute_p ("stdcall", name))
2906 {
2907 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2908 {
2909 error ("stdcall and cdecl attributes are not compatible");
2910 }
2911 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2912 {
2913 error ("stdcall and fastcall attributes are not compatible");
2914 }
2915 }
2916
2917 /* Can combine cdecl with regparm and sseregparm. */
2918 else if (is_attribute_p ("cdecl", name))
2919 {
2920 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2921 {
2922 error ("stdcall and cdecl attributes are not compatible");
2923 }
2924 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2925 {
2926 error ("fastcall and cdecl attributes are not compatible");
2927 }
2928 }
2929
2930 /* Can combine sseregparm with all attributes. */
2931
2932 return NULL_TREE;
2933 }
2934
2935 /* Return 0 if the attributes for two types are incompatible, 1 if they
2936 are compatible, and 2 if they are nearly compatible (which causes a
2937 warning to be generated). */
2938
2939 static int
2940 ix86_comp_type_attributes (tree type1, tree type2)
2941 {
2942 /* Check for mismatch of non-default calling convention. */
2943 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2944
2945 if (TREE_CODE (type1) != FUNCTION_TYPE)
2946 return 1;
2947
2948 /* Check for mismatched fastcall/regparm types. */
2949 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2950 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2951 || (ix86_function_regparm (type1, NULL)
2952 != ix86_function_regparm (type2, NULL)))
2953 return 0;
2954
2955 /* Check for mismatched sseregparm types. */
2956 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2957 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2958 return 0;
2959
2960 /* Check for mismatched return types (cdecl vs stdcall). */
2961 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2962 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2963 return 0;
2964
2965 return 1;
2966 }
2967 \f
2968 /* Return the regparm value for a function with the indicated TYPE and DECL.
2969 DECL may be NULL when calling function indirectly
2970 or considering a libcall. */
2971
2972 static int
2973 ix86_function_regparm (tree type, tree decl)
2974 {
2975 tree attr;
2976 int regparm = ix86_regparm;
2977 bool user_convention = false;
2978
2979 if (!TARGET_64BIT)
2980 {
2981 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2982 if (attr)
2983 {
2984 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2985 user_convention = true;
2986 }
2987
2988 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2989 {
2990 regparm = 2;
2991 user_convention = true;
2992 }
2993
2994 /* Use register calling convention for local functions when possible. */
2995 if (!TARGET_64BIT && !user_convention && decl
2996 && flag_unit_at_a_time && !profile_flag)
2997 {
2998 struct cgraph_local_info *i = cgraph_local_info (decl);
2999 if (i && i->local)
3000 {
3001 int local_regparm, globals = 0, regno;
3002
3003 /* Make sure no regparm register is taken by a global register
3004 variable. */
3005 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3006 if (global_regs[local_regparm])
3007 break;
3008 /* We can't use regparm(3) for nested functions as these use the
3009 static chain pointer in the third argument. */
3010 if (local_regparm == 3
3011 && decl_function_context (decl)
3012 && !DECL_NO_STATIC_CHAIN (decl))
3013 local_regparm = 2;
3014 /* If the function realigns its stack pointer, the
3015 prologue will clobber %ecx. If we've already
3016 generated code for the callee, the callee
3017 DECL_STRUCT_FUNCTION is gone, so we fall back to
3018 scanning the attributes for the self-realigning
3019 property. */
3020 if ((DECL_STRUCT_FUNCTION (decl)
3021 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3022 || (!DECL_STRUCT_FUNCTION (decl)
3023 && lookup_attribute (ix86_force_align_arg_pointer_string,
3024 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3025 local_regparm = 2;
3026 /* Each global register variable increases register pressure,
3027 so the more global register variables there are, the less the
3028 regparm optimization helps, unless the user requests it explicitly. */
3029 for (regno = 0; regno < 6; regno++)
3030 if (global_regs[regno])
3031 globals++;
3032 local_regparm
3033 = globals < local_regparm ? local_regparm - globals : 0;
3034
3035 if (local_regparm > regparm)
3036 regparm = local_regparm;
3037 }
3038 }
3039 }
3040 return regparm;
3041 }
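/* Illustrative sketch (hypothetical user code): under the logic above,

     int __attribute__((regparm(3))) add3 (int a, int b, int c);

   passes a, b and c in %eax, %edx and %ecx on ia32, while a fastcall
   function is always treated as regparm(2).  A local function seen whole
   at -funit-at-a-time may be promoted to regparm(3) automatically, losing
   one register for each global register variable in use and dropping to
   regparm(2) when it is nested or realigns its stack pointer.  */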
3042
3043 /* Return 1 or 2 if we can pass up to SSE_REGPARM_MAX SFmode (1) or both
3044 SFmode and DFmode (2) arguments in SSE registers for a function with the
3045 indicated TYPE and DECL. DECL may be NULL when calling a function
3046 indirectly or considering a libcall. Otherwise return 0. */
3047
3048 static int
3049 ix86_function_sseregparm (tree type, tree decl)
3050 {
3051 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3052 by the sseregparm attribute. */
3053 if (TARGET_SSEREGPARM
3054 || (type
3055 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3056 {
3057 if (!TARGET_SSE)
3058 {
3059 if (decl)
3060 error ("Calling %qD with attribute sseregparm without "
3061 "SSE/SSE2 enabled", decl);
3062 else
3063 error ("Calling %qT with attribute sseregparm without "
3064 "SSE/SSE2 enabled", type);
3065 return 0;
3066 }
3067
3068 return 2;
3069 }
3070
3071 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3072 (and DFmode for SSE2) arguments in SSE registers,
3073 even for 32-bit targets. */
3074 if (!TARGET_64BIT && decl
3075 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3076 {
3077 struct cgraph_local_info *i = cgraph_local_info (decl);
3078 if (i && i->local)
3079 return TARGET_SSE2 ? 2 : 1;
3080 }
3081
3082 return 0;
3083 }
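/* Illustrative sketch (hypothetical user code): by the rules above,

     double __attribute__((sseregparm)) scale (double x, float y);

   passes x and y in SSE registers on ia32 when SSE/SSE2 are enabled (the
   function returns 2) and is diagnosed with an error when they are not.
   A local function compiled with -mfpmath=sse under -funit-at-a-time gets
   the same treatment implicitly: 2 with SSE2, or 1 (SFmode only) with
   plain SSE.  */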
3084
3085 /* Return true if EAX is live at the start of the function. Used by
3086 ix86_expand_prologue to determine if we need special help before
3087 calling allocate_stack_worker. */
3088
3089 static bool
3090 ix86_eax_live_at_start_p (void)
3091 {
3092 /* Cheat. Don't bother working forward from ix86_function_regparm
3093 to the function type to whether an actual argument is located in
3094 eax. Instead just look at cfg info, which is still close enough
3095 to correct at this point. This gives false positives for broken
3096 functions that might use uninitialized data that happens to be
3097 allocated in eax, but who cares? */
3098 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3099 }
3100
3101 /* Value is the number of bytes of arguments automatically
3102 popped when returning from a subroutine call.
3103 FUNDECL is the declaration node of the function (as a tree),
3104 FUNTYPE is the data type of the function (as a tree),
3105 or for a library call it is an identifier node for the subroutine name.
3106 SIZE is the number of bytes of arguments passed on the stack.
3107
3108 On the 80386, the RTD insn may be used to pop them if the number
3109 of args is fixed, but if the number is variable then the caller
3110 must pop them all. RTD can't be used for library calls now
3111 because the library is compiled with the Unix compiler.
3112 Use of RTD is a selectable option, since it is incompatible with
3113 standard Unix calling sequences. If the option is not selected,
3114 the caller must always pop the args.
3115
3116 The attribute stdcall is equivalent to RTD on a per module basis. */
3117
3118 int
3119 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3120 {
3121 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3122
3123 /* Cdecl functions override -mrtd, and never pop the stack. */
3124 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3125
3126 /* Stdcall and fastcall functions will pop the stack if not
3127 variable args. */
3128 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3129 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3130 rtd = 1;
3131
3132 if (rtd
3133 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3134 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3135 == void_type_node)))
3136 return size;
3137 }
3138
3139 /* Lose any fake structure return argument if it is passed on the stack. */
3140 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3141 && !TARGET_64BIT
3142 && !KEEP_AGGREGATE_RETURN_POINTER)
3143 {
3144 int nregs = ix86_function_regparm (funtype, fundecl);
3145
3146 if (!nregs)
3147 return GET_MODE_SIZE (Pmode);
3148 }
3149
3150 return 0;
3151 }
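/* Illustrative sketch (hypothetical user code): given the rules above,

     int __attribute__((stdcall)) sum2 (int a, int b);

   pops its own 8 bytes of arguments on return (ret $8), while a variadic
   declaration such as

     int __attribute__((stdcall)) vsum (int n, ...);

   leaves popping to the caller because the argument count is not fixed.
   -mrtd applies the same convention module-wide, and a cdecl attribute
   overrides it back to caller-pops.  */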
3152 \f
3153 /* Argument support functions. */
3154
3155 /* Return true when register may be used to pass function parameters. */
3156 bool
3157 ix86_function_arg_regno_p (int regno)
3158 {
3159 int i;
3160 if (!TARGET_64BIT)
3161 {
3162 if (TARGET_MACHO)
3163 return (regno < REGPARM_MAX
3164 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3165 else
3166 return (regno < REGPARM_MAX
3167 || (TARGET_MMX && MMX_REGNO_P (regno)
3168 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3169 || (TARGET_SSE && SSE_REGNO_P (regno)
3170 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3171 }
3172
3173 if (TARGET_MACHO)
3174 {
3175 if (SSE_REGNO_P (regno) && TARGET_SSE)
3176 return true;
3177 }
3178 else
3179 {
3180 if (TARGET_SSE && SSE_REGNO_P (regno)
3181 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3182 return true;
3183 }
3184 /* RAX is used as hidden argument to va_arg functions. */
3185 if (!regno)
3186 return true;
3187 for (i = 0; i < REGPARM_MAX; i++)
3188 if (regno == x86_64_int_parameter_registers[i])
3189 return true;
3190 return false;
3191 }
3192
3193 /* Return true if we do not know how to pass TYPE solely in registers. */
3194
3195 static bool
3196 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3197 {
3198 if (must_pass_in_stack_var_size_or_pad (mode, type))
3199 return true;
3200
3201 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3202 The layout_type routine is crafty and tries to trick us into passing
3203 currently unsupported vector types on the stack by using TImode. */
3204 return (!TARGET_64BIT && mode == TImode
3205 && type && TREE_CODE (type) != VECTOR_TYPE);
3206 }
3207
3208 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3209 for a call to a function whose data type is FNTYPE.
3210 For a library call, FNTYPE is 0. */
3211
3212 void
3213 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3214 tree fntype, /* tree ptr for function decl */
3215 rtx libname, /* SYMBOL_REF of library name or 0 */
3216 tree fndecl)
3217 {
3218 static CUMULATIVE_ARGS zero_cum;
3219 tree param, next_param;
3220
3221 if (TARGET_DEBUG_ARG)
3222 {
3223 fprintf (stderr, "\ninit_cumulative_args (");
3224 if (fntype)
3225 fprintf (stderr, "fntype code = %s, ret code = %s",
3226 tree_code_name[(int) TREE_CODE (fntype)],
3227 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3228 else
3229 fprintf (stderr, "no fntype");
3230
3231 if (libname)
3232 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3233 }
3234
3235 *cum = zero_cum;
3236
3237 /* Set up the number of registers to use for passing arguments. */
3238 cum->nregs = ix86_regparm;
3239 if (TARGET_SSE)
3240 cum->sse_nregs = SSE_REGPARM_MAX;
3241 if (TARGET_MMX)
3242 cum->mmx_nregs = MMX_REGPARM_MAX;
3243 cum->warn_sse = true;
3244 cum->warn_mmx = true;
3245 cum->maybe_vaarg = false;
3246
3247 /* Use ecx and edx registers if function has fastcall attribute,
3248 else look for regparm information. */
3249 if (fntype && !TARGET_64BIT)
3250 {
3251 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3252 {
3253 cum->nregs = 2;
3254 cum->fastcall = 1;
3255 }
3256 else
3257 cum->nregs = ix86_function_regparm (fntype, fndecl);
3258 }
3259
3260 /* Set up the number of SSE registers used for passing SFmode
3261 and DFmode arguments. Warn for mismatching ABI. */
3262 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3263
3264 /* Determine if this function has variable arguments. This is
3265 indicated by the last argument being 'void_type_node' if there
3266 are no variable arguments. If there are variable arguments, then
3267 we won't pass anything in registers in 32-bit mode. */
3268
3269 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3270 {
3271 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3272 param != 0; param = next_param)
3273 {
3274 next_param = TREE_CHAIN (param);
3275 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3276 {
3277 if (!TARGET_64BIT)
3278 {
3279 cum->nregs = 0;
3280 cum->sse_nregs = 0;
3281 cum->mmx_nregs = 0;
3282 cum->warn_sse = 0;
3283 cum->warn_mmx = 0;
3284 cum->fastcall = 0;
3285 cum->float_in_sse = 0;
3286 }
3287 cum->maybe_vaarg = true;
3288 }
3289 }
3290 }
3291 if ((!fntype && !libname)
3292 || (fntype && !TYPE_ARG_TYPES (fntype)))
3293 cum->maybe_vaarg = true;
3294
3295 if (TARGET_DEBUG_ARG)
3296 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3297
3298 return;
3299 }
3300
3301 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3302 But in the case of vector types, it is some vector mode.
3303
3304 When we have only some of our vector isa extensions enabled, then there
3305 are some modes for which vector_mode_supported_p is false. For these
3306 modes, the generic vector support in gcc will choose some non-vector mode
3307 in order to implement the type. By computing the natural mode, we'll
3308 select the proper ABI location for the operand and not depend on whatever
3309 the middle-end decides to do with these vector types. */
3310
3311 static enum machine_mode
3312 type_natural_mode (tree type)
3313 {
3314 enum machine_mode mode = TYPE_MODE (type);
3315
3316 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3317 {
3318 HOST_WIDE_INT size = int_size_in_bytes (type);
3319 if ((size == 8 || size == 16)
3320 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3321 && TYPE_VECTOR_SUBPARTS (type) > 1)
3322 {
3323 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3324
3325 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3326 mode = MIN_MODE_VECTOR_FLOAT;
3327 else
3328 mode = MIN_MODE_VECTOR_INT;
3329
3330 /* Get the mode which has this inner mode and number of units. */
3331 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3332 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3333 && GET_MODE_INNER (mode) == innermode)
3334 return mode;
3335
3336 gcc_unreachable ();
3337 }
3338 }
3339
3340 return mode;
3341 }
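/* Illustrative sketch: for a hypothetical user type

     typedef short v4hi __attribute__((vector_size (8)));

   TYPE_MODE may be a non-vector mode when MMX support is disabled, yet the
   search above still recovers V4HImode (four HImode units), so the ABI slot
   for the argument does not depend on whatever mode the middle-end fell
   back to.  */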
3342
3343 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3344 this may not agree with the mode that the type system has chosen for the
3345 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3346 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3347
3348 static rtx
3349 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3350 unsigned int regno)
3351 {
3352 rtx tmp;
3353
3354 if (orig_mode != BLKmode)
3355 tmp = gen_rtx_REG (orig_mode, regno);
3356 else
3357 {
3358 tmp = gen_rtx_REG (mode, regno);
3359 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3360 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3361 }
3362
3363 return tmp;
3364 }
3365
3366 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3367 The goal of this code is to classify each eightbyte of an incoming argument
3368 by register class and assign registers accordingly. */
3369
3370 /* Return the union class of CLASS1 and CLASS2.
3371 See the x86-64 PS ABI for details. */
3372
3373 static enum x86_64_reg_class
3374 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3375 {
3376 /* Rule #1: If both classes are equal, this is the resulting class. */
3377 if (class1 == class2)
3378 return class1;
3379
3380 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3381 the other class. */
3382 if (class1 == X86_64_NO_CLASS)
3383 return class2;
3384 if (class2 == X86_64_NO_CLASS)
3385 return class1;
3386
3387 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3388 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3389 return X86_64_MEMORY_CLASS;
3390
3391 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3392 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3393 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3394 return X86_64_INTEGERSI_CLASS;
3395 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3396 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3397 return X86_64_INTEGER_CLASS;
3398
3399 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3400 MEMORY is used. */
3401 if (class1 == X86_64_X87_CLASS
3402 || class1 == X86_64_X87UP_CLASS
3403 || class1 == X86_64_COMPLEX_X87_CLASS
3404 || class2 == X86_64_X87_CLASS
3405 || class2 == X86_64_X87UP_CLASS
3406 || class2 == X86_64_COMPLEX_X87_CLASS)
3407 return X86_64_MEMORY_CLASS;
3408
3409 /* Rule #6: Otherwise class SSE is used. */
3410 return X86_64_SSE_CLASS;
3411 }
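/* A short worked example of the rules above: in a hypothetical
   struct { float f; int i; } both fields land in the same eightbyte and
   classify as X86_64_SSESF_CLASS and X86_64_INTEGER_CLASS; rule #4 merges
   them to X86_64_INTEGER_CLASS, so the struct travels in one
   general-purpose register.  Merging X86_64_SSESF_CLASS with
   X86_64_SSEDF_CLASS instead falls through to rule #6 and yields
   X86_64_SSE_CLASS.  */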
3412
3413 /* Classify the argument of type TYPE and mode MODE.
3414 CLASSES will be filled by the register class used to pass each word
3415 of the operand. The number of words is returned. In case the parameter
3416 should be passed in memory, 0 is returned. As a special case for zero
3417 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3418
3419 BIT_OFFSET is used internally for handling records; it gives the
3420 offset in bits modulo 256 to avoid overflow cases.
3421
3422 See the x86-64 PS ABI for details.
3423 */
3424
3425 static int
3426 classify_argument (enum machine_mode mode, tree type,
3427 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3428 {
3429 HOST_WIDE_INT bytes =
3430 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3431 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3432
3433 /* Variable sized entities are always passed/returned in memory. */
3434 if (bytes < 0)
3435 return 0;
3436
3437 if (mode != VOIDmode
3438 && targetm.calls.must_pass_in_stack (mode, type))
3439 return 0;
3440
3441 if (type && AGGREGATE_TYPE_P (type))
3442 {
3443 int i;
3444 tree field;
3445 enum x86_64_reg_class subclasses[MAX_CLASSES];
3446
3447 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3448 if (bytes > 16)
3449 return 0;
3450
3451 for (i = 0; i < words; i++)
3452 classes[i] = X86_64_NO_CLASS;
3453
3454 /* Zero-sized arrays or structures are NO_CLASS. We return 0 to
3455 signal the memory class, so handle this as a special case. */
3456 if (!words)
3457 {
3458 classes[0] = X86_64_NO_CLASS;
3459 return 1;
3460 }
3461
3462 /* Classify each field of record and merge classes. */
3463 switch (TREE_CODE (type))
3464 {
3465 case RECORD_TYPE:
3466 /* And now merge the fields of structure. */
3467 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3468 {
3469 if (TREE_CODE (field) == FIELD_DECL)
3470 {
3471 int num;
3472
3473 if (TREE_TYPE (field) == error_mark_node)
3474 continue;
3475
3476 /* Bitfields are always classified as integer. Handle them
3477 early, since later code would consider them to be
3478 misaligned integers. */
3479 if (DECL_BIT_FIELD (field))
3480 {
3481 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3482 i < ((int_bit_position (field) + (bit_offset % 64))
3483 + tree_low_cst (DECL_SIZE (field), 0)
3484 + 63) / 8 / 8; i++)
3485 classes[i] =
3486 merge_classes (X86_64_INTEGER_CLASS,
3487 classes[i]);
3488 }
3489 else
3490 {
3491 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3492 TREE_TYPE (field), subclasses,
3493 (int_bit_position (field)
3494 + bit_offset) % 256);
3495 if (!num)
3496 return 0;
3497 for (i = 0; i < num; i++)
3498 {
3499 int pos =
3500 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3501 classes[i + pos] =
3502 merge_classes (subclasses[i], classes[i + pos]);
3503 }
3504 }
3505 }
3506 }
3507 break;
3508
3509 case ARRAY_TYPE:
3510 /* Arrays are handled as small records. */
3511 {
3512 int num;
3513 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3514 TREE_TYPE (type), subclasses, bit_offset);
3515 if (!num)
3516 return 0;
3517
3518 /* The partial classes are now full classes. */
3519 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3520 subclasses[0] = X86_64_SSE_CLASS;
3521 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3522 subclasses[0] = X86_64_INTEGER_CLASS;
3523
3524 for (i = 0; i < words; i++)
3525 classes[i] = subclasses[i % num];
3526
3527 break;
3528 }
3529 case UNION_TYPE:
3530 case QUAL_UNION_TYPE:
3531 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3532
3533 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3534 {
3535 if (TREE_CODE (field) == FIELD_DECL)
3536 {
3537 int num;
3538
3539 if (TREE_TYPE (field) == error_mark_node)
3540 continue;
3541
3542 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3543 TREE_TYPE (field), subclasses,
3544 bit_offset);
3545 if (!num)
3546 return 0;
3547 for (i = 0; i < num; i++)
3548 classes[i] = merge_classes (subclasses[i], classes[i]);
3549 }
3550 }
3551 break;
3552
3553 default:
3554 gcc_unreachable ();
3555 }
3556
3557 /* Final merger cleanup. */
3558 for (i = 0; i < words; i++)
3559 {
3560 /* If one class is MEMORY, everything should be passed in
3561 memory. */
3562 if (classes[i] == X86_64_MEMORY_CLASS)
3563 return 0;
3564
3565 /* The X86_64_SSEUP_CLASS should be always preceded by
3566 X86_64_SSE_CLASS. */
3567 if (classes[i] == X86_64_SSEUP_CLASS
3568 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3569 classes[i] = X86_64_SSE_CLASS;
3570
3571 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3572 if (classes[i] == X86_64_X87UP_CLASS
3573 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3574 classes[i] = X86_64_SSE_CLASS;
3575 }
3576 return words;
3577 }
3578
3579 /* Compute the alignment needed. We align all types to their natural
3580 boundaries; XFmode is the exception and is treated as 128-bit aligned. */
3581 if (mode != VOIDmode && mode != BLKmode)
3582 {
3583 int mode_alignment = GET_MODE_BITSIZE (mode);
3584
3585 if (mode == XFmode)
3586 mode_alignment = 128;
3587 else if (mode == XCmode)
3588 mode_alignment = 256;
3589 if (COMPLEX_MODE_P (mode))
3590 mode_alignment /= 2;
3591 /* Misaligned fields are always returned in memory. */
3592 if (bit_offset % mode_alignment)
3593 return 0;
3594 }
3595
3596 /* for V1xx modes, just use the base mode */
3597 if (VECTOR_MODE_P (mode)
3598 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3599 mode = GET_MODE_INNER (mode);
3600
3601 /* Classification of atomic types. */
3602 switch (mode)
3603 {
3604 case SDmode:
3605 case DDmode:
3606 classes[0] = X86_64_SSE_CLASS;
3607 return 1;
3608 case TDmode:
3609 classes[0] = X86_64_SSE_CLASS;
3610 classes[1] = X86_64_SSEUP_CLASS;
3611 return 2;
3612 case DImode:
3613 case SImode:
3614 case HImode:
3615 case QImode:
3616 case CSImode:
3617 case CHImode:
3618 case CQImode:
3619 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3620 classes[0] = X86_64_INTEGERSI_CLASS;
3621 else
3622 classes[0] = X86_64_INTEGER_CLASS;
3623 return 1;
3624 case CDImode:
3625 case TImode:
3626 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3627 return 2;
3628 case CTImode:
3629 return 0;
3630 case SFmode:
3631 if (!(bit_offset % 64))
3632 classes[0] = X86_64_SSESF_CLASS;
3633 else
3634 classes[0] = X86_64_SSE_CLASS;
3635 return 1;
3636 case DFmode:
3637 classes[0] = X86_64_SSEDF_CLASS;
3638 return 1;
3639 case XFmode:
3640 classes[0] = X86_64_X87_CLASS;
3641 classes[1] = X86_64_X87UP_CLASS;
3642 return 2;
3643 case TFmode:
3644 classes[0] = X86_64_SSE_CLASS;
3645 classes[1] = X86_64_SSEUP_CLASS;
3646 return 2;
3647 case SCmode:
3648 classes[0] = X86_64_SSE_CLASS;
3649 return 1;
3650 case DCmode:
3651 classes[0] = X86_64_SSEDF_CLASS;
3652 classes[1] = X86_64_SSEDF_CLASS;
3653 return 2;
3654 case XCmode:
3655 classes[0] = X86_64_COMPLEX_X87_CLASS;
3656 return 1;
3657 case TCmode:
3658 /* This mode is larger than 16 bytes. */
3659 return 0;
3660 case V4SFmode:
3661 case V4SImode:
3662 case V16QImode:
3663 case V8HImode:
3664 case V2DFmode:
3665 case V2DImode:
3666 classes[0] = X86_64_SSE_CLASS;
3667 classes[1] = X86_64_SSEUP_CLASS;
3668 return 2;
3669 case V2SFmode:
3670 case V2SImode:
3671 case V4HImode:
3672 case V8QImode:
3673 classes[0] = X86_64_SSE_CLASS;
3674 return 1;
3675 case BLKmode:
3676 case VOIDmode:
3677 return 0;
3678 default:
3679 gcc_assert (VECTOR_MODE_P (mode));
3680
3681 if (bytes > 16)
3682 return 0;
3683
3684 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3685
3686 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3687 classes[0] = X86_64_INTEGERSI_CLASS;
3688 else
3689 classes[0] = X86_64_INTEGER_CLASS;
3690 classes[1] = X86_64_INTEGER_CLASS;
3691 return 1 + (bytes > 8);
3692 }
3693 }
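/* Two short worked examples of the classification above, for hypothetical
   argument types:

     struct { double d; long l; }  -> 16 bytes, classes { X86_64_SSEDF_CLASS,
        X86_64_INTEGER_CLASS }: d is passed in an SSE register, l in a GPR.

     struct { char c[24]; }        -> 24 bytes, larger than 16, so
        classify_argument returns 0 and the argument goes to memory.  */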
3694
3695 /* Examine the argument and return the number of registers required in
3696 each class. Return 0 iff the parameter should be passed in memory. */
3697 static int
3698 examine_argument (enum machine_mode mode, tree type, int in_return,
3699 int *int_nregs, int *sse_nregs)
3700 {
3701 enum x86_64_reg_class class[MAX_CLASSES];
3702 int n = classify_argument (mode, type, class, 0);
3703
3704 *int_nregs = 0;
3705 *sse_nregs = 0;
3706 if (!n)
3707 return 0;
3708 for (n--; n >= 0; n--)
3709 switch (class[n])
3710 {
3711 case X86_64_INTEGER_CLASS:
3712 case X86_64_INTEGERSI_CLASS:
3713 (*int_nregs)++;
3714 break;
3715 case X86_64_SSE_CLASS:
3716 case X86_64_SSESF_CLASS:
3717 case X86_64_SSEDF_CLASS:
3718 (*sse_nregs)++;
3719 break;
3720 case X86_64_NO_CLASS:
3721 case X86_64_SSEUP_CLASS:
3722 break;
3723 case X86_64_X87_CLASS:
3724 case X86_64_X87UP_CLASS:
3725 if (!in_return)
3726 return 0;
3727 break;
3728 case X86_64_COMPLEX_X87_CLASS:
3729 return in_return ? 2 : 0;
3730 case X86_64_MEMORY_CLASS:
3731 gcc_unreachable ();
3732 }
3733 return 1;
3734 }
3735
3736 /* Construct container for the argument used by GCC interface. See
3737 FUNCTION_ARG for the detailed description. */
3738
3739 static rtx
3740 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3741 tree type, int in_return, int nintregs, int nsseregs,
3742 const int *intreg, int sse_regno)
3743 {
3744 /* The following variables hold the static issued_error state. */
3745 static bool issued_sse_arg_error;
3746 static bool issued_sse_ret_error;
3747 static bool issued_x87_ret_error;
3748
3749 enum machine_mode tmpmode;
3750 int bytes =
3751 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3752 enum x86_64_reg_class class[MAX_CLASSES];
3753 int n;
3754 int i;
3755 int nexps = 0;
3756 int needed_sseregs, needed_intregs;
3757 rtx exp[MAX_CLASSES];
3758 rtx ret;
3759
3760 n = classify_argument (mode, type, class, 0);
3761 if (TARGET_DEBUG_ARG)
3762 {
3763 if (!n)
3764 fprintf (stderr, "Memory class\n");
3765 else
3766 {
3767 fprintf (stderr, "Classes:");
3768 for (i = 0; i < n; i++)
3769 {
3770 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3771 }
3772 fprintf (stderr, "\n");
3773 }
3774 }
3775 if (!n)
3776 return NULL;
3777 if (!examine_argument (mode, type, in_return, &needed_intregs,
3778 &needed_sseregs))
3779 return NULL;
3780 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3781 return NULL;
3782
3783 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3784 some less clueful developer tries to use floating-point anyway. */
3785 if (needed_sseregs && !TARGET_SSE)
3786 {
3787 if (in_return)
3788 {
3789 if (!issued_sse_ret_error)
3790 {
3791 error ("SSE register return with SSE disabled");
3792 issued_sse_ret_error = true;
3793 }
3794 }
3795 else if (!issued_sse_arg_error)
3796 {
3797 error ("SSE register argument with SSE disabled");
3798 issued_sse_arg_error = true;
3799 }
3800 return NULL;
3801 }
3802
3803 /* Likewise, error if the ABI requires us to return values in the
3804 x87 registers and the user specified -mno-80387. */
3805 if (!TARGET_80387 && in_return)
3806 for (i = 0; i < n; i++)
3807 if (class[i] == X86_64_X87_CLASS
3808 || class[i] == X86_64_X87UP_CLASS
3809 || class[i] == X86_64_COMPLEX_X87_CLASS)
3810 {
3811 if (!issued_x87_ret_error)
3812 {
3813 error ("x87 register return with x87 disabled");
3814 issued_x87_ret_error = true;
3815 }
3816 return NULL;
3817 }
3818
3819 /* First construct the simple cases. Avoid SCmode, since we want to use
3820 a single register to pass this type. */
3821 if (n == 1 && mode != SCmode)
3822 switch (class[0])
3823 {
3824 case X86_64_INTEGER_CLASS:
3825 case X86_64_INTEGERSI_CLASS:
3826 return gen_rtx_REG (mode, intreg[0]);
3827 case X86_64_SSE_CLASS:
3828 case X86_64_SSESF_CLASS:
3829 case X86_64_SSEDF_CLASS:
3830 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3831 case X86_64_X87_CLASS:
3832 case X86_64_COMPLEX_X87_CLASS:
3833 return gen_rtx_REG (mode, FIRST_STACK_REG);
3834 case X86_64_NO_CLASS:
3835 /* Zero sized array, struct or class. */
3836 return NULL;
3837 default:
3838 gcc_unreachable ();
3839 }
3840 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3841 && mode != BLKmode)
3842 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3843 if (n == 2
3844 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3845 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3846 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3847 && class[1] == X86_64_INTEGER_CLASS
3848 && (mode == CDImode || mode == TImode || mode == TFmode)
3849 && intreg[0] + 1 == intreg[1])
3850 return gen_rtx_REG (mode, intreg[0]);
3851
3852 /* Otherwise figure out the entries of the PARALLEL. */
3853 for (i = 0; i < n; i++)
3854 {
3855 switch (class[i])
3856 {
3857 case X86_64_NO_CLASS:
3858 break;
3859 case X86_64_INTEGER_CLASS:
3860 case X86_64_INTEGERSI_CLASS:
3861 /* Merge TImodes on aligned occasions here too. */
3862 if (i * 8 + 8 > bytes)
3863 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3864 else if (class[i] == X86_64_INTEGERSI_CLASS)
3865 tmpmode = SImode;
3866 else
3867 tmpmode = DImode;
3868 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3869 if (tmpmode == BLKmode)
3870 tmpmode = DImode;
3871 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3872 gen_rtx_REG (tmpmode, *intreg),
3873 GEN_INT (i*8));
3874 intreg++;
3875 break;
3876 case X86_64_SSESF_CLASS:
3877 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3878 gen_rtx_REG (SFmode,
3879 SSE_REGNO (sse_regno)),
3880 GEN_INT (i*8));
3881 sse_regno++;
3882 break;
3883 case X86_64_SSEDF_CLASS:
3884 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3885 gen_rtx_REG (DFmode,
3886 SSE_REGNO (sse_regno)),
3887 GEN_INT (i*8));
3888 sse_regno++;
3889 break;
3890 case X86_64_SSE_CLASS:
3891 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3892 tmpmode = TImode;
3893 else
3894 tmpmode = DImode;
3895 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3896 gen_rtx_REG (tmpmode,
3897 SSE_REGNO (sse_regno)),
3898 GEN_INT (i*8));
3899 if (tmpmode == TImode)
3900 i++;
3901 sse_regno++;
3902 break;
3903 default:
3904 gcc_unreachable ();
3905 }
3906 }
3907
3908 /* Empty aligned struct, union or class. */
3909 if (nexps == 0)
3910 return NULL;
3911
3912 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3913 for (i = 0; i < nexps; i++)
3914 XVECEXP (ret, 0, i) = exp [i];
3915 return ret;
3916 }
3917
3918 /* Update the data in CUM to advance over an argument
3919 of mode MODE and data type TYPE.
3920 (TYPE is null for libcalls where that information may not be available.) */
3921
3922 void
3923 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3924 tree type, int named)
3925 {
3926 int bytes =
3927 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3928 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3929
3930 if (type)
3931 mode = type_natural_mode (type);
3932
3933 if (TARGET_DEBUG_ARG)
3934 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3935 "mode=%s, named=%d)\n\n",
3936 words, cum->words, cum->nregs, cum->sse_nregs,
3937 GET_MODE_NAME (mode), named);
3938
3939 if (TARGET_64BIT)
3940 {
3941 int int_nregs, sse_nregs;
3942 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3943 cum->words += words;
3944 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3945 {
3946 cum->nregs -= int_nregs;
3947 cum->sse_nregs -= sse_nregs;
3948 cum->regno += int_nregs;
3949 cum->sse_regno += sse_nregs;
3950 }
3951 else
3952 cum->words += words;
3953 }
3954 else
3955 {
3956 switch (mode)
3957 {
3958 default:
3959 break;
3960
3961 case BLKmode:
3962 if (bytes < 0)
3963 break;
3964 /* FALLTHRU */
3965
3966 case DImode:
3967 case SImode:
3968 case HImode:
3969 case QImode:
3970 cum->words += words;
3971 cum->nregs -= words;
3972 cum->regno += words;
3973
3974 if (cum->nregs <= 0)
3975 {
3976 cum->nregs = 0;
3977 cum->regno = 0;
3978 }
3979 break;
3980
3981 case DFmode:
3982 if (cum->float_in_sse < 2)
3983 break;
3984 case SFmode:
3985 if (cum->float_in_sse < 1)
3986 break;
3987 /* FALLTHRU */
3988
3989 case TImode:
3990 case V16QImode:
3991 case V8HImode:
3992 case V4SImode:
3993 case V2DImode:
3994 case V4SFmode:
3995 case V2DFmode:
3996 if (!type || !AGGREGATE_TYPE_P (type))
3997 {
3998 cum->sse_words += words;
3999 cum->sse_nregs -= 1;
4000 cum->sse_regno += 1;
4001 if (cum->sse_nregs <= 0)
4002 {
4003 cum->sse_nregs = 0;
4004 cum->sse_regno = 0;
4005 }
4006 }
4007 break;
4008
4009 case V8QImode:
4010 case V4HImode:
4011 case V2SImode:
4012 case V2SFmode:
4013 if (!type || !AGGREGATE_TYPE_P (type))
4014 {
4015 cum->mmx_words += words;
4016 cum->mmx_nregs -= 1;
4017 cum->mmx_regno += 1;
4018 if (cum->mmx_nregs <= 0)
4019 {
4020 cum->mmx_nregs = 0;
4021 cum->mmx_regno = 0;
4022 }
4023 }
4024 break;
4025 }
4026 }
4027 }
4028
4029 /* Define where to put the arguments to a function.
4030 Value is zero to push the argument on the stack,
4031 or a hard register in which to store the argument.
4032
4033 MODE is the argument's machine mode.
4034 TYPE is the data type of the argument (as a tree).
4035 This is null for libcalls where that information may
4036 not be available.
4037 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4038 the preceding args and about the function being called.
4039 NAMED is nonzero if this argument is a named parameter
4040 (otherwise it is an extra parameter matching an ellipsis). */
4041
4042 rtx
4043 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
4044 tree type, int named)
4045 {
4046 enum machine_mode mode = orig_mode;
4047 rtx ret = NULL_RTX;
4048 int bytes =
4049 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4050 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4051 static bool warnedsse, warnedmmx;
4052
4053 /* To simplify the code below, represent vector types with a vector mode
4054 even if MMX/SSE are not active. */
4055 if (type && TREE_CODE (type) == VECTOR_TYPE)
4056 mode = type_natural_mode (type);
4057
4058 /* Handle a hidden AL argument containing the number of SSE registers for
4059 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
4060 avoid any AL settings. */
4061 if (mode == VOIDmode)
4062 {
4063 if (TARGET_64BIT)
4064 return GEN_INT (cum->maybe_vaarg
4065 ? (cum->sse_nregs < 0
4066 ? SSE_REGPARM_MAX
4067 : cum->sse_regno)
4068 : -1);
4069 else
4070 return constm1_rtx;
4071 }
4072 if (TARGET_64BIT)
4073 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4074 cum->sse_nregs,
4075 &x86_64_int_parameter_registers [cum->regno],
4076 cum->sse_regno);
4077 else
4078 switch (mode)
4079 {
4080 /* For now, pass fp/complex values on the stack. */
4081 default:
4082 break;
4083
4084 case BLKmode:
4085 if (bytes < 0)
4086 break;
4087 /* FALLTHRU */
4088 case DImode:
4089 case SImode:
4090 case HImode:
4091 case QImode:
4092 if (words <= cum->nregs)
4093 {
4094 int regno = cum->regno;
4095
4096 /* Fastcall allocates the first two DWORD (SImode) or
4097 smaller arguments to ECX and EDX. */
4098 if (cum->fastcall)
4099 {
4100 if (mode == BLKmode || mode == DImode)
4101 break;
4102
4103 /* ECX, not EAX, is the first allocated register. */
4104 if (regno == 0)
4105 regno = 2;
4106 }
4107 ret = gen_rtx_REG (mode, regno);
4108 }
4109 break;
4110 case DFmode:
4111 if (cum->float_in_sse < 2)
4112 break;
4113 case SFmode:
4114 if (cum->float_in_sse < 1)
4115 break;
4116 /* FALLTHRU */
4117 case TImode:
4118 case V16QImode:
4119 case V8HImode:
4120 case V4SImode:
4121 case V2DImode:
4122 case V4SFmode:
4123 case V2DFmode:
4124 if (!type || !AGGREGATE_TYPE_P (type))
4125 {
4126 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4127 {
4128 warnedsse = true;
4129 warning (0, "SSE vector argument without SSE enabled "
4130 "changes the ABI");
4131 }
4132 if (cum->sse_nregs)
4133 ret = gen_reg_or_parallel (mode, orig_mode,
4134 cum->sse_regno + FIRST_SSE_REG);
4135 }
4136 break;
4137 case V8QImode:
4138 case V4HImode:
4139 case V2SImode:
4140 case V2SFmode:
4141 if (!type || !AGGREGATE_TYPE_P (type))
4142 {
4143 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4144 {
4145 warnedmmx = true;
4146 warning (0, "MMX vector argument without MMX enabled "
4147 "changes the ABI");
4148 }
4149 if (cum->mmx_nregs)
4150 ret = gen_reg_or_parallel (mode, orig_mode,
4151 cum->mmx_regno + FIRST_MMX_REG);
4152 }
4153 break;
4154 }
4155
4156 if (TARGET_DEBUG_ARG)
4157 {
4158 fprintf (stderr,
4159 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4160 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4161
4162 if (ret)
4163 print_simple_rtl (stderr, ret);
4164 else
4165 fprintf (stderr, ", stack");
4166
4167 fprintf (stderr, " )\n");
4168 }
4169
4170 return ret;
4171 }
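/* Illustrative sketch (hypothetical user code): with the 32-bit logic above,

     void __attribute__((fastcall)) f (int a, int b, int c);

   receives a in %ecx (regno 0 is remapped to 2 for fastcall) and b in %edx,
   while c overflows to the stack because only two registers are available;
   DImode and BLKmode values never use the fastcall registers.  */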
4172
4173 /* A C expression that indicates when an argument must be passed by
4174 reference. If nonzero for an argument, a copy of that argument is
4175 made in memory and a pointer to the argument is passed instead of
4176 the argument itself. The pointer is passed in whatever way is
4177 appropriate for passing a pointer to that type. */
4178
4179 static bool
4180 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4181 enum machine_mode mode ATTRIBUTE_UNUSED,
4182 tree type, bool named ATTRIBUTE_UNUSED)
4183 {
4184 if (!TARGET_64BIT)
4185 return 0;
4186
4187 if (type && int_size_in_bytes (type) == -1)
4188 {
4189 if (TARGET_DEBUG_ARG)
4190 fprintf (stderr, "function_arg_pass_by_reference\n");
4191 return 1;
4192 }
4193
4194 return 0;
4195 }
4196
4197 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4198 ABI. Only called if TARGET_SSE. */
4199 static bool
4200 contains_128bit_aligned_vector_p (tree type)
4201 {
4202 enum machine_mode mode = TYPE_MODE (type);
4203 if (SSE_REG_MODE_P (mode)
4204 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4205 return true;
4206 if (TYPE_ALIGN (type) < 128)
4207 return false;
4208
4209 if (AGGREGATE_TYPE_P (type))
4210 {
4211 /* Walk the aggregates recursively. */
4212 switch (TREE_CODE (type))
4213 {
4214 case RECORD_TYPE:
4215 case UNION_TYPE:
4216 case QUAL_UNION_TYPE:
4217 {
4218 tree field;
4219
4220 /* Walk all the structure fields. */
4221 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4222 {
4223 if (TREE_CODE (field) == FIELD_DECL
4224 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4225 return true;
4226 }
4227 break;
4228 }
4229
4230 case ARRAY_TYPE:
4231 /* Just in case some languages pass arrays by value. */
4232 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4233 return true;
4234 break;
4235
4236 default:
4237 gcc_unreachable ();
4238 }
4239 }
4240 return false;
4241 }
4242
4243 /* Gives the alignment boundary, in bits, of an argument with the
4244 specified mode and type. */
4245
4246 int
4247 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4248 {
4249 int align;
4250 if (type)
4251 align = TYPE_ALIGN (type);
4252 else
4253 align = GET_MODE_ALIGNMENT (mode);
4254 if (align < PARM_BOUNDARY)
4255 align = PARM_BOUNDARY;
4256 if (!TARGET_64BIT)
4257 {
4258 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4259 make an exception for SSE modes since these require 128bit
4260 alignment.
4261
4262 The handling here differs from field_alignment. ICC aligns MMX
4263 arguments to 4 byte boundaries, while structure fields are aligned
4264 to 8 byte boundaries. */
4265 if (!TARGET_SSE)
4266 align = PARM_BOUNDARY;
4267 else if (!type)
4268 {
4269 if (!SSE_REG_MODE_P (mode))
4270 align = PARM_BOUNDARY;
4271 }
4272 else
4273 {
4274 if (!contains_128bit_aligned_vector_p (type))
4275 align = PARM_BOUNDARY;
4276 }
4277 }
4278 if (align > 128)
4279 align = 128;
4280 return align;
4281 }
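/* Illustrative sketch: on a 32-bit target with SSE enabled, a hypothetical
   __m128 (V4SFmode) argument gets a 128-bit boundary from the code above,
   a plain int stays at PARM_BOUNDARY, and an aggregate is bumped to 128
   bits only if it really contains a 128-bit aligned vector; nothing is
   ever aligned beyond 128 bits.  */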
4282
4283 /* Return true if N is a possible register number of function value. */
4284 bool
4285 ix86_function_value_regno_p (int regno)
4286 {
4287 if (TARGET_MACHO)
4288 {
4289 if (!TARGET_64BIT)
4290 {
4291 return ((regno) == 0
4292 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4293 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4294 }
4295 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4296 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4297 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4298 }
4299 else
4300 {
4301 if (regno == 0
4302 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4303 || (regno == FIRST_SSE_REG && TARGET_SSE))
4304 return true;
4305
4306 if (!TARGET_64BIT
4307 && (regno == FIRST_MMX_REG && TARGET_MMX))
4308 return true;
4309
4310 return false;
4311 }
4312 }
4313
4314 /* Define how to find the value returned by a function.
4315 VALTYPE is the data type of the value (as a tree).
4316 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4317 otherwise, FUNC is 0. */
4318 rtx
4319 ix86_function_value (tree valtype, tree fntype_or_decl,
4320 bool outgoing ATTRIBUTE_UNUSED)
4321 {
4322 enum machine_mode natmode = type_natural_mode (valtype);
4323
4324 if (TARGET_64BIT)
4325 {
4326 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4327 1, REGPARM_MAX, SSE_REGPARM_MAX,
4328 x86_64_int_return_registers, 0);
4329 /* For zero-sized structures, construct_container returns NULL, but we
4330 need to keep the rest of the compiler happy by returning a meaningful value. */
4331 if (!ret)
4332 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4333 return ret;
4334 }
4335 else
4336 {
4337 tree fn = NULL_TREE, fntype;
4338 if (fntype_or_decl
4339 && DECL_P (fntype_or_decl))
4340 fn = fntype_or_decl;
4341 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4342 return gen_rtx_REG (TYPE_MODE (valtype),
4343 ix86_value_regno (natmode, fn, fntype));
4344 }
4345 }
4346
4347 /* Return true iff type is returned in memory. */
4348 int
4349 ix86_return_in_memory (tree type)
4350 {
4351 int needed_intregs, needed_sseregs, size;
4352 enum machine_mode mode = type_natural_mode (type);
4353
4354 if (TARGET_64BIT)
4355 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4356
4357 if (mode == BLKmode)
4358 return 1;
4359
4360 size = int_size_in_bytes (type);
4361
4362 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4363 return 0;
4364
4365 if (VECTOR_MODE_P (mode) || mode == TImode)
4366 {
4367 /* User-created vectors small enough to fit in EAX. */
4368 if (size < 8)
4369 return 0;
4370
4371 /* MMX/3dNow values are returned in MM0,
4372 except when it doesn't exist. */
4373 if (size == 8)
4374 return (TARGET_MMX ? 0 : 1);
4375
4376 /* SSE values are returned in XMM0, except when it doesn't exist. */
4377 if (size == 16)
4378 return (TARGET_SSE ? 0 : 1);
4379 }
4380
4381 if (mode == XFmode)
4382 return 0;
4383
4384 if (mode == TDmode)
4385 return 1;
4386
4387 if (size > 12)
4388 return 1;
4389 return 0;
4390 }
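/* A sketch of the 32-bit cases above, for hypothetical types: a typical
   multi-member struct has BLKmode and is returned in memory; _Complex
   double (DCmode, 16 bytes) exceeds the 12-byte limit and is also returned
   in memory; an 8-byte vector comes back in %mm0 only when MMX is enabled;
   and long double (XFmode) is returned in a register.  */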
4391
4392 /* When returning SSE vector types, we have a choice of either
4393 (1) being ABI incompatible with a -march switch, or
4394 (2) generating an error.
4395 Given no good solution, I think the safest thing is one warning.
4396 The user won't be able to use -Werror, but....
4397
4398 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4399 called in response to actually generating a caller or callee that
4400 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4401 via aggregate_value_p for general type probing from tree-ssa. */
4402
4403 static rtx
4404 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4405 {
4406 static bool warnedsse, warnedmmx;
4407
4408 if (type)
4409 {
4410 /* Look at the return type of the function, not the function type. */
4411 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4412
4413 if (!TARGET_SSE && !warnedsse)
4414 {
4415 if (mode == TImode
4416 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4417 {
4418 warnedsse = true;
4419 warning (0, "SSE vector return without SSE enabled "
4420 "changes the ABI");
4421 }
4422 }
4423
4424 if (!TARGET_MMX && !warnedmmx)
4425 {
4426 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4427 {
4428 warnedmmx = true;
4429 warning (0, "MMX vector return without MMX enabled "
4430 "changes the ABI");
4431 }
4432 }
4433 }
4434
4435 return NULL;
4436 }
4437
4438 /* Define how to find the value returned by a library function
4439 assuming the value has mode MODE. */
4440 rtx
4441 ix86_libcall_value (enum machine_mode mode)
4442 {
4443 if (TARGET_64BIT)
4444 {
4445 switch (mode)
4446 {
4447 case SFmode:
4448 case SCmode:
4449 case DFmode:
4450 case DCmode:
4451 case TFmode:
4452 case SDmode:
4453 case DDmode:
4454 case TDmode:
4455 return gen_rtx_REG (mode, FIRST_SSE_REG);
4456 case XFmode:
4457 case XCmode:
4458 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4459 case TCmode:
4460 return NULL;
4461 default:
4462 return gen_rtx_REG (mode, 0);
4463 }
4464 }
4465 else
4466 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4467 }
4468
4469 /* Given a mode, return the register to use for a return value. */
4470
4471 static int
4472 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4473 {
4474 gcc_assert (!TARGET_64BIT);
4475
4476 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4477 we normally prevent this case when mmx is not available. However
4478 some ABIs may require the result to be returned like DImode. */
4479 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4480 return TARGET_MMX ? FIRST_MMX_REG : 0;
4481
4482 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4483 we prevent this case when sse is not available. However some ABIs
4484 may require the result to be returned like integer TImode. */
4485 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4486 return TARGET_SSE ? FIRST_SSE_REG : 0;
4487
4488 /* Decimal floating point values can go in %eax, unlike other float modes. */
4489 if (DECIMAL_FLOAT_MODE_P (mode))
4490 return 0;
4491
4492 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4493 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4494 return 0;
4495
4496 /* Floating point return values in %st(0), except for local functions when
4497 SSE math is enabled or for functions with sseregparm attribute. */
4498 if ((func || fntype)
4499 && (mode == SFmode || mode == DFmode))
4500 {
4501 int sse_level = ix86_function_sseregparm (fntype, func);
4502 if ((sse_level >= 1 && mode == SFmode)
4503 || (sse_level == 2 && mode == DFmode))
4504 return FIRST_SSE_REG;
4505 }
4506
4507 return FIRST_FLOAT_REG;
4508 }
4509 \f
4510 /* Create the va_list data type. */
4511
4512 static tree
4513 ix86_build_builtin_va_list (void)
4514 {
4515 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4516
4517 /* For i386 we use plain pointer to argument area. */
4518 if (!TARGET_64BIT)
4519 return build_pointer_type (char_type_node);
4520
4521 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4522 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4523
4524 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4525 unsigned_type_node);
4526 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4527 unsigned_type_node);
4528 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4529 ptr_type_node);
4530 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4531 ptr_type_node);
4532
4533 va_list_gpr_counter_field = f_gpr;
4534 va_list_fpr_counter_field = f_fpr;
4535
4536 DECL_FIELD_CONTEXT (f_gpr) = record;
4537 DECL_FIELD_CONTEXT (f_fpr) = record;
4538 DECL_FIELD_CONTEXT (f_ovf) = record;
4539 DECL_FIELD_CONTEXT (f_sav) = record;
4540
4541 TREE_CHAIN (record) = type_decl;
4542 TYPE_NAME (record) = type_decl;
4543 TYPE_FIELDS (record) = f_gpr;
4544 TREE_CHAIN (f_gpr) = f_fpr;
4545 TREE_CHAIN (f_fpr) = f_ovf;
4546 TREE_CHAIN (f_ovf) = f_sav;
4547
4548 layout_type (record);
4549
4550 /* The correct type is an array type of one element. */
4551 return build_array_type (record, build_index_type (size_zero_node));
4552 }
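/* For reference, the record built above matches the usual x86-64 va_list
   declaration (a sketch, not code from this file):

     typedef struct __va_list_tag {
       unsigned int gp_offset;     // offset into reg_save_area for GP regs
       unsigned int fp_offset;     // offset into reg_save_area for SSE regs
       void *overflow_arg_area;    // stack-passed (overflow) arguments
       void *reg_save_area;        // register save block
     } va_list[1];

   i.e. an array of one element of the record laid out above.  */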
4553
4554 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4555
4556 static void
4557 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4558 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4559 int no_rtl)
4560 {
4561 CUMULATIVE_ARGS next_cum;
4562 rtx save_area = NULL_RTX, mem;
4563 rtx label;
4564 rtx label_ref;
4565 rtx tmp_reg;
4566 rtx nsse_reg;
4567 int set;
4568 tree fntype;
4569 int stdarg_p;
4570 int i;
4571
4572 if (!TARGET_64BIT)
4573 return;
4574
4575 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4576 return;
4577
4578 /* Indicate to allocate space on the stack for varargs save area. */
4579 ix86_save_varrargs_registers = 1;
4580
4581 cfun->stack_alignment_needed = 128;
4582
4583 fntype = TREE_TYPE (current_function_decl);
4584 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4585 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4586 != void_type_node));
4587
4588 /* For varargs, we do not want to skip the dummy va_dcl argument.
4589 For stdargs, we do want to skip the last named argument. */
4590 next_cum = *cum;
4591 if (stdarg_p)
4592 function_arg_advance (&next_cum, mode, type, 1);
4593
4594 if (!no_rtl)
4595 save_area = frame_pointer_rtx;
4596
4597 set = get_varargs_alias_set ();
4598
4599 for (i = next_cum.regno;
4600 i < ix86_regparm
4601 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4602 i++)
4603 {
4604 mem = gen_rtx_MEM (Pmode,
4605 plus_constant (save_area, i * UNITS_PER_WORD));
4606 MEM_NOTRAP_P (mem) = 1;
4607 set_mem_alias_set (mem, set);
4608 emit_move_insn (mem, gen_rtx_REG (Pmode,
4609 x86_64_int_parameter_registers[i]));
4610 }
4611
4612 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4613 {
4614 /* Now emit code to save SSE registers. The AX parameter contains the
4615 number of SSE parameter registers used to call this function. We use
4616 the sse_prologue_save insn template, which produces a computed jump
4617 across the SSE saves. We need some preparation work to get this working. */
4618
4619 label = gen_label_rtx ();
4620 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4621
4622 /* Compute the address to jump to:
4623 label - eax*4 + nnamed_sse_arguments*4 */
4624 tmp_reg = gen_reg_rtx (Pmode);
4625 nsse_reg = gen_reg_rtx (Pmode);
4626 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4627 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4628 gen_rtx_MULT (Pmode, nsse_reg,
4629 GEN_INT (4))));
4630 if (next_cum.sse_regno)
4631 emit_move_insn
4632 (nsse_reg,
4633 gen_rtx_CONST (DImode,
4634 gen_rtx_PLUS (DImode,
4635 label_ref,
4636 GEN_INT (next_cum.sse_regno * 4))));
4637 else
4638 emit_move_insn (nsse_reg, label_ref);
4639 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4640
4641 /* Compute the address of the memory block we save into. We always use a
4642 pointer pointing 127 bytes past the first byte to store; this keeps the
4643 instruction size within 4 bytes (an 8-bit displacement). */
4644 tmp_reg = gen_reg_rtx (Pmode);
4645 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4646 plus_constant (save_area,
4647 8 * REGPARM_MAX + 127)));
4648 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4649 MEM_NOTRAP_P (mem) = 1;
4650 set_mem_alias_set (mem, set);
4651 set_mem_align (mem, BITS_PER_WORD);
4652
4653 /* And finally do the dirty job! */
4654 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4655 GEN_INT (next_cum.sse_regno), label));
4656 }
4657
4658 }
4659
4660 /* Implement va_start. */
4661
4662 void
4663 ix86_va_start (tree valist, rtx nextarg)
4664 {
4665 HOST_WIDE_INT words, n_gpr, n_fpr;
4666 tree f_gpr, f_fpr, f_ovf, f_sav;
4667 tree gpr, fpr, ovf, sav, t;
4668 tree type;
4669
4670 /* Only 64bit target needs something special. */
4671 if (!TARGET_64BIT)
4672 {
4673 std_expand_builtin_va_start (valist, nextarg);
4674 return;
4675 }
4676
4677 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4678 f_fpr = TREE_CHAIN (f_gpr);
4679 f_ovf = TREE_CHAIN (f_fpr);
4680 f_sav = TREE_CHAIN (f_ovf);
4681
4682 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4683 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4684 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4685 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4686 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4687
4688 /* Count number of gp and fp argument registers used. */
4689 words = current_function_args_info.words;
4690 n_gpr = current_function_args_info.regno;
4691 n_fpr = current_function_args_info.sse_regno;
4692
4693 if (TARGET_DEBUG_ARG)
4694 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4695 (int) words, (int) n_gpr, (int) n_fpr);
4696
4697 if (cfun->va_list_gpr_size)
4698 {
4699 type = TREE_TYPE (gpr);
4700 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4701 build_int_cst (type, n_gpr * 8));
4702 TREE_SIDE_EFFECTS (t) = 1;
4703 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4704 }
4705
4706 if (cfun->va_list_fpr_size)
4707 {
4708 type = TREE_TYPE (fpr);
4709 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4710 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4711 TREE_SIDE_EFFECTS (t) = 1;
4712 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4713 }
4714
4715 /* Find the overflow area. */
4716 type = TREE_TYPE (ovf);
4717 t = make_tree (type, virtual_incoming_args_rtx);
4718 if (words != 0)
4719 t = build2 (PLUS_EXPR, type, t,
4720 build_int_cst (type, words * UNITS_PER_WORD));
4721 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4722 TREE_SIDE_EFFECTS (t) = 1;
4723 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4724
4725 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4726 {
4727 /* Find the register save area.
4728 The function prologue saves it right above the stack frame. */
4729 type = TREE_TYPE (sav);
4730 t = make_tree (type, frame_pointer_rtx);
4731 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4732 TREE_SIDE_EFFECTS (t) = 1;
4733 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4734 }
4735 }
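/* A small worked example of the assignments above: assume a hypothetical
   variadic function whose named arguments consumed two GP registers and
   one SSE register.  va_start then sets gp_offset = 2 * 8 = 16 and
   fp_offset = 1 * 16 + 8 * REGPARM_MAX = 64, points overflow_arg_area just
   past the named stack arguments, and points reg_save_area at the block
   the prologue spilled right above the stack frame.  */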
4736
4737 /* Implement va_arg. */
4738
4739 tree
4740 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4741 {
4742 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4743 tree f_gpr, f_fpr, f_ovf, f_sav;
4744 tree gpr, fpr, ovf, sav, t;
4745 int size, rsize;
4746 tree lab_false, lab_over = NULL_TREE;
4747 tree addr, t2;
4748 rtx container;
4749 int indirect_p = 0;
4750 tree ptrtype;
4751 enum machine_mode nat_mode;
4752
4753 /* Only 64bit target needs something special. */
4754 if (!TARGET_64BIT)
4755 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4756
4757 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4758 f_fpr = TREE_CHAIN (f_gpr);
4759 f_ovf = TREE_CHAIN (f_fpr);
4760 f_sav = TREE_CHAIN (f_ovf);
4761
4762 valist = build_va_arg_indirect_ref (valist);
4763 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4764 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4765 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4766 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4767
4768 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4769 if (indirect_p)
4770 type = build_pointer_type (type);
4771 size = int_size_in_bytes (type);
4772 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4773
4774 nat_mode = type_natural_mode (type);
4775 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4776 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4777
4778 /* Pull the value out of the saved registers. */
4779
4780 addr = create_tmp_var (ptr_type_node, "addr");
4781 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4782
4783 if (container)
4784 {
4785 int needed_intregs, needed_sseregs;
4786 bool need_temp;
4787 tree int_addr, sse_addr;
4788
4789 lab_false = create_artificial_label ();
4790 lab_over = create_artificial_label ();
4791
4792 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4793
4794 need_temp = (!REG_P (container)
4795 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4796 || TYPE_ALIGN (type) > 128));
4797
4798 /* In case we are passing a structure, verify that it is a consecutive
4799 block in the register save area. If not, we need to do moves. */
4800 if (!need_temp && !REG_P (container))
4801 {
4802 /* Verify that all registers are strictly consecutive */
4803 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4804 {
4805 int i;
4806
4807 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4808 {
4809 rtx slot = XVECEXP (container, 0, i);
4810 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4811 || INTVAL (XEXP (slot, 1)) != i * 16)
4812 need_temp = 1;
4813 }
4814 }
4815 else
4816 {
4817 int i;
4818
4819 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4820 {
4821 rtx slot = XVECEXP (container, 0, i);
4822 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4823 || INTVAL (XEXP (slot, 1)) != i * 8)
4824 need_temp = 1;
4825 }
4826 }
4827 }
4828 if (!need_temp)
4829 {
4830 int_addr = addr;
4831 sse_addr = addr;
4832 }
4833 else
4834 {
4835 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4836 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4837 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4838 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4839 }
4840
4841 /* First ensure that we fit completely in registers. */
4842 if (needed_intregs)
4843 {
4844 t = build_int_cst (TREE_TYPE (gpr),
4845 (REGPARM_MAX - needed_intregs + 1) * 8);
4846 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4847 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4848 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4849 gimplify_and_add (t, pre_p);
4850 }
4851 if (needed_sseregs)
4852 {
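          /* FPR is the offset into the register save area of the next unused
             SSE slot; SSE slots are 16 bytes each and start after the
             REGPARM_MAX * 8 bytes reserved for integer registers.  Jump to the
             overflow path when not enough SSE slots remain.  */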
4853 t = build_int_cst (TREE_TYPE (fpr),
4854 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4855 + REGPARM_MAX * 8);
4856 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4857 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4858 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4859 gimplify_and_add (t, pre_p);
4860 }
4861
4862 /* Compute index to start of area used for integer regs. */
4863 if (needed_intregs)
4864 {
4865 /* int_addr = gpr + sav; */
4866 t = fold_convert (ptr_type_node, gpr);
4867 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4868 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4869 gimplify_and_add (t, pre_p);
4870 }
4871 if (needed_sseregs)
4872 {
4873 /* sse_addr = fpr + sav; */
4874 t = fold_convert (ptr_type_node, fpr);
4875 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4876 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4877 gimplify_and_add (t, pre_p);
4878 }
4879 if (need_temp)
4880 {
4881 int i;
4882 tree temp = create_tmp_var (type, "va_arg_tmp");
4883
4884 /* addr = &temp; */
4885 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4886 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4887 gimplify_and_add (t, pre_p);
4888
4889 for (i = 0; i < XVECLEN (container, 0); i++)
4890 {
4891 rtx slot = XVECEXP (container, 0, i);
4892 rtx reg = XEXP (slot, 0);
4893 enum machine_mode mode = GET_MODE (reg);
4894 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4895 tree addr_type = build_pointer_type (piece_type);
4896 tree src_addr, src;
4897 int src_offset;
4898 tree dest_addr, dest;
4899
4900 if (SSE_REGNO_P (REGNO (reg)))
4901 {
4902 src_addr = sse_addr;
4903 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4904 }
4905 else
4906 {
4907 src_addr = int_addr;
4908 src_offset = REGNO (reg) * 8;
4909 }
4910 src_addr = fold_convert (addr_type, src_addr);
4911 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4912 size_int (src_offset));
4913 src = build_va_arg_indirect_ref (src_addr);
4914
4915 dest_addr = fold_convert (addr_type, addr);
4916 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4917 size_int (INTVAL (XEXP (slot, 1))));
4918 dest = build_va_arg_indirect_ref (dest_addr);
4919
4920 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4921 gimplify_and_add (t, pre_p);
4922 }
4923 }
4924
4925 if (needed_intregs)
4926 {
4927 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4928 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4929 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4930 gimplify_and_add (t, pre_p);
4931 }
4932 if (needed_sseregs)
4933 {
4934 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4935 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4936 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4937 gimplify_and_add (t, pre_p);
4938 }
4939
4940 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4941 gimplify_and_add (t, pre_p);
4942
4943 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4944 append_to_statement_list (t, pre_p);
4945 }
4946
4947 /* ... otherwise out of the overflow area. */
4948
4949 /* Care for on-stack alignment if needed. */
4950 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4951 || integer_zerop (TYPE_SIZE (type)))
4952 t = ovf;
4953 else
4954 {
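      /* Round the overflow-area pointer up to the next multiple of the
         argument's required alignment: t = (ovf + align - 1) & -align.  */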
4955 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4956 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4957 build_int_cst (TREE_TYPE (ovf), align - 1));
4958 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4959 build_int_cst (TREE_TYPE (t), -align));
4960 }
4961 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4962
4963 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4964 gimplify_and_add (t2, pre_p);
4965
4966 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4967 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4968 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4969 gimplify_and_add (t, pre_p);
4970
4971 if (container)
4972 {
4973 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4974 append_to_statement_list (t, pre_p);
4975 }
4976
4977 ptrtype = build_pointer_type (type);
4978 addr = fold_convert (ptrtype, addr);
4979
4980 if (indirect_p)
4981 addr = build_va_arg_indirect_ref (addr);
4982 return build_va_arg_indirect_ref (addr);
4983 }
4984 \f
4985 /* Return nonzero if OPNUM's MEM should be matched
4986 in movabs* patterns. */
4987
4988 int
4989 ix86_check_movabs (rtx insn, int opnum)
4990 {
4991 rtx set, mem;
4992
4993 set = PATTERN (insn);
4994 if (GET_CODE (set) == PARALLEL)
4995 set = XVECEXP (set, 0, 0);
4996 gcc_assert (GET_CODE (set) == SET);
4997 mem = XEXP (set, opnum);
4998 while (GET_CODE (mem) == SUBREG)
4999 mem = SUBREG_REG (mem);
5000 gcc_assert (MEM_P (mem));
5001 return (volatile_ok || !MEM_VOLATILE_P (mem));
5002 }
5003 \f
5004 /* Initialize the table of extra 80387 mathematical constants. */
5005
5006 static void
5007 init_ext_80387_constants (void)
5008 {
5009 static const char * cst[5] =
5010 {
5011 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5012 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5013 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5014 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5015 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5016 };
5017 int i;
5018
5019 for (i = 0; i < 5; i++)
5020 {
5021 real_from_string (&ext_80387_constants_table[i], cst[i]);
5022 /* Ensure each constant is rounded to XFmode precision. */
5023 real_convert (&ext_80387_constants_table[i],
5024 XFmode, &ext_80387_constants_table[i]);
5025 }
5026
5027 ext_80387_constants_init = 1;
5028 }
5029
5030 /* Return a nonzero code identifying the special 80387 instruction that can
5031    load the constant X, 0 if there is none, or -1 if X is not an FP CONST_DOUBLE.  */
5032
5033 int
5034 standard_80387_constant_p (rtx x)
5035 {
5036 REAL_VALUE_TYPE r;
5037
5038 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5039 return -1;
5040
5041 if (x == CONST0_RTX (GET_MODE (x)))
5042 return 1;
5043 if (x == CONST1_RTX (GET_MODE (x)))
5044 return 2;
5045
5046 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5047
5048 /* For XFmode constants, try to find a special 80387 instruction when
5049 optimizing for size or on those CPUs that benefit from them. */
5050 if (GET_MODE (x) == XFmode
5051 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5052 {
5053 int i;
5054
5055 if (! ext_80387_constants_init)
5056 init_ext_80387_constants ();
5057
5058 for (i = 0; i < 5; i++)
5059 if (real_identical (&r, &ext_80387_constants_table[i]))
5060 return i + 3;
5061 }
5062
5063   /* A load of the constant -0.0 or -1.0 will be split into an
5064      fldz;fchs or fld1;fchs sequence.  */
5065 if (real_isnegzero (&r))
5066 return 8;
5067 if (real_identical (&r, &dconstm1))
5068 return 9;
5069
5070 return 0;
5071 }
5072
5073 /* Return the opcode of the special instruction to be used to load
5074 the constant X. */
5075
5076 const char *
5077 standard_80387_constant_opcode (rtx x)
5078 {
5079 switch (standard_80387_constant_p (x))
5080 {
5081 case 1:
5082 return "fldz";
5083 case 2:
5084 return "fld1";
5085 case 3:
5086 return "fldlg2";
5087 case 4:
5088 return "fldln2";
5089 case 5:
5090 return "fldl2e";
5091 case 6:
5092 return "fldl2t";
5093 case 7:
5094 return "fldpi";
5095 case 8:
5096 case 9:
5097 return "#";
5098 default:
5099 gcc_unreachable ();
5100 }
5101 }
5102
5103 /* Return the CONST_DOUBLE representing the 80387 constant that is
5104 loaded by the specified special instruction. The argument IDX
5105 matches the return value from standard_80387_constant_p. */
5106
5107 rtx
5108 standard_80387_constant_rtx (int idx)
5109 {
5110 int i;
5111
5112 if (! ext_80387_constants_init)
5113 init_ext_80387_constants ();
5114
5115 switch (idx)
5116 {
5117 case 3:
5118 case 4:
5119 case 5:
5120 case 6:
5121 case 7:
5122 i = idx - 3;
5123 break;
5124
5125 default:
5126 gcc_unreachable ();
5127 }
5128
5129 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5130 XFmode);
5131 }
5132
5133 /* Return 1 if MODE is a valid mode for SSE.  */
5134 static int
5135 standard_sse_mode_p (enum machine_mode mode)
5136 {
5137 switch (mode)
5138 {
5139 case V16QImode:
5140 case V8HImode:
5141 case V4SImode:
5142 case V2DImode:
5143 case V4SFmode:
5144 case V2DFmode:
5145 return 1;
5146
5147 default:
5148 return 0;
5149 }
5150 }
5151
5152 /* Return 1 if X is an FP constant we can load into an SSE register
5153    without using memory.  */
5154 int
5155 standard_sse_constant_p (rtx x)
5156 {
5157 enum machine_mode mode = GET_MODE (x);
5158
5159 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5160 return 1;
5161 if (vector_all_ones_operand (x, mode)
5162 && standard_sse_mode_p (mode))
5163 return TARGET_SSE2 ? 2 : -1;
5164
5165 return 0;
5166 }
5167
5168 /* Return the opcode of the special instruction to be used to load
5169 the constant X. */
5170
5171 const char *
5172 standard_sse_constant_opcode (rtx insn, rtx x)
5173 {
5174 switch (standard_sse_constant_p (x))
5175 {
5176 case 1:
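      /* All-zero constant: clear the register with the xor variant matching
         the insn's mode (xorps for V4SF, xorpd for V2DF, pxor otherwise).  */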
5177 if (get_attr_mode (insn) == MODE_V4SF)
5178 return "xorps\t%0, %0";
5179 else if (get_attr_mode (insn) == MODE_V2DF)
5180 return "xorpd\t%0, %0";
5181 else
5182 return "pxor\t%0, %0";
5183 case 2:
5184 return "pcmpeqd\t%0, %0";
5185 }
5186 gcc_unreachable ();
5187 }
5188
5189 /* Return 1 if OP contains a symbol reference.  */
5190
5191 int
5192 symbolic_reference_mentioned_p (rtx op)
5193 {
5194 const char *fmt;
5195 int i;
5196
5197 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5198 return 1;
5199
5200 fmt = GET_RTX_FORMAT (GET_CODE (op));
5201 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5202 {
5203 if (fmt[i] == 'E')
5204 {
5205 int j;
5206
5207 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5208 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5209 return 1;
5210 }
5211
5212 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5213 return 1;
5214 }
5215
5216 return 0;
5217 }
5218
5219 /* Return 1 if it is appropriate to emit `ret' instructions in the
5220 body of a function. Do this only if the epilogue is simple, needing a
5221 couple of insns. Prior to reloading, we can't tell how many registers
5222 must be saved, so return 0 then. Return 0 if there is no frame
5223 marker to de-allocate. */
5224
5225 int
5226 ix86_can_use_return_insn_p (void)
5227 {
5228 struct ix86_frame frame;
5229
5230 if (! reload_completed || frame_pointer_needed)
5231 return 0;
5232
5233   /* Don't allow popping more than 32k bytes of arguments, since that's
5234      all we can do with one instruction.  */
5235 if (current_function_pops_args
5236 && current_function_args_size >= 32768)
5237 return 0;
5238
5239 ix86_compute_frame_layout (&frame);
5240 return frame.to_allocate == 0 && frame.nregs == 0;
5241 }
5242 \f
5243 /* Value should be nonzero if functions must have frame pointers.
5244 Zero means the frame pointer need not be set up (and parms may
5245 be accessed via the stack pointer) in functions that seem suitable. */
5246
5247 int
5248 ix86_frame_pointer_required (void)
5249 {
5250 /* If we accessed previous frames, then the generated code expects
5251 to be able to access the saved ebp value in our frame. */
5252 if (cfun->machine->accesses_prev_frame)
5253 return 1;
5254
5255   /* Several x86 OSes need a frame pointer for other reasons,
5256      usually pertaining to setjmp.  */
5257 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5258 return 1;
5259
5260 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5261 the frame pointer by default. Turn it back on now if we've not
5262 got a leaf function. */
5263 if (TARGET_OMIT_LEAF_FRAME_POINTER
5264 && (!current_function_is_leaf
5265 || ix86_current_function_calls_tls_descriptor))
5266 return 1;
5267
5268 if (current_function_profile)
5269 return 1;
5270
5271 return 0;
5272 }
5273
5274 /* Record that the current function accesses previous call frames. */
5275
5276 void
5277 ix86_setup_frame_addresses (void)
5278 {
5279 cfun->machine->accesses_prev_frame = 1;
5280 }
5281 \f
5282 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5283 # define USE_HIDDEN_LINKONCE 1
5284 #else
5285 # define USE_HIDDEN_LINKONCE 0
5286 #endif
5287
5288 static int pic_labels_used;
5289
5290 /* Fills in the label name that should be used for a pc thunk for
5291 the given register. */
5292
5293 static void
5294 get_pc_thunk_name (char name[32], unsigned int regno)
5295 {
5296 gcc_assert (!TARGET_64BIT);
5297
5298 if (USE_HIDDEN_LINKONCE)
5299 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5300 else
5301 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5302 }
5303
5304
5305 /* This function emits the pc thunks used by -fpic code: each thunk loads
5306    its register with the return address of the caller and then returns.  */
5307
5308 void
5309 ix86_file_end (void)
5310 {
5311 rtx xops[2];
5312 int regno;
5313
5314 for (regno = 0; regno < 8; ++regno)
5315 {
5316 char name[32];
5317
5318 if (! ((pic_labels_used >> regno) & 1))
5319 continue;
5320
5321 get_pc_thunk_name (name, regno);
5322
5323 #if TARGET_MACHO
5324 if (TARGET_MACHO)
5325 {
5326 switch_to_section (darwin_sections[text_coal_section]);
5327 fputs ("\t.weak_definition\t", asm_out_file);
5328 assemble_name (asm_out_file, name);
5329 fputs ("\n\t.private_extern\t", asm_out_file);
5330 assemble_name (asm_out_file, name);
5331 fputs ("\n", asm_out_file);
5332 ASM_OUTPUT_LABEL (asm_out_file, name);
5333 }
5334 else
5335 #endif
5336 if (USE_HIDDEN_LINKONCE)
5337 {
5338 tree decl;
5339
5340 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5341 error_mark_node);
5342 TREE_PUBLIC (decl) = 1;
5343 TREE_STATIC (decl) = 1;
5344 DECL_ONE_ONLY (decl) = 1;
5345
5346 (*targetm.asm_out.unique_section) (decl, 0);
5347 switch_to_section (get_named_section (decl, NULL, 0));
5348
5349 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5350 fputs ("\t.hidden\t", asm_out_file);
5351 assemble_name (asm_out_file, name);
5352 fputc ('\n', asm_out_file);
5353 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5354 }
5355 else
5356 {
5357 switch_to_section (text_section);
5358 ASM_OUTPUT_LABEL (asm_out_file, name);
5359 }
5360
5361 xops[0] = gen_rtx_REG (SImode, regno);
5362 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5363 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5364 output_asm_insn ("ret", xops);
5365 }
5366
5367 if (NEED_INDICATE_EXEC_STACK)
5368 file_end_indicate_exec_stack ();
5369 }
5370
5371 /* Emit code for the SET_GOT patterns. */
5372
5373 const char *
5374 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5375 {
5376 rtx xops[3];
5377
5378 xops[0] = dest;
5379
5380 if (TARGET_VXWORKS_RTP && flag_pic)
5381 {
5382 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5383 xops[2] = gen_rtx_MEM (Pmode,
5384 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5385 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5386
5387 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5388 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5389 an unadorned address. */
5390 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5391 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5392 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5393 return "";
5394 }
5395
5396 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5397
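  /* Without deep branch prediction (or when not generating PIC code at all),
     materialize the PIC base with a call/pop sequence (or a plain move of the
     label); otherwise call the per-register pc thunk emitted by ix86_file_end.  */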
5398 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5399 {
5400 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5401
5402 if (!flag_pic)
5403 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5404 else
5405 output_asm_insn ("call\t%a2", xops);
5406
5407 #if TARGET_MACHO
5408 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5409 is what will be referenced by the Mach-O PIC subsystem. */
5410 if (!label)
5411 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5412 #endif
5413
5414 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5415 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5416
5417 if (flag_pic)
5418 output_asm_insn ("pop{l}\t%0", xops);
5419 }
5420 else
5421 {
5422 char name[32];
5423 get_pc_thunk_name (name, REGNO (dest));
5424 pic_labels_used |= 1 << REGNO (dest);
5425
5426 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5427 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5428 output_asm_insn ("call\t%X2", xops);
5429 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5430 is what will be referenced by the Mach-O PIC subsystem. */
5431 #if TARGET_MACHO
5432 if (!label)
5433 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5434 else
5435 targetm.asm_out.internal_label (asm_out_file, "L",
5436 CODE_LABEL_NUMBER (label));
5437 #endif
5438 }
5439
5440 if (TARGET_MACHO)
5441 return "";
5442
5443 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5444 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5445 else
5446 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5447
5448 return "";
5449 }
5450
5451 /* Generate a "push" pattern for input ARG.  */
5452
5453 static rtx
5454 gen_push (rtx arg)
5455 {
5456 return gen_rtx_SET (VOIDmode,
5457 gen_rtx_MEM (Pmode,
5458 gen_rtx_PRE_DEC (Pmode,
5459 stack_pointer_rtx)),
5460 arg);
5461 }
5462
5463 /* Return the number of an unused call-clobbered register available for the
5464    entire function, or INVALID_REGNUM if there is none.  */
5465
5466 static unsigned int
5467 ix86_select_alt_pic_regnum (void)
5468 {
5469 if (current_function_is_leaf && !current_function_profile
5470 && !ix86_current_function_calls_tls_descriptor)
5471 {
5472 int i;
5473 for (i = 2; i >= 0; --i)
5474 if (!regs_ever_live[i])
5475 return i;
5476 }
5477
5478 return INVALID_REGNUM;
5479 }
5480
5481 /* Return 1 if we need to save REGNO. */
5482 static int
5483 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5484 {
5485 if (pic_offset_table_rtx
5486 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5487 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5488 || current_function_profile
5489 || current_function_calls_eh_return
5490 || current_function_uses_const_pool))
5491 {
5492 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5493 return 0;
5494 return 1;
5495 }
5496
5497 if (current_function_calls_eh_return && maybe_eh_return)
5498 {
5499 unsigned i;
5500 for (i = 0; ; i++)
5501 {
5502 unsigned test = EH_RETURN_DATA_REGNO (i);
5503 if (test == INVALID_REGNUM)
5504 break;
5505 if (test == regno)
5506 return 1;
5507 }
5508 }
5509
5510 if (cfun->machine->force_align_arg_pointer
5511 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5512 return 1;
5513
5514 return (regs_ever_live[regno]
5515 && !call_used_regs[regno]
5516 && !fixed_regs[regno]
5517 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5518 }
5519
5520 /* Return number of registers to be saved on the stack. */
5521
5522 static int
5523 ix86_nsaved_regs (void)
5524 {
5525 int nregs = 0;
5526 int regno;
5527
5528 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5529 if (ix86_save_reg (regno, true))
5530 nregs++;
5531 return nregs;
5532 }
5533
5534 /* Return the offset between two registers, one to be eliminated, and the other
5535 its replacement, at the start of a routine. */
5536
5537 HOST_WIDE_INT
5538 ix86_initial_elimination_offset (int from, int to)
5539 {
5540 struct ix86_frame frame;
5541 ix86_compute_frame_layout (&frame);
5542
5543 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5544 return frame.hard_frame_pointer_offset;
5545 else if (from == FRAME_POINTER_REGNUM
5546 && to == HARD_FRAME_POINTER_REGNUM)
5547 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5548 else
5549 {
5550 gcc_assert (to == STACK_POINTER_REGNUM);
5551
5552 if (from == ARG_POINTER_REGNUM)
5553 return frame.stack_pointer_offset;
5554
5555 gcc_assert (from == FRAME_POINTER_REGNUM);
5556 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5557 }
5558 }
5559
5560 /* Fill in the ix86_frame structure for the frame of the currently compiled function.  */
5561
5562 static void
5563 ix86_compute_frame_layout (struct ix86_frame *frame)
5564 {
5565 HOST_WIDE_INT total_size;
5566 unsigned int stack_alignment_needed;
5567 HOST_WIDE_INT offset;
5568 unsigned int preferred_alignment;
5569 HOST_WIDE_INT size = get_frame_size ();
5570
5571 frame->nregs = ix86_nsaved_regs ();
5572 total_size = size;
5573
5574 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5575 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5576
5577   /* During reload iterations the number of registers saved can change.
5578      Recompute the value as needed.  Do not recompute when the number of
5579      registers didn't change, as reload makes multiple calls to this function
5580      and does not expect the decision to change within a single iteration.  */
5581 if (!optimize_size
5582 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5583 {
5584 int count = frame->nregs;
5585
5586 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5587     /* The fast prologue uses moves instead of pushes to save registers.  This
5588        is significantly longer, but also executes faster, as modern hardware
5589        can execute the moves in parallel but can't do that for push/pop.
5590
5591        Be careful about choosing what prologue to emit: when the function takes
5592        many instructions to execute, we may use the slow version, as well as
5593        when the function is known to be outside a hot spot (this is known with
5594        feedback only).  Weight the size of the function by the number of
5595        registers to save, as it is cheap to use one or two push instructions
5596        but very slow to use many of them.  */
5597 if (count)
5598 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5599 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5600 || (flag_branch_probabilities
5601 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5602 cfun->machine->use_fast_prologue_epilogue = false;
5603 else
5604 cfun->machine->use_fast_prologue_epilogue
5605 = !expensive_function_p (count);
5606 }
5607 if (TARGET_PROLOGUE_USING_MOVE
5608 && cfun->machine->use_fast_prologue_epilogue)
5609 frame->save_regs_using_mov = true;
5610 else
5611 frame->save_regs_using_mov = false;
5612
5613
5614 /* Skip return address and saved base pointer. */
5615 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5616
5617 frame->hard_frame_pointer_offset = offset;
5618
5619   /* Do some sanity checking of stack_alignment_needed and preferred_alignment,
5620      since the i386 port is the only one using these features, and they may
5621      break easily.  */
5622
5623 gcc_assert (!size || stack_alignment_needed);
5624 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5625 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5626 gcc_assert (stack_alignment_needed
5627 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5628
5629 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5630 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5631
5632 /* Register save area */
5633 offset += frame->nregs * UNITS_PER_WORD;
5634
5635 /* Va-arg area */
5636 if (ix86_save_varrargs_registers)
5637 {
5638 offset += X86_64_VARARGS_SIZE;
5639 frame->va_arg_size = X86_64_VARARGS_SIZE;
5640 }
5641 else
5642 frame->va_arg_size = 0;
5643
5644 /* Align start of frame for local function. */
5645 frame->padding1 = ((offset + stack_alignment_needed - 1)
5646 & -stack_alignment_needed) - offset;
5647
5648 offset += frame->padding1;
5649
5650 /* Frame pointer points here. */
5651 frame->frame_pointer_offset = offset;
5652
5653 offset += size;
5654
5655   /* Add the outgoing arguments area.  It can be skipped if we eliminated
5656      all the function calls as dead code.
5657      Skipping is, however, impossible when the function calls alloca: the
5658      alloca expander assumes that the last current_function_outgoing_args_size
5659      bytes of the stack frame are unused.  */
5660 if (ACCUMULATE_OUTGOING_ARGS
5661 && (!current_function_is_leaf || current_function_calls_alloca
5662 || ix86_current_function_calls_tls_descriptor))
5663 {
5664 offset += current_function_outgoing_args_size;
5665 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5666 }
5667 else
5668 frame->outgoing_arguments_size = 0;
5669
5670 /* Align stack boundary. Only needed if we're calling another function
5671 or using alloca. */
5672 if (!current_function_is_leaf || current_function_calls_alloca
5673 || ix86_current_function_calls_tls_descriptor)
5674 frame->padding2 = ((offset + preferred_alignment - 1)
5675 & -preferred_alignment) - offset;
5676 else
5677 frame->padding2 = 0;
5678
5679 offset += frame->padding2;
5680
5681 /* We've reached end of stack frame. */
5682 frame->stack_pointer_offset = offset;
5683
5684 /* Size prologue needs to allocate. */
5685 frame->to_allocate =
5686 (size + frame->padding1 + frame->padding2
5687 + frame->outgoing_arguments_size + frame->va_arg_size);
5688
5689 if ((!frame->to_allocate && frame->nregs <= 1)
5690 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5691 frame->save_regs_using_mov = false;
5692
5693 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5694 && current_function_is_leaf
5695 && !ix86_current_function_calls_tls_descriptor)
5696 {
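      /* A leaf function that does not change the stack pointer may use the
         red zone below it, so that part of the frame (including register
         saves done with moves) need not be allocated explicitly.  */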
5697 frame->red_zone_size = frame->to_allocate;
5698 if (frame->save_regs_using_mov)
5699 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5700 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5701 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5702 }
5703 else
5704 frame->red_zone_size = 0;
5705 frame->to_allocate -= frame->red_zone_size;
5706 frame->stack_pointer_offset -= frame->red_zone_size;
5707 #if 0
5708 fprintf (stderr, "\n");
5709 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5710 fprintf (stderr, "size: %ld\n", (long)size);
5711 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5712 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5713 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5714 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5715 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5716 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5717 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5718 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5719 (long)frame->hard_frame_pointer_offset);
5720 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5721 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5722 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5723 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5724 #endif
5725 }
5726
5727 /* Emit code to save registers in the prologue. */
5728
5729 static void
5730 ix86_emit_save_regs (void)
5731 {
5732 unsigned int regno;
5733 rtx insn;
5734
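  /* Push the registers in decreasing register-number order; the epilogue
     pops them back in increasing order.  */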
5735 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5736 if (ix86_save_reg (regno, true))
5737 {
5738 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5739 RTX_FRAME_RELATED_P (insn) = 1;
5740 }
5741 }
5742
5743 /* Emit code to save registers using MOV insns.  The first register
5744    is saved at POINTER + OFFSET.  */
5745 static void
5746 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5747 {
5748 unsigned int regno;
5749 rtx insn;
5750
5751 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5752 if (ix86_save_reg (regno, true))
5753 {
5754 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5755 Pmode, offset),
5756 gen_rtx_REG (Pmode, regno));
5757 RTX_FRAME_RELATED_P (insn) = 1;
5758 offset += UNITS_PER_WORD;
5759 }
5760 }
5761
5762 /* Expand prologue or epilogue stack adjustment.
5763    The pattern exists to put a dependency on all ebp-based memory accesses.
5764    STYLE should be negative if the instructions should be marked as frame
5765    related, zero if the %r11 register is live and cannot be freely used,
5766    and positive otherwise.  */
5767
5768 static void
5769 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5770 {
5771 rtx insn;
5772
5773 if (! TARGET_64BIT)
5774 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5775 else if (x86_64_immediate_operand (offset, DImode))
5776 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5777 else
5778 {
5779 rtx r11;
5780 /* r11 is used by indirect sibcall return as well, set before the
5781 epilogue and used after the epilogue. ATM indirect sibcall
5782 shouldn't be used together with huge frame sizes in one
5783 function because of the frame_size check in sibcall.c. */
5784 gcc_assert (style);
5785 r11 = gen_rtx_REG (DImode, R11_REG);
5786 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5787 if (style < 0)
5788 RTX_FRAME_RELATED_P (insn) = 1;
5789 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5790 offset));
5791 }
5792 if (style < 0)
5793 RTX_FRAME_RELATED_P (insn) = 1;
5794 }
5795
5796 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5797
5798 static rtx
5799 ix86_internal_arg_pointer (void)
5800 {
5801 bool has_force_align_arg_pointer =
5802 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5803 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5804 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5805 && DECL_NAME (current_function_decl)
5806 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5807 && DECL_FILE_SCOPE_P (current_function_decl))
5808 || ix86_force_align_arg_pointer
5809 || has_force_align_arg_pointer)
5810 {
5811 /* Nested functions can't realign the stack due to a register
5812 conflict. */
5813 if (DECL_CONTEXT (current_function_decl)
5814 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5815 {
5816 if (ix86_force_align_arg_pointer)
5817 warning (0, "-mstackrealign ignored for nested functions");
5818 if (has_force_align_arg_pointer)
5819 error ("%s not supported for nested functions",
5820 ix86_force_align_arg_pointer_string);
5821 return virtual_incoming_args_rtx;
5822 }
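      /* Hard register 2 is %ecx; it holds a copy of the incoming argument
         pointer while the prologue realigns the stack.  */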
5823 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5824 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5825 }
5826 else
5827 return virtual_incoming_args_rtx;
5828 }
5829
5830 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5831 This is called from dwarf2out.c to emit call frame instructions
5832 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5833 static void
5834 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5835 {
5836 rtx unspec = SET_SRC (pattern);
5837 gcc_assert (GET_CODE (unspec) == UNSPEC);
5838
5839 switch (index)
5840 {
5841 case UNSPEC_REG_SAVE:
5842 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5843 SET_DEST (pattern));
5844 break;
5845 case UNSPEC_DEF_CFA:
5846 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5847 INTVAL (XVECEXP (unspec, 0, 0)));
5848 break;
5849 default:
5850 gcc_unreachable ();
5851 }
5852 }
5853
5854 /* Expand the prologue into a bunch of separate insns. */
5855
5856 void
5857 ix86_expand_prologue (void)
5858 {
5859 rtx insn;
5860 bool pic_reg_used;
5861 struct ix86_frame frame;
5862 HOST_WIDE_INT allocate;
5863
5864 ix86_compute_frame_layout (&frame);
5865
5866 if (cfun->machine->force_align_arg_pointer)
5867 {
5868 rtx x, y;
5869
5870 /* Grab the argument pointer. */
5871 x = plus_constant (stack_pointer_rtx, 4);
5872 y = cfun->machine->force_align_arg_pointer;
5873 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5874 RTX_FRAME_RELATED_P (insn) = 1;
5875
5876 /* The unwind info consists of two parts: install the fafp as the cfa,
5877 and record the fafp as the "save register" of the stack pointer.
5878      The latter is there so that the unwinder can see where it
5879      should restore the stack pointer across the AND insn.  */
5880 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5881 x = gen_rtx_SET (VOIDmode, y, x);
5882 RTX_FRAME_RELATED_P (x) = 1;
5883 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5884 UNSPEC_REG_SAVE);
5885 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5886 RTX_FRAME_RELATED_P (y) = 1;
5887 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5888 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5889 REG_NOTES (insn) = x;
5890
5891 /* Align the stack. */
5892 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5893 GEN_INT (-16)));
5894
5895 /* And here we cheat like madmen with the unwind info. We force the
5896 cfa register back to sp+4, which is exactly what it was at the
5897 start of the function. Re-pushing the return address results in
5898 the return at the same spot relative to the cfa, and thus is
5899 correct wrt the unwind info. */
5900 x = cfun->machine->force_align_arg_pointer;
5901 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5902 insn = emit_insn (gen_push (x));
5903 RTX_FRAME_RELATED_P (insn) = 1;
5904
5905 x = GEN_INT (4);
5906 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5907 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5908 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5909 REG_NOTES (insn) = x;
5910 }
5911
5912 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5913 slower on all targets. Also sdb doesn't like it. */
5914
5915 if (frame_pointer_needed)
5916 {
5917 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5918 RTX_FRAME_RELATED_P (insn) = 1;
5919
5920 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5921 RTX_FRAME_RELATED_P (insn) = 1;
5922 }
5923
5924 allocate = frame.to_allocate;
5925
5926 if (!frame.save_regs_using_mov)
5927 ix86_emit_save_regs ();
5928 else
5929 allocate += frame.nregs * UNITS_PER_WORD;
5930
5931   /* When using the red zone we may start saving registers before allocating
5932      the stack frame, saving one cycle of the prologue.  */
5933 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5934 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5935 : stack_pointer_rtx,
5936 -frame.nregs * UNITS_PER_WORD);
5937
5938 if (allocate == 0)
5939 ;
5940 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5941 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5942 GEN_INT (-allocate), -1);
5943 else
5944 {
5945 /* Only valid for Win32. */
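      /* The requested allocation size is passed to the stack-probing helper
         in %eax; the REG_FRAME_RELATED_EXPR note attached below records the
         net stack adjustment for the unwind info.  */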
5946 rtx eax = gen_rtx_REG (SImode, 0);
5947 bool eax_live = ix86_eax_live_at_start_p ();
5948 rtx t;
5949
5950 gcc_assert (!TARGET_64BIT);
5951
5952 if (eax_live)
5953 {
5954 emit_insn (gen_push (eax));
5955 allocate -= 4;
5956 }
5957
5958 emit_move_insn (eax, GEN_INT (allocate));
5959
5960 insn = emit_insn (gen_allocate_stack_worker (eax));
5961 RTX_FRAME_RELATED_P (insn) = 1;
5962 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5963 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5964 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5965 t, REG_NOTES (insn));
5966
5967 if (eax_live)
5968 {
5969 if (frame_pointer_needed)
5970 t = plus_constant (hard_frame_pointer_rtx,
5971 allocate
5972 - frame.to_allocate
5973 - frame.nregs * UNITS_PER_WORD);
5974 else
5975 t = plus_constant (stack_pointer_rtx, allocate);
5976 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5977 }
5978 }
5979
5980 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5981 {
5982 if (!frame_pointer_needed || !frame.to_allocate)
5983 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5984 else
5985 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5986 -frame.nregs * UNITS_PER_WORD);
5987 }
5988
5989 pic_reg_used = false;
5990 if (pic_offset_table_rtx
5991 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5992 || current_function_profile))
5993 {
5994 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5995
5996 if (alt_pic_reg_used != INVALID_REGNUM)
5997 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5998
5999 pic_reg_used = true;
6000 }
6001
6002 if (pic_reg_used)
6003 {
6004 if (TARGET_64BIT)
6005 {
6006 if (ix86_cmodel == CM_LARGE_PIC)
6007 {
6008 rtx tmp_reg = gen_rtx_REG (DImode,
6009 FIRST_REX_INT_REG + 3 /* R11 */);
6010 rtx label = gen_label_rtx ();
6011 emit_label (label);
6012 LABEL_PRESERVE_P (label) = 1;
6013 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6014 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6015 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6016 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6017 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6018 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6019 pic_offset_table_rtx, tmp_reg));
6020 }
6021 else
6022 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6023 }
6024 else
6025 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6026
6027       /* Even with accurate pre-reload life analysis, we can wind up
6028          deleting all references to the pic register after reload.
6029          Consider the case where cross-jumping unifies two sides of a branch
6030          controlled by a comparison vs. the only read from a global.
6031          In that case, allow the set_got to be deleted, though we're
6032          too late to do anything about the ebx save in the prologue.  */
6033 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6034 }
6035
6036   /* Prevent function calls from being scheduled before the call to mcount.
6037      In the pic_reg_used case, make sure that the GOT load isn't deleted.  */
6038 if (current_function_profile)
6039 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6040 }
6041
6042 /* Emit code to restore saved registers using MOV insns. First register
6043 is restored from POINTER + OFFSET. */
6044 static void
6045 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6046 int maybe_eh_return)
6047 {
6048 int regno;
6049 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6050
6051 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6052 if (ix86_save_reg (regno, maybe_eh_return))
6053 {
6054         /* Ensure that adjust_address won't be forced to produce a pointer
6055            outside the range allowed by the x86-64 instruction set.  */
6056 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6057 {
6058 rtx r11;
6059
6060 r11 = gen_rtx_REG (DImode, R11_REG);
6061 emit_move_insn (r11, GEN_INT (offset));
6062 emit_insn (gen_adddi3 (r11, r11, pointer));
6063 base_address = gen_rtx_MEM (Pmode, r11);
6064 offset = 0;
6065 }
6066 emit_move_insn (gen_rtx_REG (Pmode, regno),
6067 adjust_address (base_address, Pmode, offset));
6068 offset += UNITS_PER_WORD;
6069 }
6070 }
6071
6072 /* Restore function stack, frame, and registers. */
6073
6074 void
6075 ix86_expand_epilogue (int style)
6076 {
6077 int regno;
6078 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6079 struct ix86_frame frame;
6080 HOST_WIDE_INT offset;
6081
6082 ix86_compute_frame_layout (&frame);
6083
6084 /* Calculate start of saved registers relative to ebp. Special care
6085 must be taken for the normal return case of a function using
6086 eh_return: the eax and edx registers are marked as saved, but not
6087 restored along this path. */
6088 offset = frame.nregs;
6089 if (current_function_calls_eh_return && style != 2)
6090 offset -= 2;
6091 offset *= -UNITS_PER_WORD;
6092
6093   /* If we're only restoring one register and sp is not valid, then use a
6094      move instruction to restore the register, since it's less work than
6095      reloading sp and popping the register.
6096
6097      The default code results in a stack adjustment using an add/lea
6098      instruction, while this code results in a LEAVE instruction (or a discrete
6099      equivalent), so it is profitable in some other cases as well, especially
6100      when there are no registers to restore.  We also use this code when
6101      TARGET_USE_LEAVE and there is exactly one register to pop.  This heuristic
6102      may need some tuning in the future.  */
6103 if ((!sp_valid && frame.nregs <= 1)
6104 || (TARGET_EPILOGUE_USING_MOVE
6105 && cfun->machine->use_fast_prologue_epilogue
6106 && (frame.nregs > 1 || frame.to_allocate))
6107 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6108 || (frame_pointer_needed && TARGET_USE_LEAVE
6109 && cfun->machine->use_fast_prologue_epilogue
6110 && frame.nregs == 1)
6111 || current_function_calls_eh_return)
6112 {
6113       /* Restore registers.  We can use ebp or esp to address the memory
6114          locations.  If both are available, default to ebp, since offsets
6115          are known to be small.  The only exception is esp pointing directly
6116          to the end of the block of saved registers, where we may simplify
6117          the addressing mode.  */
6118
6119 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6120 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6121 frame.to_allocate, style == 2);
6122 else
6123 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6124 offset, style == 2);
6125
6126 /* eh_return epilogues need %ecx added to the stack pointer. */
6127 if (style == 2)
6128 {
6129 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6130
6131 if (frame_pointer_needed)
6132 {
6133 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6134 tmp = plus_constant (tmp, UNITS_PER_WORD);
6135 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6136
6137 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6138 emit_move_insn (hard_frame_pointer_rtx, tmp);
6139
6140 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6141 const0_rtx, style);
6142 }
6143 else
6144 {
6145 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6146 tmp = plus_constant (tmp, (frame.to_allocate
6147 + frame.nregs * UNITS_PER_WORD));
6148 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6149 }
6150 }
6151 else if (!frame_pointer_needed)
6152 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6153 GEN_INT (frame.to_allocate
6154 + frame.nregs * UNITS_PER_WORD),
6155 style);
6156 /* If not an i386, mov & pop is faster than "leave". */
6157 else if (TARGET_USE_LEAVE || optimize_size
6158 || !cfun->machine->use_fast_prologue_epilogue)
6159 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6160 else
6161 {
6162 pro_epilogue_adjust_stack (stack_pointer_rtx,
6163 hard_frame_pointer_rtx,
6164 const0_rtx, style);
6165 if (TARGET_64BIT)
6166 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6167 else
6168 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6169 }
6170 }
6171 else
6172 {
6173 /* First step is to deallocate the stack frame so that we can
6174 pop the registers. */
6175 if (!sp_valid)
6176 {
6177 gcc_assert (frame_pointer_needed);
6178 pro_epilogue_adjust_stack (stack_pointer_rtx,
6179 hard_frame_pointer_rtx,
6180 GEN_INT (offset), style);
6181 }
6182 else if (frame.to_allocate)
6183 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6184 GEN_INT (frame.to_allocate), style);
6185
6186 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6187 if (ix86_save_reg (regno, false))
6188 {
6189 if (TARGET_64BIT)
6190 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6191 else
6192 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6193 }
6194 if (frame_pointer_needed)
6195 {
6196 /* Leave results in shorter dependency chains on CPUs that are
6197 able to grok it fast. */
6198 if (TARGET_USE_LEAVE)
6199 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6200 else if (TARGET_64BIT)
6201 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6202 else
6203 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6204 }
6205 }
6206
6207 if (cfun->machine->force_align_arg_pointer)
6208 {
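      /* Undo the stack realignment: the saved argument pointer was set to
         sp + 4 at entry, so subtracting 4 restores the original stack pointer,
         which points at the caller's return address for the final ret.  */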
6209 emit_insn (gen_addsi3 (stack_pointer_rtx,
6210 cfun->machine->force_align_arg_pointer,
6211 GEN_INT (-4)));
6212 }
6213
6214 /* Sibcall epilogues don't want a return instruction. */
6215 if (style == 0)
6216 return;
6217
6218 if (current_function_pops_args && current_function_args_size)
6219 {
6220 rtx popc = GEN_INT (current_function_pops_args);
6221
6222 /* i386 can only pop 64K bytes. If asked to pop more, pop
6223 return address, do explicit add, and jump indirectly to the
6224 caller. */
6225
6226 if (current_function_pops_args >= 65536)
6227 {
6228 rtx ecx = gen_rtx_REG (SImode, 2);
6229
6230           /* There is no "pascal" calling convention in the 64bit ABI.  */
6231 gcc_assert (!TARGET_64BIT);
6232
6233 emit_insn (gen_popsi1 (ecx));
6234 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6235 emit_jump_insn (gen_return_indirect_internal (ecx));
6236 }
6237 else
6238 emit_jump_insn (gen_return_pop_internal (popc));
6239 }
6240 else
6241 emit_jump_insn (gen_return_internal ());
6242 }
6243
6244 /* Reset state changed while outputting the function; in particular, restore the PIC register to its default hard register.  */
6245
6246 static void
6247 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6248 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6249 {
6250 if (pic_offset_table_rtx)
6251 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6252 #if TARGET_MACHO
6253 /* Mach-O doesn't support labels at the end of objects, so if
6254 it looks like we might want one, insert a NOP. */
6255 {
6256 rtx insn = get_last_insn ();
6257 while (insn
6258 && NOTE_P (insn)
6259 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6260 insn = PREV_INSN (insn);
6261 if (insn
6262 && (LABEL_P (insn)
6263 || (NOTE_P (insn)
6264 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6265 fputs ("\tnop\n", file);
6266 }
6267 #endif
6268
6269 }
6270 \f
6271 /* Extract the parts of an RTL expression that is a valid memory address
6272 for an instruction. Return 0 if the structure of the address is
6273 grossly off. Return -1 if the address contains ASHIFT, so it is not
6274    strictly valid, but is still used for computing the length of lea instructions.  */
6275
6276 int
6277 ix86_decompose_address (rtx addr, struct ix86_address *out)
6278 {
6279 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6280 rtx base_reg, index_reg;
6281 HOST_WIDE_INT scale = 1;
6282 rtx scale_rtx = NULL_RTX;
6283 int retval = 1;
6284 enum ix86_address_seg seg = SEG_DEFAULT;
6285
6286 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6287 base = addr;
6288 else if (GET_CODE (addr) == PLUS)
6289 {
6290 rtx addends[4], op;
6291 int n = 0, i;
6292
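      /* Flatten the PLUS chain into at most four addends: a valid address is
         the sum of a base, index*scale, a displacement and possibly a segment
         override UNSPEC.  */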
6293 op = addr;
6294 do
6295 {
6296 if (n >= 4)
6297 return 0;
6298 addends[n++] = XEXP (op, 1);
6299 op = XEXP (op, 0);
6300 }
6301 while (GET_CODE (op) == PLUS);
6302 if (n >= 4)
6303 return 0;
6304 addends[n] = op;
6305
6306 for (i = n; i >= 0; --i)
6307 {
6308 op = addends[i];
6309 switch (GET_CODE (op))
6310 {
6311 case MULT:
6312 if (index)
6313 return 0;
6314 index = XEXP (op, 0);
6315 scale_rtx = XEXP (op, 1);
6316 break;
6317
6318 case UNSPEC:
6319 if (XINT (op, 1) == UNSPEC_TP
6320 && TARGET_TLS_DIRECT_SEG_REFS
6321 && seg == SEG_DEFAULT)
6322 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6323 else
6324 return 0;
6325 break;
6326
6327 case REG:
6328 case SUBREG:
6329 if (!base)
6330 base = op;
6331 else if (!index)
6332 index = op;
6333 else
6334 return 0;
6335 break;
6336
6337 case CONST:
6338 case CONST_INT:
6339 case SYMBOL_REF:
6340 case LABEL_REF:
6341 if (disp)
6342 return 0;
6343 disp = op;
6344 break;
6345
6346 default:
6347 return 0;
6348 }
6349 }
6350 }
6351 else if (GET_CODE (addr) == MULT)
6352 {
6353 index = XEXP (addr, 0); /* index*scale */
6354 scale_rtx = XEXP (addr, 1);
6355 }
6356 else if (GET_CODE (addr) == ASHIFT)
6357 {
6358 rtx tmp;
6359
6360 /* We're called for lea too, which implements ashift on occasion. */
6361 index = XEXP (addr, 0);
6362 tmp = XEXP (addr, 1);
6363 if (!CONST_INT_P (tmp))
6364 return 0;
6365 scale = INTVAL (tmp);
6366 if ((unsigned HOST_WIDE_INT) scale > 3)
6367 return 0;
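      /* A shift count of 0..3 corresponds to a scale factor of 1, 2, 4 or 8.  */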
6368 scale = 1 << scale;
6369 retval = -1;
6370 }
6371 else
6372 disp = addr; /* displacement */
6373
6374 /* Extract the integral value of scale. */
6375 if (scale_rtx)
6376 {
6377 if (!CONST_INT_P (scale_rtx))
6378 return 0;
6379 scale = INTVAL (scale_rtx);
6380 }
6381
6382 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6383 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6384
6385   /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
6386 if (base_reg && index_reg && scale == 1
6387 && (index_reg == arg_pointer_rtx
6388 || index_reg == frame_pointer_rtx
6389 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6390 {
6391 rtx tmp;
6392 tmp = base, base = index, index = tmp;
6393 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6394 }
6395
6396 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6397 if ((base_reg == hard_frame_pointer_rtx
6398 || base_reg == frame_pointer_rtx
6399 || base_reg == arg_pointer_rtx) && !disp)
6400 disp = const0_rtx;
6401
6402   /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6403      Avoid this by transforming it to [%esi+0].  */
6404 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6405 && base_reg && !index_reg && !disp
6406 && REG_P (base_reg)
6407 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6408 disp = const0_rtx;
6409
6410 /* Special case: encode reg+reg instead of reg*2. */
6411 if (!base && index && scale && scale == 2)
6412 base = index, base_reg = index_reg, scale = 1;
6413
6414 /* Special case: scaling cannot be encoded without base or displacement. */
6415 if (!base && !disp && index && scale != 1)
6416 disp = const0_rtx;
6417
6418 out->base = base;
6419 out->index = index;
6420 out->disp = disp;
6421 out->scale = scale;
6422 out->seg = seg;
6423
6424 return retval;
6425 }
6426 \f
6427 /* Return the cost of the memory address X.
6428    For i386, it is better to use a complex address than to let gcc copy
6429    the address into a reg and make a new pseudo.  But not if the address
6430    requires two regs - that would mean more pseudos with longer
6431    lifetimes.  */
6432 static int
6433 ix86_address_cost (rtx x)
6434 {
6435 struct ix86_address parts;
6436 int cost = 1;
6437 int ok = ix86_decompose_address (x, &parts);
6438
6439 gcc_assert (ok);
6440
6441 if (parts.base && GET_CODE (parts.base) == SUBREG)
6442 parts.base = SUBREG_REG (parts.base);
6443 if (parts.index && GET_CODE (parts.index) == SUBREG)
6444 parts.index = SUBREG_REG (parts.index);
6445
6446 /* More complex memory references are better. */
6447 if (parts.disp && parts.disp != const0_rtx)
6448 cost--;
6449 if (parts.seg != SEG_DEFAULT)
6450 cost--;
6451
6452 /* Attempt to minimize number of registers in the address. */
6453 if ((parts.base
6454 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6455 || (parts.index
6456 && (!REG_P (parts.index)
6457 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6458 cost++;
6459
6460 if (parts.base
6461 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6462 && parts.index
6463 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6464 && parts.base != parts.index)
6465 cost++;
6466
6467   /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6468      since its predecode logic can't detect the length of such instructions
6469      and decoding degenerates to vector decoding.  Increase the cost of such
6470      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6471      to split such addresses or even to refuse them entirely.
6472
6473      The following addressing modes are affected:
6474       [base+scale*index]
6475       [scale*index+disp]
6476       [base+index]
6477
6478      The first and last cases may be avoidable by explicitly coding the zero
6479      into the memory address, but I don't have an AMD K6 machine handy to
6480      check this theory.  */
6481
6482 if (TARGET_K6
6483 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6484 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6485 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6486 cost += 10;
6487
6488 return cost;
6489 }
6490 \f
6491 /* If X is a machine specific address (i.e. a symbol or label being
6492 referenced as a displacement from the GOT implemented using an
6493 UNSPEC), then return the base term. Otherwise return X. */
6494
6495 rtx
6496 ix86_find_base_term (rtx x)
6497 {
6498 rtx term;
6499
6500 if (TARGET_64BIT)
6501 {
6502 if (GET_CODE (x) != CONST)
6503 return x;
6504 term = XEXP (x, 0);
6505 if (GET_CODE (term) == PLUS
6506 && (CONST_INT_P (XEXP (term, 1))
6507 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6508 term = XEXP (term, 0);
6509 if (GET_CODE (term) != UNSPEC
6510 || XINT (term, 1) != UNSPEC_GOTPCREL)
6511 return x;
6512
6513 term = XVECEXP (term, 0, 0);
6514
6515 if (GET_CODE (term) != SYMBOL_REF
6516 && GET_CODE (term) != LABEL_REF)
6517 return x;
6518
6519 return term;
6520 }
6521
6522 term = ix86_delegitimize_address (x);
6523
6524 if (GET_CODE (term) != SYMBOL_REF
6525 && GET_CODE (term) != LABEL_REF)
6526 return x;
6527
6528 return term;
6529 }
6530
6531 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6532    this is used to form addresses of local data when -fPIC is in
6533    use.  */
6534
6535 static bool
6536 darwin_local_data_pic (rtx disp)
6537 {
6538 if (GET_CODE (disp) == MINUS)
6539 {
6540 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6541 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6542 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6543 {
6544 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6545 if (! strcmp (sym_name, "<pic base>"))
6546 return true;
6547 }
6548 }
6549
6550 return false;
6551 }
6552 \f
6553 /* Determine if a given RTX is a valid constant. We already know this
6554 satisfies CONSTANT_P. */
6555
6556 bool
6557 legitimate_constant_p (rtx x)
6558 {
6559 switch (GET_CODE (x))
6560 {
6561 case CONST:
6562 x = XEXP (x, 0);
6563
6564 if (GET_CODE (x) == PLUS)
6565 {
6566 if (!CONST_INT_P (XEXP (x, 1)))
6567 return false;
6568 x = XEXP (x, 0);
6569 }
6570
6571 if (TARGET_MACHO && darwin_local_data_pic (x))
6572 return true;
6573
6574 /* Only some unspecs are valid as "constants". */
6575 if (GET_CODE (x) == UNSPEC)
6576 switch (XINT (x, 1))
6577 {
6578 case UNSPEC_GOT:
6579 case UNSPEC_GOTOFF:
6580 case UNSPEC_PLTOFF:
6581 return TARGET_64BIT;
6582 case UNSPEC_TPOFF:
6583 case UNSPEC_NTPOFF:
6584 x = XVECEXP (x, 0, 0);
6585 return (GET_CODE (x) == SYMBOL_REF
6586 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6587 case UNSPEC_DTPOFF:
6588 x = XVECEXP (x, 0, 0);
6589 return (GET_CODE (x) == SYMBOL_REF
6590 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6591 default:
6592 return false;
6593 }
6594
6595 /* We must have drilled down to a symbol. */
6596 if (GET_CODE (x) == LABEL_REF)
6597 return true;
6598 if (GET_CODE (x) != SYMBOL_REF)
6599 return false;
6600 /* FALLTHRU */
6601
6602 case SYMBOL_REF:
6603 /* TLS symbols are never valid. */
6604 if (SYMBOL_REF_TLS_MODEL (x))
6605 return false;
6606 break;
6607
6608 case CONST_DOUBLE:
6609 if (GET_MODE (x) == TImode
6610 && x != CONST0_RTX (TImode)
6611 && !TARGET_64BIT)
6612 return false;
6613 break;
6614
6615 case CONST_VECTOR:
6616 if (x == CONST0_RTX (GET_MODE (x)))
6617 return true;
6618 return false;
6619
6620 default:
6621 break;
6622 }
6623
6624 /* Otherwise we handle everything else in the move patterns. */
6625 return true;
6626 }
6627
6628 /* Determine if it's legal to put X into the constant pool. This
6629 is not possible for the address of thread-local symbols, which
6630 is checked above. */
6631
6632 static bool
6633 ix86_cannot_force_const_mem (rtx x)
6634 {
6635 /* We can always put integral constants and vectors in memory. */
6636 switch (GET_CODE (x))
6637 {
6638 case CONST_INT:
6639 case CONST_DOUBLE:
6640 case CONST_VECTOR:
6641 return false;
6642
6643 default:
6644 break;
6645 }
6646 return !legitimate_constant_p (x);
6647 }
6648
6649 /* Determine if a given RTX is a valid constant address. */
6650
6651 bool
6652 constant_address_p (rtx x)
6653 {
6654 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6655 }
6656
6657 /* Nonzero if the constant value X is a legitimate general operand
6658 when generating PIC code. It is given that flag_pic is on and
6659 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6660
6661 bool
6662 legitimate_pic_operand_p (rtx x)
6663 {
6664 rtx inner;
6665
6666 switch (GET_CODE (x))
6667 {
6668 case CONST:
6669 inner = XEXP (x, 0);
6670 if (GET_CODE (inner) == PLUS
6671 && CONST_INT_P (XEXP (inner, 1)))
6672 inner = XEXP (inner, 0);
6673
6674 /* Only some unspecs are valid as "constants". */
6675 if (GET_CODE (inner) == UNSPEC)
6676 switch (XINT (inner, 1))
6677 {
6678 case UNSPEC_GOT:
6679 case UNSPEC_GOTOFF:
6680 case UNSPEC_PLTOFF:
6681 return TARGET_64BIT;
6682 case UNSPEC_TPOFF:
6683 x = XVECEXP (inner, 0, 0);
6684 return (GET_CODE (x) == SYMBOL_REF
6685 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6686 default:
6687 return false;
6688 }
6689 /* FALLTHRU */
6690
6691 case SYMBOL_REF:
6692 case LABEL_REF:
6693 return legitimate_pic_address_disp_p (x);
6694
6695 default:
6696 return true;
6697 }
6698 }
6699
6700 /* Determine if a given CONST RTX is a valid memory displacement
6701 in PIC mode. */
6702
6703 int
6704 legitimate_pic_address_disp_p (rtx disp)
6705 {
6706 bool saw_plus;
6707
6708 /* In 64bit mode we can allow direct addresses of symbols and labels
6709 when they are not dynamic symbols. */
6710 if (TARGET_64BIT)
6711 {
6712 rtx op0 = disp, op1;
6713
6714 switch (GET_CODE (disp))
6715 {
6716 case LABEL_REF:
6717 return true;
6718
6719 case CONST:
6720 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6721 break;
6722 op0 = XEXP (XEXP (disp, 0), 0);
6723 op1 = XEXP (XEXP (disp, 0), 1);
6724 if (!CONST_INT_P (op1)
6725 || INTVAL (op1) >= 16*1024*1024
6726 || INTVAL (op1) < -16*1024*1024)
6727 break;
6728 if (GET_CODE (op0) == LABEL_REF)
6729 return true;
6730 if (GET_CODE (op0) != SYMBOL_REF)
6731 break;
6732 /* FALLTHRU */
6733
6734 case SYMBOL_REF:
6735 /* TLS references should always be enclosed in UNSPEC. */
6736 if (SYMBOL_REF_TLS_MODEL (op0))
6737 return false;
6738 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6739 && ix86_cmodel != CM_LARGE_PIC)
6740 return true;
6741 break;
6742
6743 default:
6744 break;
6745 }
6746 }
6747 if (GET_CODE (disp) != CONST)
6748 return 0;
6749 disp = XEXP (disp, 0);
6750
6751 if (TARGET_64BIT)
6752 {
6753 	 /* It is unsafe to allow PLUS expressions here; that would limit the
6754 	    allowed distance of GOT tables. We should not need them anyway. */
6755 if (GET_CODE (disp) != UNSPEC
6756 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6757 && XINT (disp, 1) != UNSPEC_GOTOFF
6758 && XINT (disp, 1) != UNSPEC_PLTOFF))
6759 return 0;
6760
6761 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6762 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6763 return 0;
6764 return 1;
6765 }
6766
6767 saw_plus = false;
6768 if (GET_CODE (disp) == PLUS)
6769 {
6770 if (!CONST_INT_P (XEXP (disp, 1)))
6771 return 0;
6772 disp = XEXP (disp, 0);
6773 saw_plus = true;
6774 }
6775
6776 if (TARGET_MACHO && darwin_local_data_pic (disp))
6777 return 1;
6778
6779 if (GET_CODE (disp) != UNSPEC)
6780 return 0;
6781
6782 switch (XINT (disp, 1))
6783 {
6784 case UNSPEC_GOT:
6785 if (saw_plus)
6786 return false;
6787 /* We need to check for both symbols and labels because VxWorks loads
6788 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6789 details. */
6790 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6791 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6792 case UNSPEC_GOTOFF:
6793 	 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6794 	    While the ABI also specifies a 32bit relocation, we do not produce
6795 	    it in the small PIC model at all. */
6796 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6797 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6798 && !TARGET_64BIT)
6799 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6800 return false;
6801 case UNSPEC_GOTTPOFF:
6802 case UNSPEC_GOTNTPOFF:
6803 case UNSPEC_INDNTPOFF:
6804 if (saw_plus)
6805 return false;
6806 disp = XVECEXP (disp, 0, 0);
6807 return (GET_CODE (disp) == SYMBOL_REF
6808 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6809 case UNSPEC_NTPOFF:
6810 disp = XVECEXP (disp, 0, 0);
6811 return (GET_CODE (disp) == SYMBOL_REF
6812 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6813 case UNSPEC_DTPOFF:
6814 disp = XVECEXP (disp, 0, 0);
6815 return (GET_CODE (disp) == SYMBOL_REF
6816 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6817 }
6818
6819 return 0;
6820 }
6821
6822 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6823 memory address for an instruction. The MODE argument is the machine mode
6824 for the MEM expression that wants to use this address.
6825
6826 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6827 convert common non-canonical forms to canonical form so that they will
6828 be recognized. */
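	/* An illustrative sketch (not part of the original comment): the
	   canonical form handled here is base + index*scale + disp, e.g.

	     (plus (plus (mult (reg idx) (const_int 4)) (reg base))
		   (const_int 8))

	   which corresponds to 8(%base,%idx,4) in AT&T syntax; the names
	   idx and base are placeholders.  ix86_decompose_address splits
	   such an address into the parts validated below.  */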
6829
6830 int
6831 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6832 {
6833 struct ix86_address parts;
6834 rtx base, index, disp;
6835 HOST_WIDE_INT scale;
6836 const char *reason = NULL;
6837 rtx reason_rtx = NULL_RTX;
6838
6839 if (TARGET_DEBUG_ADDR)
6840 {
6841 fprintf (stderr,
6842 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6843 GET_MODE_NAME (mode), strict);
6844 debug_rtx (addr);
6845 }
6846
6847 if (ix86_decompose_address (addr, &parts) <= 0)
6848 {
6849 reason = "decomposition failed";
6850 goto report_error;
6851 }
6852
6853 base = parts.base;
6854 index = parts.index;
6855 disp = parts.disp;
6856 scale = parts.scale;
6857
6858 /* Validate base register.
6859
6860 Don't allow SUBREG's that span more than a word here. It can lead to spill
6861 failures when the base is one word out of a two word structure, which is
6862 represented internally as a DImode int. */
6863
6864 if (base)
6865 {
6866 rtx reg;
6867 reason_rtx = base;
6868
6869 if (REG_P (base))
6870 reg = base;
6871 else if (GET_CODE (base) == SUBREG
6872 && REG_P (SUBREG_REG (base))
6873 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6874 <= UNITS_PER_WORD)
6875 reg = SUBREG_REG (base);
6876 else
6877 {
6878 reason = "base is not a register";
6879 goto report_error;
6880 }
6881
6882 if (GET_MODE (base) != Pmode)
6883 {
6884 reason = "base is not in Pmode";
6885 goto report_error;
6886 }
6887
6888 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6889 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6890 {
6891 reason = "base is not valid";
6892 goto report_error;
6893 }
6894 }
6895
6896 /* Validate index register.
6897
6898 Don't allow SUBREG's that span more than a word here -- same as above. */
6899
6900 if (index)
6901 {
6902 rtx reg;
6903 reason_rtx = index;
6904
6905 if (REG_P (index))
6906 reg = index;
6907 else if (GET_CODE (index) == SUBREG
6908 && REG_P (SUBREG_REG (index))
6909 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6910 <= UNITS_PER_WORD)
6911 reg = SUBREG_REG (index);
6912 else
6913 {
6914 reason = "index is not a register";
6915 goto report_error;
6916 }
6917
6918 if (GET_MODE (index) != Pmode)
6919 {
6920 reason = "index is not in Pmode";
6921 goto report_error;
6922 }
6923
6924 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6925 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6926 {
6927 reason = "index is not valid";
6928 goto report_error;
6929 }
6930 }
6931
6932 /* Validate scale factor. */
6933 if (scale != 1)
6934 {
6935 reason_rtx = GEN_INT (scale);
6936 if (!index)
6937 {
6938 reason = "scale without index";
6939 goto report_error;
6940 }
6941
6942 if (scale != 2 && scale != 4 && scale != 8)
6943 {
6944 reason = "scale is not a valid multiplier";
6945 goto report_error;
6946 }
6947 }
6948
6949 /* Validate displacement. */
6950 if (disp)
6951 {
6952 reason_rtx = disp;
6953
6954 if (GET_CODE (disp) == CONST
6955 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6956 switch (XINT (XEXP (disp, 0), 1))
6957 {
6958 	 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
6959 	    used. While the ABI also specifies 32bit relocations, we don't produce
6960 	    them at all and use IP-relative addressing instead. */
6961 case UNSPEC_GOT:
6962 case UNSPEC_GOTOFF:
6963 gcc_assert (flag_pic);
6964 if (!TARGET_64BIT)
6965 goto is_legitimate_pic;
6966 reason = "64bit address unspec";
6967 goto report_error;
6968
6969 case UNSPEC_GOTPCREL:
6970 gcc_assert (flag_pic);
6971 goto is_legitimate_pic;
6972
6973 case UNSPEC_GOTTPOFF:
6974 case UNSPEC_GOTNTPOFF:
6975 case UNSPEC_INDNTPOFF:
6976 case UNSPEC_NTPOFF:
6977 case UNSPEC_DTPOFF:
6978 break;
6979
6980 default:
6981 reason = "invalid address unspec";
6982 goto report_error;
6983 }
6984
6985 else if (SYMBOLIC_CONST (disp)
6986 && (flag_pic
6987 || (TARGET_MACHO
6988 #if TARGET_MACHO
6989 && MACHOPIC_INDIRECT
6990 && !machopic_operand_p (disp)
6991 #endif
6992 )))
6993 {
6994
6995 is_legitimate_pic:
6996 if (TARGET_64BIT && (index || base))
6997 {
6998 /* foo@dtpoff(%rX) is ok. */
6999 if (GET_CODE (disp) != CONST
7000 || GET_CODE (XEXP (disp, 0)) != PLUS
7001 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7002 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7003 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7004 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7005 {
7006 reason = "non-constant pic memory reference";
7007 goto report_error;
7008 }
7009 }
7010 else if (! legitimate_pic_address_disp_p (disp))
7011 {
7012 reason = "displacement is an invalid pic construct";
7013 goto report_error;
7014 }
7015
7016 /* This code used to verify that a symbolic pic displacement
7017 includes the pic_offset_table_rtx register.
7018
7019 	 While this is a good idea, unfortunately these constructs may
7020 	 be created by the "adds using lea" optimization for incorrect
7021 	 code like:
7022
7023 int a;
7024 int foo(int i)
7025 {
7026 return *(&a+i);
7027 }
7028
7029 	 This code is nonsensical, but results in addressing the
7030 	 GOT table with a pic_offset_table_rtx base. We can't
7031 	 just refuse it easily, since it gets matched by the
7032 	 "addsi3" pattern, which later gets split to lea when
7033 	 the output register differs from the input. While this
7034 	 could be handled by a separate addsi pattern for this case
7035 	 that never results in lea, disabling this test seems to be
7036 	 the easier and correct fix for the crash. */
7037 }
7038 else if (GET_CODE (disp) != LABEL_REF
7039 && !CONST_INT_P (disp)
7040 && (GET_CODE (disp) != CONST
7041 || !legitimate_constant_p (disp))
7042 && (GET_CODE (disp) != SYMBOL_REF
7043 || !legitimate_constant_p (disp)))
7044 {
7045 reason = "displacement is not constant";
7046 goto report_error;
7047 }
7048 else if (TARGET_64BIT
7049 && !x86_64_immediate_operand (disp, VOIDmode))
7050 {
7051 reason = "displacement is out of range";
7052 goto report_error;
7053 }
7054 }
7055
7056 /* Everything looks valid. */
7057 if (TARGET_DEBUG_ADDR)
7058 fprintf (stderr, "Success.\n");
7059 return TRUE;
7060
7061 report_error:
7062 if (TARGET_DEBUG_ADDR)
7063 {
7064 fprintf (stderr, "Error: %s\n", reason);
7065 debug_rtx (reason_rtx);
7066 }
7067 return FALSE;
7068 }
7069 \f
7070 /* Return a unique alias set for the GOT. */
7071
7072 static HOST_WIDE_INT
7073 ix86_GOT_alias_set (void)
7074 {
7075 static HOST_WIDE_INT set = -1;
7076 if (set == -1)
7077 set = new_alias_set ();
7078 return set;
7079 }
7080
7081 /* Return a legitimate reference for ORIG (an address) using the
7082 register REG. If REG is 0, a new pseudo is generated.
7083
7084 There are two types of references that must be handled:
7085
7086 1. Global data references must load the address from the GOT, via
7087 the PIC reg. An insn is emitted to do this load, and the reg is
7088 returned.
7089
7090 2. Static data references, constant pool addresses, and code labels
7091 compute the address as an offset from the GOT, whose base is in
7092 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7093 differentiate them from global data objects. The returned
7094 address is the PIC reg + an unspec constant.
7095
7096 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7097 reg also appears in the address. */
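	/* An illustrative sketch of the two forms (assuming a 32-bit ELF
	   target; FOO is a placeholder, the unspecs mirror the code below):

	     global FOO:  (mem (plus pic_reg (const (unspec [FOO] UNSPEC_GOT))))
	     local FOO:   (plus pic_reg (const (unspec [FOO] UNSPEC_GOTOFF)))  */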
7098
7099 static rtx
7100 legitimize_pic_address (rtx orig, rtx reg)
7101 {
7102 rtx addr = orig;
7103 rtx new = orig;
7104 rtx base;
7105
7106 #if TARGET_MACHO
7107 if (TARGET_MACHO && !TARGET_64BIT)
7108 {
7109 if (reg == 0)
7110 reg = gen_reg_rtx (Pmode);
7111 /* Use the generic Mach-O PIC machinery. */
7112 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7113 }
7114 #endif
7115
7116 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7117 new = addr;
7118 else if (TARGET_64BIT
7119 && ix86_cmodel != CM_SMALL_PIC
7120 && gotoff_operand (addr, Pmode))
7121 {
7122 rtx tmpreg;
7123 /* This symbol may be referenced via a displacement from the PIC
7124 base address (@GOTOFF). */
7125
7126 if (reload_in_progress)
7127 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7128 if (GET_CODE (addr) == CONST)
7129 addr = XEXP (addr, 0);
7130 if (GET_CODE (addr) == PLUS)
7131 {
7132 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7133 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7134 }
7135 else
7136 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7137 new = gen_rtx_CONST (Pmode, new);
7138 if (!reg)
7139 tmpreg = gen_reg_rtx (Pmode);
7140 else
7141 tmpreg = reg;
7142 emit_move_insn (tmpreg, new);
7143
7144 if (reg != 0)
7145 {
7146 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7147 tmpreg, 1, OPTAB_DIRECT);
7148 new = reg;
7149 }
7150 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7151 }
7152 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7153 {
7154 /* This symbol may be referenced via a displacement from the PIC
7155 base address (@GOTOFF). */
7156
7157 if (reload_in_progress)
7158 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7159 if (GET_CODE (addr) == CONST)
7160 addr = XEXP (addr, 0);
7161 if (GET_CODE (addr) == PLUS)
7162 {
7163 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7164 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7165 }
7166 else
7167 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7168 new = gen_rtx_CONST (Pmode, new);
7169 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7170
7171 if (reg != 0)
7172 {
7173 emit_move_insn (reg, new);
7174 new = reg;
7175 }
7176 }
7177 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7178 /* We can't use @GOTOFF for text labels on VxWorks;
7179 see gotoff_operand. */
7180 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7181 {
7182 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7183 {
7184 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7185 new = gen_rtx_CONST (Pmode, new);
7186 new = gen_const_mem (Pmode, new);
7187 set_mem_alias_set (new, ix86_GOT_alias_set ());
7188
7189 if (reg == 0)
7190 reg = gen_reg_rtx (Pmode);
7191 	 /* Use gen_movsi directly, otherwise the address is loaded
7192 	    into a register for CSE. We don't want to CSE these addresses;
7193 	    instead we CSE addresses from the GOT table, so skip this. */
7194 emit_insn (gen_movsi (reg, new));
7195 new = reg;
7196 }
7197 else
7198 {
7199 /* This symbol must be referenced via a load from the
7200 Global Offset Table (@GOT). */
7201
7202 if (reload_in_progress)
7203 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7204 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7205 new = gen_rtx_CONST (Pmode, new);
7206 if (TARGET_64BIT)
7207 new = force_reg (Pmode, new);
7208 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7209 new = gen_const_mem (Pmode, new);
7210 set_mem_alias_set (new, ix86_GOT_alias_set ());
7211
7212 if (reg == 0)
7213 reg = gen_reg_rtx (Pmode);
7214 emit_move_insn (reg, new);
7215 new = reg;
7216 }
7217 }
7218 else
7219 {
7220 if (CONST_INT_P (addr)
7221 && !x86_64_immediate_operand (addr, VOIDmode))
7222 {
7223 if (reg)
7224 {
7225 emit_move_insn (reg, addr);
7226 new = reg;
7227 }
7228 else
7229 new = force_reg (Pmode, addr);
7230 }
7231 else if (GET_CODE (addr) == CONST)
7232 {
7233 addr = XEXP (addr, 0);
7234
7235 	 /* We must match what we generated earlier. Assume the only
7236 	    unspecs that can get here are ours; not that we could do
7237 	    anything with them anyway.... */
7238 if (GET_CODE (addr) == UNSPEC
7239 || (GET_CODE (addr) == PLUS
7240 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7241 return orig;
7242 gcc_assert (GET_CODE (addr) == PLUS);
7243 }
7244 if (GET_CODE (addr) == PLUS)
7245 {
7246 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7247
7248 /* Check first to see if this is a constant offset from a @GOTOFF
7249 symbol reference. */
7250 if (gotoff_operand (op0, Pmode)
7251 && CONST_INT_P (op1))
7252 {
7253 if (!TARGET_64BIT)
7254 {
7255 if (reload_in_progress)
7256 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7257 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7258 UNSPEC_GOTOFF);
7259 new = gen_rtx_PLUS (Pmode, new, op1);
7260 new = gen_rtx_CONST (Pmode, new);
7261 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7262
7263 if (reg != 0)
7264 {
7265 emit_move_insn (reg, new);
7266 new = reg;
7267 }
7268 }
7269 else
7270 {
7271 if (INTVAL (op1) < -16*1024*1024
7272 || INTVAL (op1) >= 16*1024*1024)
7273 {
7274 if (!x86_64_immediate_operand (op1, Pmode))
7275 op1 = force_reg (Pmode, op1);
7276 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7277 }
7278 }
7279 }
7280 else
7281 {
7282 base = legitimize_pic_address (XEXP (addr, 0), reg);
7283 new = legitimize_pic_address (XEXP (addr, 1),
7284 base == reg ? NULL_RTX : reg);
7285
7286 if (CONST_INT_P (new))
7287 new = plus_constant (base, INTVAL (new));
7288 else
7289 {
7290 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7291 {
7292 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7293 new = XEXP (new, 1);
7294 }
7295 new = gen_rtx_PLUS (Pmode, base, new);
7296 }
7297 }
7298 }
7299 }
7300 return new;
7301 }
7302 \f
7303 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7304
7305 static rtx
7306 get_thread_pointer (int to_reg)
7307 {
7308 rtx tp, reg, insn;
7309
7310 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7311 if (!to_reg)
7312 return tp;
7313
7314 reg = gen_reg_rtx (Pmode);
7315 insn = gen_rtx_SET (VOIDmode, reg, tp);
7316 insn = emit_insn (insn);
7317
7318 return reg;
7319 }
7320
7321 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7322 false if we expect this to be used for a memory address and true if
7323 we expect to load the address into a register. */
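	/* Illustrative example (mirroring the TLS_MODEL_LOCAL_EXEC case
	   below when TARGET_ANY_GNU_TLS or 64-bit is in effect): the
	   returned address has the form

	     (plus tp (const (unspec [x] UNSPEC_NTPOFF)))

	   where tp is the thread pointer from get_thread_pointer.  */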
7324
7325 static rtx
7326 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7327 {
7328 rtx dest, base, off, pic, tp;
7329 int type;
7330
7331 switch (model)
7332 {
7333 case TLS_MODEL_GLOBAL_DYNAMIC:
7334 dest = gen_reg_rtx (Pmode);
7335 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7336
7337 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7338 {
7339 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7340
7341 start_sequence ();
7342 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7343 insns = get_insns ();
7344 end_sequence ();
7345
7346 emit_libcall_block (insns, dest, rax, x);
7347 }
7348 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7349 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7350 else
7351 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7352
7353 if (TARGET_GNU2_TLS)
7354 {
7355 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7356
7357 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7358 }
7359 break;
7360
7361 case TLS_MODEL_LOCAL_DYNAMIC:
7362 base = gen_reg_rtx (Pmode);
7363 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7364
7365 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7366 {
7367 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7368
7369 start_sequence ();
7370 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7371 insns = get_insns ();
7372 end_sequence ();
7373
7374 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7375 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7376 emit_libcall_block (insns, base, rax, note);
7377 }
7378 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7379 emit_insn (gen_tls_local_dynamic_base_64 (base));
7380 else
7381 emit_insn (gen_tls_local_dynamic_base_32 (base));
7382
7383 if (TARGET_GNU2_TLS)
7384 {
7385 rtx x = ix86_tls_module_base ();
7386
7387 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7388 gen_rtx_MINUS (Pmode, x, tp));
7389 }
7390
7391 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7392 off = gen_rtx_CONST (Pmode, off);
7393
7394 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7395
7396 if (TARGET_GNU2_TLS)
7397 {
7398 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7399
7400 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7401 }
7402
7403 break;
7404
7405 case TLS_MODEL_INITIAL_EXEC:
7406 if (TARGET_64BIT)
7407 {
7408 pic = NULL;
7409 type = UNSPEC_GOTNTPOFF;
7410 }
7411 else if (flag_pic)
7412 {
7413 if (reload_in_progress)
7414 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7415 pic = pic_offset_table_rtx;
7416 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7417 }
7418 else if (!TARGET_ANY_GNU_TLS)
7419 {
7420 pic = gen_reg_rtx (Pmode);
7421 emit_insn (gen_set_got (pic));
7422 type = UNSPEC_GOTTPOFF;
7423 }
7424 else
7425 {
7426 pic = NULL;
7427 type = UNSPEC_INDNTPOFF;
7428 }
7429
7430 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7431 off = gen_rtx_CONST (Pmode, off);
7432 if (pic)
7433 off = gen_rtx_PLUS (Pmode, pic, off);
7434 off = gen_const_mem (Pmode, off);
7435 set_mem_alias_set (off, ix86_GOT_alias_set ());
7436
7437 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7438 {
7439 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7440 off = force_reg (Pmode, off);
7441 return gen_rtx_PLUS (Pmode, base, off);
7442 }
7443 else
7444 {
7445 base = get_thread_pointer (true);
7446 dest = gen_reg_rtx (Pmode);
7447 emit_insn (gen_subsi3 (dest, base, off));
7448 }
7449 break;
7450
7451 case TLS_MODEL_LOCAL_EXEC:
7452 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7453 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7454 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7455 off = gen_rtx_CONST (Pmode, off);
7456
7457 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7458 {
7459 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7460 return gen_rtx_PLUS (Pmode, base, off);
7461 }
7462 else
7463 {
7464 base = get_thread_pointer (true);
7465 dest = gen_reg_rtx (Pmode);
7466 emit_insn (gen_subsi3 (dest, base, off));
7467 }
7468 break;
7469
7470 default:
7471 gcc_unreachable ();
7472 }
7473
7474 return dest;
7475 }
7476
7477 /* Try machine-dependent ways of modifying an illegitimate address
7478 to be legitimate. If we find one, return the new, valid address.
7479 This macro is used in only one place: `memory_address' in explow.c.
7480
7481 OLDX is the address as it was before break_out_memory_refs was called.
7482 In some cases it is useful to look at this to decide what needs to be done.
7483
7484 MODE and WIN are passed so that this macro can use
7485 GO_IF_LEGITIMATE_ADDRESS.
7486
7487 It is always safe for this macro to do nothing. It exists to recognize
7488 opportunities to optimize the output.
7489
7490 For the 80386, we handle X+REG by loading X into a register R and
7491 using R+REG. R will go in a general reg and indexing will be used.
7492 However, if REG is a broken-out memory address or multiplication,
7493 nothing needs to be done because REG can certainly go in a general reg.
7494
7495 When -fpic is used, special handling is needed for symbolic references.
7496 See comments by legitimize_pic_address in i386.c for details. */
7497
7498 rtx
7499 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7500 {
7501 int changed = 0;
7502 unsigned log;
7503
7504 if (TARGET_DEBUG_ADDR)
7505 {
7506 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7507 GET_MODE_NAME (mode));
7508 debug_rtx (x);
7509 }
7510
7511 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7512 if (log)
7513 return legitimize_tls_address (x, log, false);
7514 if (GET_CODE (x) == CONST
7515 && GET_CODE (XEXP (x, 0)) == PLUS
7516 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7517 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7518 {
7519 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7520 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7521 }
7522
7523 if (flag_pic && SYMBOLIC_CONST (x))
7524 return legitimize_pic_address (x, 0);
7525
7526 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7527 if (GET_CODE (x) == ASHIFT
7528 && CONST_INT_P (XEXP (x, 1))
7529 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7530 {
7531 changed = 1;
7532 log = INTVAL (XEXP (x, 1));
7533 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7534 GEN_INT (1 << log));
7535 }
7536
7537 if (GET_CODE (x) == PLUS)
7538 {
7539 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7540
7541 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7542 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7543 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7544 {
7545 changed = 1;
7546 log = INTVAL (XEXP (XEXP (x, 0), 1));
7547 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7548 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7549 GEN_INT (1 << log));
7550 }
7551
7552 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7553 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7554 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7555 {
7556 changed = 1;
7557 log = INTVAL (XEXP (XEXP (x, 1), 1));
7558 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7559 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7560 GEN_INT (1 << log));
7561 }
7562
7563 /* Put multiply first if it isn't already. */
7564 if (GET_CODE (XEXP (x, 1)) == MULT)
7565 {
7566 rtx tmp = XEXP (x, 0);
7567 XEXP (x, 0) = XEXP (x, 1);
7568 XEXP (x, 1) = tmp;
7569 changed = 1;
7570 }
7571
7572 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7573 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7574 created by virtual register instantiation, register elimination, and
7575 similar optimizations. */
7576 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7577 {
7578 changed = 1;
7579 x = gen_rtx_PLUS (Pmode,
7580 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7581 XEXP (XEXP (x, 1), 0)),
7582 XEXP (XEXP (x, 1), 1));
7583 }
7584
7585 /* Canonicalize
7586 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7587 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7588 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7589 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7590 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7591 && CONSTANT_P (XEXP (x, 1)))
7592 {
7593 rtx constant;
7594 rtx other = NULL_RTX;
7595
7596 if (CONST_INT_P (XEXP (x, 1)))
7597 {
7598 constant = XEXP (x, 1);
7599 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7600 }
7601 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7602 {
7603 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7604 other = XEXP (x, 1);
7605 }
7606 else
7607 constant = 0;
7608
7609 if (constant)
7610 {
7611 changed = 1;
7612 x = gen_rtx_PLUS (Pmode,
7613 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7614 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7615 plus_constant (other, INTVAL (constant)));
7616 }
7617 }
7618
7619 if (changed && legitimate_address_p (mode, x, FALSE))
7620 return x;
7621
7622 if (GET_CODE (XEXP (x, 0)) == MULT)
7623 {
7624 changed = 1;
7625 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7626 }
7627
7628 if (GET_CODE (XEXP (x, 1)) == MULT)
7629 {
7630 changed = 1;
7631 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7632 }
7633
7634 if (changed
7635 && REG_P (XEXP (x, 1))
7636 && REG_P (XEXP (x, 0)))
7637 return x;
7638
7639 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7640 {
7641 changed = 1;
7642 x = legitimize_pic_address (x, 0);
7643 }
7644
7645 if (changed && legitimate_address_p (mode, x, FALSE))
7646 return x;
7647
7648 if (REG_P (XEXP (x, 0)))
7649 {
7650 rtx temp = gen_reg_rtx (Pmode);
7651 rtx val = force_operand (XEXP (x, 1), temp);
7652 if (val != temp)
7653 emit_move_insn (temp, val);
7654
7655 XEXP (x, 1) = temp;
7656 return x;
7657 }
7658
7659 else if (REG_P (XEXP (x, 1)))
7660 {
7661 rtx temp = gen_reg_rtx (Pmode);
7662 rtx val = force_operand (XEXP (x, 0), temp);
7663 if (val != temp)
7664 emit_move_insn (temp, val);
7665
7666 XEXP (x, 0) = temp;
7667 return x;
7668 }
7669 }
7670
7671 return x;
7672 }
7673 \f
7674 /* Print an integer constant expression in assembler syntax. Addition
7675 and subtraction are the only arithmetic that may appear in these
7676 expressions. FILE is the stdio stream to write to, X is the rtx, and
7677 CODE is the operand print code from the output string. */
7678
7679 static void
7680 output_pic_addr_const (FILE *file, rtx x, int code)
7681 {
7682 char buf[256];
7683
7684 switch (GET_CODE (x))
7685 {
7686 case PC:
7687 gcc_assert (flag_pic);
7688 putc ('.', file);
7689 break;
7690
7691 case SYMBOL_REF:
7692 if (! TARGET_MACHO || TARGET_64BIT)
7693 output_addr_const (file, x);
7694 else
7695 {
7696 const char *name = XSTR (x, 0);
7697
7698 /* Mark the decl as referenced so that cgraph will output the function. */
7699 if (SYMBOL_REF_DECL (x))
7700 mark_decl_referenced (SYMBOL_REF_DECL (x));
7701
7702 #if TARGET_MACHO
7703 if (MACHOPIC_INDIRECT
7704 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7705 name = machopic_indirection_name (x, /*stub_p=*/true);
7706 #endif
7707 assemble_name (file, name);
7708 }
7709 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7710 fputs ("@PLT", file);
7711 break;
7712
7713 case LABEL_REF:
7714 x = XEXP (x, 0);
7715 /* FALLTHRU */
7716 case CODE_LABEL:
7717 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7718 assemble_name (asm_out_file, buf);
7719 break;
7720
7721 case CONST_INT:
7722 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7723 break;
7724
7725 case CONST:
7726 /* This used to output parentheses around the expression,
7727 but that does not work on the 386 (either ATT or BSD assembler). */
7728 output_pic_addr_const (file, XEXP (x, 0), code);
7729 break;
7730
7731 case CONST_DOUBLE:
7732 if (GET_MODE (x) == VOIDmode)
7733 {
7734 /* We can use %d if the number is <32 bits and positive. */
7735 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7736 fprintf (file, "0x%lx%08lx",
7737 (unsigned long) CONST_DOUBLE_HIGH (x),
7738 (unsigned long) CONST_DOUBLE_LOW (x));
7739 else
7740 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7741 }
7742 else
7743 /* We can't handle floating point constants;
7744 PRINT_OPERAND must handle them. */
7745 output_operand_lossage ("floating constant misused");
7746 break;
7747
7748 case PLUS:
7749 /* Some assemblers need integer constants to appear first. */
7750 if (CONST_INT_P (XEXP (x, 0)))
7751 {
7752 output_pic_addr_const (file, XEXP (x, 0), code);
7753 putc ('+', file);
7754 output_pic_addr_const (file, XEXP (x, 1), code);
7755 }
7756 else
7757 {
7758 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7759 output_pic_addr_const (file, XEXP (x, 1), code);
7760 putc ('+', file);
7761 output_pic_addr_const (file, XEXP (x, 0), code);
7762 }
7763 break;
7764
7765 case MINUS:
7766 if (!TARGET_MACHO)
7767 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7768 output_pic_addr_const (file, XEXP (x, 0), code);
7769 putc ('-', file);
7770 output_pic_addr_const (file, XEXP (x, 1), code);
7771 if (!TARGET_MACHO)
7772 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7773 break;
7774
7775 case UNSPEC:
7776 gcc_assert (XVECLEN (x, 0) == 1);
7777 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7778 switch (XINT (x, 1))
7779 {
7780 case UNSPEC_GOT:
7781 fputs ("@GOT", file);
7782 break;
7783 case UNSPEC_GOTOFF:
7784 fputs ("@GOTOFF", file);
7785 break;
7786 case UNSPEC_PLTOFF:
7787 fputs ("@PLTOFF", file);
7788 break;
7789 case UNSPEC_GOTPCREL:
7790 fputs ("@GOTPCREL(%rip)", file);
7791 break;
7792 case UNSPEC_GOTTPOFF:
7793 /* FIXME: This might be @TPOFF in Sun ld too. */
7794 fputs ("@GOTTPOFF", file);
7795 break;
7796 case UNSPEC_TPOFF:
7797 fputs ("@TPOFF", file);
7798 break;
7799 case UNSPEC_NTPOFF:
7800 if (TARGET_64BIT)
7801 fputs ("@TPOFF", file);
7802 else
7803 fputs ("@NTPOFF", file);
7804 break;
7805 case UNSPEC_DTPOFF:
7806 fputs ("@DTPOFF", file);
7807 break;
7808 case UNSPEC_GOTNTPOFF:
7809 if (TARGET_64BIT)
7810 fputs ("@GOTTPOFF(%rip)", file);
7811 else
7812 fputs ("@GOTNTPOFF", file);
7813 break;
7814 case UNSPEC_INDNTPOFF:
7815 fputs ("@INDNTPOFF", file);
7816 break;
7817 default:
7818 output_operand_lossage ("invalid UNSPEC as operand");
7819 break;
7820 }
7821 break;
7822
7823 default:
7824 output_operand_lossage ("invalid expression as operand");
7825 }
7826 }
7827
7828 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7829 We need to emit DTP-relative relocations. */
7830
7831 static void
7832 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7833 {
7834 fputs (ASM_LONG, file);
7835 output_addr_const (file, x);
7836 fputs ("@DTPOFF", file);
7837 switch (size)
7838 {
7839 case 4:
7840 break;
7841 case 8:
7842 fputs (", 0", file);
7843 break;
7844 default:
7845 gcc_unreachable ();
7846 }
7847 }
7848
7849 /* In the name of slightly smaller debug output, and to cater to
7850 general assembler lossage, recognize PIC+GOTOFF and turn it back
7851 into a direct symbol reference.
7852
7853 On Darwin, this is necessary to avoid a crash, because Darwin
7854 has a different PIC label for each routine but the DWARF debugging
7855 information is not associated with any particular routine, so it's
7856 necessary to remove references to the PIC label from RTL stored by
7857 the DWARF output code. */
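	/* Illustrative example: on 32-bit targets an address such as
	   (plus pic_reg (const (unspec [foo] UNSPEC_GOTOFF))) is turned
	   back into plain `foo' (plus any constant addend) by the code
	   below; foo stands for an arbitrary symbol.  */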
7858
7859 static rtx
7860 ix86_delegitimize_address (rtx orig_x)
7861 {
7862 rtx x = orig_x;
7863 /* reg_addend is NULL or a multiple of some register. */
7864 rtx reg_addend = NULL_RTX;
7865 /* const_addend is NULL or a const_int. */
7866 rtx const_addend = NULL_RTX;
7867 /* This is the result, or NULL. */
7868 rtx result = NULL_RTX;
7869
7870 if (MEM_P (x))
7871 x = XEXP (x, 0);
7872
7873 if (TARGET_64BIT)
7874 {
7875 if (GET_CODE (x) != CONST
7876 || GET_CODE (XEXP (x, 0)) != UNSPEC
7877 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7878 || !MEM_P (orig_x))
7879 return orig_x;
7880 return XVECEXP (XEXP (x, 0), 0, 0);
7881 }
7882
7883 if (GET_CODE (x) != PLUS
7884 || GET_CODE (XEXP (x, 1)) != CONST)
7885 return orig_x;
7886
7887 if (REG_P (XEXP (x, 0))
7888 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7889 /* %ebx + GOT/GOTOFF */
7890 ;
7891 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7892 {
7893 /* %ebx + %reg * scale + GOT/GOTOFF */
7894 reg_addend = XEXP (x, 0);
7895 if (REG_P (XEXP (reg_addend, 0))
7896 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7897 reg_addend = XEXP (reg_addend, 1);
7898 else if (REG_P (XEXP (reg_addend, 1))
7899 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7900 reg_addend = XEXP (reg_addend, 0);
7901 else
7902 return orig_x;
7903 if (!REG_P (reg_addend)
7904 && GET_CODE (reg_addend) != MULT
7905 && GET_CODE (reg_addend) != ASHIFT)
7906 return orig_x;
7907 }
7908 else
7909 return orig_x;
7910
7911 x = XEXP (XEXP (x, 1), 0);
7912 if (GET_CODE (x) == PLUS
7913 && CONST_INT_P (XEXP (x, 1)))
7914 {
7915 const_addend = XEXP (x, 1);
7916 x = XEXP (x, 0);
7917 }
7918
7919 if (GET_CODE (x) == UNSPEC
7920 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7921 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7922 result = XVECEXP (x, 0, 0);
7923
7924 if (TARGET_MACHO && darwin_local_data_pic (x)
7925 && !MEM_P (orig_x))
7926 result = XEXP (x, 0);
7927
7928 if (! result)
7929 return orig_x;
7930
7931 if (const_addend)
7932 result = gen_rtx_PLUS (Pmode, result, const_addend);
7933 if (reg_addend)
7934 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7935 return result;
7936 }
7937 \f
7938 static void
7939 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7940 int fp, FILE *file)
7941 {
7942 const char *suffix;
7943
7944 if (mode == CCFPmode || mode == CCFPUmode)
7945 {
7946 enum rtx_code second_code, bypass_code;
7947 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7948 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7949 code = ix86_fp_compare_code_to_integer (code);
7950 mode = CCmode;
7951 }
7952 if (reverse)
7953 code = reverse_condition (code);
7954
7955 switch (code)
7956 {
7957 case EQ:
7958 suffix = "e";
7959 break;
7960 case NE:
7961 suffix = "ne";
7962 break;
7963 case GT:
7964 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7965 suffix = "g";
7966 break;
7967 case GTU:
7968 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7969 Those same assemblers have the same but opposite lossage on cmov. */
7970 gcc_assert (mode == CCmode);
7971 suffix = fp ? "nbe" : "a";
7972 break;
7973 case LT:
7974 switch (mode)
7975 {
7976 case CCNOmode:
7977 case CCGOCmode:
7978 suffix = "s";
7979 break;
7980
7981 case CCmode:
7982 case CCGCmode:
7983 suffix = "l";
7984 break;
7985
7986 default:
7987 gcc_unreachable ();
7988 }
7989 break;
7990 case LTU:
7991 gcc_assert (mode == CCmode);
7992 suffix = "b";
7993 break;
7994 case GE:
7995 switch (mode)
7996 {
7997 case CCNOmode:
7998 case CCGOCmode:
7999 suffix = "ns";
8000 break;
8001
8002 case CCmode:
8003 case CCGCmode:
8004 suffix = "ge";
8005 break;
8006
8007 default:
8008 gcc_unreachable ();
8009 }
8010 break;
8011 case GEU:
8012 /* ??? As above. */
8013 gcc_assert (mode == CCmode);
8014 suffix = fp ? "nb" : "ae";
8015 break;
8016 case LE:
8017 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8018 suffix = "le";
8019 break;
8020 case LEU:
8021 gcc_assert (mode == CCmode);
8022 suffix = "be";
8023 break;
8024 case UNORDERED:
8025 suffix = fp ? "u" : "p";
8026 break;
8027 case ORDERED:
8028 suffix = fp ? "nu" : "np";
8029 break;
8030 default:
8031 gcc_unreachable ();
8032 }
8033 fputs (suffix, file);
8034 }
8035
8036 /* Print the name of register X to FILE based on its machine mode and number.
8037 If CODE is 'w', pretend the mode is HImode.
8038 If CODE is 'b', pretend the mode is QImode.
8039 If CODE is 'k', pretend the mode is SImode.
8040 If CODE is 'q', pretend the mode is DImode.
8041 If CODE is 'h', pretend the reg is the 'high' byte register.
8042 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
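	/* Illustrative example: for hard register 0 (the AX register), code 'b'
	   prints "al", 'h' prints "ah", 'w' prints "ax", 'k' prints "eax" and
	   'q' prints "rax", each prefixed with '%' in AT&T syntax.  */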
8043
8044 void
8045 print_reg (rtx x, int code, FILE *file)
8046 {
8047 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8048 && REGNO (x) != FRAME_POINTER_REGNUM
8049 && REGNO (x) != FLAGS_REG
8050 && REGNO (x) != FPSR_REG
8051 && REGNO (x) != FPCR_REG);
8052
8053 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8054 putc ('%', file);
8055
8056 if (code == 'w' || MMX_REG_P (x))
8057 code = 2;
8058 else if (code == 'b')
8059 code = 1;
8060 else if (code == 'k')
8061 code = 4;
8062 else if (code == 'q')
8063 code = 8;
8064 else if (code == 'y')
8065 code = 3;
8066 else if (code == 'h')
8067 code = 0;
8068 else
8069 code = GET_MODE_SIZE (GET_MODE (x));
8070
8071 	 /* Irritatingly, AMD extended registers use a different naming convention
8072 	    from the normal registers. */
8073 if (REX_INT_REG_P (x))
8074 {
8075 gcc_assert (TARGET_64BIT);
8076 switch (code)
8077 {
8078 case 0:
8079 error ("extended registers have no high halves");
8080 break;
8081 case 1:
8082 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8083 break;
8084 case 2:
8085 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8086 break;
8087 case 4:
8088 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8089 break;
8090 case 8:
8091 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8092 break;
8093 default:
8094 error ("unsupported operand size for extended register");
8095 break;
8096 }
8097 return;
8098 }
8099 switch (code)
8100 {
8101 case 3:
8102 if (STACK_TOP_P (x))
8103 {
8104 fputs ("st(0)", file);
8105 break;
8106 }
8107 /* FALLTHRU */
8108 case 8:
8109 case 4:
8110 case 12:
8111 if (! ANY_FP_REG_P (x))
8112 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8113 /* FALLTHRU */
8114 case 16:
8115 case 2:
8116 normal:
8117 fputs (hi_reg_name[REGNO (x)], file);
8118 break;
8119 case 1:
8120 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8121 goto normal;
8122 fputs (qi_reg_name[REGNO (x)], file);
8123 break;
8124 case 0:
8125 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8126 goto normal;
8127 fputs (qi_high_reg_name[REGNO (x)], file);
8128 break;
8129 default:
8130 gcc_unreachable ();
8131 }
8132 }
8133
8134 /* Locate some local-dynamic symbol still in use by this function
8135 so that we can print its name in some tls_local_dynamic_base
8136 pattern. */
8137
8138 static const char *
8139 get_some_local_dynamic_name (void)
8140 {
8141 rtx insn;
8142
8143 if (cfun->machine->some_ld_name)
8144 return cfun->machine->some_ld_name;
8145
8146 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8147 if (INSN_P (insn)
8148 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8149 return cfun->machine->some_ld_name;
8150
8151 gcc_unreachable ();
8152 }
8153
8154 static int
8155 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8156 {
8157 rtx x = *px;
8158
8159 if (GET_CODE (x) == SYMBOL_REF
8160 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8161 {
8162 cfun->machine->some_ld_name = XSTR (x, 0);
8163 return 1;
8164 }
8165
8166 return 0;
8167 }
8168
8169 /* Meaning of CODE:
8170 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8171 C -- print opcode suffix for set/cmov insn.
8172 c -- like C, but print reversed condition
8173 F,f -- likewise, but for floating-point.
8174 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8175 otherwise nothing
8176 R -- print the prefix for register names.
8177 z -- print the opcode suffix for the size of the current operand.
8178 * -- print a star (in certain assembler syntax)
8179 A -- print an absolute memory reference.
8180 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8181 s -- print a shift double count, followed by the assembler's argument
8182 delimiter.
8183 b -- print the QImode name of the register for the indicated operand.
8184 %b0 would print %al if operands[0] is reg 0.
8185 w -- likewise, print the HImode name of the register.
8186 k -- likewise, print the SImode name of the register.
8187 q -- likewise, print the DImode name of the register.
8188 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8189 y -- print "st(0)" instead of "st" as a register.
8190 D -- print condition for SSE cmp instruction.
8191 P -- if PIC, print an @PLT suffix.
8192 X -- don't print any sort of PIC '@' suffix for a symbol.
8193 & -- print some in-use local-dynamic symbol name.
8194 H -- print a memory address offset by 8; used for sse high-parts
8195 */
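	/* Illustrative example: if operands[0] is hard register 0 in SImode,
	   "%k0" prints "%eax", "%w0" prints "%ax", "%b0" prints "%al", and
	   "%z0" prints the size suffix "l" (AT&T syntax).  */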
8196
8197 void
8198 print_operand (FILE *file, rtx x, int code)
8199 {
8200 if (code)
8201 {
8202 switch (code)
8203 {
8204 case '*':
8205 if (ASSEMBLER_DIALECT == ASM_ATT)
8206 putc ('*', file);
8207 return;
8208
8209 case '&':
8210 assemble_name (file, get_some_local_dynamic_name ());
8211 return;
8212
8213 case 'A':
8214 switch (ASSEMBLER_DIALECT)
8215 {
8216 case ASM_ATT:
8217 putc ('*', file);
8218 break;
8219
8220 case ASM_INTEL:
8221 	 /* Intel syntax. For absolute addresses, registers should not
8222 	    be surrounded by brackets. */
8223 if (!REG_P (x))
8224 {
8225 putc ('[', file);
8226 PRINT_OPERAND (file, x, 0);
8227 putc (']', file);
8228 return;
8229 }
8230 break;
8231
8232 default:
8233 gcc_unreachable ();
8234 }
8235
8236 PRINT_OPERAND (file, x, 0);
8237 return;
8238
8239
8240 case 'L':
8241 if (ASSEMBLER_DIALECT == ASM_ATT)
8242 putc ('l', file);
8243 return;
8244
8245 case 'W':
8246 if (ASSEMBLER_DIALECT == ASM_ATT)
8247 putc ('w', file);
8248 return;
8249
8250 case 'B':
8251 if (ASSEMBLER_DIALECT == ASM_ATT)
8252 putc ('b', file);
8253 return;
8254
8255 case 'Q':
8256 if (ASSEMBLER_DIALECT == ASM_ATT)
8257 putc ('l', file);
8258 return;
8259
8260 case 'S':
8261 if (ASSEMBLER_DIALECT == ASM_ATT)
8262 putc ('s', file);
8263 return;
8264
8265 case 'T':
8266 if (ASSEMBLER_DIALECT == ASM_ATT)
8267 putc ('t', file);
8268 return;
8269
8270 case 'z':
8271 /* 387 opcodes don't get size suffixes if the operands are
8272 registers. */
8273 if (STACK_REG_P (x))
8274 return;
8275
8276 /* Likewise if using Intel opcodes. */
8277 if (ASSEMBLER_DIALECT == ASM_INTEL)
8278 return;
8279
8280 	 /* Derive the opcode suffix from the size of the operand. */
8281 switch (GET_MODE_SIZE (GET_MODE (x)))
8282 {
8283 case 1:
8284 putc ('b', file);
8285 return;
8286
8287 case 2:
8288 #ifdef HAVE_GAS_FILDS_FISTS
8289 putc ('s', file);
8290 #endif
8291 return;
8292
8293 case 4:
8294 if (GET_MODE (x) == SFmode)
8295 {
8296 putc ('s', file);
8297 return;
8298 }
8299 else
8300 putc ('l', file);
8301 return;
8302
8303 case 12:
8304 case 16:
8305 putc ('t', file);
8306 return;
8307
8308 case 8:
8309 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8310 {
8311 #ifdef GAS_MNEMONICS
8312 putc ('q', file);
8313 #else
8314 putc ('l', file);
8315 putc ('l', file);
8316 #endif
8317 }
8318 else
8319 putc ('l', file);
8320 return;
8321
8322 default:
8323 gcc_unreachable ();
8324 }
8325
8326 case 'b':
8327 case 'w':
8328 case 'k':
8329 case 'q':
8330 case 'h':
8331 case 'y':
8332 case 'X':
8333 case 'P':
8334 break;
8335
8336 case 's':
8337 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8338 {
8339 PRINT_OPERAND (file, x, 0);
8340 putc (',', file);
8341 }
8342 return;
8343
8344 case 'D':
8345 	 /* A little bit of braindamage here. The SSE compare instructions
8346 	    use completely different names for the comparisons than the
8347 	    fp conditional moves do. */
8348 switch (GET_CODE (x))
8349 {
8350 case EQ:
8351 case UNEQ:
8352 fputs ("eq", file);
8353 break;
8354 case LT:
8355 case UNLT:
8356 fputs ("lt", file);
8357 break;
8358 case LE:
8359 case UNLE:
8360 fputs ("le", file);
8361 break;
8362 case UNORDERED:
8363 fputs ("unord", file);
8364 break;
8365 case NE:
8366 case LTGT:
8367 fputs ("neq", file);
8368 break;
8369 case UNGE:
8370 case GE:
8371 fputs ("nlt", file);
8372 break;
8373 case UNGT:
8374 case GT:
8375 fputs ("nle", file);
8376 break;
8377 case ORDERED:
8378 fputs ("ord", file);
8379 break;
8380 default:
8381 gcc_unreachable ();
8382 }
8383 return;
8384 case 'O':
8385 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8386 if (ASSEMBLER_DIALECT == ASM_ATT)
8387 {
8388 switch (GET_MODE (x))
8389 {
8390 case HImode: putc ('w', file); break;
8391 case SImode:
8392 case SFmode: putc ('l', file); break;
8393 case DImode:
8394 case DFmode: putc ('q', file); break;
8395 default: gcc_unreachable ();
8396 }
8397 putc ('.', file);
8398 }
8399 #endif
8400 return;
8401 case 'C':
8402 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8403 return;
8404 case 'F':
8405 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8406 if (ASSEMBLER_DIALECT == ASM_ATT)
8407 putc ('.', file);
8408 #endif
8409 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8410 return;
8411
8412 /* Like above, but reverse condition */
8413 case 'c':
8414 /* Check to see if argument to %c is really a constant
8415 and not a condition code which needs to be reversed. */
8416 if (!COMPARISON_P (x))
8417 {
8418 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8419 return;
8420 }
8421 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8422 return;
8423 case 'f':
8424 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8425 if (ASSEMBLER_DIALECT == ASM_ATT)
8426 putc ('.', file);
8427 #endif
8428 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8429 return;
8430
8431 case 'H':
8432 /* It doesn't actually matter what mode we use here, as we're
8433 only going to use this for printing. */
8434 x = adjust_address_nv (x, DImode, 8);
8435 break;
8436
8437 case '+':
8438 {
8439 rtx x;
8440
8441 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8442 return;
8443
8444 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8445 if (x)
8446 {
8447 int pred_val = INTVAL (XEXP (x, 0));
8448
8449 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8450 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8451 {
8452 int taken = pred_val > REG_BR_PROB_BASE / 2;
8453 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8454
8455 	 /* Emit hints only in case the default branch prediction
8456 	    heuristics would fail. */
8457 if (taken != cputaken)
8458 {
8459 /* We use 3e (DS) prefix for taken branches and
8460 2e (CS) prefix for not taken branches. */
8461 if (taken)
8462 fputs ("ds ; ", file);
8463 else
8464 fputs ("cs ; ", file);
8465 }
8466 }
8467 }
8468 return;
8469 }
8470 default:
8471 output_operand_lossage ("invalid operand code '%c'", code);
8472 }
8473 }
8474
8475 if (REG_P (x))
8476 print_reg (x, code, file);
8477
8478 else if (MEM_P (x))
8479 {
8480 /* No `byte ptr' prefix for call instructions. */
8481 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8482 {
8483 const char * size;
8484 switch (GET_MODE_SIZE (GET_MODE (x)))
8485 {
8486 case 1: size = "BYTE"; break;
8487 case 2: size = "WORD"; break;
8488 case 4: size = "DWORD"; break;
8489 case 8: size = "QWORD"; break;
8490 case 12: size = "XWORD"; break;
8491 case 16: size = "XMMWORD"; break;
8492 default:
8493 gcc_unreachable ();
8494 }
8495
8496 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8497 if (code == 'b')
8498 size = "BYTE";
8499 else if (code == 'w')
8500 size = "WORD";
8501 else if (code == 'k')
8502 size = "DWORD";
8503
8504 fputs (size, file);
8505 fputs (" PTR ", file);
8506 }
8507
8508 x = XEXP (x, 0);
8509 /* Avoid (%rip) for call operands. */
8510 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8511 && !CONST_INT_P (x))
8512 output_addr_const (file, x);
8513 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8514 output_operand_lossage ("invalid constraints for operand");
8515 else
8516 output_address (x);
8517 }
8518
8519 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8520 {
8521 REAL_VALUE_TYPE r;
8522 long l;
8523
8524 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8525 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8526
8527 if (ASSEMBLER_DIALECT == ASM_ATT)
8528 putc ('$', file);
8529 fprintf (file, "0x%08lx", l);
8530 }
8531
8532 /* These float cases don't actually occur as immediate operands. */
8533 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8534 {
8535 char dstr[30];
8536
8537 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8538 fprintf (file, "%s", dstr);
8539 }
8540
8541 else if (GET_CODE (x) == CONST_DOUBLE
8542 && GET_MODE (x) == XFmode)
8543 {
8544 char dstr[30];
8545
8546 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8547 fprintf (file, "%s", dstr);
8548 }
8549
8550 else
8551 {
8552 /* We have patterns that allow zero sets of memory, for instance.
8553 In 64-bit mode, we should probably support all 8-byte vectors,
8554 since we can in fact encode that into an immediate. */
8555 if (GET_CODE (x) == CONST_VECTOR)
8556 {
8557 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8558 x = const0_rtx;
8559 }
8560
8561 if (code != 'P')
8562 {
8563 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8564 {
8565 if (ASSEMBLER_DIALECT == ASM_ATT)
8566 putc ('$', file);
8567 }
8568 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8569 || GET_CODE (x) == LABEL_REF)
8570 {
8571 if (ASSEMBLER_DIALECT == ASM_ATT)
8572 putc ('$', file);
8573 else
8574 fputs ("OFFSET FLAT:", file);
8575 }
8576 }
8577 if (CONST_INT_P (x))
8578 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8579 else if (flag_pic)
8580 output_pic_addr_const (file, x, code);
8581 else
8582 output_addr_const (file, x);
8583 }
8584 }
8585 \f
8586 /* Print a memory operand whose address is ADDR. */
8587
8588 void
8589 print_operand_address (FILE *file, rtx addr)
8590 {
8591 struct ix86_address parts;
8592 rtx base, index, disp;
8593 int scale;
8594 int ok = ix86_decompose_address (addr, &parts);
8595
8596 gcc_assert (ok);
8597
8598 base = parts.base;
8599 index = parts.index;
8600 disp = parts.disp;
8601 scale = parts.scale;
8602
8603 switch (parts.seg)
8604 {
8605 case SEG_DEFAULT:
8606 break;
8607 case SEG_FS:
8608 case SEG_GS:
8609 if (USER_LABEL_PREFIX[0] == 0)
8610 putc ('%', file);
8611 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8612 break;
8613 default:
8614 gcc_unreachable ();
8615 }
8616
8617 if (!base && !index)
8618 {
8619 	 /* A displacement-only address requires special attention. */
8620
8621 if (CONST_INT_P (disp))
8622 {
8623 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8624 {
8625 if (USER_LABEL_PREFIX[0] == 0)
8626 putc ('%', file);
8627 fputs ("ds:", file);
8628 }
8629 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8630 }
8631 else if (flag_pic)
8632 output_pic_addr_const (file, disp, 0);
8633 else
8634 output_addr_const (file, disp);
8635
8636 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8637 if (TARGET_64BIT)
8638 {
8639 if (GET_CODE (disp) == CONST
8640 && GET_CODE (XEXP (disp, 0)) == PLUS
8641 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8642 disp = XEXP (XEXP (disp, 0), 0);
8643 if (GET_CODE (disp) == LABEL_REF
8644 || (GET_CODE (disp) == SYMBOL_REF
8645 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8646 fputs ("(%rip)", file);
8647 }
8648 }
8649 else
8650 {
8651 if (ASSEMBLER_DIALECT == ASM_ATT)
8652 {
8653 if (disp)
8654 {
8655 if (flag_pic)
8656 output_pic_addr_const (file, disp, 0);
8657 else if (GET_CODE (disp) == LABEL_REF)
8658 output_asm_label (disp);
8659 else
8660 output_addr_const (file, disp);
8661 }
8662
8663 putc ('(', file);
8664 if (base)
8665 print_reg (base, 0, file);
8666 if (index)
8667 {
8668 putc (',', file);
8669 print_reg (index, 0, file);
8670 if (scale != 1)
8671 fprintf (file, ",%d", scale);
8672 }
8673 putc (')', file);
8674 }
8675 else
8676 {
8677 rtx offset = NULL_RTX;
8678
8679 if (disp)
8680 {
8681 /* Pull out the offset of a symbol; print any symbol itself. */
8682 if (GET_CODE (disp) == CONST
8683 && GET_CODE (XEXP (disp, 0)) == PLUS
8684 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8685 {
8686 offset = XEXP (XEXP (disp, 0), 1);
8687 disp = gen_rtx_CONST (VOIDmode,
8688 XEXP (XEXP (disp, 0), 0));
8689 }
8690
8691 if (flag_pic)
8692 output_pic_addr_const (file, disp, 0);
8693 else if (GET_CODE (disp) == LABEL_REF)
8694 output_asm_label (disp);
8695 else if (CONST_INT_P (disp))
8696 offset = disp;
8697 else
8698 output_addr_const (file, disp);
8699 }
8700
8701 putc ('[', file);
8702 if (base)
8703 {
8704 print_reg (base, 0, file);
8705 if (offset)
8706 {
8707 if (INTVAL (offset) >= 0)
8708 putc ('+', file);
8709 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8710 }
8711 }
8712 else if (offset)
8713 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8714 else
8715 putc ('0', file);
8716
8717 if (index)
8718 {
8719 putc ('+', file);
8720 print_reg (index, 0, file);
8721 if (scale != 1)
8722 fprintf (file, "*%d", scale);
8723 }
8724 putc (']', file);
8725 }
8726 }
8727 }
8728
8729 bool
8730 output_addr_const_extra (FILE *file, rtx x)
8731 {
8732 rtx op;
8733
8734 if (GET_CODE (x) != UNSPEC)
8735 return false;
8736
8737 op = XVECEXP (x, 0, 0);
8738 switch (XINT (x, 1))
8739 {
8740 case UNSPEC_GOTTPOFF:
8741 output_addr_const (file, op);
8742 /* FIXME: This might be @TPOFF in Sun ld. */
8743 fputs ("@GOTTPOFF", file);
8744 break;
8745 case UNSPEC_TPOFF:
8746 output_addr_const (file, op);
8747 fputs ("@TPOFF", file);
8748 break;
8749 case UNSPEC_NTPOFF:
8750 output_addr_const (file, op);
8751 if (TARGET_64BIT)
8752 fputs ("@TPOFF", file);
8753 else
8754 fputs ("@NTPOFF", file);
8755 break;
8756 case UNSPEC_DTPOFF:
8757 output_addr_const (file, op);
8758 fputs ("@DTPOFF", file);
8759 break;
8760 case UNSPEC_GOTNTPOFF:
8761 output_addr_const (file, op);
8762 if (TARGET_64BIT)
8763 fputs ("@GOTTPOFF(%rip)", file);
8764 else
8765 fputs ("@GOTNTPOFF", file);
8766 break;
8767 case UNSPEC_INDNTPOFF:
8768 output_addr_const (file, op);
8769 fputs ("@INDNTPOFF", file);
8770 break;
8771
8772 default:
8773 return false;
8774 }
8775
8776 return true;
8777 }
8778 \f
8779 /* Split one or more DImode RTL references into pairs of SImode
8780 references. The RTL can be REG, offsettable MEM, integer constant, or
8781 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8782 split and "num" is its length. lo_half and hi_half are output arrays
8783 that parallel "operands". */
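	/* Illustrative example: for a DImode pseudo register, the halves are
	   (subreg:SI (reg:DI n) 0) and (subreg:SI (reg:DI n) 4); for an
	   offsettable MEM the same MEM is returned adjusted to SImode at
	   byte offsets 0 and 4, as done below.  */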
8784
8785 void
8786 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8787 {
8788 while (num--)
8789 {
8790 rtx op = operands[num];
8791
8792 	 /* simplify_subreg refuses to split volatile memory addresses,
8793 	    but we still have to handle them. */
8794 if (MEM_P (op))
8795 {
8796 lo_half[num] = adjust_address (op, SImode, 0);
8797 hi_half[num] = adjust_address (op, SImode, 4);
8798 }
8799 else
8800 {
8801 lo_half[num] = simplify_gen_subreg (SImode, op,
8802 GET_MODE (op) == VOIDmode
8803 ? DImode : GET_MODE (op), 0);
8804 hi_half[num] = simplify_gen_subreg (SImode, op,
8805 GET_MODE (op) == VOIDmode
8806 ? DImode : GET_MODE (op), 4);
8807 }
8808 }
8809 }
8810 /* Split one or more TImode RTL references into pairs of DImode
8811 references. The RTL can be REG, offsettable MEM, integer constant, or
8812 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8813 split and "num" is its length. lo_half and hi_half are output arrays
8814 that parallel "operands". */
8815
8816 void
8817 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8818 {
8819 while (num--)
8820 {
8821 rtx op = operands[num];
8822
8823       /* simplify_subreg refuses to split volatile memory addresses, but we
8824          still have to handle them.  */
8825 if (MEM_P (op))
8826 {
8827 lo_half[num] = adjust_address (op, DImode, 0);
8828 hi_half[num] = adjust_address (op, DImode, 8);
8829 }
8830 else
8831 {
8832 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8833 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8834 }
8835 }
8836 }
8837 \f
8838 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8839 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8840 is the expression of the binary operation. The output may either be
8841 emitted here, or returned to the caller, like all output_* functions.
8842
8843 There is no guarantee that the operands are the same mode, as they
8844 might be within FLOAT or FLOAT_EXTEND expressions. */
8845
8846 #ifndef SYSV386_COMPAT
8847 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8848 wants to fix the assemblers because that causes incompatibility
8849 with gcc. No-one wants to fix gcc because that causes
8850 incompatibility with assemblers... You can use the option of
8851 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8852 #define SYSV386_COMPAT 1
8853 #endif
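/* A note on the output templates used below: text in "{...|...}" braces
   selects between the AT&T and Intel assembler dialects, and the "%z"
   modifier emits an operand-size suffix derived from the operand's mode;
   see print_operand in this file for the remaining modifiers.  */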
8854
8855 const char *
8856 output_387_binary_op (rtx insn, rtx *operands)
8857 {
8858 static char buf[30];
8859 const char *p;
8860 const char *ssep;
8861 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8862
8863 #ifdef ENABLE_CHECKING
8864 /* Even if we do not want to check the inputs, this documents input
8865      constraints, which helps in understanding the following code.  */
8866 if (STACK_REG_P (operands[0])
8867 && ((REG_P (operands[1])
8868 && REGNO (operands[0]) == REGNO (operands[1])
8869 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8870 || (REG_P (operands[2])
8871 && REGNO (operands[0]) == REGNO (operands[2])
8872 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8873 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8874 ; /* ok */
8875 else
8876 gcc_assert (is_sse);
8877 #endif
8878
8879 switch (GET_CODE (operands[3]))
8880 {
8881 case PLUS:
8882 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8883 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8884 p = "fiadd";
8885 else
8886 p = "fadd";
8887 ssep = "add";
8888 break;
8889
8890 case MINUS:
8891 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8892 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8893 p = "fisub";
8894 else
8895 p = "fsub";
8896 ssep = "sub";
8897 break;
8898
8899 case MULT:
8900 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8901 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8902 p = "fimul";
8903 else
8904 p = "fmul";
8905 ssep = "mul";
8906 break;
8907
8908 case DIV:
8909 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8910 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8911 p = "fidiv";
8912 else
8913 p = "fdiv";
8914 ssep = "div";
8915 break;
8916
8917 default:
8918 gcc_unreachable ();
8919 }
8920
8921 if (is_sse)
8922 {
8923 strcpy (buf, ssep);
8924 if (GET_MODE (operands[0]) == SFmode)
8925 strcat (buf, "ss\t{%2, %0|%0, %2}");
8926 else
8927 strcat (buf, "sd\t{%2, %0|%0, %2}");
8928 return buf;
8929 }
8930 strcpy (buf, p);
8931
8932 switch (GET_CODE (operands[3]))
8933 {
8934 case MULT:
8935 case PLUS:
8936 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8937 {
8938 rtx temp = operands[2];
8939 operands[2] = operands[1];
8940 operands[1] = temp;
8941 }
8942
8943       /* We know operands[0] == operands[1].  */
8944
8945 if (MEM_P (operands[2]))
8946 {
8947 p = "%z2\t%2";
8948 break;
8949 }
8950
8951 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8952 {
8953 if (STACK_TOP_P (operands[0]))
8954 /* How is it that we are storing to a dead operand[2]?
8955 Well, presumably operands[1] is dead too. We can't
8956 store the result to st(0) as st(0) gets popped on this
8957 instruction. Instead store to operands[2] (which I
8958 think has to be st(1)). st(1) will be popped later.
8959 gcc <= 2.8.1 didn't have this check and generated
8960 assembly code that the Unixware assembler rejected. */
8961 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8962 else
8963 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8964 break;
8965 }
8966
8967 if (STACK_TOP_P (operands[0]))
8968 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8969 else
8970 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8971 break;
8972
8973 case MINUS:
8974 case DIV:
8975 if (MEM_P (operands[1]))
8976 {
8977 p = "r%z1\t%1";
8978 break;
8979 }
8980
8981 if (MEM_P (operands[2]))
8982 {
8983 p = "%z2\t%2";
8984 break;
8985 }
8986
8987 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8988 {
8989 #if SYSV386_COMPAT
8990 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8991 derived assemblers, confusingly reverse the direction of
8992 the operation for fsub{r} and fdiv{r} when the
8993 destination register is not st(0). The Intel assembler
8994 doesn't have this brain damage. Read !SYSV386_COMPAT to
8995 figure out what the hardware really does. */
8996 if (STACK_TOP_P (operands[0]))
8997 p = "{p\t%0, %2|rp\t%2, %0}";
8998 else
8999 p = "{rp\t%2, %0|p\t%0, %2}";
9000 #else
9001 if (STACK_TOP_P (operands[0]))
9002 /* As above for fmul/fadd, we can't store to st(0). */
9003 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9004 else
9005 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9006 #endif
9007 break;
9008 }
9009
9010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9011 {
9012 #if SYSV386_COMPAT
9013 if (STACK_TOP_P (operands[0]))
9014 p = "{rp\t%0, %1|p\t%1, %0}";
9015 else
9016 p = "{p\t%1, %0|rp\t%0, %1}";
9017 #else
9018 if (STACK_TOP_P (operands[0]))
9019 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9020 else
9021 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9022 #endif
9023 break;
9024 }
9025
9026 if (STACK_TOP_P (operands[0]))
9027 {
9028 if (STACK_TOP_P (operands[1]))
9029 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9030 else
9031 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9032 break;
9033 }
9034 else if (STACK_TOP_P (operands[1]))
9035 {
9036 #if SYSV386_COMPAT
9037 p = "{\t%1, %0|r\t%0, %1}";
9038 #else
9039 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9040 #endif
9041 }
9042 else
9043 {
9044 #if SYSV386_COMPAT
9045 p = "{r\t%2, %0|\t%0, %2}";
9046 #else
9047 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9048 #endif
9049 }
9050 break;
9051
9052 default:
9053 gcc_unreachable ();
9054 }
9055
9056 strcat (buf, p);
9057 return buf;
9058 }
9059
9060 /* Return the mode needed for ENTITY in the optimize_mode_switching pass.  */
9061
9062 int
9063 ix86_mode_needed (int entity, rtx insn)
9064 {
9065 enum attr_i387_cw mode;
9066
9067   /* The mode UNINITIALIZED is used to store the control word after a
9068      function call or ASM pattern.  The mode ANY specifies that the function
9069      has no requirements on the control word and makes no changes in the
9070      bits we are interested in.  */
9071
9072 if (CALL_P (insn)
9073 || (NONJUMP_INSN_P (insn)
9074 && (asm_noperands (PATTERN (insn)) >= 0
9075 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9076 return I387_CW_UNINITIALIZED;
9077
9078 if (recog_memoized (insn) < 0)
9079 return I387_CW_ANY;
9080
9081 mode = get_attr_i387_cw (insn);
9082
9083 switch (entity)
9084 {
9085 case I387_TRUNC:
9086 if (mode == I387_CW_TRUNC)
9087 return mode;
9088 break;
9089
9090 case I387_FLOOR:
9091 if (mode == I387_CW_FLOOR)
9092 return mode;
9093 break;
9094
9095 case I387_CEIL:
9096 if (mode == I387_CW_CEIL)
9097 return mode;
9098 break;
9099
9100 case I387_MASK_PM:
9101 if (mode == I387_CW_MASK_PM)
9102 return mode;
9103 break;
9104
9105 default:
9106 gcc_unreachable ();
9107 }
9108
9109 return I387_CW_ANY;
9110 }
9111
9112 /* Output code to initialize control word copies used by trunc?f?i and
9113    rounding patterns.  MODE selects the rounding mode required; the current
9114    control word is saved and a modified copy is stored for that mode.  */
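/* The bit twiddling below follows the x87 control word layout: the
   rounding-control field occupies bits 10-11 (0x0c00 selects truncation,
   0x0400 round-down, 0x0800 round-up) and the precision-exception mask
   is bit 5 (0x0020).  */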
9115
9116 void
9117 emit_i387_cw_initialization (int mode)
9118 {
9119 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9120 rtx new_mode;
9121
9122 int slot;
9123
9124 rtx reg = gen_reg_rtx (HImode);
9125
9126 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9127 emit_move_insn (reg, copy_rtx (stored_mode));
9128
9129 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9130 {
9131 switch (mode)
9132 {
9133 case I387_CW_TRUNC:
9134 /* round toward zero (truncate) */
9135 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9136 slot = SLOT_CW_TRUNC;
9137 break;
9138
9139 case I387_CW_FLOOR:
9140 /* round down toward -oo */
9141 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9142 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9143 slot = SLOT_CW_FLOOR;
9144 break;
9145
9146 case I387_CW_CEIL:
9147 /* round up toward +oo */
9148 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9149 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9150 slot = SLOT_CW_CEIL;
9151 break;
9152
9153 case I387_CW_MASK_PM:
9154 /* mask precision exception for nearbyint() */
9155 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9156 slot = SLOT_CW_MASK_PM;
9157 break;
9158
9159 default:
9160 gcc_unreachable ();
9161 }
9162 }
9163 else
9164 {
9165 switch (mode)
9166 {
9167 case I387_CW_TRUNC:
9168 /* round toward zero (truncate) */
9169 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9170 slot = SLOT_CW_TRUNC;
9171 break;
9172
9173 case I387_CW_FLOOR:
9174 /* round down toward -oo */
9175 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9176 slot = SLOT_CW_FLOOR;
9177 break;
9178
9179 case I387_CW_CEIL:
9180 /* round up toward +oo */
9181 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9182 slot = SLOT_CW_CEIL;
9183 break;
9184
9185 case I387_CW_MASK_PM:
9186 /* mask precision exception for nearbyint() */
9187 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9188 slot = SLOT_CW_MASK_PM;
9189 break;
9190
9191 default:
9192 gcc_unreachable ();
9193 }
9194 }
9195
9196 gcc_assert (slot < MAX_386_STACK_LOCALS);
9197
9198 new_mode = assign_386_stack_local (HImode, slot);
9199 emit_move_insn (new_mode, reg);
9200 }
9201
9202 /* Output code for INSN to convert a float to a signed int. OPERANDS
9203 are the insn operands. The output may be [HSD]Imode and the input
9204 operand may be [SDX]Fmode. */
9205
9206 const char *
9207 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9208 {
9209 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9210 int dimode_p = GET_MODE (operands[0]) == DImode;
9211 int round_mode = get_attr_i387_cw (insn);
9212
9213 /* Jump through a hoop or two for DImode, since the hardware has no
9214 non-popping instruction. We used to do this a different way, but
9215 that was somewhat fragile and broke with post-reload splitters. */
9216 if ((dimode_p || fisttp) && !stack_top_dies)
9217 output_asm_insn ("fld\t%y1", operands);
9218
9219 gcc_assert (STACK_TOP_P (operands[1]));
9220 gcc_assert (MEM_P (operands[0]));
9221
9222 if (fisttp)
9223 output_asm_insn ("fisttp%z0\t%0", operands);
9224 else
9225 {
9226 if (round_mode != I387_CW_ANY)
9227 output_asm_insn ("fldcw\t%3", operands);
9228 if (stack_top_dies || dimode_p)
9229 output_asm_insn ("fistp%z0\t%0", operands);
9230 else
9231 output_asm_insn ("fist%z0\t%0", operands);
9232 if (round_mode != I387_CW_ANY)
9233 output_asm_insn ("fldcw\t%2", operands);
9234 }
9235
9236 return "";
9237 }
9238
9239 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9240 have the values zero or one, indicates the ffreep insn's operand
9241 from the OPERANDS array. */
9242
9243 static const char *
9244 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9245 {
9246 if (TARGET_USE_FFREEP)
9247 #if HAVE_AS_IX86_FFREEP
9248 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9249 #else
9250 {
9251 static char retval[] = ".word\t0xc_df";
9252 int regno = REGNO (operands[opno]);
9253
9254 gcc_assert (FP_REGNO_P (regno));
9255
9256 retval[9] = '0' + (regno - FIRST_STACK_REG);
9257 return retval;
9258 }
9259 #endif
9260
9261 return opno ? "fstp\t%y1" : "fstp\t%y0";
9262 }
9263
9264
9265 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9266 should be used. UNORDERED_P is true when fucom should be used. */
9267
9268 const char *
9269 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9270 {
9271 int stack_top_dies;
9272 rtx cmp_op0, cmp_op1;
9273 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9274
9275 if (eflags_p)
9276 {
9277 cmp_op0 = operands[0];
9278 cmp_op1 = operands[1];
9279 }
9280 else
9281 {
9282 cmp_op0 = operands[1];
9283 cmp_op1 = operands[2];
9284 }
9285
9286 if (is_sse)
9287 {
9288 if (GET_MODE (operands[0]) == SFmode)
9289 if (unordered_p)
9290 return "ucomiss\t{%1, %0|%0, %1}";
9291 else
9292 return "comiss\t{%1, %0|%0, %1}";
9293 else
9294 if (unordered_p)
9295 return "ucomisd\t{%1, %0|%0, %1}";
9296 else
9297 return "comisd\t{%1, %0|%0, %1}";
9298 }
9299
9300 gcc_assert (STACK_TOP_P (cmp_op0));
9301
9302 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9303
9304 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9305 {
9306 if (stack_top_dies)
9307 {
9308 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9309 return output_387_ffreep (operands, 1);
9310 }
9311 else
9312 return "ftst\n\tfnstsw\t%0";
9313 }
9314
9315 if (STACK_REG_P (cmp_op1)
9316 && stack_top_dies
9317 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9318 && REGNO (cmp_op1) != FIRST_STACK_REG)
9319 {
9320       /* If the top of the 387 stack dies, and the other operand is also a
9321          stack register that dies, then this must be a `fcompp' float
9322          compare.  */
9323
9324 if (eflags_p)
9325 {
9326 /* There is no double popping fcomi variant. Fortunately,
9327 eflags is immune from the fstp's cc clobbering. */
9328 if (unordered_p)
9329 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9330 else
9331 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9332 return output_387_ffreep (operands, 0);
9333 }
9334 else
9335 {
9336 if (unordered_p)
9337 return "fucompp\n\tfnstsw\t%0";
9338 else
9339 return "fcompp\n\tfnstsw\t%0";
9340 }
9341 }
9342 else
9343 {
9344 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9345
9346 static const char * const alt[16] =
9347 {
9348 "fcom%z2\t%y2\n\tfnstsw\t%0",
9349 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9350 "fucom%z2\t%y2\n\tfnstsw\t%0",
9351 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9352
9353 "ficom%z2\t%y2\n\tfnstsw\t%0",
9354 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9355 NULL,
9356 NULL,
9357
9358 "fcomi\t{%y1, %0|%0, %y1}",
9359 "fcomip\t{%y1, %0|%0, %y1}",
9360 "fucomi\t{%y1, %0|%0, %y1}",
9361 "fucomip\t{%y1, %0|%0, %y1}",
9362
9363 NULL,
9364 NULL,
9365 NULL,
9366 NULL
9367 };
9368
9369 int mask;
9370 const char *ret;
9371
9372 mask = eflags_p << 3;
9373 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9374 mask |= unordered_p << 1;
9375 mask |= stack_top_dies;
9376
9377 gcc_assert (mask < 16);
9378 ret = alt[mask];
9379 gcc_assert (ret);
9380
9381 return ret;
9382 }
9383 }
9384
9385 void
9386 ix86_output_addr_vec_elt (FILE *file, int value)
9387 {
9388 const char *directive = ASM_LONG;
9389
9390 #ifdef ASM_QUAD
9391 if (TARGET_64BIT)
9392 directive = ASM_QUAD;
9393 #else
9394 gcc_assert (!TARGET_64BIT);
9395 #endif
9396
9397 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9398 }
9399
9400 void
9401 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9402 {
9403 const char *directive = ASM_LONG;
9404
9405 #ifdef ASM_QUAD
9406 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9407 directive = ASM_QUAD;
9408 #else
9409 gcc_assert (!TARGET_64BIT);
9410 #endif
9411 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9412 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9413 fprintf (file, "%s%s%d-%s%d\n",
9414 directive, LPREFIX, value, LPREFIX, rel);
9415 else if (HAVE_AS_GOTOFF_IN_DATA)
9416 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9417 #if TARGET_MACHO
9418 else if (TARGET_MACHO)
9419 {
9420 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9421 machopic_output_function_base_name (file);
9422 fprintf(file, "\n");
9423 }
9424 #endif
9425 else
9426 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9427 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9428 }
9429 \f
9430 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9431 for the target. */
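/* For example, clearing %eax is normally emitted as "xor %eax, %eax"
   together with a flags clobber; the plain "mov $0, %eax" form is kept
   only when TARGET_USE_MOV0 is set and we are not optimizing for size.  */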
9432
9433 void
9434 ix86_expand_clear (rtx dest)
9435 {
9436 rtx tmp;
9437
9438 /* We play register width games, which are only valid after reload. */
9439 gcc_assert (reload_completed);
9440
9441 /* Avoid HImode and its attendant prefix byte. */
9442 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9443 dest = gen_rtx_REG (SImode, REGNO (dest));
9444
9445 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9446
9447 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9448 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9449 {
9450 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9451 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9452 }
9453
9454 emit_insn (tmp);
9455 }
9456
9457 /* X is an unchanging MEM. If it is a constant pool reference, return
9458 the constant pool rtx, else NULL. */
9459
9460 rtx
9461 maybe_get_pool_constant (rtx x)
9462 {
9463 x = ix86_delegitimize_address (XEXP (x, 0));
9464
9465 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9466 return get_pool_constant (x);
9467
9468 return NULL_RTX;
9469 }
9470
9471 void
9472 ix86_expand_move (enum machine_mode mode, rtx operands[])
9473 {
9474 int strict = (reload_in_progress || reload_completed);
9475 rtx op0, op1;
9476 enum tls_model model;
9477
9478 op0 = operands[0];
9479 op1 = operands[1];
9480
9481 if (GET_CODE (op1) == SYMBOL_REF)
9482 {
9483 model = SYMBOL_REF_TLS_MODEL (op1);
9484 if (model)
9485 {
9486 op1 = legitimize_tls_address (op1, model, true);
9487 op1 = force_operand (op1, op0);
9488 if (op1 == op0)
9489 return;
9490 }
9491 }
9492 else if (GET_CODE (op1) == CONST
9493 && GET_CODE (XEXP (op1, 0)) == PLUS
9494 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9495 {
9496 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9497 if (model)
9498 {
9499 rtx addend = XEXP (XEXP (op1, 0), 1);
9500 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9501 op1 = force_operand (op1, NULL);
9502 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9503 op0, 1, OPTAB_DIRECT);
9504 if (op1 == op0)
9505 return;
9506 }
9507 }
9508
9509 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9510 {
9511 if (TARGET_MACHO && !TARGET_64BIT)
9512 {
9513 #if TARGET_MACHO
9514 if (MACHOPIC_PURE)
9515 {
9516 rtx temp = ((reload_in_progress
9517 || ((op0 && REG_P (op0))
9518 && mode == Pmode))
9519 ? op0 : gen_reg_rtx (Pmode));
9520 op1 = machopic_indirect_data_reference (op1, temp);
9521 op1 = machopic_legitimize_pic_address (op1, mode,
9522 temp == op1 ? 0 : temp);
9523 }
9524 else if (MACHOPIC_INDIRECT)
9525 op1 = machopic_indirect_data_reference (op1, 0);
9526 if (op0 == op1)
9527 return;
9528 #endif
9529 }
9530 else
9531 {
9532 if (MEM_P (op0))
9533 op1 = force_reg (Pmode, op1);
9534 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9535 {
9536 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9537 op1 = legitimize_pic_address (op1, reg);
9538 if (op0 == op1)
9539 return;
9540 }
9541 }
9542 }
9543 else
9544 {
9545 if (MEM_P (op0)
9546 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9547 || !push_operand (op0, mode))
9548 && MEM_P (op1))
9549 op1 = force_reg (mode, op1);
9550
9551 if (push_operand (op0, mode)
9552 && ! general_no_elim_operand (op1, mode))
9553 op1 = copy_to_mode_reg (mode, op1);
9554
9555       /* Force large constants in 64-bit compilation into a register
9556          to get them CSEed.  */
9557 if (TARGET_64BIT && mode == DImode
9558 && immediate_operand (op1, mode)
9559 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9560 && !register_operand (op0, mode)
9561 && optimize && !reload_completed && !reload_in_progress)
9562 op1 = copy_to_mode_reg (mode, op1);
9563
9564 if (FLOAT_MODE_P (mode))
9565 {
9566 /* If we are loading a floating point constant to a register,
9567 force the value to memory now, since we'll get better code
9568 out the back end. */
9569
9570 if (strict)
9571 ;
9572 else if (GET_CODE (op1) == CONST_DOUBLE)
9573 {
9574 op1 = validize_mem (force_const_mem (mode, op1));
9575 if (!register_operand (op0, mode))
9576 {
9577 rtx temp = gen_reg_rtx (mode);
9578 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9579 emit_move_insn (op0, temp);
9580 return;
9581 }
9582 }
9583 }
9584 }
9585
9586 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9587 }
9588
9589 void
9590 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9591 {
9592 rtx op0 = operands[0], op1 = operands[1];
9593
9594 /* Force constants other than zero into memory. We do not know how
9595 the instructions used to build constants modify the upper 64 bits
9596      of the register; once we have that information we may be able
9597 to handle some of them more efficiently. */
9598 if ((reload_in_progress | reload_completed) == 0
9599 && register_operand (op0, mode)
9600 && CONSTANT_P (op1)
9601 && standard_sse_constant_p (op1) <= 0)
9602 op1 = validize_mem (force_const_mem (mode, op1));
9603
9604 /* Make operand1 a register if it isn't already. */
9605 if (!no_new_pseudos
9606 && !register_operand (op0, mode)
9607 && !register_operand (op1, mode))
9608 {
9609 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9610 return;
9611 }
9612
9613 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9614 }
9615
9616 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9617 straight to ix86_expand_vector_move. */
9618 /* Code generation for scalar reg-reg moves of single and double precision data:
9619 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9620 movaps reg, reg
9621 else
9622 movss reg, reg
9623 if (x86_sse_partial_reg_dependency == true)
9624 movapd reg, reg
9625 else
9626 movsd reg, reg
9627
9628 Code generation for scalar loads of double precision data:
9629 if (x86_sse_split_regs == true)
9630 movlpd mem, reg (gas syntax)
9631 else
9632 movsd mem, reg
9633
9634 Code generation for unaligned packed loads of single precision data
9635 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9636 if (x86_sse_unaligned_move_optimal)
9637 movups mem, reg
9638
9639 if (x86_sse_partial_reg_dependency == true)
9640 {
9641 xorps reg, reg
9642 movlps mem, reg
9643 movhps mem+8, reg
9644 }
9645 else
9646 {
9647 movlps mem, reg
9648 movhps mem+8, reg
9649 }
9650
9651 Code generation for unaligned packed loads of double precision data
9652 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9653 if (x86_sse_unaligned_move_optimal)
9654 movupd mem, reg
9655
9656 if (x86_sse_split_regs == true)
9657 {
9658 movlpd mem, reg
9659 movhpd mem+8, reg
9660 }
9661 else
9662 {
9663 movsd mem, reg
9664 movhpd mem+8, reg
9665 }
9666 */
9667
9668 void
9669 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9670 {
9671 rtx op0, op1, m;
9672
9673 op0 = operands[0];
9674 op1 = operands[1];
9675
9676 if (MEM_P (op1))
9677 {
9678 /* If we're optimizing for size, movups is the smallest. */
9679 if (optimize_size)
9680 {
9681 op0 = gen_lowpart (V4SFmode, op0);
9682 op1 = gen_lowpart (V4SFmode, op1);
9683 emit_insn (gen_sse_movups (op0, op1));
9684 return;
9685 }
9686
9687 /* ??? If we have typed data, then it would appear that using
9688 movdqu is the only way to get unaligned data loaded with
9689 integer type. */
9690 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9691 {
9692 op0 = gen_lowpart (V16QImode, op0);
9693 op1 = gen_lowpart (V16QImode, op1);
9694 emit_insn (gen_sse2_movdqu (op0, op1));
9695 return;
9696 }
9697
9698 if (TARGET_SSE2 && mode == V2DFmode)
9699 {
9700 rtx zero;
9701
9702 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9703 {
9704 op0 = gen_lowpart (V2DFmode, op0);
9705 op1 = gen_lowpart (V2DFmode, op1);
9706 emit_insn (gen_sse2_movupd (op0, op1));
9707 return;
9708 }
9709
9710 /* When SSE registers are split into halves, we can avoid
9711 writing to the top half twice. */
9712 if (TARGET_SSE_SPLIT_REGS)
9713 {
9714 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9715 zero = op0;
9716 }
9717 else
9718 {
9719 /* ??? Not sure about the best option for the Intel chips.
9720 The following would seem to satisfy; the register is
9721 entirely cleared, breaking the dependency chain. We
9722 then store to the upper half, with a dependency depth
9723 of one. A rumor has it that Intel recommends two movsd
9724 followed by an unpacklpd, but this is unconfirmed. And
9725 given that the dependency depth of the unpacklpd would
9726 still be one, I'm not sure why this would be better. */
9727 zero = CONST0_RTX (V2DFmode);
9728 }
9729
9730 m = adjust_address (op1, DFmode, 0);
9731 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9732 m = adjust_address (op1, DFmode, 8);
9733 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9734 }
9735 else
9736 {
9737 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9738 {
9739 op0 = gen_lowpart (V4SFmode, op0);
9740 op1 = gen_lowpart (V4SFmode, op1);
9741 emit_insn (gen_sse_movups (op0, op1));
9742 return;
9743 }
9744
9745 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9746 emit_move_insn (op0, CONST0_RTX (mode));
9747 else
9748 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9749
9750 if (mode != V4SFmode)
9751 op0 = gen_lowpart (V4SFmode, op0);
9752 m = adjust_address (op1, V2SFmode, 0);
9753 emit_insn (gen_sse_loadlps (op0, op0, m));
9754 m = adjust_address (op1, V2SFmode, 8);
9755 emit_insn (gen_sse_loadhps (op0, op0, m));
9756 }
9757 }
9758 else if (MEM_P (op0))
9759 {
9760 /* If we're optimizing for size, movups is the smallest. */
9761 if (optimize_size)
9762 {
9763 op0 = gen_lowpart (V4SFmode, op0);
9764 op1 = gen_lowpart (V4SFmode, op1);
9765 emit_insn (gen_sse_movups (op0, op1));
9766 return;
9767 }
9768
9769 /* ??? Similar to above, only less clear because of quote
9770 typeless stores unquote. */
9771 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9772 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9773 {
9774 op0 = gen_lowpart (V16QImode, op0);
9775 op1 = gen_lowpart (V16QImode, op1);
9776 emit_insn (gen_sse2_movdqu (op0, op1));
9777 return;
9778 }
9779
9780 if (TARGET_SSE2 && mode == V2DFmode)
9781 {
9782 m = adjust_address (op0, DFmode, 0);
9783 emit_insn (gen_sse2_storelpd (m, op1));
9784 m = adjust_address (op0, DFmode, 8);
9785 emit_insn (gen_sse2_storehpd (m, op1));
9786 }
9787 else
9788 {
9789 if (mode != V4SFmode)
9790 op1 = gen_lowpart (V4SFmode, op1);
9791 m = adjust_address (op0, V2SFmode, 0);
9792 emit_insn (gen_sse_storelps (m, op1));
9793 m = adjust_address (op0, V2SFmode, 8);
9794 emit_insn (gen_sse_storehps (m, op1));
9795 }
9796 }
9797 else
9798 gcc_unreachable ();
9799 }
9800
9801 /* Expand a push in MODE. This is some mode for which we do not support
9802 proper push instructions, at least from the registers that we expect
9803 the value to live in. */
9804
9805 void
9806 ix86_expand_push (enum machine_mode mode, rtx x)
9807 {
9808 rtx tmp;
9809
9810 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9811 GEN_INT (-GET_MODE_SIZE (mode)),
9812 stack_pointer_rtx, 1, OPTAB_DIRECT);
9813 if (tmp != stack_pointer_rtx)
9814 emit_move_insn (stack_pointer_rtx, tmp);
9815
9816 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9817 emit_move_insn (tmp, x);
9818 }
9819
9820 /* Helper function of ix86_fixup_binary_operands to canonicalize
9821 operand order. Returns true if the operands should be swapped. */
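/* For example, with a commutative (plus (mem) (reg)) where the destination
   is the same register as the second source, swapping lets the first
   source match the destination and keeps the memory reference second.  */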
9822
9823 static bool
9824 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9825 rtx operands[])
9826 {
9827 rtx dst = operands[0];
9828 rtx src1 = operands[1];
9829 rtx src2 = operands[2];
9830
9831 /* If the operation is not commutative, we can't do anything. */
9832 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9833 return false;
9834
9835 /* Highest priority is that src1 should match dst. */
9836 if (rtx_equal_p (dst, src1))
9837 return false;
9838 if (rtx_equal_p (dst, src2))
9839 return true;
9840
9841 /* Next highest priority is that immediate constants come second. */
9842 if (immediate_operand (src2, mode))
9843 return false;
9844 if (immediate_operand (src1, mode))
9845 return true;
9846
9847 /* Lowest priority is that memory references should come second. */
9848 if (MEM_P (src2))
9849 return false;
9850 if (MEM_P (src1))
9851 return true;
9852
9853 return false;
9854 }
9855
9856
9857 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9858 destination to use for the operation. If different from the true
9859 destination in operands[0], a copy operation will be required. */
9860
9861 rtx
9862 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9863 rtx operands[])
9864 {
9865 rtx dst = operands[0];
9866 rtx src1 = operands[1];
9867 rtx src2 = operands[2];
9868
9869 /* Canonicalize operand order. */
9870 if (ix86_swap_binary_operands_p (code, mode, operands))
9871 {
9872 rtx temp = src1;
9873 src1 = src2;
9874 src2 = temp;
9875 }
9876
9877 /* Both source operands cannot be in memory. */
9878 if (MEM_P (src1) && MEM_P (src2))
9879 {
9880 /* Optimization: Only read from memory once. */
9881 if (rtx_equal_p (src1, src2))
9882 {
9883 src2 = force_reg (mode, src2);
9884 src1 = src2;
9885 }
9886 else
9887 src2 = force_reg (mode, src2);
9888 }
9889
9890 /* If the destination is memory, and we do not have matching source
9891 operands, do things in registers. */
9892 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9893 dst = gen_reg_rtx (mode);
9894
9895 /* Source 1 cannot be a constant. */
9896 if (CONSTANT_P (src1))
9897 src1 = force_reg (mode, src1);
9898
9899 /* Source 1 cannot be a non-matching memory. */
9900 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9901 src1 = force_reg (mode, src1);
9902
9903 operands[1] = src1;
9904 operands[2] = src2;
9905 return dst;
9906 }
9907
9908 /* Similarly, but assume that the destination has already been
9909 set up properly. */
9910
9911 void
9912 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9913 enum machine_mode mode, rtx operands[])
9914 {
9915 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9916 gcc_assert (dst == operands[0]);
9917 }
9918
9919 /* Attempt to expand a binary operator. Make the expansion closer to the
9920    actual machine than just general_operand, which would allow 3 separate
9921    memory references (one output, two inputs) in a single insn.  */
9922
9923 void
9924 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9925 rtx operands[])
9926 {
9927 rtx src1, src2, dst, op, clob;
9928
9929 dst = ix86_fixup_binary_operands (code, mode, operands);
9930 src1 = operands[1];
9931 src2 = operands[2];
9932
9933 /* Emit the instruction. */
9934
9935 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9936 if (reload_in_progress)
9937 {
9938 /* Reload doesn't know about the flags register, and doesn't know that
9939 it doesn't want to clobber it. We can only do this with PLUS. */
9940 gcc_assert (code == PLUS);
9941 emit_insn (op);
9942 }
9943 else
9944 {
9945 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9946 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9947 }
9948
9949 /* Fix up the destination if needed. */
9950 if (dst != operands[0])
9951 emit_move_insn (operands[0], dst);
9952 }
9953
9954 /* Return TRUE or FALSE depending on whether the binary operator meets the
9955 appropriate constraints. */
9956
9957 int
9958 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9959 rtx operands[3])
9960 {
9961 rtx dst = operands[0];
9962 rtx src1 = operands[1];
9963 rtx src2 = operands[2];
9964
9965 /* Both source operands cannot be in memory. */
9966 if (MEM_P (src1) && MEM_P (src2))
9967 return 0;
9968
9969 /* Canonicalize operand order for commutative operators. */
9970 if (ix86_swap_binary_operands_p (code, mode, operands))
9971 {
9972 rtx temp = src1;
9973 src1 = src2;
9974 src2 = temp;
9975 }
9976
9977 /* If the destination is memory, we must have a matching source operand. */
9978 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9979 return 0;
9980
9981 /* Source 1 cannot be a constant. */
9982 if (CONSTANT_P (src1))
9983 return 0;
9984
9985 /* Source 1 cannot be a non-matching memory. */
9986 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9987 return 0;
9988
9989 return 1;
9990 }
9991
9992 /* Attempt to expand a unary operator. Make the expansion closer to the
9993    actual machine than just general_operand, which would allow 2 separate
9994 memory references (one output, one input) in a single insn. */
9995
9996 void
9997 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9998 rtx operands[])
9999 {
10000 int matching_memory;
10001 rtx src, dst, op, clob;
10002
10003 dst = operands[0];
10004 src = operands[1];
10005
10006 /* If the destination is memory, and we do not have matching source
10007 operands, do things in registers. */
10008 matching_memory = 0;
10009 if (MEM_P (dst))
10010 {
10011 if (rtx_equal_p (dst, src))
10012 matching_memory = 1;
10013 else
10014 dst = gen_reg_rtx (mode);
10015 }
10016
10017 /* When source operand is memory, destination must match. */
10018 if (MEM_P (src) && !matching_memory)
10019 src = force_reg (mode, src);
10020
10021 /* Emit the instruction. */
10022
10023 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10024 if (reload_in_progress || code == NOT)
10025 {
10026 /* Reload doesn't know about the flags register, and doesn't know that
10027 it doesn't want to clobber it. */
10028 gcc_assert (code == NOT);
10029 emit_insn (op);
10030 }
10031 else
10032 {
10033 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10034 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10035 }
10036
10037 /* Fix up the destination if needed. */
10038 if (dst != operands[0])
10039 emit_move_insn (operands[0], dst);
10040 }
10041
10042 /* Return TRUE or FALSE depending on whether the unary operator meets the
10043 appropriate constraints. */
10044
10045 int
10046 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10047 enum machine_mode mode ATTRIBUTE_UNUSED,
10048 rtx operands[2] ATTRIBUTE_UNUSED)
10049 {
10050   /* If one of the operands is memory, source and destination must match.  */
10051 if ((MEM_P (operands[0])
10052 || MEM_P (operands[1]))
10053 && ! rtx_equal_p (operands[0], operands[1]))
10054 return FALSE;
10055 return TRUE;
10056 }
10057
10058 /* Post-reload splitter for converting an SF or DFmode value in an
10059 SSE register into an unsigned SImode. */
10060
10061 void
10062 ix86_split_convert_uns_si_sse (rtx operands[])
10063 {
10064 enum machine_mode vecmode;
10065 rtx value, large, zero_or_two31, input, two31, x;
10066
10067 large = operands[1];
10068 zero_or_two31 = operands[2];
10069 input = operands[3];
10070 two31 = operands[4];
10071 vecmode = GET_MODE (large);
10072 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10073
10074 /* Load up the value into the low element. We must ensure that the other
10075 elements are valid floats -- zero is the easiest such value. */
10076 if (MEM_P (input))
10077 {
10078 if (vecmode == V4SFmode)
10079 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10080 else
10081 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10082 }
10083 else
10084 {
10085 input = gen_rtx_REG (vecmode, REGNO (input));
10086 emit_move_insn (value, CONST0_RTX (vecmode));
10087 if (vecmode == V4SFmode)
10088 emit_insn (gen_sse_movss (value, value, input));
10089 else
10090 emit_insn (gen_sse2_movsd (value, value, input));
10091 }
10092
10093 emit_move_insn (large, two31);
10094 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10095
10096 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10097 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10098
10099 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10100 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10101
10102 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10103 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10104
10105 large = gen_rtx_REG (V4SImode, REGNO (large));
10106 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10107
10108 x = gen_rtx_REG (V4SImode, REGNO (value));
10109 if (vecmode == V4SFmode)
10110 emit_insn (gen_sse2_cvttps2dq (x, value));
10111 else
10112 emit_insn (gen_sse2_cvttpd2dq (x, value));
10113 value = x;
10114
10115 emit_insn (gen_xorv4si3 (value, value, large));
10116 }
10117
10118 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10119 Expects the 64-bit DImode to be supplied in a pair of integral
10120 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10121 -mfpmath=sse, !optimize_size only. */
10122
10123 void
10124 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10125 {
10126 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10127 rtx int_xmm, fp_xmm;
10128 rtx biases, exponents;
10129 rtx x;
10130
10131 int_xmm = gen_reg_rtx (V4SImode);
10132 if (TARGET_INTER_UNIT_MOVES)
10133 emit_insn (gen_movdi_to_sse (int_xmm, input));
10134 else if (TARGET_SSE_SPLIT_REGS)
10135 {
10136 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10137 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10138 }
10139 else
10140 {
10141 x = gen_reg_rtx (V2DImode);
10142 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10143 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10144 }
10145
10146 x = gen_rtx_CONST_VECTOR (V4SImode,
10147 gen_rtvec (4, GEN_INT (0x43300000UL),
10148 GEN_INT (0x45300000UL),
10149 const0_rtx, const0_rtx));
10150 exponents = validize_mem (force_const_mem (V4SImode, x));
10151
10152 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10153 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10154
10155 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10156 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10157 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10158 (0x1.0p84 + double(fp_value_hi_xmm)).
10159 Note these exponents differ by 32. */
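  /* For instance, an input whose low 32 bits are L and high 32 bits are H
     produces the doubles 0x1.0p52 + L and 0x1.0p84 + H * 0x1.0p32; after
     subtracting the biases and adding the two halves below, the result is
     L + H * 2^32, the full unsigned 64-bit value (e.g. input 2^32 + 5
     gives L = 5, H = 1, result 4294967301).  */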
10160
10161 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10162
10163 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10164 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10165 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10166 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10167 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10168 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10169 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10170 biases = validize_mem (force_const_mem (V2DFmode, biases));
10171 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10172
10173 /* Add the upper and lower DFmode values together. */
10174 if (TARGET_SSE3)
10175 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10176 else
10177 {
10178 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10179 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10180 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10181 }
10182
10183 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10184 }
10185
10186 /* Convert an unsigned SImode value into a DFmode. Only currently used
10187 for SSE, but applicable anywhere. */
10188
10189 void
10190 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10191 {
10192 REAL_VALUE_TYPE TWO31r;
10193 rtx x, fp;
10194
10195 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10196 NULL, 1, OPTAB_DIRECT);
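  /* The addition above wraps the unsigned input into the signed range:
     for an input u, x now holds u - 2^31 as a signed SImode value, which
     floatsidf2 converts exactly; adding 2^31.0 back below recovers u.
     For example u = 0xffffffff yields x = 0x7fffffff and the final result
     2^31 - 1 + 2^31 = 4294967295.  */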
10197
10198 fp = gen_reg_rtx (DFmode);
10199 emit_insn (gen_floatsidf2 (fp, x));
10200
10201 real_ldexp (&TWO31r, &dconst1, 31);
10202 x = const_double_from_real_value (TWO31r, DFmode);
10203
10204 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10205 if (x != target)
10206 emit_move_insn (target, x);
10207 }
10208
10209 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10210 32-bit mode; otherwise we have a direct convert instruction. */
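/* The conversion is computed as (double) hi * 2^32 + (double) (unsigned) lo:
   the signed high word goes through floatsidf2 and is scaled by 2^32, the
   unsigned low word through ix86_expand_convert_uns_sidf_sse, and the two
   doubles are added.  Both partial conversions and the scaling are exact,
   so only the final addition can round.  */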
10211
10212 void
10213 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10214 {
10215 REAL_VALUE_TYPE TWO32r;
10216 rtx fp_lo, fp_hi, x;
10217
10218 fp_lo = gen_reg_rtx (DFmode);
10219 fp_hi = gen_reg_rtx (DFmode);
10220
10221 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10222
10223 real_ldexp (&TWO32r, &dconst1, 32);
10224 x = const_double_from_real_value (TWO32r, DFmode);
10225 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10226
10227 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10228
10229 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10230 0, OPTAB_DIRECT);
10231 if (x != target)
10232 emit_move_insn (target, x);
10233 }
10234
10235 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10236 For x86_32, -mfpmath=sse, !optimize_size only. */
10237 void
10238 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10239 {
10240 REAL_VALUE_TYPE ONE16r;
10241 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10242
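  /* The input is split as input = hi * 2^16 + lo with both halves in
     [0, 2^16), so each half converts to SFmode exactly; the result is then
     rebuilt as hi * 2^16 + lo, rounding only in the final multiply and add.
     For example input = 0x12345678 gives hi = 0x1234 and lo = 0x5678.  */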
10243 real_ldexp (&ONE16r, &dconst1, 16);
10244 x = const_double_from_real_value (ONE16r, SFmode);
10245 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10246 NULL, 0, OPTAB_DIRECT);
10247 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10248 NULL, 0, OPTAB_DIRECT);
10249 fp_hi = gen_reg_rtx (SFmode);
10250 fp_lo = gen_reg_rtx (SFmode);
10251 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10252 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10253 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10254 0, OPTAB_DIRECT);
10255 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10256 0, OPTAB_DIRECT);
10257 if (!rtx_equal_p (target, fp_hi))
10258 emit_move_insn (target, fp_hi);
10259 }
10260
10261 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10262 then replicate the value for all elements of the vector
10263 register. */
10264
10265 rtx
10266 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10267 {
10268 rtvec v;
10269 switch (mode)
10270 {
10271 case SFmode:
10272 if (vect)
10273 v = gen_rtvec (4, value, value, value, value);
10274 else
10275 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10276 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10277 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10278
10279 case DFmode:
10280 if (vect)
10281 v = gen_rtvec (2, value, value);
10282 else
10283 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10284 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10285
10286 default:
10287 gcc_unreachable ();
10288 }
10289 }
10290
10291 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10292 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10293 true, then replicate the mask for all elements of the vector register.
10294 If INVERT is true, then create a mask excluding the sign bit. */
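/* For SFmode, for example, the mask is the SImode bit pattern 0x80000000
   viewed as a float (0x7fffffff when INVERT is true), optionally replicated
   across all four V4SFmode elements.  */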
10295
10296 rtx
10297 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10298 {
10299 enum machine_mode vec_mode;
10300 HOST_WIDE_INT hi, lo;
10301 int shift = 63;
10302 rtx v;
10303 rtx mask;
10304
10305 /* Find the sign bit, sign extended to 2*HWI. */
10306 if (mode == SFmode)
10307 lo = 0x80000000, hi = lo < 0;
10308 else if (HOST_BITS_PER_WIDE_INT >= 64)
10309 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10310 else
10311 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10312
10313 if (invert)
10314 lo = ~lo, hi = ~hi;
10315
10316 /* Force this value into the low part of a fp vector constant. */
10317 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10318 mask = gen_lowpart (mode, mask);
10319
10320 v = ix86_build_const_vector (mode, vect, mask);
10321 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10322 return force_reg (vec_mode, v);
10323 }
10324
10325 /* Generate code for floating point ABS or NEG. */
10326
10327 void
10328 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10329 rtx operands[])
10330 {
10331 rtx mask, set, use, clob, dst, src;
10332 bool matching_memory;
10333 bool use_sse = false;
10334 bool vector_mode = VECTOR_MODE_P (mode);
10335 enum machine_mode elt_mode = mode;
10336
10337 if (vector_mode)
10338 {
10339 elt_mode = GET_MODE_INNER (mode);
10340 use_sse = true;
10341 }
10342 else if (TARGET_SSE_MATH)
10343 use_sse = SSE_FLOAT_MODE_P (mode);
10344
10345 /* NEG and ABS performed with SSE use bitwise mask operations.
10346 Create the appropriate mask now. */
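  /* That is, NEG becomes src XOR sign-mask and ABS becomes src AND
     inverted-sign-mask; for DFmode the masks are the bit patterns
     0x8000000000000000 and 0x7fffffffffffffff respectively.  */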
10347 if (use_sse)
10348 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10349 else
10350 mask = NULL_RTX;
10351
10352 dst = operands[0];
10353 src = operands[1];
10354
10355 /* If the destination is memory, and we don't have matching source
10356 operands or we're using the x87, do things in registers. */
10357 matching_memory = false;
10358 if (MEM_P (dst))
10359 {
10360 if (use_sse && rtx_equal_p (dst, src))
10361 matching_memory = true;
10362 else
10363 dst = gen_reg_rtx (mode);
10364 }
10365 if (MEM_P (src) && !matching_memory)
10366 src = force_reg (mode, src);
10367
10368 if (vector_mode)
10369 {
10370 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10371 set = gen_rtx_SET (VOIDmode, dst, set);
10372 emit_insn (set);
10373 }
10374 else
10375 {
10376 set = gen_rtx_fmt_e (code, mode, src);
10377 set = gen_rtx_SET (VOIDmode, dst, set);
10378 if (mask)
10379 {
10380 use = gen_rtx_USE (VOIDmode, mask);
10381 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10382 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10383 gen_rtvec (3, set, use, clob)));
10384 }
10385 else
10386 emit_insn (set);
10387 }
10388
10389 if (dst != operands[0])
10390 emit_move_insn (operands[0], dst);
10391 }
10392
10393 /* Expand a copysign operation. Special case operand 0 being a constant. */
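/* Conceptually, copysign (op0, op1) is computed bitwise as
   (op0 & ~sign-mask) | (op1 & sign-mask), i.e. the magnitude of operand 1
   combined with the sign of operand 2; the split routines below emit the
   individual AND/ANDNOT/IOR steps on the vector masks built by
   ix86_build_signbit_mask.  */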
10394
10395 void
10396 ix86_expand_copysign (rtx operands[])
10397 {
10398 enum machine_mode mode, vmode;
10399 rtx dest, op0, op1, mask, nmask;
10400
10401 dest = operands[0];
10402 op0 = operands[1];
10403 op1 = operands[2];
10404
10405 mode = GET_MODE (dest);
10406 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10407
10408 if (GET_CODE (op0) == CONST_DOUBLE)
10409 {
10410 rtvec v;
10411
10412 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10413 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10414
10415 if (op0 == CONST0_RTX (mode))
10416 op0 = CONST0_RTX (vmode);
10417 else
10418 {
10419 if (mode == SFmode)
10420 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10421 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10422 else
10423 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10424 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10425 }
10426
10427 mask = ix86_build_signbit_mask (mode, 0, 0);
10428
10429 if (mode == SFmode)
10430 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10431 else
10432 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10433 }
10434 else
10435 {
10436 nmask = ix86_build_signbit_mask (mode, 0, 1);
10437 mask = ix86_build_signbit_mask (mode, 0, 0);
10438
10439 if (mode == SFmode)
10440 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10441 else
10442 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10443 }
10444 }
10445
10446 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10447 be a constant, and so has already been expanded into a vector constant. */
10448
10449 void
10450 ix86_split_copysign_const (rtx operands[])
10451 {
10452 enum machine_mode mode, vmode;
10453 rtx dest, op0, op1, mask, x;
10454
10455 dest = operands[0];
10456 op0 = operands[1];
10457 op1 = operands[2];
10458 mask = operands[3];
10459
10460 mode = GET_MODE (dest);
10461 vmode = GET_MODE (mask);
10462
10463 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10464 x = gen_rtx_AND (vmode, dest, mask);
10465 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10466
10467 if (op0 != CONST0_RTX (vmode))
10468 {
10469 x = gen_rtx_IOR (vmode, dest, op0);
10470 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10471 }
10472 }
10473
10474 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10475 so we have to do two masks. */
10476
10477 void
10478 ix86_split_copysign_var (rtx operands[])
10479 {
10480 enum machine_mode mode, vmode;
10481 rtx dest, scratch, op0, op1, mask, nmask, x;
10482
10483 dest = operands[0];
10484 scratch = operands[1];
10485 op0 = operands[2];
10486 op1 = operands[3];
10487 nmask = operands[4];
10488 mask = operands[5];
10489
10490 mode = GET_MODE (dest);
10491 vmode = GET_MODE (mask);
10492
10493 if (rtx_equal_p (op0, op1))
10494 {
10495 /* Shouldn't happen often (it's useless, obviously), but when it does
10496 we'd generate incorrect code if we continue below. */
10497 emit_move_insn (dest, op0);
10498 return;
10499 }
10500
10501 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10502 {
10503 gcc_assert (REGNO (op1) == REGNO (scratch));
10504
10505 x = gen_rtx_AND (vmode, scratch, mask);
10506 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10507
10508 dest = mask;
10509 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10510 x = gen_rtx_NOT (vmode, dest);
10511 x = gen_rtx_AND (vmode, x, op0);
10512 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10513 }
10514 else
10515 {
10516 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10517 {
10518 x = gen_rtx_AND (vmode, scratch, mask);
10519 }
10520 else /* alternative 2,4 */
10521 {
10522 gcc_assert (REGNO (mask) == REGNO (scratch));
10523 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10524 x = gen_rtx_AND (vmode, scratch, op1);
10525 }
10526 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10527
10528 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10529 {
10530 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10531 x = gen_rtx_AND (vmode, dest, nmask);
10532 }
10533 else /* alternative 3,4 */
10534 {
10535 gcc_assert (REGNO (nmask) == REGNO (dest));
10536 dest = nmask;
10537 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10538 x = gen_rtx_AND (vmode, dest, op0);
10539 }
10540 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10541 }
10542
10543 x = gen_rtx_IOR (vmode, dest, scratch);
10544 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10545 }
10546
10547 /* Return TRUE or FALSE depending on whether the first SET in INSN
10548 has source and destination with matching CC modes, and that the
10549 CC mode is at least as constrained as REQ_MODE. */
10550
10551 int
10552 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10553 {
10554 rtx set;
10555 enum machine_mode set_mode;
10556
10557 set = PATTERN (insn);
10558 if (GET_CODE (set) == PARALLEL)
10559 set = XVECEXP (set, 0, 0);
10560 gcc_assert (GET_CODE (set) == SET);
10561 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10562
10563 set_mode = GET_MODE (SET_DEST (set));
10564 switch (set_mode)
10565 {
10566 case CCNOmode:
10567 if (req_mode != CCNOmode
10568 && (req_mode != CCmode
10569 || XEXP (SET_SRC (set), 1) != const0_rtx))
10570 return 0;
10571 break;
10572 case CCmode:
10573 if (req_mode == CCGCmode)
10574 return 0;
10575 /* FALLTHRU */
10576 case CCGCmode:
10577 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10578 return 0;
10579 /* FALLTHRU */
10580 case CCGOCmode:
10581 if (req_mode == CCZmode)
10582 return 0;
10583 /* FALLTHRU */
10584 case CCZmode:
10585 break;
10586
10587 default:
10588 gcc_unreachable ();
10589 }
10590
10591 return (GET_MODE (SET_SRC (set)) == set_mode);
10592 }
10593
10594 /* Generate insn patterns to do an integer compare of OPERANDS. */
10595
10596 static rtx
10597 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10598 {
10599 enum machine_mode cmpmode;
10600 rtx tmp, flags;
10601
10602 cmpmode = SELECT_CC_MODE (code, op0, op1);
10603 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10604
10605 /* This is very simple, but making the interface the same as in the
10606 FP case makes the rest of the code easier. */
10607 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10608 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10609
10610 /* Return the test that should be put into the flags user, i.e.
10611 the bcc, scc, or cmov instruction. */
10612 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10613 }
10614
10615 /* Figure out whether to use ordered or unordered fp comparisons.
10616 Return the appropriate mode to use. */
10617
10618 enum machine_mode
10619 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10620 {
10621 /* ??? In order to make all comparisons reversible, we do all comparisons
10622 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10623    all forms of trapping and nontrapping comparisons, we can make inequality
10624 comparisons trapping again, since it results in better code when using
10625 FCOM based compares. */
10626 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10627 }
10628
10629 enum machine_mode
10630 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10631 {
10632 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10633 return ix86_fp_compare_mode (code);
10634 switch (code)
10635 {
10636 /* Only zero flag is needed. */
10637 case EQ: /* ZF=0 */
10638 case NE: /* ZF!=0 */
10639 return CCZmode;
10640 /* Codes needing carry flag. */
10641 case GEU: /* CF=0 */
10642 case GTU: /* CF=0 & ZF=0 */
10643 case LTU: /* CF=1 */
10644 case LEU: /* CF=1 | ZF=1 */
10645 return CCmode;
10646 /* Codes possibly doable only with sign flag when
10647 comparing against zero. */
10648 case GE: /* SF=OF or SF=0 */
10649 case LT: /* SF<>OF or SF=1 */
10650 if (op1 == const0_rtx)
10651 return CCGOCmode;
10652 else
10653 /* For other cases Carry flag is not required. */
10654 return CCGCmode;
10655       /* Codes doable only with the sign flag when comparing
10656          against zero, but we lack a jump instruction for them,
10657          so we need to use relational tests against overflow,
10658          which thus needs to be zero.  */
10659 case GT: /* ZF=0 & SF=OF */
10660 case LE: /* ZF=1 | SF<>OF */
10661 if (op1 == const0_rtx)
10662 return CCNOmode;
10663 else
10664 return CCGCmode;
10665       /* The strcmp pattern does (use flags), and combine may ask us for the
10666          proper mode.  */
10667 case USE:
10668 return CCmode;
10669 default:
10670 gcc_unreachable ();
10671 }
10672 }
10673
10674 /* Return the fixed registers used for condition codes. */
10675
10676 static bool
10677 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10678 {
10679 *p1 = FLAGS_REG;
10680 *p2 = FPSR_REG;
10681 return true;
10682 }
10683
10684 /* If two condition code modes are compatible, return a condition code
10685 mode which is compatible with both. Otherwise, return
10686 VOIDmode. */
10687
10688 static enum machine_mode
10689 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10690 {
10691 if (m1 == m2)
10692 return m1;
10693
10694 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10695 return VOIDmode;
10696
10697 if ((m1 == CCGCmode && m2 == CCGOCmode)
10698 || (m1 == CCGOCmode && m2 == CCGCmode))
10699 return CCGCmode;
10700
10701 switch (m1)
10702 {
10703 default:
10704 gcc_unreachable ();
10705
10706 case CCmode:
10707 case CCGCmode:
10708 case CCGOCmode:
10709 case CCNOmode:
10710 case CCZmode:
10711 switch (m2)
10712 {
10713 default:
10714 return VOIDmode;
10715
10716 case CCmode:
10717 case CCGCmode:
10718 case CCGOCmode:
10719 case CCNOmode:
10720 case CCZmode:
10721 return CCmode;
10722 }
10723
10724 case CCFPmode:
10725 case CCFPUmode:
10726 /* These are only compatible with themselves, which we already
10727 checked above. */
10728 return VOIDmode;
10729 }
10730 }
10731
10732 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10733
10734 int
10735 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10736 {
10737 enum rtx_code swapped_code = swap_condition (code);
10738 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10739 || (ix86_fp_comparison_cost (swapped_code)
10740 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10741 }
10742
10743 /* Swap, force into registers, or otherwise massage the two operands
10744 to a fp comparison. The operands are updated in place; the new
10745 comparison code is returned. */
10746
10747 static enum rtx_code
10748 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10749 {
10750 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10751 rtx op0 = *pop0, op1 = *pop1;
10752 enum machine_mode op_mode = GET_MODE (op0);
10753 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10754
10755 /* All of the unordered compare instructions only work on registers.
10756 The same is true of the fcomi compare instructions. The XFmode
10757 compare instructions require registers except when comparing
10758 against zero or when converting operand 1 from fixed point to
10759 floating point. */
10760
10761 if (!is_sse
10762 && (fpcmp_mode == CCFPUmode
10763 || (op_mode == XFmode
10764 && ! (standard_80387_constant_p (op0) == 1
10765 || standard_80387_constant_p (op1) == 1)
10766 && GET_CODE (op1) != FLOAT)
10767 || ix86_use_fcomi_compare (code)))
10768 {
10769 op0 = force_reg (op_mode, op0);
10770 op1 = force_reg (op_mode, op1);
10771 }
10772 else
10773 {
10774 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10775 things around if they appear profitable, otherwise force op0
10776 into a register. */
10777
10778 if (standard_80387_constant_p (op0) == 0
10779 || (MEM_P (op0)
10780 && ! (standard_80387_constant_p (op1) == 0
10781 || MEM_P (op1))))
10782 {
10783 rtx tmp;
10784 tmp = op0, op0 = op1, op1 = tmp;
10785 code = swap_condition (code);
10786 }
10787
10788 if (!REG_P (op0))
10789 op0 = force_reg (op_mode, op0);
10790
10791 if (CONSTANT_P (op1))
10792 {
10793 int tmp = standard_80387_constant_p (op1);
10794 if (tmp == 0)
10795 op1 = validize_mem (force_const_mem (op_mode, op1));
10796 else if (tmp == 1)
10797 {
10798 if (TARGET_CMOVE)
10799 op1 = force_reg (op_mode, op1);
10800 }
10801 else
10802 op1 = force_reg (op_mode, op1);
10803 }
10804 }
10805
10806 /* Try to rearrange the comparison to make it cheaper. */
10807 if (ix86_fp_comparison_cost (code)
10808 > ix86_fp_comparison_cost (swap_condition (code))
10809 && (REG_P (op1) || !no_new_pseudos))
10810 {
10811 rtx tmp;
10812 tmp = op0, op0 = op1, op1 = tmp;
10813 code = swap_condition (code);
10814 if (!REG_P (op0))
10815 op0 = force_reg (op_mode, op0);
10816 }
10817
10818 *pop0 = op0;
10819 *pop1 = op1;
10820 return code;
10821 }
10822
10823 /* Convert the comparison codes we use to represent FP comparisons to the integer
10824    code that will result in a proper branch.  Return UNKNOWN if no such code
10825    is available.  */
10826
10827 enum rtx_code
10828 ix86_fp_compare_code_to_integer (enum rtx_code code)
10829 {
10830 switch (code)
10831 {
10832 case GT:
10833 return GTU;
10834 case GE:
10835 return GEU;
10836 case ORDERED:
10837 case UNORDERED:
10838 return code;
10839 break;
10840 case UNEQ:
10841 return EQ;
10842 break;
10843 case UNLT:
10844 return LTU;
10845 break;
10846 case UNLE:
10847 return LEU;
10848 break;
10849 case LTGT:
10850 return NE;
10851 break;
10852 default:
10853 return UNKNOWN;
10854 }
10855 }
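
/* The mapping above relies on fcomi/fucomi (and comiss/ucomiss) setting
   ZF, PF and CF the same way an unsigned integer comparison would set
   ZF and CF, so e.g. an FP "a > b" is tested with the unsigned "above"
   condition (CF=0 and ZF=0); see the flag table in
   ix86_fp_comparison_codes below.  */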
10856
10857 /* Split comparison code CODE into comparisons we can do using branch
10858    instructions.  BYPASS_CODE is the comparison code for the branch that will
10859    branch around FIRST_CODE and SECOND_CODE.  If one of the branches
10860    is not required, its value is set to UNKNOWN.
10861    We never require more than two branches.  */
10862
10863 void
10864 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10865 enum rtx_code *first_code,
10866 enum rtx_code *second_code)
10867 {
10868 *first_code = code;
10869 *bypass_code = UNKNOWN;
10870 *second_code = UNKNOWN;
10871
10872 /* The fcomi comparison sets flags as follows:
10873
10874 cmp ZF PF CF
10875 > 0 0 0
10876 < 0 0 1
10877 = 1 0 0
10878 un 1 1 1 */
10879
10880 switch (code)
10881 {
10882 case GT: /* GTU - CF=0 & ZF=0 */
10883 case GE: /* GEU - CF=0 */
10884 case ORDERED: /* PF=0 */
10885 case UNORDERED: /* PF=1 */
10886 case UNEQ: /* EQ - ZF=1 */
10887 case UNLT: /* LTU - CF=1 */
10888 case UNLE: /* LEU - CF=1 | ZF=1 */
10889 case LTGT: /* EQ - ZF=0 */
10890 break;
10891 case LT: /* LTU - CF=1 - fails on unordered */
10892 *first_code = UNLT;
10893 *bypass_code = UNORDERED;
10894 break;
10895 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10896 *first_code = UNLE;
10897 *bypass_code = UNORDERED;
10898 break;
10899 case EQ: /* EQ - ZF=1 - fails on unordered */
10900 *first_code = UNEQ;
10901 *bypass_code = UNORDERED;
10902 break;
10903 case NE: /* NE - ZF=0 - fails on unordered */
10904 *first_code = LTGT;
10905 *second_code = UNORDERED;
10906 break;
10907 case UNGE: /* GEU - CF=0 - fails on unordered */
10908 *first_code = GE;
10909 *second_code = UNORDERED;
10910 break;
10911 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10912 *first_code = GT;
10913 *second_code = UNORDERED;
10914 break;
10915 default:
10916 gcc_unreachable ();
10917 }
10918 if (!TARGET_IEEE_FP)
10919 {
10920 *second_code = UNKNOWN;
10921 *bypass_code = UNKNOWN;
10922 }
10923 }
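
/* Informal example of the splitting above: for an IEEE-safe LT we get
   FIRST_CODE = UNLT with an UNORDERED bypass, so the branch sequence is
   roughly

	fcomi	%st(1), %st
	jp	.Lskip		(bypass: PF=1 means unordered)
	jb	.Ltarget	(UNLT: CF=1)
   .Lskip:

   while for NE a second branch (jp) to the same target is emitted in
   addition to jne, since NE is also true for unordered operands.  */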
10924
10925 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10926    All following functions use the number of instructions as the cost metric.
10927    In the future this should be tweaked to compute bytes for optimize_size and
10928    take into account the performance of various instructions on various CPUs.  */
10929 static int
10930 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10931 {
10932 if (!TARGET_IEEE_FP)
10933 return 4;
10934 /* The cost of code output by ix86_expand_fp_compare. */
10935 switch (code)
10936 {
10937 case UNLE:
10938 case UNLT:
10939 case LTGT:
10940 case GT:
10941 case GE:
10942 case UNORDERED:
10943 case ORDERED:
10944 case UNEQ:
10945 return 4;
10946 break;
10947 case LT:
10948 case NE:
10949 case EQ:
10950 case UNGE:
10951 return 5;
10952 break;
10953 case LE:
10954 case UNGT:
10955 return 6;
10956 break;
10957 default:
10958 gcc_unreachable ();
10959 }
10960 }
10961
10962 /* Return cost of comparison done using fcomi operation.
10963 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10964 static int
10965 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10966 {
10967 enum rtx_code bypass_code, first_code, second_code;
10968 /* Return arbitrarily high cost when instruction is not supported - this
10969 prevents gcc from using it. */
10970 if (!TARGET_CMOVE)
10971 return 1024;
10972 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10973 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10974 }
10975
10976 /* Return cost of comparison done using sahf operation.
10977 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10978 static int
10979 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10980 {
10981 enum rtx_code bypass_code, first_code, second_code;
10982   /* Return arbitrarily high cost when the instruction is not preferred - this
10983      prevents gcc from using it.  */
10984 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10985 return 1024;
10986 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10987 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10988 }
10989
10990 /* Compute cost of the comparison done using any method.
10991 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10992 static int
10993 ix86_fp_comparison_cost (enum rtx_code code)
10994 {
10995 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10996 int min;
10997
10998 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10999 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11000
11001 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11002 if (min > sahf_cost)
11003 min = sahf_cost;
11004 if (min > fcomi_cost)
11005 min = fcomi_cost;
11006 return min;
11007 }
11008
11009 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11010
11011 static rtx
11012 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11013 rtx *second_test, rtx *bypass_test)
11014 {
11015 enum machine_mode fpcmp_mode, intcmp_mode;
11016 rtx tmp, tmp2;
11017 int cost = ix86_fp_comparison_cost (code);
11018 enum rtx_code bypass_code, first_code, second_code;
11019
11020 fpcmp_mode = ix86_fp_compare_mode (code);
11021 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11022
11023 if (second_test)
11024 *second_test = NULL_RTX;
11025 if (bypass_test)
11026 *bypass_test = NULL_RTX;
11027
11028 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11029
11030 /* Do fcomi/sahf based test when profitable. */
11031 if ((TARGET_CMOVE || TARGET_SAHF)
11032 && (bypass_code == UNKNOWN || bypass_test)
11033 && (second_code == UNKNOWN || second_test)
11034 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11035 {
11036 if (TARGET_CMOVE)
11037 {
11038 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11039 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11040 tmp);
11041 emit_insn (tmp);
11042 }
11043 else
11044 {
11045 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11046 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11047 if (!scratch)
11048 scratch = gen_reg_rtx (HImode);
11049 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11050 emit_insn (gen_x86_sahf_1 (scratch));
11051 }
11052
11053 /* The FP codes work out to act like unsigned. */
11054 intcmp_mode = fpcmp_mode;
11055 code = first_code;
11056 if (bypass_code != UNKNOWN)
11057 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11058 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11059 const0_rtx);
11060 if (second_code != UNKNOWN)
11061 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11062 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11063 const0_rtx);
11064 }
11065 else
11066 {
11067 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11068 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11069 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11070 if (!scratch)
11071 scratch = gen_reg_rtx (HImode);
11072 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11073
11074 /* In the unordered case, we have to check C2 for NaN's, which
11075 doesn't happen to work out to anything nice combination-wise.
11076 So do some bit twiddling on the value we've got in AH to come
11077 up with an appropriate set of condition codes. */
11078
11079 intcmp_mode = CCNOmode;
11080 switch (code)
11081 {
11082 case GT:
11083 case UNGT:
11084 if (code == GT || !TARGET_IEEE_FP)
11085 {
11086 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11087 code = EQ;
11088 }
11089 else
11090 {
11091 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11092 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11093 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11094 intcmp_mode = CCmode;
11095 code = GEU;
11096 }
11097 break;
11098 case LT:
11099 case UNLT:
11100 if (code == LT && TARGET_IEEE_FP)
11101 {
11102 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11103 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11104 intcmp_mode = CCmode;
11105 code = EQ;
11106 }
11107 else
11108 {
11109 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11110 code = NE;
11111 }
11112 break;
11113 case GE:
11114 case UNGE:
11115 if (code == GE || !TARGET_IEEE_FP)
11116 {
11117 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11118 code = EQ;
11119 }
11120 else
11121 {
11122 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11123 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11124 GEN_INT (0x01)));
11125 code = NE;
11126 }
11127 break;
11128 case LE:
11129 case UNLE:
11130 if (code == LE && TARGET_IEEE_FP)
11131 {
11132 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11133 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11134 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11135 intcmp_mode = CCmode;
11136 code = LTU;
11137 }
11138 else
11139 {
11140 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11141 code = NE;
11142 }
11143 break;
11144 case EQ:
11145 case UNEQ:
11146 if (code == EQ && TARGET_IEEE_FP)
11147 {
11148 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11149 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11150 intcmp_mode = CCmode;
11151 code = EQ;
11152 }
11153 else
11154 {
11155 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11156 code = NE;
11157 break;
11158 }
11159 break;
11160 case NE:
11161 case LTGT:
11162 if (code == NE && TARGET_IEEE_FP)
11163 {
11164 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11165 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11166 GEN_INT (0x40)));
11167 code = NE;
11168 }
11169 else
11170 {
11171 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11172 code = EQ;
11173 }
11174 break;
11175
11176 case UNORDERED:
11177 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11178 code = NE;
11179 break;
11180 case ORDERED:
11181 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11182 code = EQ;
11183 break;
11184
11185 default:
11186 gcc_unreachable ();
11187 }
11188 }
11189
11190 /* Return the test that should be put into the flags user, i.e.
11191 the bcc, scc, or cmov instruction. */
11192 return gen_rtx_fmt_ee (code, VOIDmode,
11193 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11194 const0_rtx);
11195 }
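
/* As a rough illustration of the bit twiddling above, an IEEE-safe EQ
   compare without fcomi/sahf comes out approximately as

	fucomp	%st(1)
	fnstsw	%ax
	andb	$0x45, %ah	(keep C0, C2 and C3)
	cmpb	$0x40, %ah	(only C3 set: equal and ordered)
	je	...

   where 0x45 masks the C0/C2/C3 condition bits that fnstsw leaves in AH
   and 0x40 is the "equal, ordered" pattern.  */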
11196
11197 rtx
11198 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11199 {
11200 rtx op0, op1, ret;
11201 op0 = ix86_compare_op0;
11202 op1 = ix86_compare_op1;
11203
11204 if (second_test)
11205 *second_test = NULL_RTX;
11206 if (bypass_test)
11207 *bypass_test = NULL_RTX;
11208
11209 if (ix86_compare_emitted)
11210 {
11211 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11212 ix86_compare_emitted = NULL_RTX;
11213 }
11214 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11215 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11216 second_test, bypass_test);
11217 else
11218 ret = ix86_expand_int_compare (code, op0, op1);
11219
11220 return ret;
11221 }
11222
11223 /* Return true if the CODE will result in a nontrivial jump sequence.  */
11224 bool
11225 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11226 {
11227 enum rtx_code bypass_code, first_code, second_code;
11228 if (!TARGET_CMOVE)
11229 return true;
11230 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11231 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11232 }
11233
11234 void
11235 ix86_expand_branch (enum rtx_code code, rtx label)
11236 {
11237 rtx tmp;
11238
11239   /* If we have emitted a compare insn, go straight to simple.
11240      ix86_expand_compare won't emit anything if ix86_compare_emitted
11241      is non-NULL.  */
11242 if (ix86_compare_emitted)
11243 goto simple;
11244
11245 switch (GET_MODE (ix86_compare_op0))
11246 {
11247 case QImode:
11248 case HImode:
11249 case SImode:
11250 simple:
11251 tmp = ix86_expand_compare (code, NULL, NULL);
11252 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11253 gen_rtx_LABEL_REF (VOIDmode, label),
11254 pc_rtx);
11255 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11256 return;
11257
11258 case SFmode:
11259 case DFmode:
11260 case XFmode:
11261 {
11262 rtvec vec;
11263 int use_fcomi;
11264 enum rtx_code bypass_code, first_code, second_code;
11265
11266 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11267 &ix86_compare_op1);
11268
11269 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11270
11271 	/* Check whether we will use the natural sequence with one jump.  If
11272 	   so, we can expand the jump early.  Otherwise delay expansion by
11273 	   creating a compound insn so as not to confuse the optimizers.  */
11274 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11275 && TARGET_CMOVE)
11276 {
11277 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11278 gen_rtx_LABEL_REF (VOIDmode, label),
11279 pc_rtx, NULL_RTX, NULL_RTX);
11280 }
11281 else
11282 {
11283 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11284 ix86_compare_op0, ix86_compare_op1);
11285 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11286 gen_rtx_LABEL_REF (VOIDmode, label),
11287 pc_rtx);
11288 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11289
11290 use_fcomi = ix86_use_fcomi_compare (code);
11291 vec = rtvec_alloc (3 + !use_fcomi);
11292 RTVEC_ELT (vec, 0) = tmp;
11293 RTVEC_ELT (vec, 1)
11294 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11295 RTVEC_ELT (vec, 2)
11296 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11297 if (! use_fcomi)
11298 RTVEC_ELT (vec, 3)
11299 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11300
11301 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11302 }
11303 return;
11304 }
11305
11306 case DImode:
11307 if (TARGET_64BIT)
11308 goto simple;
11309 case TImode:
11310 /* Expand DImode branch into multiple compare+branch. */
11311 {
11312 rtx lo[2], hi[2], label2;
11313 enum rtx_code code1, code2, code3;
11314 enum machine_mode submode;
11315
11316 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11317 {
11318 tmp = ix86_compare_op0;
11319 ix86_compare_op0 = ix86_compare_op1;
11320 ix86_compare_op1 = tmp;
11321 code = swap_condition (code);
11322 }
11323 if (GET_MODE (ix86_compare_op0) == DImode)
11324 {
11325 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11326 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11327 submode = SImode;
11328 }
11329 else
11330 {
11331 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11332 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11333 submode = DImode;
11334 }
11335
11336 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11337 avoid two branches. This costs one extra insn, so disable when
11338 optimizing for size. */
11339
11340 if ((code == EQ || code == NE)
11341 && (!optimize_size
11342 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11343 {
11344 rtx xor0, xor1;
11345
11346 xor1 = hi[0];
11347 if (hi[1] != const0_rtx)
11348 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11349 NULL_RTX, 0, OPTAB_WIDEN);
11350
11351 xor0 = lo[0];
11352 if (lo[1] != const0_rtx)
11353 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11354 NULL_RTX, 0, OPTAB_WIDEN);
11355
11356 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11357 NULL_RTX, 0, OPTAB_WIDEN);
11358
11359 ix86_compare_op0 = tmp;
11360 ix86_compare_op1 = const0_rtx;
11361 ix86_expand_branch (code, label);
11362 return;
11363 }
11364
11365 	/* Otherwise, if we are doing a less-than or greater-or-equal-than
11366 	   comparison, op1 is a constant, and the low word is zero, then we
11367 	   can just examine the high word.  */
11368
11369 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11370 switch (code)
11371 {
11372 case LT: case LTU: case GE: case GEU:
11373 ix86_compare_op0 = hi[0];
11374 ix86_compare_op1 = hi[1];
11375 ix86_expand_branch (code, label);
11376 return;
11377 default:
11378 break;
11379 }
11380
11381 /* Otherwise, we need two or three jumps. */
11382
11383 label2 = gen_label_rtx ();
11384
11385 code1 = code;
11386 code2 = swap_condition (code);
11387 code3 = unsigned_condition (code);
11388
11389 switch (code)
11390 {
11391 case LT: case GT: case LTU: case GTU:
11392 break;
11393
11394 case LE: code1 = LT; code2 = GT; break;
11395 case GE: code1 = GT; code2 = LT; break;
11396 case LEU: code1 = LTU; code2 = GTU; break;
11397 case GEU: code1 = GTU; code2 = LTU; break;
11398
11399 case EQ: code1 = UNKNOWN; code2 = NE; break;
11400 case NE: code2 = UNKNOWN; break;
11401
11402 default:
11403 gcc_unreachable ();
11404 }
11405
11406 /*
11407 * a < b =>
11408 * if (hi(a) < hi(b)) goto true;
11409 * if (hi(a) > hi(b)) goto false;
11410 * if (lo(a) < lo(b)) goto true;
11411 * false:
11412 */
11413
11414 ix86_compare_op0 = hi[0];
11415 ix86_compare_op1 = hi[1];
11416
11417 if (code1 != UNKNOWN)
11418 ix86_expand_branch (code1, label);
11419 if (code2 != UNKNOWN)
11420 ix86_expand_branch (code2, label2);
11421
11422 ix86_compare_op0 = lo[0];
11423 ix86_compare_op1 = lo[1];
11424 ix86_expand_branch (code3, label);
11425
11426 if (code2 != UNKNOWN)
11427 emit_label (label2);
11428 return;
11429 }
11430
11431 default:
11432 gcc_unreachable ();
11433 }
11434 }
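
/* Rough example of the equality path above: a 32-bit DImode "a == b"
   branch becomes

	xorl	hi(b), hi(a)
	xorl	lo(b), lo(a)
	orl	lo(a), hi(a)
	jz	.Ltarget

   i.e. a single branch on the OR of the XORed halves instead of two
   compare-and-branch pairs.  */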
11435
11436 /* Split branch based on floating point condition. */
11437 void
11438 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11439 rtx target1, rtx target2, rtx tmp, rtx pushed)
11440 {
11441 rtx second, bypass;
11442 rtx label = NULL_RTX;
11443 rtx condition;
11444 int bypass_probability = -1, second_probability = -1, probability = -1;
11445 rtx i;
11446
11447 if (target2 != pc_rtx)
11448 {
11449 rtx tmp = target2;
11450 code = reverse_condition_maybe_unordered (code);
11451 target2 = target1;
11452 target1 = tmp;
11453 }
11454
11455 condition = ix86_expand_fp_compare (code, op1, op2,
11456 tmp, &second, &bypass);
11457
11458 /* Remove pushed operand from stack. */
11459 if (pushed)
11460 ix86_free_from_memory (GET_MODE (pushed));
11461
11462 if (split_branch_probability >= 0)
11463 {
11464       /* Distribute the probabilities across the jumps.
11465 	 Assume that BYPASS and SECOND always test
11466 	 for UNORDERED.  */
11467 probability = split_branch_probability;
11468
11469       /* A value of 1 is low enough that the probability does not need
11470 	 to be updated.  Later we may run some experiments and see
11471 	 if unordered values are more frequent in practice.  */
11472 if (bypass)
11473 bypass_probability = 1;
11474 if (second)
11475 second_probability = 1;
11476 }
11477 if (bypass != NULL_RTX)
11478 {
11479 label = gen_label_rtx ();
11480 i = emit_jump_insn (gen_rtx_SET
11481 (VOIDmode, pc_rtx,
11482 gen_rtx_IF_THEN_ELSE (VOIDmode,
11483 bypass,
11484 gen_rtx_LABEL_REF (VOIDmode,
11485 label),
11486 pc_rtx)));
11487 if (bypass_probability >= 0)
11488 REG_NOTES (i)
11489 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11490 GEN_INT (bypass_probability),
11491 REG_NOTES (i));
11492 }
11493 i = emit_jump_insn (gen_rtx_SET
11494 (VOIDmode, pc_rtx,
11495 gen_rtx_IF_THEN_ELSE (VOIDmode,
11496 condition, target1, target2)));
11497 if (probability >= 0)
11498 REG_NOTES (i)
11499 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11500 GEN_INT (probability),
11501 REG_NOTES (i));
11502 if (second != NULL_RTX)
11503 {
11504 i = emit_jump_insn (gen_rtx_SET
11505 (VOIDmode, pc_rtx,
11506 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11507 target2)));
11508 if (second_probability >= 0)
11509 REG_NOTES (i)
11510 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11511 GEN_INT (second_probability),
11512 REG_NOTES (i));
11513 }
11514 if (label != NULL_RTX)
11515 emit_label (label);
11516 }
11517
11518 int
11519 ix86_expand_setcc (enum rtx_code code, rtx dest)
11520 {
11521 rtx ret, tmp, tmpreg, equiv;
11522 rtx second_test, bypass_test;
11523
11524 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11525 return 0; /* FAIL */
11526
11527 gcc_assert (GET_MODE (dest) == QImode);
11528
11529 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11530 PUT_MODE (ret, QImode);
11531
11532 tmp = dest;
11533 tmpreg = dest;
11534
11535 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11536 if (bypass_test || second_test)
11537 {
11538 rtx test = second_test;
11539 int bypass = 0;
11540 rtx tmp2 = gen_reg_rtx (QImode);
11541 if (bypass_test)
11542 {
11543 gcc_assert (!second_test);
11544 test = bypass_test;
11545 bypass = 1;
11546 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11547 }
11548 PUT_MODE (test, QImode);
11549 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11550
11551 if (bypass)
11552 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11553 else
11554 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11555 }
11556
11557 /* Attach a REG_EQUAL note describing the comparison result. */
11558 if (ix86_compare_op0 && ix86_compare_op1)
11559 {
11560 equiv = simplify_gen_relational (code, QImode,
11561 GET_MODE (ix86_compare_op0),
11562 ix86_compare_op0, ix86_compare_op1);
11563 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11564 }
11565
11566 return 1; /* DONE */
11567 }
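
/* Example (approximate): an SSE-math SFmode setcc of UNGE needs the GE
   test plus a second test for unordered, so the code above emits
   something like

	ucomiss	%xmm1, %xmm0
	setae	%al		(CF=0: a >= b)
	setp	%dl		(PF=1: unordered)
	orb	%dl, %al

   using OR for the second test, and AND with the reversed condition
   when a bypass test is present instead.  */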
11568
11569 /* Expand a comparison setting or clearing the carry flag.  Return true when
11570    successful and set *POP to the comparison operation.  */
11571 static bool
11572 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11573 {
11574 enum machine_mode mode =
11575 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11576
11577   /* Do not handle DImode compares that go through the special path.  FP compares
11578      are handled below only when they expand to a carry flag based comparison.  */
11579 if (mode == (TARGET_64BIT ? TImode : DImode))
11580 return false;
11581 if (FLOAT_MODE_P (mode))
11582 {
11583 rtx second_test = NULL, bypass_test = NULL;
11584 rtx compare_op, compare_seq;
11585
11586       /* Shortcut: the following common codes never translate into carry flag compares.  */
11587 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11588 || code == ORDERED || code == UNORDERED)
11589 return false;
11590
11591       /* These comparisons require the zero flag; swap the operands so they won't.  */
11592 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11593 && !TARGET_IEEE_FP)
11594 {
11595 rtx tmp = op0;
11596 op0 = op1;
11597 op1 = tmp;
11598 code = swap_condition (code);
11599 }
11600
11601       /* Try to expand the comparison and verify that we end up with a carry flag
11602 	 based comparison.  This fails to be true only when we decide to expand the
11603 	 comparison using arithmetic, which is not a common scenario.  */
11604 start_sequence ();
11605 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11606 &second_test, &bypass_test);
11607 compare_seq = get_insns ();
11608 end_sequence ();
11609
11610 if (second_test || bypass_test)
11611 return false;
11612 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11613 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11614 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11615 else
11616 code = GET_CODE (compare_op);
11617 if (code != LTU && code != GEU)
11618 return false;
11619 emit_insn (compare_seq);
11620 *pop = compare_op;
11621 return true;
11622 }
11623 if (!INTEGRAL_MODE_P (mode))
11624 return false;
11625 switch (code)
11626 {
11627 case LTU:
11628 case GEU:
11629 break;
11630
11631 /* Convert a==0 into (unsigned)a<1. */
11632 case EQ:
11633 case NE:
11634 if (op1 != const0_rtx)
11635 return false;
11636 op1 = const1_rtx;
11637 code = (code == EQ ? LTU : GEU);
11638 break;
11639
11640     /* Convert a>b into b<a or a>=b+1.  */
11641 case GTU:
11642 case LEU:
11643 if (CONST_INT_P (op1))
11644 {
11645 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11646 /* Bail out on overflow. We still can swap operands but that
11647 would force loading of the constant into register. */
11648 if (op1 == const0_rtx
11649 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11650 return false;
11651 code = (code == GTU ? GEU : LTU);
11652 }
11653 else
11654 {
11655 rtx tmp = op1;
11656 op1 = op0;
11657 op0 = tmp;
11658 code = (code == GTU ? LTU : GEU);
11659 }
11660 break;
11661
11662 /* Convert a>=0 into (unsigned)a<0x80000000. */
11663 case LT:
11664 case GE:
11665 if (mode == DImode || op1 != const0_rtx)
11666 return false;
11667 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11668 code = (code == LT ? GEU : LTU);
11669 break;
11670 case LE:
11671 case GT:
11672 if (mode == DImode || op1 != constm1_rtx)
11673 return false;
11674 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11675 code = (code == LE ? GEU : LTU);
11676 break;
11677
11678 default:
11679 return false;
11680 }
11681   /* Swapping operands may cause a constant to appear as the first operand.  */
11682 if (!nonimmediate_operand (op0, VOIDmode))
11683 {
11684 if (no_new_pseudos)
11685 return false;
11686 op0 = force_reg (mode, op0);
11687 }
11688 ix86_compare_op0 = op0;
11689 ix86_compare_op1 = op1;
11690 *pop = ix86_expand_compare (code, NULL, NULL);
11691 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11692 return true;
11693 }
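
/* Example of the conversions above: "x == 0" becomes the unsigned test
   "x < 1", which puts the result in the carry flag and therefore feeds
   sbb/adc directly; e.g. "flag = (x == 0) ? -1 : 0" can then be emitted
   by the callers as roughly

	cmpl	$1, %eax
	sbbl	%edx, %edx

   without any setcc or branch.  */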
11694
11695 int
11696 ix86_expand_int_movcc (rtx operands[])
11697 {
11698 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11699 rtx compare_seq, compare_op;
11700 rtx second_test, bypass_test;
11701 enum machine_mode mode = GET_MODE (operands[0]);
11702   bool sign_bit_compare_p = false;
11703
11704 start_sequence ();
11705 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11706 compare_seq = get_insns ();
11707 end_sequence ();
11708
11709 compare_code = GET_CODE (compare_op);
11710
11711 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11712 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11713 sign_bit_compare_p = true;
11714
11715 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11716 HImode insns, we'd be swallowed in word prefix ops. */
11717
11718 if ((mode != HImode || TARGET_FAST_PREFIX)
11719 && (mode != (TARGET_64BIT ? TImode : DImode))
11720 && CONST_INT_P (operands[2])
11721 && CONST_INT_P (operands[3]))
11722 {
11723 rtx out = operands[0];
11724 HOST_WIDE_INT ct = INTVAL (operands[2]);
11725 HOST_WIDE_INT cf = INTVAL (operands[3]);
11726 HOST_WIDE_INT diff;
11727
11728 diff = ct - cf;
11729       /* Sign bit compares are better done using shifts than by using
11730 	 sbb.  */
11731 if (sign_bit_compare_p
11732 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11733 ix86_compare_op1, &compare_op))
11734 {
11735 /* Detect overlap between destination and compare sources. */
11736 rtx tmp = out;
11737
11738 if (!sign_bit_compare_p)
11739 {
11740 bool fpcmp = false;
11741
11742 compare_code = GET_CODE (compare_op);
11743
11744 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11745 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11746 {
11747 fpcmp = true;
11748 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11749 }
11750
11751 	      /* To simplify the rest of the code, restrict to the GEU case.  */
11752 if (compare_code == LTU)
11753 {
11754 HOST_WIDE_INT tmp = ct;
11755 ct = cf;
11756 cf = tmp;
11757 compare_code = reverse_condition (compare_code);
11758 code = reverse_condition (code);
11759 }
11760 else
11761 {
11762 if (fpcmp)
11763 PUT_CODE (compare_op,
11764 reverse_condition_maybe_unordered
11765 (GET_CODE (compare_op)));
11766 else
11767 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11768 }
11769 diff = ct - cf;
11770
11771 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11772 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11773 tmp = gen_reg_rtx (mode);
11774
11775 if (mode == DImode)
11776 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11777 else
11778 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11779 }
11780 else
11781 {
11782 if (code == GT || code == GE)
11783 code = reverse_condition (code);
11784 else
11785 {
11786 HOST_WIDE_INT tmp = ct;
11787 ct = cf;
11788 cf = tmp;
11789 diff = ct - cf;
11790 }
11791 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11792 ix86_compare_op1, VOIDmode, 0, -1);
11793 }
11794
11795 if (diff == 1)
11796 {
11797 /*
11798 * cmpl op0,op1
11799 * sbbl dest,dest
11800 * [addl dest, ct]
11801 *
11802 * Size 5 - 8.
11803 */
11804 if (ct)
11805 tmp = expand_simple_binop (mode, PLUS,
11806 tmp, GEN_INT (ct),
11807 copy_rtx (tmp), 1, OPTAB_DIRECT);
11808 }
11809 else if (cf == -1)
11810 {
11811 /*
11812 * cmpl op0,op1
11813 * sbbl dest,dest
11814 * orl $ct, dest
11815 *
11816 * Size 8.
11817 */
11818 tmp = expand_simple_binop (mode, IOR,
11819 tmp, GEN_INT (ct),
11820 copy_rtx (tmp), 1, OPTAB_DIRECT);
11821 }
11822 else if (diff == -1 && ct)
11823 {
11824 /*
11825 * cmpl op0,op1
11826 * sbbl dest,dest
11827 * notl dest
11828 * [addl dest, cf]
11829 *
11830 * Size 8 - 11.
11831 */
11832 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11833 if (cf)
11834 tmp = expand_simple_binop (mode, PLUS,
11835 copy_rtx (tmp), GEN_INT (cf),
11836 copy_rtx (tmp), 1, OPTAB_DIRECT);
11837 }
11838 else
11839 {
11840 /*
11841 * cmpl op0,op1
11842 * sbbl dest,dest
11843 * [notl dest]
11844 * andl cf - ct, dest
11845 * [addl dest, ct]
11846 *
11847 * Size 8 - 11.
11848 */
11849
11850 if (cf == 0)
11851 {
11852 cf = ct;
11853 ct = 0;
11854 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11855 }
11856
11857 tmp = expand_simple_binop (mode, AND,
11858 copy_rtx (tmp),
11859 gen_int_mode (cf - ct, mode),
11860 copy_rtx (tmp), 1, OPTAB_DIRECT);
11861 if (ct)
11862 tmp = expand_simple_binop (mode, PLUS,
11863 copy_rtx (tmp), GEN_INT (ct),
11864 copy_rtx (tmp), 1, OPTAB_DIRECT);
11865 }
11866
11867 if (!rtx_equal_p (tmp, out))
11868 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11869
11870 return 1; /* DONE */
11871 }
11872
11873 if (diff < 0)
11874 {
11875 HOST_WIDE_INT tmp;
11876 tmp = ct, ct = cf, cf = tmp;
11877 diff = -diff;
11878 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11879 {
11880 	      /* We may be reversing an unordered compare to a normal compare, which
11881 		 is not valid in general (we may convert a non-trapping condition
11882 		 to a trapping one); however, on i386 we currently emit all
11883 		 comparisons unordered.  */
11884 compare_code = reverse_condition_maybe_unordered (compare_code);
11885 code = reverse_condition_maybe_unordered (code);
11886 }
11887 else
11888 {
11889 compare_code = reverse_condition (compare_code);
11890 code = reverse_condition (code);
11891 }
11892 }
11893
11894 compare_code = UNKNOWN;
11895 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11896 && CONST_INT_P (ix86_compare_op1))
11897 {
11898 if (ix86_compare_op1 == const0_rtx
11899 && (code == LT || code == GE))
11900 compare_code = code;
11901 else if (ix86_compare_op1 == constm1_rtx)
11902 {
11903 if (code == LE)
11904 compare_code = LT;
11905 else if (code == GT)
11906 compare_code = GE;
11907 }
11908 }
11909
11910 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11911 if (compare_code != UNKNOWN
11912 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11913 && (cf == -1 || ct == -1))
11914 {
11915 /* If lea code below could be used, only optimize
11916 if it results in a 2 insn sequence. */
11917
11918 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11919 || diff == 3 || diff == 5 || diff == 9)
11920 || (compare_code == LT && ct == -1)
11921 || (compare_code == GE && cf == -1))
11922 {
11923 /*
11924 * notl op1 (if necessary)
11925 * sarl $31, op1
11926 * orl cf, op1
11927 */
11928 if (ct != -1)
11929 {
11930 cf = ct;
11931 ct = -1;
11932 code = reverse_condition (code);
11933 }
11934
11935 out = emit_store_flag (out, code, ix86_compare_op0,
11936 ix86_compare_op1, VOIDmode, 0, -1);
11937
11938 out = expand_simple_binop (mode, IOR,
11939 out, GEN_INT (cf),
11940 out, 1, OPTAB_DIRECT);
11941 if (out != operands[0])
11942 emit_move_insn (operands[0], out);
11943
11944 return 1; /* DONE */
11945 }
11946 }
11947
11948
11949 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11950 || diff == 3 || diff == 5 || diff == 9)
11951 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11952 && (mode != DImode
11953 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11954 {
11955 /*
11956 * xorl dest,dest
11957 * cmpl op1,op2
11958 * setcc dest
11959 * lea cf(dest*(ct-cf)),dest
11960 *
11961 * Size 14.
11962 *
11963 * This also catches the degenerate setcc-only case.
11964 */
11965
11966 rtx tmp;
11967 int nops;
11968
11969 out = emit_store_flag (out, code, ix86_compare_op0,
11970 ix86_compare_op1, VOIDmode, 0, 1);
11971
11972 nops = 0;
11973 	  /* On x86_64 the lea instruction operates on Pmode, so we need
11974 	     to get the arithmetic done in the proper mode to match.  */
11975 if (diff == 1)
11976 tmp = copy_rtx (out);
11977 else
11978 {
11979 rtx out1;
11980 out1 = copy_rtx (out);
11981 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11982 nops++;
11983 if (diff & 1)
11984 {
11985 tmp = gen_rtx_PLUS (mode, tmp, out1);
11986 nops++;
11987 }
11988 }
11989 if (cf != 0)
11990 {
11991 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11992 nops++;
11993 }
11994 if (!rtx_equal_p (tmp, out))
11995 {
11996 if (nops == 1)
11997 out = force_operand (tmp, copy_rtx (out));
11998 else
11999 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12000 }
12001 if (!rtx_equal_p (out, operands[0]))
12002 emit_move_insn (operands[0], copy_rtx (out));
12003
12004 return 1; /* DONE */
12005 }
12006
12007 /*
12008 * General case: Jumpful:
12009 * xorl dest,dest cmpl op1, op2
12010 * cmpl op1, op2 movl ct, dest
12011 * setcc dest jcc 1f
12012 * decl dest movl cf, dest
12013 * andl (cf-ct),dest 1:
12014 * addl ct,dest
12015 *
12016 * Size 20. Size 14.
12017 *
12018 * This is reasonably steep, but branch mispredict costs are
12019 * high on modern cpus, so consider failing only if optimizing
12020 * for space.
12021 */
12022
12023 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12024 && BRANCH_COST >= 2)
12025 {
12026 if (cf == 0)
12027 {
12028 cf = ct;
12029 ct = 0;
12030 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
12031 	      /* We may be reversing an unordered compare to a normal compare,
12032 		 which is not valid in general (we may convert a non-trapping
12033 		 condition to a trapping one); however, on i386 we currently
12034 		 emit all comparisons unordered.  */
12035 code = reverse_condition_maybe_unordered (code);
12036 else
12037 {
12038 code = reverse_condition (code);
12039 if (compare_code != UNKNOWN)
12040 compare_code = reverse_condition (compare_code);
12041 }
12042 }
12043
12044 if (compare_code != UNKNOWN)
12045 {
12046 /* notl op1 (if needed)
12047 sarl $31, op1
12048 andl (cf-ct), op1
12049 addl ct, op1
12050
12051 For x < 0 (resp. x <= -1) there will be no notl,
12052 so if possible swap the constants to get rid of the
12053 complement.
12054 True/false will be -1/0 while code below (store flag
12055 followed by decrement) is 0/-1, so the constants need
12056 to be exchanged once more. */
12057
12058 if (compare_code == GE || !cf)
12059 {
12060 code = reverse_condition (code);
12061 compare_code = LT;
12062 }
12063 else
12064 {
12065 HOST_WIDE_INT tmp = cf;
12066 cf = ct;
12067 ct = tmp;
12068 }
12069
12070 out = emit_store_flag (out, code, ix86_compare_op0,
12071 ix86_compare_op1, VOIDmode, 0, -1);
12072 }
12073 else
12074 {
12075 out = emit_store_flag (out, code, ix86_compare_op0,
12076 ix86_compare_op1, VOIDmode, 0, 1);
12077
12078 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12079 copy_rtx (out), 1, OPTAB_DIRECT);
12080 }
12081
12082 out = expand_simple_binop (mode, AND, copy_rtx (out),
12083 gen_int_mode (cf - ct, mode),
12084 copy_rtx (out), 1, OPTAB_DIRECT);
12085 if (ct)
12086 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12087 copy_rtx (out), 1, OPTAB_DIRECT);
12088 if (!rtx_equal_p (out, operands[0]))
12089 emit_move_insn (operands[0], copy_rtx (out));
12090
12091 return 1; /* DONE */
12092 }
12093 }
12094
12095 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12096 {
12097       /* Try a few more things with specific constants and a variable.  */
12098
12099 optab op;
12100 rtx var, orig_out, out, tmp;
12101
12102 if (BRANCH_COST <= 2)
12103 return 0; /* FAIL */
12104
12105 /* If one of the two operands is an interesting constant, load a
12106 constant with the above and mask it in with a logical operation. */
12107
12108 if (CONST_INT_P (operands[2]))
12109 {
12110 var = operands[3];
12111 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12112 operands[3] = constm1_rtx, op = and_optab;
12113 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12114 operands[3] = const0_rtx, op = ior_optab;
12115 else
12116 return 0; /* FAIL */
12117 }
12118 else if (CONST_INT_P (operands[3]))
12119 {
12120 var = operands[2];
12121 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12122 operands[2] = constm1_rtx, op = and_optab;
12123 	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12124 operands[2] = const0_rtx, op = ior_optab;
12125 else
12126 return 0; /* FAIL */
12127 }
12128 else
12129 return 0; /* FAIL */
12130
12131 orig_out = operands[0];
12132 tmp = gen_reg_rtx (mode);
12133 operands[0] = tmp;
12134
12135 /* Recurse to get the constant loaded. */
12136 if (ix86_expand_int_movcc (operands) == 0)
12137 return 0; /* FAIL */
12138
12139 /* Mask in the interesting variable. */
12140 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12141 OPTAB_WIDEN);
12142 if (!rtx_equal_p (out, orig_out))
12143 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12144
12145 return 1; /* DONE */
12146 }
12147
12148 /*
12149 * For comparison with above,
12150 *
12151 * movl cf,dest
12152 * movl ct,tmp
12153 * cmpl op1,op2
12154 * cmovcc tmp,dest
12155 *
12156 * Size 15.
12157 */
12158
12159 if (! nonimmediate_operand (operands[2], mode))
12160 operands[2] = force_reg (mode, operands[2]);
12161 if (! nonimmediate_operand (operands[3], mode))
12162 operands[3] = force_reg (mode, operands[3]);
12163
12164 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12165 {
12166 rtx tmp = gen_reg_rtx (mode);
12167 emit_move_insn (tmp, operands[3]);
12168 operands[3] = tmp;
12169 }
12170 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12171 {
12172 rtx tmp = gen_reg_rtx (mode);
12173 emit_move_insn (tmp, operands[2]);
12174 operands[2] = tmp;
12175 }
12176
12177 if (! register_operand (operands[2], VOIDmode)
12178 && (mode == QImode
12179 || ! register_operand (operands[3], VOIDmode)))
12180 operands[2] = force_reg (mode, operands[2]);
12181
12182 if (mode == QImode
12183 && ! register_operand (operands[3], VOIDmode))
12184 operands[3] = force_reg (mode, operands[3]);
12185
12186 emit_insn (compare_seq);
12187 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12188 gen_rtx_IF_THEN_ELSE (mode,
12189 compare_op, operands[2],
12190 operands[3])));
12191 if (bypass_test)
12192 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12193 gen_rtx_IF_THEN_ELSE (mode,
12194 bypass_test,
12195 copy_rtx (operands[3]),
12196 copy_rtx (operands[0]))));
12197 if (second_test)
12198 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12199 gen_rtx_IF_THEN_ELSE (mode,
12200 second_test,
12201 copy_rtx (operands[2]),
12202 copy_rtx (operands[0]))));
12203
12204 return 1; /* DONE */
12205 }
12206
12207 /* Swap, force into registers, or otherwise massage the two operands
12208 to an sse comparison with a mask result. Thus we differ a bit from
12209 ix86_prepare_fp_compare_args which expects to produce a flags result.
12210
12211 The DEST operand exists to help determine whether to commute commutative
12212 operators. The POP0/POP1 operands are updated in place. The new
12213 comparison code is returned, or UNKNOWN if not implementable. */
12214
12215 static enum rtx_code
12216 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12217 rtx *pop0, rtx *pop1)
12218 {
12219 rtx tmp;
12220
12221 switch (code)
12222 {
12223 case LTGT:
12224 case UNEQ:
12225 /* We have no LTGT as an operator. We could implement it with
12226 NE & ORDERED, but this requires an extra temporary. It's
12227 not clear that it's worth it. */
12228 return UNKNOWN;
12229
12230 case LT:
12231 case LE:
12232 case UNGT:
12233 case UNGE:
12234 /* These are supported directly. */
12235 break;
12236
12237 case EQ:
12238 case NE:
12239 case UNORDERED:
12240 case ORDERED:
12241 /* For commutative operators, try to canonicalize the destination
12242 operand to be first in the comparison - this helps reload to
12243 avoid extra moves. */
12244 if (!dest || !rtx_equal_p (dest, *pop1))
12245 break;
12246 /* FALLTHRU */
12247
12248 case GE:
12249 case GT:
12250 case UNLE:
12251 case UNLT:
12252 /* These are not supported directly. Swap the comparison operands
12253 to transform into something that is supported. */
12254 tmp = *pop0;
12255 *pop0 = *pop1;
12256 *pop1 = tmp;
12257 code = swap_condition (code);
12258 break;
12259
12260 default:
12261 gcc_unreachable ();
12262 }
12263
12264 return code;
12265 }
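
/* Example: SSE has cmpltss/cmpless/cmpunordss and friends but no
   "greater" forms, so a request for GT is handled above by swapping the
   operands and emitting LT instead; the commutative codes (EQ, NE,
   ORDERED, UNORDERED) are only swapped when that lets the destination
   double as the first source operand.  */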
12266
12267 /* Detect conditional moves that exactly match min/max operational
12268 semantics. Note that this is IEEE safe, as long as we don't
12269 interchange the operands.
12270
12271 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12272 and TRUE if the operation is successful and instructions are emitted. */
12273
12274 static bool
12275 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12276 rtx cmp_op1, rtx if_true, rtx if_false)
12277 {
12278 enum machine_mode mode;
12279 bool is_min;
12280 rtx tmp;
12281
12282 if (code == LT)
12283 ;
12284 else if (code == UNGE)
12285 {
12286 tmp = if_true;
12287 if_true = if_false;
12288 if_false = tmp;
12289 }
12290 else
12291 return false;
12292
12293 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12294 is_min = true;
12295 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12296 is_min = false;
12297 else
12298 return false;
12299
12300 mode = GET_MODE (dest);
12301
12302 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12303 but MODE may be a vector mode and thus not appropriate. */
12304 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12305 {
12306 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12307 rtvec v;
12308
12309 if_true = force_reg (mode, if_true);
12310 v = gen_rtvec (2, if_true, if_false);
12311 tmp = gen_rtx_UNSPEC (mode, v, u);
12312 }
12313 else
12314 {
12315 code = is_min ? SMIN : SMAX;
12316 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12317 }
12318
12319 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12320 return true;
12321 }
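
/* Example: "x < y ? x : y" in SFmode matches the is_min case and, with
   both -ffinite-math-only and -funsafe-math-optimizations, becomes a
   single

	minss	%xmm1, %xmm0

   Otherwise the UNSPEC form is used, because minss is not commutative
   for NaNs and signed zeros (it returns its source operand in those
   cases), so the operand order must be preserved.  */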
12322
12323 /* Expand an sse vector comparison. Return the register with the result. */
12324
12325 static rtx
12326 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12327 rtx op_true, rtx op_false)
12328 {
12329 enum machine_mode mode = GET_MODE (dest);
12330 rtx x;
12331
12332 cmp_op0 = force_reg (mode, cmp_op0);
12333 if (!nonimmediate_operand (cmp_op1, mode))
12334 cmp_op1 = force_reg (mode, cmp_op1);
12335
12336 if (optimize
12337 || reg_overlap_mentioned_p (dest, op_true)
12338 || reg_overlap_mentioned_p (dest, op_false))
12339 dest = gen_reg_rtx (mode);
12340
12341 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12342 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12343
12344 return dest;
12345 }
12346
12347 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12348 operations. This is used for both scalar and vector conditional moves. */
12349
12350 static void
12351 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12352 {
12353 enum machine_mode mode = GET_MODE (dest);
12354 rtx t2, t3, x;
12355
12356 if (op_false == CONST0_RTX (mode))
12357 {
12358 op_true = force_reg (mode, op_true);
12359 x = gen_rtx_AND (mode, cmp, op_true);
12360 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12361 }
12362 else if (op_true == CONST0_RTX (mode))
12363 {
12364 op_false = force_reg (mode, op_false);
12365 x = gen_rtx_NOT (mode, cmp);
12366 x = gen_rtx_AND (mode, x, op_false);
12367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12368 }
12369 else
12370 {
12371 op_true = force_reg (mode, op_true);
12372 op_false = force_reg (mode, op_false);
12373
12374 t2 = gen_reg_rtx (mode);
12375 if (optimize)
12376 t3 = gen_reg_rtx (mode);
12377 else
12378 t3 = dest;
12379
12380 x = gen_rtx_AND (mode, op_true, cmp);
12381 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12382
12383 x = gen_rtx_NOT (mode, cmp);
12384 x = gen_rtx_AND (mode, x, op_false);
12385 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12386
12387 x = gen_rtx_IOR (mode, t3, t2);
12388 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12389 }
12390 }
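
/* The general three-operand case above is the usual mask-and-merge
   idiom; for V4SFmode it comes out roughly as (register choice
   illustrative)

	andps	%xmm0, %xmm2	(op_true  &  mask)
	andnps	%xmm3, %xmm0	(op_false & ~mask)
	orps	%xmm2, %xmm0

   where the mask in %xmm0 is the all-ones/all-zeros result of the
   comparison computed by ix86_expand_sse_cmp.  */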
12391
12392 /* Expand a floating-point conditional move. Return true if successful. */
12393
12394 int
12395 ix86_expand_fp_movcc (rtx operands[])
12396 {
12397 enum machine_mode mode = GET_MODE (operands[0]);
12398 enum rtx_code code = GET_CODE (operands[1]);
12399 rtx tmp, compare_op, second_test, bypass_test;
12400
12401 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12402 {
12403 enum machine_mode cmode;
12404
12405 /* Since we've no cmove for sse registers, don't force bad register
12406 allocation just to gain access to it. Deny movcc when the
12407 comparison mode doesn't match the move mode. */
12408 cmode = GET_MODE (ix86_compare_op0);
12409 if (cmode == VOIDmode)
12410 cmode = GET_MODE (ix86_compare_op1);
12411 if (cmode != mode)
12412 return 0;
12413
12414 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12415 &ix86_compare_op0,
12416 &ix86_compare_op1);
12417 if (code == UNKNOWN)
12418 return 0;
12419
12420 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12421 ix86_compare_op1, operands[2],
12422 operands[3]))
12423 return 1;
12424
12425 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12426 ix86_compare_op1, operands[2], operands[3]);
12427 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12428 return 1;
12429 }
12430
12431 /* The floating point conditional move instructions don't directly
12432 support conditions resulting from a signed integer comparison. */
12433
12434 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12435
12436   /* If the comparison is not directly representable by fcmov, materialize
12437      the result with setcc and test that register against zero instead.  */
12438
12439 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12440 {
12441 gcc_assert (!second_test && !bypass_test);
12442 tmp = gen_reg_rtx (QImode);
12443 ix86_expand_setcc (code, tmp);
12444 code = NE;
12445 ix86_compare_op0 = tmp;
12446 ix86_compare_op1 = const0_rtx;
12447 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12448 }
12449 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12450 {
12451 tmp = gen_reg_rtx (mode);
12452 emit_move_insn (tmp, operands[3]);
12453 operands[3] = tmp;
12454 }
12455 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12456 {
12457 tmp = gen_reg_rtx (mode);
12458 emit_move_insn (tmp, operands[2]);
12459 operands[2] = tmp;
12460 }
12461
12462 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12463 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12464 operands[2], operands[3])));
12465 if (bypass_test)
12466 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12467 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12468 operands[3], operands[0])));
12469 if (second_test)
12470 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12471 gen_rtx_IF_THEN_ELSE (mode, second_test,
12472 operands[2], operands[0])));
12473
12474 return 1;
12475 }
12476
12477 /* Expand a floating-point vector conditional move; a vcond operation
12478 rather than a movcc operation. */
12479
12480 bool
12481 ix86_expand_fp_vcond (rtx operands[])
12482 {
12483 enum rtx_code code = GET_CODE (operands[3]);
12484 rtx cmp;
12485
12486 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12487 &operands[4], &operands[5]);
12488 if (code == UNKNOWN)
12489 return false;
12490
12491 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12492 operands[5], operands[1], operands[2]))
12493 return true;
12494
12495 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12496 operands[1], operands[2]);
12497 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12498 return true;
12499 }
12500
12501 /* Expand a signed integral vector conditional move. */
12502
12503 bool
12504 ix86_expand_int_vcond (rtx operands[])
12505 {
12506 enum machine_mode mode = GET_MODE (operands[0]);
12507 enum rtx_code code = GET_CODE (operands[3]);
12508 bool negate = false;
12509 rtx x, cop0, cop1;
12510
12511 cop0 = operands[4];
12512 cop1 = operands[5];
12513
12514 /* Canonicalize the comparison to EQ, GT, GTU. */
12515 switch (code)
12516 {
12517 case EQ:
12518 case GT:
12519 case GTU:
12520 break;
12521
12522 case NE:
12523 case LE:
12524 case LEU:
12525 code = reverse_condition (code);
12526 negate = true;
12527 break;
12528
12529 case GE:
12530 case GEU:
12531 code = reverse_condition (code);
12532 negate = true;
12533 /* FALLTHRU */
12534
12535 case LT:
12536 case LTU:
12537 code = swap_condition (code);
12538 x = cop0, cop0 = cop1, cop1 = x;
12539 break;
12540
12541 default:
12542 gcc_unreachable ();
12543 }
12544
12545 /* Unsigned parallel compare is not supported by the hardware. Play some
12546 tricks to turn this into a signed comparison against 0. */
12547 if (code == GTU)
12548 {
12549 cop0 = force_reg (mode, cop0);
12550
12551 switch (mode)
12552 {
12553 case V4SImode:
12554 {
12555 rtx t1, t2, mask;
12556
12557 /* Perform a parallel modulo subtraction. */
12558 t1 = gen_reg_rtx (mode);
12559 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12560
12561 /* Extract the original sign bit of op0. */
12562 mask = GEN_INT (-0x80000000);
12563 mask = gen_rtx_CONST_VECTOR (mode,
12564 gen_rtvec (4, mask, mask, mask, mask));
12565 mask = force_reg (mode, mask);
12566 t2 = gen_reg_rtx (mode);
12567 emit_insn (gen_andv4si3 (t2, cop0, mask));
12568
12569 /* XOR it back into the result of the subtraction. This results
12570 in the sign bit set iff we saw unsigned underflow. */
12571 x = gen_reg_rtx (mode);
12572 emit_insn (gen_xorv4si3 (x, t1, t2));
12573
12574 code = GT;
12575 }
12576 break;
12577
12578 case V16QImode:
12579 case V8HImode:
12580 /* Perform a parallel unsigned saturating subtraction. */
12581 x = gen_reg_rtx (mode);
12582 emit_insn (gen_rtx_SET (VOIDmode, x,
12583 gen_rtx_US_MINUS (mode, cop0, cop1)));
12584
12585 code = EQ;
12586 negate = !negate;
12587 break;
12588
12589 default:
12590 gcc_unreachable ();
12591 }
12592
12593 cop0 = x;
12594 cop1 = CONST0_RTX (mode);
12595 }
12596
12597 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12598 operands[1+negate], operands[2-negate]);
12599
12600 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12601 operands[2-negate]);
12602 return true;
12603 }
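
/* Example for the GTU trick above: for V16QImode, "a >u b" holds exactly
   when the unsigned saturating difference "a -us b" is non-zero, so the
   code emits psubusb and then compares the result against zero with
   pcmpeqb, flipping NEGATE so that the equal-to-zero mask selects the
   "false" operand.  */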
12604
12605 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12606 true if we should do zero extension, else sign extension. HIGH_P is
12607 true if we want the N/2 high elements, else the low elements. */
12608
12609 void
12610 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12611 {
12612 enum machine_mode imode = GET_MODE (operands[1]);
12613 rtx (*unpack)(rtx, rtx, rtx);
12614 rtx se, dest;
12615
12616 switch (imode)
12617 {
12618 case V16QImode:
12619 if (high_p)
12620 unpack = gen_vec_interleave_highv16qi;
12621 else
12622 unpack = gen_vec_interleave_lowv16qi;
12623 break;
12624 case V8HImode:
12625 if (high_p)
12626 unpack = gen_vec_interleave_highv8hi;
12627 else
12628 unpack = gen_vec_interleave_lowv8hi;
12629 break;
12630 case V4SImode:
12631 if (high_p)
12632 unpack = gen_vec_interleave_highv4si;
12633 else
12634 unpack = gen_vec_interleave_lowv4si;
12635 break;
12636 default:
12637 gcc_unreachable ();
12638 }
12639
12640 dest = gen_lowpart (imode, operands[0]);
12641
12642 if (unsigned_p)
12643 se = force_reg (imode, CONST0_RTX (imode));
12644 else
12645 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12646 operands[1], pc_rtx, pc_rtx);
12647
12648 emit_insn (unpack (dest, operands[1], se));
12649 }
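
/* Example: sign-extending the low elements of a V8HImode operand first
   builds a mask of the sign bits by comparing zero against the operand
   (pcmpgtw), so each mask element is all-ones exactly when the source
   element is negative, and then interleaves that mask in as the high
   half of each widened element with punpcklwd.  */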
12650
12651 /* Expand conditional increment or decrement using adc/sbb instructions.
12652 The default case using setcc followed by the conditional move can be
12653 done by generic code. */
12654 int
12655 ix86_expand_int_addcc (rtx operands[])
12656 {
12657 enum rtx_code code = GET_CODE (operands[1]);
12658 rtx compare_op;
12659 rtx val = const0_rtx;
12660 bool fpcmp = false;
12661 enum machine_mode mode = GET_MODE (operands[0]);
12662
12663 if (operands[3] != const1_rtx
12664 && operands[3] != constm1_rtx)
12665 return 0;
12666 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12667 ix86_compare_op1, &compare_op))
12668 return 0;
12669 code = GET_CODE (compare_op);
12670
12671 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12672 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12673 {
12674 fpcmp = true;
12675 code = ix86_fp_compare_code_to_integer (code);
12676 }
12677
12678 if (code != LTU)
12679 {
12680 val = constm1_rtx;
12681 if (fpcmp)
12682 PUT_CODE (compare_op,
12683 reverse_condition_maybe_unordered
12684 (GET_CODE (compare_op)));
12685 else
12686 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12687 }
12688 PUT_MODE (compare_op, mode);
12689
12690 /* Construct either adc or sbb insn. */
12691 if ((code == LTU) == (operands[3] == constm1_rtx))
12692 {
12693 switch (GET_MODE (operands[0]))
12694 {
12695 case QImode:
12696 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12697 break;
12698 case HImode:
12699 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12700 break;
12701 case SImode:
12702 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12703 break;
12704 case DImode:
12705 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12706 break;
12707 default:
12708 gcc_unreachable ();
12709 }
12710 }
12711 else
12712 {
12713 switch (GET_MODE (operands[0]))
12714 {
12715 case QImode:
12716 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12717 break;
12718 case HImode:
12719 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12720 break;
12721 case SImode:
12722 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12723 break;
12724 case DImode:
12725 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12726 break;
12727 default:
12728 gcc_unreachable ();
12729 }
12730 }
12731 return 1; /* DONE */
12732 }
12733
12734
12735 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12736 works for floating point parameters and non-offsettable memories.
12737 For pushes, it returns just stack offsets; the values will be saved
12738 in the right order. Maximally three parts are generated. */
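/* For instance (assuming the usual mode sizes): on a 32-bit target a DFmode
   operand is split into two SImode parts and an XFmode operand into three,
   while on a 64-bit target XFmode and TFmode operands are split into two
   parts.  */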
12739
12740 static int
12741 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12742 {
12743 int size;
12744
12745 if (!TARGET_64BIT)
12746 size = mode == XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12747 else
12748 size = (GET_MODE_SIZE (mode) + 4) / 8;
12749
12750 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12751 gcc_assert (size >= 2 && size <= 3);
12752
12753 /* Optimize constant pool reference to immediates. This is used by fp
12754 moves, which force all constants to memory to allow combining. */
12755 if (MEM_P (operand) && MEM_READONLY_P (operand))
12756 {
12757 rtx tmp = maybe_get_pool_constant (operand);
12758 if (tmp)
12759 operand = tmp;
12760 }
12761
12762 if (MEM_P (operand) && !offsettable_memref_p (operand))
12763 {
12764 /* The only non-offsettable memories we handle are pushes. */
12765 int ok = push_operand (operand, VOIDmode);
12766
12767 gcc_assert (ok);
12768
12769 operand = copy_rtx (operand);
12770 PUT_MODE (operand, Pmode);
12771 parts[0] = parts[1] = parts[2] = operand;
12772 return size;
12773 }
12774
12775 if (GET_CODE (operand) == CONST_VECTOR)
12776 {
12777 enum machine_mode imode = int_mode_for_mode (mode);
12778 /* Caution: if we looked through a constant pool memory above,
12779 the operand may actually have a different mode now. That's
12780 ok, since we want to pun this all the way back to an integer. */
12781 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12782 gcc_assert (operand != NULL);
12783 mode = imode;
12784 }
12785
12786 if (!TARGET_64BIT)
12787 {
12788 if (mode == DImode)
12789 split_di (&operand, 1, &parts[0], &parts[1]);
12790 else
12791 {
12792 if (REG_P (operand))
12793 {
12794 gcc_assert (reload_completed);
12795 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12796 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12797 if (size == 3)
12798 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12799 }
12800 else if (offsettable_memref_p (operand))
12801 {
12802 operand = adjust_address (operand, SImode, 0);
12803 parts[0] = operand;
12804 parts[1] = adjust_address (operand, SImode, 4);
12805 if (size == 3)
12806 parts[2] = adjust_address (operand, SImode, 8);
12807 }
12808 else if (GET_CODE (operand) == CONST_DOUBLE)
12809 {
12810 REAL_VALUE_TYPE r;
12811 long l[4];
12812
12813 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12814 switch (mode)
12815 {
12816 case XFmode:
12817 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12818 parts[2] = gen_int_mode (l[2], SImode);
12819 break;
12820 case DFmode:
12821 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12822 break;
12823 default:
12824 gcc_unreachable ();
12825 }
12826 parts[1] = gen_int_mode (l[1], SImode);
12827 parts[0] = gen_int_mode (l[0], SImode);
12828 }
12829 else
12830 gcc_unreachable ();
12831 }
12832 }
12833 else
12834 {
12835 if (mode == TImode)
12836 split_ti (&operand, 1, &parts[0], &parts[1]);
12837 if (mode == XFmode || mode == TFmode)
12838 {
12839 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12840 if (REG_P (operand))
12841 {
12842 gcc_assert (reload_completed);
12843 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12844 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12845 }
12846 else if (offsettable_memref_p (operand))
12847 {
12848 operand = adjust_address (operand, DImode, 0);
12849 parts[0] = operand;
12850 parts[1] = adjust_address (operand, upper_mode, 8);
12851 }
12852 else if (GET_CODE (operand) == CONST_DOUBLE)
12853 {
12854 REAL_VALUE_TYPE r;
12855 long l[4];
12856
12857 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12858 real_to_target (l, &r, mode);
12859
12860 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12861 if (HOST_BITS_PER_WIDE_INT >= 64)
12862 parts[0]
12863 = gen_int_mode
12864 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12865 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12866 DImode);
12867 else
12868 parts[0] = immed_double_const (l[0], l[1], DImode);
12869
12870 if (upper_mode == SImode)
12871 parts[1] = gen_int_mode (l[2], SImode);
12872 else if (HOST_BITS_PER_WIDE_INT >= 64)
12873 parts[1]
12874 = gen_int_mode
12875 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12876 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12877 DImode);
12878 else
12879 parts[1] = immed_double_const (l[2], l[3], DImode);
12880 }
12881 else
12882 gcc_unreachable ();
12883 }
12884 }
12885
12886 return size;
12887 }
12888
12889 /* Emit insns to perform a move or push of DI, DF, and XF values.
12890 Return false when normal moves are needed; true when all required
12891 insns have been emitted. Operands 2-4 contain the input values
12892 in the correct order; operands 5-7 contain the output values. */
12893
12894 void
12895 ix86_split_long_move (rtx operands[])
12896 {
12897 rtx part[2][3];
12898 int nparts;
12899 int push = 0;
12900 int collisions = 0;
12901 enum machine_mode mode = GET_MODE (operands[0]);
12902
12903 /* The DFmode expanders may ask us to move a double.
12904 For a 64bit target this is a single move. By hiding that fact
12905 here we simplify the i386.md splitters. */
12906 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12907 {
12908 /* Optimize constant pool reference to immediates. This is used by
12909 fp moves, which force all constants to memory to allow combining. */
12910
12911 if (MEM_P (operands[1])
12912 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12913 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12914 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12915 if (push_operand (operands[0], VOIDmode))
12916 {
12917 operands[0] = copy_rtx (operands[0]);
12918 PUT_MODE (operands[0], Pmode);
12919 }
12920 else
12921 operands[0] = gen_lowpart (DImode, operands[0]);
12922 operands[1] = gen_lowpart (DImode, operands[1]);
12923 emit_move_insn (operands[0], operands[1]);
12924 return;
12925 }
12926
12927 /* The only non-offsettable memory we handle is push. */
12928 if (push_operand (operands[0], VOIDmode))
12929 push = 1;
12930 else
12931 gcc_assert (!MEM_P (operands[0])
12932 || offsettable_memref_p (operands[0]));
12933
12934 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12935 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12936
12937 /* When emitting a push, take care of source operands on the stack. */
12938 if (push && MEM_P (operands[1])
12939 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12940 {
12941 if (nparts == 3)
12942 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12943 XEXP (part[1][2], 0));
12944 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12945 XEXP (part[1][1], 0));
12946 }
12947
12948 /* We need to do the copy in the right order in case an address register
12949 of the source overlaps the destination. */
12950 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12951 {
12952 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12953 collisions++;
12954 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12955 collisions++;
12956 if (nparts == 3
12957 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12958 collisions++;
12959
12960 /* Collision in the middle part can be handled by reordering. */
12961 if (collisions == 1 && nparts == 3
12962 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12963 {
12964 rtx tmp;
12965 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12966 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12967 }
12968
12969 /* If there are more collisions, we can't handle it by reordering.
12970 Do an lea to the last part and use only one colliding move. */
12971 else if (collisions > 1)
12972 {
12973 rtx base;
12974
12975 collisions = 1;
12976
12977 base = part[0][nparts - 1];
12978
12979 /* Handle the case when the last part isn't valid for lea.
12980 Happens in 64-bit mode storing the 12-byte XFmode. */
12981 if (GET_MODE (base) != Pmode)
12982 base = gen_rtx_REG (Pmode, REGNO (base));
12983
12984 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12985 part[1][0] = replace_equiv_address (part[1][0], base);
12986 part[1][1] = replace_equiv_address (part[1][1],
12987 plus_constant (base, UNITS_PER_WORD));
12988 if (nparts == 3)
12989 part[1][2] = replace_equiv_address (part[1][2],
12990 plus_constant (base, 8));
12991 }
12992 }
12993
12994 if (push)
12995 {
12996 if (!TARGET_64BIT)
12997 {
12998 if (nparts == 3)
12999 {
13000 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13001 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13002 emit_move_insn (part[0][2], part[1][2]);
13003 }
13004 }
13005 else
13006 {
13007 /* In 64bit mode we don't have a 32bit push available. In case this is a
13008 register, it is OK - we will just use the larger counterpart. We also
13009 retype the memory - this comes from an attempt to avoid the REX prefix
13010 on moving the second half of a TFmode value. */
13011 if (GET_MODE (part[1][1]) == SImode)
13012 {
13013 switch (GET_CODE (part[1][1]))
13014 {
13015 case MEM:
13016 part[1][1] = adjust_address (part[1][1], DImode, 0);
13017 break;
13018
13019 case REG:
13020 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13021 break;
13022
13023 default:
13024 gcc_unreachable ();
13025 }
13026
13027 if (GET_MODE (part[1][0]) == SImode)
13028 part[1][0] = part[1][1];
13029 }
13030 }
13031 emit_move_insn (part[0][1], part[1][1]);
13032 emit_move_insn (part[0][0], part[1][0]);
13033 return;
13034 }
13035
13036 /* Choose the correct order so we do not overwrite the source before it is copied. */
13037 if ((REG_P (part[0][0])
13038 && REG_P (part[1][1])
13039 && (REGNO (part[0][0]) == REGNO (part[1][1])
13040 || (nparts == 3
13041 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13042 || (collisions > 0
13043 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13044 {
13045 if (nparts == 3)
13046 {
13047 operands[2] = part[0][2];
13048 operands[3] = part[0][1];
13049 operands[4] = part[0][0];
13050 operands[5] = part[1][2];
13051 operands[6] = part[1][1];
13052 operands[7] = part[1][0];
13053 }
13054 else
13055 {
13056 operands[2] = part[0][1];
13057 operands[3] = part[0][0];
13058 operands[5] = part[1][1];
13059 operands[6] = part[1][0];
13060 }
13061 }
13062 else
13063 {
13064 if (nparts == 3)
13065 {
13066 operands[2] = part[0][0];
13067 operands[3] = part[0][1];
13068 operands[4] = part[0][2];
13069 operands[5] = part[1][0];
13070 operands[6] = part[1][1];
13071 operands[7] = part[1][2];
13072 }
13073 else
13074 {
13075 operands[2] = part[0][0];
13076 operands[3] = part[0][1];
13077 operands[5] = part[1][0];
13078 operands[6] = part[1][1];
13079 }
13080 }
13081
13082 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13083 if (optimize_size)
13084 {
13085 if (CONST_INT_P (operands[5])
13086 && operands[5] != const0_rtx
13087 && REG_P (operands[2]))
13088 {
13089 if (CONST_INT_P (operands[6])
13090 && INTVAL (operands[6]) == INTVAL (operands[5]))
13091 operands[6] = operands[2];
13092
13093 if (nparts == 3
13094 && CONST_INT_P (operands[7])
13095 && INTVAL (operands[7]) == INTVAL (operands[5]))
13096 operands[7] = operands[2];
13097 }
13098
13099 if (nparts == 3
13100 && CONST_INT_P (operands[6])
13101 && operands[6] != const0_rtx
13102 && REG_P (operands[3])
13103 && CONST_INT_P (operands[7])
13104 && INTVAL (operands[7]) == INTVAL (operands[6]))
13105 operands[7] = operands[3];
13106 }
13107
13108 emit_move_insn (operands[2], operands[5]);
13109 emit_move_insn (operands[3], operands[6]);
13110 if (nparts == 3)
13111 emit_move_insn (operands[4], operands[7]);
13112
13113 return;
13114 }
13115
13116 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13117 left shift by a constant, either using a single shift or
13118 a sequence of add instructions. */
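/* For example, a left shift by 2 may be emitted as two "add reg, reg"
   instructions when not optimizing for size and two adds cost no more than
   a constant shift according to the cost tables; otherwise a single
   "shl reg, 2" is used.  */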
13119
13120 static void
13121 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13122 {
13123 if (count == 1)
13124 {
13125 emit_insn ((mode == DImode
13126 ? gen_addsi3
13127 : gen_adddi3) (operand, operand, operand));
13128 }
13129 else if (!optimize_size
13130 && count * ix86_cost->add <= ix86_cost->shift_const)
13131 {
13132 int i;
13133 for (i = 0; i < count; i++)
13134 {
13135 emit_insn ((mode == DImode
13136 ? gen_addsi3
13137 : gen_adddi3) (operand, operand, operand));
13138 }
13139 }
13140 else
13141 emit_insn ((mode == DImode
13142 ? gen_ashlsi3
13143 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13144 }
13145
13146 void
13147 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13148 {
13149 rtx low[2], high[2];
13150 int count;
13151 const int single_width = mode == DImode ? 32 : 64;
13152
13153 if (CONST_INT_P (operands[2]))
13154 {
13155 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13156 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13157
13158 if (count >= single_width)
13159 {
13160 emit_move_insn (high[0], low[1]);
13161 emit_move_insn (low[0], const0_rtx);
13162
13163 if (count > single_width)
13164 ix86_expand_ashl_const (high[0], count - single_width, mode);
13165 }
13166 else
13167 {
13168 if (!rtx_equal_p (operands[0], operands[1]))
13169 emit_move_insn (operands[0], operands[1]);
13170 emit_insn ((mode == DImode
13171 ? gen_x86_shld_1
13172 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13173 ix86_expand_ashl_const (low[0], count, mode);
13174 }
13175 return;
13176 }
13177
13178 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13179
13180 if (operands[1] == const1_rtx)
13181 {
13182 /* Assuming we've chosen QImode-capable registers, 1 << N
13183 can be done with two 32/64-bit shifts, no branches, no cmoves. */
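/* A sketch of the idea for DImode "1 << N" on a 32-bit target:
     low = 0;  high = 0;
     low  (low byte) = ((N & 32) == 0);    setcc from the test insn
     high (low byte) = ((N & 32) != 0);
     low <<= N;  high <<= N;               the hardware masks N to 0..31
   which yields the correct 64-bit result with no branches or cmoves.  */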
13184 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13185 {
13186 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13187
13188 ix86_expand_clear (low[0]);
13189 ix86_expand_clear (high[0]);
13190 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13191
13192 d = gen_lowpart (QImode, low[0]);
13193 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13194 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13195 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13196
13197 d = gen_lowpart (QImode, high[0]);
13198 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13199 s = gen_rtx_NE (QImode, flags, const0_rtx);
13200 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13201 }
13202
13203 /* Otherwise, we can get the same results by manually performing
13204 a bit extract operation on bit 5/6, and then performing the two
13205 shifts. The two methods of getting 0/1 into low/high are exactly
13206 the same size. Avoiding the shift in the bit extract case helps
13207 pentium4 a bit; no one else seems to care much either way. */
13208 else
13209 {
13210 rtx x;
13211
13212 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13213 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13214 else
13215 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13216 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13217
13218 emit_insn ((mode == DImode
13219 ? gen_lshrsi3
13220 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13221 emit_insn ((mode == DImode
13222 ? gen_andsi3
13223 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13224 emit_move_insn (low[0], high[0]);
13225 emit_insn ((mode == DImode
13226 ? gen_xorsi3
13227 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13228 }
13229
13230 emit_insn ((mode == DImode
13231 ? gen_ashlsi3
13232 : gen_ashldi3) (low[0], low[0], operands[2]));
13233 emit_insn ((mode == DImode
13234 ? gen_ashlsi3
13235 : gen_ashldi3) (high[0], high[0], operands[2]));
13236 return;
13237 }
13238
13239 if (operands[1] == constm1_rtx)
13240 {
13241 /* For -1 << N, we can avoid the shld instruction, because we
13242 know that we're shifting 0...31/63 ones into a -1. */
13243 emit_move_insn (low[0], constm1_rtx);
13244 if (optimize_size)
13245 emit_move_insn (high[0], low[0]);
13246 else
13247 emit_move_insn (high[0], constm1_rtx);
13248 }
13249 else
13250 {
13251 if (!rtx_equal_p (operands[0], operands[1]))
13252 emit_move_insn (operands[0], operands[1]);
13253
13254 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13255 emit_insn ((mode == DImode
13256 ? gen_x86_shld_1
13257 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13258 }
13259
13260 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13261
13262 if (TARGET_CMOVE && scratch)
13263 {
13264 ix86_expand_clear (scratch);
13265 emit_insn ((mode == DImode
13266 ? gen_x86_shift_adj_1
13267 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13268 }
13269 else
13270 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13271 }
13272
13273 void
13274 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13275 {
13276 rtx low[2], high[2];
13277 int count;
13278 const int single_width = mode == DImode ? 32 : 64;
13279
13280 if (CONST_INT_P (operands[2]))
13281 {
13282 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13283 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13284
13285 if (count == single_width * 2 - 1)
13286 {
13287 emit_move_insn (high[0], high[1]);
13288 emit_insn ((mode == DImode
13289 ? gen_ashrsi3
13290 : gen_ashrdi3) (high[0], high[0],
13291 GEN_INT (single_width - 1)));
13292 emit_move_insn (low[0], high[0]);
13293
13294 }
13295 else if (count >= single_width)
13296 {
13297 emit_move_insn (low[0], high[1]);
13298 emit_move_insn (high[0], low[0]);
13299 emit_insn ((mode == DImode
13300 ? gen_ashrsi3
13301 : gen_ashrdi3) (high[0], high[0],
13302 GEN_INT (single_width - 1)));
13303 if (count > single_width)
13304 emit_insn ((mode == DImode
13305 ? gen_ashrsi3
13306 : gen_ashrdi3) (low[0], low[0],
13307 GEN_INT (count - single_width)));
13308 }
13309 else
13310 {
13311 if (!rtx_equal_p (operands[0], operands[1]))
13312 emit_move_insn (operands[0], operands[1]);
13313 emit_insn ((mode == DImode
13314 ? gen_x86_shrd_1
13315 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13316 emit_insn ((mode == DImode
13317 ? gen_ashrsi3
13318 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13319 }
13320 }
13321 else
13322 {
13323 if (!rtx_equal_p (operands[0], operands[1]))
13324 emit_move_insn (operands[0], operands[1]);
13325
13326 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13327
13328 emit_insn ((mode == DImode
13329 ? gen_x86_shrd_1
13330 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13331 emit_insn ((mode == DImode
13332 ? gen_ashrsi3
13333 : gen_ashrdi3) (high[0], high[0], operands[2]));
13334
13335 if (TARGET_CMOVE && scratch)
13336 {
13337 emit_move_insn (scratch, high[0]);
13338 emit_insn ((mode == DImode
13339 ? gen_ashrsi3
13340 : gen_ashrdi3) (scratch, scratch,
13341 GEN_INT (single_width - 1)));
13342 emit_insn ((mode == DImode
13343 ? gen_x86_shift_adj_1
13344 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13345 scratch));
13346 }
13347 else
13348 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13349 }
13350 }
13351
13352 void
13353 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13354 {
13355 rtx low[2], high[2];
13356 int count;
13357 const int single_width = mode == DImode ? 32 : 64;
13358
13359 if (CONST_INT_P (operands[2]))
13360 {
13361 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13362 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13363
13364 if (count >= single_width)
13365 {
13366 emit_move_insn (low[0], high[1]);
13367 ix86_expand_clear (high[0]);
13368
13369 if (count > single_width)
13370 emit_insn ((mode == DImode
13371 ? gen_lshrsi3
13372 : gen_lshrdi3) (low[0], low[0],
13373 GEN_INT (count - single_width)));
13374 }
13375 else
13376 {
13377 if (!rtx_equal_p (operands[0], operands[1]))
13378 emit_move_insn (operands[0], operands[1]);
13379 emit_insn ((mode == DImode
13380 ? gen_x86_shrd_1
13381 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13382 emit_insn ((mode == DImode
13383 ? gen_lshrsi3
13384 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13385 }
13386 }
13387 else
13388 {
13389 if (!rtx_equal_p (operands[0], operands[1]))
13390 emit_move_insn (operands[0], operands[1]);
13391
13392 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13393
13394 emit_insn ((mode == DImode
13395 ? gen_x86_shrd_1
13396 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13397 emit_insn ((mode == DImode
13398 ? gen_lshrsi3
13399 : gen_lshrdi3) (high[0], high[0], operands[2]));
13400
13401 /* Heh. By reversing the arguments, we can reuse this pattern. */
13402 if (TARGET_CMOVE && scratch)
13403 {
13404 ix86_expand_clear (scratch);
13405 emit_insn ((mode == DImode
13406 ? gen_x86_shift_adj_1
13407 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13408 scratch));
13409 }
13410 else
13411 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13412 }
13413 }
13414
13415 /* Predict the just emitted jump instruction to be taken with probability PROB. */
13416 static void
13417 predict_jump (int prob)
13418 {
13419 rtx insn = get_last_insn ();
13420 gcc_assert (JUMP_P (insn));
13421 REG_NOTES (insn)
13422 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13423 GEN_INT (prob),
13424 REG_NOTES (insn));
13425 }
13426
13427 /* Helper function for the string operations below. Test VARIABLE for whether
13428 it is aligned to VALUE bytes. If so, jump to the returned label. */
13429 static rtx
13430 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13431 {
13432 rtx label = gen_label_rtx ();
13433 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13434 if (GET_MODE (variable) == DImode)
13435 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13436 else
13437 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13438 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13439 1, label);
13440 if (epilogue)
13441 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13442 else
13443 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13444 return label;
13445 }
13446
13447 /* Decrease COUNTREG by VALUE. */
13448 static void
13449 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13450 {
13451 if (GET_MODE (countreg) == DImode)
13452 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13453 else
13454 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13455 }
13456
13457 /* Zero-extend the possibly SImode EXP to a Pmode register. */
13458 rtx
13459 ix86_zero_extend_to_Pmode (rtx exp)
13460 {
13461 rtx r;
13462 if (GET_MODE (exp) == VOIDmode)
13463 return force_reg (Pmode, exp);
13464 if (GET_MODE (exp) == Pmode)
13465 return copy_to_mode_reg (Pmode, exp);
13466 r = gen_reg_rtx (Pmode);
13467 emit_insn (gen_zero_extendsidi2 (r, exp));
13468 return r;
13469 }
13470
13471 /* Divide COUNTREG by SCALE. */
13472 static rtx
13473 scale_counter (rtx countreg, int scale)
13474 {
13475 rtx sc;
13476 rtx piece_size_mask;
13477
13478 if (scale == 1)
13479 return countreg;
13480 if (CONST_INT_P (countreg))
13481 return GEN_INT (INTVAL (countreg) / scale);
13482 gcc_assert (REG_P (countreg));
13483
13484 piece_size_mask = GEN_INT (scale - 1);
13485 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13486 GEN_INT (exact_log2 (scale)),
13487 NULL, 1, OPTAB_DIRECT);
13488 return sc;
13489 }
13490
13491 /* Return mode for the memcpy/memset loop counter. Prefer SImode over DImode
13492 for constant loop counts. */
13493
13494 static enum machine_mode
13495 counter_mode (rtx count_exp)
13496 {
13497 if (GET_MODE (count_exp) != VOIDmode)
13498 return GET_MODE (count_exp);
13499 if (GET_CODE (count_exp) != CONST_INT)
13500 return Pmode;
13501 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13502 return DImode;
13503 return SImode;
13504 }
13505
13506 /* When SRCPTR is non-NULL, output a simple loop to move memory
13507 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13508 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13509 equivalent loop to set memory to VALUE (supposed to be in MODE).
13510
13511 The size is rounded down to whole number of chunk size moved at once.
13512 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
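/* Roughly, the emitted code has the following shape (move case, simplified):
     size = count & -(MODE_SIZE * UNROLL);
     iter = 0;
   top:
     copy UNROLL chunks of MODE from SRCPTR + iter to DESTPTR + iter;
     iter += MODE_SIZE * UNROLL;
     if (iter < size) goto top;
     DESTPTR += iter;  SRCPTR += iter;  */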
13513
13514
13515 static void
13516 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13517 rtx destptr, rtx srcptr, rtx value,
13518 rtx count, enum machine_mode mode, int unroll,
13519 int expected_size)
13520 {
13521 rtx out_label, top_label, iter, tmp;
13522 enum machine_mode iter_mode = counter_mode (count);
13523 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13524 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13525 rtx size;
13526 rtx x_addr;
13527 rtx y_addr;
13528 int i;
13529
13530 top_label = gen_label_rtx ();
13531 out_label = gen_label_rtx ();
13532 iter = gen_reg_rtx (iter_mode);
13533
13534 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13535 NULL, 1, OPTAB_DIRECT);
13536 /* Those two should combine. */
13537 if (piece_size == const1_rtx)
13538 {
13539 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13540 true, out_label);
13541 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13542 }
13543 emit_move_insn (iter, const0_rtx);
13544
13545 emit_label (top_label);
13546
13547 tmp = convert_modes (Pmode, iter_mode, iter, true);
13548 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13549 destmem = change_address (destmem, mode, x_addr);
13550
13551 if (srcmem)
13552 {
13553 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13554 srcmem = change_address (srcmem, mode, y_addr);
13555
13556 /* When unrolling for chips that reorder memory reads and writes,
13557 we can save registers by using a single temporary.
13558 Using 4 temporaries is also overkill in 32bit mode. */
13559 if (!TARGET_64BIT && 0)
13560 {
13561 for (i = 0; i < unroll; i++)
13562 {
13563 if (i)
13564 {
13565 destmem =
13566 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13567 srcmem =
13568 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13569 }
13570 emit_move_insn (destmem, srcmem);
13571 }
13572 }
13573 else
13574 {
13575 rtx tmpreg[4];
13576 gcc_assert (unroll <= 4);
13577 for (i = 0; i < unroll; i++)
13578 {
13579 tmpreg[i] = gen_reg_rtx (mode);
13580 if (i)
13581 {
13582 srcmem =
13583 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13584 }
13585 emit_move_insn (tmpreg[i], srcmem);
13586 }
13587 for (i = 0; i < unroll; i++)
13588 {
13589 if (i)
13590 {
13591 destmem =
13592 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13593 }
13594 emit_move_insn (destmem, tmpreg[i]);
13595 }
13596 }
13597 }
13598 else
13599 for (i = 0; i < unroll; i++)
13600 {
13601 if (i)
13602 destmem =
13603 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13604 emit_move_insn (destmem, value);
13605 }
13606
13607 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13608 true, OPTAB_LIB_WIDEN);
13609 if (tmp != iter)
13610 emit_move_insn (iter, tmp);
13611
13612 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13613 true, top_label);
13614 if (expected_size != -1)
13615 {
13616 expected_size /= GET_MODE_SIZE (mode) * unroll;
13617 if (expected_size == 0)
13618 predict_jump (0);
13619 else if (expected_size > REG_BR_PROB_BASE)
13620 predict_jump (REG_BR_PROB_BASE - 1);
13621 else
13622 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13623 }
13624 else
13625 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13626 iter = ix86_zero_extend_to_Pmode (iter);
13627 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13628 true, OPTAB_LIB_WIDEN);
13629 if (tmp != destptr)
13630 emit_move_insn (destptr, tmp);
13631 if (srcptr)
13632 {
13633 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13634 true, OPTAB_LIB_WIDEN);
13635 if (tmp != srcptr)
13636 emit_move_insn (srcptr, tmp);
13637 }
13638 emit_label (out_label);
13639 }
13640
13641 /* Output a "rep; mov" instruction.
13642 Arguments have the same meaning as for the previous function. */
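/* For example, with MODE == SImode the byte count is scaled down by 4 and a
   "rep movsl" is emitted; DESTEXP and SRCEXP describe the final pointer
   values (the old pointer plus the number of bytes moved) that the rep_mov
   pattern expects.  */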
13643 static void
13644 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13645 rtx destptr, rtx srcptr,
13646 rtx count,
13647 enum machine_mode mode)
13648 {
13649 rtx destexp;
13650 rtx srcexp;
13651 rtx countreg;
13652
13653 /* If the size is known and a multiple of 4, it is shorter to use rep movsl. */
13654 if (mode == QImode && CONST_INT_P (count)
13655 && !(INTVAL (count) & 3))
13656 mode = SImode;
13657
13658 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13659 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13660 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13661 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13662 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13663 if (mode != QImode)
13664 {
13665 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13666 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13667 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13668 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13669 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13670 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13671 }
13672 else
13673 {
13674 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13675 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13676 }
13677 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13678 destexp, srcexp));
13679 }
13680
13681 /* Output a "rep; stos" instruction.
13682 Arguments have the same meaning as for the previous function. */
13683 static void
13684 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13685 rtx count,
13686 enum machine_mode mode)
13687 {
13688 rtx destexp;
13689 rtx countreg;
13690
13691 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13692 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13693 value = force_reg (mode, gen_lowpart (mode, value));
13694 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13695 if (mode != QImode)
13696 {
13697 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13698 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13699 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13700 }
13701 else
13702 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13703 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13704 }
13705
13706 static void
13707 emit_strmov (rtx destmem, rtx srcmem,
13708 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13709 {
13710 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13711 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13712 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13713 }
13714
13715 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
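/* For a constant COUNT the tail is emitted as a straight-line sequence keyed
   off the low bits of COUNT: e.g. COUNT == 23 with MAX_SIZE == 16 emits one
   4-byte, one 2-byte and one 1-byte move for the 23 & 15 == 7 trailing
   bytes.  For a variable COUNT, alignment tests branching around moves of
   decreasing size are used instead.  */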
13716 static void
13717 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13718 rtx destptr, rtx srcptr, rtx count, int max_size)
13719 {
13720 rtx src, dest;
13721 if (CONST_INT_P (count))
13722 {
13723 HOST_WIDE_INT countval = INTVAL (count);
13724 int offset = 0;
13725
13726 if ((countval & 0x10) && max_size > 16)
13727 {
13728 if (TARGET_64BIT)
13729 {
13730 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13731 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13732 }
13733 else
13734 gcc_unreachable ();
13735 offset += 16;
13736 }
13737 if ((countval & 0x08) && max_size > 8)
13738 {
13739 if (TARGET_64BIT)
13740 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13741 else
13742 {
13743 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13744 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13745 }
13746 offset += 8;
13747 }
13748 if ((countval & 0x04) && max_size > 4)
13749 {
13750 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13751 offset += 4;
13752 }
13753 if ((countval & 0x02) && max_size > 2)
13754 {
13755 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13756 offset += 2;
13757 }
13758 if ((countval & 0x01) && max_size > 1)
13759 {
13760 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13761 offset += 1;
13762 }
13763 return;
13764 }
13765 if (max_size > 8)
13766 {
13767 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13768 count, 1, OPTAB_DIRECT);
13769 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13770 count, QImode, 1, 4);
13771 return;
13772 }
13773
13774 /* When single stringop instructions are available (TARGET_SINGLE_STRINGOP),
13775 we can cheaply advance the dest and src pointers. Otherwise we save code
13776 size by maintaining an offset (zero is readily available from the preceding
13777 rep operation) and using x86 addressing modes. */
13778 if (TARGET_SINGLE_STRINGOP)
13779 {
13780 if (max_size > 4)
13781 {
13782 rtx label = ix86_expand_aligntest (count, 4, true);
13783 src = change_address (srcmem, SImode, srcptr);
13784 dest = change_address (destmem, SImode, destptr);
13785 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13786 emit_label (label);
13787 LABEL_NUSES (label) = 1;
13788 }
13789 if (max_size > 2)
13790 {
13791 rtx label = ix86_expand_aligntest (count, 2, true);
13792 src = change_address (srcmem, HImode, srcptr);
13793 dest = change_address (destmem, HImode, destptr);
13794 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13795 emit_label (label);
13796 LABEL_NUSES (label) = 1;
13797 }
13798 if (max_size > 1)
13799 {
13800 rtx label = ix86_expand_aligntest (count, 1, true);
13801 src = change_address (srcmem, QImode, srcptr);
13802 dest = change_address (destmem, QImode, destptr);
13803 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13804 emit_label (label);
13805 LABEL_NUSES (label) = 1;
13806 }
13807 }
13808 else
13809 {
13810 rtx offset = force_reg (Pmode, const0_rtx);
13811 rtx tmp;
13812
13813 if (max_size > 4)
13814 {
13815 rtx label = ix86_expand_aligntest (count, 4, true);
13816 src = change_address (srcmem, SImode, srcptr);
13817 dest = change_address (destmem, SImode, destptr);
13818 emit_move_insn (dest, src);
13819 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13820 true, OPTAB_LIB_WIDEN);
13821 if (tmp != offset)
13822 emit_move_insn (offset, tmp);
13823 emit_label (label);
13824 LABEL_NUSES (label) = 1;
13825 }
13826 if (max_size > 2)
13827 {
13828 rtx label = ix86_expand_aligntest (count, 2, true);
13829 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13830 src = change_address (srcmem, HImode, tmp);
13831 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13832 dest = change_address (destmem, HImode, tmp);
13833 emit_move_insn (dest, src);
13834 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13835 true, OPTAB_LIB_WIDEN);
13836 if (tmp != offset)
13837 emit_move_insn (offset, tmp);
13838 emit_label (label);
13839 LABEL_NUSES (label) = 1;
13840 }
13841 if (max_size > 1)
13842 {
13843 rtx label = ix86_expand_aligntest (count, 1, true);
13844 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13845 src = change_address (srcmem, QImode, tmp);
13846 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13847 dest = change_address (destmem, QImode, tmp);
13848 emit_move_insn (dest, src);
13849 emit_label (label);
13850 LABEL_NUSES (label) = 1;
13851 }
13852 }
13853 }
13854
13855 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13856 static void
13857 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13858 rtx count, int max_size)
13859 {
13860 count =
13861 expand_simple_binop (counter_mode (count), AND, count,
13862 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13863 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13864 gen_lowpart (QImode, value), count, QImode,
13865 1, max_size / 2);
13866 }
13867
13868 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13869 static void
13870 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13871 {
13872 rtx dest;
13873
13874 if (CONST_INT_P (count))
13875 {
13876 HOST_WIDE_INT countval = INTVAL (count);
13877 int offset = 0;
13878
13879 if ((countval & 0x10) && max_size > 16)
13880 {
13881 if (TARGET_64BIT)
13882 {
13883 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13884 emit_insn (gen_strset (destptr, dest, value));
13885 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13886 emit_insn (gen_strset (destptr, dest, value));
13887 }
13888 else
13889 gcc_unreachable ();
13890 offset += 16;
13891 }
13892 if ((countval & 0x08) && max_size > 8)
13893 {
13894 if (TARGET_64BIT)
13895 {
13896 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13897 emit_insn (gen_strset (destptr, dest, value));
13898 }
13899 else
13900 {
13901 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13902 emit_insn (gen_strset (destptr, dest, value));
13903 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13904 emit_insn (gen_strset (destptr, dest, value));
13905 }
13906 offset += 8;
13907 }
13908 if ((countval & 0x04) && max_size > 4)
13909 {
13910 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13911 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13912 offset += 4;
13913 }
13914 if ((countval & 0x02) && max_size > 2)
13915 {
13916 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13917 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13918 offset += 2;
13919 }
13920 if ((countval & 0x01) && max_size > 1)
13921 {
13922 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13923 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13924 offset += 1;
13925 }
13926 return;
13927 }
13928 if (max_size > 32)
13929 {
13930 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13931 return;
13932 }
13933 if (max_size > 16)
13934 {
13935 rtx label = ix86_expand_aligntest (count, 16, true);
13936 if (TARGET_64BIT)
13937 {
13938 dest = change_address (destmem, DImode, destptr);
13939 emit_insn (gen_strset (destptr, dest, value));
13940 emit_insn (gen_strset (destptr, dest, value));
13941 }
13942 else
13943 {
13944 dest = change_address (destmem, SImode, destptr);
13945 emit_insn (gen_strset (destptr, dest, value));
13946 emit_insn (gen_strset (destptr, dest, value));
13947 emit_insn (gen_strset (destptr, dest, value));
13948 emit_insn (gen_strset (destptr, dest, value));
13949 }
13950 emit_label (label);
13951 LABEL_NUSES (label) = 1;
13952 }
13953 if (max_size > 8)
13954 {
13955 rtx label = ix86_expand_aligntest (count, 8, true);
13956 if (TARGET_64BIT)
13957 {
13958 dest = change_address (destmem, DImode, destptr);
13959 emit_insn (gen_strset (destptr, dest, value));
13960 }
13961 else
13962 {
13963 dest = change_address (destmem, SImode, destptr);
13964 emit_insn (gen_strset (destptr, dest, value));
13965 emit_insn (gen_strset (destptr, dest, value));
13966 }
13967 emit_label (label);
13968 LABEL_NUSES (label) = 1;
13969 }
13970 if (max_size > 4)
13971 {
13972 rtx label = ix86_expand_aligntest (count, 4, true);
13973 dest = change_address (destmem, SImode, destptr);
13974 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13975 emit_label (label);
13976 LABEL_NUSES (label) = 1;
13977 }
13978 if (max_size > 2)
13979 {
13980 rtx label = ix86_expand_aligntest (count, 2, true);
13981 dest = change_address (destmem, HImode, destptr);
13982 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13983 emit_label (label);
13984 LABEL_NUSES (label) = 1;
13985 }
13986 if (max_size > 1)
13987 {
13988 rtx label = ix86_expand_aligntest (count, 1, true);
13989 dest = change_address (destmem, QImode, destptr);
13990 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13991 emit_label (label);
13992 LABEL_NUSES (label) = 1;
13993 }
13994 }
13995
13996 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
13997 to DESIRED_ALIGNMENT. */
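/* E.g. when the destination is only known to be byte aligned and
   DESIRED_ALIGNMENT is 8, this emits a 1-byte, a 2-byte and a 4-byte copy,
   each skipped when the corresponding destination address bit is already
   clear, and decreases COUNT accordingly.  */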
13998 static void
13999 expand_movmem_prologue (rtx destmem, rtx srcmem,
14000 rtx destptr, rtx srcptr, rtx count,
14001 int align, int desired_alignment)
14002 {
14003 if (align <= 1 && desired_alignment > 1)
14004 {
14005 rtx label = ix86_expand_aligntest (destptr, 1, false);
14006 srcmem = change_address (srcmem, QImode, srcptr);
14007 destmem = change_address (destmem, QImode, destptr);
14008 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14009 ix86_adjust_counter (count, 1);
14010 emit_label (label);
14011 LABEL_NUSES (label) = 1;
14012 }
14013 if (align <= 2 && desired_alignment > 2)
14014 {
14015 rtx label = ix86_expand_aligntest (destptr, 2, false);
14016 srcmem = change_address (srcmem, HImode, srcptr);
14017 destmem = change_address (destmem, HImode, destptr);
14018 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14019 ix86_adjust_counter (count, 2);
14020 emit_label (label);
14021 LABEL_NUSES (label) = 1;
14022 }
14023 if (align <= 4 && desired_alignment > 4)
14024 {
14025 rtx label = ix86_expand_aligntest (destptr, 4, false);
14026 srcmem = change_address (srcmem, SImode, srcptr);
14027 destmem = change_address (destmem, SImode, destptr);
14028 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14029 ix86_adjust_counter (count, 4);
14030 emit_label (label);
14031 LABEL_NUSES (label) = 1;
14032 }
14033 gcc_assert (desired_alignment <= 8);
14034 }
14035
14036 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN,
14037 to DESIRED_ALIGNMENT. */
14038 static void
14039 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14040 int align, int desired_alignment)
14041 {
14042 if (align <= 1 && desired_alignment > 1)
14043 {
14044 rtx label = ix86_expand_aligntest (destptr, 1, false);
14045 destmem = change_address (destmem, QImode, destptr);
14046 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14047 ix86_adjust_counter (count, 1);
14048 emit_label (label);
14049 LABEL_NUSES (label) = 1;
14050 }
14051 if (align <= 2 && desired_alignment > 2)
14052 {
14053 rtx label = ix86_expand_aligntest (destptr, 2, false);
14054 destmem = change_address (destmem, HImode, destptr);
14055 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14056 ix86_adjust_counter (count, 2);
14057 emit_label (label);
14058 LABEL_NUSES (label) = 1;
14059 }
14060 if (align <= 4 && desired_alignment > 4)
14061 {
14062 rtx label = ix86_expand_aligntest (destptr, 4, false);
14063 destmem = change_address (destmem, SImode, destptr);
14064 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14065 ix86_adjust_counter (count, 4);
14066 emit_label (label);
14067 LABEL_NUSES (label) = 1;
14068 }
14069 gcc_assert (desired_alignment <= 8);
14070 }
14071
14072 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
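/* E.g. when optimizing for size, a known count that is a multiple of 4
   selects rep_prefix_4_byte and anything else (including unknown counts)
   selects rep_prefix_1_byte; otherwise the per-CPU cost tables
   (ix86_cost->memcpy / ix86_cost->memset) are consulted, with an explicit
   stringop_alg taking precedence and TARGET_INLINE_ALL_STRINGOPS forcing an
   inline choice.  */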
14073 static enum stringop_alg
14074 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14075 int *dynamic_check)
14076 {
14077 const struct stringop_algs * algs;
14078
14079 *dynamic_check = -1;
14080 if (memset)
14081 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14082 else
14083 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14084 if (stringop_alg != no_stringop)
14085 return stringop_alg;
14086 /* rep; movq or rep; movl is the smallest variant. */
14087 else if (optimize_size)
14088 {
14089 if (!count || (count & 3))
14090 return rep_prefix_1_byte;
14091 else
14092 return rep_prefix_4_byte;
14093 }
14094 /* Very tiny blocks are best handled via the loop; REP is expensive to
14095 set up. */
14096 else if (expected_size != -1 && expected_size < 4)
14097 return loop_1_byte;
14098 else if (expected_size != -1)
14099 {
14100 unsigned int i;
14101 enum stringop_alg alg = libcall;
14102 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14103 {
14104 gcc_assert (algs->size[i].max);
14105 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14106 {
14107 if (algs->size[i].alg != libcall)
14108 alg = algs->size[i].alg;
14109 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14110 last non-libcall inline algorithm. */
14111 if (TARGET_INLINE_ALL_STRINGOPS)
14112 {
14113 /* When the current size is best copied by a libcall,
14114 but we are still forced to inline, run the heuristic below
14115 that will pick code for medium-sized blocks. */
14116 if (alg != libcall)
14117 return alg;
14118 break;
14119 }
14120 else
14121 return algs->size[i].alg;
14122 }
14123 }
14124 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14125 }
14126 /* When asked to inline the call anyway, try to pick a meaningful choice.
14127 We look for the maximal size of block that is faster to copy by hand and
14128 take blocks of at most that size, guessing that the average size will
14129 be roughly half of the block.
14130
14131 If this turns out to be bad, we might simply specify the preferred
14132 choice in ix86_costs. */
14133 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14134 && algs->unknown_size == libcall)
14135 {
14136 int max = -1;
14137 enum stringop_alg alg;
14138 int i;
14139
14140 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14141 if (algs->size[i].alg != libcall && algs->size[i].alg)
14142 max = algs->size[i].max;
14143 if (max == -1)
14144 max = 4096;
14145 alg = decide_alg (count, max / 2, memset, dynamic_check);
14146 gcc_assert (*dynamic_check == -1);
14147 gcc_assert (alg != libcall);
14148 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14149 *dynamic_check = max;
14150 return alg;
14151 }
14152 return algs->unknown_size;
14153 }
14154
14155 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14156 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14157 static int
14158 decide_alignment (int align,
14159 enum stringop_alg alg,
14160 int expected_size)
14161 {
14162 int desired_align = 0;
14163 switch (alg)
14164 {
14165 case no_stringop:
14166 gcc_unreachable ();
14167 case loop:
14168 case unrolled_loop:
14169 desired_align = GET_MODE_SIZE (Pmode);
14170 break;
14171 case rep_prefix_8_byte:
14172 desired_align = 8;
14173 break;
14174 case rep_prefix_4_byte:
14175 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14176 copying a whole cacheline at once. */
14177 if (TARGET_PENTIUMPRO)
14178 desired_align = 8;
14179 else
14180 desired_align = 4;
14181 break;
14182 case rep_prefix_1_byte:
14183 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14184 copying a whole cacheline at once. */
14185 if (TARGET_PENTIUMPRO)
14186 desired_align = 8;
14187 else
14188 desired_align = 1;
14189 break;
14190 case loop_1_byte:
14191 desired_align = 1;
14192 break;
14193 case libcall:
14194 return 0;
14195 }
14196
14197 if (optimize_size)
14198 desired_align = 1;
14199 if (desired_align < align)
14200 desired_align = align;
14201 if (expected_size != -1 && expected_size < 4)
14202 desired_align = align;
14203 return desired_align;
14204 }
14205
14206 /* Return the smallest power of 2 greater than VAL. */
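/* E.g. VAL == 4 yields 8 and VAL == 7 yields 8; the result is strictly
   greater than VAL.  */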
14207 static int
14208 smallest_pow2_greater_than (int val)
14209 {
14210 int ret = 1;
14211 while (ret <= val)
14212 ret <<= 1;
14213 return ret;
14214 }
14215
14216 /* Expand string move (memcpy) operation. Use i386 string operations when
14217 profitable. ix86_expand_setmem contains similar code. The code depends upon
14218 architecture, block size and alignment, but always has the same
14219 overall structure:
14220
14221 1) Prologue guard: Conditional that jumps up to epilogues for small
14222 blocks that can be handled by epilogue alone. This is faster but
14223 also needed for correctness, since the prologue assumes the block is larger
14224 than the desired alignment.
14225
14226 Optional dynamic check for size and libcall for large
14227 blocks is emitted here too, with -minline-stringops-dynamically.
14228
14229 2) Prologue: copy first few bytes in order to get destination aligned
14230 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14231 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14232 We emit either a jump tree on power of two sized blocks, or a byte loop.
14233
14234 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14235 with specified algorithm.
14236
14237 4) Epilogue: code copying tail of the block that is too small to be
14238 handled by main body (or up to size guarded by prologue guard). */
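/* A rough sketch of the emitted structure for, say, rep_prefix_4_byte:
     if (count < epilogue_size_needed) goto epilogue;
     copy a few bytes until the destination reaches the desired alignment;
     rep movsl for the count / 4 whole words;
   epilogue:
     copy the remaining count & (epilogue_size_needed - 1) bytes;  */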
14239
14240 int
14241 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14242 rtx expected_align_exp, rtx expected_size_exp)
14243 {
14244 rtx destreg;
14245 rtx srcreg;
14246 rtx label = NULL;
14247 rtx tmp;
14248 rtx jump_around_label = NULL;
14249 HOST_WIDE_INT align = 1;
14250 unsigned HOST_WIDE_INT count = 0;
14251 HOST_WIDE_INT expected_size = -1;
14252 int size_needed = 0, epilogue_size_needed;
14253 int desired_align = 0;
14254 enum stringop_alg alg;
14255 int dynamic_check;
14256
14257 if (CONST_INT_P (align_exp))
14258 align = INTVAL (align_exp);
14259 /* i386 can do misaligned access at reasonably increased cost. */
14260 if (CONST_INT_P (expected_align_exp)
14261 && INTVAL (expected_align_exp) > align)
14262 align = INTVAL (expected_align_exp);
14263 if (CONST_INT_P (count_exp))
14264 count = expected_size = INTVAL (count_exp);
14265 if (CONST_INT_P (expected_size_exp) && count == 0)
14266 expected_size = INTVAL (expected_size_exp);
14267
14268 /* Step 0: Decide on preferred algorithm, desired alignment and
14269 size of chunks to be copied by main loop. */
14270
14271 alg = decide_alg (count, expected_size, false, &dynamic_check);
14272 desired_align = decide_alignment (align, alg, expected_size);
14273
14274 if (!TARGET_ALIGN_STRINGOPS)
14275 align = desired_align;
14276
14277 if (alg == libcall)
14278 return 0;
14279 gcc_assert (alg != no_stringop);
14280 if (!count)
14281 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14282 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14283 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14284 switch (alg)
14285 {
14286 case libcall:
14287 case no_stringop:
14288 gcc_unreachable ();
14289 case loop:
14290 size_needed = GET_MODE_SIZE (Pmode);
14291 break;
14292 case unrolled_loop:
14293 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14294 break;
14295 case rep_prefix_8_byte:
14296 size_needed = 8;
14297 break;
14298 case rep_prefix_4_byte:
14299 size_needed = 4;
14300 break;
14301 case rep_prefix_1_byte:
14302 case loop_1_byte:
14303 size_needed = 1;
14304 break;
14305 }
14306
14307 epilogue_size_needed = size_needed;
14308
14309 /* Step 1: Prologue guard. */
14310
14311 /* Alignment code needs count to be in register. */
14312 if (CONST_INT_P (count_exp) && desired_align > align)
14313 {
14314 enum machine_mode mode = SImode;
14315 if (TARGET_64BIT && (count & ~0xffffffff))
14316 mode = DImode;
14317 count_exp = force_reg (mode, count_exp);
14318 }
14319 gcc_assert (desired_align >= 1 && align >= 1);
14320
14321 /* Ensure that alignment prologue won't copy past end of block. */
14322 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14323 {
14324 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14325 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14326 Make sure it is power of 2. */
14327 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14328
14329 label = gen_label_rtx ();
14330 emit_cmp_and_jump_insns (count_exp,
14331 GEN_INT (epilogue_size_needed),
14332 LTU, 0, counter_mode (count_exp), 1, label);
14333 if (GET_CODE (count_exp) == CONST_INT)
14334 ;
14335 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14336 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14337 else
14338 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14339 }
14340 /* Emit code to decide at runtime whether a library call or inline code
14341 should be used. */
14342 if (dynamic_check != -1)
14343 {
14344 rtx hot_label = gen_label_rtx ();
14345 jump_around_label = gen_label_rtx ();
14346 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14347 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14348 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14349 emit_block_move_via_libcall (dst, src, count_exp, false);
14350 emit_jump (jump_around_label);
14351 emit_label (hot_label);
14352 }
14353
14354 /* Step 2: Alignment prologue. */
14355
14356 if (desired_align > align)
14357 {
14358 /* Except for the first move in the epilogue, we no longer know
14359 the constant offset in aliasing info. It doesn't seem worth
14360 the pain to maintain it for the first move, so throw away
14361 the info early. */
14362 src = change_address (src, BLKmode, srcreg);
14363 dst = change_address (dst, BLKmode, destreg);
14364 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14365 desired_align);
14366 }
14367 if (label && size_needed == 1)
14368 {
14369 emit_label (label);
14370 LABEL_NUSES (label) = 1;
14371 label = NULL;
14372 }
14373
14374 /* Step 3: Main loop. */
14375
14376 switch (alg)
14377 {
14378 case libcall:
14379 case no_stringop:
14380 gcc_unreachable ();
14381 case loop_1_byte:
14382 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14383 count_exp, QImode, 1, expected_size);
14384 break;
14385 case loop:
14386 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14387 count_exp, Pmode, 1, expected_size);
14388 break;
14389 case unrolled_loop:
14390 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14391 registers for 4 temporaries anyway. */
14392 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14393 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14394 expected_size);
14395 break;
14396 case rep_prefix_8_byte:
14397 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14398 DImode);
14399 break;
14400 case rep_prefix_4_byte:
14401 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14402 SImode);
14403 break;
14404 case rep_prefix_1_byte:
14405 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14406 QImode);
14407 break;
14408 }
14409 /* Properly adjust the offset of src and dest memory for aliasing. */
14410 if (CONST_INT_P (count_exp))
14411 {
14412 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14413 (count / size_needed) * size_needed);
14414 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14415 (count / size_needed) * size_needed);
14416 }
14417 else
14418 {
14419 src = change_address (src, BLKmode, srcreg);
14420 dst = change_address (dst, BLKmode, destreg);
14421 }
14422
14423 /* Step 4: Epilogue to copy the remaining bytes. */
14424
14425 if (label)
14426 {
14427 /* When the main loop is done, COUNT_EXP might hold original count,
14428 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14429 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14430 bytes. Compensate if needed. */
14431
14432 if (size_needed < epilogue_size_needed)
14433 {
14434 tmp =
14435 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14436 GEN_INT (size_needed - 1), count_exp, 1,
14437 OPTAB_DIRECT);
14438 if (tmp != count_exp)
14439 emit_move_insn (count_exp, tmp);
14440 }
14441 emit_label (label);
14442 LABEL_NUSES (label) = 1;
14443 }
14444
14445 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14446 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14447 epilogue_size_needed);
14448 if (jump_around_label)
14449 emit_label (jump_around_label);
14450 return 1;
14451 }
14452
14453 /* Helper function for memset. For QImode value 0xXY produce
14454 0xXYXYXYXY of the width specified by MODE. This is essentially
14455 a * 0x01010101, but we can do slightly better than
14456 synth_mult by unwinding the sequence by hand on CPUs with
14457 slow multiply. */
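/* For example, with VAL == 0xAB and MODE == SImode the shift-and-or
   sequence below computes
       reg               0x000000AB
       reg |= reg << 8   0x0000ABAB
       reg |= reg << 16  0xABABABAB
   which equals 0xAB * 0x01010101; the DImode case adds one more
   "reg |= reg << 32" step. */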
14458 static rtx
14459 promote_duplicated_reg (enum machine_mode mode, rtx val)
14460 {
14461 enum machine_mode valmode = GET_MODE (val);
14462 rtx tmp;
14463 int nops = mode == DImode ? 3 : 2;
14464
14465 gcc_assert (mode == SImode || mode == DImode);
14466 if (val == const0_rtx)
14467 return copy_to_mode_reg (mode, const0_rtx);
14468 if (CONST_INT_P (val))
14469 {
14470 HOST_WIDE_INT v = INTVAL (val) & 255;
14471
14472 v |= v << 8;
14473 v |= v << 16;
14474 if (mode == DImode)
14475 v |= (v << 16) << 16;
14476 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14477 }
14478
14479 if (valmode == VOIDmode)
14480 valmode = QImode;
14481 if (valmode != QImode)
14482 val = gen_lowpart (QImode, val);
14483 if (mode == QImode)
14484 return val;
14485 if (!TARGET_PARTIAL_REG_STALL)
14486 nops--;
14487 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14488 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14489 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14490 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14491 {
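/* The multiply (mult_init plus one mult_bit per set bit of the 0x01...01
   constant) is no more expensive than the shift-and-or sequence here,
   so expand via MULT. */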
14492 rtx reg = convert_modes (mode, QImode, val, true);
14493 tmp = promote_duplicated_reg (mode, const1_rtx);
14494 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14495 OPTAB_DIRECT);
14496 }
14497 else
14498 {
14499 rtx reg = convert_modes (mode, QImode, val, true);
14500
14501 if (!TARGET_PARTIAL_REG_STALL)
14502 if (mode == SImode)
14503 emit_insn (gen_movsi_insv_1 (reg, reg));
14504 else
14505 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14506 else
14507 {
14508 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14509 NULL, 1, OPTAB_DIRECT);
14510 reg =
14511 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14512 }
14513 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14514 NULL, 1, OPTAB_DIRECT);
14515 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14516 if (mode == SImode)
14517 return reg;
14518 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14519 NULL, 1, OPTAB_DIRECT);
14520 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14521 return reg;
14522 }
14523 }
14524
14525 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14526 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14527 alignment from ALIGN to DESIRED_ALIGN. */
14528 static rtx
14529 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14530 {
14531 rtx promoted_val;
14532
14533 if (TARGET_64BIT
14534 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14535 promoted_val = promote_duplicated_reg (DImode, val);
14536 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14537 promoted_val = promote_duplicated_reg (SImode, val);
14538 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14539 promoted_val = promote_duplicated_reg (HImode, val);
14540 else
14541 promoted_val = val;
14542
14543 return promoted_val;
14544 }
14545
14546 /* Expand string clear operation (bzero). Use i386 string operations when
14547 profitable. See expand_movmem comment for explanation of individual
14548 steps performed. */
14549 int
14550 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14551 rtx expected_align_exp, rtx expected_size_exp)
14552 {
14553 rtx destreg;
14554 rtx label = NULL;
14555 rtx tmp;
14556 rtx jump_around_label = NULL;
14557 HOST_WIDE_INT align = 1;
14558 unsigned HOST_WIDE_INT count = 0;
14559 HOST_WIDE_INT expected_size = -1;
14560 int size_needed = 0, epilogue_size_needed;
14561 int desired_align = 0;
14562 enum stringop_alg alg;
14563 rtx promoted_val = NULL;
14564 bool force_loopy_epilogue = false;
14565 int dynamic_check;
14566
14567 if (CONST_INT_P (align_exp))
14568 align = INTVAL (align_exp);
14569 /* i386 can do misaligned access at a reasonable increase in cost. */
14570 if (CONST_INT_P (expected_align_exp)
14571 && INTVAL (expected_align_exp) > align)
14572 align = INTVAL (expected_align_exp);
14573 if (CONST_INT_P (count_exp))
14574 count = expected_size = INTVAL (count_exp);
14575 if (CONST_INT_P (expected_size_exp) && count == 0)
14576 expected_size = INTVAL (expected_size_exp);
14577
14578 /* Step 0: Decide on preferred algorithm, desired alignment and
14579 size of chunks to be copied by main loop. */
14580
14581 alg = decide_alg (count, expected_size, true, &dynamic_check);
14582 desired_align = decide_alignment (align, alg, expected_size);
14583
14584 if (!TARGET_ALIGN_STRINGOPS)
14585 align = desired_align;
14586
14587 if (alg == libcall)
14588 return 0;
14589 gcc_assert (alg != no_stringop);
14590 if (!count)
14591 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14592 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14593 switch (alg)
14594 {
14595 case libcall:
14596 case no_stringop:
14597 gcc_unreachable ();
14598 case loop:
14599 size_needed = GET_MODE_SIZE (Pmode);
14600 break;
14601 case unrolled_loop:
14602 size_needed = GET_MODE_SIZE (Pmode) * 4;
14603 break;
14604 case rep_prefix_8_byte:
14605 size_needed = 8;
14606 break;
14607 case rep_prefix_4_byte:
14608 size_needed = 4;
14609 break;
14610 case rep_prefix_1_byte:
14611 case loop_1_byte:
14612 size_needed = 1;
14613 break;
14614 }
14615 epilogue_size_needed = size_needed;
14616
14617 /* Step 1: Prologue guard. */
14618
14619 /* Alignment code needs count to be in a register. */
14620 if (CONST_INT_P (count_exp) && desired_align > align)
14621 {
14622 enum machine_mode mode = SImode;
14623 if (TARGET_64BIT && (count & ~0xffffffff))
14624 mode = DImode;
14625 count_exp = force_reg (mode, count_exp);
14626 }
14627 /* Do the cheap promotion to allow better CSE across the
14628 main loop and epilogue (i.e. one load of the big constant in
14629 front of all the code). */
14630 if (CONST_INT_P (val_exp))
14631 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14632 desired_align, align);
14633 /* Ensure that alignment prologue won't copy past end of block. */
14634 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14635 {
14636 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14637 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14638 Make sure it is power of 2. */
14639 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14640
14641 /* To improve performance of small blocks, we jump around the VAL
14642 promotion code. This means that if the promoted VAL is not constant,
14643 we might not use it in the epilogue and have to use the byte
14644 loop variant. */
14645 if (epilogue_size_needed > 2 && !promoted_val)
14646 force_loopy_epilogue = true;
14647 label = gen_label_rtx ();
14648 emit_cmp_and_jump_insns (count_exp,
14649 GEN_INT (epilogue_size_needed),
14650 LTU, 0, counter_mode (count_exp), 1, label);
14651 if (GET_CODE (count_exp) == CONST_INT)
14652 ;
14653 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14654 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14655 else
14656 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14657 }
14658 if (dynamic_check != -1)
14659 {
14660 rtx hot_label = gen_label_rtx ();
14661 jump_around_label = gen_label_rtx ();
14662 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14663 LEU, 0, counter_mode (count_exp), 1, hot_label);
14664 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14665 set_storage_via_libcall (dst, count_exp, val_exp, false);
14666 emit_jump (jump_around_label);
14667 emit_label (hot_label);
14668 }
14669
14670 /* Step 2: Alignment prologue. */
14671
14672 /* Do the expensive promotion once we have branched off the small blocks. */
14673 if (!promoted_val)
14674 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14675 desired_align, align);
14676 gcc_assert (desired_align >= 1 && align >= 1);
14677
14678 if (desired_align > align)
14679 {
14680 /* Except for the first move in epilogue, we no longer know
14681 the constant offset in the aliasing info. It does not seem worth
14682 the pain to maintain it for the first move, so throw away
14683 the info early. */
14684 dst = change_address (dst, BLKmode, destreg);
14685 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14686 desired_align);
14687 }
14688 if (label && size_needed == 1)
14689 {
14690 emit_label (label);
14691 LABEL_NUSES (label) = 1;
14692 label = NULL;
14693 }
14694
14695 /* Step 3: Main loop. */
14696
14697 switch (alg)
14698 {
14699 case libcall:
14700 case no_stringop:
14701 gcc_unreachable ();
14702 case loop_1_byte:
14703 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14704 count_exp, QImode, 1, expected_size);
14705 break;
14706 case loop:
14707 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14708 count_exp, Pmode, 1, expected_size);
14709 break;
14710 case unrolled_loop:
14711 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14712 count_exp, Pmode, 4, expected_size);
14713 break;
14714 case rep_prefix_8_byte:
14715 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14716 DImode);
14717 break;
14718 case rep_prefix_4_byte:
14719 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14720 SImode);
14721 break;
14722 case rep_prefix_1_byte:
14723 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14724 QImode);
14725 break;
14726 }
14727 /* Properly adjust the offset of the destination memory for aliasing. */
14728 if (CONST_INT_P (count_exp))
14729 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14730 (count / size_needed) * size_needed);
14731 else
14732 dst = change_address (dst, BLKmode, destreg);
14733
14734 /* Step 4: Epilogue to copy the remaining bytes. */
14735
14736 if (label)
14737 {
14738 /* When the main loop is done, COUNT_EXP might hold the original count,
14739 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14740 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14741 bytes. Compensate if needed. */
14742
14743 if (size_needed < desired_align - align)
14744 {
14745 tmp =
14746 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14747 GEN_INT (size_needed - 1), count_exp, 1,
14748 OPTAB_DIRECT);
14749 size_needed = desired_align - align + 1;
14750 if (tmp != count_exp)
14751 emit_move_insn (count_exp, tmp);
14752 }
14753 emit_label (label);
14754 LABEL_NUSES (label) = 1;
14755 }
14756 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14757 {
14758 if (force_loopy_epilogue)
14759 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14760 size_needed);
14761 else
14762 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14763 size_needed);
14764 }
14765 if (jump_around_label)
14766 emit_label (jump_around_label);
14767 return 1;
14768 }
14769
14770 /* Expand strlen. */
14771 int
14772 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14773 {
14774 rtx addr, scratch1, scratch2, scratch3, scratch4;
14775
14776 /* The generic case of the strlen expander is long. Avoid expanding
14777 it unless TARGET_INLINE_ALL_STRINGOPS. */
14778
14779 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14780 && !TARGET_INLINE_ALL_STRINGOPS
14781 && !optimize_size
14782 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14783 return 0;
14784
14785 addr = force_reg (Pmode, XEXP (src, 0));
14786 scratch1 = gen_reg_rtx (Pmode);
14787
14788 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14789 && !optimize_size)
14790 {
14791 /* Well it seems that some optimizer does not combine a call like
14792 foo(strlen(bar), strlen(bar));
14793 when the move and the subtraction are done here. It does calculate
14794 the length just once when these instructions are done inside of
14795 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14796 often used and I use one fewer register for the lifetime of
14797 output_strlen_unroll() this is better. */
14798
14799 emit_move_insn (out, addr);
14800
14801 ix86_expand_strlensi_unroll_1 (out, src, align);
14802
14803 /* strlensi_unroll_1 returns the address of the zero at the end of
14804 the string, like memchr(), so compute the length by subtracting
14805 the start address. */
14806 if (TARGET_64BIT)
14807 emit_insn (gen_subdi3 (out, out, addr));
14808 else
14809 emit_insn (gen_subsi3 (out, out, addr));
14810 }
14811 else
14812 {
14813 rtx unspec;
14814 scratch2 = gen_reg_rtx (Pmode);
14815 scratch3 = gen_reg_rtx (Pmode);
14816 scratch4 = force_reg (Pmode, constm1_rtx);
14817
14818 emit_move_insn (scratch3, addr);
14819 eoschar = force_reg (QImode, eoschar);
14820
14821 src = replace_equiv_address_nv (src, scratch3);
14822
14823 /* If .md starts supporting :P, this can be done in .md. */
14824 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14825 scratch4), UNSPEC_SCAS);
14826 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
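/* With the count register initialized to -1, "repnz scasb" decrements it
   once per byte scanned, terminator included, leaving -(len + 2).  The
   one's complement plus -1 below therefore recovers len. */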
14827 if (TARGET_64BIT)
14828 {
14829 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14830 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14831 }
14832 else
14833 {
14834 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14835 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14836 }
14837 }
14838 return 1;
14839 }
14840
14841 /* Expand the appropriate insns for doing strlen if not just doing
14842 repnz; scasb
14843
14844 out = result, initialized with the start address
14845 align_rtx = alignment of the address.
14846 scratch = scratch register, initialized with the start address when
14847 not aligned, otherwise undefined
14848
14849 This is just the body. It needs the initializations mentioned above and
14850 some address computing at the end. These things are done in i386.md. */
14851
14852 static void
14853 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14854 {
14855 int align;
14856 rtx tmp;
14857 rtx align_2_label = NULL_RTX;
14858 rtx align_3_label = NULL_RTX;
14859 rtx align_4_label = gen_label_rtx ();
14860 rtx end_0_label = gen_label_rtx ();
14861 rtx mem;
14862 rtx tmpreg = gen_reg_rtx (SImode);
14863 rtx scratch = gen_reg_rtx (SImode);
14864 rtx cmp;
14865
14866 align = 0;
14867 if (CONST_INT_P (align_rtx))
14868 align = INTVAL (align_rtx);
14869
14870 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14871
14872 /* Is there a known alignment and is it less than 4? */
14873 if (align < 4)
14874 {
14875 rtx scratch1 = gen_reg_rtx (Pmode);
14876 emit_move_insn (scratch1, out);
14877 /* Is there a known alignment and is it not 2? */
14878 if (align != 2)
14879 {
14880 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14881 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14882
14883 /* Leave just the 3 lower bits. */
14884 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14885 NULL_RTX, 0, OPTAB_WIDEN);
14886
14887 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14888 Pmode, 1, align_4_label);
14889 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14890 Pmode, 1, align_2_label);
14891 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14892 Pmode, 1, align_3_label);
14893 }
14894 else
14895 {
14896 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14897 check whether it is aligned to a 4-byte boundary. */
14898
14899 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14900 NULL_RTX, 0, OPTAB_WIDEN);
14901
14902 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14903 Pmode, 1, align_4_label);
14904 }
14905
14906 mem = change_address (src, QImode, out);
14907
14908 /* Now compare the bytes. */
14909
14910 /* Compare the first few unaligned bytes on a byte-by-byte basis. */
14911 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14912 QImode, 1, end_0_label);
14913
14914 /* Increment the address. */
14915 if (TARGET_64BIT)
14916 emit_insn (gen_adddi3 (out, out, const1_rtx));
14917 else
14918 emit_insn (gen_addsi3 (out, out, const1_rtx));
14919
14920 /* Not needed with an alignment of 2. */
14921 if (align != 2)
14922 {
14923 emit_label (align_2_label);
14924
14925 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14926 end_0_label);
14927
14928 if (TARGET_64BIT)
14929 emit_insn (gen_adddi3 (out, out, const1_rtx));
14930 else
14931 emit_insn (gen_addsi3 (out, out, const1_rtx));
14932
14933 emit_label (align_3_label);
14934 }
14935
14936 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14937 end_0_label);
14938
14939 if (TARGET_64BIT)
14940 emit_insn (gen_adddi3 (out, out, const1_rtx));
14941 else
14942 emit_insn (gen_addsi3 (out, out, const1_rtx));
14943 }
14944
14945 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14946 align this loop; it only makes programs bigger and does not help
14947 speed. */
14948 emit_label (align_4_label);
14949
14950 mem = change_address (src, SImode, out);
14951 emit_move_insn (scratch, mem);
14952 if (TARGET_64BIT)
14953 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14954 else
14955 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14956
14957 /* This formula yields a nonzero result iff one of the bytes is zero.
14958 This saves three branches inside the loop and many cycles. */
14959
14960 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14961 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14962 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14963 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14964 gen_int_mode (0x80808080, SImode)));
14965 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14966 align_4_label);
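/* For example, if SCRATCH holds 0x12003456 (which contains a zero byte),
   then 0x12003456 - 0x01010101 == 0x10ff3355 and ~0x12003456 == 0xedffcba9;
   their conjunction masked with 0x80808080 is 0x00800000, nonzero precisely
   because a zero byte is present.  A word with no zero byte, e.g.
   0x01020304, yields 0 and the loop continues. */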
14967
14968 if (TARGET_CMOVE)
14969 {
14970 rtx reg = gen_reg_rtx (SImode);
14971 rtx reg2 = gen_reg_rtx (Pmode);
14972 emit_move_insn (reg, tmpreg);
14973 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14974
14975 /* If zero is not in the first two bytes, move two bytes forward. */
14976 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14977 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14978 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14979 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14980 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14981 reg,
14982 tmpreg)));
14983 /* Emit lea manually to avoid clobbering the flags. */
14984 emit_insn (gen_rtx_SET (SImode, reg2,
14985 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14986
14987 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14988 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14989 emit_insn (gen_rtx_SET (VOIDmode, out,
14990 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14991 reg2,
14992 out)));
14993
14994 }
14995 else
14996 {
14997 rtx end_2_label = gen_label_rtx ();
14998 /* Is zero in the first two bytes? */
14999
15000 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15001 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15002 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15003 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15004 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15005 pc_rtx);
15006 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15007 JUMP_LABEL (tmp) = end_2_label;
15008
15009 /* Not in the first two. Move two bytes forward. */
15010 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15011 if (TARGET_64BIT)
15012 emit_insn (gen_adddi3 (out, out, const2_rtx));
15013 else
15014 emit_insn (gen_addsi3 (out, out, const2_rtx));
15015
15016 emit_label (end_2_label);
15017
15018 }
15019
15020 /* Avoid branch in fixing the byte. */
15021 tmpreg = gen_lowpart (QImode, tmpreg);
15022 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15023 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15024 if (TARGET_64BIT)
15025 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15026 else
15027 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
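/* Illustration: on leaving the loop OUT is 4 past the word that holds the
   terminator (6 past it if we already stepped over the low halfword).  The
   low byte of TMPREG is 0x80 when the zero is the first byte of the
   remaining pair, so the addition sets the carry flag and OUT - 3 - 1
   points at the zero byte; otherwise no carry is produced and OUT - 3
   points at the zero byte in the second position. */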
15028
15029 emit_label (end_0_label);
15030 }
15031
15032 /* For a given symbol (function), construct code to compute the address of its
15033 PLT entry in the large x86-64 PIC model. */
15034 rtx
15035 construct_plt_address (rtx symbol)
15036 {
15037 rtx tmp = gen_reg_rtx (Pmode);
15038 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15039
15040 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15041 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15042
15043 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15044 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15045 return tmp;
15046 }
15047
15048 void
15049 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15050 rtx callarg2 ATTRIBUTE_UNUSED,
15051 rtx pop, int sibcall)
15052 {
15053 rtx use = NULL, call;
15054
15055 if (pop == const0_rtx)
15056 pop = NULL;
15057 gcc_assert (!TARGET_64BIT || !pop);
15058
15059 if (TARGET_MACHO && !TARGET_64BIT)
15060 {
15061 #if TARGET_MACHO
15062 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15063 fnaddr = machopic_indirect_call_target (fnaddr);
15064 #endif
15065 }
15066 else
15067 {
15068 /* Static functions and indirect calls don't need the pic register. */
15069 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15070 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15071 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15072 use_reg (&use, pic_offset_table_rtx);
15073 }
15074
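/* For 64-bit calls, a non-negative CALLARG2 is the number of SSE registers
   used by a varargs call (an upper bound is permitted); the psABI expects
   that count in %al, which is what the move below arranges. */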
15075 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15076 {
15077 rtx al = gen_rtx_REG (QImode, 0);
15078 emit_move_insn (al, callarg2);
15079 use_reg (&use, al);
15080 }
15081
15082 if (ix86_cmodel == CM_LARGE_PIC
15083 && GET_CODE (fnaddr) == MEM
15084 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15085 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15086 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15087 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15088 {
15089 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15090 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15091 }
15092 if (sibcall && TARGET_64BIT
15093 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15094 {
15095 rtx addr;
15096 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15097 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15098 emit_move_insn (fnaddr, addr);
15099 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15100 }
15101
15102 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15103 if (retval)
15104 call = gen_rtx_SET (VOIDmode, retval, call);
15105 if (pop)
15106 {
15107 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15108 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15109 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15110 }
15111
15112 call = emit_call_insn (call);
15113 if (use)
15114 CALL_INSN_FUNCTION_USAGE (call) = use;
15115 }
15116
15117 \f
15118 /* Clear stack slot assignments remembered from previous functions.
15119 This is called from INIT_EXPANDERS once before RTL is emitted for each
15120 function. */
15121
15122 static struct machine_function *
15123 ix86_init_machine_status (void)
15124 {
15125 struct machine_function *f;
15126
15127 f = ggc_alloc_cleared (sizeof (struct machine_function));
15128 f->use_fast_prologue_epilogue_nregs = -1;
15129 f->tls_descriptor_call_expanded_p = 0;
15130
15131 return f;
15132 }
15133
15134 /* Return a MEM corresponding to a stack slot with mode MODE.
15135 Allocate a new slot if necessary.
15136
15137 The RTL for a function can have several slots available: N is
15138 which slot to use. */
15139
15140 rtx
15141 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15142 {
15143 struct stack_local_entry *s;
15144
15145 gcc_assert (n < MAX_386_STACK_LOCALS);
15146
15147 for (s = ix86_stack_locals; s; s = s->next)
15148 if (s->mode == mode && s->n == n)
15149 return copy_rtx (s->rtl);
15150
15151 s = (struct stack_local_entry *)
15152 ggc_alloc (sizeof (struct stack_local_entry));
15153 s->n = n;
15154 s->mode = mode;
15155 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15156
15157 s->next = ix86_stack_locals;
15158 ix86_stack_locals = s;
15159 return s->rtl;
15160 }
15161
15162 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15163
15164 static GTY(()) rtx ix86_tls_symbol;
15165 rtx
15166 ix86_tls_get_addr (void)
15167 {
15168
15169 if (!ix86_tls_symbol)
15170 {
15171 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15172 (TARGET_ANY_GNU_TLS
15173 && !TARGET_64BIT)
15174 ? "___tls_get_addr"
15175 : "__tls_get_addr");
15176 }
15177
15178 return ix86_tls_symbol;
15179 }
15180
15181 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15182
15183 static GTY(()) rtx ix86_tls_module_base_symbol;
15184 rtx
15185 ix86_tls_module_base (void)
15186 {
15187
15188 if (!ix86_tls_module_base_symbol)
15189 {
15190 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15191 "_TLS_MODULE_BASE_");
15192 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15193 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15194 }
15195
15196 return ix86_tls_module_base_symbol;
15197 }
15198 \f
15199 /* Calculate the length of the memory address in the instruction
15200 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15201
15202 int
15203 memory_address_length (rtx addr)
15204 {
15205 struct ix86_address parts;
15206 rtx base, index, disp;
15207 int len;
15208 int ok;
15209
15210 if (GET_CODE (addr) == PRE_DEC
15211 || GET_CODE (addr) == POST_INC
15212 || GET_CODE (addr) == PRE_MODIFY
15213 || GET_CODE (addr) == POST_MODIFY)
15214 return 0;
15215
15216 ok = ix86_decompose_address (addr, &parts);
15217 gcc_assert (ok);
15218
15219 if (parts.base && GET_CODE (parts.base) == SUBREG)
15220 parts.base = SUBREG_REG (parts.base);
15221 if (parts.index && GET_CODE (parts.index) == SUBREG)
15222 parts.index = SUBREG_REG (parts.index);
15223
15224 base = parts.base;
15225 index = parts.index;
15226 disp = parts.disp;
15227 len = 0;
15228
15229 /* Rule of thumb:
15230 - esp as the base always wants an index,
15231 - ebp as the base always wants a displacement. */
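/* Some examples of the resulting LEN: 4(%ebp) fits in a disp8, giving 1;
   a bare symbol reference needs a disp32, giving 4; an index register
   always costs the one-byte SIB, so foo(,%eax,4) gives 4 + 1 == 5. */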
15232
15233 /* Register Indirect. */
15234 if (base && !index && !disp)
15235 {
15236 /* esp (for its index) and ebp (for its displacement) need
15237 the two-byte modrm form. */
15238 if (addr == stack_pointer_rtx
15239 || addr == arg_pointer_rtx
15240 || addr == frame_pointer_rtx
15241 || addr == hard_frame_pointer_rtx)
15242 len = 1;
15243 }
15244
15245 /* Direct Addressing. */
15246 else if (disp && !base && !index)
15247 len = 4;
15248
15249 else
15250 {
15251 /* Find the length of the displacement constant. */
15252 if (disp)
15253 {
15254 if (base && satisfies_constraint_K (disp))
15255 len = 1;
15256 else
15257 len = 4;
15258 }
15259 /* ebp always wants a displacement. */
15260 else if (base == hard_frame_pointer_rtx)
15261 len = 1;
15262
15263 /* An index requires the two-byte modrm form.... */
15264 if (index
15265 /* ...like esp, which always wants an index. */
15266 || base == stack_pointer_rtx
15267 || base == arg_pointer_rtx
15268 || base == frame_pointer_rtx)
15269 len += 1;
15270 }
15271
15272 return len;
15273 }
15274
15275 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15276 is set, expect that the insn has an 8-bit immediate alternative. */
15277 int
15278 ix86_attr_length_immediate_default (rtx insn, int shortform)
15279 {
15280 int len = 0;
15281 int i;
15282 extract_insn_cached (insn);
15283 for (i = recog_data.n_operands - 1; i >= 0; --i)
15284 if (CONSTANT_P (recog_data.operand[i]))
15285 {
15286 gcc_assert (!len);
15287 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15288 len = 1;
15289 else
15290 {
15291 switch (get_attr_mode (insn))
15292 {
15293 case MODE_QI:
15294 len+=1;
15295 break;
15296 case MODE_HI:
15297 len+=2;
15298 break;
15299 case MODE_SI:
15300 len+=4;
15301 break;
15302 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15303 case MODE_DI:
15304 len+=4;
15305 break;
15306 default:
15307 fatal_insn ("unknown insn mode", insn);
15308 }
15309 }
15310 }
15311 return len;
15312 }
15313 /* Compute default value for "length_address" attribute. */
15314 int
15315 ix86_attr_length_address_default (rtx insn)
15316 {
15317 int i;
15318
15319 if (get_attr_type (insn) == TYPE_LEA)
15320 {
15321 rtx set = PATTERN (insn);
15322
15323 if (GET_CODE (set) == PARALLEL)
15324 set = XVECEXP (set, 0, 0);
15325
15326 gcc_assert (GET_CODE (set) == SET);
15327
15328 return memory_address_length (SET_SRC (set));
15329 }
15330
15331 extract_insn_cached (insn);
15332 for (i = recog_data.n_operands - 1; i >= 0; --i)
15333 if (MEM_P (recog_data.operand[i]))
15334 {
15335 return memory_address_length (XEXP (recog_data.operand[i], 0));
15337 }
15338 return 0;
15339 }
15340 \f
15341 /* Return the maximum number of instructions a cpu can issue. */
15342
15343 static int
15344 ix86_issue_rate (void)
15345 {
15346 switch (ix86_tune)
15347 {
15348 case PROCESSOR_PENTIUM:
15349 case PROCESSOR_K6:
15350 return 2;
15351
15352 case PROCESSOR_PENTIUMPRO:
15353 case PROCESSOR_PENTIUM4:
15354 case PROCESSOR_ATHLON:
15355 case PROCESSOR_K8:
15356 case PROCESSOR_AMDFAM10:
15357 case PROCESSOR_NOCONA:
15358 case PROCESSOR_GENERIC32:
15359 case PROCESSOR_GENERIC64:
15360 return 3;
15361
15362 case PROCESSOR_CORE2:
15363 return 4;
15364
15365 default:
15366 return 1;
15367 }
15368 }
15369
15370 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15371 by DEP_INSN and nothing else set by DEP_INSN. */
15372
15373 static int
15374 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15375 {
15376 rtx set, set2;
15377
15378 /* Simplify the test for uninteresting insns. */
15379 if (insn_type != TYPE_SETCC
15380 && insn_type != TYPE_ICMOV
15381 && insn_type != TYPE_FCMOV
15382 && insn_type != TYPE_IBR)
15383 return 0;
15384
15385 if ((set = single_set (dep_insn)) != 0)
15386 {
15387 set = SET_DEST (set);
15388 set2 = NULL_RTX;
15389 }
15390 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15391 && XVECLEN (PATTERN (dep_insn), 0) == 2
15392 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15393 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15394 {
15395 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15396 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15397 }
15398 else
15399 return 0;
15400
15401 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15402 return 0;
15403
15404 /* This test is true if the dependent insn reads the flags but
15405 not any other potentially set register. */
15406 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15407 return 0;
15408
15409 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15410 return 0;
15411
15412 return 1;
15413 }
15414
15415 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15416 address with operands set by DEP_INSN. */
15417
15418 static int
15419 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15420 {
15421 rtx addr;
15422
15423 if (insn_type == TYPE_LEA
15424 && TARGET_PENTIUM)
15425 {
15426 addr = PATTERN (insn);
15427
15428 if (GET_CODE (addr) == PARALLEL)
15429 addr = XVECEXP (addr, 0, 0);
15430
15431 gcc_assert (GET_CODE (addr) == SET);
15432
15433 addr = SET_SRC (addr);
15434 }
15435 else
15436 {
15437 int i;
15438 extract_insn_cached (insn);
15439 for (i = recog_data.n_operands - 1; i >= 0; --i)
15440 if (MEM_P (recog_data.operand[i]))
15441 {
15442 addr = XEXP (recog_data.operand[i], 0);
15443 goto found;
15444 }
15445 return 0;
15446 found:;
15447 }
15448
15449 return modified_in_p (addr, dep_insn);
15450 }
15451
15452 static int
15453 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15454 {
15455 enum attr_type insn_type, dep_insn_type;
15456 enum attr_memory memory;
15457 rtx set, set2;
15458 int dep_insn_code_number;
15459
15460 /* Anti and output dependencies have zero cost on all CPUs. */
15461 if (REG_NOTE_KIND (link) != 0)
15462 return 0;
15463
15464 dep_insn_code_number = recog_memoized (dep_insn);
15465
15466 /* If we can't recognize the insns, we can't really do anything. */
15467 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15468 return cost;
15469
15470 insn_type = get_attr_type (insn);
15471 dep_insn_type = get_attr_type (dep_insn);
15472
15473 switch (ix86_tune)
15474 {
15475 case PROCESSOR_PENTIUM:
15476 /* Address Generation Interlock adds a cycle of latency. */
15477 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15478 cost += 1;
15479
15480 /* ??? Compares pair with jump/setcc. */
15481 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15482 cost = 0;
15483
15484 /* Floating point stores require value to be ready one cycle earlier. */
15485 if (insn_type == TYPE_FMOV
15486 && get_attr_memory (insn) == MEMORY_STORE
15487 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15488 cost += 1;
15489 break;
15490
15491 case PROCESSOR_PENTIUMPRO:
15492 memory = get_attr_memory (insn);
15493
15494 /* INT->FP conversion is expensive. */
15495 if (get_attr_fp_int_src (dep_insn))
15496 cost += 5;
15497
15498 /* There is one cycle extra latency between an FP op and a store. */
15499 if (insn_type == TYPE_FMOV
15500 && (set = single_set (dep_insn)) != NULL_RTX
15501 && (set2 = single_set (insn)) != NULL_RTX
15502 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15503 && MEM_P (SET_DEST (set2)))
15504 cost += 1;
15505
15506 /* Show ability of the reorder buffer to hide the latency of a load by
15507 executing it in parallel with the previous instruction, in case the
15508 previous instruction is not needed to compute the address. */
15509 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15510 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15511 {
15512 /* Claim moves to take one cycle, as the core can issue one load
15513 at a time and the next load can start a cycle later. */
15514 if (dep_insn_type == TYPE_IMOV
15515 || dep_insn_type == TYPE_FMOV)
15516 cost = 1;
15517 else if (cost > 1)
15518 cost--;
15519 }
15520 break;
15521
15522 case PROCESSOR_K6:
15523 memory = get_attr_memory (insn);
15524
15525 /* The esp dependency is resolved before the instruction is really
15526 finished. */
15527 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15528 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15529 return 1;
15530
15531 /* INT->FP conversion is expensive. */
15532 if (get_attr_fp_int_src (dep_insn))
15533 cost += 5;
15534
15535 /* Show ability of the reorder buffer to hide the latency of a load by
15536 executing it in parallel with the previous instruction, in case the
15537 previous instruction is not needed to compute the address. */
15538 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15539 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15540 {
15541 /* Claim moves to take one cycle, as the core can issue one load
15542 at a time and the next load can start a cycle later. */
15543 if (dep_insn_type == TYPE_IMOV
15544 || dep_insn_type == TYPE_FMOV)
15545 cost = 1;
15546 else if (cost > 2)
15547 cost -= 2;
15548 else
15549 cost = 1;
15550 }
15551 break;
15552
15553 case PROCESSOR_ATHLON:
15554 case PROCESSOR_K8:
15555 case PROCESSOR_AMDFAM10:
15556 case PROCESSOR_GENERIC32:
15557 case PROCESSOR_GENERIC64:
15558 memory = get_attr_memory (insn);
15559
15560 /* Show ability of the reorder buffer to hide the latency of a load by
15561 executing it in parallel with the previous instruction, in case the
15562 previous instruction is not needed to compute the address. */
15563 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15564 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15565 {
15566 enum attr_unit unit = get_attr_unit (insn);
15567 int loadcost = 3;
15568
15569 /* Because of the difference between the length of integer and
15570 floating unit pipeline preparation stages, the memory operands
15571 for floating point are cheaper.
15572
15573 ??? For Athlon the difference is most probably 2. */
15574 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15575 loadcost = 3;
15576 else
15577 loadcost = TARGET_ATHLON ? 2 : 0;
15578
15579 if (cost >= loadcost)
15580 cost -= loadcost;
15581 else
15582 cost = 0;
15583 }
15584
15585 default:
15586 break;
15587 }
15588
15589 return cost;
15590 }
15591
15592 /* How many alternative schedules to try. This should be as wide as the
15593 scheduling freedom in the DFA, but no wider. Making this value too
15594 large results in extra work for the scheduler. */
15595
15596 static int
15597 ia32_multipass_dfa_lookahead (void)
15598 {
15599 if (ix86_tune == PROCESSOR_PENTIUM)
15600 return 2;
15601
15602 if (ix86_tune == PROCESSOR_PENTIUMPRO
15603 || ix86_tune == PROCESSOR_K6)
15604 return 1;
15605
15606 else
15607 return 0;
15608 }
15609
15610 \f
15611 /* Compute the alignment given to a constant that is being placed in memory.
15612 EXP is the constant and ALIGN is the alignment that the object would
15613 ordinarily have.
15614 The value of this function is used instead of that alignment to align
15615 the object. */
15616
15617 int
15618 ix86_constant_alignment (tree exp, int align)
15619 {
15620 if (TREE_CODE (exp) == REAL_CST)
15621 {
15622 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15623 return 64;
15624 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15625 return 128;
15626 }
15627 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15628 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15629 return BITS_PER_WORD;
15630
15631 return align;
15632 }
15633
15634 /* Compute the alignment for a static variable.
15635 TYPE is the data type, and ALIGN is the alignment that
15636 the object would ordinarily have. The value of this function is used
15637 instead of that alignment to align the object. */
15638
15639 int
15640 ix86_data_alignment (tree type, int align)
15641 {
15642 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15643
15644 if (AGGREGATE_TYPE_P (type)
15645 && TYPE_SIZE (type)
15646 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15647 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15648 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15649 && align < max_align)
15650 align = max_align;
15651
15652 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15653 to a 16-byte boundary. */
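/* For example, a file-scope "char buf[24]" (192 bits) is raised to 16-byte
   alignment when compiling for x86-64, while a 15-byte array keeps the
   alignment it would ordinarily get. */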
15654 if (TARGET_64BIT)
15655 {
15656 if (AGGREGATE_TYPE_P (type)
15657 && TYPE_SIZE (type)
15658 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15659 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15660 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15661 return 128;
15662 }
15663
15664 if (TREE_CODE (type) == ARRAY_TYPE)
15665 {
15666 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15667 return 64;
15668 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15669 return 128;
15670 }
15671 else if (TREE_CODE (type) == COMPLEX_TYPE)
15672 {
15673
15674 if (TYPE_MODE (type) == DCmode && align < 64)
15675 return 64;
15676 if (TYPE_MODE (type) == XCmode && align < 128)
15677 return 128;
15678 }
15679 else if ((TREE_CODE (type) == RECORD_TYPE
15680 || TREE_CODE (type) == UNION_TYPE
15681 || TREE_CODE (type) == QUAL_UNION_TYPE)
15682 && TYPE_FIELDS (type))
15683 {
15684 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15685 return 64;
15686 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15687 return 128;
15688 }
15689 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15690 || TREE_CODE (type) == INTEGER_TYPE)
15691 {
15692 if (TYPE_MODE (type) == DFmode && align < 64)
15693 return 64;
15694 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15695 return 128;
15696 }
15697
15698 return align;
15699 }
15700
15701 /* Compute the alignment for a local variable.
15702 TYPE is the data type, and ALIGN is the alignment that
15703 the object would ordinarily have. The value of this macro is used
15704 instead of that alignment to align the object. */
15705
15706 int
15707 ix86_local_alignment (tree type, int align)
15708 {
15709 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15710 to a 16-byte boundary. */
15711 if (TARGET_64BIT)
15712 {
15713 if (AGGREGATE_TYPE_P (type)
15714 && TYPE_SIZE (type)
15715 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15716 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15717 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15718 return 128;
15719 }
15720 if (TREE_CODE (type) == ARRAY_TYPE)
15721 {
15722 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15723 return 64;
15724 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15725 return 128;
15726 }
15727 else if (TREE_CODE (type) == COMPLEX_TYPE)
15728 {
15729 if (TYPE_MODE (type) == DCmode && align < 64)
15730 return 64;
15731 if (TYPE_MODE (type) == XCmode && align < 128)
15732 return 128;
15733 }
15734 else if ((TREE_CODE (type) == RECORD_TYPE
15735 || TREE_CODE (type) == UNION_TYPE
15736 || TREE_CODE (type) == QUAL_UNION_TYPE)
15737 && TYPE_FIELDS (type))
15738 {
15739 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15740 return 64;
15741 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15742 return 128;
15743 }
15744 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15745 || TREE_CODE (type) == INTEGER_TYPE)
15746 {
15747
15748 if (TYPE_MODE (type) == DFmode && align < 64)
15749 return 64;
15750 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15751 return 128;
15752 }
15753 return align;
15754 }
15755 \f
15756 /* Emit RTL insns to initialize the variable parts of a trampoline.
15757 FNADDR is an RTX for the address of the function's pure code.
15758 CXT is an RTX for the static chain value for the function. */
15759 void
15760 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15761 {
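/* Illustrative layout of the code materialized below (standard x86
   encodings):
     32-bit:  b9 <imm32>     movl   $CXT, %ecx
              e9 <rel32>     jmp    FNADDR
     64-bit:  41 bb <imm32>  movl   $FNADDR, %r11d   (short form), or
              49 bb <imm64>  movabs $FNADDR, %r11
              49 ba <imm64>  movabs $CXT, %r10
              49 ff e3       jmp    *%r11  */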
15762 if (!TARGET_64BIT)
15763 {
15764 /* Compute offset from the end of the jmp to the target function. */
15765 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15766 plus_constant (tramp, 10),
15767 NULL_RTX, 1, OPTAB_DIRECT);
15768 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15769 gen_int_mode (0xb9, QImode));
15770 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15771 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15772 gen_int_mode (0xe9, QImode));
15773 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15774 }
15775 else
15776 {
15777 int offset = 0;
15778 /* Try to load the address using the shorter movl instead of movabs.
15779 We may want to support movq for kernel mode, but the kernel does not
15780 use trampolines at the moment. */
15781 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15782 {
15783 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15784 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15785 gen_int_mode (0xbb41, HImode));
15786 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15787 gen_lowpart (SImode, fnaddr));
15788 offset += 6;
15789 }
15790 else
15791 {
15792 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15793 gen_int_mode (0xbb49, HImode));
15794 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15795 fnaddr);
15796 offset += 10;
15797 }
15798 /* Load static chain using movabs to r10. */
15799 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15800 gen_int_mode (0xba49, HImode));
15801 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15802 cxt);
15803 offset += 10;
15804 /* Jump to r11. */
15805 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15806 gen_int_mode (0xff49, HImode));
15807 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15808 gen_int_mode (0xe3, QImode));
15809 offset += 3;
15810 gcc_assert (offset <= TRAMPOLINE_SIZE);
15811 }
15812
15813 #ifdef ENABLE_EXECUTE_STACK
15814 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15815 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15816 #endif
15817 }
15818 \f
15819 /* Codes for all the SSE/MMX builtins. */
15820 enum ix86_builtins
15821 {
15822 IX86_BUILTIN_ADDPS,
15823 IX86_BUILTIN_ADDSS,
15824 IX86_BUILTIN_DIVPS,
15825 IX86_BUILTIN_DIVSS,
15826 IX86_BUILTIN_MULPS,
15827 IX86_BUILTIN_MULSS,
15828 IX86_BUILTIN_SUBPS,
15829 IX86_BUILTIN_SUBSS,
15830
15831 IX86_BUILTIN_CMPEQPS,
15832 IX86_BUILTIN_CMPLTPS,
15833 IX86_BUILTIN_CMPLEPS,
15834 IX86_BUILTIN_CMPGTPS,
15835 IX86_BUILTIN_CMPGEPS,
15836 IX86_BUILTIN_CMPNEQPS,
15837 IX86_BUILTIN_CMPNLTPS,
15838 IX86_BUILTIN_CMPNLEPS,
15839 IX86_BUILTIN_CMPNGTPS,
15840 IX86_BUILTIN_CMPNGEPS,
15841 IX86_BUILTIN_CMPORDPS,
15842 IX86_BUILTIN_CMPUNORDPS,
15843 IX86_BUILTIN_CMPEQSS,
15844 IX86_BUILTIN_CMPLTSS,
15845 IX86_BUILTIN_CMPLESS,
15846 IX86_BUILTIN_CMPNEQSS,
15847 IX86_BUILTIN_CMPNLTSS,
15848 IX86_BUILTIN_CMPNLESS,
15849 IX86_BUILTIN_CMPNGTSS,
15850 IX86_BUILTIN_CMPNGESS,
15851 IX86_BUILTIN_CMPORDSS,
15852 IX86_BUILTIN_CMPUNORDSS,
15853
15854 IX86_BUILTIN_COMIEQSS,
15855 IX86_BUILTIN_COMILTSS,
15856 IX86_BUILTIN_COMILESS,
15857 IX86_BUILTIN_COMIGTSS,
15858 IX86_BUILTIN_COMIGESS,
15859 IX86_BUILTIN_COMINEQSS,
15860 IX86_BUILTIN_UCOMIEQSS,
15861 IX86_BUILTIN_UCOMILTSS,
15862 IX86_BUILTIN_UCOMILESS,
15863 IX86_BUILTIN_UCOMIGTSS,
15864 IX86_BUILTIN_UCOMIGESS,
15865 IX86_BUILTIN_UCOMINEQSS,
15866
15867 IX86_BUILTIN_CVTPI2PS,
15868 IX86_BUILTIN_CVTPS2PI,
15869 IX86_BUILTIN_CVTSI2SS,
15870 IX86_BUILTIN_CVTSI642SS,
15871 IX86_BUILTIN_CVTSS2SI,
15872 IX86_BUILTIN_CVTSS2SI64,
15873 IX86_BUILTIN_CVTTPS2PI,
15874 IX86_BUILTIN_CVTTSS2SI,
15875 IX86_BUILTIN_CVTTSS2SI64,
15876
15877 IX86_BUILTIN_MAXPS,
15878 IX86_BUILTIN_MAXSS,
15879 IX86_BUILTIN_MINPS,
15880 IX86_BUILTIN_MINSS,
15881
15882 IX86_BUILTIN_LOADUPS,
15883 IX86_BUILTIN_STOREUPS,
15884 IX86_BUILTIN_MOVSS,
15885
15886 IX86_BUILTIN_MOVHLPS,
15887 IX86_BUILTIN_MOVLHPS,
15888 IX86_BUILTIN_LOADHPS,
15889 IX86_BUILTIN_LOADLPS,
15890 IX86_BUILTIN_STOREHPS,
15891 IX86_BUILTIN_STORELPS,
15892
15893 IX86_BUILTIN_MASKMOVQ,
15894 IX86_BUILTIN_MOVMSKPS,
15895 IX86_BUILTIN_PMOVMSKB,
15896
15897 IX86_BUILTIN_MOVNTPS,
15898 IX86_BUILTIN_MOVNTQ,
15899
15900 IX86_BUILTIN_LOADDQU,
15901 IX86_BUILTIN_STOREDQU,
15902
15903 IX86_BUILTIN_PACKSSWB,
15904 IX86_BUILTIN_PACKSSDW,
15905 IX86_BUILTIN_PACKUSWB,
15906
15907 IX86_BUILTIN_PADDB,
15908 IX86_BUILTIN_PADDW,
15909 IX86_BUILTIN_PADDD,
15910 IX86_BUILTIN_PADDQ,
15911 IX86_BUILTIN_PADDSB,
15912 IX86_BUILTIN_PADDSW,
15913 IX86_BUILTIN_PADDUSB,
15914 IX86_BUILTIN_PADDUSW,
15915 IX86_BUILTIN_PSUBB,
15916 IX86_BUILTIN_PSUBW,
15917 IX86_BUILTIN_PSUBD,
15918 IX86_BUILTIN_PSUBQ,
15919 IX86_BUILTIN_PSUBSB,
15920 IX86_BUILTIN_PSUBSW,
15921 IX86_BUILTIN_PSUBUSB,
15922 IX86_BUILTIN_PSUBUSW,
15923
15924 IX86_BUILTIN_PAND,
15925 IX86_BUILTIN_PANDN,
15926 IX86_BUILTIN_POR,
15927 IX86_BUILTIN_PXOR,
15928
15929 IX86_BUILTIN_PAVGB,
15930 IX86_BUILTIN_PAVGW,
15931
15932 IX86_BUILTIN_PCMPEQB,
15933 IX86_BUILTIN_PCMPEQW,
15934 IX86_BUILTIN_PCMPEQD,
15935 IX86_BUILTIN_PCMPGTB,
15936 IX86_BUILTIN_PCMPGTW,
15937 IX86_BUILTIN_PCMPGTD,
15938
15939 IX86_BUILTIN_PMADDWD,
15940
15941 IX86_BUILTIN_PMAXSW,
15942 IX86_BUILTIN_PMAXUB,
15943 IX86_BUILTIN_PMINSW,
15944 IX86_BUILTIN_PMINUB,
15945
15946 IX86_BUILTIN_PMULHUW,
15947 IX86_BUILTIN_PMULHW,
15948 IX86_BUILTIN_PMULLW,
15949
15950 IX86_BUILTIN_PSADBW,
15951 IX86_BUILTIN_PSHUFW,
15952
15953 IX86_BUILTIN_PSLLW,
15954 IX86_BUILTIN_PSLLD,
15955 IX86_BUILTIN_PSLLQ,
15956 IX86_BUILTIN_PSRAW,
15957 IX86_BUILTIN_PSRAD,
15958 IX86_BUILTIN_PSRLW,
15959 IX86_BUILTIN_PSRLD,
15960 IX86_BUILTIN_PSRLQ,
15961 IX86_BUILTIN_PSLLWI,
15962 IX86_BUILTIN_PSLLDI,
15963 IX86_BUILTIN_PSLLQI,
15964 IX86_BUILTIN_PSRAWI,
15965 IX86_BUILTIN_PSRADI,
15966 IX86_BUILTIN_PSRLWI,
15967 IX86_BUILTIN_PSRLDI,
15968 IX86_BUILTIN_PSRLQI,
15969
15970 IX86_BUILTIN_PUNPCKHBW,
15971 IX86_BUILTIN_PUNPCKHWD,
15972 IX86_BUILTIN_PUNPCKHDQ,
15973 IX86_BUILTIN_PUNPCKLBW,
15974 IX86_BUILTIN_PUNPCKLWD,
15975 IX86_BUILTIN_PUNPCKLDQ,
15976
15977 IX86_BUILTIN_SHUFPS,
15978
15979 IX86_BUILTIN_RCPPS,
15980 IX86_BUILTIN_RCPSS,
15981 IX86_BUILTIN_RSQRTPS,
15982 IX86_BUILTIN_RSQRTSS,
15983 IX86_BUILTIN_SQRTPS,
15984 IX86_BUILTIN_SQRTSS,
15985
15986 IX86_BUILTIN_UNPCKHPS,
15987 IX86_BUILTIN_UNPCKLPS,
15988
15989 IX86_BUILTIN_ANDPS,
15990 IX86_BUILTIN_ANDNPS,
15991 IX86_BUILTIN_ORPS,
15992 IX86_BUILTIN_XORPS,
15993
15994 IX86_BUILTIN_EMMS,
15995 IX86_BUILTIN_LDMXCSR,
15996 IX86_BUILTIN_STMXCSR,
15997 IX86_BUILTIN_SFENCE,
15998
15999 /* 3DNow! Original */
16000 IX86_BUILTIN_FEMMS,
16001 IX86_BUILTIN_PAVGUSB,
16002 IX86_BUILTIN_PF2ID,
16003 IX86_BUILTIN_PFACC,
16004 IX86_BUILTIN_PFADD,
16005 IX86_BUILTIN_PFCMPEQ,
16006 IX86_BUILTIN_PFCMPGE,
16007 IX86_BUILTIN_PFCMPGT,
16008 IX86_BUILTIN_PFMAX,
16009 IX86_BUILTIN_PFMIN,
16010 IX86_BUILTIN_PFMUL,
16011 IX86_BUILTIN_PFRCP,
16012 IX86_BUILTIN_PFRCPIT1,
16013 IX86_BUILTIN_PFRCPIT2,
16014 IX86_BUILTIN_PFRSQIT1,
16015 IX86_BUILTIN_PFRSQRT,
16016 IX86_BUILTIN_PFSUB,
16017 IX86_BUILTIN_PFSUBR,
16018 IX86_BUILTIN_PI2FD,
16019 IX86_BUILTIN_PMULHRW,
16020
16021 /* 3DNow! Athlon Extensions */
16022 IX86_BUILTIN_PF2IW,
16023 IX86_BUILTIN_PFNACC,
16024 IX86_BUILTIN_PFPNACC,
16025 IX86_BUILTIN_PI2FW,
16026 IX86_BUILTIN_PSWAPDSI,
16027 IX86_BUILTIN_PSWAPDSF,
16028
16029 /* SSE2 */
16030 IX86_BUILTIN_ADDPD,
16031 IX86_BUILTIN_ADDSD,
16032 IX86_BUILTIN_DIVPD,
16033 IX86_BUILTIN_DIVSD,
16034 IX86_BUILTIN_MULPD,
16035 IX86_BUILTIN_MULSD,
16036 IX86_BUILTIN_SUBPD,
16037 IX86_BUILTIN_SUBSD,
16038
16039 IX86_BUILTIN_CMPEQPD,
16040 IX86_BUILTIN_CMPLTPD,
16041 IX86_BUILTIN_CMPLEPD,
16042 IX86_BUILTIN_CMPGTPD,
16043 IX86_BUILTIN_CMPGEPD,
16044 IX86_BUILTIN_CMPNEQPD,
16045 IX86_BUILTIN_CMPNLTPD,
16046 IX86_BUILTIN_CMPNLEPD,
16047 IX86_BUILTIN_CMPNGTPD,
16048 IX86_BUILTIN_CMPNGEPD,
16049 IX86_BUILTIN_CMPORDPD,
16050 IX86_BUILTIN_CMPUNORDPD,
16051 IX86_BUILTIN_CMPNEPD,
16052 IX86_BUILTIN_CMPEQSD,
16053 IX86_BUILTIN_CMPLTSD,
16054 IX86_BUILTIN_CMPLESD,
16055 IX86_BUILTIN_CMPNEQSD,
16056 IX86_BUILTIN_CMPNLTSD,
16057 IX86_BUILTIN_CMPNLESD,
16058 IX86_BUILTIN_CMPORDSD,
16059 IX86_BUILTIN_CMPUNORDSD,
16060 IX86_BUILTIN_CMPNESD,
16061
16062 IX86_BUILTIN_COMIEQSD,
16063 IX86_BUILTIN_COMILTSD,
16064 IX86_BUILTIN_COMILESD,
16065 IX86_BUILTIN_COMIGTSD,
16066 IX86_BUILTIN_COMIGESD,
16067 IX86_BUILTIN_COMINEQSD,
16068 IX86_BUILTIN_UCOMIEQSD,
16069 IX86_BUILTIN_UCOMILTSD,
16070 IX86_BUILTIN_UCOMILESD,
16071 IX86_BUILTIN_UCOMIGTSD,
16072 IX86_BUILTIN_UCOMIGESD,
16073 IX86_BUILTIN_UCOMINEQSD,
16074
16075 IX86_BUILTIN_MAXPD,
16076 IX86_BUILTIN_MAXSD,
16077 IX86_BUILTIN_MINPD,
16078 IX86_BUILTIN_MINSD,
16079
16080 IX86_BUILTIN_ANDPD,
16081 IX86_BUILTIN_ANDNPD,
16082 IX86_BUILTIN_ORPD,
16083 IX86_BUILTIN_XORPD,
16084
16085 IX86_BUILTIN_SQRTPD,
16086 IX86_BUILTIN_SQRTSD,
16087
16088 IX86_BUILTIN_UNPCKHPD,
16089 IX86_BUILTIN_UNPCKLPD,
16090
16091 IX86_BUILTIN_SHUFPD,
16092
16093 IX86_BUILTIN_LOADUPD,
16094 IX86_BUILTIN_STOREUPD,
16095 IX86_BUILTIN_MOVSD,
16096
16097 IX86_BUILTIN_LOADHPD,
16098 IX86_BUILTIN_LOADLPD,
16099
16100 IX86_BUILTIN_CVTDQ2PD,
16101 IX86_BUILTIN_CVTDQ2PS,
16102
16103 IX86_BUILTIN_CVTPD2DQ,
16104 IX86_BUILTIN_CVTPD2PI,
16105 IX86_BUILTIN_CVTPD2PS,
16106 IX86_BUILTIN_CVTTPD2DQ,
16107 IX86_BUILTIN_CVTTPD2PI,
16108
16109 IX86_BUILTIN_CVTPI2PD,
16110 IX86_BUILTIN_CVTSI2SD,
16111 IX86_BUILTIN_CVTSI642SD,
16112
16113 IX86_BUILTIN_CVTSD2SI,
16114 IX86_BUILTIN_CVTSD2SI64,
16115 IX86_BUILTIN_CVTSD2SS,
16116 IX86_BUILTIN_CVTSS2SD,
16117 IX86_BUILTIN_CVTTSD2SI,
16118 IX86_BUILTIN_CVTTSD2SI64,
16119
16120 IX86_BUILTIN_CVTPS2DQ,
16121 IX86_BUILTIN_CVTPS2PD,
16122 IX86_BUILTIN_CVTTPS2DQ,
16123
16124 IX86_BUILTIN_MOVNTI,
16125 IX86_BUILTIN_MOVNTPD,
16126 IX86_BUILTIN_MOVNTDQ,
16127
16128 /* SSE2 MMX */
16129 IX86_BUILTIN_MASKMOVDQU,
16130 IX86_BUILTIN_MOVMSKPD,
16131 IX86_BUILTIN_PMOVMSKB128,
16132
16133 IX86_BUILTIN_PACKSSWB128,
16134 IX86_BUILTIN_PACKSSDW128,
16135 IX86_BUILTIN_PACKUSWB128,
16136
16137 IX86_BUILTIN_PADDB128,
16138 IX86_BUILTIN_PADDW128,
16139 IX86_BUILTIN_PADDD128,
16140 IX86_BUILTIN_PADDQ128,
16141 IX86_BUILTIN_PADDSB128,
16142 IX86_BUILTIN_PADDSW128,
16143 IX86_BUILTIN_PADDUSB128,
16144 IX86_BUILTIN_PADDUSW128,
16145 IX86_BUILTIN_PSUBB128,
16146 IX86_BUILTIN_PSUBW128,
16147 IX86_BUILTIN_PSUBD128,
16148 IX86_BUILTIN_PSUBQ128,
16149 IX86_BUILTIN_PSUBSB128,
16150 IX86_BUILTIN_PSUBSW128,
16151 IX86_BUILTIN_PSUBUSB128,
16152 IX86_BUILTIN_PSUBUSW128,
16153
16154 IX86_BUILTIN_PAND128,
16155 IX86_BUILTIN_PANDN128,
16156 IX86_BUILTIN_POR128,
16157 IX86_BUILTIN_PXOR128,
16158
16159 IX86_BUILTIN_PAVGB128,
16160 IX86_BUILTIN_PAVGW128,
16161
16162 IX86_BUILTIN_PCMPEQB128,
16163 IX86_BUILTIN_PCMPEQW128,
16164 IX86_BUILTIN_PCMPEQD128,
16165 IX86_BUILTIN_PCMPGTB128,
16166 IX86_BUILTIN_PCMPGTW128,
16167 IX86_BUILTIN_PCMPGTD128,
16168
16169 IX86_BUILTIN_PMADDWD128,
16170
16171 IX86_BUILTIN_PMAXSW128,
16172 IX86_BUILTIN_PMAXUB128,
16173 IX86_BUILTIN_PMINSW128,
16174 IX86_BUILTIN_PMINUB128,
16175
16176 IX86_BUILTIN_PMULUDQ,
16177 IX86_BUILTIN_PMULUDQ128,
16178 IX86_BUILTIN_PMULHUW128,
16179 IX86_BUILTIN_PMULHW128,
16180 IX86_BUILTIN_PMULLW128,
16181
16182 IX86_BUILTIN_PSADBW128,
16183 IX86_BUILTIN_PSHUFHW,
16184 IX86_BUILTIN_PSHUFLW,
16185 IX86_BUILTIN_PSHUFD,
16186
16187 IX86_BUILTIN_PSLLW128,
16188 IX86_BUILTIN_PSLLD128,
16189 IX86_BUILTIN_PSLLQ128,
16190 IX86_BUILTIN_PSRAW128,
16191 IX86_BUILTIN_PSRAD128,
16192 IX86_BUILTIN_PSRLW128,
16193 IX86_BUILTIN_PSRLD128,
16194 IX86_BUILTIN_PSRLQ128,
16195 IX86_BUILTIN_PSLLDQI128,
16196 IX86_BUILTIN_PSLLWI128,
16197 IX86_BUILTIN_PSLLDI128,
16198 IX86_BUILTIN_PSLLQI128,
16199 IX86_BUILTIN_PSRAWI128,
16200 IX86_BUILTIN_PSRADI128,
16201 IX86_BUILTIN_PSRLDQI128,
16202 IX86_BUILTIN_PSRLWI128,
16203 IX86_BUILTIN_PSRLDI128,
16204 IX86_BUILTIN_PSRLQI128,
16205
16206 IX86_BUILTIN_PUNPCKHBW128,
16207 IX86_BUILTIN_PUNPCKHWD128,
16208 IX86_BUILTIN_PUNPCKHDQ128,
16209 IX86_BUILTIN_PUNPCKHQDQ128,
16210 IX86_BUILTIN_PUNPCKLBW128,
16211 IX86_BUILTIN_PUNPCKLWD128,
16212 IX86_BUILTIN_PUNPCKLDQ128,
16213 IX86_BUILTIN_PUNPCKLQDQ128,
16214
16215 IX86_BUILTIN_CLFLUSH,
16216 IX86_BUILTIN_MFENCE,
16217 IX86_BUILTIN_LFENCE,
16218
16219 /* Prescott New Instructions. */
16220 IX86_BUILTIN_ADDSUBPS,
16221 IX86_BUILTIN_HADDPS,
16222 IX86_BUILTIN_HSUBPS,
16223 IX86_BUILTIN_MOVSHDUP,
16224 IX86_BUILTIN_MOVSLDUP,
16225 IX86_BUILTIN_ADDSUBPD,
16226 IX86_BUILTIN_HADDPD,
16227 IX86_BUILTIN_HSUBPD,
16228 IX86_BUILTIN_LDDQU,
16229
16230 IX86_BUILTIN_MONITOR,
16231 IX86_BUILTIN_MWAIT,
16232
16233 /* SSSE3. */
16234 IX86_BUILTIN_PHADDW,
16235 IX86_BUILTIN_PHADDD,
16236 IX86_BUILTIN_PHADDSW,
16237 IX86_BUILTIN_PHSUBW,
16238 IX86_BUILTIN_PHSUBD,
16239 IX86_BUILTIN_PHSUBSW,
16240 IX86_BUILTIN_PMADDUBSW,
16241 IX86_BUILTIN_PMULHRSW,
16242 IX86_BUILTIN_PSHUFB,
16243 IX86_BUILTIN_PSIGNB,
16244 IX86_BUILTIN_PSIGNW,
16245 IX86_BUILTIN_PSIGND,
16246 IX86_BUILTIN_PALIGNR,
16247 IX86_BUILTIN_PABSB,
16248 IX86_BUILTIN_PABSW,
16249 IX86_BUILTIN_PABSD,
16250
16251 IX86_BUILTIN_PHADDW128,
16252 IX86_BUILTIN_PHADDD128,
16253 IX86_BUILTIN_PHADDSW128,
16254 IX86_BUILTIN_PHSUBW128,
16255 IX86_BUILTIN_PHSUBD128,
16256 IX86_BUILTIN_PHSUBSW128,
16257 IX86_BUILTIN_PMADDUBSW128,
16258 IX86_BUILTIN_PMULHRSW128,
16259 IX86_BUILTIN_PSHUFB128,
16260 IX86_BUILTIN_PSIGNB128,
16261 IX86_BUILTIN_PSIGNW128,
16262 IX86_BUILTIN_PSIGND128,
16263 IX86_BUILTIN_PALIGNR128,
16264 IX86_BUILTIN_PABSB128,
16265 IX86_BUILTIN_PABSW128,
16266 IX86_BUILTIN_PABSD128,
16267
16268 /* AMDFAM10 - SSE4A New Instructions. */
16269 IX86_BUILTIN_MOVNTSD,
16270 IX86_BUILTIN_MOVNTSS,
16271 IX86_BUILTIN_EXTRQI,
16272 IX86_BUILTIN_EXTRQ,
16273 IX86_BUILTIN_INSERTQI,
16274 IX86_BUILTIN_INSERTQ,
16275
16276 IX86_BUILTIN_VEC_INIT_V2SI,
16277 IX86_BUILTIN_VEC_INIT_V4HI,
16278 IX86_BUILTIN_VEC_INIT_V8QI,
16279 IX86_BUILTIN_VEC_EXT_V2DF,
16280 IX86_BUILTIN_VEC_EXT_V2DI,
16281 IX86_BUILTIN_VEC_EXT_V4SF,
16282 IX86_BUILTIN_VEC_EXT_V4SI,
16283 IX86_BUILTIN_VEC_EXT_V8HI,
16284 IX86_BUILTIN_VEC_EXT_V2SI,
16285 IX86_BUILTIN_VEC_EXT_V4HI,
16286 IX86_BUILTIN_VEC_SET_V8HI,
16287 IX86_BUILTIN_VEC_SET_V4HI,
16288
16289 IX86_BUILTIN_MAX
16290 };
16291
16292 /* Table for the ix86 builtin decls. */
16293 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16294
16295 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16296 * only if target_flags includes one of the bits in MASK. Stores the
16297 * function decl in the ix86_builtins array.
16298 * Returns the function decl, or NULL_TREE if the builtin was not added. */
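/* A typical registration from the SSE builtin initialization code looks
   something like this (v4sf_ftype_v4sf being one of the function-type trees
   constructed there):
     def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps",
                        v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);  */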
16299
16300 static inline tree
16301 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16302 {
16303 tree decl = NULL_TREE;
16304
16305 if (mask & target_flags
16306 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16307 {
16308 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16309 NULL, NULL_TREE);
16310 ix86_builtins[(int) code] = decl;
16311 }
16312
16313 return decl;
16314 }
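/* For example, further down in this file the CLFLUSH builtin is registered
   with
     def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid,
                  IX86_BUILTIN_CLFLUSH);
   so the decl is only created when SSE2 support is enabled.  */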
16315
16316 /* Like def_builtin, but also marks the function decl "const". */
16317
16318 static inline tree
16319 def_builtin_const (int mask, const char *name, tree type,
16320 enum ix86_builtins code)
16321 {
16322 tree decl = def_builtin (mask, name, type, code);
16323 if (decl)
16324 TREE_READONLY (decl) = 1;
16325 return decl;
16326 }
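/* Builtins with no side effects, such as __builtin_ia32_sqrtps below, are
   registered through this wrapper; setting TREE_READONLY marks the decl as
   a "const" function, so the middle end may CSE or hoist calls to it.  */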
16327
16328 /* Bits for builtin_description.flag. */
16329
16330 /* Set when we don't support the comparison natively, and should
16331 swap_comparison in order to support it. */
16332 #define BUILTIN_DESC_SWAP_OPERANDS 1
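/* For example, there is no native GT compare on V4SF, so the table below
   describes __builtin_ia32_cmpgtps with the LT comparison code and
   BUILTIN_DESC_SWAP_OPERANDS set; the expander then swaps the two operands
   before emitting the maskcmp pattern.  */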
16333
16334 struct builtin_description
16335 {
16336 const unsigned int mask;
16337 const enum insn_code icode;
16338 const char *const name;
16339 const enum ix86_builtins code;
16340 const enum rtx_code comparison;
16341 const unsigned int flag;
16342 };
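/* Read each table entry below field by field; e.g. from bdesc_comi:
     { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }
   gives the target flag that enables the builtin, the insn pattern used to
   expand it, its user-visible name, its IX86_BUILTIN_* code, the rtx
   comparison code (if any), and the BUILTIN_DESC_* flags.  */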
16343
16344 static const struct builtin_description bdesc_comi[] =
16345 {
16346 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16347 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16348 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16349 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16350 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16351 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16352 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16353 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16354 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16355 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16356 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16357 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16358 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16359 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16360 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16362 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16363 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16364 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16365 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16366 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16367 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16368 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16369 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16370 };
16371
16372 static const struct builtin_description bdesc_2arg[] =
16373 {
16374 /* SSE */
16375 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16376 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16377 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16378 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16379 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16380 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16381 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16382 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16383
16384 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16385 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16386 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16387 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16388 BUILTIN_DESC_SWAP_OPERANDS },
16389 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16390 BUILTIN_DESC_SWAP_OPERANDS },
16391 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16392 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16393 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16394 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16395 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16396 BUILTIN_DESC_SWAP_OPERANDS },
16397 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16398 BUILTIN_DESC_SWAP_OPERANDS },
16399 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16400 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16401 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16402 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16403 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16404 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16405 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16406 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16407 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16408 BUILTIN_DESC_SWAP_OPERANDS },
16409 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16410 BUILTIN_DESC_SWAP_OPERANDS },
16411 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16412
16413 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16414 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16415 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16416 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16417
16418 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16419 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16420 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16421 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16422
16423 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16424 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16425 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16426 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16427 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16428
16429 /* MMX */
16430 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16431 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16432 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16433 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16434 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16435 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16436 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16437 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16438
16439 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16440 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16441 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16442 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16443 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16444 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16445 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16446 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16447
16448 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16449 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16450 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16451
16452 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16453 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16454 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16455 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16456
16457 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16458 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16459
16460 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16461 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16462 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16463 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16464 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16465 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16466
16467 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16468 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16469 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16470 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16471
16472 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16473 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16474 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16475 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16476 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16477 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16478
16479 /* Special: these entries have no name, so they are skipped by the generic registration loop; they only supply the icode used when the corresponding builtins are registered and expanded by hand below. */
16480 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16481 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16482 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16483
16484 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16485 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16486 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16487
16488 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16489 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16490 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16491 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16492 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16493 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16494
16495 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16496 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16497 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16498 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16499 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16500 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16501
16502 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16503 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16504 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16505 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16506
16507 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16508 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16509
16510 /* SSE2 */
16511 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16512 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16513 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16514 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16515 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16516 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16517 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16518 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16519
16520 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16521 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16522 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16523 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16524 BUILTIN_DESC_SWAP_OPERANDS },
16525 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16526 BUILTIN_DESC_SWAP_OPERANDS },
16527 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16528 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16529 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16530 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16531 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16532 BUILTIN_DESC_SWAP_OPERANDS },
16533 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16534 BUILTIN_DESC_SWAP_OPERANDS },
16535 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16536 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16537 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16538 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16539 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16540 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16541 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16542 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16543 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16544
16545 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16546 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16547 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16548 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16549
16550 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16551 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16552 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16553 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16554
16555 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16556 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16557 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16558
16559 /* SSE2 MMX */
16560 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16561 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16562 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16563 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16564 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16565 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16566 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16567 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16568
16569 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16570 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16571 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16572 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16573 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16574 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16577
16578 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16579 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16580
16581 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16582 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16583 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16584 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16585
16586 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16587 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16588
16589 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16590 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16591 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16592 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16594 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16595
16596 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16600
16601 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16602 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16603 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16604 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16605 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16606 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16607 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16608 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16609
16610 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16611 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16612 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16613
16614 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16615 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16616
16617 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16618 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16619
16620 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16621 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16622 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16623
16624 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16625 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16626 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16627
16628 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16630
16631 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16632
16633 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16634 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16635 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16636 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16637
16638 /* SSE3 */
16639 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16640 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16641 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16642 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16643 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16644 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16645
16646 /* SSSE3 */
16647 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16648 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16649 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16650 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16651 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16652 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16653 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16654 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16655 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16656 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16657 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16658 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16659 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16660 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16661 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16662 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16663 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16664 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16665 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16666 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16667 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16668 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16669 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16670 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16671 };
16672
16673 static const struct builtin_description bdesc_1arg[] =
16674 {
16675 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16676 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16677
16678 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16679 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16680 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16681
16682 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16683 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16684 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16685 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16686 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16687 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16688
16689 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16690 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16691
16692 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16693
16694 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16695 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16696
16697 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16698 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16699 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16700 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16701 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16702
16703 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16704
16705 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16706 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16707 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16708 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16709
16710 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16711 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16712 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16713
16714 /* SSE3 */
16715 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16716 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16717
16718 /* SSSE3 */
16719 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16720 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16721 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16722 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16723 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16724 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16725 };
16726
16727 static void
16728 ix86_init_builtins (void)
16729 {
16730 if (TARGET_MMX)
16731 ix86_init_mmx_sse_builtins ();
16732 }
16733
16734 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16735 is zero. Otherwise, if TARGET_SSE is not set, only the MMX
16736 builtins are defined. */
16737 static void
16738 ix86_init_mmx_sse_builtins (void)
16739 {
16740 const struct builtin_description * d;
16741 size_t i;
16742
16743 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16744 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16745 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16746 tree V2DI_type_node
16747 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16748 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16749 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16750 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16751 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16752 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16753 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16754
16755 tree pchar_type_node = build_pointer_type (char_type_node);
16756 tree pcchar_type_node = build_pointer_type (
16757 build_type_variant (char_type_node, 1, 0));
16758 tree pfloat_type_node = build_pointer_type (float_type_node);
16759 tree pcfloat_type_node = build_pointer_type (
16760 build_type_variant (float_type_node, 1, 0));
16761 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16762 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16763 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16764
16765 /* Comparisons. */
16766 tree int_ftype_v4sf_v4sf
16767 = build_function_type_list (integer_type_node,
16768 V4SF_type_node, V4SF_type_node, NULL_TREE);
16769 tree v4si_ftype_v4sf_v4sf
16770 = build_function_type_list (V4SI_type_node,
16771 V4SF_type_node, V4SF_type_node, NULL_TREE);
16772 /* MMX/SSE/integer conversions. */
16773 tree int_ftype_v4sf
16774 = build_function_type_list (integer_type_node,
16775 V4SF_type_node, NULL_TREE);
16776 tree int64_ftype_v4sf
16777 = build_function_type_list (long_long_integer_type_node,
16778 V4SF_type_node, NULL_TREE);
16779 tree int_ftype_v8qi
16780 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16781 tree v4sf_ftype_v4sf_int
16782 = build_function_type_list (V4SF_type_node,
16783 V4SF_type_node, integer_type_node, NULL_TREE);
16784 tree v4sf_ftype_v4sf_int64
16785 = build_function_type_list (V4SF_type_node,
16786 V4SF_type_node, long_long_integer_type_node,
16787 NULL_TREE);
16788 tree v4sf_ftype_v4sf_v2si
16789 = build_function_type_list (V4SF_type_node,
16790 V4SF_type_node, V2SI_type_node, NULL_TREE);
16791
16792 /* Miscellaneous. */
16793 tree v8qi_ftype_v4hi_v4hi
16794 = build_function_type_list (V8QI_type_node,
16795 V4HI_type_node, V4HI_type_node, NULL_TREE);
16796 tree v4hi_ftype_v2si_v2si
16797 = build_function_type_list (V4HI_type_node,
16798 V2SI_type_node, V2SI_type_node, NULL_TREE);
16799 tree v4sf_ftype_v4sf_v4sf_int
16800 = build_function_type_list (V4SF_type_node,
16801 V4SF_type_node, V4SF_type_node,
16802 integer_type_node, NULL_TREE);
16803 tree v2si_ftype_v4hi_v4hi
16804 = build_function_type_list (V2SI_type_node,
16805 V4HI_type_node, V4HI_type_node, NULL_TREE);
16806 tree v4hi_ftype_v4hi_int
16807 = build_function_type_list (V4HI_type_node,
16808 V4HI_type_node, integer_type_node, NULL_TREE);
16809 tree v4hi_ftype_v4hi_di
16810 = build_function_type_list (V4HI_type_node,
16811 V4HI_type_node, long_long_unsigned_type_node,
16812 NULL_TREE);
16813 tree v2si_ftype_v2si_di
16814 = build_function_type_list (V2SI_type_node,
16815 V2SI_type_node, long_long_unsigned_type_node,
16816 NULL_TREE);
16817 tree void_ftype_void
16818 = build_function_type (void_type_node, void_list_node);
16819 tree void_ftype_unsigned
16820 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16821 tree void_ftype_unsigned_unsigned
16822 = build_function_type_list (void_type_node, unsigned_type_node,
16823 unsigned_type_node, NULL_TREE);
16824 tree void_ftype_pcvoid_unsigned_unsigned
16825 = build_function_type_list (void_type_node, const_ptr_type_node,
16826 unsigned_type_node, unsigned_type_node,
16827 NULL_TREE);
16828 tree unsigned_ftype_void
16829 = build_function_type (unsigned_type_node, void_list_node);
16830 tree v2si_ftype_v4sf
16831 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16832 /* Loads/stores. */
16833 tree void_ftype_v8qi_v8qi_pchar
16834 = build_function_type_list (void_type_node,
16835 V8QI_type_node, V8QI_type_node,
16836 pchar_type_node, NULL_TREE);
16837 tree v4sf_ftype_pcfloat
16838 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16839 /* @@@ the type is bogus */
16840 tree v4sf_ftype_v4sf_pv2si
16841 = build_function_type_list (V4SF_type_node,
16842 V4SF_type_node, pv2si_type_node, NULL_TREE);
16843 tree void_ftype_pv2si_v4sf
16844 = build_function_type_list (void_type_node,
16845 pv2si_type_node, V4SF_type_node, NULL_TREE);
16846 tree void_ftype_pfloat_v4sf
16847 = build_function_type_list (void_type_node,
16848 pfloat_type_node, V4SF_type_node, NULL_TREE);
16849 tree void_ftype_pdi_di
16850 = build_function_type_list (void_type_node,
16851 pdi_type_node, long_long_unsigned_type_node,
16852 NULL_TREE);
16853 tree void_ftype_pv2di_v2di
16854 = build_function_type_list (void_type_node,
16855 pv2di_type_node, V2DI_type_node, NULL_TREE);
16856 /* Normal vector unops. */
16857 tree v4sf_ftype_v4sf
16858 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16859 tree v16qi_ftype_v16qi
16860 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16861 tree v8hi_ftype_v8hi
16862 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16863 tree v4si_ftype_v4si
16864 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16865 tree v8qi_ftype_v8qi
16866 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16867 tree v4hi_ftype_v4hi
16868 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16869
16870 /* Normal vector binops. */
16871 tree v4sf_ftype_v4sf_v4sf
16872 = build_function_type_list (V4SF_type_node,
16873 V4SF_type_node, V4SF_type_node, NULL_TREE);
16874 tree v8qi_ftype_v8qi_v8qi
16875 = build_function_type_list (V8QI_type_node,
16876 V8QI_type_node, V8QI_type_node, NULL_TREE);
16877 tree v4hi_ftype_v4hi_v4hi
16878 = build_function_type_list (V4HI_type_node,
16879 V4HI_type_node, V4HI_type_node, NULL_TREE);
16880 tree v2si_ftype_v2si_v2si
16881 = build_function_type_list (V2SI_type_node,
16882 V2SI_type_node, V2SI_type_node, NULL_TREE);
16883 tree di_ftype_di_di
16884 = build_function_type_list (long_long_unsigned_type_node,
16885 long_long_unsigned_type_node,
16886 long_long_unsigned_type_node, NULL_TREE);
16887
16888 tree di_ftype_di_di_int
16889 = build_function_type_list (long_long_unsigned_type_node,
16890 long_long_unsigned_type_node,
16891 long_long_unsigned_type_node,
16892 integer_type_node, NULL_TREE);
16893
16894 tree v2si_ftype_v2sf
16895 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16896 tree v2sf_ftype_v2si
16897 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16898 tree v2si_ftype_v2si
16899 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16900 tree v2sf_ftype_v2sf
16901 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16902 tree v2sf_ftype_v2sf_v2sf
16903 = build_function_type_list (V2SF_type_node,
16904 V2SF_type_node, V2SF_type_node, NULL_TREE);
16905 tree v2si_ftype_v2sf_v2sf
16906 = build_function_type_list (V2SI_type_node,
16907 V2SF_type_node, V2SF_type_node, NULL_TREE);
16908 tree pint_type_node = build_pointer_type (integer_type_node);
16909 tree pdouble_type_node = build_pointer_type (double_type_node);
16910 tree pcdouble_type_node = build_pointer_type (
16911 build_type_variant (double_type_node, 1, 0));
16912 tree int_ftype_v2df_v2df
16913 = build_function_type_list (integer_type_node,
16914 V2DF_type_node, V2DF_type_node, NULL_TREE);
16915
16916 tree void_ftype_pcvoid
16917 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16918 tree v4sf_ftype_v4si
16919 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16920 tree v4si_ftype_v4sf
16921 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16922 tree v2df_ftype_v4si
16923 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16924 tree v4si_ftype_v2df
16925 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16926 tree v2si_ftype_v2df
16927 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16928 tree v4sf_ftype_v2df
16929 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16930 tree v2df_ftype_v2si
16931 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16932 tree v2df_ftype_v4sf
16933 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16934 tree int_ftype_v2df
16935 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16936 tree int64_ftype_v2df
16937 = build_function_type_list (long_long_integer_type_node,
16938 V2DF_type_node, NULL_TREE);
16939 tree v2df_ftype_v2df_int
16940 = build_function_type_list (V2DF_type_node,
16941 V2DF_type_node, integer_type_node, NULL_TREE);
16942 tree v2df_ftype_v2df_int64
16943 = build_function_type_list (V2DF_type_node,
16944 V2DF_type_node, long_long_integer_type_node,
16945 NULL_TREE);
16946 tree v4sf_ftype_v4sf_v2df
16947 = build_function_type_list (V4SF_type_node,
16948 V4SF_type_node, V2DF_type_node, NULL_TREE);
16949 tree v2df_ftype_v2df_v4sf
16950 = build_function_type_list (V2DF_type_node,
16951 V2DF_type_node, V4SF_type_node, NULL_TREE);
16952 tree v2df_ftype_v2df_v2df_int
16953 = build_function_type_list (V2DF_type_node,
16954 V2DF_type_node, V2DF_type_node,
16955 integer_type_node,
16956 NULL_TREE);
16957 tree v2df_ftype_v2df_pcdouble
16958 = build_function_type_list (V2DF_type_node,
16959 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16960 tree void_ftype_pdouble_v2df
16961 = build_function_type_list (void_type_node,
16962 pdouble_type_node, V2DF_type_node, NULL_TREE);
16963 tree void_ftype_pint_int
16964 = build_function_type_list (void_type_node,
16965 pint_type_node, integer_type_node, NULL_TREE);
16966 tree void_ftype_v16qi_v16qi_pchar
16967 = build_function_type_list (void_type_node,
16968 V16QI_type_node, V16QI_type_node,
16969 pchar_type_node, NULL_TREE);
16970 tree v2df_ftype_pcdouble
16971 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16972 tree v2df_ftype_v2df_v2df
16973 = build_function_type_list (V2DF_type_node,
16974 V2DF_type_node, V2DF_type_node, NULL_TREE);
16975 tree v16qi_ftype_v16qi_v16qi
16976 = build_function_type_list (V16QI_type_node,
16977 V16QI_type_node, V16QI_type_node, NULL_TREE);
16978 tree v8hi_ftype_v8hi_v8hi
16979 = build_function_type_list (V8HI_type_node,
16980 V8HI_type_node, V8HI_type_node, NULL_TREE);
16981 tree v4si_ftype_v4si_v4si
16982 = build_function_type_list (V4SI_type_node,
16983 V4SI_type_node, V4SI_type_node, NULL_TREE);
16984 tree v2di_ftype_v2di_v2di
16985 = build_function_type_list (V2DI_type_node,
16986 V2DI_type_node, V2DI_type_node, NULL_TREE);
16987 tree v2di_ftype_v2df_v2df
16988 = build_function_type_list (V2DI_type_node,
16989 V2DF_type_node, V2DF_type_node, NULL_TREE);
16990 tree v2df_ftype_v2df
16991 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16992 tree v2di_ftype_v2di_int
16993 = build_function_type_list (V2DI_type_node,
16994 V2DI_type_node, integer_type_node, NULL_TREE);
16995 tree v2di_ftype_v2di_v2di_int
16996 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16997 V2DI_type_node, integer_type_node, NULL_TREE);
16998 tree v4si_ftype_v4si_int
16999 = build_function_type_list (V4SI_type_node,
17000 V4SI_type_node, integer_type_node, NULL_TREE);
17001 tree v8hi_ftype_v8hi_int
17002 = build_function_type_list (V8HI_type_node,
17003 V8HI_type_node, integer_type_node, NULL_TREE);
17004 tree v8hi_ftype_v8hi_v2di
17005 = build_function_type_list (V8HI_type_node,
17006 V8HI_type_node, V2DI_type_node, NULL_TREE);
17007 tree v4si_ftype_v4si_v2di
17008 = build_function_type_list (V4SI_type_node,
17009 V4SI_type_node, V2DI_type_node, NULL_TREE);
17010 tree v4si_ftype_v8hi_v8hi
17011 = build_function_type_list (V4SI_type_node,
17012 V8HI_type_node, V8HI_type_node, NULL_TREE);
17013 tree di_ftype_v8qi_v8qi
17014 = build_function_type_list (long_long_unsigned_type_node,
17015 V8QI_type_node, V8QI_type_node, NULL_TREE);
17016 tree di_ftype_v2si_v2si
17017 = build_function_type_list (long_long_unsigned_type_node,
17018 V2SI_type_node, V2SI_type_node, NULL_TREE);
17019 tree v2di_ftype_v16qi_v16qi
17020 = build_function_type_list (V2DI_type_node,
17021 V16QI_type_node, V16QI_type_node, NULL_TREE);
17022 tree v2di_ftype_v4si_v4si
17023 = build_function_type_list (V2DI_type_node,
17024 V4SI_type_node, V4SI_type_node, NULL_TREE);
17025 tree int_ftype_v16qi
17026 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17027 tree v16qi_ftype_pcchar
17028 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17029 tree void_ftype_pchar_v16qi
17030 = build_function_type_list (void_type_node,
17031 pchar_type_node, V16QI_type_node, NULL_TREE);
17032
17033 tree v2di_ftype_v2di_unsigned_unsigned
17034 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17035 unsigned_type_node, unsigned_type_node,
17036 NULL_TREE);
17037 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17038 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17039 unsigned_type_node, unsigned_type_node,
17040 NULL_TREE);
17041 tree v2di_ftype_v2di_v16qi
17042 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17043 NULL_TREE);
17044
17045 tree float80_type;
17046 tree float128_type;
17047 tree ftype;
17048
17049 /* The __float80 type. */
17050 if (TYPE_MODE (long_double_type_node) == XFmode)
17051 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17052 "__float80");
17053 else
17054 {
17055 /* Construct a distinct 80-bit type, since long double is not XFmode here. */
17056 float80_type = make_node (REAL_TYPE);
17057 TYPE_PRECISION (float80_type) = 80;
17058 layout_type (float80_type);
17059 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17060 }
17061
17062 if (TARGET_64BIT)
17063 {
17064 float128_type = make_node (REAL_TYPE);
17065 TYPE_PRECISION (float128_type) = 128;
17066 layout_type (float128_type);
17067 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17068 }
17069
17070 /* Add all builtins that are more or less simple operations on two
17071 operands. */
17072 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17073 {
17074 /* Use one of the operands; the target can have a different mode for
17075 mask-generating compares. */
17076 enum machine_mode mode;
17077 tree type;
17078
17079 if (d->name == 0)
17080 continue;
17081 mode = insn_data[d->icode].operand[1].mode;
17082
17083 switch (mode)
17084 {
17085 case V16QImode:
17086 type = v16qi_ftype_v16qi_v16qi;
17087 break;
17088 case V8HImode:
17089 type = v8hi_ftype_v8hi_v8hi;
17090 break;
17091 case V4SImode:
17092 type = v4si_ftype_v4si_v4si;
17093 break;
17094 case V2DImode:
17095 type = v2di_ftype_v2di_v2di;
17096 break;
17097 case V2DFmode:
17098 type = v2df_ftype_v2df_v2df;
17099 break;
17100 case V4SFmode:
17101 type = v4sf_ftype_v4sf_v4sf;
17102 break;
17103 case V8QImode:
17104 type = v8qi_ftype_v8qi_v8qi;
17105 break;
17106 case V4HImode:
17107 type = v4hi_ftype_v4hi_v4hi;
17108 break;
17109 case V2SImode:
17110 type = v2si_ftype_v2si_v2si;
17111 break;
17112 case DImode:
17113 type = di_ftype_di_di;
17114 break;
17115
17116 default:
17117 gcc_unreachable ();
17118 }
17119
17120 /* Override for comparisons. */
17121 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17122 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17123 type = v4si_ftype_v4sf_v4sf;
17124
17125 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17126 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17127 type = v2di_ftype_v2df_v2df;
17128
17129 def_builtin (d->mask, d->name, type, d->code);
17130 }
17131
17132 /* Add all builtins that are more or less simple operations on 1 operand. */
17133 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17134 {
17135 enum machine_mode mode;
17136 tree type;
17137
17138 if (d->name == 0)
17139 continue;
17140 mode = insn_data[d->icode].operand[1].mode;
17141
17142 switch (mode)
17143 {
17144 case V16QImode:
17145 type = v16qi_ftype_v16qi;
17146 break;
17147 case V8HImode:
17148 type = v8hi_ftype_v8hi;
17149 break;
17150 case V4SImode:
17151 type = v4si_ftype_v4si;
17152 break;
17153 case V2DFmode:
17154 type = v2df_ftype_v2df;
17155 break;
17156 case V4SFmode:
17157 type = v4sf_ftype_v4sf;
17158 break;
17159 case V8QImode:
17160 type = v8qi_ftype_v8qi;
17161 break;
17162 case V4HImode:
17163 type = v4hi_ftype_v4hi;
17164 break;
17165 case V2SImode:
17166 type = v2si_ftype_v2si;
17167 break;
17168
17169 default:
17170 gcc_unreachable ();
17171 }
17172
17173 def_builtin (d->mask, d->name, type, d->code);
17174 }
17175
17176 /* Add the remaining MMX insns with somewhat more complicated types. */
17177 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17178 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17179 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17180 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17181
17182 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17183 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17184 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17185
17186 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17187 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17188
17189 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17190 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17191
17192 /* comi/ucomi insns. */
17193 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17194 if (d->mask == MASK_SSE2)
17195 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17196 else
17197 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17198
17199 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17200 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17201 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17202
17203 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17204 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17205 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17206 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17207 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17208 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17209 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17210 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17211 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17212 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17213 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17214
17215 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17216
17217 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17218 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17219
17220 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17221 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17222 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17223 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17224
17225 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17226 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17227 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17228 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17229
17230 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17231
17232 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17233
17234 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17235 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17236 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17237 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17238 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17239 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17240
17241 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17242
17243 /* Original 3DNow! */
17244 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17245 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17246 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17247 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17248 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17249 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17250 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17251 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17252 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17253 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17254 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17255 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17256 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17257 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17258 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17259 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17260 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17261 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17262 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17263 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17264
17265 /* 3DNow! extension as used in the Athlon CPU. */
17266 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17267 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17268 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17269 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17270 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17271 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17272
17273 /* SSE2 */
17274 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17275
17276 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17277 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17278
17279 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17280 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17281
17282 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17283 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17284 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17285 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17286 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17287
17288 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17289 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17290 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17291 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17292
17293 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17294 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17295
17296 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17297
17298 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17299 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17300
17301 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17302 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17303 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17304 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17305 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17306
17307 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17308
17309 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17310 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17311 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17312 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17313
17314 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17315 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17316 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17317
17318 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17319 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17320 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17321 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17322
17323 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17324 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17325 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17326
17327 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17328 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17329
17330 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17331 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17332
17333 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17334 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17335 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17336
17337 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17338 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17339 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17340
17341 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17342 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17343
17344 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17345 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17346 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17347 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17348
17349 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17350 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17351 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17352 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17353
17354 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17355 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17356
17357 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17358
17359 /* Prescott New Instructions. */
17360 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17361 void_ftype_pcvoid_unsigned_unsigned,
17362 IX86_BUILTIN_MONITOR);
17363 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17364 void_ftype_unsigned_unsigned,
17365 IX86_BUILTIN_MWAIT);
17366 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17367 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17368
17369 /* SSSE3. */
17370 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17371 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17372 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17373 IX86_BUILTIN_PALIGNR);
17374
17375 /* AMDFAM10 SSE4A New built-ins */
17376 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17377 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17378 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17379 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17380 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17381 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17382 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17383 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17384 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17385 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17386 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17387 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17388
17389 /* Access to the vec_init patterns. */
17390 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17391 integer_type_node, NULL_TREE);
17392 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17393 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17394
17395 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17396 short_integer_type_node,
17397 short_integer_type_node,
17398 short_integer_type_node, NULL_TREE);
17399 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17400 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17401
17402 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17403 char_type_node, char_type_node,
17404 char_type_node, char_type_node,
17405 char_type_node, char_type_node,
17406 char_type_node, NULL_TREE);
17407 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17408 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17409
17410 /* Access to the vec_extract patterns. */
17411 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17412 integer_type_node, NULL_TREE);
17413 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17414 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17415
17416 ftype = build_function_type_list (long_long_integer_type_node,
17417 V2DI_type_node, integer_type_node,
17418 NULL_TREE);
17419 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17420 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17421
17422 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17423 integer_type_node, NULL_TREE);
17424 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17425 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17426
17427 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17428 integer_type_node, NULL_TREE);
17429 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17430 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17431
17432 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17433 integer_type_node, NULL_TREE);
17434 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17435 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17436
17437 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17438 integer_type_node, NULL_TREE);
17439 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17440 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17441
17442 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17443 integer_type_node, NULL_TREE);
17444 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17445 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17446
17447 /* Access to the vec_set patterns. */
17448 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17449 intHI_type_node,
17450 integer_type_node, NULL_TREE);
17451 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17452 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17453
17454 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17455 intHI_type_node,
17456 integer_type_node, NULL_TREE);
17457 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17458 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17459 }
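
/* Each def_builtin call above only registers its builtin when the
   requested MASK_* ISA bits are enabled for the target (see def_builtin
   above), so for example __builtin_ia32_cvtsd2si64 (MASK_SSE2 | MASK_64BIT)
   only exists when compiling for a 64-bit SSE2 target.  */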
17460
17461 /* Errors in the source file can cause expand_expr to return const0_rtx
17462 where we expect a vector. To avoid crashing, use one of the vector
17463 clear instructions. */
17464 static rtx
17465 safe_vector_operand (rtx x, enum machine_mode mode)
17466 {
17467 if (x == const0_rtx)
17468 x = CONST0_RTX (mode);
17469 return x;
17470 }
17471
17472 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17473
17474 static rtx
17475 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17476 {
17477 rtx pat, xops[3];
17478 tree arg0 = CALL_EXPR_ARG (exp, 0);
17479 tree arg1 = CALL_EXPR_ARG (exp, 1);
17480 rtx op0 = expand_normal (arg0);
17481 rtx op1 = expand_normal (arg1);
17482 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17483 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17484 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17485
17486 if (VECTOR_MODE_P (mode0))
17487 op0 = safe_vector_operand (op0, mode0);
17488 if (VECTOR_MODE_P (mode1))
17489 op1 = safe_vector_operand (op1, mode1);
17490
17491 if (optimize || !target
17492 || GET_MODE (target) != tmode
17493 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17494 target = gen_reg_rtx (tmode);
17495
17496 if (GET_MODE (op1) == SImode && mode1 == TImode)
17497 {
17498 rtx x = gen_reg_rtx (V4SImode);
17499 emit_insn (gen_sse2_loadd (x, op1));
17500 op1 = gen_lowpart (TImode, x);
17501 }
17502
17503 /* The insn must want input operands in the same modes as the
17504 result. */
17505 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17506 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17507
17508 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17509 op0 = copy_to_mode_reg (mode0, op0);
17510 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17511 op1 = copy_to_mode_reg (mode1, op1);
17512
17513 /* ??? Using ix86_fixup_binary_operands is problematic when
17514 we've got mismatched modes. Fake it. */
17515
17516 xops[0] = target;
17517 xops[1] = op0;
17518 xops[2] = op1;
17519
17520 if (tmode == mode0 && tmode == mode1)
17521 {
17522 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17523 op0 = xops[1];
17524 op1 = xops[2];
17525 }
17526 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17527 {
17528 op0 = force_reg (mode0, op0);
17529 op1 = force_reg (mode1, op1);
17530 target = gen_reg_rtx (tmode);
17531 }
17532
17533 pat = GEN_FCN (icode) (target, op0, op1);
17534 if (! pat)
17535 return 0;
17536 emit_insn (pat);
17537 return target;
17538 }
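
/* For example, __builtin_ia32_pfadd (a, b) reaches this routine from
   ix86_expand_builtin with icode CODE_FOR_mmx_addv2sf3; the two V2SF
   operands are legitimized as above and the insn produced by
   GEN_FCN (icode) (target, op0, op1) is emitted, with TARGET returned.  */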
17539
17540 /* Subroutine of ix86_expand_builtin to take care of stores. */
17541
17542 static rtx
17543 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17544 {
17545 rtx pat;
17546 tree arg0 = CALL_EXPR_ARG (exp, 0);
17547 tree arg1 = CALL_EXPR_ARG (exp, 1);
17548 rtx op0 = expand_normal (arg0);
17549 rtx op1 = expand_normal (arg1);
17550 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17551 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17552
17553 if (VECTOR_MODE_P (mode1))
17554 op1 = safe_vector_operand (op1, mode1);
17555
17556 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17557 op1 = copy_to_mode_reg (mode1, op1);
17558
17559 pat = GEN_FCN (icode) (op0, op1);
17560 if (pat)
17561 emit_insn (pat);
17562 return 0;
17563 }
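
/* For example, __builtin_ia32_movntpd (p, x) is expanded here with
   CODE_FOR_sse2_movntv2df: P becomes a (mem:V2DF ...) address, X is
   copied into a register, and the non-temporal store insn is emitted.
   The builtin is void, so nothing useful is returned.  */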
17564
17565 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17566
17567 static rtx
17568 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17569 rtx target, int do_load)
17570 {
17571 rtx pat;
17572 tree arg0 = CALL_EXPR_ARG (exp, 0);
17573 rtx op0 = expand_normal (arg0);
17574 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17575 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17576
17577 if (optimize || !target
17578 || GET_MODE (target) != tmode
17579 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17580 target = gen_reg_rtx (tmode);
17581 if (do_load)
17582 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17583 else
17584 {
17585 if (VECTOR_MODE_P (mode0))
17586 op0 = safe_vector_operand (op0, mode0);
17587
17588 if ((optimize && !register_operand (op0, mode0))
17589 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17590 op0 = copy_to_mode_reg (mode0, op0);
17591 }
17592
17593 pat = GEN_FCN (icode) (target, op0);
17594 if (! pat)
17595 return 0;
17596 emit_insn (pat);
17597 return target;
17598 }
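
/* DO_LOAD distinguishes builtins whose single argument is a pointer to
   be dereferenced, such as __builtin_ia32_loadupd (expanded with
   CODE_FOR_sse2_movupd and DO_LOAD == 1), from one-operand builtins such
   as __builtin_ia32_pf2id whose argument is already a vector value
   (DO_LOAD == 0).  */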
17599
17600 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17601 sqrtss, rsqrtss, rcpss. */
17602
17603 static rtx
17604 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17605 {
17606 rtx pat;
17607 tree arg0 = CALL_EXPR_ARG (exp, 0);
17608 rtx op1, op0 = expand_normal (arg0);
17609 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17610 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17611
17612 if (optimize || !target
17613 || GET_MODE (target) != tmode
17614 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17615 target = gen_reg_rtx (tmode);
17616
17617 if (VECTOR_MODE_P (mode0))
17618 op0 = safe_vector_operand (op0, mode0);
17619
17620 if ((optimize && !register_operand (op0, mode0))
17621 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17622 op0 = copy_to_mode_reg (mode0, op0);
17623
17624 op1 = op0;
17625 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17626 op1 = copy_to_mode_reg (mode0, op1);
17627
17628 pat = GEN_FCN (icode) (target, op0, op1);
17629 if (! pat)
17630 return 0;
17631 emit_insn (pat);
17632 return target;
17633 }
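
/* The vm* patterns used for these scalar insns take two vector inputs:
   the operation applies to the low element of the first and the remaining
   elements are taken from the second.  The builtin has only one argument,
   so OP0 is reused as OP1, matching the behavior of the corresponding
   scalar intrinsics.  */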
17634
17635 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17636
17637 static rtx
17638 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17639 rtx target)
17640 {
17641 rtx pat;
17642 tree arg0 = CALL_EXPR_ARG (exp, 0);
17643 tree arg1 = CALL_EXPR_ARG (exp, 1);
17644 rtx op0 = expand_normal (arg0);
17645 rtx op1 = expand_normal (arg1);
17646 rtx op2;
17647 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17648 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17649 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17650 enum rtx_code comparison = d->comparison;
17651
17652 if (VECTOR_MODE_P (mode0))
17653 op0 = safe_vector_operand (op0, mode0);
17654 if (VECTOR_MODE_P (mode1))
17655 op1 = safe_vector_operand (op1, mode1);
17656
17657 /* Swap operands if we have a comparison that isn't available in
17658 hardware. */
17659 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17660 {
17661 rtx tmp = gen_reg_rtx (mode1);
17662 emit_move_insn (tmp, op1);
17663 op1 = op0;
17664 op0 = tmp;
17665 }
17666
17667 if (optimize || !target
17668 || GET_MODE (target) != tmode
17669 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17670 target = gen_reg_rtx (tmode);
17671
17672 if ((optimize && !register_operand (op0, mode0))
17673 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17674 op0 = copy_to_mode_reg (mode0, op0);
17675 if ((optimize && !register_operand (op1, mode1))
17676 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17677 op1 = copy_to_mode_reg (mode1, op1);
17678
17679 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17680 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17681 if (! pat)
17682 return 0;
17683 emit_insn (pat);
17684 return target;
17685 }
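
/* This allows a comparison the hardware implements in only one direction,
   such as greater-than, to be described in the table as its reverse (LT)
   plus BUILTIN_DESC_SWAP_OPERANDS; the swap above then presents the
   operands in the order the instruction expects.  */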
17686
17687 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17688
17689 static rtx
17690 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17691 rtx target)
17692 {
17693 rtx pat;
17694 tree arg0 = CALL_EXPR_ARG (exp, 0);
17695 tree arg1 = CALL_EXPR_ARG (exp, 1);
17696 rtx op0 = expand_normal (arg0);
17697 rtx op1 = expand_normal (arg1);
17698 rtx op2;
17699 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17700 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17701 enum rtx_code comparison = d->comparison;
17702
17703 if (VECTOR_MODE_P (mode0))
17704 op0 = safe_vector_operand (op0, mode0);
17705 if (VECTOR_MODE_P (mode1))
17706 op1 = safe_vector_operand (op1, mode1);
17707
17708 /* Swap operands if we have a comparison that isn't available in
17709 hardware. */
17710 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17711 {
17712 rtx tmp = op1;
17713 op1 = op0;
17714 op0 = tmp;
17715 }
17716
17717 target = gen_reg_rtx (SImode);
17718 emit_move_insn (target, const0_rtx);
17719 target = gen_rtx_SUBREG (QImode, target, 0);
17720
17721 if ((optimize && !register_operand (op0, mode0))
17722 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17723 op0 = copy_to_mode_reg (mode0, op0);
17724 if ((optimize && !register_operand (op1, mode1))
17725 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17726 op1 = copy_to_mode_reg (mode1, op1);
17727
17728 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17729 pat = GEN_FCN (d->icode) (op0, op1);
17730 if (! pat)
17731 return 0;
17732 emit_insn (pat);
17733 emit_insn (gen_rtx_SET (VOIDmode,
17734 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17735 gen_rtx_fmt_ee (comparison, QImode,
17736 SET_DEST (pat),
17737 const0_rtx)));
17738
17739 return SUBREG_REG (target);
17740 }
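
/* The comi patterns only set the flags, so the result is materialized by
   hand: an SImode pseudo is cleared, the comparison outcome is stored
   into its low QImode part via the STRICT_LOW_PART set above, and the
   SImode register is returned.  */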
17741
17742 /* Return the integer constant in ARG. Constrain it to be in the range
17743 of the subparts of VEC_TYPE; issue an error if not. */
17744
17745 static int
17746 get_element_number (tree vec_type, tree arg)
17747 {
17748 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17749
17750 if (!host_integerp (arg, 1)
17751 || (elt = tree_low_cst (arg, 1), elt > max))
17752 {
17753 error ("selector must be an integer constant in the range 0..%wi", max);
17754 return 0;
17755 }
17756
17757 return elt;
17758 }
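
/* For instance, in __builtin_ia32_vec_ext_v4sf (x, n) the selector N must
   be an integer constant in the range 0..3 (V4SF has four subparts);
   anything else is diagnosed here and element 0 is used as a fallback.  */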
17759
17760 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17761 ix86_expand_vector_init. We DO have language-level syntax for this, in
17762 the form of (type){ init-list }. Except that since we can't place emms
17763 instructions from inside the compiler, we can't allow the use of MMX
17764 registers unless the user explicitly asks for it. So we do *not* define
17765 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17766 we have builtins invoked by mmintrin.h that give us license to emit
17767 these sorts of instructions. */
17768
17769 static rtx
17770 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17771 {
17772 enum machine_mode tmode = TYPE_MODE (type);
17773 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17774 int i, n_elt = GET_MODE_NUNITS (tmode);
17775 rtvec v = rtvec_alloc (n_elt);
17776
17777 gcc_assert (VECTOR_MODE_P (tmode));
17778 gcc_assert (call_expr_nargs (exp) == n_elt);
17779
17780 for (i = 0; i < n_elt; ++i)
17781 {
17782 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17783 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17784 }
17785
17786 if (!target || !register_operand (target, tmode))
17787 target = gen_reg_rtx (tmode);
17788
17789 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17790 return target;
17791 }
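
/* For example, _mm_set_pi32 from mmintrin.h is implemented on top of
   __builtin_ia32_vec_init_v2si, so

	__m64 t = _mm_set_pi32 (i1, i0);

   arrives here as a two-argument CALL_EXPR whose operands are assembled
   into a V2SI register by ix86_expand_vector_init.  */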
17792
17793 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17794 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17795 had a language-level syntax for referencing vector elements. */
17796
17797 static rtx
17798 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17799 {
17800 enum machine_mode tmode, mode0;
17801 tree arg0, arg1;
17802 int elt;
17803 rtx op0;
17804
17805 arg0 = CALL_EXPR_ARG (exp, 0);
17806 arg1 = CALL_EXPR_ARG (exp, 1);
17807
17808 op0 = expand_normal (arg0);
17809 elt = get_element_number (TREE_TYPE (arg0), arg1);
17810
17811 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17812 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17813 gcc_assert (VECTOR_MODE_P (mode0));
17814
17815 op0 = force_reg (mode0, op0);
17816
17817 if (optimize || !target || !register_operand (target, tmode))
17818 target = gen_reg_rtx (tmode);
17819
17820 ix86_expand_vector_extract (true, target, op0, elt);
17821
17822 return target;
17823 }
17824
17825 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17826 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17827 a language-level syntax for referencing vector elements. */
17828
17829 static rtx
17830 ix86_expand_vec_set_builtin (tree exp)
17831 {
17832 enum machine_mode tmode, mode1;
17833 tree arg0, arg1, arg2;
17834 int elt;
17835 rtx op0, op1;
17836
17837 arg0 = CALL_EXPR_ARG (exp, 0);
17838 arg1 = CALL_EXPR_ARG (exp, 1);
17839 arg2 = CALL_EXPR_ARG (exp, 2);
17840
17841 tmode = TYPE_MODE (TREE_TYPE (arg0));
17842 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17843 gcc_assert (VECTOR_MODE_P (tmode));
17844
17845 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17846 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17847 elt = get_element_number (TREE_TYPE (arg0), arg2);
17848
17849 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17850 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17851
17852 op0 = force_reg (tmode, op0);
17853 op1 = force_reg (mode1, op1);
17854
17855 ix86_expand_vector_set (true, op0, op1, elt);
17856
17857 return op0;
17858 }
17859
17860 /* Expand an expression EXP that calls a built-in function,
17861 with result going to TARGET if that's convenient
17862 (and in mode MODE if that's convenient).
17863 SUBTARGET may be used as the target for computing one of EXP's operands.
17864 IGNORE is nonzero if the value is to be ignored. */
17865
17866 static rtx
17867 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17868 enum machine_mode mode ATTRIBUTE_UNUSED,
17869 int ignore ATTRIBUTE_UNUSED)
17870 {
17871 const struct builtin_description *d;
17872 size_t i;
17873 enum insn_code icode;
17874 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17875 tree arg0, arg1, arg2, arg3;
17876 rtx op0, op1, op2, op3, pat;
17877 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17878 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17879
17880 switch (fcode)
17881 {
17882 case IX86_BUILTIN_EMMS:
17883 emit_insn (gen_mmx_emms ());
17884 return 0;
17885
17886 case IX86_BUILTIN_SFENCE:
17887 emit_insn (gen_sse_sfence ());
17888 return 0;
17889
17890 case IX86_BUILTIN_MASKMOVQ:
17891 case IX86_BUILTIN_MASKMOVDQU:
17892 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17893 ? CODE_FOR_mmx_maskmovq
17894 : CODE_FOR_sse2_maskmovdqu);
17895 /* Note the arg order is different from the operand order. */
17896 arg1 = CALL_EXPR_ARG (exp, 0);
17897 arg2 = CALL_EXPR_ARG (exp, 1);
17898 arg0 = CALL_EXPR_ARG (exp, 2);
17899 op0 = expand_normal (arg0);
17900 op1 = expand_normal (arg1);
17901 op2 = expand_normal (arg2);
17902 mode0 = insn_data[icode].operand[0].mode;
17903 mode1 = insn_data[icode].operand[1].mode;
17904 mode2 = insn_data[icode].operand[2].mode;
17905
17906 op0 = force_reg (Pmode, op0);
17907 op0 = gen_rtx_MEM (mode1, op0);
17908
17909 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17910 op0 = copy_to_mode_reg (mode0, op0);
17911 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17912 op1 = copy_to_mode_reg (mode1, op1);
17913 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17914 op2 = copy_to_mode_reg (mode2, op2);
17915 pat = GEN_FCN (icode) (op0, op1, op2);
17916 if (! pat)
17917 return 0;
17918 emit_insn (pat);
17919 return 0;
17920
17921 case IX86_BUILTIN_SQRTSS:
17922 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17923 case IX86_BUILTIN_RSQRTSS:
17924 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17925 case IX86_BUILTIN_RCPSS:
17926 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17927
17928 case IX86_BUILTIN_LOADUPS:
17929 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17930
17931 case IX86_BUILTIN_STOREUPS:
17932 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17933
17934 case IX86_BUILTIN_LOADHPS:
17935 case IX86_BUILTIN_LOADLPS:
17936 case IX86_BUILTIN_LOADHPD:
17937 case IX86_BUILTIN_LOADLPD:
17938 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17939 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17940 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17941 : CODE_FOR_sse2_loadlpd);
17942 arg0 = CALL_EXPR_ARG (exp, 0);
17943 arg1 = CALL_EXPR_ARG (exp, 1);
17944 op0 = expand_normal (arg0);
17945 op1 = expand_normal (arg1);
17946 tmode = insn_data[icode].operand[0].mode;
17947 mode0 = insn_data[icode].operand[1].mode;
17948 mode1 = insn_data[icode].operand[2].mode;
17949
17950 op0 = force_reg (mode0, op0);
17951 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17952 if (optimize || target == 0
17953 || GET_MODE (target) != tmode
17954 || !register_operand (target, tmode))
17955 target = gen_reg_rtx (tmode);
17956 pat = GEN_FCN (icode) (target, op0, op1);
17957 if (! pat)
17958 return 0;
17959 emit_insn (pat);
17960 return target;
17961
17962 case IX86_BUILTIN_STOREHPS:
17963 case IX86_BUILTIN_STORELPS:
17964 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17965 : CODE_FOR_sse_storelps);
17966 arg0 = CALL_EXPR_ARG (exp, 0);
17967 arg1 = CALL_EXPR_ARG (exp, 1);
17968 op0 = expand_normal (arg0);
17969 op1 = expand_normal (arg1);
17970 mode0 = insn_data[icode].operand[0].mode;
17971 mode1 = insn_data[icode].operand[1].mode;
17972
17973 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17974 op1 = force_reg (mode1, op1);
17975
17976 pat = GEN_FCN (icode) (op0, op1);
17977 if (! pat)
17978 return 0;
17979 emit_insn (pat);
17980 return const0_rtx;
17981
17982 case IX86_BUILTIN_MOVNTPS:
17983 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17984 case IX86_BUILTIN_MOVNTQ:
17985 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17986
17987 case IX86_BUILTIN_LDMXCSR:
17988 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17989 target = assign_386_stack_local (SImode, SLOT_TEMP);
17990 emit_move_insn (target, op0);
17991 emit_insn (gen_sse_ldmxcsr (target));
17992 return 0;
17993
17994 case IX86_BUILTIN_STMXCSR:
17995 target = assign_386_stack_local (SImode, SLOT_TEMP);
17996 emit_insn (gen_sse_stmxcsr (target));
17997 return copy_to_mode_reg (SImode, target);
17998
17999 case IX86_BUILTIN_SHUFPS:
18000 case IX86_BUILTIN_SHUFPD:
18001 icode = (fcode == IX86_BUILTIN_SHUFPS
18002 ? CODE_FOR_sse_shufps
18003 : CODE_FOR_sse2_shufpd);
18004 arg0 = CALL_EXPR_ARG (exp, 0);
18005 arg1 = CALL_EXPR_ARG (exp, 1);
18006 arg2 = CALL_EXPR_ARG (exp, 2);
18007 op0 = expand_normal (arg0);
18008 op1 = expand_normal (arg1);
18009 op2 = expand_normal (arg2);
18010 tmode = insn_data[icode].operand[0].mode;
18011 mode0 = insn_data[icode].operand[1].mode;
18012 mode1 = insn_data[icode].operand[2].mode;
18013 mode2 = insn_data[icode].operand[3].mode;
18014
18015 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18016 op0 = copy_to_mode_reg (mode0, op0);
18017 if ((optimize && !register_operand (op1, mode1))
18018 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18019 op1 = copy_to_mode_reg (mode1, op1);
18020 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18021 {
18022 /* @@@ better error message */
18023 error ("mask must be an immediate");
18024 return gen_reg_rtx (tmode);
18025 }
18026 if (optimize || target == 0
18027 || GET_MODE (target) != tmode
18028 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18029 target = gen_reg_rtx (tmode);
18030 pat = GEN_FCN (icode) (target, op0, op1, op2);
18031 if (! pat)
18032 return 0;
18033 emit_insn (pat);
18034 return target;
18035
18036 case IX86_BUILTIN_PSHUFW:
18037 case IX86_BUILTIN_PSHUFD:
18038 case IX86_BUILTIN_PSHUFHW:
18039 case IX86_BUILTIN_PSHUFLW:
18040 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18041 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18042 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18043 : CODE_FOR_mmx_pshufw);
18044 arg0 = CALL_EXPR_ARG (exp, 0);
18045 arg1 = CALL_EXPR_ARG (exp, 1);
18046 op0 = expand_normal (arg0);
18047 op1 = expand_normal (arg1);
18048 tmode = insn_data[icode].operand[0].mode;
18049 mode1 = insn_data[icode].operand[1].mode;
18050 mode2 = insn_data[icode].operand[2].mode;
18051
18052 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18053 op0 = copy_to_mode_reg (mode1, op0);
18054 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18055 {
18056 /* @@@ better error message */
18057 error ("mask must be an immediate");
18058 return const0_rtx;
18059 }
18060 if (target == 0
18061 || GET_MODE (target) != tmode
18062 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18063 target = gen_reg_rtx (tmode);
18064 pat = GEN_FCN (icode) (target, op0, op1);
18065 if (! pat)
18066 return 0;
18067 emit_insn (pat);
18068 return target;
18069
18070 case IX86_BUILTIN_PSLLDQI128:
18071 case IX86_BUILTIN_PSRLDQI128:
18072 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18073 : CODE_FOR_sse2_lshrti3);
18074 arg0 = CALL_EXPR_ARG (exp, 0);
18075 arg1 = CALL_EXPR_ARG (exp, 1);
18076 op0 = expand_normal (arg0);
18077 op1 = expand_normal (arg1);
18078 tmode = insn_data[icode].operand[0].mode;
18079 mode1 = insn_data[icode].operand[1].mode;
18080 mode2 = insn_data[icode].operand[2].mode;
18081
18082 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18083 {
18084 op0 = copy_to_reg (op0);
18085 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18086 }
18087 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18088 {
18089 error ("shift must be an immediate");
18090 return const0_rtx;
18091 }
18092 target = gen_reg_rtx (V2DImode);
18093 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18094 if (! pat)
18095 return 0;
18096 emit_insn (pat);
18097 return target;
18098
18099 case IX86_BUILTIN_FEMMS:
18100 emit_insn (gen_mmx_femms ());
18101 return NULL_RTX;
18102
18103 case IX86_BUILTIN_PAVGUSB:
18104 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18105
18106 case IX86_BUILTIN_PF2ID:
18107 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18108
18109 case IX86_BUILTIN_PFACC:
18110 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18111
18112 case IX86_BUILTIN_PFADD:
18113 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18114
18115 case IX86_BUILTIN_PFCMPEQ:
18116 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18117
18118 case IX86_BUILTIN_PFCMPGE:
18119 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18120
18121 case IX86_BUILTIN_PFCMPGT:
18122 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18123
18124 case IX86_BUILTIN_PFMAX:
18125 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18126
18127 case IX86_BUILTIN_PFMIN:
18128 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18129
18130 case IX86_BUILTIN_PFMUL:
18131 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18132
18133 case IX86_BUILTIN_PFRCP:
18134 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18135
18136 case IX86_BUILTIN_PFRCPIT1:
18137 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18138
18139 case IX86_BUILTIN_PFRCPIT2:
18140 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18141
18142 case IX86_BUILTIN_PFRSQIT1:
18143 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18144
18145 case IX86_BUILTIN_PFRSQRT:
18146 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18147
18148 case IX86_BUILTIN_PFSUB:
18149 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18150
18151 case IX86_BUILTIN_PFSUBR:
18152 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18153
18154 case IX86_BUILTIN_PI2FD:
18155 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18156
18157 case IX86_BUILTIN_PMULHRW:
18158 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18159
18160 case IX86_BUILTIN_PF2IW:
18161 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18162
18163 case IX86_BUILTIN_PFNACC:
18164 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18165
18166 case IX86_BUILTIN_PFPNACC:
18167 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18168
18169 case IX86_BUILTIN_PI2FW:
18170 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18171
18172 case IX86_BUILTIN_PSWAPDSI:
18173 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18174
18175 case IX86_BUILTIN_PSWAPDSF:
18176 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18177
18178 case IX86_BUILTIN_SQRTSD:
18179 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18180 case IX86_BUILTIN_LOADUPD:
18181 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18182 case IX86_BUILTIN_STOREUPD:
18183 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18184
18185 case IX86_BUILTIN_MFENCE:
18186 emit_insn (gen_sse2_mfence ());
18187 return 0;
18188 case IX86_BUILTIN_LFENCE:
18189 emit_insn (gen_sse2_lfence ());
18190 return 0;
18191
18192 case IX86_BUILTIN_CLFLUSH:
18193 arg0 = CALL_EXPR_ARG (exp, 0);
18194 op0 = expand_normal (arg0);
18195 icode = CODE_FOR_sse2_clflush;
18196 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18197 op0 = copy_to_mode_reg (Pmode, op0);
18198
18199 emit_insn (gen_sse2_clflush (op0));
18200 return 0;
18201
18202 case IX86_BUILTIN_MOVNTPD:
18203 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18204 case IX86_BUILTIN_MOVNTDQ:
18205 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18206 case IX86_BUILTIN_MOVNTI:
18207 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18208
18209 case IX86_BUILTIN_LOADDQU:
18210 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18211 case IX86_BUILTIN_STOREDQU:
18212 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18213
18214 case IX86_BUILTIN_MONITOR:
18215 arg0 = CALL_EXPR_ARG (exp, 0);
18216 arg1 = CALL_EXPR_ARG (exp, 1);
18217 arg2 = CALL_EXPR_ARG (exp, 2);
18218 op0 = expand_normal (arg0);
18219 op1 = expand_normal (arg1);
18220 op2 = expand_normal (arg2);
18221 if (!REG_P (op0))
18222 op0 = copy_to_mode_reg (Pmode, op0);
18223 if (!REG_P (op1))
18224 op1 = copy_to_mode_reg (SImode, op1);
18225 if (!REG_P (op2))
18226 op2 = copy_to_mode_reg (SImode, op2);
18227 if (!TARGET_64BIT)
18228 emit_insn (gen_sse3_monitor (op0, op1, op2));
18229 else
18230 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18231 return 0;
18232
18233 case IX86_BUILTIN_MWAIT:
18234 arg0 = CALL_EXPR_ARG (exp, 0);
18235 arg1 = CALL_EXPR_ARG (exp, 1);
18236 op0 = expand_normal (arg0);
18237 op1 = expand_normal (arg1);
18238 if (!REG_P (op0))
18239 op0 = copy_to_mode_reg (SImode, op0);
18240 if (!REG_P (op1))
18241 op1 = copy_to_mode_reg (SImode, op1);
18242 emit_insn (gen_sse3_mwait (op0, op1));
18243 return 0;
18244
18245 case IX86_BUILTIN_LDDQU:
18246 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18247 target, 1);
18248
18249 case IX86_BUILTIN_PALIGNR:
18250 case IX86_BUILTIN_PALIGNR128:
18251 if (fcode == IX86_BUILTIN_PALIGNR)
18252 {
18253 icode = CODE_FOR_ssse3_palignrdi;
18254 mode = DImode;
18255 }
18256 else
18257 {
18258 icode = CODE_FOR_ssse3_palignrti;
18259 mode = V2DImode;
18260 }
18261 arg0 = CALL_EXPR_ARG (exp, 0);
18262 arg1 = CALL_EXPR_ARG (exp, 1);
18263 arg2 = CALL_EXPR_ARG (exp, 2);
18264 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18265 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18266 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18267 tmode = insn_data[icode].operand[0].mode;
18268 mode1 = insn_data[icode].operand[1].mode;
18269 mode2 = insn_data[icode].operand[2].mode;
18270 mode3 = insn_data[icode].operand[3].mode;
18271
18272 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18273 {
18274 op0 = copy_to_reg (op0);
18275 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18276 }
18277 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18278 {
18279 op1 = copy_to_reg (op1);
18280 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18281 }
18282 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18283 {
18284 error ("shift must be an immediate");
18285 return const0_rtx;
18286 }
18287 target = gen_reg_rtx (mode);
18288 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18289 op0, op1, op2);
18290 if (! pat)
18291 return 0;
18292 emit_insn (pat);
18293 return target;
18294
18295 case IX86_BUILTIN_MOVNTSD:
18296 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18297
18298 case IX86_BUILTIN_MOVNTSS:
18299 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18300
18301 case IX86_BUILTIN_INSERTQ:
18302 case IX86_BUILTIN_EXTRQ:
18303 icode = (fcode == IX86_BUILTIN_EXTRQ
18304 ? CODE_FOR_sse4a_extrq
18305 : CODE_FOR_sse4a_insertq);
18306 arg0 = CALL_EXPR_ARG (exp, 0);
18307 arg1 = CALL_EXPR_ARG (exp, 1);
18308 op0 = expand_normal (arg0);
18309 op1 = expand_normal (arg1);
18310 tmode = insn_data[icode].operand[0].mode;
18311 mode1 = insn_data[icode].operand[1].mode;
18312 mode2 = insn_data[icode].operand[2].mode;
18313 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18314 op0 = copy_to_mode_reg (mode1, op0);
18315 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18316 op1 = copy_to_mode_reg (mode2, op1);
18317 if (optimize || target == 0
18318 || GET_MODE (target) != tmode
18319 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18320 target = gen_reg_rtx (tmode);
18321 pat = GEN_FCN (icode) (target, op0, op1);
18322 if (! pat)
18323 return NULL_RTX;
18324 emit_insn (pat);
18325 return target;
18326
18327 case IX86_BUILTIN_EXTRQI:
18328 icode = CODE_FOR_sse4a_extrqi;
18329 arg0 = CALL_EXPR_ARG (exp, 0);
18330 arg1 = CALL_EXPR_ARG (exp, 1);
18331 arg2 = CALL_EXPR_ARG (exp, 2);
18332 op0 = expand_normal (arg0);
18333 op1 = expand_normal (arg1);
18334 op2 = expand_normal (arg2);
18335 tmode = insn_data[icode].operand[0].mode;
18336 mode1 = insn_data[icode].operand[1].mode;
18337 mode2 = insn_data[icode].operand[2].mode;
18338 mode3 = insn_data[icode].operand[3].mode;
18339 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18340 op0 = copy_to_mode_reg (mode1, op0);
18341 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18342 {
18343 error ("index mask must be an immediate");
18344 return gen_reg_rtx (tmode);
18345 }
18346 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18347 {
18348 error ("length mask must be an immediate");
18349 return gen_reg_rtx (tmode);
18350 }
18351 if (optimize || target == 0
18352 || GET_MODE (target) != tmode
18353 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18354 target = gen_reg_rtx (tmode);
18355 pat = GEN_FCN (icode) (target, op0, op1, op2);
18356 if (! pat)
18357 return NULL_RTX;
18358 emit_insn (pat);
18359 return target;
18360
18361 case IX86_BUILTIN_INSERTQI:
18362 icode = CODE_FOR_sse4a_insertqi;
18363 arg0 = CALL_EXPR_ARG (exp, 0);
18364 arg1 = CALL_EXPR_ARG (exp, 1);
18365 arg2 = CALL_EXPR_ARG (exp, 2);
18366 arg3 = CALL_EXPR_ARG (exp, 3);
18367 op0 = expand_normal (arg0);
18368 op1 = expand_normal (arg1);
18369 op2 = expand_normal (arg2);
18370 op3 = expand_normal (arg3);
18371 tmode = insn_data[icode].operand[0].mode;
18372 mode1 = insn_data[icode].operand[1].mode;
18373 mode2 = insn_data[icode].operand[2].mode;
18374 mode3 = insn_data[icode].operand[3].mode;
18375 mode4 = insn_data[icode].operand[4].mode;
18376
18377 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18378 op0 = copy_to_mode_reg (mode1, op0);
18379
18380 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18381 op1 = copy_to_mode_reg (mode2, op1);
18382
18383 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18384 {
18385 error ("index mask must be an immediate");
18386 return gen_reg_rtx (tmode);
18387 }
18388 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18389 {
18390 error ("length mask must be an immediate");
18391 return gen_reg_rtx (tmode);
18392 }
18393 if (optimize || target == 0
18394 || GET_MODE (target) != tmode
18395 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18396 target = gen_reg_rtx (tmode);
18397 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18398 if (! pat)
18399 return NULL_RTX;
18400 emit_insn (pat);
18401 return target;
18402
18403 case IX86_BUILTIN_VEC_INIT_V2SI:
18404 case IX86_BUILTIN_VEC_INIT_V4HI:
18405 case IX86_BUILTIN_VEC_INIT_V8QI:
18406 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18407
18408 case IX86_BUILTIN_VEC_EXT_V2DF:
18409 case IX86_BUILTIN_VEC_EXT_V2DI:
18410 case IX86_BUILTIN_VEC_EXT_V4SF:
18411 case IX86_BUILTIN_VEC_EXT_V4SI:
18412 case IX86_BUILTIN_VEC_EXT_V8HI:
18413 case IX86_BUILTIN_VEC_EXT_V2SI:
18414 case IX86_BUILTIN_VEC_EXT_V4HI:
18415 return ix86_expand_vec_ext_builtin (exp, target);
18416
18417 case IX86_BUILTIN_VEC_SET_V8HI:
18418 case IX86_BUILTIN_VEC_SET_V4HI:
18419 return ix86_expand_vec_set_builtin (exp);
18420
18421 default:
18422 break;
18423 }
18424
18425 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18426 if (d->code == fcode)
18427 {
18428 /* Compares are treated specially. */
18429 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18430 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18431 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18432 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18433 return ix86_expand_sse_compare (d, exp, target);
18434
18435 return ix86_expand_binop_builtin (d->icode, exp, target);
18436 }
18437
18438 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18439 if (d->code == fcode)
18440 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18441
18442 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18443 if (d->code == fcode)
18444 return ix86_expand_sse_comi (d, exp, target);
18445
18446 gcc_unreachable ();
18447 }
18448
18449 /* Returns a function decl for a vectorized version of the builtin function
18450 with builtin function code FN, result vector type TYPE_OUT and input
18451 vector type TYPE_IN, or NULL_TREE if it is not available. */
18452
18453 static tree
18454 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18455 tree type_in)
18456 {
18457 enum machine_mode in_mode, out_mode;
18458 int in_n, out_n;
18459
18460 if (TREE_CODE (type_out) != VECTOR_TYPE
18461 || TREE_CODE (type_in) != VECTOR_TYPE)
18462 return NULL_TREE;
18463
18464 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18465 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18466 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18467 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18468
18469 switch (fn)
18470 {
18471 case BUILT_IN_SQRT:
18472 if (out_mode == DFmode && out_n == 2
18473 && in_mode == DFmode && in_n == 2)
18474 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18475 return NULL_TREE;
18476
18477 case BUILT_IN_SQRTF:
18478 if (out_mode == SFmode && out_n == 4
18479 && in_mode == SFmode && in_n == 4)
18480 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18481 return NULL_TREE;
18482
18483 case BUILT_IN_LRINTF:
18484 if (out_mode == SImode && out_n == 4
18485 && in_mode == SFmode && in_n == 4)
18486 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18487 return NULL_TREE;
18488
18489 default:
18490 ;
18491 }
18492
18493 return NULL_TREE;
18494 }
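
/* This is what lets the vectorizer replace a scalar call in a loop like

	for (i = 0; i < n; i++)
	  a[i] = sqrt (b[i]);

   with calls to __builtin_ia32_sqrtpd, once both the input and output
   vector types have been determined to be V2DF.  */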
18495
18496 /* Returns a decl of a function that implements conversion of the
18497 input vector of type TYPE, or NULL_TREE if it is not available. */
18498
18499 static tree
18500 ix86_builtin_conversion (enum tree_code code, tree type)
18501 {
18502 if (TREE_CODE (type) != VECTOR_TYPE)
18503 return NULL_TREE;
18504
18505 switch (code)
18506 {
18507 case FLOAT_EXPR:
18508 switch (TYPE_MODE (type))
18509 {
18510 case V4SImode:
18511 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18512 default:
18513 return NULL_TREE;
18514 }
18515
18516 case FIX_TRUNC_EXPR:
18517 switch (TYPE_MODE (type))
18518 {
18519 case V4SFmode:
18520 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18521 default:
18522 return NULL_TREE;
18523 }
18524 default:
18525 return NULL_TREE;
18526
18527 }
18528 }
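
/* Similarly, this lets the vectorizer convert a whole V4SI vector to V4SF
   with one __builtin_ia32_cvtdq2ps (FLOAT_EXPR), or truncate V4SF to V4SI
   with one __builtin_ia32_cvttps2dq (FIX_TRUNC_EXPR), rather than
   converting element by element.  */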
18529
18530 /* Store OPERAND to memory after reload is completed. This means
18531 that we can't easily use assign_stack_local. */
18532 rtx
18533 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18534 {
18535 rtx result;
18536
18537 gcc_assert (reload_completed);
18538 if (TARGET_RED_ZONE)
18539 {
18540 result = gen_rtx_MEM (mode,
18541 gen_rtx_PLUS (Pmode,
18542 stack_pointer_rtx,
18543 GEN_INT (-RED_ZONE_SIZE)));
18544 emit_move_insn (result, operand);
18545 }
18546 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18547 {
18548 switch (mode)
18549 {
18550 case HImode:
18551 case SImode:
18552 operand = gen_lowpart (DImode, operand);
18553 /* FALLTHRU */
18554 case DImode:
18555 emit_insn (
18556 gen_rtx_SET (VOIDmode,
18557 gen_rtx_MEM (DImode,
18558 gen_rtx_PRE_DEC (DImode,
18559 stack_pointer_rtx)),
18560 operand));
18561 break;
18562 default:
18563 gcc_unreachable ();
18564 }
18565 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18566 }
18567 else
18568 {
18569 switch (mode)
18570 {
18571 case DImode:
18572 {
18573 rtx operands[2];
18574 split_di (&operand, 1, operands, operands + 1);
18575 emit_insn (
18576 gen_rtx_SET (VOIDmode,
18577 gen_rtx_MEM (SImode,
18578 gen_rtx_PRE_DEC (Pmode,
18579 stack_pointer_rtx)),
18580 operands[1]));
18581 emit_insn (
18582 gen_rtx_SET (VOIDmode,
18583 gen_rtx_MEM (SImode,
18584 gen_rtx_PRE_DEC (Pmode,
18585 stack_pointer_rtx)),
18586 operands[0]));
18587 }
18588 break;
18589 case HImode:
18590 /* Store HImodes as SImodes. */
18591 operand = gen_lowpart (SImode, operand);
18592 /* FALLTHRU */
18593 case SImode:
18594 emit_insn (
18595 gen_rtx_SET (VOIDmode,
18596 gen_rtx_MEM (GET_MODE (operand),
18597 gen_rtx_PRE_DEC (SImode,
18598 stack_pointer_rtx)),
18599 operand));
18600 break;
18601 default:
18602 gcc_unreachable ();
18603 }
18604 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18605 }
18606 return result;
18607 }
18608
18609 /* Free the operand from memory. */
18610 void
18611 ix86_free_from_memory (enum machine_mode mode)
18612 {
18613 if (!TARGET_RED_ZONE)
18614 {
18615 int size;
18616
18617 if (mode == DImode || TARGET_64BIT)
18618 size = 8;
18619 else
18620 size = 4;
18621 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18622 to a pop or add instruction if registers are available. */
18623 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18624 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18625 GEN_INT (size))));
18626 }
18627 }
18628
18629 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18630 QImode must go into class Q_REGS.
18631 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
18632 movdf to do mem-to-mem moves through integer regs. */
18633 enum reg_class
18634 ix86_preferred_reload_class (rtx x, enum reg_class class)
18635 {
18636 enum machine_mode mode = GET_MODE (x);
18637
18638 /* We're only allowed to return a subclass of CLASS. Many of the
18639 following checks fail for NO_REGS, so eliminate that early. */
18640 if (class == NO_REGS)
18641 return NO_REGS;
18642
18643 /* All classes can load zeros. */
18644 if (x == CONST0_RTX (mode))
18645 return class;
18646
18647 /* Force constants into memory if we are loading a (nonzero) constant into
18648 an MMX or SSE register. This is because there are no MMX/SSE instructions
18649 to load from a constant. */
18650 if (CONSTANT_P (x)
18651 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18652 return NO_REGS;
18653
18654 /* Prefer SSE regs only, if we can use them for math. */
18655 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18656 return SSE_CLASS_P (class) ? class : NO_REGS;
18657
18658 /* Floating-point constants need more complex checks. */
18659 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18660 {
18661 /* General regs can load everything. */
18662 if (reg_class_subset_p (class, GENERAL_REGS))
18663 return class;
18664
18665 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18666 zero above. We only want to wind up preferring 80387 registers if
18667 we plan on doing computation with them. */
18668 if (TARGET_80387
18669 && standard_80387_constant_p (x))
18670 {
18671 /* Limit class to non-sse. */
18672 if (class == FLOAT_SSE_REGS)
18673 return FLOAT_REGS;
18674 if (class == FP_TOP_SSE_REGS)
18675 return FP_TOP_REG;
18676 if (class == FP_SECOND_SSE_REGS)
18677 return FP_SECOND_REG;
18678 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18679 return class;
18680 }
18681
18682 return NO_REGS;
18683 }
18684
18685 /* Generally when we see PLUS here, it's the function invariant
18686 (plus soft-fp const_int), which can only be computed into general
18687 regs. */
18688 if (GET_CODE (x) == PLUS)
18689 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18690
18691 /* QImode constants are easy to load, but non-constant QImode data
18692 must go into Q_REGS. */
18693 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18694 {
18695 if (reg_class_subset_p (class, Q_REGS))
18696 return class;
18697 if (reg_class_subset_p (Q_REGS, class))
18698 return Q_REGS;
18699 return NO_REGS;
18700 }
18701
18702 return class;
18703 }
18704
18705 /* Discourage putting floating-point values in SSE registers unless
18706 SSE math is being used, and likewise for the 387 registers. */
18707 enum reg_class
18708 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18709 {
18710 enum machine_mode mode = GET_MODE (x);
18711
18712 /* Restrict the output reload class to the register bank that we are doing
18713 math on. If we would like not to return a subset of CLASS, reject this
18714 alternative: if reload cannot do this, it will still use its choice. */
18715 mode = GET_MODE (x);
18716 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18717 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18718
18719 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18720 {
18721 if (class == FP_TOP_SSE_REGS)
18722 return FP_TOP_REG;
18723 else if (class == FP_SECOND_SSE_REGS)
18724 return FP_SECOND_REG;
18725 else
18726 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18727 }
18728
18729 return class;
18730 }
18731
18732 /* If we are copying between general and FP registers, we need a memory
18733 location. The same is true for SSE and MMX registers.
18734
18735 The macro can't work reliably when one of the CLASSES is a class containing
18736 registers from multiple units (SSE, MMX, integer). We avoid this by never
18737 combining those units in a single alternative in the machine description.
18738 Ensure that this constraint holds to avoid unexpected surprises.
18739
18740 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18741 enforce these sanity checks. */
18742
18743 int
18744 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18745 enum machine_mode mode, int strict)
18746 {
18747 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18748 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18749 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18750 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18751 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18752 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18753 {
18754 gcc_assert (!strict);
18755 return true;
18756 }
18757
18758 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18759 return true;
18760
18761 /* ??? This is a lie. We do have moves between mmx/general and between
18762 mmx/sse2. But by saying we need secondary memory we discourage the
18763 register allocator from using the mmx registers unless needed. */
18764 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18765 return true;
18766
18767 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18768 {
18769 /* SSE1 doesn't have any direct moves from other classes. */
18770 if (!TARGET_SSE2)
18771 return true;
18772
18773 /* If the target says that inter-unit moves are more expensive
18774 than moving through memory, then don't generate them. */
18775 if (!TARGET_INTER_UNIT_MOVES)
18776 return true;
18777
18778 /* Between SSE and general, we have moves no larger than word size. */
18779 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18780 return true;
18781 }
18782
18783 return false;
18784 }
18785
18786 /* Return true if the registers in CLASS cannot represent the change from
18787 modes FROM to TO. */
18788
18789 bool
18790 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18791 enum reg_class class)
18792 {
18793 if (from == to)
18794 return false;
18795
18796 /* x87 registers can't do subreg at all, as all values are reformatted
18797 to extended precision. */
18798 if (MAYBE_FLOAT_CLASS_P (class))
18799 return true;
18800
18801 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18802 {
18803 /* Vector registers do not support QI or HImode loads. If we don't
18804 disallow a change to these modes, reload will assume it's ok to
18805 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18806 the vec_dupv4hi pattern. */
18807 if (GET_MODE_SIZE (from) < 4)
18808 return true;
18809
18810 /* Vector registers do not support subreg with nonzero offsets, which
18811 are otherwise valid for integer registers. Since we can't see
18812 whether we have a nonzero offset from here, prohibit all
18813 nonparadoxical subregs changing size. */
18814 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18815 return true;
18816 }
18817
18818 return false;
18819 }
18820
18821 /* Return the cost of moving data from a register in class CLASS1 to
18822 one in class CLASS2.
18823
18824 It is not required that the cost always equal 2 when FROM is the same as TO;
18825 on some machines it is expensive to move between registers if they are not
18826 general registers. */
18827
18828 int
18829 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18830 enum reg_class class2)
18831 {
18832 /* If we require secondary memory, compute the cost of the store followed
18833 by the load. To avoid bad register allocation choices, this needs
18834 to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18835
18836 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18837 {
18838 int cost = 1;
18839
18840 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18841 MEMORY_MOVE_COST (mode, class1, 1));
18842 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18843 MEMORY_MOVE_COST (mode, class2, 1));
18844
18845 /* When copying from a general purpose register we may emit multiple
18846 stores followed by a single load, causing a memory size mismatch stall.
18847 Count this as an arbitrarily high cost of 20. */
18848 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18849 cost += 20;
18850
18851 /* In the case of FP/MMX moves, the registers actually overlap, and we
18852 have to switch modes in order to treat them differently. */
18853 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18854 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18855 cost += 20;
18856
18857 return cost;
18858 }
18859
18860 /* Moves between SSE/MMX and integer unit are expensive. */
18861 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18862 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18863 return ix86_cost->mmxsse_to_integer;
18864 if (MAYBE_FLOAT_CLASS_P (class1))
18865 return ix86_cost->fp_move;
18866 if (MAYBE_SSE_CLASS_P (class1))
18867 return ix86_cost->sse_move;
18868 if (MAYBE_MMX_CLASS_P (class1))
18869 return ix86_cost->mmx_move;
18870 return 2;
18871 }
18872
18873 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18874
18875 bool
18876 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18877 {
18878 /* Flags can hold only CCmode values, and only flags can hold them. */
18879 if (CC_REGNO_P (regno))
18880 return GET_MODE_CLASS (mode) == MODE_CC;
18881 if (GET_MODE_CLASS (mode) == MODE_CC
18882 || GET_MODE_CLASS (mode) == MODE_RANDOM
18883 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18884 return 0;
18885 if (FP_REGNO_P (regno))
18886 return VALID_FP_MODE_P (mode);
18887 if (SSE_REGNO_P (regno))
18888 {
18889 /* We implement the move patterns for all vector modes into and
18890 out of SSE registers, even when no operation instructions
18891 are available. */
18892 return (VALID_SSE_REG_MODE (mode)
18893 || VALID_SSE2_REG_MODE (mode)
18894 || VALID_MMX_REG_MODE (mode)
18895 || VALID_MMX_REG_MODE_3DNOW (mode));
18896 }
18897 if (MMX_REGNO_P (regno))
18898 {
18899 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18900 so if the register is available at all, then we can move data of
18901 the given mode into or out of it. */
18902 return (VALID_MMX_REG_MODE (mode)
18903 || VALID_MMX_REG_MODE_3DNOW (mode));
18904 }
18905
18906 if (mode == QImode)
18907 {
18908 /* Take care with QImode values - they can be in non-QI regs,
18909 but then they can cause partial register stalls. */
18910 if (regno < 4 || TARGET_64BIT)
18911 return 1;
18912 if (!TARGET_PARTIAL_REG_STALL)
18913 return 1;
18914 return reload_in_progress || reload_completed;
18915 }
18916 /* We handle both integers and floats in the general purpose registers. */
18917 else if (VALID_INT_MODE_P (mode))
18918 return 1;
18919 else if (VALID_FP_MODE_P (mode))
18920 return 1;
18921 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18922 on to use that value in smaller contexts, this can easily force a
18923 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18924 supporting DImode, allow it. */
18925 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18926 return 1;
18927
18928 return 0;
18929 }
18930
18931 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18932 tieable integer mode. */
18933
18934 static bool
18935 ix86_tieable_integer_mode_p (enum machine_mode mode)
18936 {
18937 switch (mode)
18938 {
18939 case HImode:
18940 case SImode:
18941 return true;
18942
18943 case QImode:
18944 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18945
18946 case DImode:
18947 return TARGET_64BIT;
18948
18949 default:
18950 return false;
18951 }
18952 }
18953
18954 /* Return true if MODE1 is accessible in a register that can hold MODE2
18955 without copying. That is, all register classes that can hold MODE2
18956 can also hold MODE1. */
18957
18958 bool
18959 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18960 {
18961 if (mode1 == mode2)
18962 return true;
18963
18964 if (ix86_tieable_integer_mode_p (mode1)
18965 && ix86_tieable_integer_mode_p (mode2))
18966 return true;
18967
18968 /* MODE2 being XFmode implies fp stack or general regs, which means we
18969 can tie any smaller floating point modes to it. Note that we do not
18970 tie this with TFmode. */
18971 if (mode2 == XFmode)
18972 return mode1 == SFmode || mode1 == DFmode;
18973
18974 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18975 that we can tie it with SFmode. */
18976 if (mode2 == DFmode)
18977 return mode1 == SFmode;
18978
18979 /* If MODE2 is only appropriate for an SSE register, then tie with
18980 any other mode acceptable to SSE registers. */
18981 if (GET_MODE_SIZE (mode2) == 16
18982 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18983 return (GET_MODE_SIZE (mode1) == 16
18984 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18985
18986 /* If MODE2 is appropriate for an MMX register, then tie
18987 with any other mode acceptable to MMX registers. */
18988 if (GET_MODE_SIZE (mode2) == 8
18989 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18990 return (GET_MODE_SIZE (mode1) == 8
18991 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18992
18993 return false;
18994 }
18995
18996 /* Return the cost of moving data of mode M between a
18997 register and memory. A value of 2 is the default; this cost is
18998 relative to those in `REGISTER_MOVE_COST'.
18999
19000 If moving between registers and memory is more expensive than
19001 between two registers, you should define this macro to express the
19002 relative cost.
19003
19004 Also model the increased cost of moving QImode registers in non
19005 Q_REGS classes.
19006 */
19007 int
19008 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19009 {
19010 if (FLOAT_CLASS_P (class))
19011 {
19012 int index;
19013 switch (mode)
19014 {
19015 case SFmode:
19016 index = 0;
19017 break;
19018 case DFmode:
19019 index = 1;
19020 break;
19021 case XFmode:
19022 index = 2;
19023 break;
19024 default:
19025 return 100;
19026 }
19027 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19028 }
19029 if (SSE_CLASS_P (class))
19030 {
19031 int index;
19032 switch (GET_MODE_SIZE (mode))
19033 {
19034 case 4:
19035 index = 0;
19036 break;
19037 case 8:
19038 index = 1;
19039 break;
19040 case 16:
19041 index = 2;
19042 break;
19043 default:
19044 return 100;
19045 }
19046 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19047 }
19048 if (MMX_CLASS_P (class))
19049 {
19050 int index;
19051 switch (GET_MODE_SIZE (mode))
19052 {
19053 case 4:
19054 index = 0;
19055 break;
19056 case 8:
19057 index = 1;
19058 break;
19059 default:
19060 return 100;
19061 }
19062 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19063 }
19064 switch (GET_MODE_SIZE (mode))
19065 {
19066 case 1:
19067 if (in)
19068 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19069 : ix86_cost->movzbl_load);
19070 else
19071 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19072 : ix86_cost->int_store[0] + 4);
19073 break;
19074 case 2:
19075 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19076 default:
19077 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19078 if (mode == TFmode)
19079 mode = XFmode;
19080 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19081 * (((int) GET_MODE_SIZE (mode)
19082 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19083 }
19084 }
19085
19086 /* Compute a (partial) cost for rtx X. Return true if the complete
19087 cost has been computed, and false if subexpressions should be
19088 scanned. In either case, *TOTAL contains the cost result. */
19089
19090 static bool
19091 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19092 {
19093 enum machine_mode mode = GET_MODE (x);
19094
19095 switch (code)
19096 {
19097 case CONST_INT:
19098 case CONST:
19099 case LABEL_REF:
19100 case SYMBOL_REF:
19101 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19102 *total = 3;
19103 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19104 *total = 2;
19105 else if (flag_pic && SYMBOLIC_CONST (x)
19106 && (!TARGET_64BIT
19107 || (GET_CODE (x) != LABEL_REF
19108 && (GET_CODE (x) != SYMBOL_REF
19109 || !SYMBOL_REF_LOCAL_P (x)))))
19110 *total = 1;
19111 else
19112 *total = 0;
19113 return true;
19114
19115 case CONST_DOUBLE:
19116 if (mode == VOIDmode)
19117 *total = 0;
19118 else
19119 switch (standard_80387_constant_p (x))
19120 {
19121 case 1: /* 0.0 */
19122 *total = 1;
19123 break;
19124 default: /* Other constants */
19125 *total = 2;
19126 break;
19127 case 0:
19128 case -1:
19129 /* Start with (MEM (SYMBOL_REF)), since that's where
19130 it'll probably end up. Add a penalty for size. */
19131 *total = (COSTS_N_INSNS (1)
19132 + (flag_pic != 0 && !TARGET_64BIT)
19133 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19134 break;
19135 }
19136 return true;
19137
19138 case ZERO_EXTEND:
19139 /* The zero extension is often completely free on x86_64, so make
19140 it as cheap as possible. */
19141 if (TARGET_64BIT && mode == DImode
19142 && GET_MODE (XEXP (x, 0)) == SImode)
19143 *total = 1;
19144 else if (TARGET_ZERO_EXTEND_WITH_AND)
19145 *total = ix86_cost->add;
19146 else
19147 *total = ix86_cost->movzx;
19148 return false;
19149
19150 case SIGN_EXTEND:
19151 *total = ix86_cost->movsx;
19152 return false;
19153
19154 case ASHIFT:
19155 if (CONST_INT_P (XEXP (x, 1))
19156 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19157 {
19158 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19159 if (value == 1)
19160 {
19161 *total = ix86_cost->add;
19162 return false;
19163 }
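	  /* A left shift by 2 or 3 can also be done with a single lea
	     using a scaled index (e.g. a shift by 2 is roughly
	     "leal 0(,%eax,4), %eax"), so use the lea cost when it is
	     no more expensive than the shift.  */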
19164 if ((value == 2 || value == 3)
19165 && ix86_cost->lea <= ix86_cost->shift_const)
19166 {
19167 *total = ix86_cost->lea;
19168 return false;
19169 }
19170 }
19171 /* FALLTHRU */
19172
19173 case ROTATE:
19174 case ASHIFTRT:
19175 case LSHIFTRT:
19176 case ROTATERT:
19177 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19178 {
19179 if (CONST_INT_P (XEXP (x, 1)))
19180 {
19181 if (INTVAL (XEXP (x, 1)) > 32)
19182 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19183 else
19184 *total = ix86_cost->shift_const * 2;
19185 }
19186 else
19187 {
19188 if (GET_CODE (XEXP (x, 1)) == AND)
19189 *total = ix86_cost->shift_var * 2;
19190 else
19191 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19192 }
19193 }
19194 else
19195 {
19196 if (CONST_INT_P (XEXP (x, 1)))
19197 *total = ix86_cost->shift_const;
19198 else
19199 *total = ix86_cost->shift_var;
19200 }
19201 return false;
19202
19203 case MULT:
19204 if (FLOAT_MODE_P (mode))
19205 {
19206 *total = ix86_cost->fmul;
19207 return false;
19208 }
19209 else
19210 {
19211 rtx op0 = XEXP (x, 0);
19212 rtx op1 = XEXP (x, 1);
19213 int nbits;
19214 if (CONST_INT_P (XEXP (x, 1)))
19215 {
19216 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
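	      /* "value &= value - 1" clears the lowest set bit on each
		 iteration, so NBITS ends up as the number of set bits in
		 the constant multiplier; each set bit is charged
		 mult_bit in the cost computed below.  */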
19217 for (nbits = 0; value != 0; value &= value - 1)
19218 nbits++;
19219 }
19220 else
19221 /* This is arbitrary. */
19222 nbits = 7;
19223
19224 /* Compute costs correctly for widening multiplication. */
19225 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19226 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19227 == GET_MODE_SIZE (mode))
19228 {
19229 int is_mulwiden = 0;
19230 enum machine_mode inner_mode = GET_MODE (op0);
19231
19232 if (GET_CODE (op0) == GET_CODE (op1))
19233 is_mulwiden = 1, op1 = XEXP (op1, 0);
19234 else if (CONST_INT_P (op1))
19235 {
19236 if (GET_CODE (op0) == SIGN_EXTEND)
19237 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19238 == INTVAL (op1);
19239 else
19240 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19241 }
19242
19243 if (is_mulwiden)
19244 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19245 }
19246
19247 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19248 + nbits * ix86_cost->mult_bit
19249 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19250
19251 return true;
19252 }
19253
19254 case DIV:
19255 case UDIV:
19256 case MOD:
19257 case UMOD:
19258 if (FLOAT_MODE_P (mode))
19259 *total = ix86_cost->fdiv;
19260 else
19261 *total = ix86_cost->divide[MODE_INDEX (mode)];
19262 return false;
19263
19264 case PLUS:
19265 if (FLOAT_MODE_P (mode))
19266 *total = ix86_cost->fadd;
19267 else if (GET_MODE_CLASS (mode) == MODE_INT
19268 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19269 {
19270 if (GET_CODE (XEXP (x, 0)) == PLUS
19271 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19272 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19273 && CONSTANT_P (XEXP (x, 1)))
19274 {
19275 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19276 if (val == 2 || val == 4 || val == 8)
19277 {
19278 *total = ix86_cost->lea;
19279 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19280 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19281 outer_code);
19282 *total += rtx_cost (XEXP (x, 1), outer_code);
19283 return true;
19284 }
19285 }
19286 else if (GET_CODE (XEXP (x, 0)) == MULT
19287 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19288 {
19289 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19290 if (val == 2 || val == 4 || val == 8)
19291 {
19292 *total = ix86_cost->lea;
19293 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19294 *total += rtx_cost (XEXP (x, 1), outer_code);
19295 return true;
19296 }
19297 }
19298 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19299 {
19300 *total = ix86_cost->lea;
19301 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19302 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19303 *total += rtx_cost (XEXP (x, 1), outer_code);
19304 return true;
19305 }
19306 }
19307 /* FALLTHRU */
19308
19309 case MINUS:
19310 if (FLOAT_MODE_P (mode))
19311 {
19312 *total = ix86_cost->fadd;
19313 return false;
19314 }
19315 /* FALLTHRU */
19316
19317 case AND:
19318 case IOR:
19319 case XOR:
19320 if (!TARGET_64BIT && mode == DImode)
19321 {
19322 *total = (ix86_cost->add * 2
19323 + (rtx_cost (XEXP (x, 0), outer_code)
19324 << (GET_MODE (XEXP (x, 0)) != DImode))
19325 + (rtx_cost (XEXP (x, 1), outer_code)
19326 << (GET_MODE (XEXP (x, 1)) != DImode)));
19327 return true;
19328 }
19329 /* FALLTHRU */
19330
19331 case NEG:
19332 if (FLOAT_MODE_P (mode))
19333 {
19334 *total = ix86_cost->fchs;
19335 return false;
19336 }
19337 /* FALLTHRU */
19338
19339 case NOT:
19340 if (!TARGET_64BIT && mode == DImode)
19341 *total = ix86_cost->add * 2;
19342 else
19343 *total = ix86_cost->add;
19344 return false;
19345
19346 case COMPARE:
19347 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19348 && XEXP (XEXP (x, 0), 1) == const1_rtx
19349 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19350 && XEXP (x, 1) == const0_rtx)
19351 {
19352 /* This kind of construct is implemented using test[bwl].
19353 Treat it as if we had an AND. */
19354 *total = (ix86_cost->add
19355 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19356 + rtx_cost (const1_rtx, outer_code));
19357 return true;
19358 }
19359 return false;
19360
19361 case FLOAT_EXTEND:
19362 if (!TARGET_SSE_MATH
19363 || mode == XFmode
19364 || (mode == DFmode && !TARGET_SSE2))
19365 *total = 0;
19366 return false;
19367
19368 case ABS:
19369 if (FLOAT_MODE_P (mode))
19370 *total = ix86_cost->fabs;
19371 return false;
19372
19373 case SQRT:
19374 if (FLOAT_MODE_P (mode))
19375 *total = ix86_cost->fsqrt;
19376 return false;
19377
19378 case UNSPEC:
19379 if (XINT (x, 1) == UNSPEC_TP)
19380 *total = 0;
19381 return false;
19382
19383 default:
19384 return false;
19385 }
19386 }
19387
19388 #if TARGET_MACHO
19389
19390 static int current_machopic_label_num;
19391
19392 /* Given a symbol name and its associated stub, write out the
19393 definition of the stub. */
19394
19395 void
19396 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19397 {
19398 unsigned int length;
19399 char *binder_name, *symbol_name, lazy_ptr_name[32];
19400 int label = ++current_machopic_label_num;
19401
19402 /* For 64-bit we shouldn't get here. */
19403 gcc_assert (!TARGET_64BIT);
19404
19405 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19406 symb = (*targetm.strip_name_encoding) (symb);
19407
19408 length = strlen (stub);
19409 binder_name = alloca (length + 32);
19410 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19411
19412 length = strlen (symb);
19413 symbol_name = alloca (length + 32);
19414 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19415
19416 sprintf (lazy_ptr_name, "L%d$lz", label);
19417
19418 if (MACHOPIC_PURE)
19419 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19420 else
19421 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19422
19423 fprintf (file, "%s:\n", stub);
19424 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19425
19426 if (MACHOPIC_PURE)
19427 {
19428 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19429 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19430 fprintf (file, "\tjmp\t*%%edx\n");
19431 }
19432 else
19433 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19434
19435 fprintf (file, "%s:\n", binder_name);
19436
19437 if (MACHOPIC_PURE)
19438 {
19439 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19440 fprintf (file, "\tpushl\t%%eax\n");
19441 }
19442 else
19443 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19444
19445 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19446
19447 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19448 fprintf (file, "%s:\n", lazy_ptr_name);
19449 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19450 fprintf (file, "\t.long %s\n", binder_name);
19451 }
19452
19453 void
19454 darwin_x86_file_end (void)
19455 {
19456 darwin_file_end ();
19457 ix86_file_end ();
19458 }
19459 #endif /* TARGET_MACHO */
19460
19461 /* Order the registers for register allocator. */
19462
19463 void
19464 x86_order_regs_for_local_alloc (void)
19465 {
19466 int pos = 0;
19467 int i;
19468
19469 /* First allocate the local general purpose registers. */
19470 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19471 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19472 reg_alloc_order [pos++] = i;
19473
19474 /* Global general purpose registers. */
19475 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19476 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19477 reg_alloc_order [pos++] = i;
19478
19479 /* x87 registers come first in case we are doing FP math
19480 using them. */
19481 if (!TARGET_SSE_MATH)
19482 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19483 reg_alloc_order [pos++] = i;
19484
19485 /* SSE registers. */
19486 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19487 reg_alloc_order [pos++] = i;
19488 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19489 reg_alloc_order [pos++] = i;
19490
19491 /* x87 registers. */
19492 if (TARGET_SSE_MATH)
19493 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19494 reg_alloc_order [pos++] = i;
19495
19496 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19497 reg_alloc_order [pos++] = i;
19498
19499 /* Initialize the rest of the array, as we do not allocate some registers
19500 at all. */
19501 while (pos < FIRST_PSEUDO_REGISTER)
19502 reg_alloc_order [pos++] = 0;
19503 }
19504
19505 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19506 struct attribute_spec.handler. */
19507 static tree
19508 ix86_handle_struct_attribute (tree *node, tree name,
19509 tree args ATTRIBUTE_UNUSED,
19510 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19511 {
19512 tree *type = NULL;
19513 if (DECL_P (*node))
19514 {
19515 if (TREE_CODE (*node) == TYPE_DECL)
19516 type = &TREE_TYPE (*node);
19517 }
19518 else
19519 type = node;
19520
19521 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19522 || TREE_CODE (*type) == UNION_TYPE)))
19523 {
19524 warning (OPT_Wattributes, "%qs attribute ignored",
19525 IDENTIFIER_POINTER (name));
19526 *no_add_attrs = true;
19527 }
19528
19529 else if ((is_attribute_p ("ms_struct", name)
19530 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19531 || ((is_attribute_p ("gcc_struct", name)
19532 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19533 {
19534 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19535 IDENTIFIER_POINTER (name));
19536 *no_add_attrs = true;
19537 }
19538
19539 return NULL_TREE;
19540 }
19541
19542 static bool
19543 ix86_ms_bitfield_layout_p (tree record_type)
19544 {
19545 return ((TARGET_MS_BITFIELD_LAYOUT
19546 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19547 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19548 }
19549
19550 /* Returns an expression indicating where the this parameter is
19551 located on entry to the FUNCTION. */
19552
19553 static rtx
19554 x86_this_parameter (tree function)
19555 {
19556 tree type = TREE_TYPE (function);
19557
19558 if (TARGET_64BIT)
19559 {
19560 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19561 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19562 }
19563
19564 if (ix86_function_regparm (type, function) > 0)
19565 {
19566 tree parm;
19567
19568 parm = TYPE_ARG_TYPES (type);
19569 /* Figure out whether or not the function has a variable number of
19570 arguments. */
19571 for (; parm; parm = TREE_CHAIN (parm))
19572 if (TREE_VALUE (parm) == void_type_node)
19573 break;
19574 /* If not, the this parameter is in the first argument. */
19575 if (parm)
19576 {
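	  /* The this pointer is the first argument, so it arrives in
	     %eax under regparm and in %ecx (hard register 2) under
	     fastcall.  */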
19577 int regno = 0;
19578 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19579 regno = 2;
19580 return gen_rtx_REG (SImode, regno);
19581 }
19582 }
19583
19584 if (aggregate_value_p (TREE_TYPE (type), type))
19585 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19586 else
19587 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19588 }
19589
19590 /* Determine whether x86_output_mi_thunk can succeed. */
19591
19592 static bool
19593 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19594 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19595 HOST_WIDE_INT vcall_offset, tree function)
19596 {
19597 /* 64-bit can handle anything. */
19598 if (TARGET_64BIT)
19599 return true;
19600
19601 /* For 32-bit, everything's fine if we have one free register. */
19602 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19603 return true;
19604
19605 /* Need a free register for vcall_offset. */
19606 if (vcall_offset)
19607 return false;
19608
19609 /* Need a free register for GOT references. */
19610 if (flag_pic && !(*targetm.binds_local_p) (function))
19611 return false;
19612
19613 /* Otherwise ok. */
19614 return true;
19615 }
19616
19617 /* Output the assembler code for a thunk function. THUNK_DECL is the
19618 declaration for the thunk function itself, FUNCTION is the decl for
19619 the target function. DELTA is an immediate constant offset to be
19620 added to THIS. If VCALL_OFFSET is nonzero, the word at
19621 *(*this + vcall_offset) should be added to THIS. */
19622
19623 static void
19624 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19625 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19626 HOST_WIDE_INT vcall_offset, tree function)
19627 {
19628 rtx xops[3];
19629 rtx this = x86_this_parameter (function);
19630 rtx this_reg, tmp;
19631
19632 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19633 pull it in now and let DELTA benefit. */
19634 if (REG_P (this))
19635 this_reg = this;
19636 else if (vcall_offset)
19637 {
19638 /* Put the this parameter into %eax. */
19639 xops[0] = this;
19640 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19641 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19642 }
19643 else
19644 this_reg = NULL_RTX;
19645
19646 /* Adjust the this parameter by a fixed constant. */
19647 if (delta)
19648 {
19649 xops[0] = GEN_INT (delta);
19650 xops[1] = this_reg ? this_reg : this;
19651 if (TARGET_64BIT)
19652 {
19653 if (!x86_64_general_operand (xops[0], DImode))
19654 {
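	      /* DELTA does not fit in a sign extended 32 bit immediate,
		 so it cannot be encoded directly in the add; load it
		 into %r10 first.  %r10 is free here - it only carries
		 the static chain, which a thunk does not use.  */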
19655 tmp = gen_rtx_REG (DImode, R10_REG);
19656 xops[1] = tmp;
19657 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19658 xops[0] = tmp;
19659 xops[1] = this;
19660 }
19661 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19662 }
19663 else
19664 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19665 }
19666
19667 /* Adjust the this parameter by a value stored in the vtable. */
19668 if (vcall_offset)
19669 {
19670 if (TARGET_64BIT)
19671 tmp = gen_rtx_REG (DImode, R10_REG);
19672 else
19673 {
19674 int tmp_regno = 2 /* ECX */;
19675 if (lookup_attribute ("fastcall",
19676 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19677 tmp_regno = 0 /* EAX */;
19678 tmp = gen_rtx_REG (SImode, tmp_regno);
19679 }
19680
19681 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19682 xops[1] = tmp;
19683 if (TARGET_64BIT)
19684 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19685 else
19686 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19687
19688 /* Adjust the this parameter. */
19689 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19690 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19691 {
19692 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19693 xops[0] = GEN_INT (vcall_offset);
19694 xops[1] = tmp2;
19695 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19696 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19697 }
19698 xops[1] = this_reg;
19699 if (TARGET_64BIT)
19700 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19701 else
19702 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19703 }
19704
19705 /* If necessary, drop THIS back to its stack slot. */
19706 if (this_reg && this_reg != this)
19707 {
19708 xops[0] = this_reg;
19709 xops[1] = this;
19710 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19711 }
19712
19713 xops[0] = XEXP (DECL_RTL (function), 0);
19714 if (TARGET_64BIT)
19715 {
19716 if (!flag_pic || (*targetm.binds_local_p) (function))
19717 output_asm_insn ("jmp\t%P0", xops);
19718 else
19719 {
19720 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19721 tmp = gen_rtx_CONST (Pmode, tmp);
19722 tmp = gen_rtx_MEM (QImode, tmp);
19723 xops[0] = tmp;
19724 output_asm_insn ("jmp\t%A0", xops);
19725 }
19726 }
19727 else
19728 {
19729 if (!flag_pic || (*targetm.binds_local_p) (function))
19730 output_asm_insn ("jmp\t%P0", xops);
19731 else
19732 #if TARGET_MACHO
19733 if (TARGET_MACHO)
19734 {
19735 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19736 tmp = (gen_rtx_SYMBOL_REF
19737 (Pmode,
19738 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19739 tmp = gen_rtx_MEM (QImode, tmp);
19740 xops[0] = tmp;
19741 output_asm_insn ("jmp\t%0", xops);
19742 }
19743 else
19744 #endif /* TARGET_MACHO */
19745 {
19746 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19747 output_set_got (tmp, NULL_RTX);
19748
19749 xops[1] = tmp;
19750 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19751 output_asm_insn ("jmp\t{*}%1", xops);
19752 }
19753 }
19754 }
19755
19756 static void
19757 x86_file_start (void)
19758 {
19759 default_file_start ();
19760 #if TARGET_MACHO
19761 darwin_file_start ();
19762 #endif
19763 if (X86_FILE_START_VERSION_DIRECTIVE)
19764 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19765 if (X86_FILE_START_FLTUSED)
19766 fputs ("\t.global\t__fltused\n", asm_out_file);
19767 if (ix86_asm_dialect == ASM_INTEL)
19768 fputs ("\t.intel_syntax\n", asm_out_file);
19769 }
19770
19771 int
19772 x86_field_alignment (tree field, int computed)
19773 {
19774 enum machine_mode mode;
19775 tree type = TREE_TYPE (field);
19776
19777 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19778 return computed;
19779 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19780 ? get_inner_array_type (type) : type);
19781 if (mode == DFmode || mode == DCmode
19782 || GET_MODE_CLASS (mode) == MODE_INT
19783 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19784 return MIN (32, computed);
19785 return computed;
19786 }
19787
19788 /* Output assembler code to FILE to increment profiler label # LABELNO
19789 for profiling a function entry. */
19790 void
19791 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19792 {
19793 if (TARGET_64BIT)
19794 if (flag_pic)
19795 {
19796 #ifndef NO_PROFILE_COUNTERS
19797 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19798 #endif
19799 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19800 }
19801 else
19802 {
19803 #ifndef NO_PROFILE_COUNTERS
19804 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19805 #endif
19806 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19807 }
19808 else if (flag_pic)
19809 {
19810 #ifndef NO_PROFILE_COUNTERS
19811 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19812 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19813 #endif
19814 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19815 }
19816 else
19817 {
19818 #ifndef NO_PROFILE_COUNTERS
19819 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19820 PROFILE_COUNT_REGISTER);
19821 #endif
19822 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19823 }
19824 }
19825
19826 /* We don't have exact information about the insn sizes, but we may assume
19827 quite safely that we are informed about all 1 byte insns and memory
19828 address sizes. This is enough to eliminate unnecessary padding in
19829 99% of cases. */
19830
19831 static int
19832 min_insn_size (rtx insn)
19833 {
19834 int l = 0;
19835
19836 if (!INSN_P (insn) || !active_insn_p (insn))
19837 return 0;
19838
19839 /* Discard alignments we've emitted and jump tables. */
19840 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19841 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19842 return 0;
19843 if (JUMP_P (insn)
19844 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19845 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19846 return 0;
19847
19848 /* Important case - calls are always 5 bytes.
19849 It is common to have many calls in a row. */
19850 if (CALL_P (insn)
19851 && symbolic_reference_mentioned_p (PATTERN (insn))
19852 && !SIBLING_CALL_P (insn))
19853 return 5;
19854 if (get_attr_length (insn) <= 1)
19855 return 1;
19856
19857 /* For normal instructions we may rely on the sizes of addresses
19858 and on the presence of a symbol to require 4 bytes of encoding.
19859 This is not the case for jumps, where references are PC relative. */
19860 if (!JUMP_P (insn))
19861 {
19862 l = get_attr_length_address (insn);
19863 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19864 l = 4;
19865 }
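  /* Assume at least one opcode byte on top of the address bytes;
     otherwise fall back to a conservative two byte estimate.  */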
19866 if (l)
19867 return 1+l;
19868 else
19869 return 2;
19870 }
19871
19872 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19873 window. */
19874
19875 static void
19876 ix86_avoid_jump_misspredicts (void)
19877 {
19878 rtx insn, start = get_insns ();
19879 int nbytes = 0, njumps = 0;
19880 int isjump = 0;
19881
19882 /* Look for all minimal intervals of instructions containing 4 jumps.
19883 The intervals are bounded by START and INSN. NBYTES is the total
19884 size of the instructions in the interval, including INSN but not
19885 including START. When NBYTES is smaller than 16, it is possible
19886 that INSN and the end of START end up in the same 16 byte window.
19887
19888 The smallest offset at which INSN can start in that window is the case
19889 where START ends at offset 0; the offset of INSN is then NBYTES -
19890 sizeof (INSN). We add a p2align to the 16 byte window with maxskip
19891 17 - NBYTES + sizeof (INSN). */
19892 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19893 {
19894
19895 nbytes += min_insn_size (insn);
19896 if (dump_file)
19897 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19898 INSN_UID (insn), min_insn_size (insn));
19899 if ((JUMP_P (insn)
19900 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19901 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19902 || CALL_P (insn))
19903 njumps++;
19904 else
19905 continue;
19906
19907 while (njumps > 3)
19908 {
19909 start = NEXT_INSN (start);
19910 if ((JUMP_P (start)
19911 && GET_CODE (PATTERN (start)) != ADDR_VEC
19912 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19913 || CALL_P (start))
19914 njumps--, isjump = 1;
19915 else
19916 isjump = 0;
19917 nbytes -= min_insn_size (start);
19918 }
19919 gcc_assert (njumps >= 0);
19920 if (dump_file)
19921 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19922 INSN_UID (start), INSN_UID (insn), nbytes);
19923
19924 if (njumps == 3 && isjump && nbytes < 16)
19925 {
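	  /* INSN is a branch that would be the fourth one within a
	     16 byte window.  Emit a p2align in front of it, allowing at
	     most PADSIZE bytes of padding, so that it is pushed out of
	     the window holding the previous three jumps.  */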
19926 int padsize = 15 - nbytes + min_insn_size (insn);
19927
19928 if (dump_file)
19929 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19930 INSN_UID (insn), padsize);
19931 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19932 }
19933 }
19934 }
19935
19936 /* AMD Athlon works faster
19937 when RET is not the destination of a conditional jump or directly preceded
19938 by another jump instruction. We avoid the penalty by inserting a NOP just
19939 before such RET instructions. */
19940 static void
19941 ix86_pad_returns (void)
19942 {
19943 edge e;
19944 edge_iterator ei;
19945
19946 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19947 {
19948 basic_block bb = e->src;
19949 rtx ret = BB_END (bb);
19950 rtx prev;
19951 bool replace = false;
19952
19953 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19954 || !maybe_hot_bb_p (bb))
19955 continue;
19956 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19957 if (active_insn_p (prev) || LABEL_P (prev))
19958 break;
19959 if (prev && LABEL_P (prev))
19960 {
19961 edge e;
19962 edge_iterator ei;
19963
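	  /* The RET is immediately preceded by a label.  If any executed
	     predecessor edge reaches that label by a jump rather than by
	     falling through, the RET is a branch target and is flagged
	     for the workaround.  */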
19964 FOR_EACH_EDGE (e, ei, bb->preds)
19965 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19966 && !(e->flags & EDGE_FALLTHRU))
19967 replace = true;
19968 }
19969 if (!replace)
19970 {
19971 prev = prev_active_insn (ret);
19972 if (prev
19973 && ((JUMP_P (prev) && any_condjump_p (prev))
19974 || CALL_P (prev)))
19975 replace = true;
19976 /* Empty functions get a branch mispredict even when the jump destination
19977 is not visible to us. */
19978 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19979 replace = true;
19980 }
19981 if (replace)
19982 {
19983 emit_insn_before (gen_return_internal_long (), ret);
19984 delete_insn (ret);
19985 }
19986 }
19987 }
19988
19989 /* Implement machine specific optimizations. We implement padding of returns
19990 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19991 static void
19992 ix86_reorg (void)
19993 {
19994 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19995 ix86_pad_returns ();
19996 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19997 ix86_avoid_jump_misspredicts ();
19998 }
19999
20000 /* Return nonzero when a QImode register that must be represented via a REX
20001 prefix is used. */
20002 bool
20003 x86_extended_QIreg_mentioned_p (rtx insn)
20004 {
20005 int i;
20006 extract_insn_cached (insn);
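  /* Only %al, %dl, %cl and %bl (hard registers 0-3) can be accessed as
     byte registers without a REX prefix; any other register mentioned
     in QImode forces one.  */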
20007 for (i = 0; i < recog_data.n_operands; i++)
20008 if (REG_P (recog_data.operand[i])
20009 && REGNO (recog_data.operand[i]) >= 4)
20010 return true;
20011 return false;
20012 }
20013
20014 /* Return nonzero when P points to a register encoded via a REX prefix.
20015 Called via for_each_rtx. */
20016 static int
20017 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20018 {
20019 unsigned int regno;
20020 if (!REG_P (*p))
20021 return 0;
20022 regno = REGNO (*p);
20023 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20024 }
20025
20026 /* Return true when INSN mentions a register that must be encoded using a
20027 REX prefix. */
20028 bool
20029 x86_extended_reg_mentioned_p (rtx insn)
20030 {
20031 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20032 }
20033
20034 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20035 optabs would emit if we didn't have TFmode patterns. */
20036
20037 void
20038 x86_emit_floatuns (rtx operands[2])
20039 {
20040 rtx neglab, donelab, i0, i1, f0, in, out;
20041 enum machine_mode mode, inmode;
20042
20043 inmode = GET_MODE (operands[1]);
20044 gcc_assert (inmode == SImode || inmode == DImode);
20045
20046 out = operands[0];
20047 in = force_reg (inmode, operands[1]);
20048 mode = GET_MODE (out);
20049 neglab = gen_label_rtx ();
20050 donelab = gen_label_rtx ();
20051 f0 = gen_reg_rtx (mode);
20052
20053 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20054
20055 expand_float (out, in, 0);
20056
20057 emit_jump_insn (gen_jump (donelab));
20058 emit_barrier ();
20059
20060 emit_label (neglab);
20061
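  /* IN has its sign bit set, so interpreted as unsigned it is too large
     for a direct signed conversion.  Halve it, folding the discarded low
     bit back in so that the final rounding is not biased, convert the
     halved value, and then double the result below.  */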
20062 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20063 1, OPTAB_DIRECT);
20064 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20065 1, OPTAB_DIRECT);
20066 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20067
20068 expand_float (f0, i0, 0);
20069
20070 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20071
20072 emit_label (donelab);
20073 }
20074 \f
20075 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20076 with all elements equal to VAR. Return true if successful. */
20077
20078 static bool
20079 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20080 rtx target, rtx val)
20081 {
20082 enum machine_mode smode, wsmode, wvmode;
20083 rtx x;
20084
20085 switch (mode)
20086 {
20087 case V2SImode:
20088 case V2SFmode:
20089 if (!mmx_ok)
20090 return false;
20091 /* FALLTHRU */
20092
20093 case V2DFmode:
20094 case V2DImode:
20095 case V4SFmode:
20096 case V4SImode:
20097 val = force_reg (GET_MODE_INNER (mode), val);
20098 x = gen_rtx_VEC_DUPLICATE (mode, val);
20099 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20100 return true;
20101
20102 case V4HImode:
20103 if (!mmx_ok)
20104 return false;
20105 if (TARGET_SSE || TARGET_3DNOW_A)
20106 {
20107 val = gen_lowpart (SImode, val);
20108 x = gen_rtx_TRUNCATE (HImode, val);
20109 x = gen_rtx_VEC_DUPLICATE (mode, x);
20110 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20111 return true;
20112 }
20113 else
20114 {
20115 smode = HImode;
20116 wsmode = SImode;
20117 wvmode = V2SImode;
20118 goto widen;
20119 }
20120
20121 case V8QImode:
20122 if (!mmx_ok)
20123 return false;
20124 smode = QImode;
20125 wsmode = HImode;
20126 wvmode = V4HImode;
20127 goto widen;
20128 case V8HImode:
20129 if (TARGET_SSE2)
20130 {
20131 rtx tmp1, tmp2;
20132 /* Extend HImode to SImode using a paradoxical SUBREG. */
20133 tmp1 = gen_reg_rtx (SImode);
20134 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20135 /* Insert the SImode value as low element of V4SImode vector. */
20136 tmp2 = gen_reg_rtx (V4SImode);
20137 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20138 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20139 CONST0_RTX (V4SImode),
20140 const1_rtx);
20141 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20142 /* Cast the V4SImode vector back to a V8HImode vector. */
20143 tmp1 = gen_reg_rtx (V8HImode);
20144 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20145 /* Duplicate the low short through the whole low SImode word. */
20146 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20147 /* Cast the V8HImode vector back to a V4SImode vector. */
20148 tmp2 = gen_reg_rtx (V4SImode);
20149 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20150 /* Replicate the low element of the V4SImode vector. */
20151 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20152 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20153 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20154 return true;
20155 }
20156 smode = HImode;
20157 wsmode = SImode;
20158 wvmode = V4SImode;
20159 goto widen;
20160 case V16QImode:
20161 if (TARGET_SSE2)
20162 {
20163 rtx tmp1, tmp2;
20164 /* Extend QImode to SImode using a paradoxical SUBREG. */
20165 tmp1 = gen_reg_rtx (SImode);
20166 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20167 /* Insert the SImode value as low element of V4SImode vector. */
20168 tmp2 = gen_reg_rtx (V4SImode);
20169 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20170 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20171 CONST0_RTX (V4SImode),
20172 const1_rtx);
20173 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20174 /* Cast the V4SImode vector back to a V16QImode vector. */
20175 tmp1 = gen_reg_rtx (V16QImode);
20176 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20177 /* Duplicate the low byte through the whole low SImode word. */
20178 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20179 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20180 /* Cast the V16QImode vector back to a V4SImode vector. */
20181 tmp2 = gen_reg_rtx (V4SImode);
20182 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20183 /* Replicate the low element of the V4SImode vector. */
20184 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20185 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20186 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20187 return true;
20188 }
20189 smode = QImode;
20190 wsmode = HImode;
20191 wvmode = V8HImode;
20192 goto widen;
20193 widen:
20194 /* Replicate the value once into the next wider mode and recurse. */
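      /* For instance, for V8QImode the QImode value V is first widened
	 to the HImode value (V << 8) | V, and the V4HImode recursion
	 then fills the rest of the vector.  */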
20195 val = convert_modes (wsmode, smode, val, true);
20196 x = expand_simple_binop (wsmode, ASHIFT, val,
20197 GEN_INT (GET_MODE_BITSIZE (smode)),
20198 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20199 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20200
20201 x = gen_reg_rtx (wvmode);
20202 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20203 gcc_unreachable ();
20204 emit_move_insn (target, gen_lowpart (mode, x));
20205 return true;
20206
20207 default:
20208 return false;
20209 }
20210 }
20211
20212 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20213 whose ONE_VAR element is VAR, and other elements are zero. Return true
20214 if successful. */
20215
20216 static bool
20217 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20218 rtx target, rtx var, int one_var)
20219 {
20220 enum machine_mode vsimode;
20221 rtx new_target;
20222 rtx x, tmp;
20223
20224 switch (mode)
20225 {
20226 case V2SFmode:
20227 case V2SImode:
20228 if (!mmx_ok)
20229 return false;
20230 /* FALLTHRU */
20231
20232 case V2DFmode:
20233 case V2DImode:
20234 if (one_var != 0)
20235 return false;
20236 var = force_reg (GET_MODE_INNER (mode), var);
20237 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20238 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20239 return true;
20240
20241 case V4SFmode:
20242 case V4SImode:
20243 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20244 new_target = gen_reg_rtx (mode);
20245 else
20246 new_target = target;
20247 var = force_reg (GET_MODE_INNER (mode), var);
20248 x = gen_rtx_VEC_DUPLICATE (mode, var);
20249 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20250 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20251 if (one_var != 0)
20252 {
20253 /* We need to shuffle the value to the correct position, so
20254 create a new pseudo to store the intermediate result. */
20255
20256 /* With SSE2, we can use the integer shuffle insns. */
20257 if (mode != V4SFmode && TARGET_SSE2)
20258 {
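	      /* NEW_TARGET currently holds (VAR, 0, 0, 0); copy element 0
		 into lane ONE_VAR and the zero from element 1 into every
		 other lane.  */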
20259 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20260 GEN_INT (1),
20261 GEN_INT (one_var == 1 ? 0 : 1),
20262 GEN_INT (one_var == 2 ? 0 : 1),
20263 GEN_INT (one_var == 3 ? 0 : 1)));
20264 if (target != new_target)
20265 emit_move_insn (target, new_target);
20266 return true;
20267 }
20268
20269 /* Otherwise convert the intermediate result to V4SFmode and
20270 use the SSE1 shuffle instructions. */
20271 if (mode != V4SFmode)
20272 {
20273 tmp = gen_reg_rtx (V4SFmode);
20274 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20275 }
20276 else
20277 tmp = new_target;
20278
20279 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20280 GEN_INT (1),
20281 GEN_INT (one_var == 1 ? 0 : 1),
20282 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20283 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20284
20285 if (mode != V4SFmode)
20286 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20287 else if (tmp != target)
20288 emit_move_insn (target, tmp);
20289 }
20290 else if (target != new_target)
20291 emit_move_insn (target, new_target);
20292 return true;
20293
20294 case V8HImode:
20295 case V16QImode:
20296 vsimode = V4SImode;
20297 goto widen;
20298 case V4HImode:
20299 case V8QImode:
20300 if (!mmx_ok)
20301 return false;
20302 vsimode = V2SImode;
20303 goto widen;
20304 widen:
20305 if (one_var != 0)
20306 return false;
20307
20308 /* Zero extend the variable element to SImode and recurse. */
20309 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20310
20311 x = gen_reg_rtx (vsimode);
20312 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20313 var, one_var))
20314 gcc_unreachable ();
20315
20316 emit_move_insn (target, gen_lowpart (mode, x));
20317 return true;
20318
20319 default:
20320 return false;
20321 }
20322 }
20323
20324 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20325 consisting of the values in VALS. It is known that all elements
20326 except ONE_VAR are constants. Return true if successful. */
20327
20328 static bool
20329 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20330 rtx target, rtx vals, int one_var)
20331 {
20332 rtx var = XVECEXP (vals, 0, one_var);
20333 enum machine_mode wmode;
20334 rtx const_vec, x;
20335
20336 const_vec = copy_rtx (vals);
20337 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20338 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20339
20340 switch (mode)
20341 {
20342 case V2DFmode:
20343 case V2DImode:
20344 case V2SFmode:
20345 case V2SImode:
20346 /* For the two element vectors, it's just as easy to use
20347 the general case. */
20348 return false;
20349
20350 case V4SFmode:
20351 case V4SImode:
20352 case V8HImode:
20353 case V4HImode:
20354 break;
20355
20356 case V16QImode:
20357 wmode = V8HImode;
20358 goto widen;
20359 case V8QImode:
20360 wmode = V4HImode;
20361 goto widen;
20362 widen:
20363 /* There's no way to set one QImode entry easily. Combine
20364 the variable value with its adjacent constant value, and
20365 promote to an HImode set. */
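      /* For example, with ONE_VAR == 5 in a V16QImode vector, QImode
	 elements 4 and 5 are combined into one HImode value and stored
	 as HImode element 2 of the V8HImode image of the vector.  */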
20366 x = XVECEXP (vals, 0, one_var ^ 1);
20367 if (one_var & 1)
20368 {
20369 var = convert_modes (HImode, QImode, var, true);
20370 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20371 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20372 x = GEN_INT (INTVAL (x) & 0xff);
20373 }
20374 else
20375 {
20376 var = convert_modes (HImode, QImode, var, true);
20377 x = gen_int_mode (INTVAL (x) << 8, HImode);
20378 }
20379 if (x != const0_rtx)
20380 var = expand_simple_binop (HImode, IOR, var, x, var,
20381 1, OPTAB_LIB_WIDEN);
20382
20383 x = gen_reg_rtx (wmode);
20384 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20385 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20386
20387 emit_move_insn (target, gen_lowpart (mode, x));
20388 return true;
20389
20390 default:
20391 return false;
20392 }
20393
20394 emit_move_insn (target, const_vec);
20395 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20396 return true;
20397 }
20398
20399 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20400 all values variable, and none identical. */
20401
20402 static void
20403 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20404 rtx target, rtx vals)
20405 {
20406 enum machine_mode half_mode = GET_MODE_INNER (mode);
20407 rtx op0 = NULL, op1 = NULL;
20408 bool use_vec_concat = false;
20409
20410 switch (mode)
20411 {
20412 case V2SFmode:
20413 case V2SImode:
20414 if (!mmx_ok && !TARGET_SSE)
20415 break;
20416 /* FALLTHRU */
20417
20418 case V2DFmode:
20419 case V2DImode:
20420 /* For the two element vectors, we always implement VEC_CONCAT. */
20421 op0 = XVECEXP (vals, 0, 0);
20422 op1 = XVECEXP (vals, 0, 1);
20423 use_vec_concat = true;
20424 break;
20425
20426 case V4SFmode:
20427 half_mode = V2SFmode;
20428 goto half;
20429 case V4SImode:
20430 half_mode = V2SImode;
20431 goto half;
20432 half:
20433 {
20434 rtvec v;
20435
20436 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20437 Recurse to load the two halves. */
20438
20439 op0 = gen_reg_rtx (half_mode);
20440 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20441 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20442
20443 op1 = gen_reg_rtx (half_mode);
20444 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20445 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20446
20447 use_vec_concat = true;
20448 }
20449 break;
20450
20451 case V8HImode:
20452 case V16QImode:
20453 case V4HImode:
20454 case V8QImode:
20455 break;
20456
20457 default:
20458 gcc_unreachable ();
20459 }
20460
20461 if (use_vec_concat)
20462 {
20463 if (!register_operand (op0, half_mode))
20464 op0 = force_reg (half_mode, op0);
20465 if (!register_operand (op1, half_mode))
20466 op1 = force_reg (half_mode, op1);
20467
20468 emit_insn (gen_rtx_SET (VOIDmode, target,
20469 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20470 }
20471 else
20472 {
20473 int i, j, n_elts, n_words, n_elt_per_word;
20474 enum machine_mode inner_mode;
20475 rtx words[4], shift;
20476
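      /* There is no direct way to build these vectors from scalar
	 elements, so assemble each word-sized chunk in an integer
	 register - inserting elements from the highest-numbered one
	 down, so lower-numbered elements land in lower bits - and then
	 combine the words below.  */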
20477 inner_mode = GET_MODE_INNER (mode);
20478 n_elts = GET_MODE_NUNITS (mode);
20479 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20480 n_elt_per_word = n_elts / n_words;
20481 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20482
20483 for (i = 0; i < n_words; ++i)
20484 {
20485 rtx word = NULL_RTX;
20486
20487 for (j = 0; j < n_elt_per_word; ++j)
20488 {
20489 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20490 elt = convert_modes (word_mode, inner_mode, elt, true);
20491
20492 if (j == 0)
20493 word = elt;
20494 else
20495 {
20496 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20497 word, 1, OPTAB_LIB_WIDEN);
20498 word = expand_simple_binop (word_mode, IOR, word, elt,
20499 word, 1, OPTAB_LIB_WIDEN);
20500 }
20501 }
20502
20503 words[i] = word;
20504 }
20505
20506 if (n_words == 1)
20507 emit_move_insn (target, gen_lowpart (mode, words[0]));
20508 else if (n_words == 2)
20509 {
20510 rtx tmp = gen_reg_rtx (mode);
20511 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20512 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20513 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20514 emit_move_insn (target, tmp);
20515 }
20516 else if (n_words == 4)
20517 {
20518 rtx tmp = gen_reg_rtx (V4SImode);
20519 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20520 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20521 emit_move_insn (target, gen_lowpart (mode, tmp));
20522 }
20523 else
20524 gcc_unreachable ();
20525 }
20526 }
20527
20528 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20529 instructions unless MMX_OK is true. */
20530
20531 void
20532 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20533 {
20534 enum machine_mode mode = GET_MODE (target);
20535 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20536 int n_elts = GET_MODE_NUNITS (mode);
20537 int n_var = 0, one_var = -1;
20538 bool all_same = true, all_const_zero = true;
20539 int i;
20540 rtx x;
20541
20542 for (i = 0; i < n_elts; ++i)
20543 {
20544 x = XVECEXP (vals, 0, i);
20545 if (!CONSTANT_P (x))
20546 n_var++, one_var = i;
20547 else if (x != CONST0_RTX (inner_mode))
20548 all_const_zero = false;
20549 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20550 all_same = false;
20551 }
20552
20553 /* Constants are best loaded from the constant pool. */
20554 if (n_var == 0)
20555 {
20556 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20557 return;
20558 }
20559
20560 /* If all values are identical, broadcast the value. */
20561 if (all_same
20562 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20563 XVECEXP (vals, 0, 0)))
20564 return;
20565
20566 /* Values where only one field is non-constant are best loaded from
20567 the pool and overwritten via move later. */
20568 if (n_var == 1)
20569 {
20570 if (all_const_zero
20571 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20572 XVECEXP (vals, 0, one_var),
20573 one_var))
20574 return;
20575
20576 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20577 return;
20578 }
20579
20580 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20581 }
20582
20583 void
20584 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20585 {
20586 enum machine_mode mode = GET_MODE (target);
20587 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20588 bool use_vec_merge = false;
20589 rtx tmp;
20590
20591 switch (mode)
20592 {
20593 case V2SFmode:
20594 case V2SImode:
20595 if (mmx_ok)
20596 {
20597 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20598 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20599 if (elt == 0)
20600 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20601 else
20602 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20603 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20604 return;
20605 }
20606 break;
20607
20608 case V2DFmode:
20609 case V2DImode:
20610 {
20611 rtx op0, op1;
20612
20613 /* For the two element vectors, we implement a VEC_CONCAT with
20614 the extraction of the other element. */
20615
20616 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20617 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20618
20619 if (elt == 0)
20620 op0 = val, op1 = tmp;
20621 else
20622 op0 = tmp, op1 = val;
20623
20624 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20625 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20626 }
20627 return;
20628
20629 case V4SFmode:
20630 switch (elt)
20631 {
20632 case 0:
20633 use_vec_merge = true;
20634 break;
20635
20636 case 1:
20637 /* tmp = target = A B C D */
20638 tmp = copy_to_reg (target);
20639 /* target = A A B B */
20640 emit_insn (gen_sse_unpcklps (target, target, target));
20641 /* target = X A B B */
20642 ix86_expand_vector_set (false, target, val, 0);
20643 /* target = A X C D */
20644 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20645 GEN_INT (1), GEN_INT (0),
20646 GEN_INT (2+4), GEN_INT (3+4)));
20647 return;
20648
20649 case 2:
20650 /* tmp = target = A B C D */
20651 tmp = copy_to_reg (target);
20652 /* tmp = X B C D */
20653 ix86_expand_vector_set (false, tmp, val, 0);
20654 /* target = A B X D */
20655 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20656 GEN_INT (0), GEN_INT (1),
20657 GEN_INT (0+4), GEN_INT (3+4)));
20658 return;
20659
20660 case 3:
20661 /* tmp = target = A B C D */
20662 tmp = copy_to_reg (target);
20663 /* tmp = X B C D */
20664 ix86_expand_vector_set (false, tmp, val, 0);
20665 /* target = A B C X */
20666 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20667 GEN_INT (0), GEN_INT (1),
20668 GEN_INT (2+4), GEN_INT (0+4)));
20669 return;
20670
20671 default:
20672 gcc_unreachable ();
20673 }
20674 break;
20675
20676 case V4SImode:
20677 /* Element 0 handled by vec_merge below. */
20678 if (elt == 0)
20679 {
20680 use_vec_merge = true;
20681 break;
20682 }
20683
20684 if (TARGET_SSE2)
20685 {
20686 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20687 store into element 0, then shuffle them back. */
20688
20689 rtx order[4];
20690
20691 order[0] = GEN_INT (elt);
20692 order[1] = const1_rtx;
20693 order[2] = const2_rtx;
20694 order[3] = GEN_INT (3);
20695 order[elt] = const0_rtx;
20696
20697 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20698 order[1], order[2], order[3]));
20699
20700 ix86_expand_vector_set (false, target, val, 0);
20701
20702 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20703 order[1], order[2], order[3]));
20704 }
20705 else
20706 {
20707 /* For SSE1, we have to reuse the V4SF code. */
20708 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20709 gen_lowpart (SFmode, val), elt);
20710 }
20711 return;
20712
20713 case V8HImode:
20714 use_vec_merge = TARGET_SSE2;
20715 break;
20716 case V4HImode:
20717 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20718 break;
20719
20720 case V16QImode:
20721 case V8QImode:
20722 default:
20723 break;
20724 }
20725
20726 if (use_vec_merge)
20727 {
20728 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20729 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20730 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20731 }
20732 else
20733 {
20734 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20735
20736 emit_move_insn (mem, target);
20737
20738 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20739 emit_move_insn (tmp, val);
20740
20741 emit_move_insn (target, mem);
20742 }
20743 }
20744
20745 void
20746 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20747 {
20748 enum machine_mode mode = GET_MODE (vec);
20749 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20750 bool use_vec_extr = false;
20751 rtx tmp;
20752
20753 switch (mode)
20754 {
20755 case V2SImode:
20756 case V2SFmode:
20757 if (!mmx_ok)
20758 break;
20759 /* FALLTHRU */
20760
20761 case V2DFmode:
20762 case V2DImode:
20763 use_vec_extr = true;
20764 break;
20765
20766 case V4SFmode:
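/* Element 0 is already in the low lane.  For elements 1 and 3 the
   shufps below broadcasts the element into every lane; for element 2
   unpckhps yields { C, C, D, D }.  In each case the wanted element
   ends up in lane 0, so we fall through to a plain element-0 extract.  */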
20767 switch (elt)
20768 {
20769 case 0:
20770 tmp = vec;
20771 break;
20772
20773 case 1:
20774 case 3:
20775 tmp = gen_reg_rtx (mode);
20776 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20777 GEN_INT (elt), GEN_INT (elt),
20778 GEN_INT (elt+4), GEN_INT (elt+4)));
20779 break;
20780
20781 case 2:
20782 tmp = gen_reg_rtx (mode);
20783 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20784 break;
20785
20786 default:
20787 gcc_unreachable ();
20788 }
20789 vec = tmp;
20790 use_vec_extr = true;
20791 elt = 0;
20792 break;
20793
20794 case V4SImode:
20795 if (TARGET_SSE2)
20796 {
20797 switch (elt)
20798 {
20799 case 0:
20800 tmp = vec;
20801 break;
20802
20803 case 1:
20804 case 3:
20805 tmp = gen_reg_rtx (mode);
20806 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20807 GEN_INT (elt), GEN_INT (elt),
20808 GEN_INT (elt), GEN_INT (elt)));
20809 break;
20810
20811 case 2:
20812 tmp = gen_reg_rtx (mode);
20813 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20814 break;
20815
20816 default:
20817 gcc_unreachable ();
20818 }
20819 vec = tmp;
20820 use_vec_extr = true;
20821 elt = 0;
20822 }
20823 else
20824 {
20825 /* For SSE1, we have to reuse the V4SF code. */
20826 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20827 gen_lowpart (V4SFmode, vec), elt);
20828 return;
20829 }
20830 break;
20831
20832 case V8HImode:
20833 use_vec_extr = TARGET_SSE2;
20834 break;
20835 case V4HImode:
20836 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20837 break;
20838
20839 case V16QImode:
20840 case V8QImode:
20841 /* ??? Could extract the appropriate HImode element and shift. */
20842 default:
20843 break;
20844 }
20845
20846 if (use_vec_extr)
20847 {
20848 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20849 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20850
20851 /* Let the rtl optimizers know about the zero extension performed
(the HImode extracts are done with pextrw, which zero-extends the
element into a 32-bit register). */
20852 if (inner_mode == HImode)
20853 {
20854 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20855 target = gen_lowpart (SImode, target);
20856 }
20857
20858 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20859 }
20860 else
20861 {
20862 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20863
20864 emit_move_insn (mem, vec);
20865
20866 tmp = adjust_address (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
20867 emit_move_insn (target, tmp);
20868 }
20869 }
20870
20871 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20872 pattern to reduce; DEST is the destination; IN is the input vector. */
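/* A worked example with an addition FN and IN = { a, b, c, d }:
     movhlps  tmp1 = { c, d, c, d }
     FN       tmp2 = { a+c, b+d, c+c, d+d }
     shufps   tmp3 = { b+d, b+d, b+d, b+d }
     FN       dest = { (a+c)+(b+d), ... }
   so element 0 of DEST holds the reduction of all four elements.  */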
20873
20874 void
20875 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20876 {
20877 rtx tmp1, tmp2, tmp3;
20878
20879 tmp1 = gen_reg_rtx (V4SFmode);
20880 tmp2 = gen_reg_rtx (V4SFmode);
20881 tmp3 = gen_reg_rtx (V4SFmode);
20882
20883 emit_insn (gen_sse_movhlps (tmp1, in, in));
20884 emit_insn (fn (tmp2, tmp1, in));
20885
20886 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20887 GEN_INT (1), GEN_INT (1),
20888 GEN_INT (1+4), GEN_INT (1+4)));
20889 emit_insn (fn (dest, tmp2, tmp3));
20890 }
20891 \f
20892 /* Target hook for scalar_mode_supported_p. */
20893 static bool
20894 ix86_scalar_mode_supported_p (enum machine_mode mode)
20895 {
20896 if (DECIMAL_FLOAT_MODE_P (mode))
20897 return true;
20898 else
20899 return default_scalar_mode_supported_p (mode);
20900 }
20901
20902 /* Implements target hook vector_mode_supported_p. */
20903 static bool
20904 ix86_vector_mode_supported_p (enum machine_mode mode)
20905 {
20906 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20907 return true;
20908 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20909 return true;
20910 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20911 return true;
20912 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20913 return true;
20914 return false;
20915 }
20916
20917 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20918
20919 We do this in the new i386 backend to maintain source compatibility
20920 with the old cc0-based compiler. */
20921
20922 static tree
20923 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20924 tree inputs ATTRIBUTE_UNUSED,
20925 tree clobbers)
20926 {
20927 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20928 clobbers);
20929 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20930 clobbers);
20931 return clobbers;
20932 }
20933
20934 /* Return true if this goes in large data/bss. */
20935
20936 static bool
20937 ix86_in_large_data_p (tree exp)
20938 {
20939 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20940 return false;
20941
20942 /* Functions are never large data. */
20943 if (TREE_CODE (exp) == FUNCTION_DECL)
20944 return false;
20945
20946 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20947 {
20948 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20949 if (strcmp (section, ".ldata") == 0
20950 || strcmp (section, ".lbss") == 0)
20951 return true;
20952 return false;
20953 }
20954 else
20955 {
20956 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20957
20958 /* If this is an incomplete type with size 0, then we can't put it
20959 in data because it might be too big when completed. */
20960 if (!size || size > ix86_section_threshold)
20961 return true;
20962 }
20963
20964 return false;
20965 }
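
/* Mark variables that go into the large data/bss sections: in addition
   to the default encoding, their SYMBOL_REFs get SYMBOL_FLAG_FAR_ADDR.  */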
20966 static void
20967 ix86_encode_section_info (tree decl, rtx rtl, int first)
20968 {
20969 default_encode_section_info (decl, rtl, first);
20970
20971 if (TREE_CODE (decl) == VAR_DECL
20972 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20973 && ix86_in_large_data_p (decl))
20974 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20975 }
20976
20977 /* Worker function for REVERSE_CONDITION. */
20978
20979 enum rtx_code
20980 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20981 {
20982 return (mode != CCFPmode && mode != CCFPUmode
20983 ? reverse_condition (code)
20984 : reverse_condition_maybe_unordered (code));
20985 }
20986
20987 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20988 to OPERANDS[0]. */
20989
20990 const char *
20991 output_387_reg_move (rtx insn, rtx *operands)
20992 {
20993 if (REG_P (operands[1])
20994 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20995 {
20996 if (REGNO (operands[0]) == FIRST_STACK_REG)
20997 return output_387_ffreep (operands, 0);
20998 return "fstp\t%y0";
20999 }
21000 if (STACK_TOP_P (operands[0]))
21001 return "fld%z1\t%y1";
21002 return "fst\t%y0";
21003 }
21004
21005 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21006 FP status register is set. */
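/* C2 is bit 10 of the FPU status word, i.e. bit 2 of the byte that
   fnstsw leaves in %ah.  With SAHF that byte is copied into the flags,
   where C2 lands in PF and is checked via the UNORDERED comparison;
   without SAHF we test the bit directly with a TEST of 0x04 against
   the high byte.  */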
21007
21008 void
21009 ix86_emit_fp_unordered_jump (rtx label)
21010 {
21011 rtx reg = gen_reg_rtx (HImode);
21012 rtx temp;
21013
21014 emit_insn (gen_x86_fnstsw_1 (reg));
21015
21016 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21017 {
21018 emit_insn (gen_x86_sahf_1 (reg));
21019
21020 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21021 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21022 }
21023 else
21024 {
21025 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21026
21027 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21028 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21029 }
21030
21031 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21032 gen_rtx_LABEL_REF (VOIDmode, label),
21033 pc_rtx);
21034 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21035 emit_jump_insn (temp);
21036 }
21037
21038 /* Output code to perform a log1p XFmode calculation. */
21039
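/* fyl2xp1 computes y * log2 (x + 1) but is only specified for inputs
   with roughly |x| < 1 - sqrt(2)/2, which is the 0.29289... threshold
   tested below; for larger inputs we fall back to fyl2x on 1 + x.  In
   both cases the ln(2) constant loaded with fldln2 converts the
   base-2 logarithm into a natural logarithm.  */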
21040 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
21041 {
21042 rtx label1 = gen_label_rtx ();
21043 rtx label2 = gen_label_rtx ();
21044
21045 rtx tmp = gen_reg_rtx (XFmode);
21046 rtx tmp2 = gen_reg_rtx (XFmode);
21047
21048 emit_insn (gen_absxf2 (tmp, op1));
21049 emit_insn (gen_cmpxf (tmp,
21050 CONST_DOUBLE_FROM_REAL_VALUE (
21051 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21052 XFmode)));
21053 emit_jump_insn (gen_bge (label1));
21054
21055 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21056 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21057 emit_jump (label2);
21058
21059 emit_label (label1);
21060 emit_move_insn (tmp, CONST1_RTX (XFmode));
21061 emit_insn (gen_addxf3 (tmp, op1, tmp));
21062 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21063 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21064
21065 emit_label (label2);
21066 }
21067
21068 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21069
21070 static void
21071 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21072 tree decl)
21073 {
21074 /* With Binutils 2.15, the "@unwind" marker must be specified on
21075 every occurrence of the ".eh_frame" section, not just the first
21076 one. */
21077 if (TARGET_64BIT
21078 && strcmp (name, ".eh_frame") == 0)
21079 {
21080 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21081 flags & SECTION_WRITE ? "aw" : "a");
21082 return;
21083 }
21084 default_elf_asm_named_section (name, flags, decl);
21085 }
21086
21087 /* Return the mangling of TYPE if it is an extended fundamental type. */
21088
21089 static const char *
21090 ix86_mangle_fundamental_type (tree type)
21091 {
21092 switch (TYPE_MODE (type))
21093 {
21094 case TFmode:
21095 /* __float128 is "g". */
21096 return "g";
21097 case XFmode:
21098 /* "long double" or __float80 is "e". */
21099 return "e";
21100 default:
21101 return NULL;
21102 }
21103 }
21104
21105 /* For 32-bit code we can save PIC register setup by using
21106 __stack_chk_fail_local hidden function instead of calling
21107 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21108 register, so it is better to call __stack_chk_fail directly. */
21109
21110 static tree
21111 ix86_stack_protect_fail (void)
21112 {
21113 return TARGET_64BIT
21114 ? default_external_stack_protect_fail ()
21115 : default_hidden_stack_protect_fail ();
21116 }
21117
21118 /* Select a format to encode pointers in exception handling data. CODE
21119 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21120 true if the symbol may be affected by dynamic relocations.
21121
21122 ??? All x86 object file formats are capable of representing this.
21123 After all, the relocation needed is the same as for the call insn.
21124 Whether or not a particular assembler allows us to enter such, I
21125 guess we'll have to see. */
21126 int
21127 asm_preferred_eh_data_format (int code, int global)
21128 {
21129 if (flag_pic)
21130 {
21131 int type = DW_EH_PE_sdata8;
21132 if (!TARGET_64BIT
21133 || ix86_cmodel == CM_SMALL_PIC
21134 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21135 type = DW_EH_PE_sdata4;
21136 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21137 }
21138 if (ix86_cmodel == CM_SMALL
21139 || (ix86_cmodel == CM_MEDIUM && code))
21140 return DW_EH_PE_udata4;
21141 return DW_EH_PE_absptr;
21142 }
21143 \f
21144 /* Expand copysign: copy the sign of SIGN onto the non-negative value
21145 ABS_VALUE, storing the result in RESULT. If MASK is non-null, it is
21146 a mask with the sign bit clear (e.g. the mask returned by
ix86_expand_sse_fabs) and is reused instead of building a new one. */
21147 static void
21148 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21149 {
21150 enum machine_mode mode = GET_MODE (sign);
21151 rtx sgn = gen_reg_rtx (mode);
21152 if (mask == NULL_RTX)
21153 {
21154 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21155 if (!VECTOR_MODE_P (mode))
21156 {
21157 /* We need to generate a scalar mode mask in this case. */
21158 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21159 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21160 mask = gen_reg_rtx (mode);
21161 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21162 }
21163 }
21164 else
21165 mask = gen_rtx_NOT (mode, mask);
21166 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21167 gen_rtx_AND (mode, mask, sign)));
21168 emit_insn (gen_rtx_SET (VOIDmode, result,
21169 gen_rtx_IOR (mode, abs_value, sgn)));
21170 }
21171
21172 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21173 mask for masking out the sign-bit is stored in *SMASK, if that is
21174 non-null. */
21175 static rtx
21176 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21177 {
21178 enum machine_mode mode = GET_MODE (op0);
21179 rtx xa, mask;
21180
21181 xa = gen_reg_rtx (mode);
21182 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21183 if (!VECTOR_MODE_P (mode))
21184 {
21185 /* We need to generate a scalar mode mask in this case. */
21186 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21187 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21188 mask = gen_reg_rtx (mode);
21189 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21190 }
21191 emit_insn (gen_rtx_SET (VOIDmode, xa,
21192 gen_rtx_AND (mode, op0, mask)));
21193
21194 if (smask)
21195 *smask = mask;
21196
21197 return xa;
21198 }
21199
21200 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21201 swapping the operands if SWAP_OPERANDS is true. The expanded
21202 code is a forward jump to a newly created label in case the
21203 comparison is true. The generated label rtx is returned. */
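/* Note that the compare is emitted in CCFPUmode (ucomis-style, which
   avoids faulting on quiet NaNs), and the callers pass UN* comparison
   codes so that NaN operands take the early-exit path of the rounding
   expanders below.  */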
21204 static rtx
21205 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21206 bool swap_operands)
21207 {
21208 rtx label, tmp;
21209
21210 if (swap_operands)
21211 {
21212 tmp = op0;
21213 op0 = op1;
21214 op1 = tmp;
21215 }
21216
21217 label = gen_label_rtx ();
21218 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21219 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21220 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21221 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21222 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21223 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21224 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21225 JUMP_LABEL (tmp) = label;
21226
21227 return label;
21228 }
21229
21230 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21231 using comparison code CODE. Operands are swapped for the comparison if
21232 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21233 static rtx
21234 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21235 bool swap_operands)
21236 {
21237 enum machine_mode mode = GET_MODE (op0);
21238 rtx mask = gen_reg_rtx (mode);
21239
21240 if (swap_operands)
21241 {
21242 rtx tmp = op0;
21243 op0 = op1;
21244 op1 = tmp;
21245 }
21246
21247 if (mode == DFmode)
21248 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21249 gen_rtx_fmt_ee (code, mode, op0, op1)));
21250 else
21251 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21252 gen_rtx_fmt_ee (code, mode, op0, op1)));
21253
21254 return mask;
21255 }
21256
21257 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21258 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
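/* Adding this constant and subtracting it again rounds a non-negative
   value to an integer in the current rounding mode: at 2**52 (2**23
   for SFmode) the spacing between adjacent representable values is
   already 1.0, so the addition drops the fractional bits.  The
   expanders below only use this trick after checking isless (xa, TWO52)
   and restore the original sign with copysign afterwards.  */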
21259 static rtx
21260 ix86_gen_TWO52 (enum machine_mode mode)
21261 {
21262 REAL_VALUE_TYPE TWO52r;
21263 rtx TWO52;
21264
21265 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21266 TWO52 = const_double_from_real_value (TWO52r, mode);
21267 TWO52 = force_reg (mode, TWO52);
21268
21269 return TWO52;
21270 }
21271
21272 /* Expand SSE sequence for computing lround from OP1 storing
21273 into OP0. */
21274 void
21275 ix86_expand_lround (rtx op0, rtx op1)
21276 {
21277 /* C code for the stuff we're doing below:
21278 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21279 return (long)tmp;
21280 */
21281 enum machine_mode mode = GET_MODE (op1);
21282 const struct real_format *fmt;
21283 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21284 rtx adj;
21285
21286 /* load nextafter (0.5, 0.0) */
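/* pred_half is nextafter (0.5, 0.0) = 0.5 - 2**(-p-1), the largest
   representable value strictly below 0.5.  Using it instead of 0.5
   keeps inputs just under a halfway point (e.g. the largest double
   below 0.5) from being rounded up by the addition.  */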
21287 fmt = REAL_MODE_FORMAT (mode);
21288 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21289 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21290
21291 /* adj = copysign (0.5, op1) */
21292 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21293 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21294
21295 /* adj = op1 + adj */
21296 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21297
21298 /* op0 = (imode)adj */
21299 expand_fix (op0, adj, 0);
21300 }
21301
21302 /* Expand SSE2 sequence for computing lfloor or lceil (depending on
21303 DO_FLOOR) from OPERAND1, storing into OPERAND0. */
21304 void
21305 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21306 {
21307 /* C code for the stuff we're doing below (for do_floor; the ceil
variant adds 1 when (double)xi < op1 instead of subtracting):
21308 xi = (long)op1;
21309 xi -= (double)xi > op1 ? 1 : 0;
21310 return xi;
21311 */
21312 enum machine_mode fmode = GET_MODE (op1);
21313 enum machine_mode imode = GET_MODE (op0);
21314 rtx ireg, freg, label, tmp;
21315
21316 /* reg = (long)op1 */
21317 ireg = gen_reg_rtx (imode);
21318 expand_fix (ireg, op1, 0);
21319
21320 /* freg = (double)reg */
21321 freg = gen_reg_rtx (fmode);
21322 expand_float (freg, ireg, 0);
21323
21324 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21325 label = ix86_expand_sse_compare_and_jump (UNLE,
21326 freg, op1, !do_floor);
21327 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21328 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21329 emit_move_insn (ireg, tmp);
21330
21331 emit_label (label);
21332 LABEL_NUSES (label) = 1;
21333
21334 emit_move_insn (op0, ireg);
21335 }
21336
21337 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21338 result in OPERAND0. */
21339 void
21340 ix86_expand_rint (rtx operand0, rtx operand1)
21341 {
21342 /* C code for the stuff we're doing below:
21343 xa = fabs (operand1);
21344 if (!isless (xa, 2**52))
21345 return operand1;
21346 xa = xa + 2**52 - 2**52;
21347 return copysign (xa, operand1);
21348 */
21349 enum machine_mode mode = GET_MODE (operand0);
21350 rtx res, xa, label, TWO52, mask;
21351
21352 res = gen_reg_rtx (mode);
21353 emit_move_insn (res, operand1);
21354
21355 /* xa = abs (operand1) */
21356 xa = ix86_expand_sse_fabs (res, &mask);
21357
21358 /* if (!isless (xa, TWO52)) goto label; */
21359 TWO52 = ix86_gen_TWO52 (mode);
21360 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21361
21362 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21363 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21364
21365 ix86_sse_copysign_to_positive (res, xa, res, mask);
21366
21367 emit_label (label);
21368 LABEL_NUSES (label) = 1;
21369
21370 emit_move_insn (operand0, res);
21371 }
21372
21373 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21374 into OPERAND0, without relying on DImode truncation via cvttsd2siq,
which is only available on 64-bit targets. */
21375 void
21376 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21377 {
21378 /* C code for the stuff we expand below.
21379 double xa = fabs (x), x2;
21380 if (!isless (xa, TWO52))
21381 return x;
21382 xa = xa + TWO52 - TWO52;
21383 x2 = copysign (xa, x);
21384 Compensate. Floor:
21385 if (x2 > x)
21386 x2 -= 1;
21387 Compensate. Ceil:
21388 if (x2 < x)
21389 x2 -= -1;
21390 return x2;
21391 */
21392 enum machine_mode mode = GET_MODE (operand0);
21393 rtx xa, TWO52, tmp, label, one, res, mask;
21394
21395 TWO52 = ix86_gen_TWO52 (mode);
21396
21397 /* Temporary for holding the result, initialized to the input
21398 operand to ease control flow. */
21399 res = gen_reg_rtx (mode);
21400 emit_move_insn (res, operand1);
21401
21402 /* xa = abs (operand1) */
21403 xa = ix86_expand_sse_fabs (res, &mask);
21404
21405 /* if (!isless (xa, TWO52)) goto label; */
21406 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21407
21408 /* xa = xa + TWO52 - TWO52; */
21409 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21410 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21411
21412 /* xa = copysign (xa, operand1) */
21413 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21414
21415 /* generate 1.0 or -1.0 */
21416 one = force_reg (mode,
21417 const_double_from_real_value (do_floor
21418 ? dconst1 : dconstm1, mode));
21419
21420 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21421 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21422 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21423 gen_rtx_AND (mode, one, tmp)));
21424 /* We always need to subtract here to preserve signed zero. */
21425 tmp = expand_simple_binop (mode, MINUS,
21426 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21427 emit_move_insn (res, tmp);
21428
21429 emit_label (label);
21430 LABEL_NUSES (label) = 1;
21431
21432 emit_move_insn (operand0, res);
21433 }
21434
21435 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21436 into OPERAND0. */
21437 void
21438 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21439 {
21440 /* C code for the stuff we expand below.
21441 double xa = fabs (x), x2;
21442 if (!isless (xa, TWO52))
21443 return x;
21444 x2 = (double)(long)x;
21445 Compensate. Floor:
21446 if (x2 > x)
21447 x2 -= 1;
21448 Compensate. Ceil:
21449 if (x2 < x)
21450 x2 += 1;
21451 if (HONOR_SIGNED_ZEROS (mode))
21452 return copysign (x2, x);
21453 return x2;
21454 */
21455 enum machine_mode mode = GET_MODE (operand0);
21456 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21457
21458 TWO52 = ix86_gen_TWO52 (mode);
21459
21460 /* Temporary for holding the result, initialized to the input
21461 operand to ease control flow. */
21462 res = gen_reg_rtx (mode);
21463 emit_move_insn (res, operand1);
21464
21465 /* xa = abs (operand1) */
21466 xa = ix86_expand_sse_fabs (res, &mask);
21467
21468 /* if (!isless (xa, TWO52)) goto label; */
21469 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21470
21471 /* xa = (double)(long)x */
21472 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21473 expand_fix (xi, res, 0);
21474 expand_float (xa, xi, 0);
21475
21476 /* generate 1.0 */
21477 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21478
21479 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21480 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21481 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21482 gen_rtx_AND (mode, one, tmp)));
21483 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21484 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21485 emit_move_insn (res, tmp);
21486
21487 if (HONOR_SIGNED_ZEROS (mode))
21488 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21489
21490 emit_label (label);
21491 LABEL_NUSES (label) = 1;
21492
21493 emit_move_insn (operand0, res);
21494 }
21495
21496 /* Expand SSE sequence for computing round from OPERAND1 storing
21497 into OPERAND0. Sequence that works without relying on DImode truncation
21498 via cvttsd2siq, which is only available on 64-bit targets. */
21499 void
21500 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21501 {
21502 /* C code for the stuff we expand below.
21503 double xa = fabs (x), xa2, x2;
21504 if (!isless (xa, TWO52))
21505 return x;
21506 Using the absolute value and copying back sign makes
21507 -0.0 -> -0.0 correct.
21508 xa2 = xa + TWO52 - TWO52;
21509 Compensate.
21510 dxa = xa2 - xa;
21511 if (dxa <= -0.5)
21512 xa2 += 1;
21513 else if (dxa > 0.5)
21514 xa2 -= 1;
21515 x2 = copysign (xa2, x);
21516 return x2;
21517 */
21518 enum machine_mode mode = GET_MODE (operand0);
21519 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21520
21521 TWO52 = ix86_gen_TWO52 (mode);
21522
21523 /* Temporary for holding the result, initialized to the input
21524 operand to ease control flow. */
21525 res = gen_reg_rtx (mode);
21526 emit_move_insn (res, operand1);
21527
21528 /* xa = abs (operand1) */
21529 xa = ix86_expand_sse_fabs (res, &mask);
21530
21531 /* if (!isless (xa, TWO52)) goto label; */
21532 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21533
21534 /* xa2 = xa + TWO52 - TWO52; */
21535 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21536 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21537
21538 /* dxa = xa2 - xa; */
21539 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21540
21541 /* generate 0.5, 1.0 and -0.5 */
21542 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21543 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21544 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21545 0, OPTAB_DIRECT);
21546
21547 /* Compensate. */
21548 tmp = gen_reg_rtx (mode);
21549 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21550 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21551 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21552 gen_rtx_AND (mode, one, tmp)));
21553 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21554 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21555 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21556 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21557 gen_rtx_AND (mode, one, tmp)));
21558 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21559
21560 /* res = copysign (xa2, operand1) */
21561 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21562
21563 emit_label (label);
21564 LABEL_NUSES (label) = 1;
21565
21566 emit_move_insn (operand0, res);
21567 }
21568
21569 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21570 into OPERAND0. */
21571 void
21572 ix86_expand_trunc (rtx operand0, rtx operand1)
21573 {
21574 /* C code for SSE variant we expand below.
21575 double xa = fabs (x), x2;
21576 if (!isless (xa, TWO52))
21577 return x;
21578 x2 = (double)(long)x;
21579 if (HONOR_SIGNED_ZEROS (mode))
21580 return copysign (x2, x);
21581 return x2;
21582 */
21583 enum machine_mode mode = GET_MODE (operand0);
21584 rtx xa, xi, TWO52, label, res, mask;
21585
21586 TWO52 = ix86_gen_TWO52 (mode);
21587
21588 /* Temporary for holding the result, initialized to the input
21589 operand to ease control flow. */
21590 res = gen_reg_rtx (mode);
21591 emit_move_insn (res, operand1);
21592
21593 /* xa = abs (operand1) */
21594 xa = ix86_expand_sse_fabs (res, &mask);
21595
21596 /* if (!isless (xa, TWO52)) goto label; */
21597 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21598
21599 /* x = (double)(long)x */
21600 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21601 expand_fix (xi, res, 0);
21602 expand_float (res, xi, 0);
21603
21604 if (HONOR_SIGNED_ZEROS (mode))
21605 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21606
21607 emit_label (label);
21608 LABEL_NUSES (label) = 1;
21609
21610 emit_move_insn (operand0, res);
21611 }
21612
21613 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21614 into OPERAND0. Sequence that works without relying on DImode truncation
via cvttsd2siq, which is only available on 64-bit targets. */
21615 void
21616 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21617 {
21618 enum machine_mode mode = GET_MODE (operand0);
21619 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21620
21621 /* C code for SSE variant we expand below.
21622 double xa = fabs (x), x2;
21623 if (!isless (xa, TWO52))
21624 return x;
21625 xa2 = xa + TWO52 - TWO52;
21626 Compensate:
21627 if (xa2 > xa)
21628 xa2 -= 1.0;
21629 x2 = copysign (xa2, x);
21630 return x2;
21631 */
21632
21633 TWO52 = ix86_gen_TWO52 (mode);
21634
21635 /* Temporary for holding the result, initialized to the input
21636 operand to ease control flow. */
21637 res = gen_reg_rtx (mode);
21638 emit_move_insn (res, operand1);
21639
21640 /* xa = abs (operand1) */
21641 xa = ix86_expand_sse_fabs (res, &smask);
21642
21643 /* if (!isless (xa, TWO52)) goto label; */
21644 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21645
21646 /* res = xa + TWO52 - TWO52; */
21647 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21648 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21649 emit_move_insn (res, tmp);
21650
21651 /* generate 1.0 */
21652 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21653
21654 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21655 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21656 emit_insn (gen_rtx_SET (VOIDmode, mask,
21657 gen_rtx_AND (mode, mask, one)));
21658 tmp = expand_simple_binop (mode, MINUS,
21659 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21660 emit_move_insn (res, tmp);
21661
21662 /* res = copysign (res, operand1) */
21663 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21664
21665 emit_label (label);
21666 LABEL_NUSES (label) = 1;
21667
21668 emit_move_insn (operand0, res);
21669 }
21670
21671 /* Expand SSE sequence for computing round from OPERAND1 storing
21672 into OPERAND0. */
21673 void
21674 ix86_expand_round (rtx operand0, rtx operand1)
21675 {
21676 /* C code for the stuff we're doing below:
21677 double xa = fabs (x);
21678 if (!isless (xa, TWO52))
21679 return x;
21680 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21681 return copysign (xa, x);
21682 */
21683 enum machine_mode mode = GET_MODE (operand0);
21684 rtx res, TWO52, xa, label, xi, half, mask;
21685 const struct real_format *fmt;
21686 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21687
21688 /* Temporary for holding the result, initialized to the input
21689 operand to ease control flow. */
21690 res = gen_reg_rtx (mode);
21691 emit_move_insn (res, operand1);
21692
21693 TWO52 = ix86_gen_TWO52 (mode);
21694 xa = ix86_expand_sse_fabs (res, &mask);
21695 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21696
21697 /* load nextafter (0.5, 0.0) */
21698 fmt = REAL_MODE_FORMAT (mode);
21699 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21700 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21701
21702 /* xa = xa + 0.5 */
21703 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21704 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21705
21706 /* xa = (double)(int64_t)xa */
21707 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21708 expand_fix (xi, xa, 0);
21709 expand_float (xa, xi, 0);
21710
21711 /* res = copysign (xa, operand1) */
21712 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21713
21714 emit_label (label);
21715 LABEL_NUSES (label) = 1;
21716
21717 emit_move_insn (operand0, res);
21718 }
21719
21720 #include "gt-i386.h"