1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
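/* For example, MODE_INDEX (SImode) is 2, so the SImode entries of the
   multiply and divide cost tables below are reached roughly as (assuming the
   mult_init[] and divide[] array fields of struct processor_costs declared
   in i386.h):

     ix86_cost->mult_init[MODE_INDEX (SImode)]   -- cost of starting an SI multiply
     ix86_cost->divide[MODE_INDEX (SImode)]      -- cost of an SI divide/mod  */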
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
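/* A worked example of the assumption above: COSTS_N_INSNS (1) == 4, so an add
   costs 4 units, and COSTS_N_BYTES (2) == 4 weights a 2-byte instruction the
   same as one add when tuning for size; COSTS_N_BYTES (5) == 10 then makes a
   5-byte instruction two and a half times as expensive as an add.  */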
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 353   /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
 354      the alignment).  For small blocks an inline loop is still a noticeable win; for bigger
 355      blocks either rep movsl or rep movsb is the way to go.  Rep movsb apparently has a
 356      more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 532   /* For some reason, Athlon deals better with the REP prefix (relative to loops)
 533      than K8 does.  Alignment becomes important after 8 bytes for memcpy and
 534      128 bytes for memset.  */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
 584   /* New AMD processors never drop prefetches; if they cannot be performed
 585      immediately, they are queued.  We set the number of simultaneous prefetches
 586      to a large constant to reflect this (it is probably not a good idea to leave
 587      the number of prefetches entirely unlimited, as their execution also takes
 588      some time).  */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 597   /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
 598      blocks it is better to use a loop.  For large blocks, a libcall can do
 599      nontemporal accesses and beat inline code considerably.  */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
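/* How to read the memcpy/memset descriptors in the tables above (a sketch;
   the authoritative layout is struct stringop_algs in i386.h): each descriptor
   gives the algorithm to use when the block size is not known at compile time,
   followed by {max_size, algorithm} pairs terminated by max_size == -1.  For
   the K8 memcpy entry {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}
   this means blocks of up to 6 bytes use a simple loop, blocks up to 14 bytes an
   unrolled loop, and everything larger rep movsl.  The two descriptors per
   operation are presumably selected for 32-bit and 64-bit code generation
   respectively; DUMMY_STRINGOP_ALGS fills the slot a given CPU never uses.  */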
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
 657   /* New AMD processors never drop prefetches; if they cannot be performed
 658      immediately, they are queued.  We set the number of simultaneous prefetches
 659      to a large constant to reflect this (it is probably not a good idea to leave
 660      the number of prefetches entirely unlimited, as their execution also takes
 661      some time).  */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
 671   /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 672      very small blocks it is better to use a loop.  For large blocks, a libcall can
 673      do nontemporal accesses and beat inline code considerably.  */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
 828   {4, 4, 4},				/* cost of storing fp registers */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
 862   /* On all chips taken into consideration lea is 2 cycles or more.  With
 863      this cost, however, our current implementation of synth_mult results in
 864      the use of unnecessary temporary registers, causing regressions on several
 865      SPECfp benchmarks.  */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
 907   /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 908      is increased to the perhaps more appropriate value of 5.  */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
 1003 /* Generic instruction choice should be a common subset of the supported CPUs
 1004    (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
 1009   /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
 1010      negatively, so enabling it for Generic64 seems like a good code size
 1011      tradeoff.  We can't enable it for 32bit generic because it does not
 1012      work well with PPro-based chips.  */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1031
 1032   /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put into P4 based
 1033      on simulation results.  But after P4 was made, no performance benefit
 1034      was observed from branch hints.  They also increase code size.
 1035      As a result, icc never generates branch hints.  */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
1043 /* | m_GENERIC | m_ATHLON_K8 ? */
1044
 1045   /* X86_TUNE_MOVX: Enable to zero-extend integer registers to avoid
 1046      partial dependencies.  */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
 1050   /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
 1051      register stalls on the Generic32 compilation setting as well.  However,
 1052      in the current implementation partial register stalls are not eliminated
 1053      very well - they can be introduced via subregs synthesized by combine
 1054      and can happen in caller/callee saving sequences.  Because this option
 1055      pays back little on PPro-based chips and conflicts with the partial reg
 1056      dependencies used by Athlon/P4-based chips, it is better to leave it off
 1057      for generic32 for now.  */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
 1100   /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
 1101      register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL, this option
 1102      might be considered for Generic32 if our scheme for avoiding partial
 1103      stalls were more effective.  */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
 1134   /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
 1135      conflict here between PPro/Pentium4-based chips that treat 128bit
 1136      SSE registers as single units and K8-based chips that divide SSE
 1137      registers into two 64bit halves.  This knob promotes all store destinations
 1138      to be 128bit to allow register renaming on 128bit SSE units, but usually
 1139      results in one extra microop on 64bit SSE units.  Experimental results
 1140      show that disabling this option on P4 brings over a 20% SPECfp regression,
 1141      while enabling it on K8 brings roughly a 2.4% regression that can be partly
 1142      masked by careful scheduling of moves.  */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
 1148   /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
 1149      are resolved on SSE register parts instead of whole registers, so we may
 1150      maintain just the lower part of scalar values in the proper format, leaving
 1151      the upper part undefined.  */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
1196 };
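/* A sketch of how these masks are consumed (the actual accessor macros live
   in i386.h and may differ in detail): each entry is a bitmask over the
   PROCESSOR_* enumeration, so a feature is active for the CPU currently being
   tuned for roughly when

     ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)

   is nonzero, in which case e.g. the epilogue may use the leave instruction.  */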
1197
1198 /* Feature tests against the various architecture variations. */
1199 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1200 /* X86_ARCH_CMOVE */
1201 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1202
1203 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1204 ~m_386,
1205
1206 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1207 ~(m_386 | m_486),
1208
1209 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1210 ~m_386,
1211
1212 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1213 ~m_386,
1214 };
1215
1216 static const unsigned int x86_accumulate_outgoing_args
1217 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1218
1219 static const unsigned int x86_arch_always_fancy_math_387
1220 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1221 | m_NOCONA | m_CORE2 | m_GENERIC;
1222
1223 static enum stringop_alg stringop_alg = no_stringop;
1224
 1225 /* If the average insn count for a single function invocation is
 1226    lower than this constant, emit fast (but longer) prologue and
 1227    epilogue code.  */
1228 #define FAST_PROLOGUE_INSN_COUNT 20
1229
 1230 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively.  */
1231 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1232 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1233 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1234
1235 /* Array of the smallest class containing reg number REGNO, indexed by
1236 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1237
1238 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1239 {
1240 /* ax, dx, cx, bx */
1241 AREG, DREG, CREG, BREG,
1242 /* si, di, bp, sp */
1243 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1244 /* FP registers */
1245 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1246 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1247 /* arg pointer */
1248 NON_Q_REGS,
1249 /* flags, fpsr, fpcr, frame */
1250 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1251 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1252 SSE_REGS, SSE_REGS,
1253 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1254 MMX_REGS, MMX_REGS,
1255 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1256 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1257 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1258 SSE_REGS, SSE_REGS,
1259 };
1260
1261 /* The "default" register map used in 32bit mode. */
1262
1263 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1264 {
1265 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1266 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1267 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1268 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1269 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1270 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1271 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1272 };
1273
1274 static int const x86_64_int_parameter_registers[6] =
1275 {
1276 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1277 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1278 };
1279
1280 static int const x86_64_int_return_registers[4] =
1281 {
 1282   0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1283 };
1284
1285 /* The "default" register map used in 64bit mode. */
1286 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1287 {
1288 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1289 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1290 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1291 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1292 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1293 8,9,10,11,12,13,14,15, /* extended integer registers */
1294 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1295 };
1296
1297 /* Define the register numbers to be used in Dwarf debugging information.
1298 The SVR4 reference port C compiler uses the following register numbers
1299 in its Dwarf output code:
1300 0 for %eax (gcc regno = 0)
1301 1 for %ecx (gcc regno = 2)
1302 2 for %edx (gcc regno = 1)
1303 3 for %ebx (gcc regno = 3)
1304 4 for %esp (gcc regno = 7)
1305 5 for %ebp (gcc regno = 6)
1306 6 for %esi (gcc regno = 4)
1307 7 for %edi (gcc regno = 5)
1308 The following three DWARF register numbers are never generated by
1309 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1310 believes these numbers have these meanings.
1311 8 for %eip (no gcc equivalent)
1312 9 for %eflags (gcc regno = 17)
1313 10 for %trapno (no gcc equivalent)
1314 It is not at all clear how we should number the FP stack registers
1315 for the x86 architecture. If the version of SDB on x86/svr4 were
1316 a bit less brain dead with respect to floating-point then we would
1317 have a precedent to follow with respect to DWARF register numbers
1318 for x86 FP registers, but the SDB on x86/svr4 is so completely
1319 broken with respect to FP registers that it is hardly worth thinking
1320 of it as something to strive for compatibility with.
1321 The version of x86/svr4 SDB I have at the moment does (partially)
1322 seem to believe that DWARF register number 11 is associated with
1323 the x86 register %st(0), but that's about all. Higher DWARF
1324 register numbers don't seem to be associated with anything in
1325 particular, and even for DWARF regno 11, SDB only seems to under-
1326 stand that it should say that a variable lives in %st(0) (when
1327 asked via an `=' command) if we said it was in DWARF regno 11,
1328 but SDB still prints garbage when asked for the value of the
1329 variable in question (via a `/' command).
1330 (Also note that the labels SDB prints for various FP stack regs
1331 when doing an `x' command are all wrong.)
1332 Note that these problems generally don't affect the native SVR4
1333 C compiler because it doesn't allow the use of -O with -g and
1334 because when it is *not* optimizing, it allocates a memory
1335 location for each floating-point variable, and the memory
1336 location is what gets described in the DWARF AT_location
1337 attribute for the variable in question.
1338 Regardless of the severe mental illness of the x86/svr4 SDB, we
1339 do something sensible here and we use the following DWARF
1340 register numbers. Note that these are all stack-top-relative
1341 numbers.
1342 11 for %st(0) (gcc regno = 8)
1343 12 for %st(1) (gcc regno = 9)
1344 13 for %st(2) (gcc regno = 10)
1345 14 for %st(3) (gcc regno = 11)
1346 15 for %st(4) (gcc regno = 12)
1347 16 for %st(5) (gcc regno = 13)
1348 17 for %st(6) (gcc regno = 14)
1349 18 for %st(7) (gcc regno = 15)
1350 */
1351 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1352 {
1353 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1354 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1355 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1356 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1357 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1358 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1359 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1360 };
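/* For example, per the table above gcc regno 1 (%edx) is emitted as DWARF
   register 2, gcc regno 6 (%ebp) as DWARF register 5, and gcc regno 7 (%esp)
   as DWARF register 4, matching the SVR4 numbering listed in the comment
   preceding the table.  */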
1361
1362 /* Test and compare insns in i386.md store the information needed to
1363 generate branch and scc insns here. */
1364
1365 rtx ix86_compare_op0 = NULL_RTX;
1366 rtx ix86_compare_op1 = NULL_RTX;
1367 rtx ix86_compare_emitted = NULL_RTX;
1368
1369 /* Size of the register save area. */
1370 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
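/* With the usual x86-64 values (REGPARM_MAX == 6 integer registers,
   UNITS_PER_WORD == 8 and SSE_REGPARM_MAX == 8, assuming the definitions in
   i386.h), this works out to 6 * 8 + 8 * 16 == 176 bytes for the va_arg
   register save area.  */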
1371
1372 /* Define the structure for the machine field in struct function. */
1373
1374 struct stack_local_entry GTY(())
1375 {
1376 unsigned short mode;
1377 unsigned short n;
1378 rtx rtl;
1379 struct stack_local_entry *next;
1380 };
1381
1382 /* Structure describing stack frame layout.
1383 Stack grows downward:
1384
1385 [arguments]
1386 <- ARG_POINTER
1387 saved pc
1388
1389 saved frame pointer if frame_pointer_needed
1390 <- HARD_FRAME_POINTER
1391 [saved regs]
1392
1393 [padding1] \
1394 )
1395 [va_arg registers] (
1396 > to_allocate <- FRAME_POINTER
1397 [frame] (
1398 )
1399 [padding2] /
1400 */
1401 struct ix86_frame
1402 {
1403 int nregs;
1404 int padding1;
1405 int va_arg_size;
1406 HOST_WIDE_INT frame;
1407 int padding2;
1408 int outgoing_arguments_size;
1409 int red_zone_size;
1410
1411 HOST_WIDE_INT to_allocate;
1412 /* The offsets relative to ARG_POINTER. */
1413 HOST_WIDE_INT frame_pointer_offset;
1414 HOST_WIDE_INT hard_frame_pointer_offset;
1415 HOST_WIDE_INT stack_pointer_offset;
1416
1417 /* When save_regs_using_mov is set, emit prologue using
1418 move instead of push instructions. */
1419 bool save_regs_using_mov;
1420 };
1421
1422 /* Code model option. */
1423 enum cmodel ix86_cmodel;
1424 /* Asm dialect. */
1425 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1426 /* TLS dialects. */
1427 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1428
1429 /* Which unit we are generating floating point math for. */
1430 enum fpmath_unit ix86_fpmath;
1431
1432 /* Which cpu are we scheduling for. */
1433 enum processor_type ix86_tune;
1434
1435 /* Which instruction set architecture to use. */
1436 enum processor_type ix86_arch;
1437
1438 /* true if the sse prefetch instruction is not a NOP. */
1439 int x86_prefetch_sse;
1440
1441 /* true if cmpxchg16b is supported. */
1442 int x86_cmpxchg16b;
1443
1444 /* ix86_regparm_string as a number */
1445 static int ix86_regparm;
1446
1447 /* -mstackrealign option */
1448 extern int ix86_force_align_arg_pointer;
1449 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1450
1451 /* Preferred alignment for stack boundary in bits. */
1452 unsigned int ix86_preferred_stack_boundary;
1453
1454 /* Values 1-5: see jump.c */
1455 int ix86_branch_cost;
1456
1457 /* Variables which are this size or smaller are put in the data/bss
1458 or ldata/lbss sections. */
1459
1460 int ix86_section_threshold = 65536;
1461
1462 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1463 char internal_label_prefix[16];
1464 int internal_label_prefix_len;
1465 \f
1466 static bool ix86_handle_option (size_t, const char *, int);
1467 static void output_pic_addr_const (FILE *, rtx, int);
1468 static void put_condition_code (enum rtx_code, enum machine_mode,
1469 int, int, FILE *);
1470 static const char *get_some_local_dynamic_name (void);
1471 static int get_some_local_dynamic_name_1 (rtx *, void *);
1472 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1473 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1474 rtx *);
1475 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1476 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1477 enum machine_mode);
1478 static rtx get_thread_pointer (int);
1479 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1480 static void get_pc_thunk_name (char [32], unsigned int);
1481 static rtx gen_push (rtx);
1482 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1483 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1484 static struct machine_function * ix86_init_machine_status (void);
1485 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1486 static int ix86_nsaved_regs (void);
1487 static void ix86_emit_save_regs (void);
1488 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1489 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1490 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1491 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1492 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1493 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1494 static int ix86_issue_rate (void);
1495 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1496 static int ia32_multipass_dfa_lookahead (void);
1497 static void ix86_init_mmx_sse_builtins (void);
1498 static rtx x86_this_parameter (tree);
1499 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1500 HOST_WIDE_INT, tree);
1501 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1502 static void x86_file_start (void);
1503 static void ix86_reorg (void);
1504 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1505 static tree ix86_build_builtin_va_list (void);
1506 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1507 tree, int *, int);
1508 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1509 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1510 static bool ix86_vector_mode_supported_p (enum machine_mode);
1511
1512 static int ix86_address_cost (rtx);
1513 static bool ix86_cannot_force_const_mem (rtx);
1514 static rtx ix86_delegitimize_address (rtx);
1515
1516 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1517
1518 struct builtin_description;
1519 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1520 tree, rtx);
1521 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1522 tree, rtx);
1523 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1524 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1525 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1526 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1527 static rtx safe_vector_operand (rtx, enum machine_mode);
1528 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1529 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1530 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1531 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1532 static int ix86_fp_comparison_cost (enum rtx_code code);
1533 static unsigned int ix86_select_alt_pic_regnum (void);
1534 static int ix86_save_reg (unsigned int, int);
1535 static void ix86_compute_frame_layout (struct ix86_frame *);
1536 static int ix86_comp_type_attributes (tree, tree);
1537 static int ix86_function_regparm (tree, tree);
1538 const struct attribute_spec ix86_attribute_table[];
1539 static bool ix86_function_ok_for_sibcall (tree, tree);
1540 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1541 static int ix86_value_regno (enum machine_mode, tree, tree);
1542 static bool contains_128bit_aligned_vector_p (tree);
1543 static rtx ix86_struct_value_rtx (tree, int);
1544 static bool ix86_ms_bitfield_layout_p (tree);
1545 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1546 static int extended_reg_mentioned_1 (rtx *, void *);
1547 static bool ix86_rtx_costs (rtx, int, int, int *);
1548 static int min_insn_size (rtx);
1549 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1550 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1551 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1552 tree, bool);
1553 static void ix86_init_builtins (void);
1554 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1555 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1556 static tree ix86_builtin_conversion (enum tree_code, tree);
1557 static const char *ix86_mangle_fundamental_type (tree);
1558 static tree ix86_stack_protect_fail (void);
1559 static rtx ix86_internal_arg_pointer (void);
1560 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1561 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1562 rtx, rtx, int);
1563
1564 /* This function is only used on Solaris. */
1565 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1566 ATTRIBUTE_UNUSED;
1567
1568 /* Register class used for passing a given 64bit part of the argument.
1569 These represent classes as documented by the PS ABI, with the exception
1570 of the SSESF and SSEDF classes, which are basically the SSE class, except
1571 that gcc will use SF or DFmode moves instead of DImode to avoid
1572 reformatting penalties.
1573 
1574 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1575 whenever possible (the upper half does contain padding). */
1576 enum x86_64_reg_class
1577 {
1578 X86_64_NO_CLASS,
1579 X86_64_INTEGER_CLASS,
1580 X86_64_INTEGERSI_CLASS,
1581 X86_64_SSE_CLASS,
1582 X86_64_SSESF_CLASS,
1583 X86_64_SSEDF_CLASS,
1584 X86_64_SSEUP_CLASS,
1585 X86_64_X87_CLASS,
1586 X86_64_X87UP_CLASS,
1587 X86_64_COMPLEX_X87_CLASS,
1588 X86_64_MEMORY_CLASS
1589 };
1590 static const char * const x86_64_reg_class_name[] = {
1591 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1592 "sseup", "x87", "x87up", "cplx87", "no"
1593 };
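/* For illustration of the classification this enum mirrors (per the x86-64
   psABI): a hypothetical `struct { double d; long l; }' spans two eightbytes;
   the first is classified as SSE (SSEDF here, so a DFmode move is used) and
   the second as INTEGER, so the struct is passed in one SSE register and one
   integer register.  */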
1594
1595 #define MAX_CLASSES 4
1596
1597 /* Table of constants used by fldpi, fldln2, etc.... */
1598 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1599 static bool ext_80387_constants_init = 0;
1600 static void init_ext_80387_constants (void);
1601 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1602 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1603 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1604 static section *x86_64_elf_select_section (tree decl, int reloc,
1605 unsigned HOST_WIDE_INT align)
1606 ATTRIBUTE_UNUSED;
1607 \f
1608 /* Initialize the GCC target structure. */
1609 #undef TARGET_ATTRIBUTE_TABLE
1610 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1611 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1612 # undef TARGET_MERGE_DECL_ATTRIBUTES
1613 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1614 #endif
1615
1616 #undef TARGET_COMP_TYPE_ATTRIBUTES
1617 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1618
1619 #undef TARGET_INIT_BUILTINS
1620 #define TARGET_INIT_BUILTINS ix86_init_builtins
1621 #undef TARGET_EXPAND_BUILTIN
1622 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1623
1624 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1625 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1626 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1627 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1628
1629 #undef TARGET_ASM_FUNCTION_EPILOGUE
1630 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1631
1632 #undef TARGET_ENCODE_SECTION_INFO
1633 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1634 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1635 #else
1636 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1637 #endif
1638
1639 #undef TARGET_ASM_OPEN_PAREN
1640 #define TARGET_ASM_OPEN_PAREN ""
1641 #undef TARGET_ASM_CLOSE_PAREN
1642 #define TARGET_ASM_CLOSE_PAREN ""
1643
1644 #undef TARGET_ASM_ALIGNED_HI_OP
1645 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1646 #undef TARGET_ASM_ALIGNED_SI_OP
1647 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1648 #ifdef ASM_QUAD
1649 #undef TARGET_ASM_ALIGNED_DI_OP
1650 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1651 #endif
1652
1653 #undef TARGET_ASM_UNALIGNED_HI_OP
1654 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1655 #undef TARGET_ASM_UNALIGNED_SI_OP
1656 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1657 #undef TARGET_ASM_UNALIGNED_DI_OP
1658 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1659
1660 #undef TARGET_SCHED_ADJUST_COST
1661 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1662 #undef TARGET_SCHED_ISSUE_RATE
1663 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1666 ia32_multipass_dfa_lookahead
1667
1668 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1669 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1670
1671 #ifdef HAVE_AS_TLS
1672 #undef TARGET_HAVE_TLS
1673 #define TARGET_HAVE_TLS true
1674 #endif
1675 #undef TARGET_CANNOT_FORCE_CONST_MEM
1676 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1677 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1678 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1679
1680 #undef TARGET_DELEGITIMIZE_ADDRESS
1681 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1682
1683 #undef TARGET_MS_BITFIELD_LAYOUT_P
1684 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1685
1686 #if TARGET_MACHO
1687 #undef TARGET_BINDS_LOCAL_P
1688 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1689 #endif
1690
1691 #undef TARGET_ASM_OUTPUT_MI_THUNK
1692 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1693 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1694 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1695
1696 #undef TARGET_ASM_FILE_START
1697 #define TARGET_ASM_FILE_START x86_file_start
1698
1699 #undef TARGET_DEFAULT_TARGET_FLAGS
1700 #define TARGET_DEFAULT_TARGET_FLAGS \
1701 (TARGET_DEFAULT \
1702 | TARGET_64BIT_DEFAULT \
1703 | TARGET_SUBTARGET_DEFAULT \
1704 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1705
1706 #undef TARGET_HANDLE_OPTION
1707 #define TARGET_HANDLE_OPTION ix86_handle_option
1708
1709 #undef TARGET_RTX_COSTS
1710 #define TARGET_RTX_COSTS ix86_rtx_costs
1711 #undef TARGET_ADDRESS_COST
1712 #define TARGET_ADDRESS_COST ix86_address_cost
1713
1714 #undef TARGET_FIXED_CONDITION_CODE_REGS
1715 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1716 #undef TARGET_CC_MODES_COMPATIBLE
1717 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1718
1719 #undef TARGET_MACHINE_DEPENDENT_REORG
1720 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1721
1722 #undef TARGET_BUILD_BUILTIN_VA_LIST
1723 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1724
1725 #undef TARGET_MD_ASM_CLOBBERS
1726 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1727
1728 #undef TARGET_PROMOTE_PROTOTYPES
1729 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1730 #undef TARGET_STRUCT_VALUE_RTX
1731 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1732 #undef TARGET_SETUP_INCOMING_VARARGS
1733 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1734 #undef TARGET_MUST_PASS_IN_STACK
1735 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1736 #undef TARGET_PASS_BY_REFERENCE
1737 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1738 #undef TARGET_INTERNAL_ARG_POINTER
1739 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1740 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1741 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1742
1743 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1744 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1745
1746 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1747 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1748
1749 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1750 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1751
1752 #ifdef HAVE_AS_TLS
1753 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1754 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1755 #endif
1756
1757 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1758 #undef TARGET_INSERT_ATTRIBUTES
1759 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1760 #endif
1761
1762 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1763 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1764
1765 #undef TARGET_STACK_PROTECT_FAIL
1766 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1767
1768 #undef TARGET_FUNCTION_VALUE
1769 #define TARGET_FUNCTION_VALUE ix86_function_value
1770
1771 struct gcc_target targetm = TARGET_INITIALIZER;
1772
1773 \f
1774 /* The svr4 ABI for the i386 says that records and unions are returned
1775 in memory. */
1776 #ifndef DEFAULT_PCC_STRUCT_RETURN
1777 #define DEFAULT_PCC_STRUCT_RETURN 1
1778 #endif
1779
1780 /* Implement TARGET_HANDLE_OPTION. */
1781
1782 static bool
1783 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1784 {
1785 switch (code)
1786 {
1787 case OPT_m3dnow:
1788 if (!value)
1789 {
1790 target_flags &= ~MASK_3DNOW_A;
1791 target_flags_explicit |= MASK_3DNOW_A;
1792 }
1793 return true;
1794
1795 case OPT_mmmx:
1796 if (!value)
1797 {
1798 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1799 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1800 }
1801 return true;
1802
1803 case OPT_msse:
1804 if (!value)
1805 {
1806 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1807 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1808 }
1809 return true;
1810
1811 case OPT_msse2:
1812 if (!value)
1813 {
1814 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1815 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1816 }
1817 return true;
1818
1819 case OPT_msse3:
1820 if (!value)
1821 {
1822 target_flags &= ~MASK_SSE4A;
1823 target_flags_explicit |= MASK_SSE4A;
1824 }
1825 return true;
1826
1827 default:
1828 return true;
1829 }
1830 }
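/* A hedged illustration of the handler above (hypothetical command lines):
   "-msse2 -mno-sse" ends up with no SSE support at all, because the OPT_msse
   case also clears MASK_SSE2, MASK_SSE3 and MASK_SSE4A when the option is
   negated; "-mno-sse3" leaves SSE and SSE2 alone but additionally clears
   MASK_SSE4A.  MASK_SSE itself is handled by the generic option machinery
   through the Mask annotations in i386.opt.  */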
1831
1832 /* Sometimes certain combinations of command options do not make
1833 sense on a particular target machine. You can define a macro
1834 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1835 defined, is executed once just after all the command options have
1836 been parsed.
1837
1838 Don't use this macro to turn on various extra optimizations for
1839 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1840
1841 void
1842 override_options (void)
1843 {
1844 int i;
1845 int ix86_tune_defaulted = 0;
1846 unsigned int ix86_arch_mask, ix86_tune_mask;
1847
1848 /* Comes from final.c -- no real reason to change it. */
1849 #define MAX_CODE_ALIGN 16
1850
1851 static struct ptt
1852 {
1853 const struct processor_costs *cost; /* Processor costs */
1854 const int target_enable; /* Target flags to enable. */
1855 const int target_disable; /* Target flags to disable. */
1856 const int align_loop; /* Default alignments. */
1857 const int align_loop_max_skip;
1858 const int align_jump;
1859 const int align_jump_max_skip;
1860 const int align_func;
1861 }
1862 const processor_target_table[PROCESSOR_max] =
1863 {
1864 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1865 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1866 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1867 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1868 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1869 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1870 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1871 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1872 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1873 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1874 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1875 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1876 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1877 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1878 };
1879
1880 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1881 static struct pta
1882 {
1883 const char *const name; /* processor name or nickname. */
1884 const enum processor_type processor;
1885 const enum pta_flags
1886 {
1887 PTA_SSE = 1,
1888 PTA_SSE2 = 2,
1889 PTA_SSE3 = 4,
1890 PTA_MMX = 8,
1891 PTA_PREFETCH_SSE = 16,
1892 PTA_3DNOW = 32,
1893 PTA_3DNOW_A = 64,
1894 PTA_64BIT = 128,
1895 PTA_SSSE3 = 256,
1896 PTA_CX16 = 512,
1897 PTA_POPCNT = 1024,
1898 PTA_ABM = 2048,
1899 PTA_SSE4A = 4096
1900 } flags;
1901 }
1902 const processor_alias_table[] =
1903 {
1904 {"i386", PROCESSOR_I386, 0},
1905 {"i486", PROCESSOR_I486, 0},
1906 {"i586", PROCESSOR_PENTIUM, 0},
1907 {"pentium", PROCESSOR_PENTIUM, 0},
1908 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1909 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1910 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1911 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1912 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1913 {"i686", PROCESSOR_PENTIUMPRO, 0},
1914 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1915 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1916 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1917 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1918 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1919 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1920 | PTA_MMX | PTA_PREFETCH_SSE},
1921 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1922 | PTA_MMX | PTA_PREFETCH_SSE},
1923 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1924 | PTA_MMX | PTA_PREFETCH_SSE},
1925 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1926 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1927 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1928 | PTA_64BIT | PTA_MMX
1929 | PTA_PREFETCH_SSE | PTA_CX16},
1930 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1931 | PTA_3DNOW_A},
1932 {"k6", PROCESSOR_K6, PTA_MMX},
1933 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1934 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1935 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1936 | PTA_3DNOW_A},
1937 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1938 | PTA_3DNOW | PTA_3DNOW_A},
1939 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1940 | PTA_3DNOW_A | PTA_SSE},
1941 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1942 | PTA_3DNOW_A | PTA_SSE},
1943 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1944 | PTA_3DNOW_A | PTA_SSE},
1945 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1946 | PTA_SSE | PTA_SSE2 },
1947 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1948 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1949 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1950 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1951 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1952 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1953 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1954 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1955 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1956 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1957 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1958 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1959 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1960 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1961 };
1962
1963 int const pta_size = ARRAY_SIZE (processor_alias_table);
1964
1965 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1966 SUBTARGET_OVERRIDE_OPTIONS;
1967 #endif
1968
1969 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1970 SUBSUBTARGET_OVERRIDE_OPTIONS;
1971 #endif
1972
1973 /* -fPIC is the default for 64-bit Mach-O. */
1974 if (TARGET_MACHO && TARGET_64BIT)
1975 flag_pic = 2;
1976
1977 /* Set the default values for switches whose default depends on TARGET_64BIT
1978 in case they weren't overwritten by command line options. */
1979 if (TARGET_64BIT)
1980 {
1981 /* Mach-O doesn't support omitting the frame pointer for now. */
1982 if (flag_omit_frame_pointer == 2)
1983 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1984 if (flag_asynchronous_unwind_tables == 2)
1985 flag_asynchronous_unwind_tables = 1;
1986 if (flag_pcc_struct_return == 2)
1987 flag_pcc_struct_return = 0;
1988 }
1989 else
1990 {
1991 if (flag_omit_frame_pointer == 2)
1992 flag_omit_frame_pointer = 0;
1993 if (flag_asynchronous_unwind_tables == 2)
1994 flag_asynchronous_unwind_tables = 0;
1995 if (flag_pcc_struct_return == 2)
1996 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1997 }
1998
1999 /* Need to check -mtune=generic first. */
2000 if (ix86_tune_string)
2001 {
2002 if (!strcmp (ix86_tune_string, "generic")
2003 || !strcmp (ix86_tune_string, "i686")
2004 /* As special support for cross compilers we read -mtune=native
2005 as -mtune=generic. With native compilers we won't see the
2006 -mtune=native, as it was changed by the driver. */
2007 || !strcmp (ix86_tune_string, "native"))
2008 {
2009 if (TARGET_64BIT)
2010 ix86_tune_string = "generic64";
2011 else
2012 ix86_tune_string = "generic32";
2013 }
2014 else if (!strncmp (ix86_tune_string, "generic", 7))
2015 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2016 }
2017 else
2018 {
2019 if (ix86_arch_string)
2020 ix86_tune_string = ix86_arch_string;
2021 if (!ix86_tune_string)
2022 {
2023 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2024 ix86_tune_defaulted = 1;
2025 }
2026
2027 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2028 need to use a sensible tune option. */
2029 if (!strcmp (ix86_tune_string, "generic")
2030 || !strcmp (ix86_tune_string, "x86-64")
2031 || !strcmp (ix86_tune_string, "i686"))
2032 {
2033 if (TARGET_64BIT)
2034 ix86_tune_string = "generic64";
2035 else
2036 ix86_tune_string = "generic32";
2037 }
2038 }
2039 if (ix86_stringop_string)
2040 {
2041 if (!strcmp (ix86_stringop_string, "rep_byte"))
2042 stringop_alg = rep_prefix_1_byte;
2043 else if (!strcmp (ix86_stringop_string, "libcall"))
2044 stringop_alg = libcall;
2045 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2046 stringop_alg = rep_prefix_4_byte;
2047 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2048 stringop_alg = rep_prefix_8_byte;
2049 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2050 stringop_alg = loop_1_byte;
2051 else if (!strcmp (ix86_stringop_string, "loop"))
2052 stringop_alg = loop;
2053 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2054 stringop_alg = unrolled_loop;
2055 else
2056 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2057 }
2058 if (!strcmp (ix86_tune_string, "x86-64"))
2059 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2060 "-mtune=generic instead as appropriate.");
2061
2062 if (!ix86_arch_string)
2063 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2064 if (!strcmp (ix86_arch_string, "generic"))
2065 error ("generic CPU can be used only for -mtune= switch");
2066 if (!strncmp (ix86_arch_string, "generic", 7))
2067 error ("bad value (%s) for -march= switch", ix86_arch_string);
2068
2069 if (ix86_cmodel_string != 0)
2070 {
2071 if (!strcmp (ix86_cmodel_string, "small"))
2072 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2073 else if (!strcmp (ix86_cmodel_string, "medium"))
2074 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2075 else if (flag_pic)
2076 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2077 else if (!strcmp (ix86_cmodel_string, "32"))
2078 ix86_cmodel = CM_32;
2079 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2080 ix86_cmodel = CM_KERNEL;
2081 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2082 ix86_cmodel = CM_LARGE;
2083 else
2084 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2085 }
2086 else
2087 {
2088 ix86_cmodel = CM_32;
2089 if (TARGET_64BIT)
2090 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2091 }
2092 if (ix86_asm_string != 0)
2093 {
2094 if (! TARGET_MACHO
2095 && !strcmp (ix86_asm_string, "intel"))
2096 ix86_asm_dialect = ASM_INTEL;
2097 else if (!strcmp (ix86_asm_string, "att"))
2098 ix86_asm_dialect = ASM_ATT;
2099 else
2100 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2101 }
2102 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2103 error ("code model %qs not supported in the %s bit mode",
2104 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2105 if (ix86_cmodel == CM_LARGE)
2106 sorry ("code model %<large%> not supported yet");
2107 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2108 sorry ("%i-bit mode not compiled in",
2109 (target_flags & MASK_64BIT) ? 64 : 32);
2110
2111 for (i = 0; i < pta_size; i++)
2112 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2113 {
2114 ix86_arch = processor_alias_table[i].processor;
2115 /* Default cpu tuning to the architecture. */
2116 ix86_tune = ix86_arch;
2117 if (processor_alias_table[i].flags & PTA_MMX
2118 && !(target_flags_explicit & MASK_MMX))
2119 target_flags |= MASK_MMX;
2120 if (processor_alias_table[i].flags & PTA_3DNOW
2121 && !(target_flags_explicit & MASK_3DNOW))
2122 target_flags |= MASK_3DNOW;
2123 if (processor_alias_table[i].flags & PTA_3DNOW_A
2124 && !(target_flags_explicit & MASK_3DNOW_A))
2125 target_flags |= MASK_3DNOW_A;
2126 if (processor_alias_table[i].flags & PTA_SSE
2127 && !(target_flags_explicit & MASK_SSE))
2128 target_flags |= MASK_SSE;
2129 if (processor_alias_table[i].flags & PTA_SSE2
2130 && !(target_flags_explicit & MASK_SSE2))
2131 target_flags |= MASK_SSE2;
2132 if (processor_alias_table[i].flags & PTA_SSE3
2133 && !(target_flags_explicit & MASK_SSE3))
2134 target_flags |= MASK_SSE3;
2135 if (processor_alias_table[i].flags & PTA_SSSE3
2136 && !(target_flags_explicit & MASK_SSSE3))
2137 target_flags |= MASK_SSSE3;
2138 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2139 x86_prefetch_sse = true;
2140 if (processor_alias_table[i].flags & PTA_CX16)
2141 x86_cmpxchg16b = true;
2142 if (processor_alias_table[i].flags & PTA_POPCNT
2143 && !(target_flags_explicit & MASK_POPCNT))
2144 target_flags |= MASK_POPCNT;
2145 if (processor_alias_table[i].flags & PTA_ABM
2146 && !(target_flags_explicit & MASK_ABM))
2147 target_flags |= MASK_ABM;
2148 if (processor_alias_table[i].flags & PTA_SSE4A
2149 && !(target_flags_explicit & MASK_SSE4A))
2150 target_flags |= MASK_SSE4A;
2151 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2152 error ("CPU you selected does not support x86-64 "
2153 "instruction set");
2154 break;
2155 }
2156
2157 if (i == pta_size)
2158 error ("bad value (%s) for -march= switch", ix86_arch_string);
2159
2160 ix86_arch_mask = 1u << ix86_arch;
2161 for (i = 0; i < X86_ARCH_LAST; ++i)
2162 ix86_arch_features[i] &= ix86_arch_mask;
2163
2164 for (i = 0; i < pta_size; i++)
2165 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2166 {
2167 ix86_tune = processor_alias_table[i].processor;
2168 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2169 {
2170 if (ix86_tune_defaulted)
2171 {
2172 ix86_tune_string = "x86-64";
2173 for (i = 0; i < pta_size; i++)
2174 if (! strcmp (ix86_tune_string,
2175 processor_alias_table[i].name))
2176 break;
2177 ix86_tune = processor_alias_table[i].processor;
2178 }
2179 else
2180 error ("CPU you selected does not support x86-64 "
2181 "instruction set");
2182 }
2183 /* Intel CPUs have always interpreted SSE prefetch instructions as
2184 NOPs; so, we can enable SSE prefetch instructions even when
2185 -mtune (rather than -march) points us to a processor that has them.
2186 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2187 higher processors. */
2188 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2189 x86_prefetch_sse = true;
2190 break;
2191 }
2192 if (i == pta_size)
2193 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2194
2195 ix86_tune_mask = 1u << ix86_tune;
2196 for (i = 0; i < X86_TUNE_LAST; ++i)
2197 ix86_tune_features[i] &= ix86_tune_mask;
2198
2199 if (optimize_size)
2200 ix86_cost = &size_cost;
2201 else
2202 ix86_cost = processor_target_table[ix86_tune].cost;
2203 target_flags |= processor_target_table[ix86_tune].target_enable;
2204 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2205
2206 /* Arrange to set up i386_stack_locals for all functions. */
2207 init_machine_status = ix86_init_machine_status;
2208
2209 /* Validate -mregparm= value. */
2210 if (ix86_regparm_string)
2211 {
2212 i = atoi (ix86_regparm_string);
2213 if (i < 0 || i > REGPARM_MAX)
2214 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2215 else
2216 ix86_regparm = i;
2217 }
2218 else
2219 if (TARGET_64BIT)
2220 ix86_regparm = REGPARM_MAX;
2221
2222 /* If the user has provided any of the -malign-* options,
2223 warn and use that value only if -falign-* is not set.
2224 Remove this code in GCC 3.2 or later. */
2225 if (ix86_align_loops_string)
2226 {
2227 warning (0, "-malign-loops is obsolete, use -falign-loops");
2228 if (align_loops == 0)
2229 {
2230 i = atoi (ix86_align_loops_string);
2231 if (i < 0 || i > MAX_CODE_ALIGN)
2232 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2233 else
2234 align_loops = 1 << i;
2235 }
2236 }
2237
2238 if (ix86_align_jumps_string)
2239 {
2240 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2241 if (align_jumps == 0)
2242 {
2243 i = atoi (ix86_align_jumps_string);
2244 if (i < 0 || i > MAX_CODE_ALIGN)
2245 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2246 else
2247 align_jumps = 1 << i;
2248 }
2249 }
2250
2251 if (ix86_align_funcs_string)
2252 {
2253 warning (0, "-malign-functions is obsolete, use -falign-functions");
2254 if (align_functions == 0)
2255 {
2256 i = atoi (ix86_align_funcs_string);
2257 if (i < 0 || i > MAX_CODE_ALIGN)
2258 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2259 else
2260 align_functions = 1 << i;
2261 }
2262 }
2263
2264 /* Default align_* from the processor table. */
2265 if (align_loops == 0)
2266 {
2267 align_loops = processor_target_table[ix86_tune].align_loop;
2268 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2269 }
2270 if (align_jumps == 0)
2271 {
2272 align_jumps = processor_target_table[ix86_tune].align_jump;
2273 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2274 }
2275 if (align_functions == 0)
2276 {
2277 align_functions = processor_target_table[ix86_tune].align_func;
2278 }
2279
2280 /* Validate -mbranch-cost= value, or provide default. */
2281 ix86_branch_cost = ix86_cost->branch_cost;
2282 if (ix86_branch_cost_string)
2283 {
2284 i = atoi (ix86_branch_cost_string);
2285 if (i < 0 || i > 5)
2286 error ("-mbranch-cost=%d is not between 0 and 5", i);
2287 else
2288 ix86_branch_cost = i;
2289 }
2290 if (ix86_section_threshold_string)
2291 {
2292 i = atoi (ix86_section_threshold_string);
2293 if (i < 0)
2294 error ("-mlarge-data-threshold=%d is negative", i);
2295 else
2296 ix86_section_threshold = i;
2297 }
2298
2299 if (ix86_tls_dialect_string)
2300 {
2301 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2302 ix86_tls_dialect = TLS_DIALECT_GNU;
2303 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2304 ix86_tls_dialect = TLS_DIALECT_GNU2;
2305 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2306 ix86_tls_dialect = TLS_DIALECT_SUN;
2307 else
2308 error ("bad value (%s) for -mtls-dialect= switch",
2309 ix86_tls_dialect_string);
2310 }
2311
2312 /* Keep nonleaf frame pointers. */
2313 if (flag_omit_frame_pointer)
2314 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2315 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2316 flag_omit_frame_pointer = 1;
2317
2318 /* If we're doing fast math, we don't care about comparison order
2319 wrt NaNs. This lets us use a shorter comparison sequence. */
2320 if (flag_finite_math_only)
2321 target_flags &= ~MASK_IEEE_FP;
2322
2323 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2324 since the insns won't need emulation. */
2325 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2326 target_flags &= ~MASK_NO_FANCY_MATH_387;
2327
2328 /* Likewise, if the target doesn't have a 387, or we've specified
2329 software floating point, don't use 387 inline intrinsics. */
2330 if (!TARGET_80387)
2331 target_flags |= MASK_NO_FANCY_MATH_387;
2332
2333 /* Turn on SSE3 builtins for -mssse3. */
2334 if (TARGET_SSSE3)
2335 target_flags |= MASK_SSE3;
2336
2337 /* Turn on SSE3 builtins for -msse4a. */
2338 if (TARGET_SSE4A)
2339 target_flags |= MASK_SSE3;
2340
2341 /* Turn on SSE2 builtins for -msse3. */
2342 if (TARGET_SSE3)
2343 target_flags |= MASK_SSE2;
2344
2345 /* Turn on SSE builtins for -msse2. */
2346 if (TARGET_SSE2)
2347 target_flags |= MASK_SSE;
2348
2349 /* Turn on MMX builtins for -msse. */
2350 if (TARGET_SSE)
2351 {
2352 target_flags |= MASK_MMX & ~target_flags_explicit;
2353 x86_prefetch_sse = true;
2354 }
2355
2356 /* Turn on MMX builtins for 3Dnow. */
2357 if (TARGET_3DNOW)
2358 target_flags |= MASK_MMX;
2359
2360 /* Turn on POPCNT builtins for -mabm. */
2361 if (TARGET_ABM)
2362 target_flags |= MASK_POPCNT;
2363
2364 if (TARGET_64BIT)
2365 {
2366 if (TARGET_ALIGN_DOUBLE)
2367 error ("-malign-double makes no sense in the 64bit mode");
2368 if (TARGET_RTD)
2369 error ("-mrtd calling convention not supported in the 64bit mode");
2370
2371 /* Enable by default the SSE and MMX builtins. Do allow the user to
2372 explicitly disable any of these. In particular, disabling SSE and
2373 MMX for kernel code is extremely useful. */
2374 target_flags
2375 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2376 & ~target_flags_explicit);
2377 }
2378 else
2379 {
2380 /* The i386 ABI does not specify a red zone. It still makes sense to use
2381 one when the programmer takes care to keep the stack from being destroyed. */
2382 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2383 target_flags |= MASK_NO_RED_ZONE;
2384 }
2385
2386 /* Validate -mpreferred-stack-boundary= value, or provide default.
2387 The default of 128 bits is for Pentium III's SSE __m128. We can't
2388 change it because of optimize_size. Otherwise, we can't mix object
2389 files compiled with -Os and -On. */
2390 ix86_preferred_stack_boundary = 128;
2391 if (ix86_preferred_stack_boundary_string)
2392 {
2393 i = atoi (ix86_preferred_stack_boundary_string);
2394 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2395 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2396 TARGET_64BIT ? 4 : 2);
2397 else
2398 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2399 }
2400
2401 /* Accept -msseregparm only if at least SSE support is enabled. */
2402 if (TARGET_SSEREGPARM
2403 && ! TARGET_SSE)
2404 error ("-msseregparm used without SSE enabled");
2405
2406 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2407 if (ix86_fpmath_string != 0)
2408 {
2409 if (! strcmp (ix86_fpmath_string, "387"))
2410 ix86_fpmath = FPMATH_387;
2411 else if (! strcmp (ix86_fpmath_string, "sse"))
2412 {
2413 if (!TARGET_SSE)
2414 {
2415 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2416 ix86_fpmath = FPMATH_387;
2417 }
2418 else
2419 ix86_fpmath = FPMATH_SSE;
2420 }
2421 else if (! strcmp (ix86_fpmath_string, "387,sse")
2422 || ! strcmp (ix86_fpmath_string, "sse,387"))
2423 {
2424 if (!TARGET_SSE)
2425 {
2426 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2427 ix86_fpmath = FPMATH_387;
2428 }
2429 else if (!TARGET_80387)
2430 {
2431 warning (0, "387 instruction set disabled, using SSE arithmetics");
2432 ix86_fpmath = FPMATH_SSE;
2433 }
2434 else
2435 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2436 }
2437 else
2438 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2439 }
2440
2441 /* If the i387 is disabled, then do not return values in it. */
2442 if (!TARGET_80387)
2443 target_flags &= ~MASK_FLOAT_RETURNS;
2444
2445 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2446 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2447 && !optimize_size)
2448 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2449
2450 /* ??? Unwind info is not correct around the CFG unless either a frame
2451 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2452 unwind info generation to be aware of the CFG and propagating states
2453 around edges. */
2454 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2455 || flag_exceptions || flag_non_call_exceptions)
2456 && flag_omit_frame_pointer
2457 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2458 {
2459 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2460 warning (0, "unwind tables currently require either a frame pointer "
2461 "or -maccumulate-outgoing-args for correctness");
2462 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2463 }
2464
2465 /* For sane SSE instruction set generation we need fcomi instruction.
2466 It is safe to enable all CMOVE instructions. */
2467 if (TARGET_SSE)
2468 TARGET_CMOVE = 1;
2469
2470 /* ??? Any idea why this is unconditionally disabled for 64-bit? */
2471 if (TARGET_64BIT)
2472 TARGET_USE_SAHF = 0;
2473
2474 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2475 {
2476 char *p;
2477 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2478 p = strchr (internal_label_prefix, 'X');
2479 internal_label_prefix_len = p - internal_label_prefix;
2480 *p = '\0';
2481 }
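/* Hedged illustration: on a typical ELF target where
   ASM_GENERATE_INTERNAL_LABEL produces something like "*.LX0", the code
   above finds the 'X', leaving "*.L" in internal_label_prefix with
   internal_label_prefix_len == 3 (the exact prefix is target-dependent,
   which is why it is computed at runtime here).  */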
2482
2483 /* When the scheduling description is not available, disable the scheduler
2484 pass so it won't slow down compilation and make x87 code slower. */
2485 if (!TARGET_SCHEDULE)
2486 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2487
2488 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2489 set_param_value ("simultaneous-prefetches",
2490 ix86_cost->simultaneous_prefetches);
2491 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2492 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2493 }
2494 \f
2495 /* Switch to the appropriate section for output of DECL.
2496 DECL is either a `VAR_DECL' node or a constant of some sort.
2497 RELOC indicates whether forming the initial value of DECL requires
2498 link-time relocations. */
2499
2500 static section *
2501 x86_64_elf_select_section (tree decl, int reloc,
2502 unsigned HOST_WIDE_INT align)
2503 {
2504 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2505 && ix86_in_large_data_p (decl))
2506 {
2507 const char *sname = NULL;
2508 unsigned int flags = SECTION_WRITE;
2509 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2510 {
2511 case SECCAT_DATA:
2512 sname = ".ldata";
2513 break;
2514 case SECCAT_DATA_REL:
2515 sname = ".ldata.rel";
2516 break;
2517 case SECCAT_DATA_REL_LOCAL:
2518 sname = ".ldata.rel.local";
2519 break;
2520 case SECCAT_DATA_REL_RO:
2521 sname = ".ldata.rel.ro";
2522 break;
2523 case SECCAT_DATA_REL_RO_LOCAL:
2524 sname = ".ldata.rel.ro.local";
2525 break;
2526 case SECCAT_BSS:
2527 sname = ".lbss";
2528 flags |= SECTION_BSS;
2529 break;
2530 case SECCAT_RODATA:
2531 case SECCAT_RODATA_MERGE_STR:
2532 case SECCAT_RODATA_MERGE_STR_INIT:
2533 case SECCAT_RODATA_MERGE_CONST:
2534 sname = ".lrodata";
2535 flags = 0;
2536 break;
2537 case SECCAT_SRODATA:
2538 case SECCAT_SDATA:
2539 case SECCAT_SBSS:
2540 gcc_unreachable ();
2541 case SECCAT_TEXT:
2542 case SECCAT_TDATA:
2543 case SECCAT_TBSS:
2544 /* We don't split these for the medium model. Place them into
2545 default sections and hope for the best. */
2546 break;
2547 }
2548 if (sname)
2549 {
2550 /* We might get called with string constants, but get_named_section
2551 doesn't like them as they are not DECLs. Also, we need to set
2552 flags in that case. */
2553 if (!DECL_P (decl))
2554 return get_section (sname, flags, NULL);
2555 return get_named_section (decl, sname, reloc);
2556 }
2557 }
2558 return default_elf_select_section (decl, reloc, align);
2559 }
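/* A hedged illustration of the medium-model behaviour above (hypothetical
   user code, assuming -mcmodel=medium and the default 65536-byte
   -mlarge-data-threshold):  */
#if 0
static int big_table[100000] = { 1 };  /* ~400 kB, initialized: placed in .ldata.  */
static int tiny_table[16] = { 1 };     /* below the threshold: normal .data.  */
#endif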
2560
2561 /* Build up a unique section name, expressed as a
2562 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2563 RELOC indicates whether the initial value of DECL requires
2564 link-time relocations. */
2565
2566 static void
2567 x86_64_elf_unique_section (tree decl, int reloc)
2568 {
2569 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2570 && ix86_in_large_data_p (decl))
2571 {
2572 const char *prefix = NULL;
2573 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2574 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2575
2576 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2577 {
2578 case SECCAT_DATA:
2579 case SECCAT_DATA_REL:
2580 case SECCAT_DATA_REL_LOCAL:
2581 case SECCAT_DATA_REL_RO:
2582 case SECCAT_DATA_REL_RO_LOCAL:
2583 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2584 break;
2585 case SECCAT_BSS:
2586 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2587 break;
2588 case SECCAT_RODATA:
2589 case SECCAT_RODATA_MERGE_STR:
2590 case SECCAT_RODATA_MERGE_STR_INIT:
2591 case SECCAT_RODATA_MERGE_CONST:
2592 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2593 break;
2594 case SECCAT_SRODATA:
2595 case SECCAT_SDATA:
2596 case SECCAT_SBSS:
2597 gcc_unreachable ();
2598 case SECCAT_TEXT:
2599 case SECCAT_TDATA:
2600 case SECCAT_TBSS:
2601 /* We don't split these for the medium model. Place them into
2602 default sections and hope for the best. */
2603 break;
2604 }
2605 if (prefix)
2606 {
2607 const char *name;
2608 size_t nlen, plen;
2609 char *string;
2610 plen = strlen (prefix);
2611
2612 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2613 name = targetm.strip_name_encoding (name);
2614 nlen = strlen (name);
2615
2616 string = alloca (nlen + plen + 1);
2617 memcpy (string, prefix, plen);
2618 memcpy (string + plen, name, nlen + 1);
2619
2620 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2621 return;
2622 }
2623 }
2624 default_unique_section (decl, reloc);
2625 }
2626
2627 #ifdef COMMON_ASM_OP
2628 /* This says how to output assembler code to declare an
2629 uninitialized external linkage data object.
2630
2631 For medium model x86-64 we need to use the .largecomm directive for
2632 large objects. */
2633 void
2634 x86_elf_aligned_common (FILE *file,
2635 const char *name, unsigned HOST_WIDE_INT size,
2636 int align)
2637 {
2638 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2639 && size > (unsigned int)ix86_section_threshold)
2640 fprintf (file, ".largecomm\t");
2641 else
2642 fprintf (file, "%s", COMMON_ASM_OP);
2643 assemble_name (file, name);
2644 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2645 size, align / BITS_PER_UNIT);
2646 }
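/* For illustration (hypothetical symbol names), a 100000-byte common object
   with 256-bit alignment under -mcmodel=medium would be announced roughly as
       .largecomm  big_buf,100000,32
   while a small object uses the usual COMMON_ASM_OP form, e.g.
       .comm       small_buf,16,4
   Note that the alignment is printed in bytes (align / BITS_PER_UNIT).  */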
2647 #endif
2648 /* Utility function for targets to use in implementing
2649 ASM_OUTPUT_ALIGNED_BSS. */
2650
2651 void
2652 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2653 const char *name, unsigned HOST_WIDE_INT size,
2654 int align)
2655 {
2656 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2657 && size > (unsigned int)ix86_section_threshold)
2658 switch_to_section (get_named_section (decl, ".lbss", 0));
2659 else
2660 switch_to_section (bss_section);
2661 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2662 #ifdef ASM_DECLARE_OBJECT_NAME
2663 last_assemble_variable_decl = decl;
2664 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2665 #else
2666 /* Standard thing is just to output a label for the object. */
2667 ASM_OUTPUT_LABEL (file, name);
2668 #endif /* ASM_DECLARE_OBJECT_NAME */
2669 ASM_OUTPUT_SKIP (file, size ? size : 1);
2670 }
2671 \f
2672 void
2673 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2674 {
2675 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2676 make the problem with not enough registers even worse. */
2677 #ifdef INSN_SCHEDULING
2678 if (level > 1)
2679 flag_schedule_insns = 0;
2680 #endif
2681
2682 if (TARGET_MACHO)
2683 /* The Darwin libraries never set errno, so we might as well
2684 avoid calling them when that's the only reason we would. */
2685 flag_errno_math = 0;
2686
2687 /* The default values of these switches depend on TARGET_64BIT,
2688 which is not yet known at this point. Mark their values with 2 and
2689 let the user override them. If no command line option specifies
2690 them, we will set the defaults in override_options. */
2691 if (optimize >= 1)
2692 flag_omit_frame_pointer = 2;
2693 flag_pcc_struct_return = 2;
2694 flag_asynchronous_unwind_tables = 2;
2695 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2696 SUBTARGET_OPTIMIZATION_OPTIONS;
2697 #endif
2698 }
2699 \f
2700 /* Table of valid machine attributes. */
2701 const struct attribute_spec ix86_attribute_table[] =
2702 {
2703 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2704 /* Stdcall attribute says callee is responsible for popping arguments
2705 if they are not variable. */
2706 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2707 /* Fastcall attribute says callee is responsible for popping arguments
2708 if they are not variable. */
2709 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2710 /* Cdecl attribute says the callee is a normal C declaration */
2711 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2712 /* Regparm attribute specifies how many integer arguments are to be
2713 passed in registers. */
2714 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2715 /* Sseregparm attribute says we are using x86_64 calling conventions
2716 for FP arguments. */
2717 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2718 /* force_align_arg_pointer says this function realigns the stack at entry. */
2719 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2720 false, true, true, ix86_handle_cconv_attribute },
2721 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2722 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2723 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2724 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2725 #endif
2726 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2727 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2728 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2729 SUBTARGET_ATTRIBUTE_TABLE,
2730 #endif
2731 { NULL, 0, 0, false, false, false, NULL }
2732 };
2733
2734 /* Decide whether we can make a sibling call to a function. DECL is the
2735 declaration of the function being targeted by the call and EXP is the
2736 CALL_EXPR representing the call. */
2737
2738 static bool
2739 ix86_function_ok_for_sibcall (tree decl, tree exp)
2740 {
2741 tree func;
2742 rtx a, b;
2743
2744 /* If we are generating position-independent code, we cannot sibcall
2745 optimize any indirect call, or a direct call to a global function,
2746 as the PLT requires %ebx be live. */
2747 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2748 return false;
2749
2750 if (decl)
2751 func = decl;
2752 else
2753 {
2754 func = TREE_TYPE (CALL_EXPR_FN (exp));
2755 if (POINTER_TYPE_P (func))
2756 func = TREE_TYPE (func);
2757 }
2758
2759 /* Check that the return value locations are the same. For example,
2760 if we are returning floats on the 80387 register stack, we cannot
2761 make a sibcall from a function that doesn't return a float to a
2762 function that does or, conversely, from a function that does return
2763 a float to a function that doesn't; the necessary stack adjustment
2764 would not be executed. This is also the place we notice
2765 differences in the return value ABI. Note that it is ok for one
2766 of the functions to have void return type as long as the return
2767 value of the other is passed in a register. */
2768 a = ix86_function_value (TREE_TYPE (exp), func, false);
2769 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2770 cfun->decl, false);
2771 if (STACK_REG_P (a) || STACK_REG_P (b))
2772 {
2773 if (!rtx_equal_p (a, b))
2774 return false;
2775 }
2776 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2777 ;
2778 else if (!rtx_equal_p (a, b))
2779 return false;
2780
2781 /* If this call is indirect, we'll need to be able to use a call-clobbered
2782 register for the address of the target function. Make sure that all
2783 such registers are not used for passing parameters. */
2784 if (!decl && !TARGET_64BIT)
2785 {
2786 tree type;
2787
2788 /* We're looking at the CALL_EXPR, we need the type of the function. */
2789 type = CALL_EXPR_FN (exp); /* pointer expression */
2790 type = TREE_TYPE (type); /* pointer type */
2791 type = TREE_TYPE (type); /* function type */
2792
2793 if (ix86_function_regparm (type, NULL) >= 3)
2794 {
2795 /* ??? Need to count the actual number of registers to be used,
2796 not the possible number of registers. Fix later. */
2797 return false;
2798 }
2799 }
2800
2801 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2802 /* Dllimport'd functions are also called indirectly. */
2803 if (decl && DECL_DLLIMPORT_P (decl)
2804 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2805 return false;
2806 #endif
2807
2808 /* If we force-aligned the stack, then sibcalling would unalign the
2809 stack, which may break the called function. */
2810 if (cfun->machine->force_align_arg_pointer)
2811 return false;
2812
2813 /* Otherwise okay. That also includes certain types of indirect calls. */
2814 return true;
2815 }
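/* A minimal illustration of the PIC restriction above (hypothetical code):
   when compiling 32-bit with -fPIC, the tail call below is not turned into
   a sibcall, because calling a non-locally-binding function goes through
   the PLT and requires %ebx to hold the GOT pointer.  */
#if 0
extern int global_fn (int);
int wrapper (int x) { return global_fn (x); }
#endif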
2816
2817 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2818 calling convention attributes;
2819 arguments as in struct attribute_spec.handler. */
2820
2821 static tree
2822 ix86_handle_cconv_attribute (tree *node, tree name,
2823 tree args,
2824 int flags ATTRIBUTE_UNUSED,
2825 bool *no_add_attrs)
2826 {
2827 if (TREE_CODE (*node) != FUNCTION_TYPE
2828 && TREE_CODE (*node) != METHOD_TYPE
2829 && TREE_CODE (*node) != FIELD_DECL
2830 && TREE_CODE (*node) != TYPE_DECL)
2831 {
2832 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2833 IDENTIFIER_POINTER (name));
2834 *no_add_attrs = true;
2835 return NULL_TREE;
2836 }
2837
2838 /* Can combine regparm with all attributes but fastcall. */
2839 if (is_attribute_p ("regparm", name))
2840 {
2841 tree cst;
2842
2843 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2844 {
2845 error ("fastcall and regparm attributes are not compatible");
2846 }
2847
2848 cst = TREE_VALUE (args);
2849 if (TREE_CODE (cst) != INTEGER_CST)
2850 {
2851 warning (OPT_Wattributes,
2852 "%qs attribute requires an integer constant argument",
2853 IDENTIFIER_POINTER (name));
2854 *no_add_attrs = true;
2855 }
2856 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2857 {
2858 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2859 IDENTIFIER_POINTER (name), REGPARM_MAX);
2860 *no_add_attrs = true;
2861 }
2862
2863 if (!TARGET_64BIT
2864 && lookup_attribute (ix86_force_align_arg_pointer_string,
2865 TYPE_ATTRIBUTES (*node))
2866 && compare_tree_int (cst, REGPARM_MAX-1))
2867 {
2868 error ("%s functions limited to %d register parameters",
2869 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2870 }
2871
2872 return NULL_TREE;
2873 }
2874
2875 if (TARGET_64BIT)
2876 {
2877 warning (OPT_Wattributes, "%qs attribute ignored",
2878 IDENTIFIER_POINTER (name));
2879 *no_add_attrs = true;
2880 return NULL_TREE;
2881 }
2882
2883 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2884 if (is_attribute_p ("fastcall", name))
2885 {
2886 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2887 {
2888 error ("fastcall and cdecl attributes are not compatible");
2889 }
2890 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2891 {
2892 error ("fastcall and stdcall attributes are not compatible");
2893 }
2894 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2895 {
2896 error ("fastcall and regparm attributes are not compatible");
2897 }
2898 }
2899
2900 /* Can combine stdcall with fastcall (redundant), regparm and
2901 sseregparm. */
2902 else if (is_attribute_p ("stdcall", name))
2903 {
2904 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2905 {
2906 error ("stdcall and cdecl attributes are not compatible");
2907 }
2908 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2909 {
2910 error ("stdcall and fastcall attributes are not compatible");
2911 }
2912 }
2913
2914 /* Can combine cdecl with regparm and sseregparm. */
2915 else if (is_attribute_p ("cdecl", name))
2916 {
2917 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2918 {
2919 error ("stdcall and cdecl attributes are not compatible");
2920 }
2921 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2922 {
2923 error ("fastcall and cdecl attributes are not compatible");
2924 }
2925 }
2926
2927 /* Can combine sseregparm with all attributes. */
2928
2929 return NULL_TREE;
2930 }
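/* Hedged illustration of the combination rules enforced above (hypothetical
   user declarations):  */
#if 0
void ok1 (int, int) __attribute__ ((stdcall, regparm (2)));   /* accepted */
void ok2 (int, int) __attribute__ ((fastcall, sseregparm));   /* accepted */
void bad (int, int) __attribute__ ((fastcall, regparm (2)));  /* rejected:
   "fastcall and regparm attributes are not compatible".  */
#endif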
2931
2932 /* Return 0 if the attributes for two types are incompatible, 1 if they
2933 are compatible, and 2 if they are nearly compatible (which causes a
2934 warning to be generated). */
2935
2936 static int
2937 ix86_comp_type_attributes (tree type1, tree type2)
2938 {
2939 /* Check for mismatch of non-default calling convention. */
2940 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2941
2942 if (TREE_CODE (type1) != FUNCTION_TYPE)
2943 return 1;
2944
2945 /* Check for mismatched fastcall/regparm types. */
2946 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2947 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2948 || (ix86_function_regparm (type1, NULL)
2949 != ix86_function_regparm (type2, NULL)))
2950 return 0;
2951
2952 /* Check for mismatched sseregparm types. */
2953 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2954 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2955 return 0;
2956
2957 /* Check for mismatched return types (cdecl vs stdcall). */
2958 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2959 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2960 return 0;
2961
2962 return 1;
2963 }
2964 \f
2965 /* Return the regparm value for a function with the indicated TYPE and DECL.
2966 DECL may be NULL when calling function indirectly
2967 or considering a libcall. */
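/* For example, a hypothetical user declaration such as

     int callee (int a, int b, int c) __attribute__ ((regparm (3)));

   makes this function return 3, so A, B and C are passed in %eax, %edx
   and %ecx rather than on the stack.  */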
2968
2969 static int
2970 ix86_function_regparm (tree type, tree decl)
2971 {
2972 tree attr;
2973 int regparm = ix86_regparm;
2974 bool user_convention = false;
2975
2976 if (!TARGET_64BIT)
2977 {
2978 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2979 if (attr)
2980 {
2981 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2982 user_convention = true;
2983 }
2984
2985 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2986 {
2987 regparm = 2;
2988 user_convention = true;
2989 }
2990
2991 /* Use register calling convention for local functions when possible. */
2992 if (!TARGET_64BIT && !user_convention && decl
2993 && flag_unit_at_a_time && !profile_flag)
2994 {
2995 struct cgraph_local_info *i = cgraph_local_info (decl);
2996 if (i && i->local)
2997 {
2998 int local_regparm, globals = 0, regno;
2999
3000 /* Make sure no regparm register is taken by a global register
3001 variable. */
3002 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3003 if (global_regs[local_regparm])
3004 break;
3005 /* We can't use regparm(3) for nested functions as these use
3006 static chain pointer in third argument. */
3007 if (local_regparm == 3
3008 && decl_function_context (decl)
3009 && !DECL_NO_STATIC_CHAIN (decl))
3010 local_regparm = 2;
3011 /* If the function realigns its stack pointer, the
3012 prologue will clobber %ecx. If we've already
3013 generated code for the callee, the callee
3014 DECL_STRUCT_FUNCTION is gone, so we fall back to
3015 scanning the attributes for the self-realigning
3016 property. */
3017 if ((DECL_STRUCT_FUNCTION (decl)
3018 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3019 || (!DECL_STRUCT_FUNCTION (decl)
3020 && lookup_attribute (ix86_force_align_arg_pointer_string,
3021 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3022 local_regparm = 2;
3023 /* Each global register variable increases register pressure,
3024 so the more global reg vars there are, the less the regparm
3025 optimization can be used, unless requested by the user explicitly. */
3026 for (regno = 0; regno < 6; regno++)
3027 if (global_regs[regno])
3028 globals++;
3029 local_regparm
3030 = globals < local_regparm ? local_regparm - globals : 0;
3031
3032 if (local_regparm > regparm)
3033 regparm = local_regparm;
3034 }
3035 }
3036 }
3037 return regparm;
3038 }
3039
3040 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3041 DFmode (2) arguments in SSE registers for a function with the
3042 indicated TYPE and DECL. DECL may be NULL when calling function
3043 indirectly or considering a libcall. Otherwise return 0. */
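/* For example, a hypothetical declaration such as

     double callee (double x) __attribute__ ((sseregparm));

   makes this function return 2 when SSE is enabled, so X is passed in
   an SSE register (%xmm0 for the first such argument) instead of on the
   stack.  */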
3044
3045 static int
3046 ix86_function_sseregparm (tree type, tree decl)
3047 {
3048 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3049 by the sseregparm attribute. */
3050 if (TARGET_SSEREGPARM
3051 || (type
3052 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3053 {
3054 if (!TARGET_SSE)
3055 {
3056 if (decl)
3057 error ("Calling %qD with attribute sseregparm without "
3058 "SSE/SSE2 enabled", decl);
3059 else
3060 error ("Calling %qT with attribute sseregparm without "
3061 "SSE/SSE2 enabled", type);
3062 return 0;
3063 }
3064
3065 return 2;
3066 }
3067
3068 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3069 (and DFmode for SSE2) arguments in SSE registers,
3070 even for 32-bit targets. */
3071 if (!TARGET_64BIT && decl
3072 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3073 {
3074 struct cgraph_local_info *i = cgraph_local_info (decl);
3075 if (i && i->local)
3076 return TARGET_SSE2 ? 2 : 1;
3077 }
3078
3079 return 0;
3080 }
3081
3082 /* Return true if EAX is live at the start of the function. Used by
3083 ix86_expand_prologue to determine if we need special help before
3084 calling allocate_stack_worker. */
3085
3086 static bool
3087 ix86_eax_live_at_start_p (void)
3088 {
3089 /* Cheat. Don't bother working forward from ix86_function_regparm
3090 to the function type to whether an actual argument is located in
3091 eax. Instead just look at cfg info, which is still close enough
3092 to correct at this point. This gives false positives for broken
3093 functions that might use uninitialized data that happens to be
3094 allocated in eax, but who cares? */
3095 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3096 }
3097
3098 /* Value is the number of bytes of arguments automatically
3099 popped when returning from a subroutine call.
3100 FUNDECL is the declaration node of the function (as a tree),
3101 FUNTYPE is the data type of the function (as a tree),
3102 or for a library call it is an identifier node for the subroutine name.
3103 SIZE is the number of bytes of arguments passed on the stack.
3104
3105 On the 80386, the RTD insn may be used to pop them if the number
3106 of args is fixed, but if the number is variable then the caller
3107 must pop them all. RTD can't be used for library calls now
3108 because the library is compiled with the Unix compiler.
3109 Use of RTD is a selectable option, since it is incompatible with
3110 standard Unix calling sequences. If the option is not selected,
3111 the caller must always pop the args.
3112
3113 The attribute stdcall is equivalent to RTD on a per module basis. */
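/* For example, a stdcall function taking two int arguments returns with
   "ret $8" and pops its 8 bytes of stack arguments itself, whereas the
   default cdecl convention returns with a plain "ret" and leaves the
   cleanup to the caller.  (Illustrative; the exact count is the SIZE
   argument below.)  */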
3114
3115 int
3116 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3117 {
3118 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3119
3120 /* Cdecl functions override -mrtd, and never pop the stack. */
3121 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3122
3123 /* Stdcall and fastcall functions will pop the stack if not
3124 variable args. */
3125 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3126 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3127 rtd = 1;
3128
3129 if (rtd
3130 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3131 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3132 == void_type_node)))
3133 return size;
3134 }
3135
3136 /* Lose any fake structure return argument if it is passed on the stack. */
3137 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3138 && !TARGET_64BIT
3139 && !KEEP_AGGREGATE_RETURN_POINTER)
3140 {
3141 int nregs = ix86_function_regparm (funtype, fundecl);
3142
3143 if (!nregs)
3144 return GET_MODE_SIZE (Pmode);
3145 }
3146
3147 return 0;
3148 }
3149 \f
3150 /* Argument support functions. */
3151
3152 /* Return true when register may be used to pass function parameters. */
3153 bool
3154 ix86_function_arg_regno_p (int regno)
3155 {
3156 int i;
3157 if (!TARGET_64BIT)
3158 {
3159 if (TARGET_MACHO)
3160 return (regno < REGPARM_MAX
3161 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3162 else
3163 return (regno < REGPARM_MAX
3164 || (TARGET_MMX && MMX_REGNO_P (regno)
3165 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3166 || (TARGET_SSE && SSE_REGNO_P (regno)
3167 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3168 }
3169
3170 if (TARGET_MACHO)
3171 {
3172 if (SSE_REGNO_P (regno) && TARGET_SSE)
3173 return true;
3174 }
3175 else
3176 {
3177 if (TARGET_SSE && SSE_REGNO_P (regno)
3178 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3179 return true;
3180 }
3181 /* RAX is used as a hidden argument to va_arg functions. */
3182 if (!regno)
3183 return true;
3184 for (i = 0; i < REGPARM_MAX; i++)
3185 if (regno == x86_64_int_parameter_registers[i])
3186 return true;
3187 return false;
3188 }
3189
3190 /* Return true if we do not know how to pass TYPE solely in registers. */
3191
3192 static bool
3193 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3194 {
3195 if (must_pass_in_stack_var_size_or_pad (mode, type))
3196 return true;
3197
3198 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3199 The layout_type routine is crafty and tries to trick us into passing
3200 currently unsupported vector types on the stack by using TImode. */
3201 return (!TARGET_64BIT && mode == TImode
3202 && type && TREE_CODE (type) != VECTOR_TYPE);
3203 }
3204
3205 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3206 for a call to a function whose data type is FNTYPE.
3207 For a library call, FNTYPE is 0. */
3208
3209 void
3210 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3211 tree fntype, /* tree ptr for function decl */
3212 rtx libname, /* SYMBOL_REF of library name or 0 */
3213 tree fndecl)
3214 {
3215 static CUMULATIVE_ARGS zero_cum;
3216 tree param, next_param;
3217
3218 if (TARGET_DEBUG_ARG)
3219 {
3220 fprintf (stderr, "\ninit_cumulative_args (");
3221 if (fntype)
3222 fprintf (stderr, "fntype code = %s, ret code = %s",
3223 tree_code_name[(int) TREE_CODE (fntype)],
3224 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3225 else
3226 fprintf (stderr, "no fntype");
3227
3228 if (libname)
3229 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3230 }
3231
3232 *cum = zero_cum;
3233
3234 /* Set up the number of registers to use for passing arguments. */
3235 cum->nregs = ix86_regparm;
3236 if (TARGET_SSE)
3237 cum->sse_nregs = SSE_REGPARM_MAX;
3238 if (TARGET_MMX)
3239 cum->mmx_nregs = MMX_REGPARM_MAX;
3240 cum->warn_sse = true;
3241 cum->warn_mmx = true;
3242 cum->maybe_vaarg = false;
3243
3244 /* Use ecx and edx registers if function has fastcall attribute,
3245 else look for regparm information. */
3246 if (fntype && !TARGET_64BIT)
3247 {
3248 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3249 {
3250 cum->nregs = 2;
3251 cum->fastcall = 1;
3252 }
3253 else
3254 cum->nregs = ix86_function_regparm (fntype, fndecl);
3255 }
3256
3257 /* Set up the number of SSE registers used for passing SFmode
3258 and DFmode arguments. Warn for mismatching ABI. */
3259 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3260
3261 /* Determine if this function has variable arguments. This is
3262 indicated by the last argument being 'void_type_node' if there
3263 are no variable arguments. If there are variable arguments, then
3264 we won't pass anything in registers in 32-bit mode. */
3265
3266 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3267 {
3268 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3269 param != 0; param = next_param)
3270 {
3271 next_param = TREE_CHAIN (param);
3272 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3273 {
3274 if (!TARGET_64BIT)
3275 {
3276 cum->nregs = 0;
3277 cum->sse_nregs = 0;
3278 cum->mmx_nregs = 0;
3279 cum->warn_sse = 0;
3280 cum->warn_mmx = 0;
3281 cum->fastcall = 0;
3282 cum->float_in_sse = 0;
3283 }
3284 cum->maybe_vaarg = true;
3285 }
3286 }
3287 }
3288 if ((!fntype && !libname)
3289 || (fntype && !TYPE_ARG_TYPES (fntype)))
3290 cum->maybe_vaarg = true;
3291
3292 if (TARGET_DEBUG_ARG)
3293 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3294
3295 return;
3296 }
3297
3298 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3299 But in the case of vector types, it is some vector mode.
3300
3301 When we have only some of our vector isa extensions enabled, then there
3302 are some modes for which vector_mode_supported_p is false. For these
3303 modes, the generic vector support in gcc will choose some non-vector mode
3304 in order to implement the type. By computing the natural mode, we'll
3305 select the proper ABI location for the operand and not depend on whatever
3306 the middle-end decides to do with these vector types. */
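/* For example, with a hypothetical generic vector type

     typedef int v4si __attribute__ ((vector_size (16)));

   TYPE_MODE may be some non-vector mode when SSE is disabled, but the
   natural mode computed here is still V4SImode, so the ABI slot chosen
   for such an argument does not depend on which ISA extensions happen
   to be enabled.  */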
3307
3308 static enum machine_mode
3309 type_natural_mode (tree type)
3310 {
3311 enum machine_mode mode = TYPE_MODE (type);
3312
3313 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3314 {
3315 HOST_WIDE_INT size = int_size_in_bytes (type);
3316 if ((size == 8 || size == 16)
3317 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3318 && TYPE_VECTOR_SUBPARTS (type) > 1)
3319 {
3320 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3321
3322 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3323 mode = MIN_MODE_VECTOR_FLOAT;
3324 else
3325 mode = MIN_MODE_VECTOR_INT;
3326
3327 /* Get the mode which has this inner mode and number of units. */
3328 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3329 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3330 && GET_MODE_INNER (mode) == innermode)
3331 return mode;
3332
3333 gcc_unreachable ();
3334 }
3335 }
3336
3337 return mode;
3338 }
3339
3340 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3341 this may not agree with the mode that the type system has chosen for the
3342 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3343 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3344
3345 static rtx
3346 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3347 unsigned int regno)
3348 {
3349 rtx tmp;
3350
3351 if (orig_mode != BLKmode)
3352 tmp = gen_rtx_REG (orig_mode, regno);
3353 else
3354 {
3355 tmp = gen_rtx_REG (mode, regno);
3356 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3357 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3358 }
3359
3360 return tmp;
3361 }
3362
3363 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3364 The goal of this code is to classify each 8-byte chunk of an incoming
3365 argument by register class and assign registers accordingly. */
3366
3367 /* Return the union class of CLASS1 and CLASS2.
3368 See the x86-64 PS ABI for details. */
3369
3370 static enum x86_64_reg_class
3371 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3372 {
3373 /* Rule #1: If both classes are equal, this is the resulting class. */
3374 if (class1 == class2)
3375 return class1;
3376
3377 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3378 the other class. */
3379 if (class1 == X86_64_NO_CLASS)
3380 return class2;
3381 if (class2 == X86_64_NO_CLASS)
3382 return class1;
3383
3384 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3385 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3386 return X86_64_MEMORY_CLASS;
3387
3388 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3389 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3390 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3391 return X86_64_INTEGERSI_CLASS;
3392 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3393 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3394 return X86_64_INTEGER_CLASS;
3395
3396 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3397 MEMORY is used. */
3398 if (class1 == X86_64_X87_CLASS
3399 || class1 == X86_64_X87UP_CLASS
3400 || class1 == X86_64_COMPLEX_X87_CLASS
3401 || class2 == X86_64_X87_CLASS
3402 || class2 == X86_64_X87UP_CLASS
3403 || class2 == X86_64_COMPLEX_X87_CLASS)
3404 return X86_64_MEMORY_CLASS;
3405
3406 /* Rule #6: Otherwise class SSE is used. */
3407 return X86_64_SSE_CLASS;
3408 }
3409
3410 /* Classify the argument of type TYPE and mode MODE.
3411 CLASSES will be filled by the register class used to pass each word
3412 of the operand. The number of words is returned. In case the parameter
3413 should be passed in memory, 0 is returned. As a special case for zero
3414 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3415
3416 BIT_OFFSET is used internally for handling records; it specifies the
3417 offset, in bits modulo 256, to avoid overflow cases.
3418
3419 See the x86-64 PS ABI for details.
3420 */
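/* As a worked example (hypothetical struct, shown only for illustration):

     struct s { int a; int b; double c; };    16 bytes, two 8-byte words

   classifies as classes[0] = X86_64_INTEGER_CLASS (a and b merged) and
   classes[1] = X86_64_SSEDF_CLASS (c), and 2 is returned; the struct is
   then passed in one general purpose register and one SSE register.  */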
3421
3422 static int
3423 classify_argument (enum machine_mode mode, tree type,
3424 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3425 {
3426 HOST_WIDE_INT bytes =
3427 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3428 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3429
3430 /* Variable sized entities are always passed/returned in memory. */
3431 if (bytes < 0)
3432 return 0;
3433
3434 if (mode != VOIDmode
3435 && targetm.calls.must_pass_in_stack (mode, type))
3436 return 0;
3437
3438 if (type && AGGREGATE_TYPE_P (type))
3439 {
3440 int i;
3441 tree field;
3442 enum x86_64_reg_class subclasses[MAX_CLASSES];
3443
3444 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3445 if (bytes > 16)
3446 return 0;
3447
3448 for (i = 0; i < words; i++)
3449 classes[i] = X86_64_NO_CLASS;
3450
3451 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3452 signal the memory class, so handle it as a special case. */
3453 if (!words)
3454 {
3455 classes[0] = X86_64_NO_CLASS;
3456 return 1;
3457 }
3458
3459 /* Classify each field of record and merge classes. */
3460 switch (TREE_CODE (type))
3461 {
3462 case RECORD_TYPE:
3463 /* And now merge the fields of structure. */
3464 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3465 {
3466 if (TREE_CODE (field) == FIELD_DECL)
3467 {
3468 int num;
3469
3470 if (TREE_TYPE (field) == error_mark_node)
3471 continue;
3472
3473 /* Bitfields are always classified as integer. Handle them
3474 early, since later code would consider them to be
3475 misaligned integers. */
3476 if (DECL_BIT_FIELD (field))
3477 {
3478 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3479 i < ((int_bit_position (field) + (bit_offset % 64))
3480 + tree_low_cst (DECL_SIZE (field), 0)
3481 + 63) / 8 / 8; i++)
3482 classes[i] =
3483 merge_classes (X86_64_INTEGER_CLASS,
3484 classes[i]);
3485 }
3486 else
3487 {
3488 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3489 TREE_TYPE (field), subclasses,
3490 (int_bit_position (field)
3491 + bit_offset) % 256);
3492 if (!num)
3493 return 0;
3494 for (i = 0; i < num; i++)
3495 {
3496 int pos =
3497 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3498 classes[i + pos] =
3499 merge_classes (subclasses[i], classes[i + pos]);
3500 }
3501 }
3502 }
3503 }
3504 break;
3505
3506 case ARRAY_TYPE:
3507 /* Arrays are handled as small records. */
3508 {
3509 int num;
3510 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3511 TREE_TYPE (type), subclasses, bit_offset);
3512 if (!num)
3513 return 0;
3514
3515 /* The partial classes are now full classes. */
3516 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3517 subclasses[0] = X86_64_SSE_CLASS;
3518 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3519 subclasses[0] = X86_64_INTEGER_CLASS;
3520
3521 for (i = 0; i < words; i++)
3522 classes[i] = subclasses[i % num];
3523
3524 break;
3525 }
3526 case UNION_TYPE:
3527 case QUAL_UNION_TYPE:
3528 /* Unions are similar to RECORD_TYPE but offset is always 0.
3529 */
3530 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3531 {
3532 if (TREE_CODE (field) == FIELD_DECL)
3533 {
3534 int num;
3535
3536 if (TREE_TYPE (field) == error_mark_node)
3537 continue;
3538
3539 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3540 TREE_TYPE (field), subclasses,
3541 bit_offset);
3542 if (!num)
3543 return 0;
3544 for (i = 0; i < num; i++)
3545 classes[i] = merge_classes (subclasses[i], classes[i]);
3546 }
3547 }
3548 break;
3549
3550 default:
3551 gcc_unreachable ();
3552 }
3553
3554 /* Final merger cleanup. */
3555 for (i = 0; i < words; i++)
3556 {
3557 /* If one class is MEMORY, everything should be passed in
3558 memory. */
3559 if (classes[i] == X86_64_MEMORY_CLASS)
3560 return 0;
3561
3562 /* The X86_64_SSEUP_CLASS should always be preceded by
3563 X86_64_SSE_CLASS. */
3564 if (classes[i] == X86_64_SSEUP_CLASS
3565 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3566 classes[i] = X86_64_SSE_CLASS;
3567
3568 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3569 if (classes[i] == X86_64_X87UP_CLASS
3570 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3571 classes[i] = X86_64_SSE_CLASS;
3572 }
3573 return words;
3574 }
3575
3576 /* Compute the alignment needed. We align all types to natural boundaries with
3577 the exception of XFmode, which is aligned to 64 bits. */
3578 if (mode != VOIDmode && mode != BLKmode)
3579 {
3580 int mode_alignment = GET_MODE_BITSIZE (mode);
3581
3582 if (mode == XFmode)
3583 mode_alignment = 128;
3584 else if (mode == XCmode)
3585 mode_alignment = 256;
3586 if (COMPLEX_MODE_P (mode))
3587 mode_alignment /= 2;
3588 /* Misaligned fields are always returned in memory. */
3589 if (bit_offset % mode_alignment)
3590 return 0;
3591 }
3592
3593 /* For V1xx modes, just use the base mode. */
3594 if (VECTOR_MODE_P (mode)
3595 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3596 mode = GET_MODE_INNER (mode);
3597
3598 /* Classification of atomic types. */
3599 switch (mode)
3600 {
3601 case SDmode:
3602 case DDmode:
3603 classes[0] = X86_64_SSE_CLASS;
3604 return 1;
3605 case TDmode:
3606 classes[0] = X86_64_SSE_CLASS;
3607 classes[1] = X86_64_SSEUP_CLASS;
3608 return 2;
3609 case DImode:
3610 case SImode:
3611 case HImode:
3612 case QImode:
3613 case CSImode:
3614 case CHImode:
3615 case CQImode:
3616 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3617 classes[0] = X86_64_INTEGERSI_CLASS;
3618 else
3619 classes[0] = X86_64_INTEGER_CLASS;
3620 return 1;
3621 case CDImode:
3622 case TImode:
3623 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3624 return 2;
3625 case CTImode:
3626 return 0;
3627 case SFmode:
3628 if (!(bit_offset % 64))
3629 classes[0] = X86_64_SSESF_CLASS;
3630 else
3631 classes[0] = X86_64_SSE_CLASS;
3632 return 1;
3633 case DFmode:
3634 classes[0] = X86_64_SSEDF_CLASS;
3635 return 1;
3636 case XFmode:
3637 classes[0] = X86_64_X87_CLASS;
3638 classes[1] = X86_64_X87UP_CLASS;
3639 return 2;
3640 case TFmode:
3641 classes[0] = X86_64_SSE_CLASS;
3642 classes[1] = X86_64_SSEUP_CLASS;
3643 return 2;
3644 case SCmode:
3645 classes[0] = X86_64_SSE_CLASS;
3646 return 1;
3647 case DCmode:
3648 classes[0] = X86_64_SSEDF_CLASS;
3649 classes[1] = X86_64_SSEDF_CLASS;
3650 return 2;
3651 case XCmode:
3652 classes[0] = X86_64_COMPLEX_X87_CLASS;
3653 return 1;
3654 case TCmode:
3655 /* This mode is larger than 16 bytes. */
3656 return 0;
3657 case V4SFmode:
3658 case V4SImode:
3659 case V16QImode:
3660 case V8HImode:
3661 case V2DFmode:
3662 case V2DImode:
3663 classes[0] = X86_64_SSE_CLASS;
3664 classes[1] = X86_64_SSEUP_CLASS;
3665 return 2;
3666 case V2SFmode:
3667 case V2SImode:
3668 case V4HImode:
3669 case V8QImode:
3670 classes[0] = X86_64_SSE_CLASS;
3671 return 1;
3672 case BLKmode:
3673 case VOIDmode:
3674 return 0;
3675 default:
3676 gcc_assert (VECTOR_MODE_P (mode));
3677
3678 if (bytes > 16)
3679 return 0;
3680
3681 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3682
3683 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3684 classes[0] = X86_64_INTEGERSI_CLASS;
3685 else
3686 classes[0] = X86_64_INTEGER_CLASS;
3687 classes[1] = X86_64_INTEGER_CLASS;
3688 return 1 + (bytes > 8);
3689 }
3690 }
3691
3692 /* Examine the argument and return the number of registers required in each
3693 class. Return 0 iff the parameter should be passed in memory. */
3694 static int
3695 examine_argument (enum machine_mode mode, tree type, int in_return,
3696 int *int_nregs, int *sse_nregs)
3697 {
3698 enum x86_64_reg_class class[MAX_CLASSES];
3699 int n = classify_argument (mode, type, class, 0);
3700
3701 *int_nregs = 0;
3702 *sse_nregs = 0;
3703 if (!n)
3704 return 0;
3705 for (n--; n >= 0; n--)
3706 switch (class[n])
3707 {
3708 case X86_64_INTEGER_CLASS:
3709 case X86_64_INTEGERSI_CLASS:
3710 (*int_nregs)++;
3711 break;
3712 case X86_64_SSE_CLASS:
3713 case X86_64_SSESF_CLASS:
3714 case X86_64_SSEDF_CLASS:
3715 (*sse_nregs)++;
3716 break;
3717 case X86_64_NO_CLASS:
3718 case X86_64_SSEUP_CLASS:
3719 break;
3720 case X86_64_X87_CLASS:
3721 case X86_64_X87UP_CLASS:
3722 if (!in_return)
3723 return 0;
3724 break;
3725 case X86_64_COMPLEX_X87_CLASS:
3726 return in_return ? 2 : 0;
3727 case X86_64_MEMORY_CLASS:
3728 gcc_unreachable ();
3729 }
3730 return 1;
3731 }
3732
3733 /* Construct a container for the argument as used by the GCC interface. See
3734 FUNCTION_ARG for the detailed description. */
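/* Continuing the struct { int a; int b; double c; } example above, the
   container built here is roughly (illustrative RTL; the actual register
   numbers depend on the call site):

     (parallel [(expr_list (reg:DI rdi) (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   i.e. the first eightbyte lives in an integer register and the second
   in an SSE register, at byte offsets 0 and 8 within the argument.  */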
3735
3736 static rtx
3737 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3738 tree type, int in_return, int nintregs, int nsseregs,
3739 const int *intreg, int sse_regno)
3740 {
3741 /* The following variables hold the static issued_error state. */
3742 static bool issued_sse_arg_error;
3743 static bool issued_sse_ret_error;
3744 static bool issued_x87_ret_error;
3745
3746 enum machine_mode tmpmode;
3747 int bytes =
3748 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3749 enum x86_64_reg_class class[MAX_CLASSES];
3750 int n;
3751 int i;
3752 int nexps = 0;
3753 int needed_sseregs, needed_intregs;
3754 rtx exp[MAX_CLASSES];
3755 rtx ret;
3756
3757 n = classify_argument (mode, type, class, 0);
3758 if (TARGET_DEBUG_ARG)
3759 {
3760 if (!n)
3761 fprintf (stderr, "Memory class\n");
3762 else
3763 {
3764 fprintf (stderr, "Classes:");
3765 for (i = 0; i < n; i++)
3766 {
3767 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3768 }
3769 fprintf (stderr, "\n");
3770 }
3771 }
3772 if (!n)
3773 return NULL;
3774 if (!examine_argument (mode, type, in_return, &needed_intregs,
3775 &needed_sseregs))
3776 return NULL;
3777 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3778 return NULL;
3779
3780 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3781 some less clueful developer tries to use floating-point anyway. */
3782 if (needed_sseregs && !TARGET_SSE)
3783 {
3784 if (in_return)
3785 {
3786 if (!issued_sse_ret_error)
3787 {
3788 error ("SSE register return with SSE disabled");
3789 issued_sse_ret_error = true;
3790 }
3791 }
3792 else if (!issued_sse_arg_error)
3793 {
3794 error ("SSE register argument with SSE disabled");
3795 issued_sse_arg_error = true;
3796 }
3797 return NULL;
3798 }
3799
3800 /* Likewise, error if the ABI requires us to return values in the
3801 x87 registers and the user specified -mno-80387. */
3802 if (!TARGET_80387 && in_return)
3803 for (i = 0; i < n; i++)
3804 if (class[i] == X86_64_X87_CLASS
3805 || class[i] == X86_64_X87UP_CLASS
3806 || class[i] == X86_64_COMPLEX_X87_CLASS)
3807 {
3808 if (!issued_x87_ret_error)
3809 {
3810 error ("x87 register return with x87 disabled");
3811 issued_x87_ret_error = true;
3812 }
3813 return NULL;
3814 }
3815
3816 /* First construct simple cases. Avoid SCmode, since we want to use
3817 single register to pass this type. */
3818 if (n == 1 && mode != SCmode)
3819 switch (class[0])
3820 {
3821 case X86_64_INTEGER_CLASS:
3822 case X86_64_INTEGERSI_CLASS:
3823 return gen_rtx_REG (mode, intreg[0]);
3824 case X86_64_SSE_CLASS:
3825 case X86_64_SSESF_CLASS:
3826 case X86_64_SSEDF_CLASS:
3827 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3828 case X86_64_X87_CLASS:
3829 case X86_64_COMPLEX_X87_CLASS:
3830 return gen_rtx_REG (mode, FIRST_STACK_REG);
3831 case X86_64_NO_CLASS:
3832 /* Zero sized array, struct or class. */
3833 return NULL;
3834 default:
3835 gcc_unreachable ();
3836 }
3837 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3838 && mode != BLKmode)
3839 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3840 if (n == 2
3841 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3842 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3843 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3844 && class[1] == X86_64_INTEGER_CLASS
3845 && (mode == CDImode || mode == TImode || mode == TFmode)
3846 && intreg[0] + 1 == intreg[1])
3847 return gen_rtx_REG (mode, intreg[0]);
3848
3849 /* Otherwise figure out the entries of the PARALLEL. */
3850 for (i = 0; i < n; i++)
3851 {
3852 switch (class[i])
3853 {
3854 case X86_64_NO_CLASS:
3855 break;
3856 case X86_64_INTEGER_CLASS:
3857 case X86_64_INTEGERSI_CLASS:
3858 /* Merge TImodes on aligned occasions here too. */
3859 if (i * 8 + 8 > bytes)
3860 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3861 else if (class[i] == X86_64_INTEGERSI_CLASS)
3862 tmpmode = SImode;
3863 else
3864 tmpmode = DImode;
3865 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3866 if (tmpmode == BLKmode)
3867 tmpmode = DImode;
3868 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3869 gen_rtx_REG (tmpmode, *intreg),
3870 GEN_INT (i*8));
3871 intreg++;
3872 break;
3873 case X86_64_SSESF_CLASS:
3874 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3875 gen_rtx_REG (SFmode,
3876 SSE_REGNO (sse_regno)),
3877 GEN_INT (i*8));
3878 sse_regno++;
3879 break;
3880 case X86_64_SSEDF_CLASS:
3881 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3882 gen_rtx_REG (DFmode,
3883 SSE_REGNO (sse_regno)),
3884 GEN_INT (i*8));
3885 sse_regno++;
3886 break;
3887 case X86_64_SSE_CLASS:
3888 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3889 tmpmode = TImode;
3890 else
3891 tmpmode = DImode;
3892 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3893 gen_rtx_REG (tmpmode,
3894 SSE_REGNO (sse_regno)),
3895 GEN_INT (i*8));
3896 if (tmpmode == TImode)
3897 i++;
3898 sse_regno++;
3899 break;
3900 default:
3901 gcc_unreachable ();
3902 }
3903 }
3904
3905 /* Empty aligned struct, union or class. */
3906 if (nexps == 0)
3907 return NULL;
3908
3909 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3910 for (i = 0; i < nexps; i++)
3911 XVECEXP (ret, 0, i) = exp [i];
3912 return ret;
3913 }
3914
3915 /* Update the data in CUM to advance over an argument
3916 of mode MODE and data type TYPE.
3917 (TYPE is null for libcalls where that information may not be available.) */
3918
3919 void
3920 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3921 tree type, int named)
3922 {
3923 int bytes =
3924 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3925 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3926
3927 if (type)
3928 mode = type_natural_mode (type);
3929
3930 if (TARGET_DEBUG_ARG)
3931 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3932 "mode=%s, named=%d)\n\n",
3933 words, cum->words, cum->nregs, cum->sse_nregs,
3934 GET_MODE_NAME (mode), named);
3935
3936 if (TARGET_64BIT)
3937 {
3938 int int_nregs, sse_nregs;
3939 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3940 cum->words += words;
3941 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3942 {
3943 cum->nregs -= int_nregs;
3944 cum->sse_nregs -= sse_nregs;
3945 cum->regno += int_nregs;
3946 cum->sse_regno += sse_nregs;
3947 }
3948 else
3949 cum->words += words;
3950 }
3951 else
3952 {
3953 switch (mode)
3954 {
3955 default:
3956 break;
3957
3958 case BLKmode:
3959 if (bytes < 0)
3960 break;
3961 /* FALLTHRU */
3962
3963 case DImode:
3964 case SImode:
3965 case HImode:
3966 case QImode:
3967 cum->words += words;
3968 cum->nregs -= words;
3969 cum->regno += words;
3970
3971 if (cum->nregs <= 0)
3972 {
3973 cum->nregs = 0;
3974 cum->regno = 0;
3975 }
3976 break;
3977
3978 case DFmode:
3979 if (cum->float_in_sse < 2)
3980 break;
3981 case SFmode:
3982 if (cum->float_in_sse < 1)
3983 break;
3984 /* FALLTHRU */
3985
3986 case TImode:
3987 case V16QImode:
3988 case V8HImode:
3989 case V4SImode:
3990 case V2DImode:
3991 case V4SFmode:
3992 case V2DFmode:
3993 if (!type || !AGGREGATE_TYPE_P (type))
3994 {
3995 cum->sse_words += words;
3996 cum->sse_nregs -= 1;
3997 cum->sse_regno += 1;
3998 if (cum->sse_nregs <= 0)
3999 {
4000 cum->sse_nregs = 0;
4001 cum->sse_regno = 0;
4002 }
4003 }
4004 break;
4005
4006 case V8QImode:
4007 case V4HImode:
4008 case V2SImode:
4009 case V2SFmode:
4010 if (!type || !AGGREGATE_TYPE_P (type))
4011 {
4012 cum->mmx_words += words;
4013 cum->mmx_nregs -= 1;
4014 cum->mmx_regno += 1;
4015 if (cum->mmx_nregs <= 0)
4016 {
4017 cum->mmx_nregs = 0;
4018 cum->mmx_regno = 0;
4019 }
4020 }
4021 break;
4022 }
4023 }
4024 }
4025
4026 /* Define where to put the arguments to a function.
4027 Value is zero to push the argument on the stack,
4028 or a hard register in which to store the argument.
4029
4030 MODE is the argument's machine mode.
4031 TYPE is the data type of the argument (as a tree).
4032 This is null for libcalls where that information may
4033 not be available.
4034 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4035 the preceding args and about the function being called.
4036 NAMED is nonzero if this argument is a named parameter
4037 (otherwise it is an extra parameter matching an ellipsis). */
4038
4039 rtx
4040 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
4041 tree type, int named)
4042 {
4043 enum machine_mode mode = orig_mode;
4044 rtx ret = NULL_RTX;
4045 int bytes =
4046 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4047 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4048 static bool warnedsse, warnedmmx;
4049
4050 /* To simplify the code below, represent vector types with a vector mode
4051 even if MMX/SSE are not active. */
4052 if (type && TREE_CODE (type) == VECTOR_TYPE)
4053 mode = type_natural_mode (type);
4054
4055 /* Handle a hidden AL argument containing the number of registers for varargs
4056 x86-64 functions. For the i386 ABI just return constm1_rtx to avoid
4057 any AL settings. */
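/* Roughly speaking, for a varargs call such as printf ("%f", 1.0) on
   x86-64, the constant returned here ends up being the number of SSE
   registers actually used for the call (1 in this case), and the caller
   loads it into %al before the call as required by the ABI.  */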
4058 if (mode == VOIDmode)
4059 {
4060 if (TARGET_64BIT)
4061 return GEN_INT (cum->maybe_vaarg
4062 ? (cum->sse_nregs < 0
4063 ? SSE_REGPARM_MAX
4064 : cum->sse_regno)
4065 : -1);
4066 else
4067 return constm1_rtx;
4068 }
4069 if (TARGET_64BIT)
4070 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4071 cum->sse_nregs,
4072 &x86_64_int_parameter_registers [cum->regno],
4073 cum->sse_regno);
4074 else
4075 switch (mode)
4076 {
4077 /* For now, pass fp/complex values on the stack. */
4078 default:
4079 break;
4080
4081 case BLKmode:
4082 if (bytes < 0)
4083 break;
4084 /* FALLTHRU */
4085 case DImode:
4086 case SImode:
4087 case HImode:
4088 case QImode:
4089 if (words <= cum->nregs)
4090 {
4091 int regno = cum->regno;
4092
4093 /* Fastcall allocates the first two DWORD (SImode) or
4094 smaller arguments to ECX and EDX. */
4095 if (cum->fastcall)
4096 {
4097 if (mode == BLKmode || mode == DImode)
4098 break;
4099
4100 /* ECX, not EAX, is the first allocated register. */
4101 if (regno == 0)
4102 regno = 2;
4103 }
4104 ret = gen_rtx_REG (mode, regno);
4105 }
4106 break;
4107 case DFmode:
4108 if (cum->float_in_sse < 2)
4109 break;
4110 case SFmode:
4111 if (cum->float_in_sse < 1)
4112 break;
4113 /* FALLTHRU */
4114 case TImode:
4115 case V16QImode:
4116 case V8HImode:
4117 case V4SImode:
4118 case V2DImode:
4119 case V4SFmode:
4120 case V2DFmode:
4121 if (!type || !AGGREGATE_TYPE_P (type))
4122 {
4123 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4124 {
4125 warnedsse = true;
4126 warning (0, "SSE vector argument without SSE enabled "
4127 "changes the ABI");
4128 }
4129 if (cum->sse_nregs)
4130 ret = gen_reg_or_parallel (mode, orig_mode,
4131 cum->sse_regno + FIRST_SSE_REG);
4132 }
4133 break;
4134 case V8QImode:
4135 case V4HImode:
4136 case V2SImode:
4137 case V2SFmode:
4138 if (!type || !AGGREGATE_TYPE_P (type))
4139 {
4140 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4141 {
4142 warnedmmx = true;
4143 warning (0, "MMX vector argument without MMX enabled "
4144 "changes the ABI");
4145 }
4146 if (cum->mmx_nregs)
4147 ret = gen_reg_or_parallel (mode, orig_mode,
4148 cum->mmx_regno + FIRST_MMX_REG);
4149 }
4150 break;
4151 }
4152
4153 if (TARGET_DEBUG_ARG)
4154 {
4155 fprintf (stderr,
4156 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4157 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4158
4159 if (ret)
4160 print_simple_rtl (stderr, ret);
4161 else
4162 fprintf (stderr, ", stack");
4163
4164 fprintf (stderr, " )\n");
4165 }
4166
4167 return ret;
4168 }
4169
4170 /* A C expression that indicates when an argument must be passed by
4171 reference. If nonzero for an argument, a copy of that argument is
4172 made in memory and a pointer to the argument is passed instead of
4173 the argument itself. The pointer is passed in whatever way is
4174 appropriate for passing a pointer to that type. */
4175
4176 static bool
4177 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4178 enum machine_mode mode ATTRIBUTE_UNUSED,
4179 tree type, bool named ATTRIBUTE_UNUSED)
4180 {
4181 if (!TARGET_64BIT)
4182 return 0;
4183
4184 if (type && int_size_in_bytes (type) == -1)
4185 {
4186 if (TARGET_DEBUG_ARG)
4187 fprintf (stderr, "function_arg_pass_by_reference\n");
4188 return 1;
4189 }
4190
4191 return 0;
4192 }
4193
4194 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4195 ABI. Only called if TARGET_SSE. */
4196 static bool
4197 contains_128bit_aligned_vector_p (tree type)
4198 {
4199 enum machine_mode mode = TYPE_MODE (type);
4200 if (SSE_REG_MODE_P (mode)
4201 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4202 return true;
4203 if (TYPE_ALIGN (type) < 128)
4204 return false;
4205
4206 if (AGGREGATE_TYPE_P (type))
4207 {
4208 /* Walk the aggregates recursively. */
4209 switch (TREE_CODE (type))
4210 {
4211 case RECORD_TYPE:
4212 case UNION_TYPE:
4213 case QUAL_UNION_TYPE:
4214 {
4215 tree field;
4216
4217 /* Walk all the structure fields. */
4218 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4219 {
4220 if (TREE_CODE (field) == FIELD_DECL
4221 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4222 return true;
4223 }
4224 break;
4225 }
4226
4227 case ARRAY_TYPE:
4228 /* Just for use if some languages pass arrays by value. */
4229 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4230 return true;
4231 break;
4232
4233 default:
4234 gcc_unreachable ();
4235 }
4236 }
4237 return false;
4238 }
4239
4240 /* Gives the alignment boundary, in bits, of an argument with the
4241 specified mode and type. */
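/* For example, on a 32-bit target with SSE enabled, an __m128 argument
   (or a struct containing one) is aligned to 128 bits on the stack,
   while a plain double stays at the default PARM_BOUNDARY.  */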
4242
4243 int
4244 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4245 {
4246 int align;
4247 if (type)
4248 align = TYPE_ALIGN (type);
4249 else
4250 align = GET_MODE_ALIGNMENT (mode);
4251 if (align < PARM_BOUNDARY)
4252 align = PARM_BOUNDARY;
4253 if (!TARGET_64BIT)
4254 {
4255 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4256 make an exception for SSE modes since these require 128bit
4257 alignment.
4258
4259 The handling here differs from field_alignment. ICC aligns MMX
4260 arguments to 4 byte boundaries, while structure fields are aligned
4261 to 8 byte boundaries. */
4262 if (!TARGET_SSE)
4263 align = PARM_BOUNDARY;
4264 else if (!type)
4265 {
4266 if (!SSE_REG_MODE_P (mode))
4267 align = PARM_BOUNDARY;
4268 }
4269 else
4270 {
4271 if (!contains_128bit_aligned_vector_p (type))
4272 align = PARM_BOUNDARY;
4273 }
4274 }
4275 if (align > 128)
4276 align = 128;
4277 return align;
4278 }
4279
4280 /* Return true if N is a possible register number of function value. */
4281 bool
4282 ix86_function_value_regno_p (int regno)
4283 {
4284 if (TARGET_MACHO)
4285 {
4286 if (!TARGET_64BIT)
4287 {
4288 return ((regno) == 0
4289 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4290 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4291 }
4292 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4293 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4294 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4295 }
4296 else
4297 {
4298 if (regno == 0
4299 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4300 || (regno == FIRST_SSE_REG && TARGET_SSE))
4301 return true;
4302
4303 if (!TARGET_64BIT
4304 && (regno == FIRST_MMX_REG && TARGET_MMX))
4305 return true;
4306
4307 return false;
4308 }
4309 }
4310
4311 /* Define how to find the value returned by a function.
4312 VALTYPE is the data type of the value (as a tree).
4313 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4314 otherwise, FUNC is 0. */
4315 rtx
4316 ix86_function_value (tree valtype, tree fntype_or_decl,
4317 bool outgoing ATTRIBUTE_UNUSED)
4318 {
4319 enum machine_mode natmode = type_natural_mode (valtype);
4320
4321 if (TARGET_64BIT)
4322 {
4323 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4324 1, REGPARM_MAX, SSE_REGPARM_MAX,
4325 x86_64_int_return_registers, 0);
4326 /* For zero sized structures, construct_container returns NULL, but we
4327 need to keep the rest of the compiler happy by returning a meaningful value. */
4328 if (!ret)
4329 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4330 return ret;
4331 }
4332 else
4333 {
4334 tree fn = NULL_TREE, fntype;
4335 if (fntype_or_decl
4336 && DECL_P (fntype_or_decl))
4337 fn = fntype_or_decl;
4338 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4339 return gen_rtx_REG (TYPE_MODE (valtype),
4340 ix86_value_regno (natmode, fn, fntype));
4341 }
4342 }
4343
4344 /* Return true iff type is returned in memory. */
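/* For example, on 32-bit targets a 16-byte plain struct is returned in
   memory through a hidden pointer argument, while a 16-byte __m128
   vector is returned in %xmm0 when SSE is enabled.  */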
4345 int
4346 ix86_return_in_memory (tree type)
4347 {
4348 int needed_intregs, needed_sseregs, size;
4349 enum machine_mode mode = type_natural_mode (type);
4350
4351 if (TARGET_64BIT)
4352 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4353
4354 if (mode == BLKmode)
4355 return 1;
4356
4357 size = int_size_in_bytes (type);
4358
4359 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4360 return 0;
4361
4362 if (VECTOR_MODE_P (mode) || mode == TImode)
4363 {
4364 /* User-created vectors small enough to fit in EAX. */
4365 if (size < 8)
4366 return 0;
4367
4368 /* MMX/3dNow values are returned in MM0,
4369 except when it doesn't exist. */
4370 if (size == 8)
4371 return (TARGET_MMX ? 0 : 1);
4372
4373 /* SSE values are returned in XMM0, except when it doesn't exist. */
4374 if (size == 16)
4375 return (TARGET_SSE ? 0 : 1);
4376 }
4377
4378 if (mode == XFmode)
4379 return 0;
4380
4381 if (mode == TDmode)
4382 return 1;
4383
4384 if (size > 12)
4385 return 1;
4386 return 0;
4387 }
4388
4389 /* When returning SSE vector types, we have a choice of either
4390 (1) being abi incompatible with a -march switch, or
4391 (2) generating an error.
4392 Given no good solution, I think the safest thing is one warning.
4393 The user won't be able to use -Werror, but....
4394
4395 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4396 called in response to actually generating a caller or callee that
4397 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4398 via aggregate_value_p for general type probing from tree-ssa. */
4399
4400 static rtx
4401 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4402 {
4403 static bool warnedsse, warnedmmx;
4404
4405 if (type)
4406 {
4407 /* Look at the return type of the function, not the function type. */
4408 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4409
4410 if (!TARGET_SSE && !warnedsse)
4411 {
4412 if (mode == TImode
4413 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4414 {
4415 warnedsse = true;
4416 warning (0, "SSE vector return without SSE enabled "
4417 "changes the ABI");
4418 }
4419 }
4420
4421 if (!TARGET_MMX && !warnedmmx)
4422 {
4423 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4424 {
4425 warnedmmx = true;
4426 warning (0, "MMX vector return without MMX enabled "
4427 "changes the ABI");
4428 }
4429 }
4430 }
4431
4432 return NULL;
4433 }
4434
4435 /* Define how to find the value returned by a library function
4436 assuming the value has mode MODE. */
4437 rtx
4438 ix86_libcall_value (enum machine_mode mode)
4439 {
4440 if (TARGET_64BIT)
4441 {
4442 switch (mode)
4443 {
4444 case SFmode:
4445 case SCmode:
4446 case DFmode:
4447 case DCmode:
4448 case TFmode:
4449 case SDmode:
4450 case DDmode:
4451 case TDmode:
4452 return gen_rtx_REG (mode, FIRST_SSE_REG);
4453 case XFmode:
4454 case XCmode:
4455 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4456 case TCmode:
4457 return NULL;
4458 default:
4459 return gen_rtx_REG (mode, 0);
4460 }
4461 }
4462 else
4463 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4464 }
4465
4466 /* Given a mode, return the register to use for a return value. */
4467
4468 static int
4469 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4470 {
4471 gcc_assert (!TARGET_64BIT);
4472
4473 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4474 we normally prevent this case when mmx is not available. However
4475 some ABIs may require the result to be returned like DImode. */
4476 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4477 return TARGET_MMX ? FIRST_MMX_REG : 0;
4478
4479 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4480 we prevent this case when sse is not available. However some ABIs
4481 may require the result to be returned like integer TImode. */
4482 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4483 return TARGET_SSE ? FIRST_SSE_REG : 0;
4484
4485 /* Decimal floating point values can go in %eax, unlike other float modes. */
4486 if (DECIMAL_FLOAT_MODE_P (mode))
4487 return 0;
4488
4489 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4490 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4491 return 0;
4492
4493 /* Floating point return values in %st(0), except for local functions when
4494 SSE math is enabled or for functions with sseregparm attribute. */
4495 if ((func || fntype)
4496 && (mode == SFmode || mode == DFmode))
4497 {
4498 int sse_level = ix86_function_sseregparm (fntype, func);
4499 if ((sse_level >= 1 && mode == SFmode)
4500 || (sse_level == 2 && mode == DFmode))
4501 return FIRST_SSE_REG;
4502 }
4503
4504 return FIRST_FLOAT_REG;
4505 }
4506 \f
4507 /* Create the va_list data type. */
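/* On x86-64 the record built below corresponds roughly to the type
   described in the psABI, i.e. in C terms:

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   (Illustrative only; the actual type is built as trees below.)  */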
4508
4509 static tree
4510 ix86_build_builtin_va_list (void)
4511 {
4512 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4513
4514 /* For i386 we use plain pointer to argument area. */
4515 if (!TARGET_64BIT)
4516 return build_pointer_type (char_type_node);
4517
4518 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4519 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4520
4521 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4522 unsigned_type_node);
4523 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4524 unsigned_type_node);
4525 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4526 ptr_type_node);
4527 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4528 ptr_type_node);
4529
4530 va_list_gpr_counter_field = f_gpr;
4531 va_list_fpr_counter_field = f_fpr;
4532
4533 DECL_FIELD_CONTEXT (f_gpr) = record;
4534 DECL_FIELD_CONTEXT (f_fpr) = record;
4535 DECL_FIELD_CONTEXT (f_ovf) = record;
4536 DECL_FIELD_CONTEXT (f_sav) = record;
4537
4538 TREE_CHAIN (record) = type_decl;
4539 TYPE_NAME (record) = type_decl;
4540 TYPE_FIELDS (record) = f_gpr;
4541 TREE_CHAIN (f_gpr) = f_fpr;
4542 TREE_CHAIN (f_fpr) = f_ovf;
4543 TREE_CHAIN (f_ovf) = f_sav;
4544
4545 layout_type (record);
4546
4547 /* The correct type is an array type of one element. */
4548 return build_array_type (record, build_index_type (size_zero_node));
4549 }
4550
4551 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4552
4553 static void
4554 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4555 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4556 int no_rtl)
4557 {
4558 CUMULATIVE_ARGS next_cum;
4559 rtx save_area = NULL_RTX, mem;
4560 rtx label;
4561 rtx label_ref;
4562 rtx tmp_reg;
4563 rtx nsse_reg;
4564 int set;
4565 tree fntype;
4566 int stdarg_p;
4567 int i;
4568
4569 if (!TARGET_64BIT)
4570 return;
4571
4572 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4573 return;
4574
4575 /* Indicate to allocate space on the stack for varargs save area. */
4576 ix86_save_varrargs_registers = 1;
4577
4578 cfun->stack_alignment_needed = 128;
4579
4580 fntype = TREE_TYPE (current_function_decl);
4581 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4582 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4583 != void_type_node));
4584
4585 /* For varargs, we do not want to skip the dummy va_dcl argument.
4586 For stdargs, we do want to skip the last named argument. */
4587 next_cum = *cum;
4588 if (stdarg_p)
4589 function_arg_advance (&next_cum, mode, type, 1);
4590
4591 if (!no_rtl)
4592 save_area = frame_pointer_rtx;
4593
4594 set = get_varargs_alias_set ();
4595
4596 for (i = next_cum.regno;
4597 i < ix86_regparm
4598 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4599 i++)
4600 {
4601 mem = gen_rtx_MEM (Pmode,
4602 plus_constant (save_area, i * UNITS_PER_WORD));
4603 MEM_NOTRAP_P (mem) = 1;
4604 set_mem_alias_set (mem, set);
4605 emit_move_insn (mem, gen_rtx_REG (Pmode,
4606 x86_64_int_parameter_registers[i]));
4607 }
4608
4609 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4610 {
4611 /* Now emit code to save SSE registers. The AX parameter contains the
4612 number of SSE parameter registers used to call this function. We use
4613 the sse_prologue_save insn template that produces a computed jump across
4614 the SSE saves. We need some preparation work to get this working. */
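/* The register save area laid out here is, roughly:

     bytes   0 ..  47   the six integer argument registers, 8 bytes each
     bytes  48 .. 175   the eight SSE argument registers, 16 bytes each

   which is also why ix86_va_start below initializes fp_offset to
   n_fpr * 16 + 8 * REGPARM_MAX.  */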
4615
4616 label = gen_label_rtx ();
4617 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4618
4619 /* Compute the address to jump to:
4620 label - 5*eax + nnamed_sse_arguments*5 */
4621 tmp_reg = gen_reg_rtx (Pmode);
4622 nsse_reg = gen_reg_rtx (Pmode);
4623 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4624 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4625 gen_rtx_MULT (Pmode, nsse_reg,
4626 GEN_INT (4))));
4627 if (next_cum.sse_regno)
4628 emit_move_insn
4629 (nsse_reg,
4630 gen_rtx_CONST (DImode,
4631 gen_rtx_PLUS (DImode,
4632 label_ref,
4633 GEN_INT (next_cum.sse_regno * 4))));
4634 else
4635 emit_move_insn (nsse_reg, label_ref);
4636 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4637
4638 /* Compute the address of the memory block we save into. We always use a
4639 pointer pointing 127 bytes after the first byte to store; this is needed
4640 to keep the instruction size limited to 4 bytes. */
4641 tmp_reg = gen_reg_rtx (Pmode);
4642 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4643 plus_constant (save_area,
4644 8 * REGPARM_MAX + 127)));
4645 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4646 MEM_NOTRAP_P (mem) = 1;
4647 set_mem_alias_set (mem, set);
4648 set_mem_align (mem, BITS_PER_WORD);
4649
4650 /* And finally do the dirty job! */
4651 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4652 GEN_INT (next_cum.sse_regno), label));
4653 }
4654
4655 }
4656
4657 /* Implement va_start. */
4658
4659 void
4660 ix86_va_start (tree valist, rtx nextarg)
4661 {
4662 HOST_WIDE_INT words, n_gpr, n_fpr;
4663 tree f_gpr, f_fpr, f_ovf, f_sav;
4664 tree gpr, fpr, ovf, sav, t;
4665 tree type;
4666
4667 /* Only 64bit target needs something special. */
4668 if (!TARGET_64BIT)
4669 {
4670 std_expand_builtin_va_start (valist, nextarg);
4671 return;
4672 }
4673
4674 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4675 f_fpr = TREE_CHAIN (f_gpr);
4676 f_ovf = TREE_CHAIN (f_fpr);
4677 f_sav = TREE_CHAIN (f_ovf);
4678
4679 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4680 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4681 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4682 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4683 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4684
4685 /* Count number of gp and fp argument registers used. */
4686 words = current_function_args_info.words;
4687 n_gpr = current_function_args_info.regno;
4688 n_fpr = current_function_args_info.sse_regno;
4689
4690 if (TARGET_DEBUG_ARG)
4691 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4692 (int) words, (int) n_gpr, (int) n_fpr);
4693
4694 if (cfun->va_list_gpr_size)
4695 {
4696 type = TREE_TYPE (gpr);
4697 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4698 build_int_cst (type, n_gpr * 8));
4699 TREE_SIDE_EFFECTS (t) = 1;
4700 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4701 }
4702
4703 if (cfun->va_list_fpr_size)
4704 {
4705 type = TREE_TYPE (fpr);
4706 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4707 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4708 TREE_SIDE_EFFECTS (t) = 1;
4709 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4710 }
4711
4712 /* Find the overflow area. */
4713 type = TREE_TYPE (ovf);
4714 t = make_tree (type, virtual_incoming_args_rtx);
4715 if (words != 0)
4716 t = build2 (PLUS_EXPR, type, t,
4717 build_int_cst (type, words * UNITS_PER_WORD));
4718 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4719 TREE_SIDE_EFFECTS (t) = 1;
4720 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4721
4722 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4723 {
4724 /* Find the register save area.
4725 The function prologue saves it right above the stack frame. */
4726 type = TREE_TYPE (sav);
4727 t = make_tree (type, frame_pointer_rtx);
4728 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4729 TREE_SIDE_EFFECTS (t) = 1;
4730 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4731 }
4732 }
4733
4734 /* Implement va_arg. */
4735
4736 tree
4737 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4738 {
4739 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4740 tree f_gpr, f_fpr, f_ovf, f_sav;
4741 tree gpr, fpr, ovf, sav, t;
4742 int size, rsize;
4743 tree lab_false, lab_over = NULL_TREE;
4744 tree addr, t2;
4745 rtx container;
4746 int indirect_p = 0;
4747 tree ptrtype;
4748 enum machine_mode nat_mode;
4749
4750 /* Only 64bit target needs something special. */
4751 if (!TARGET_64BIT)
4752 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4753
4754 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4755 f_fpr = TREE_CHAIN (f_gpr);
4756 f_ovf = TREE_CHAIN (f_fpr);
4757 f_sav = TREE_CHAIN (f_ovf);
4758
4759 valist = build_va_arg_indirect_ref (valist);
4760 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4761 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4762 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4763 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4764
4765 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4766 if (indirect_p)
4767 type = build_pointer_type (type);
4768 size = int_size_in_bytes (type);
4769 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4770
4771 nat_mode = type_natural_mode (type);
4772 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4773 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4774
4775 /* Pull the value out of the saved registers. */
4776
4777 addr = create_tmp_var (ptr_type_node, "addr");
4778 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4779
4780 if (container)
4781 {
4782 int needed_intregs, needed_sseregs;
4783 bool need_temp;
4784 tree int_addr, sse_addr;
4785
4786 lab_false = create_artificial_label ();
4787 lab_over = create_artificial_label ();
4788
4789 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4790
4791 need_temp = (!REG_P (container)
4792 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4793 || TYPE_ALIGN (type) > 128));
4794
4795 /* In case we are passing a structure, verify that it is a consecutive block
4796 in the register save area. If not, we need to do moves. */
4797 if (!need_temp && !REG_P (container))
4798 {
4799 /* Verify that all registers are strictly consecutive */
4800 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4801 {
4802 int i;
4803
4804 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4805 {
4806 rtx slot = XVECEXP (container, 0, i);
4807 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4808 || INTVAL (XEXP (slot, 1)) != i * 16)
4809 need_temp = 1;
4810 }
4811 }
4812 else
4813 {
4814 int i;
4815
4816 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4817 {
4818 rtx slot = XVECEXP (container, 0, i);
4819 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4820 || INTVAL (XEXP (slot, 1)) != i * 8)
4821 need_temp = 1;
4822 }
4823 }
4824 }
4825 if (!need_temp)
4826 {
4827 int_addr = addr;
4828 sse_addr = addr;
4829 }
4830 else
4831 {
4832 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4833 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4834 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4835 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4836 }
4837
4838 /* First ensure that we fit completely in registers. */
4839 if (needed_intregs)
4840 {
4841 t = build_int_cst (TREE_TYPE (gpr),
4842 (REGPARM_MAX - needed_intregs + 1) * 8);
4843 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4844 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4845 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4846 gimplify_and_add (t, pre_p);
4847 }
4848 if (needed_sseregs)
4849 {
4850 t = build_int_cst (TREE_TYPE (fpr),
4851 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4852 + REGPARM_MAX * 8);
4853 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4854 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4855 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4856 gimplify_and_add (t, pre_p);
4857 }
4858
4859 /* Compute index to start of area used for integer regs. */
4860 if (needed_intregs)
4861 {
4862 /* int_addr = gpr + sav; */
4863 t = fold_convert (ptr_type_node, gpr);
4864 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4865 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4866 gimplify_and_add (t, pre_p);
4867 }
4868 if (needed_sseregs)
4869 {
4870 /* sse_addr = fpr + sav; */
4871 t = fold_convert (ptr_type_node, fpr);
4872 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4873 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4874 gimplify_and_add (t, pre_p);
4875 }
4876 if (need_temp)
4877 {
4878 int i;
4879 tree temp = create_tmp_var (type, "va_arg_tmp");
4880
4881 /* addr = &temp; */
4882 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4883 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4884 gimplify_and_add (t, pre_p);
4885
4886 for (i = 0; i < XVECLEN (container, 0); i++)
4887 {
4888 rtx slot = XVECEXP (container, 0, i);
4889 rtx reg = XEXP (slot, 0);
4890 enum machine_mode mode = GET_MODE (reg);
4891 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4892 tree addr_type = build_pointer_type (piece_type);
4893 tree src_addr, src;
4894 int src_offset;
4895 tree dest_addr, dest;
4896
4897 if (SSE_REGNO_P (REGNO (reg)))
4898 {
4899 src_addr = sse_addr;
4900 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4901 }
4902 else
4903 {
4904 src_addr = int_addr;
4905 src_offset = REGNO (reg) * 8;
4906 }
4907 src_addr = fold_convert (addr_type, src_addr);
4908 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4909 size_int (src_offset));
4910 src = build_va_arg_indirect_ref (src_addr);
4911
4912 dest_addr = fold_convert (addr_type, addr);
4913 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4914 size_int (INTVAL (XEXP (slot, 1))));
4915 dest = build_va_arg_indirect_ref (dest_addr);
4916
4917 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4918 gimplify_and_add (t, pre_p);
4919 }
4920 }
4921
4922 if (needed_intregs)
4923 {
4924 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4925 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4926 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4927 gimplify_and_add (t, pre_p);
4928 }
4929 if (needed_sseregs)
4930 {
4931 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4932 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4933 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4934 gimplify_and_add (t, pre_p);
4935 }
4936
4937 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4938 gimplify_and_add (t, pre_p);
4939
4940 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4941 append_to_statement_list (t, pre_p);
4942 }
4943
4944 /* ... otherwise out of the overflow area. */
4945
4946 /* Care for on-stack alignment if needed. */
4947 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4948 || integer_zerop (TYPE_SIZE (type)))
4949 t = ovf;
4950 else
4951 {
4952 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4953 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4954 build_int_cst (TREE_TYPE (ovf), align - 1));
4955 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4956 build_int_cst (TREE_TYPE (t), -align));
4957 }
4958 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4959
4960 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4961 gimplify_and_add (t2, pre_p);
4962
4963 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4964 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4965 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4966 gimplify_and_add (t, pre_p);
4967
4968 if (container)
4969 {
4970 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4971 append_to_statement_list (t, pre_p);
4972 }
4973
4974 ptrtype = build_pointer_type (type);
4975 addr = fold_convert (ptrtype, addr);
4976
4977 if (indirect_p)
4978 addr = build_va_arg_indirect_ref (addr);
4979 return build_va_arg_indirect_ref (addr);
4980 }
4981 \f
4982 /* Return nonzero if OPNUM's MEM should be matched
4983 in movabs* patterns. */
4984
4985 int
4986 ix86_check_movabs (rtx insn, int opnum)
4987 {
4988 rtx set, mem;
4989
4990 set = PATTERN (insn);
4991 if (GET_CODE (set) == PARALLEL)
4992 set = XVECEXP (set, 0, 0);
4993 gcc_assert (GET_CODE (set) == SET);
4994 mem = XEXP (set, opnum);
4995 while (GET_CODE (mem) == SUBREG)
4996 mem = SUBREG_REG (mem);
4997 gcc_assert (MEM_P (mem));
4998 return (volatile_ok || !MEM_VOLATILE_P (mem));
4999 }
5000 \f
5001 /* Initialize the table of extra 80387 mathematical constants. */
5002
5003 static void
5004 init_ext_80387_constants (void)
5005 {
5006 static const char * cst[5] =
5007 {
5008 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5009 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5010 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5011 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5012 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5013 };
5014 int i;
5015
5016 for (i = 0; i < 5; i++)
5017 {
5018 real_from_string (&ext_80387_constants_table[i], cst[i]);
5019 /* Ensure each constant is rounded to XFmode precision. */
5020 real_convert (&ext_80387_constants_table[i],
5021 XFmode, &ext_80387_constants_table[i]);
5022 }
5023
5024 ext_80387_constants_init = 1;
5025 }
5026
5027 /* Return a nonzero index if the constant can be loaded with a special 80387
5028 instruction, 0 if it cannot, and -1 if X is not a floating CONST_DOUBLE. */
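/* The indices map to instructions as follows (see
   standard_80387_constant_opcode): 1 = fldz, 2 = fld1, 3 = fldlg2,
   4 = fldln2, 5 = fldl2e, 6 = fldl2t, 7 = fldpi; 8 and 9 select the
   fldz;fchs and fld1;fchs sequences used for -0.0 and -1.0.  */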
5029
5030 int
5031 standard_80387_constant_p (rtx x)
5032 {
5033 REAL_VALUE_TYPE r;
5034
5035 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5036 return -1;
5037
5038 if (x == CONST0_RTX (GET_MODE (x)))
5039 return 1;
5040 if (x == CONST1_RTX (GET_MODE (x)))
5041 return 2;
5042
5043 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5044
5045 /* For XFmode constants, try to find a special 80387 instruction when
5046 optimizing for size or on those CPUs that benefit from them. */
5047 if (GET_MODE (x) == XFmode
5048 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5049 {
5050 int i;
5051
5052 if (! ext_80387_constants_init)
5053 init_ext_80387_constants ();
5054
5055 for (i = 0; i < 5; i++)
5056 if (real_identical (&r, &ext_80387_constants_table[i]))
5057 return i + 3;
5058 }
5059
5060 /* A load of the constant -0.0 or -1.0 will be split into an
5061 fldz;fchs or fld1;fchs sequence. */
5062 if (real_isnegzero (&r))
5063 return 8;
5064 if (real_identical (&r, &dconstm1))
5065 return 9;
5066
5067 return 0;
5068 }
5069
5070 /* Return the opcode of the special instruction to be used to load
5071 the constant X. */
5072
5073 const char *
5074 standard_80387_constant_opcode (rtx x)
5075 {
5076 switch (standard_80387_constant_p (x))
5077 {
5078 case 1:
5079 return "fldz";
5080 case 2:
5081 return "fld1";
5082 case 3:
5083 return "fldlg2";
5084 case 4:
5085 return "fldln2";
5086 case 5:
5087 return "fldl2e";
5088 case 6:
5089 return "fldl2t";
5090 case 7:
5091 return "fldpi";
5092 case 8:
5093 case 9:
5094 return "#";
5095 default:
5096 gcc_unreachable ();
5097 }
5098 }
5099
5100 /* Return the CONST_DOUBLE representing the 80387 constant that is
5101 loaded by the specified special instruction. The argument IDX
5102 matches the return value from standard_80387_constant_p. */
5103
5104 rtx
5105 standard_80387_constant_rtx (int idx)
5106 {
5107 int i;
5108
5109 if (! ext_80387_constants_init)
5110 init_ext_80387_constants ();
5111
5112 switch (idx)
5113 {
5114 case 3:
5115 case 4:
5116 case 5:
5117 case 6:
5118 case 7:
5119 i = idx - 3;
5120 break;
5121
5122 default:
5123 gcc_unreachable ();
5124 }
5125
5126 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5127 XFmode);
5128 }
5129
5130 /* Return 1 if MODE is a valid vector mode for SSE. */
5131 static int
5132 standard_sse_mode_p (enum machine_mode mode)
5133 {
5134 switch (mode)
5135 {
5136 case V16QImode:
5137 case V8HImode:
5138 case V4SImode:
5139 case V2DImode:
5140 case V4SFmode:
5141 case V2DFmode:
5142 return 1;
5143
5144 default:
5145 return 0;
5146 }
5147 }
5148
5149 /* Return a nonzero value if X is an FP constant that we can load into an
5150 SSE register without using memory. */
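/* The nonzero return value encodes the constant: 1 for all zeros (loaded
   with xorps/xorpd/pxor), 2 for all ones when SSE2 is available (loaded
   with pcmpeqd), and -1 for all ones without SSE2; see
   standard_sse_constant_opcode.  */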
5151 int
5152 standard_sse_constant_p (rtx x)
5153 {
5154 enum machine_mode mode = GET_MODE (x);
5155
5156 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5157 return 1;
5158 if (vector_all_ones_operand (x, mode)
5159 && standard_sse_mode_p (mode))
5160 return TARGET_SSE2 ? 2 : -1;
5161
5162 return 0;
5163 }
5164
5165 /* Return the opcode of the special instruction to be used to load
5166 the constant X. */
5167
5168 const char *
5169 standard_sse_constant_opcode (rtx insn, rtx x)
5170 {
5171 switch (standard_sse_constant_p (x))
5172 {
5173 case 1:
5174 if (get_attr_mode (insn) == MODE_V4SF)
5175 return "xorps\t%0, %0";
5176 else if (get_attr_mode (insn) == MODE_V2DF)
5177 return "xorpd\t%0, %0";
5178 else
5179 return "pxor\t%0, %0";
5180 case 2:
5181 return "pcmpeqd\t%0, %0";
5182 }
5183 gcc_unreachable ();
5184 }
5185
5186 /* Return 1 if OP contains a symbol reference. */
5187
5188 int
5189 symbolic_reference_mentioned_p (rtx op)
5190 {
5191 const char *fmt;
5192 int i;
5193
5194 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5195 return 1;
5196
5197 fmt = GET_RTX_FORMAT (GET_CODE (op));
5198 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5199 {
5200 if (fmt[i] == 'E')
5201 {
5202 int j;
5203
5204 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5205 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5206 return 1;
5207 }
5208
5209 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5210 return 1;
5211 }
5212
5213 return 0;
5214 }
5215
5216 /* Return 1 if it is appropriate to emit `ret' instructions in the
5217 body of a function. Do this only if the epilogue is simple, needing a
5218 couple of insns. Prior to reloading, we can't tell how many registers
5219 must be saved, so return 0 then. Return 0 if there is no frame
5220 marker to de-allocate. */
5221
5222 int
5223 ix86_can_use_return_insn_p (void)
5224 {
5225 struct ix86_frame frame;
5226
5227 if (! reload_completed || frame_pointer_needed)
5228 return 0;
5229
5230 /* Don't allow popping more than 32768 bytes of arguments, since
5231 that's all we handle with one instruction. */
5232 if (current_function_pops_args
5233 && current_function_args_size >= 32768)
5234 return 0;
5235
5236 ix86_compute_frame_layout (&frame);
5237 return frame.to_allocate == 0 && frame.nregs == 0;
5238 }
5239 \f
5240 /* Value should be nonzero if functions must have frame pointers.
5241 Zero means the frame pointer need not be set up (and parms may
5242 be accessed via the stack pointer) in functions that seem suitable. */
5243
5244 int
5245 ix86_frame_pointer_required (void)
5246 {
5247 /* If we accessed previous frames, then the generated code expects
5248 to be able to access the saved ebp value in our frame. */
5249 if (cfun->machine->accesses_prev_frame)
5250 return 1;
5251
5252 /* Several x86 OSes need a frame pointer for other reasons,
5253 usually pertaining to setjmp. */
5254 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5255 return 1;
5256
5257 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5258 the frame pointer by default. Turn it back on now if we've not
5259 got a leaf function. */
5260 if (TARGET_OMIT_LEAF_FRAME_POINTER
5261 && (!current_function_is_leaf
5262 || ix86_current_function_calls_tls_descriptor))
5263 return 1;
5264
5265 if (current_function_profile)
5266 return 1;
5267
5268 return 0;
5269 }
5270
5271 /* Record that the current function accesses previous call frames. */
5272
5273 void
5274 ix86_setup_frame_addresses (void)
5275 {
5276 cfun->machine->accesses_prev_frame = 1;
5277 }
5278 \f
5279 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5280 # define USE_HIDDEN_LINKONCE 1
5281 #else
5282 # define USE_HIDDEN_LINKONCE 0
5283 #endif
5284
5285 static int pic_labels_used;
5286
5287 /* Fills in the label name that should be used for a pc thunk for
5288 the given register. */
5289
5290 static void
5291 get_pc_thunk_name (char name[32], unsigned int regno)
5292 {
5293 gcc_assert (!TARGET_64BIT);
5294
5295 if (USE_HIDDEN_LINKONCE)
5296 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5297 else
5298 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5299 }
5300
5301
5302 /* Output the pc thunks needed by -fpic code: each thunk loads its register
5303 with the return address of the caller and then returns. */
5304
5305 void
5306 ix86_file_end (void)
5307 {
5308 rtx xops[2];
5309 int regno;
5310
5311 for (regno = 0; regno < 8; ++regno)
5312 {
5313 char name[32];
5314
5315 if (! ((pic_labels_used >> regno) & 1))
5316 continue;
5317
5318 get_pc_thunk_name (name, regno);
5319
5320 #if TARGET_MACHO
5321 if (TARGET_MACHO)
5322 {
5323 switch_to_section (darwin_sections[text_coal_section]);
5324 fputs ("\t.weak_definition\t", asm_out_file);
5325 assemble_name (asm_out_file, name);
5326 fputs ("\n\t.private_extern\t", asm_out_file);
5327 assemble_name (asm_out_file, name);
5328 fputs ("\n", asm_out_file);
5329 ASM_OUTPUT_LABEL (asm_out_file, name);
5330 }
5331 else
5332 #endif
5333 if (USE_HIDDEN_LINKONCE)
5334 {
5335 tree decl;
5336
5337 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5338 error_mark_node);
5339 TREE_PUBLIC (decl) = 1;
5340 TREE_STATIC (decl) = 1;
5341 DECL_ONE_ONLY (decl) = 1;
5342
5343 (*targetm.asm_out.unique_section) (decl, 0);
5344 switch_to_section (get_named_section (decl, NULL, 0));
5345
5346 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5347 fputs ("\t.hidden\t", asm_out_file);
5348 assemble_name (asm_out_file, name);
5349 fputc ('\n', asm_out_file);
5350 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5351 }
5352 else
5353 {
5354 switch_to_section (text_section);
5355 ASM_OUTPUT_LABEL (asm_out_file, name);
5356 }
5357
5358 xops[0] = gen_rtx_REG (SImode, regno);
5359 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5360 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5361 output_asm_insn ("ret", xops);
5362 }
5363
5364 if (NEED_INDICATE_EXEC_STACK)
5365 file_end_indicate_exec_stack ();
5366 }
5367
5368 /* Emit code for the SET_GOT patterns. */
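/* Two code sequences are used below. Without deep branch prediction (or
   when not generating PIC code at all), the pc is obtained with a call to
   the following label plus a pop, or a plain mov of the label for non-PIC
   code; otherwise the pc thunk emitted by ix86_file_end is called.  Except
   on Mach-O, GOT_SYMBOL_NAME is then added to turn the result into the GOT
   pointer.  */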
5369
5370 const char *
5371 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5372 {
5373 rtx xops[3];
5374
5375 xops[0] = dest;
5376 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5377
5378 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5379 {
5380 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5381
5382 if (!flag_pic)
5383 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5384 else
5385 output_asm_insn ("call\t%a2", xops);
5386
5387 #if TARGET_MACHO
5388 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5389 is what will be referenced by the Mach-O PIC subsystem. */
5390 if (!label)
5391 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5392 #endif
5393
5394 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5395 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5396
5397 if (flag_pic)
5398 output_asm_insn ("pop{l}\t%0", xops);
5399 }
5400 else
5401 {
5402 char name[32];
5403 get_pc_thunk_name (name, REGNO (dest));
5404 pic_labels_used |= 1 << REGNO (dest);
5405
5406 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5407 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5408 output_asm_insn ("call\t%X2", xops);
5409 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5410 is what will be referenced by the Mach-O PIC subsystem. */
5411 #if TARGET_MACHO
5412 if (!label)
5413 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5414 else
5415 targetm.asm_out.internal_label (asm_out_file, "L",
5416 CODE_LABEL_NUMBER (label));
5417 #endif
5418 }
5419
5420 if (TARGET_MACHO)
5421 return "";
5422
5423 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5424 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5425 else
5426 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5427
5428 return "";
5429 }
5430
5431 /* Generate a "push" pattern for input ARG. */
5432
5433 static rtx
5434 gen_push (rtx arg)
5435 {
5436 return gen_rtx_SET (VOIDmode,
5437 gen_rtx_MEM (Pmode,
5438 gen_rtx_PRE_DEC (Pmode,
5439 stack_pointer_rtx)),
5440 arg);
5441 }
5442
5443 /* Return the number of an unused call-clobbered register available for
5444 the entire function, or INVALID_REGNUM if there is none. */
5445
5446 static unsigned int
5447 ix86_select_alt_pic_regnum (void)
5448 {
5449 if (current_function_is_leaf && !current_function_profile
5450 && !ix86_current_function_calls_tls_descriptor)
5451 {
5452 int i;
5453 for (i = 2; i >= 0; --i)
5454 if (!regs_ever_live[i])
5455 return i;
5456 }
5457
5458 return INVALID_REGNUM;
5459 }
5460
5461 /* Return 1 if we need to save REGNO. */
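/* A register needs saving when it is the PIC register and the PIC register
   is live (unless an alternate register can be used instead), when it is an
   EH return data register and the function calls eh_return, when it is the
   register used to force argument-pointer alignment, or when it is a live,
   call-saved, non-fixed register (excluding the frame pointer when one is
   needed).  */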
5462 static int
5463 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5464 {
5465 if (pic_offset_table_rtx
5466 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5467 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5468 || current_function_profile
5469 || current_function_calls_eh_return
5470 || current_function_uses_const_pool))
5471 {
5472 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5473 return 0;
5474 return 1;
5475 }
5476
5477 if (current_function_calls_eh_return && maybe_eh_return)
5478 {
5479 unsigned i;
5480 for (i = 0; ; i++)
5481 {
5482 unsigned test = EH_RETURN_DATA_REGNO (i);
5483 if (test == INVALID_REGNUM)
5484 break;
5485 if (test == regno)
5486 return 1;
5487 }
5488 }
5489
5490 if (cfun->machine->force_align_arg_pointer
5491 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5492 return 1;
5493
5494 return (regs_ever_live[regno]
5495 && !call_used_regs[regno]
5496 && !fixed_regs[regno]
5497 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5498 }
5499
5500 /* Return number of registers to be saved on the stack. */
5501
5502 static int
5503 ix86_nsaved_regs (void)
5504 {
5505 int nregs = 0;
5506 int regno;
5507
5508 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5509 if (ix86_save_reg (regno, true))
5510 nregs++;
5511 return nregs;
5512 }
5513
5514 /* Return the offset between two registers, one to be eliminated, and the other
5515 its replacement, at the start of a routine. */
5516
5517 HOST_WIDE_INT
5518 ix86_initial_elimination_offset (int from, int to)
5519 {
5520 struct ix86_frame frame;
5521 ix86_compute_frame_layout (&frame);
5522
5523 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5524 return frame.hard_frame_pointer_offset;
5525 else if (from == FRAME_POINTER_REGNUM
5526 && to == HARD_FRAME_POINTER_REGNUM)
5527 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5528 else
5529 {
5530 gcc_assert (to == STACK_POINTER_REGNUM);
5531
5532 if (from == ARG_POINTER_REGNUM)
5533 return frame.stack_pointer_offset;
5534
5535 gcc_assert (from == FRAME_POINTER_REGNUM);
5536 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5537 }
5538 }
5539
5540 /* Fill in the ix86_frame structure describing the frame of the current function. */
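/* The layout computed below, from higher addresses to lower ones:
     return address
     saved frame pointer (if frame_pointer_needed)
       <- hard_frame_pointer_offset
     register save area (frame->nregs words)
     va-arg save area (if ix86_save_varrargs_registers)
     padding1 (aligns the start of the frame)
       <- frame_pointer_offset
     local variables
     outgoing arguments area (if ACCUMULATE_OUTGOING_ARGS)
     padding2 (aligns the stack boundary)
       <- stack_pointer_offset  */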
5541
5542 static void
5543 ix86_compute_frame_layout (struct ix86_frame *frame)
5544 {
5545 HOST_WIDE_INT total_size;
5546 unsigned int stack_alignment_needed;
5547 HOST_WIDE_INT offset;
5548 unsigned int preferred_alignment;
5549 HOST_WIDE_INT size = get_frame_size ();
5550
5551 frame->nregs = ix86_nsaved_regs ();
5552 total_size = size;
5553
5554 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5555 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5556
5557 /* During reload iterations the number of registers saved can change.
5558 Recompute the value as needed. Do not recompute when the number of
5559 registers didn't change, as reload makes multiple calls to this function
5560 and does not expect the decision to change within a single iteration. */
5561 if (!optimize_size
5562 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5563 {
5564 int count = frame->nregs;
5565
5566 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5567 /* The fast prologue uses moves instead of pushes to save registers. This
5568 is significantly longer, but also executes faster as modern hardware
5569 can execute the moves in parallel, but can't do that for push/pop.
5570
5571 Be careful about choosing which prologue to emit: when the function takes
5572 many instructions to execute we may as well use the slow version; the same
5573 holds when the function is known to be outside a hot spot (this is known
5574 with feedback only). Weight the size of the function by the number of
5575 registers to save, as it is cheap to use one or two push instructions but
5576 very slow to use many of them. */
5577 if (count)
5578 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5579 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5580 || (flag_branch_probabilities
5581 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5582 cfun->machine->use_fast_prologue_epilogue = false;
5583 else
5584 cfun->machine->use_fast_prologue_epilogue
5585 = !expensive_function_p (count);
5586 }
5587 if (TARGET_PROLOGUE_USING_MOVE
5588 && cfun->machine->use_fast_prologue_epilogue)
5589 frame->save_regs_using_mov = true;
5590 else
5591 frame->save_regs_using_mov = false;
5592
5593
5594 /* Skip return address and saved base pointer. */
5595 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5596
5597 frame->hard_frame_pointer_offset = offset;
5598
5599 /* Do some sanity checking of stack_alignment_needed and
5600 preferred_alignment, since the i386 port is the only one using these
5601 features and they may break easily. */
5602
5603 gcc_assert (!size || stack_alignment_needed);
5604 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5605 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5606 gcc_assert (stack_alignment_needed
5607 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5608
5609 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5610 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5611
5612 /* Register save area */
5613 offset += frame->nregs * UNITS_PER_WORD;
5614
5615 /* Va-arg area */
5616 if (ix86_save_varrargs_registers)
5617 {
5618 offset += X86_64_VARARGS_SIZE;
5619 frame->va_arg_size = X86_64_VARARGS_SIZE;
5620 }
5621 else
5622 frame->va_arg_size = 0;
5623
5624 /* Align start of frame for local function. */
5625 frame->padding1 = ((offset + stack_alignment_needed - 1)
5626 & -stack_alignment_needed) - offset;
5627
5628 offset += frame->padding1;
5629
5630 /* Frame pointer points here. */
5631 frame->frame_pointer_offset = offset;
5632
5633 offset += size;
5634
5635 /* Add the outgoing arguments area. It can be skipped if we eliminated
5636 all the function calls as dead code.
5637 Skipping is however impossible when the function calls alloca, as the
5638 alloca expander assumes that the last current_function_outgoing_args_size
5639 bytes of the stack frame are unused. */
5640 if (ACCUMULATE_OUTGOING_ARGS
5641 && (!current_function_is_leaf || current_function_calls_alloca
5642 || ix86_current_function_calls_tls_descriptor))
5643 {
5644 offset += current_function_outgoing_args_size;
5645 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5646 }
5647 else
5648 frame->outgoing_arguments_size = 0;
5649
5650 /* Align stack boundary. Only needed if we're calling another function
5651 or using alloca. */
5652 if (!current_function_is_leaf || current_function_calls_alloca
5653 || ix86_current_function_calls_tls_descriptor)
5654 frame->padding2 = ((offset + preferred_alignment - 1)
5655 & -preferred_alignment) - offset;
5656 else
5657 frame->padding2 = 0;
5658
5659 offset += frame->padding2;
5660
5661 /* We've reached the end of the stack frame. */
5662 frame->stack_pointer_offset = offset;
5663
5664 /* Size the prologue needs to allocate. */
5665 frame->to_allocate =
5666 (size + frame->padding1 + frame->padding2
5667 + frame->outgoing_arguments_size + frame->va_arg_size);
5668
5669 if ((!frame->to_allocate && frame->nregs <= 1)
5670 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5671 frame->save_regs_using_mov = false;
5672
5673 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5674 && current_function_is_leaf
5675 && !ix86_current_function_calls_tls_descriptor)
5676 {
5677 frame->red_zone_size = frame->to_allocate;
5678 if (frame->save_regs_using_mov)
5679 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5680 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5681 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5682 }
5683 else
5684 frame->red_zone_size = 0;
5685 frame->to_allocate -= frame->red_zone_size;
5686 frame->stack_pointer_offset -= frame->red_zone_size;
5687 #if 0
5688 fprintf (stderr, "\n");
5689 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5690 fprintf (stderr, "size: %ld\n", (long)size);
5691 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5692 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5693 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5694 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5695 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5696 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5697 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5698 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5699 (long)frame->hard_frame_pointer_offset);
5700 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5701 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5702 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5703 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5704 #endif
5705 }
5706
5707 /* Emit code to save registers in the prologue. */
5708
5709 static void
5710 ix86_emit_save_regs (void)
5711 {
5712 unsigned int regno;
5713 rtx insn;
5714
5715 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5716 if (ix86_save_reg (regno, true))
5717 {
5718 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5719 RTX_FRAME_RELATED_P (insn) = 1;
5720 }
5721 }
5722
5723 /* Emit code to save registers using MOV insns. The first register
5724 is saved at POINTER + OFFSET. */
5725 static void
5726 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5727 {
5728 unsigned int regno;
5729 rtx insn;
5730
5731 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5732 if (ix86_save_reg (regno, true))
5733 {
5734 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5735 Pmode, offset),
5736 gen_rtx_REG (Pmode, regno));
5737 RTX_FRAME_RELATED_P (insn) = 1;
5738 offset += UNITS_PER_WORD;
5739 }
5740 }
5741
5742 /* Expand prologue or epilogue stack adjustment.
5743 The pattern exists to put a dependency on all ebp-based memory accesses.
5744 STYLE should be negative if instructions should be marked as frame related,
5745 zero if the %r11 register is live and cannot be freely used, and positive
5746 otherwise. */
5747
5748 static void
5749 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5750 {
5751 rtx insn;
5752
5753 if (! TARGET_64BIT)
5754 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5755 else if (x86_64_immediate_operand (offset, DImode))
5756 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5757 else
5758 {
5759 rtx r11;
5760 /* r11 is used by indirect sibcall return as well, set before the
5761 epilogue and used after the epilogue. ATM indirect sibcall
5762 shouldn't be used together with huge frame sizes in one
5763 function because of the frame_size check in sibcall.c. */
5764 gcc_assert (style);
5765 r11 = gen_rtx_REG (DImode, R11_REG);
5766 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5767 if (style < 0)
5768 RTX_FRAME_RELATED_P (insn) = 1;
5769 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5770 offset));
5771 }
5772 if (style < 0)
5773 RTX_FRAME_RELATED_P (insn) = 1;
5774 }
5775
5776 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
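/* When stack realignment is requested (for main when
   FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN, with -mstackrealign, or via the
   attribute named by ix86_force_align_arg_pointer_string), the incoming
   argument address is kept in hard register 2 (%ecx) so that
   ix86_expand_prologue can realign the stack; this hook then returns a
   pseudo copied from that register instead of virtual_incoming_args_rtx.  */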
5777
5778 static rtx
5779 ix86_internal_arg_pointer (void)
5780 {
5781 bool has_force_align_arg_pointer =
5782 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5783 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5784 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5785 && DECL_NAME (current_function_decl)
5786 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5787 && DECL_FILE_SCOPE_P (current_function_decl))
5788 || ix86_force_align_arg_pointer
5789 || has_force_align_arg_pointer)
5790 {
5791 /* Nested functions can't realign the stack due to a register
5792 conflict. */
5793 if (DECL_CONTEXT (current_function_decl)
5794 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5795 {
5796 if (ix86_force_align_arg_pointer)
5797 warning (0, "-mstackrealign ignored for nested functions");
5798 if (has_force_align_arg_pointer)
5799 error ("%s not supported for nested functions",
5800 ix86_force_align_arg_pointer_string);
5801 return virtual_incoming_args_rtx;
5802 }
5803 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5804 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5805 }
5806 else
5807 return virtual_incoming_args_rtx;
5808 }
5809
5810 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5811 This is called from dwarf2out.c to emit call frame instructions
5812 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5813 static void
5814 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5815 {
5816 rtx unspec = SET_SRC (pattern);
5817 gcc_assert (GET_CODE (unspec) == UNSPEC);
5818
5819 switch (index)
5820 {
5821 case UNSPEC_REG_SAVE:
5822 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5823 SET_DEST (pattern));
5824 break;
5825 case UNSPEC_DEF_CFA:
5826 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5827 INTVAL (XVECEXP (unspec, 0, 0)));
5828 break;
5829 default:
5830 gcc_unreachable ();
5831 }
5832 }
5833
5834 /* Expand the prologue into a bunch of separate insns. */
5835
5836 void
5837 ix86_expand_prologue (void)
5838 {
5839 rtx insn;
5840 bool pic_reg_used;
5841 struct ix86_frame frame;
5842 HOST_WIDE_INT allocate;
5843
5844 ix86_compute_frame_layout (&frame);
5845
5846 if (cfun->machine->force_align_arg_pointer)
5847 {
5848 rtx x, y;
5849
5850 /* Grab the argument pointer. */
5851 x = plus_constant (stack_pointer_rtx, 4);
5852 y = cfun->machine->force_align_arg_pointer;
5853 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5854 RTX_FRAME_RELATED_P (insn) = 1;
5855
5856 /* The unwind info consists of two parts: install the fafp as the cfa,
5857 and record the fafp as the "save register" of the stack pointer.
5858 The latter is there so that the unwinder can see where it should
5859 restore the stack pointer across the and insn. */
5860 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5861 x = gen_rtx_SET (VOIDmode, y, x);
5862 RTX_FRAME_RELATED_P (x) = 1;
5863 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5864 UNSPEC_REG_SAVE);
5865 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5866 RTX_FRAME_RELATED_P (y) = 1;
5867 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5868 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5869 REG_NOTES (insn) = x;
5870
5871 /* Align the stack. */
5872 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5873 GEN_INT (-16)));
5874
5875 /* And here we cheat like madmen with the unwind info. We force the
5876 cfa register back to sp+4, which is exactly what it was at the
5877 start of the function. Re-pushing the return address results in
5878 the return at the same spot relative to the cfa, and thus is
5879 correct wrt the unwind info. */
5880 x = cfun->machine->force_align_arg_pointer;
5881 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5882 insn = emit_insn (gen_push (x));
5883 RTX_FRAME_RELATED_P (insn) = 1;
5884
5885 x = GEN_INT (4);
5886 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5887 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5888 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5889 REG_NOTES (insn) = x;
5890 }
5891
5892 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5893 slower on all targets. Also sdb doesn't like it. */
5894
5895 if (frame_pointer_needed)
5896 {
5897 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5898 RTX_FRAME_RELATED_P (insn) = 1;
5899
5900 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5901 RTX_FRAME_RELATED_P (insn) = 1;
5902 }
5903
5904 allocate = frame.to_allocate;
5905
5906 if (!frame.save_regs_using_mov)
5907 ix86_emit_save_regs ();
5908 else
5909 allocate += frame.nregs * UNITS_PER_WORD;
5910
5911 /* When using the red zone we may start saving registers before allocating
5912 the stack frame, saving one cycle of the prologue. */
5913 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5914 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5915 : stack_pointer_rtx,
5916 -frame.nregs * UNITS_PER_WORD);
5917
5918 if (allocate == 0)
5919 ;
5920 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5921 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5922 GEN_INT (-allocate), -1);
5923 else
5924 {
5925 /* Only valid for Win32. */
5926 rtx eax = gen_rtx_REG (SImode, 0);
5927 bool eax_live = ix86_eax_live_at_start_p ();
5928 rtx t;
5929
5930 gcc_assert (!TARGET_64BIT);
5931
5932 if (eax_live)
5933 {
5934 emit_insn (gen_push (eax));
5935 allocate -= 4;
5936 }
5937
5938 emit_move_insn (eax, GEN_INT (allocate));
5939
5940 insn = emit_insn (gen_allocate_stack_worker (eax));
5941 RTX_FRAME_RELATED_P (insn) = 1;
5942 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5943 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5944 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5945 t, REG_NOTES (insn));
5946
5947 if (eax_live)
5948 {
5949 if (frame_pointer_needed)
5950 t = plus_constant (hard_frame_pointer_rtx,
5951 allocate
5952 - frame.to_allocate
5953 - frame.nregs * UNITS_PER_WORD);
5954 else
5955 t = plus_constant (stack_pointer_rtx, allocate);
5956 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5957 }
5958 }
5959
5960 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5961 {
5962 if (!frame_pointer_needed || !frame.to_allocate)
5963 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5964 else
5965 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5966 -frame.nregs * UNITS_PER_WORD);
5967 }
5968
5969 pic_reg_used = false;
5970 if (pic_offset_table_rtx
5971 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5972 || current_function_profile))
5973 {
5974 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5975
5976 if (alt_pic_reg_used != INVALID_REGNUM)
5977 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5978
5979 pic_reg_used = true;
5980 }
5981
5982 if (pic_reg_used)
5983 {
5984 if (TARGET_64BIT)
5985 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5986 else
5987 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5988
5989 /* Even with accurate pre-reload life analysis, we can wind up
5990 deleting all references to the pic register after reload.
5991 Consider if cross-jumping unifies two sides of a branch
5992 controlled by a comparison vs the only read from a global.
5993 In which case, allow the set_got to be deleted, though we're
5994 too late to do anything about the ebx save in the prologue. */
5995 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5996 }
5997
5998 /* Prevent function calls from being scheduled before the call to mcount.
5999 In the pic_reg_used case, make sure that the got load isn't deleted. */
6000 if (current_function_profile)
6001 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6002 }
6003
6004 /* Emit code to restore saved registers using MOV insns. First register
6005 is restored from POINTER + OFFSET. */
6006 static void
6007 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6008 int maybe_eh_return)
6009 {
6010 int regno;
6011 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6012
6013 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6014 if (ix86_save_reg (regno, maybe_eh_return))
6015 {
6016 /* Ensure that adjust_address won't be forced to produce a pointer
6017 out of the range allowed by the x86-64 instruction set. */
6018 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6019 {
6020 rtx r11;
6021
6022 r11 = gen_rtx_REG (DImode, R11_REG);
6023 emit_move_insn (r11, GEN_INT (offset));
6024 emit_insn (gen_adddi3 (r11, r11, pointer));
6025 base_address = gen_rtx_MEM (Pmode, r11);
6026 offset = 0;
6027 }
6028 emit_move_insn (gen_rtx_REG (Pmode, regno),
6029 adjust_address (base_address, Pmode, offset));
6030 offset += UNITS_PER_WORD;
6031 }
6032 }
6033
6034 /* Restore function stack, frame, and registers. */
6035
6036 void
6037 ix86_expand_epilogue (int style)
6038 {
6039 int regno;
6040 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6041 struct ix86_frame frame;
6042 HOST_WIDE_INT offset;
6043
6044 ix86_compute_frame_layout (&frame);
6045
6046 /* Calculate start of saved registers relative to ebp. Special care
6047 must be taken for the normal return case of a function using
6048 eh_return: the eax and edx registers are marked as saved, but not
6049 restored along this path. */
6050 offset = frame.nregs;
6051 if (current_function_calls_eh_return && style != 2)
6052 offset -= 2;
6053 offset *= -UNITS_PER_WORD;
6054
6055 /* If we're only restoring one register and sp is not valid then
6056 use a move instruction to restore the register, since it's
6057 less work than reloading sp and popping the register.
6058
6059 The default code results in a stack adjustment using an add/lea instruction,
6060 while this code results in a LEAVE instruction (or discrete equivalent),
6061 so it is profitable in some other cases as well, especially when there
6062 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6063 and there is exactly one register to pop. This heuristic may need some
6064 tuning in the future. */
6065 if ((!sp_valid && frame.nregs <= 1)
6066 || (TARGET_EPILOGUE_USING_MOVE
6067 && cfun->machine->use_fast_prologue_epilogue
6068 && (frame.nregs > 1 || frame.to_allocate))
6069 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6070 || (frame_pointer_needed && TARGET_USE_LEAVE
6071 && cfun->machine->use_fast_prologue_epilogue
6072 && frame.nregs == 1)
6073 || current_function_calls_eh_return)
6074 {
6075 /* Restore registers. We can use ebp or esp to address the memory
6076 locations. If both are available, default to ebp, since offsets
6077 are known to be small. The only exception is esp pointing directly to
6078 the end of the block of saved registers, where we may simplify the
6079 addressing mode. */
6080
6081 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6082 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6083 frame.to_allocate, style == 2);
6084 else
6085 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6086 offset, style == 2);
6087
6088 /* eh_return epilogues need %ecx added to the stack pointer. */
6089 if (style == 2)
6090 {
6091 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6092
6093 if (frame_pointer_needed)
6094 {
6095 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6096 tmp = plus_constant (tmp, UNITS_PER_WORD);
6097 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6098
6099 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6100 emit_move_insn (hard_frame_pointer_rtx, tmp);
6101
6102 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6103 const0_rtx, style);
6104 }
6105 else
6106 {
6107 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6108 tmp = plus_constant (tmp, (frame.to_allocate
6109 + frame.nregs * UNITS_PER_WORD));
6110 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6111 }
6112 }
6113 else if (!frame_pointer_needed)
6114 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6115 GEN_INT (frame.to_allocate
6116 + frame.nregs * UNITS_PER_WORD),
6117 style);
6118 /* If not an i386, mov & pop is faster than "leave". */
6119 else if (TARGET_USE_LEAVE || optimize_size
6120 || !cfun->machine->use_fast_prologue_epilogue)
6121 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6122 else
6123 {
6124 pro_epilogue_adjust_stack (stack_pointer_rtx,
6125 hard_frame_pointer_rtx,
6126 const0_rtx, style);
6127 if (TARGET_64BIT)
6128 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6129 else
6130 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6131 }
6132 }
6133 else
6134 {
6135 /* First step is to deallocate the stack frame so that we can
6136 pop the registers. */
6137 if (!sp_valid)
6138 {
6139 gcc_assert (frame_pointer_needed);
6140 pro_epilogue_adjust_stack (stack_pointer_rtx,
6141 hard_frame_pointer_rtx,
6142 GEN_INT (offset), style);
6143 }
6144 else if (frame.to_allocate)
6145 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6146 GEN_INT (frame.to_allocate), style);
6147
6148 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6149 if (ix86_save_reg (regno, false))
6150 {
6151 if (TARGET_64BIT)
6152 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6153 else
6154 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6155 }
6156 if (frame_pointer_needed)
6157 {
6158 /* Leave results in shorter dependency chains on CPUs that are
6159 able to grok it fast. */
6160 if (TARGET_USE_LEAVE)
6161 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6162 else if (TARGET_64BIT)
6163 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6164 else
6165 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6166 }
6167 }
6168
6169 if (cfun->machine->force_align_arg_pointer)
6170 {
6171 emit_insn (gen_addsi3 (stack_pointer_rtx,
6172 cfun->machine->force_align_arg_pointer,
6173 GEN_INT (-4)));
6174 }
6175
6176 /* Sibcall epilogues don't want a return instruction. */
6177 if (style == 0)
6178 return;
6179
6180 if (current_function_pops_args && current_function_args_size)
6181 {
6182 rtx popc = GEN_INT (current_function_pops_args);
6183
6184 /* i386 can only pop 64K bytes. If asked to pop more, pop
6185 return address, do explicit add, and jump indirectly to the
6186 caller. */
6187
6188 if (current_function_pops_args >= 65536)
6189 {
6190 rtx ecx = gen_rtx_REG (SImode, 2);
6191
6192 /* There is no "pascal" calling convention in 64bit ABI. */
6193 gcc_assert (!TARGET_64BIT);
6194
6195 emit_insn (gen_popsi1 (ecx));
6196 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6197 emit_jump_insn (gen_return_indirect_internal (ecx));
6198 }
6199 else
6200 emit_jump_insn (gen_return_pop_internal (popc));
6201 }
6202 else
6203 emit_jump_insn (gen_return_internal ());
6204 }
6205
6206 /* Undo any state changes made while compiling the function. */
6207
6208 static void
6209 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6210 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6211 {
6212 if (pic_offset_table_rtx)
6213 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6214 #if TARGET_MACHO
6215 /* Mach-O doesn't support labels at the end of objects, so if
6216 it looks like we might want one, insert a NOP. */
6217 {
6218 rtx insn = get_last_insn ();
6219 while (insn
6220 && NOTE_P (insn)
6221 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6222 insn = PREV_INSN (insn);
6223 if (insn
6224 && (LABEL_P (insn)
6225 || (NOTE_P (insn)
6226 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6227 fputs ("\tnop\n", file);
6228 }
6229 #endif
6230
6231 }
6232 \f
6233 /* Extract the parts of an RTL expression that is a valid memory address
6234 for an instruction. Return 0 if the structure of the address is
6235 grossly off. Return -1 if the address contains ASHIFT, so it is not
6236 strictly valid, but still used for computing the length of an lea instruction. */
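/* For example, (plus (plus (mult (reg B) (const_int 4)) (reg A))
   (const_int 12)) decomposes into base A, index B, scale 4 and
   displacement 12, while (ashift (reg A) (const_int 3)) yields index A
   with scale 8 and a return value of -1.  */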
6237
6238 int
6239 ix86_decompose_address (rtx addr, struct ix86_address *out)
6240 {
6241 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6242 rtx base_reg, index_reg;
6243 HOST_WIDE_INT scale = 1;
6244 rtx scale_rtx = NULL_RTX;
6245 int retval = 1;
6246 enum ix86_address_seg seg = SEG_DEFAULT;
6247
6248 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6249 base = addr;
6250 else if (GET_CODE (addr) == PLUS)
6251 {
6252 rtx addends[4], op;
6253 int n = 0, i;
6254
6255 op = addr;
6256 do
6257 {
6258 if (n >= 4)
6259 return 0;
6260 addends[n++] = XEXP (op, 1);
6261 op = XEXP (op, 0);
6262 }
6263 while (GET_CODE (op) == PLUS);
6264 if (n >= 4)
6265 return 0;
6266 addends[n] = op;
6267
6268 for (i = n; i >= 0; --i)
6269 {
6270 op = addends[i];
6271 switch (GET_CODE (op))
6272 {
6273 case MULT:
6274 if (index)
6275 return 0;
6276 index = XEXP (op, 0);
6277 scale_rtx = XEXP (op, 1);
6278 break;
6279
6280 case UNSPEC:
6281 if (XINT (op, 1) == UNSPEC_TP
6282 && TARGET_TLS_DIRECT_SEG_REFS
6283 && seg == SEG_DEFAULT)
6284 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6285 else
6286 return 0;
6287 break;
6288
6289 case REG:
6290 case SUBREG:
6291 if (!base)
6292 base = op;
6293 else if (!index)
6294 index = op;
6295 else
6296 return 0;
6297 break;
6298
6299 case CONST:
6300 case CONST_INT:
6301 case SYMBOL_REF:
6302 case LABEL_REF:
6303 if (disp)
6304 return 0;
6305 disp = op;
6306 break;
6307
6308 default:
6309 return 0;
6310 }
6311 }
6312 }
6313 else if (GET_CODE (addr) == MULT)
6314 {
6315 index = XEXP (addr, 0); /* index*scale */
6316 scale_rtx = XEXP (addr, 1);
6317 }
6318 else if (GET_CODE (addr) == ASHIFT)
6319 {
6320 rtx tmp;
6321
6322 /* We're called for lea too, which implements ashift on occasion. */
6323 index = XEXP (addr, 0);
6324 tmp = XEXP (addr, 1);
6325 if (!CONST_INT_P (tmp))
6326 return 0;
6327 scale = INTVAL (tmp);
6328 if ((unsigned HOST_WIDE_INT) scale > 3)
6329 return 0;
6330 scale = 1 << scale;
6331 retval = -1;
6332 }
6333 else
6334 disp = addr; /* displacement */
6335
6336 /* Extract the integral value of scale. */
6337 if (scale_rtx)
6338 {
6339 if (!CONST_INT_P (scale_rtx))
6340 return 0;
6341 scale = INTVAL (scale_rtx);
6342 }
6343
6344 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6345 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6346
6347 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6348 if (base_reg && index_reg && scale == 1
6349 && (index_reg == arg_pointer_rtx
6350 || index_reg == frame_pointer_rtx
6351 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6352 {
6353 rtx tmp;
6354 tmp = base, base = index, index = tmp;
6355 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6356 }
6357
6358 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6359 if ((base_reg == hard_frame_pointer_rtx
6360 || base_reg == frame_pointer_rtx
6361 || base_reg == arg_pointer_rtx) && !disp)
6362 disp = const0_rtx;
6363
6364 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6365 Avoid this by transforming to [%esi+0]. */
6366 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6367 && base_reg && !index_reg && !disp
6368 && REG_P (base_reg)
6369 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6370 disp = const0_rtx;
6371
6372 /* Special case: encode reg+reg instead of reg*2. */
6373 if (!base && index && scale && scale == 2)
6374 base = index, base_reg = index_reg, scale = 1;
6375
6376 /* Special case: scaling cannot be encoded without base or displacement. */
6377 if (!base && !disp && index && scale != 1)
6378 disp = const0_rtx;
6379
6380 out->base = base;
6381 out->index = index;
6382 out->disp = disp;
6383 out->scale = scale;
6384 out->seg = seg;
6385
6386 return retval;
6387 }
6388 \f
6389 /* Return the cost of the memory address X.
6390 For i386, it is better to use a complex address than let gcc copy
6391 the address into a reg and make a new pseudo. But not if the address
6392 requires two regs - that would mean more pseudos with longer
6393 lifetimes. */
6394 static int
6395 ix86_address_cost (rtx x)
6396 {
6397 struct ix86_address parts;
6398 int cost = 1;
6399 int ok = ix86_decompose_address (x, &parts);
6400
6401 gcc_assert (ok);
6402
6403 if (parts.base && GET_CODE (parts.base) == SUBREG)
6404 parts.base = SUBREG_REG (parts.base);
6405 if (parts.index && GET_CODE (parts.index) == SUBREG)
6406 parts.index = SUBREG_REG (parts.index);
6407
6408 /* More complex memory references are better. */
6409 if (parts.disp && parts.disp != const0_rtx)
6410 cost--;
6411 if (parts.seg != SEG_DEFAULT)
6412 cost--;
6413
6414 /* Attempt to minimize number of registers in the address. */
6415 if ((parts.base
6416 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6417 || (parts.index
6418 && (!REG_P (parts.index)
6419 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6420 cost++;
6421
6422 if (parts.base
6423 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6424 && parts.index
6425 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6426 && parts.base != parts.index)
6427 cost++;
6428
6429 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6430 since its predecode logic can't detect the length of instructions
6431 and it degenerates to vector decoding. Increase the cost of such
6432 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6433 to split such addresses or even refuse such addresses at all.
6434
6435 The following addressing modes are affected:
6436 [base+scale*index]
6437 [scale*index+disp]
6438 [base+index]
6439
6440 The first and last cases may be avoidable by explicitly coding the zero
6441 into the memory address, but I don't have an AMD-K6 machine handy to
6442 check this theory. */
6443
6444 if (TARGET_K6
6445 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6446 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6447 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6448 cost += 10;
6449
6450 return cost;
6451 }
6452 \f
6453 /* If X is a machine specific address (i.e. a symbol or label being
6454 referenced as a displacement from the GOT implemented using an
6455 UNSPEC), then return the base term. Otherwise return X. */
6456
6457 rtx
6458 ix86_find_base_term (rtx x)
6459 {
6460 rtx term;
6461
6462 if (TARGET_64BIT)
6463 {
6464 if (GET_CODE (x) != CONST)
6465 return x;
6466 term = XEXP (x, 0);
6467 if (GET_CODE (term) == PLUS
6468 && (CONST_INT_P (XEXP (term, 1))
6469 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6470 term = XEXP (term, 0);
6471 if (GET_CODE (term) != UNSPEC
6472 || XINT (term, 1) != UNSPEC_GOTPCREL)
6473 return x;
6474
6475 term = XVECEXP (term, 0, 0);
6476
6477 if (GET_CODE (term) != SYMBOL_REF
6478 && GET_CODE (term) != LABEL_REF)
6479 return x;
6480
6481 return term;
6482 }
6483
6484 term = ix86_delegitimize_address (x);
6485
6486 if (GET_CODE (term) != SYMBOL_REF
6487 && GET_CODE (term) != LABEL_REF)
6488 return x;
6489
6490 return term;
6491 }
6492
6493 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6494 this is used to form addresses of local data when -fPIC is in
6495 use. */
6496
6497 static bool
6498 darwin_local_data_pic (rtx disp)
6499 {
6500 if (GET_CODE (disp) == MINUS)
6501 {
6502 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6503 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6504 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6505 {
6506 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6507 if (! strcmp (sym_name, "<pic base>"))
6508 return true;
6509 }
6510 }
6511
6512 return false;
6513 }
6514 \f
6515 /* Determine if a given RTX is a valid constant. We already know this
6516 satisfies CONSTANT_P. */
6517
6518 bool
6519 legitimate_constant_p (rtx x)
6520 {
6521 switch (GET_CODE (x))
6522 {
6523 case CONST:
6524 x = XEXP (x, 0);
6525
6526 if (GET_CODE (x) == PLUS)
6527 {
6528 if (!CONST_INT_P (XEXP (x, 1)))
6529 return false;
6530 x = XEXP (x, 0);
6531 }
6532
6533 if (TARGET_MACHO && darwin_local_data_pic (x))
6534 return true;
6535
6536 /* Only some unspecs are valid as "constants". */
6537 if (GET_CODE (x) == UNSPEC)
6538 switch (XINT (x, 1))
6539 {
6540 case UNSPEC_GOTOFF:
6541 return TARGET_64BIT;
6542 case UNSPEC_TPOFF:
6543 case UNSPEC_NTPOFF:
6544 x = XVECEXP (x, 0, 0);
6545 return (GET_CODE (x) == SYMBOL_REF
6546 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6547 case UNSPEC_DTPOFF:
6548 x = XVECEXP (x, 0, 0);
6549 return (GET_CODE (x) == SYMBOL_REF
6550 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6551 default:
6552 return false;
6553 }
6554
6555 /* We must have drilled down to a symbol. */
6556 if (GET_CODE (x) == LABEL_REF)
6557 return true;
6558 if (GET_CODE (x) != SYMBOL_REF)
6559 return false;
6560 /* FALLTHRU */
6561
6562 case SYMBOL_REF:
6563 /* TLS symbols are never valid. */
6564 if (SYMBOL_REF_TLS_MODEL (x))
6565 return false;
6566 break;
6567
6568 case CONST_DOUBLE:
6569 if (GET_MODE (x) == TImode
6570 && x != CONST0_RTX (TImode)
6571 && !TARGET_64BIT)
6572 return false;
6573 break;
6574
6575 case CONST_VECTOR:
6576 if (x == CONST0_RTX (GET_MODE (x)))
6577 return true;
6578 return false;
6579
6580 default:
6581 break;
6582 }
6583
6584 /* Otherwise we handle everything else in the move patterns. */
6585 return true;
6586 }
6587
6588 /* Determine if it's legal to put X into the constant pool. This
6589 is not possible for the address of thread-local symbols, which
6590 is checked above. */
6591
6592 static bool
6593 ix86_cannot_force_const_mem (rtx x)
6594 {
6595 /* We can always put integral constants and vectors in memory. */
6596 switch (GET_CODE (x))
6597 {
6598 case CONST_INT:
6599 case CONST_DOUBLE:
6600 case CONST_VECTOR:
6601 return false;
6602
6603 default:
6604 break;
6605 }
6606 return !legitimate_constant_p (x);
6607 }
6608
6609 /* Determine if a given RTX is a valid constant address. */
6610
6611 bool
6612 constant_address_p (rtx x)
6613 {
6614 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6615 }
6616
6617 /* Nonzero if the constant value X is a legitimate general operand
6618 when generating PIC code. It is given that flag_pic is on and
6619 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6620
6621 bool
6622 legitimate_pic_operand_p (rtx x)
6623 {
6624 rtx inner;
6625
6626 switch (GET_CODE (x))
6627 {
6628 case CONST:
6629 inner = XEXP (x, 0);
6630 if (GET_CODE (inner) == PLUS
6631 && CONST_INT_P (XEXP (inner, 1)))
6632 inner = XEXP (inner, 0);
6633
6634 /* Only some unspecs are valid as "constants". */
6635 if (GET_CODE (inner) == UNSPEC)
6636 switch (XINT (inner, 1))
6637 {
6638 case UNSPEC_GOTOFF:
6639 return TARGET_64BIT;
6640 case UNSPEC_TPOFF:
6641 x = XVECEXP (inner, 0, 0);
6642 return (GET_CODE (x) == SYMBOL_REF
6643 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6644 default:
6645 return false;
6646 }
6647 /* FALLTHRU */
6648
6649 case SYMBOL_REF:
6650 case LABEL_REF:
6651 return legitimate_pic_address_disp_p (x);
6652
6653 default:
6654 return true;
6655 }
6656 }
6657
6658 /* Determine if a given CONST RTX is a valid memory displacement
6659 in PIC mode. */
6660
6661 int
6662 legitimate_pic_address_disp_p (rtx disp)
6663 {
6664 bool saw_plus;
6665
6666 /* In 64bit mode we can allow direct addresses of symbols and labels
6667 when they are not dynamic symbols. */
6668 if (TARGET_64BIT)
6669 {
6670 rtx op0 = disp, op1;
6671
6672 switch (GET_CODE (disp))
6673 {
6674 case LABEL_REF:
6675 return true;
6676
6677 case CONST:
6678 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6679 break;
6680 op0 = XEXP (XEXP (disp, 0), 0);
6681 op1 = XEXP (XEXP (disp, 0), 1);
6682 if (!CONST_INT_P (op1)
6683 || INTVAL (op1) >= 16*1024*1024
6684 || INTVAL (op1) < -16*1024*1024)
6685 break;
6686 if (GET_CODE (op0) == LABEL_REF)
6687 return true;
6688 if (GET_CODE (op0) != SYMBOL_REF)
6689 break;
6690 /* FALLTHRU */
6691
6692 case SYMBOL_REF:
6693 /* TLS references should always be enclosed in UNSPEC. */
6694 if (SYMBOL_REF_TLS_MODEL (op0))
6695 return false;
6696 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6697 return true;
6698 break;
6699
6700 default:
6701 break;
6702 }
6703 }
6704 if (GET_CODE (disp) != CONST)
6705 return 0;
6706 disp = XEXP (disp, 0);
6707
6708 if (TARGET_64BIT)
6709 {
6710 /* We cannot safely allow PLUS expressions, because of the limited
6711 allowed distance of GOT tables. We should not need these anyway. */
6712 if (GET_CODE (disp) != UNSPEC
6713 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6714 && XINT (disp, 1) != UNSPEC_GOTOFF))
6715 return 0;
6716
6717 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6718 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6719 return 0;
6720 return 1;
6721 }
6722
6723 saw_plus = false;
6724 if (GET_CODE (disp) == PLUS)
6725 {
6726 if (!CONST_INT_P (XEXP (disp, 1)))
6727 return 0;
6728 disp = XEXP (disp, 0);
6729 saw_plus = true;
6730 }
6731
6732 if (TARGET_MACHO && darwin_local_data_pic (disp))
6733 return 1;
6734
6735 if (GET_CODE (disp) != UNSPEC)
6736 return 0;
6737
6738 switch (XINT (disp, 1))
6739 {
6740 case UNSPEC_GOT:
6741 if (saw_plus)
6742 return false;
6743 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6744 case UNSPEC_GOTOFF:
6745 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6746 While the ABI also specifies a 32bit relocation, we never produce it
6747 in the small PIC model. */
6748 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6749 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6750 && !TARGET_64BIT)
6751 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6752 return false;
6753 case UNSPEC_GOTTPOFF:
6754 case UNSPEC_GOTNTPOFF:
6755 case UNSPEC_INDNTPOFF:
6756 if (saw_plus)
6757 return false;
6758 disp = XVECEXP (disp, 0, 0);
6759 return (GET_CODE (disp) == SYMBOL_REF
6760 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6761 case UNSPEC_NTPOFF:
6762 disp = XVECEXP (disp, 0, 0);
6763 return (GET_CODE (disp) == SYMBOL_REF
6764 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6765 case UNSPEC_DTPOFF:
6766 disp = XVECEXP (disp, 0, 0);
6767 return (GET_CODE (disp) == SYMBOL_REF
6768 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6769 }
6770
6771 return 0;
6772 }
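
/* As a rough illustration of the displacements accepted above (the exact
   RTL is generated by legitimize_pic_address further below):

     32bit PIC, local symbol:   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))
     32bit PIC, global symbol:  (const (unspec [(symbol_ref "foo")] UNSPEC_GOT))
     64bit PIC, global symbol:  (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL))

   A wrapping (plus ... (const_int N)) is stripped first, and TLS symbols
   are only accepted inside the matching @...TPOFF/@DTPOFF unspecs.  */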
6773
6774 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6775 memory address for an instruction. The MODE argument is the machine mode
6776 for the MEM expression that wants to use this address.
6777
6778 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6779 convert common non-canonical forms to canonical form so that they will
6780 be recognized. */
6781
6782 int
6783 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6784 {
6785 struct ix86_address parts;
6786 rtx base, index, disp;
6787 HOST_WIDE_INT scale;
6788 const char *reason = NULL;
6789 rtx reason_rtx = NULL_RTX;
6790
6791 if (TARGET_DEBUG_ADDR)
6792 {
6793 fprintf (stderr,
6794 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6795 GET_MODE_NAME (mode), strict);
6796 debug_rtx (addr);
6797 }
6798
6799 if (ix86_decompose_address (addr, &parts) <= 0)
6800 {
6801 reason = "decomposition failed";
6802 goto report_error;
6803 }
6804
6805 base = parts.base;
6806 index = parts.index;
6807 disp = parts.disp;
6808 scale = parts.scale;
6809
6810 /* Validate base register.
6811
6812 Don't allow SUBREG's that span more than a word here. It can lead to spill
6813 failures when the base is one word out of a two word structure, which is
6814 represented internally as a DImode int. */
6815
6816 if (base)
6817 {
6818 rtx reg;
6819 reason_rtx = base;
6820
6821 if (REG_P (base))
6822 reg = base;
6823 else if (GET_CODE (base) == SUBREG
6824 && REG_P (SUBREG_REG (base))
6825 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6826 <= UNITS_PER_WORD)
6827 reg = SUBREG_REG (base);
6828 else
6829 {
6830 reason = "base is not a register";
6831 goto report_error;
6832 }
6833
6834 if (GET_MODE (base) != Pmode)
6835 {
6836 reason = "base is not in Pmode";
6837 goto report_error;
6838 }
6839
6840 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6841 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6842 {
6843 reason = "base is not valid";
6844 goto report_error;
6845 }
6846 }
6847
6848 /* Validate index register.
6849
6850 Don't allow SUBREG's that span more than a word here -- same as above. */
6851
6852 if (index)
6853 {
6854 rtx reg;
6855 reason_rtx = index;
6856
6857 if (REG_P (index))
6858 reg = index;
6859 else if (GET_CODE (index) == SUBREG
6860 && REG_P (SUBREG_REG (index))
6861 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6862 <= UNITS_PER_WORD)
6863 reg = SUBREG_REG (index);
6864 else
6865 {
6866 reason = "index is not a register";
6867 goto report_error;
6868 }
6869
6870 if (GET_MODE (index) != Pmode)
6871 {
6872 reason = "index is not in Pmode";
6873 goto report_error;
6874 }
6875
6876 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6877 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6878 {
6879 reason = "index is not valid";
6880 goto report_error;
6881 }
6882 }
6883
6884 /* Validate scale factor. */
6885 if (scale != 1)
6886 {
6887 reason_rtx = GEN_INT (scale);
6888 if (!index)
6889 {
6890 reason = "scale without index";
6891 goto report_error;
6892 }
6893
6894 if (scale != 2 && scale != 4 && scale != 8)
6895 {
6896 reason = "scale is not a valid multiplier";
6897 goto report_error;
6898 }
6899 }
6900
6901 /* Validate displacement. */
6902 if (disp)
6903 {
6904 reason_rtx = disp;
6905
6906 if (GET_CODE (disp) == CONST
6907 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6908 switch (XINT (XEXP (disp, 0), 1))
6909 {
6910 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6911 used. While the ABI also specifies 32bit relocations, we never
6912 produce them and use IP-relative addressing instead. */
6913 case UNSPEC_GOT:
6914 case UNSPEC_GOTOFF:
6915 gcc_assert (flag_pic);
6916 if (!TARGET_64BIT)
6917 goto is_legitimate_pic;
6918 reason = "64bit address unspec";
6919 goto report_error;
6920
6921 case UNSPEC_GOTPCREL:
6922 gcc_assert (flag_pic);
6923 goto is_legitimate_pic;
6924
6925 case UNSPEC_GOTTPOFF:
6926 case UNSPEC_GOTNTPOFF:
6927 case UNSPEC_INDNTPOFF:
6928 case UNSPEC_NTPOFF:
6929 case UNSPEC_DTPOFF:
6930 break;
6931
6932 default:
6933 reason = "invalid address unspec";
6934 goto report_error;
6935 }
6936
6937 else if (SYMBOLIC_CONST (disp)
6938 && (flag_pic
6939 || (TARGET_MACHO
6940 #if TARGET_MACHO
6941 && MACHOPIC_INDIRECT
6942 && !machopic_operand_p (disp)
6943 #endif
6944 )))
6945 {
6946
6947 is_legitimate_pic:
6948 if (TARGET_64BIT && (index || base))
6949 {
6950 /* foo@dtpoff(%rX) is ok. */
6951 if (GET_CODE (disp) != CONST
6952 || GET_CODE (XEXP (disp, 0)) != PLUS
6953 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6954 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6955 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6956 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6957 {
6958 reason = "non-constant pic memory reference";
6959 goto report_error;
6960 }
6961 }
6962 else if (! legitimate_pic_address_disp_p (disp))
6963 {
6964 reason = "displacement is an invalid pic construct";
6965 goto report_error;
6966 }
6967
6968 /* This code used to verify that a symbolic pic displacement
6969 includes the pic_offset_table_rtx register.
6970
6971 While this is a good idea, unfortunately these constructs may
6972 be created by the "adds using lea" optimization for incorrect
6973 code like:
6974
6975 int a;
6976 int foo(int i)
6977 {
6978 return *(&a+i);
6979 }
6980
6981 This code is nonsensical, but results in addressing the
6982 GOT table with pic_offset_table_rtx as the base. We can't
6983 easily refuse it, since it gets matched by the "addsi3"
6984 pattern, which is later split to an lea when the output
6985 register differs from the input. While this could be
6986 handled by a separate addsi pattern for this case that
6987 never results in an lea, simply disabling this test seems
6988 to be the easier and correct fix for the crash. */
6989 }
6990 else if (GET_CODE (disp) != LABEL_REF
6991 && !CONST_INT_P (disp)
6992 && (GET_CODE (disp) != CONST
6993 || !legitimate_constant_p (disp))
6994 && (GET_CODE (disp) != SYMBOL_REF
6995 || !legitimate_constant_p (disp)))
6996 {
6997 reason = "displacement is not constant";
6998 goto report_error;
6999 }
7000 else if (TARGET_64BIT
7001 && !x86_64_immediate_operand (disp, VOIDmode))
7002 {
7003 reason = "displacement is out of range";
7004 goto report_error;
7005 }
7006 }
7007
7008 /* Everything looks valid. */
7009 if (TARGET_DEBUG_ADDR)
7010 fprintf (stderr, "Success.\n");
7011 return TRUE;
7012
7013 report_error:
7014 if (TARGET_DEBUG_ADDR)
7015 {
7016 fprintf (stderr, "Error: %s\n", reason);
7017 debug_rtx (reason_rtx);
7018 }
7019 return FALSE;
7020 }
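
/* A sketch of the address shapes this function accepts, in terms of the
   decomposition performed by ix86_decompose_address.  For example

     (plus (reg:SI bx)                               ; base
           (plus (mult (reg:SI ax) (const_int 4))    ; index * scale
                 (const_int 12)))                    ; displacement

   corresponds to the operand 12(%ebx,%eax,4).  Base and index must be
   Pmode (sub)registers no wider than a word, and the scale must be
   1, 2, 4 or 8, with any scale other than 1 requiring an index.  */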
7021 \f
7022 /* Return a unique alias set for the GOT. */
7023
7024 static HOST_WIDE_INT
7025 ix86_GOT_alias_set (void)
7026 {
7027 static HOST_WIDE_INT set = -1;
7028 if (set == -1)
7029 set = new_alias_set ();
7030 return set;
7031 }
7032
7033 /* Return a legitimate reference for ORIG (an address) using the
7034 register REG. If REG is 0, a new pseudo is generated.
7035
7036 There are two types of references that must be handled:
7037
7038 1. Global data references must load the address from the GOT, via
7039 the PIC reg. An insn is emitted to do this load, and the reg is
7040 returned.
7041
7042 2. Static data references, constant pool addresses, and code labels
7043 compute the address as an offset from the GOT, whose base is in
7044 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7045 differentiate them from global data objects. The returned
7046 address is the PIC reg + an unspec constant.
7047
7048 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7049 reg also appears in the address. */
7050
7051 static rtx
7052 legitimize_pic_address (rtx orig, rtx reg)
7053 {
7054 rtx addr = orig;
7055 rtx new = orig;
7056 rtx base;
7057
7058 #if TARGET_MACHO
7059 if (TARGET_MACHO && !TARGET_64BIT)
7060 {
7061 if (reg == 0)
7062 reg = gen_reg_rtx (Pmode);
7063 /* Use the generic Mach-O PIC machinery. */
7064 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7065 }
7066 #endif
7067
7068 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7069 new = addr;
7070 else if (TARGET_64BIT
7071 && ix86_cmodel != CM_SMALL_PIC
7072 && local_symbolic_operand (addr, Pmode))
7073 {
7074 rtx tmpreg;
7075 /* This symbol may be referenced via a displacement from the PIC
7076 base address (@GOTOFF). */
7077
7078 if (reload_in_progress)
7079 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7080 if (GET_CODE (addr) == CONST)
7081 addr = XEXP (addr, 0);
7082 if (GET_CODE (addr) == PLUS)
7083 {
7084 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7085 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7086 }
7087 else
7088 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7089 new = gen_rtx_CONST (Pmode, new);
7090 if (!reg)
7091 tmpreg = gen_reg_rtx (Pmode);
7092 else
7093 tmpreg = reg;
7094 emit_move_insn (tmpreg, new);
7095
7096 if (reg != 0)
7097 {
7098 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7099 tmpreg, 1, OPTAB_DIRECT);
7100 new = reg;
7101 }
7102 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7103 }
7104 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7105 {
7106 /* This symbol may be referenced via a displacement from the PIC
7107 base address (@GOTOFF). */
7108
7109 if (reload_in_progress)
7110 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7111 if (GET_CODE (addr) == CONST)
7112 addr = XEXP (addr, 0);
7113 if (GET_CODE (addr) == PLUS)
7114 {
7115 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7116 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7117 }
7118 else
7119 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7120 new = gen_rtx_CONST (Pmode, new);
7121 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7122
7123 if (reg != 0)
7124 {
7125 emit_move_insn (reg, new);
7126 new = reg;
7127 }
7128 }
7129 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7130 {
7131 if (TARGET_64BIT)
7132 {
7133 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7134 new = gen_rtx_CONST (Pmode, new);
7135 new = gen_const_mem (Pmode, new);
7136 set_mem_alias_set (new, ix86_GOT_alias_set ());
7137
7138 if (reg == 0)
7139 reg = gen_reg_rtx (Pmode);
7140 /* Use gen_movsi directly, otherwise the address is loaded
7141 into a register for CSE. We don't want to CSE these addresses;
7142 instead we CSE the addresses loaded from the GOT table, so skip this. */
7143 emit_insn (gen_movsi (reg, new));
7144 new = reg;
7145 }
7146 else
7147 {
7148 /* This symbol must be referenced via a load from the
7149 Global Offset Table (@GOT). */
7150
7151 if (reload_in_progress)
7152 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7153 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7154 new = gen_rtx_CONST (Pmode, new);
7155 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7156 new = gen_const_mem (Pmode, new);
7157 set_mem_alias_set (new, ix86_GOT_alias_set ());
7158
7159 if (reg == 0)
7160 reg = gen_reg_rtx (Pmode);
7161 emit_move_insn (reg, new);
7162 new = reg;
7163 }
7164 }
7165 else
7166 {
7167 if (CONST_INT_P (addr)
7168 && !x86_64_immediate_operand (addr, VOIDmode))
7169 {
7170 if (reg)
7171 {
7172 emit_move_insn (reg, addr);
7173 new = reg;
7174 }
7175 else
7176 new = force_reg (Pmode, addr);
7177 }
7178 else if (GET_CODE (addr) == CONST)
7179 {
7180 addr = XEXP (addr, 0);
7181
7182 /* We must match stuff we generate before. Assume the only
7183 unspecs that can get here are ours. Not that we could do
7184 anything with them anyway.... */
7185 if (GET_CODE (addr) == UNSPEC
7186 || (GET_CODE (addr) == PLUS
7187 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7188 return orig;
7189 gcc_assert (GET_CODE (addr) == PLUS);
7190 }
7191 if (GET_CODE (addr) == PLUS)
7192 {
7193 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7194
7195 /* Check first to see if this is a constant offset from a @GOTOFF
7196 symbol reference. */
7197 if (local_symbolic_operand (op0, Pmode)
7198 && CONST_INT_P (op1))
7199 {
7200 if (!TARGET_64BIT)
7201 {
7202 if (reload_in_progress)
7203 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7204 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7205 UNSPEC_GOTOFF);
7206 new = gen_rtx_PLUS (Pmode, new, op1);
7207 new = gen_rtx_CONST (Pmode, new);
7208 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7209
7210 if (reg != 0)
7211 {
7212 emit_move_insn (reg, new);
7213 new = reg;
7214 }
7215 }
7216 else
7217 {
7218 if (INTVAL (op1) < -16*1024*1024
7219 || INTVAL (op1) >= 16*1024*1024)
7220 {
7221 if (!x86_64_immediate_operand (op1, Pmode))
7222 op1 = force_reg (Pmode, op1);
7223 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7224 }
7225 }
7226 }
7227 else
7228 {
7229 base = legitimize_pic_address (XEXP (addr, 0), reg);
7230 new = legitimize_pic_address (XEXP (addr, 1),
7231 base == reg ? NULL_RTX : reg);
7232
7233 if (CONST_INT_P (new))
7234 new = plus_constant (base, INTVAL (new));
7235 else
7236 {
7237 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7238 {
7239 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7240 new = XEXP (new, 1);
7241 }
7242 new = gen_rtx_PLUS (Pmode, base, new);
7243 }
7244 }
7245 }
7246 }
7247 return new;
7248 }
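
/* A minimal sketch of the two 32bit cases described above, in terms of
   the assembler output that ultimately results (register choice is
   illustrative only):

     global data:   movl    foo@GOT(%ebx), %eax      ; load &foo from the GOT
                    movl    (%eax), %eax             ; then access foo
     static data:   leal    bar@GOTOFF(%ebx), %eax   ; address relative to the GOT base

   In 64bit mode the global case uses foo@GOTPCREL(%rip) instead and no
   PIC register is needed.  */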
7249 \f
7250 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7251
7252 static rtx
7253 get_thread_pointer (int to_reg)
7254 {
7255 rtx tp, reg, insn;
7256
7257 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7258 if (!to_reg)
7259 return tp;
7260
7261 reg = gen_reg_rtx (Pmode);
7262 insn = gen_rtx_SET (VOIDmode, reg, tp);
7263 insn = emit_insn (insn);
7264
7265 return reg;
7266 }
7267
7268 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7269 false if we expect this to be used for a memory address and true if
7270 we expect to load the address into a register. */
7271
7272 static rtx
7273 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7274 {
7275 rtx dest, base, off, pic, tp;
7276 int type;
7277
7278 switch (model)
7279 {
7280 case TLS_MODEL_GLOBAL_DYNAMIC:
7281 dest = gen_reg_rtx (Pmode);
7282 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7283
7284 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7285 {
7286 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7287
7288 start_sequence ();
7289 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7290 insns = get_insns ();
7291 end_sequence ();
7292
7293 emit_libcall_block (insns, dest, rax, x);
7294 }
7295 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7296 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7297 else
7298 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7299
7300 if (TARGET_GNU2_TLS)
7301 {
7302 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7303
7304 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7305 }
7306 break;
7307
7308 case TLS_MODEL_LOCAL_DYNAMIC:
7309 base = gen_reg_rtx (Pmode);
7310 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7311
7312 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7313 {
7314 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7315
7316 start_sequence ();
7317 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7318 insns = get_insns ();
7319 end_sequence ();
7320
7321 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7322 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7323 emit_libcall_block (insns, base, rax, note);
7324 }
7325 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7326 emit_insn (gen_tls_local_dynamic_base_64 (base));
7327 else
7328 emit_insn (gen_tls_local_dynamic_base_32 (base));
7329
7330 if (TARGET_GNU2_TLS)
7331 {
7332 rtx x = ix86_tls_module_base ();
7333
7334 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7335 gen_rtx_MINUS (Pmode, x, tp));
7336 }
7337
7338 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7339 off = gen_rtx_CONST (Pmode, off);
7340
7341 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7342
7343 if (TARGET_GNU2_TLS)
7344 {
7345 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7346
7347 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7348 }
7349
7350 break;
7351
7352 case TLS_MODEL_INITIAL_EXEC:
7353 if (TARGET_64BIT)
7354 {
7355 pic = NULL;
7356 type = UNSPEC_GOTNTPOFF;
7357 }
7358 else if (flag_pic)
7359 {
7360 if (reload_in_progress)
7361 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7362 pic = pic_offset_table_rtx;
7363 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7364 }
7365 else if (!TARGET_ANY_GNU_TLS)
7366 {
7367 pic = gen_reg_rtx (Pmode);
7368 emit_insn (gen_set_got (pic));
7369 type = UNSPEC_GOTTPOFF;
7370 }
7371 else
7372 {
7373 pic = NULL;
7374 type = UNSPEC_INDNTPOFF;
7375 }
7376
7377 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7378 off = gen_rtx_CONST (Pmode, off);
7379 if (pic)
7380 off = gen_rtx_PLUS (Pmode, pic, off);
7381 off = gen_const_mem (Pmode, off);
7382 set_mem_alias_set (off, ix86_GOT_alias_set ());
7383
7384 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7385 {
7386 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7387 off = force_reg (Pmode, off);
7388 return gen_rtx_PLUS (Pmode, base, off);
7389 }
7390 else
7391 {
7392 base = get_thread_pointer (true);
7393 dest = gen_reg_rtx (Pmode);
7394 emit_insn (gen_subsi3 (dest, base, off));
7395 }
7396 break;
7397
7398 case TLS_MODEL_LOCAL_EXEC:
7399 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7400 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7401 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7402 off = gen_rtx_CONST (Pmode, off);
7403
7404 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7405 {
7406 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7407 return gen_rtx_PLUS (Pmode, base, off);
7408 }
7409 else
7410 {
7411 base = get_thread_pointer (true);
7412 dest = gen_reg_rtx (Pmode);
7413 emit_insn (gen_subsi3 (dest, base, off));
7414 }
7415 break;
7416
7417 default:
7418 gcc_unreachable ();
7419 }
7420
7421 return dest;
7422 }
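
/* As a rough illustration, for the exec models above with GNU TLS (or in
   64bit mode) the returned address has the shape

     initial exec:  thread pointer + (mem (const (unspec [x] GOTNTPOFF)))
     local exec:    thread pointer + (const (unspec [x] NTPOFF))

   i.e. the TP-relative offset is either loaded from the GOT or folded in
   as an immediate.  Without GNU TLS the offset is instead subtracted
   from the thread pointer via gen_subsi3.  */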
7423
7424 /* Try machine-dependent ways of modifying an illegitimate address
7425 to be legitimate. If we find one, return the new, valid address.
7426 This macro is used in only one place: `memory_address' in explow.c.
7427
7428 OLDX is the address as it was before break_out_memory_refs was called.
7429 In some cases it is useful to look at this to decide what needs to be done.
7430
7431 MODE and WIN are passed so that this macro can use
7432 GO_IF_LEGITIMATE_ADDRESS.
7433
7434 It is always safe for this macro to do nothing. It exists to recognize
7435 opportunities to optimize the output.
7436
7437 For the 80386, we handle X+REG by loading X into a register R and
7438 using R+REG. R will go in a general reg and indexing will be used.
7439 However, if REG is a broken-out memory address or multiplication,
7440 nothing needs to be done because REG can certainly go in a general reg.
7441
7442 When -fpic is used, special handling is needed for symbolic references.
7443 See comments by legitimize_pic_address in i386.c for details. */
7444
7445 rtx
7446 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7447 {
7448 int changed = 0;
7449 unsigned log;
7450
7451 if (TARGET_DEBUG_ADDR)
7452 {
7453 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7454 GET_MODE_NAME (mode));
7455 debug_rtx (x);
7456 }
7457
7458 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7459 if (log)
7460 return legitimize_tls_address (x, log, false);
7461 if (GET_CODE (x) == CONST
7462 && GET_CODE (XEXP (x, 0)) == PLUS
7463 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7464 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7465 {
7466 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7467 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7468 }
7469
7470 if (flag_pic && SYMBOLIC_CONST (x))
7471 return legitimize_pic_address (x, 0);
7472
7473 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7474 if (GET_CODE (x) == ASHIFT
7475 && CONST_INT_P (XEXP (x, 1))
7476 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7477 {
7478 changed = 1;
7479 log = INTVAL (XEXP (x, 1));
7480 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7481 GEN_INT (1 << log));
7482 }
7483
7484 if (GET_CODE (x) == PLUS)
7485 {
7486 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7487
7488 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7489 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7490 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7491 {
7492 changed = 1;
7493 log = INTVAL (XEXP (XEXP (x, 0), 1));
7494 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7495 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7496 GEN_INT (1 << log));
7497 }
7498
7499 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7500 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7501 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7502 {
7503 changed = 1;
7504 log = INTVAL (XEXP (XEXP (x, 1), 1));
7505 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7506 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7507 GEN_INT (1 << log));
7508 }
7509
7510 /* Put multiply first if it isn't already. */
7511 if (GET_CODE (XEXP (x, 1)) == MULT)
7512 {
7513 rtx tmp = XEXP (x, 0);
7514 XEXP (x, 0) = XEXP (x, 1);
7515 XEXP (x, 1) = tmp;
7516 changed = 1;
7517 }
7518
7519 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7520 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7521 created by virtual register instantiation, register elimination, and
7522 similar optimizations. */
7523 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7524 {
7525 changed = 1;
7526 x = gen_rtx_PLUS (Pmode,
7527 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7528 XEXP (XEXP (x, 1), 0)),
7529 XEXP (XEXP (x, 1), 1));
7530 }
7531
7532 /* Canonicalize
7533 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7534 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7535 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7536 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7537 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7538 && CONSTANT_P (XEXP (x, 1)))
7539 {
7540 rtx constant;
7541 rtx other = NULL_RTX;
7542
7543 if (CONST_INT_P (XEXP (x, 1)))
7544 {
7545 constant = XEXP (x, 1);
7546 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7547 }
7548 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7549 {
7550 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7551 other = XEXP (x, 1);
7552 }
7553 else
7554 constant = 0;
7555
7556 if (constant)
7557 {
7558 changed = 1;
7559 x = gen_rtx_PLUS (Pmode,
7560 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7561 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7562 plus_constant (other, INTVAL (constant)));
7563 }
7564 }
7565
7566 if (changed && legitimate_address_p (mode, x, FALSE))
7567 return x;
7568
7569 if (GET_CODE (XEXP (x, 0)) == MULT)
7570 {
7571 changed = 1;
7572 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7573 }
7574
7575 if (GET_CODE (XEXP (x, 1)) == MULT)
7576 {
7577 changed = 1;
7578 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7579 }
7580
7581 if (changed
7582 && REG_P (XEXP (x, 1))
7583 && REG_P (XEXP (x, 0)))
7584 return x;
7585
7586 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7587 {
7588 changed = 1;
7589 x = legitimize_pic_address (x, 0);
7590 }
7591
7592 if (changed && legitimate_address_p (mode, x, FALSE))
7593 return x;
7594
7595 if (REG_P (XEXP (x, 0)))
7596 {
7597 rtx temp = gen_reg_rtx (Pmode);
7598 rtx val = force_operand (XEXP (x, 1), temp);
7599 if (val != temp)
7600 emit_move_insn (temp, val);
7601
7602 XEXP (x, 1) = temp;
7603 return x;
7604 }
7605
7606 else if (REG_P (XEXP (x, 1)))
7607 {
7608 rtx temp = gen_reg_rtx (Pmode);
7609 rtx val = force_operand (XEXP (x, 0), temp);
7610 if (val != temp)
7611 emit_move_insn (temp, val);
7612
7613 XEXP (x, 0) = temp;
7614 return x;
7615 }
7616 }
7617
7618 return x;
7619 }
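
/* A small example of the canonicalizations performed above: the address

     (plus (reg:SI ax) (ashift (reg:SI bx) (const_int 2)))

   is rewritten as (plus (mult (reg bx) (const_int 4)) (reg ax)), i.e. the
   shift becomes a multiply and the multiply is put first, which matches
   the (%eax,%ebx,4) addressing form that legitimate_address_p accepts.  */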
7620 \f
7621 /* Print an integer constant expression in assembler syntax. Addition
7622 and subtraction are the only arithmetic that may appear in these
7623 expressions. FILE is the stdio stream to write to, X is the rtx, and
7624 CODE is the operand print code from the output string. */
7625
7626 static void
7627 output_pic_addr_const (FILE *file, rtx x, int code)
7628 {
7629 char buf[256];
7630
7631 switch (GET_CODE (x))
7632 {
7633 case PC:
7634 gcc_assert (flag_pic);
7635 putc ('.', file);
7636 break;
7637
7638 case SYMBOL_REF:
7639 if (! TARGET_MACHO || TARGET_64BIT)
7640 output_addr_const (file, x);
7641 else
7642 {
7643 const char *name = XSTR (x, 0);
7644
7645 /* Mark the decl as referenced so that cgraph will output the function. */
7646 if (SYMBOL_REF_DECL (x))
7647 mark_decl_referenced (SYMBOL_REF_DECL (x));
7648
7649 #if TARGET_MACHO
7650 if (MACHOPIC_INDIRECT
7651 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7652 name = machopic_indirection_name (x, /*stub_p=*/true);
7653 #endif
7654 assemble_name (file, name);
7655 }
7656 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7657 fputs ("@PLT", file);
7658 break;
7659
7660 case LABEL_REF:
7661 x = XEXP (x, 0);
7662 /* FALLTHRU */
7663 case CODE_LABEL:
7664 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7665 assemble_name (asm_out_file, buf);
7666 break;
7667
7668 case CONST_INT:
7669 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7670 break;
7671
7672 case CONST:
7673 /* This used to output parentheses around the expression,
7674 but that does not work on the 386 (either ATT or BSD assembler). */
7675 output_pic_addr_const (file, XEXP (x, 0), code);
7676 break;
7677
7678 case CONST_DOUBLE:
7679 if (GET_MODE (x) == VOIDmode)
7680 {
7681 /* We can use %d if the number is <32 bits and positive. */
7682 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7683 fprintf (file, "0x%lx%08lx",
7684 (unsigned long) CONST_DOUBLE_HIGH (x),
7685 (unsigned long) CONST_DOUBLE_LOW (x));
7686 else
7687 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7688 }
7689 else
7690 /* We can't handle floating point constants;
7691 PRINT_OPERAND must handle them. */
7692 output_operand_lossage ("floating constant misused");
7693 break;
7694
7695 case PLUS:
7696 /* Some assemblers need integer constants to appear first. */
7697 if (CONST_INT_P (XEXP (x, 0)))
7698 {
7699 output_pic_addr_const (file, XEXP (x, 0), code);
7700 putc ('+', file);
7701 output_pic_addr_const (file, XEXP (x, 1), code);
7702 }
7703 else
7704 {
7705 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7706 output_pic_addr_const (file, XEXP (x, 1), code);
7707 putc ('+', file);
7708 output_pic_addr_const (file, XEXP (x, 0), code);
7709 }
7710 break;
7711
7712 case MINUS:
7713 if (!TARGET_MACHO)
7714 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7715 output_pic_addr_const (file, XEXP (x, 0), code);
7716 putc ('-', file);
7717 output_pic_addr_const (file, XEXP (x, 1), code);
7718 if (!TARGET_MACHO)
7719 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7720 break;
7721
7722 case UNSPEC:
7723 gcc_assert (XVECLEN (x, 0) == 1);
7724 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7725 switch (XINT (x, 1))
7726 {
7727 case UNSPEC_GOT:
7728 fputs ("@GOT", file);
7729 break;
7730 case UNSPEC_GOTOFF:
7731 fputs ("@GOTOFF", file);
7732 break;
7733 case UNSPEC_GOTPCREL:
7734 fputs ("@GOTPCREL(%rip)", file);
7735 break;
7736 case UNSPEC_GOTTPOFF:
7737 /* FIXME: This might be @TPOFF in Sun ld too. */
7738 fputs ("@GOTTPOFF", file);
7739 break;
7740 case UNSPEC_TPOFF:
7741 fputs ("@TPOFF", file);
7742 break;
7743 case UNSPEC_NTPOFF:
7744 if (TARGET_64BIT)
7745 fputs ("@TPOFF", file);
7746 else
7747 fputs ("@NTPOFF", file);
7748 break;
7749 case UNSPEC_DTPOFF:
7750 fputs ("@DTPOFF", file);
7751 break;
7752 case UNSPEC_GOTNTPOFF:
7753 if (TARGET_64BIT)
7754 fputs ("@GOTTPOFF(%rip)", file);
7755 else
7756 fputs ("@GOTNTPOFF", file);
7757 break;
7758 case UNSPEC_INDNTPOFF:
7759 fputs ("@INDNTPOFF", file);
7760 break;
7761 default:
7762 output_operand_lossage ("invalid UNSPEC as operand");
7763 break;
7764 }
7765 break;
7766
7767 default:
7768 output_operand_lossage ("invalid expression as operand");
7769 }
7770 }
7771
7772 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7773 We need to emit DTP-relative relocations. */
7774
7775 static void
7776 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7777 {
7778 fputs (ASM_LONG, file);
7779 output_addr_const (file, x);
7780 fputs ("@DTPOFF", file);
7781 switch (size)
7782 {
7783 case 4:
7784 break;
7785 case 8:
7786 fputs (", 0", file);
7787 break;
7788 default:
7789 gcc_unreachable ();
7790 }
7791 }
7792
7793 /* In the name of slightly smaller debug output, and to cater to
7794 general assembler lossage, recognize PIC+GOTOFF and turn it back
7795 into a direct symbol reference.
7796
7797 On Darwin, this is necessary to avoid a crash, because Darwin
7798 has a different PIC label for each routine but the DWARF debugging
7799 information is not associated with any particular routine, so it's
7800 necessary to remove references to the PIC label from RTL stored by
7801 the DWARF output code. */
7802
7803 static rtx
7804 ix86_delegitimize_address (rtx orig_x)
7805 {
7806 rtx x = orig_x;
7807 /* reg_addend is NULL or a multiple of some register. */
7808 rtx reg_addend = NULL_RTX;
7809 /* const_addend is NULL or a const_int. */
7810 rtx const_addend = NULL_RTX;
7811 /* This is the result, or NULL. */
7812 rtx result = NULL_RTX;
7813
7814 if (MEM_P (x))
7815 x = XEXP (x, 0);
7816
7817 if (TARGET_64BIT)
7818 {
7819 if (GET_CODE (x) != CONST
7820 || GET_CODE (XEXP (x, 0)) != UNSPEC
7821 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7822 || !MEM_P (orig_x))
7823 return orig_x;
7824 return XVECEXP (XEXP (x, 0), 0, 0);
7825 }
7826
7827 if (GET_CODE (x) != PLUS
7828 || GET_CODE (XEXP (x, 1)) != CONST)
7829 return orig_x;
7830
7831 if (REG_P (XEXP (x, 0))
7832 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7833 /* %ebx + GOT/GOTOFF */
7834 ;
7835 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7836 {
7837 /* %ebx + %reg * scale + GOT/GOTOFF */
7838 reg_addend = XEXP (x, 0);
7839 if (REG_P (XEXP (reg_addend, 0))
7840 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7841 reg_addend = XEXP (reg_addend, 1);
7842 else if (REG_P (XEXP (reg_addend, 1))
7843 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7844 reg_addend = XEXP (reg_addend, 0);
7845 else
7846 return orig_x;
7847 if (!REG_P (reg_addend)
7848 && GET_CODE (reg_addend) != MULT
7849 && GET_CODE (reg_addend) != ASHIFT)
7850 return orig_x;
7851 }
7852 else
7853 return orig_x;
7854
7855 x = XEXP (XEXP (x, 1), 0);
7856 if (GET_CODE (x) == PLUS
7857 && CONST_INT_P (XEXP (x, 1)))
7858 {
7859 const_addend = XEXP (x, 1);
7860 x = XEXP (x, 0);
7861 }
7862
7863 if (GET_CODE (x) == UNSPEC
7864 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7865 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7866 result = XVECEXP (x, 0, 0);
7867
7868 if (TARGET_MACHO && darwin_local_data_pic (x)
7869 && !MEM_P (orig_x))
7870 result = XEXP (x, 0);
7871
7872 if (! result)
7873 return orig_x;
7874
7875 if (const_addend)
7876 result = gen_rtx_PLUS (Pmode, result, const_addend);
7877 if (reg_addend)
7878 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7879 return result;
7880 }
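
/* For instance, the 32bit PIC address

     (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   is turned back into (symbol_ref "foo"), with any register and constant
   addends re-applied around the recovered symbol.  */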
7881 \f
7882 static void
7883 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7884 int fp, FILE *file)
7885 {
7886 const char *suffix;
7887
7888 if (mode == CCFPmode || mode == CCFPUmode)
7889 {
7890 enum rtx_code second_code, bypass_code;
7891 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7892 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7893 code = ix86_fp_compare_code_to_integer (code);
7894 mode = CCmode;
7895 }
7896 if (reverse)
7897 code = reverse_condition (code);
7898
7899 switch (code)
7900 {
7901 case EQ:
7902 suffix = "e";
7903 break;
7904 case NE:
7905 suffix = "ne";
7906 break;
7907 case GT:
7908 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7909 suffix = "g";
7910 break;
7911 case GTU:
7912 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7913 Those same assemblers have the same but opposite lossage on cmov. */
7914 gcc_assert (mode == CCmode);
7915 suffix = fp ? "nbe" : "a";
7916 break;
7917 case LT:
7918 switch (mode)
7919 {
7920 case CCNOmode:
7921 case CCGOCmode:
7922 suffix = "s";
7923 break;
7924
7925 case CCmode:
7926 case CCGCmode:
7927 suffix = "l";
7928 break;
7929
7930 default:
7931 gcc_unreachable ();
7932 }
7933 break;
7934 case LTU:
7935 gcc_assert (mode == CCmode);
7936 suffix = "b";
7937 break;
7938 case GE:
7939 switch (mode)
7940 {
7941 case CCNOmode:
7942 case CCGOCmode:
7943 suffix = "ns";
7944 break;
7945
7946 case CCmode:
7947 case CCGCmode:
7948 suffix = "ge";
7949 break;
7950
7951 default:
7952 gcc_unreachable ();
7953 }
7954 break;
7955 case GEU:
7956 /* ??? As above. */
7957 gcc_assert (mode == CCmode);
7958 suffix = fp ? "nb" : "ae";
7959 break;
7960 case LE:
7961 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7962 suffix = "le";
7963 break;
7964 case LEU:
7965 gcc_assert (mode == CCmode);
7966 suffix = "be";
7967 break;
7968 case UNORDERED:
7969 suffix = fp ? "u" : "p";
7970 break;
7971 case ORDERED:
7972 suffix = fp ? "nu" : "np";
7973 break;
7974 default:
7975 gcc_unreachable ();
7976 }
7977 fputs (suffix, file);
7978 }
7979
7980 /* Print the name of register X to FILE based on its machine mode and number.
7981 If CODE is 'w', pretend the mode is HImode.
7982 If CODE is 'b', pretend the mode is QImode.
7983 If CODE is 'k', pretend the mode is SImode.
7984 If CODE is 'q', pretend the mode is DImode.
7985 If CODE is 'h', pretend the reg is the 'high' byte register.
7986 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
7987
7988 void
7989 print_reg (rtx x, int code, FILE *file)
7990 {
7991 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7992 && REGNO (x) != FRAME_POINTER_REGNUM
7993 && REGNO (x) != FLAGS_REG
7994 && REGNO (x) != FPSR_REG
7995 && REGNO (x) != FPCR_REG);
7996
7997 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7998 putc ('%', file);
7999
8000 if (code == 'w' || MMX_REG_P (x))
8001 code = 2;
8002 else if (code == 'b')
8003 code = 1;
8004 else if (code == 'k')
8005 code = 4;
8006 else if (code == 'q')
8007 code = 8;
8008 else if (code == 'y')
8009 code = 3;
8010 else if (code == 'h')
8011 code = 0;
8012 else
8013 code = GET_MODE_SIZE (GET_MODE (x));
8014
8015 /* Irritatingly, AMD extended registers use a different naming convention
8016 from the normal registers. */
8017 if (REX_INT_REG_P (x))
8018 {
8019 gcc_assert (TARGET_64BIT);
8020 switch (code)
8021 {
8022 case 0:
8023 error ("extended registers have no high halves");
8024 break;
8025 case 1:
8026 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8027 break;
8028 case 2:
8029 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8030 break;
8031 case 4:
8032 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8033 break;
8034 case 8:
8035 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8036 break;
8037 default:
8038 error ("unsupported operand size for extended register");
8039 break;
8040 }
8041 return;
8042 }
8043 switch (code)
8044 {
8045 case 3:
8046 if (STACK_TOP_P (x))
8047 {
8048 fputs ("st(0)", file);
8049 break;
8050 }
8051 /* FALLTHRU */
8052 case 8:
8053 case 4:
8054 case 12:
8055 if (! ANY_FP_REG_P (x))
8056 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8057 /* FALLTHRU */
8058 case 16:
8059 case 2:
8060 normal:
8061 fputs (hi_reg_name[REGNO (x)], file);
8062 break;
8063 case 1:
8064 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8065 goto normal;
8066 fputs (qi_reg_name[REGNO (x)], file);
8067 break;
8068 case 0:
8069 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8070 goto normal;
8071 fputs (qi_high_reg_name[REGNO (x)], file);
8072 break;
8073 default:
8074 gcc_unreachable ();
8075 }
8076 }
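
/* For example, on the first AMD extended register the codes 'b', 'w', 'k'
   and 'q' print r8b, r8w, r8d and r8 respectively, while on a classic
   register such as reg 0 they print al, ax, eax and rax (the 'e'/'r'
   prefix being added in front of the hi_reg_name entry).  */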
8077
8078 /* Locate some local-dynamic symbol still in use by this function
8079 so that we can print its name in some tls_local_dynamic_base
8080 pattern. */
8081
8082 static const char *
8083 get_some_local_dynamic_name (void)
8084 {
8085 rtx insn;
8086
8087 if (cfun->machine->some_ld_name)
8088 return cfun->machine->some_ld_name;
8089
8090 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8091 if (INSN_P (insn)
8092 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8093 return cfun->machine->some_ld_name;
8094
8095 gcc_unreachable ();
8096 }
8097
8098 static int
8099 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8100 {
8101 rtx x = *px;
8102
8103 if (GET_CODE (x) == SYMBOL_REF
8104 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8105 {
8106 cfun->machine->some_ld_name = XSTR (x, 0);
8107 return 1;
8108 }
8109
8110 return 0;
8111 }
8112
8113 /* Meaning of CODE:
8114 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8115 C -- print opcode suffix for set/cmov insn.
8116 c -- like C, but print reversed condition
8117 F,f -- likewise, but for floating-point.
8118 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8119 otherwise nothing
8120 R -- print the prefix for register names.
8121 z -- print the opcode suffix for the size of the current operand.
8122 * -- print a star (in certain assembler syntax)
8123 A -- print an absolute memory reference.
8124 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8125 s -- print a shift double count, followed by the assembler's argument
8126 delimiter.
8127 b -- print the QImode name of the register for the indicated operand.
8128 %b0 would print %al if operands[0] is reg 0.
8129 w -- likewise, print the HImode name of the register.
8130 k -- likewise, print the SImode name of the register.
8131 q -- likewise, print the DImode name of the register.
8132 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8133 y -- print "st(0)" instead of "st" as a register.
8134 D -- print condition for SSE cmp instruction.
8135 P -- if PIC, print an @PLT suffix.
8136 X -- don't print any sort of PIC '@' suffix for a symbol.
8137 & -- print some in-use local-dynamic symbol name.
8138 H -- print a memory address offset by 8; used for sse high-parts
8139 */
8140
8141 void
8142 print_operand (FILE *file, rtx x, int code)
8143 {
8144 if (code)
8145 {
8146 switch (code)
8147 {
8148 case '*':
8149 if (ASSEMBLER_DIALECT == ASM_ATT)
8150 putc ('*', file);
8151 return;
8152
8153 case '&':
8154 assemble_name (file, get_some_local_dynamic_name ());
8155 return;
8156
8157 case 'A':
8158 switch (ASSEMBLER_DIALECT)
8159 {
8160 case ASM_ATT:
8161 putc ('*', file);
8162 break;
8163
8164 case ASM_INTEL:
8165 /* Intel syntax. For absolute addresses, registers should not
8166 be surrounded by brackets. */
8167 if (!REG_P (x))
8168 {
8169 putc ('[', file);
8170 PRINT_OPERAND (file, x, 0);
8171 putc (']', file);
8172 return;
8173 }
8174 break;
8175
8176 default:
8177 gcc_unreachable ();
8178 }
8179
8180 PRINT_OPERAND (file, x, 0);
8181 return;
8182
8183
8184 case 'L':
8185 if (ASSEMBLER_DIALECT == ASM_ATT)
8186 putc ('l', file);
8187 return;
8188
8189 case 'W':
8190 if (ASSEMBLER_DIALECT == ASM_ATT)
8191 putc ('w', file);
8192 return;
8193
8194 case 'B':
8195 if (ASSEMBLER_DIALECT == ASM_ATT)
8196 putc ('b', file);
8197 return;
8198
8199 case 'Q':
8200 if (ASSEMBLER_DIALECT == ASM_ATT)
8201 putc ('l', file);
8202 return;
8203
8204 case 'S':
8205 if (ASSEMBLER_DIALECT == ASM_ATT)
8206 putc ('s', file);
8207 return;
8208
8209 case 'T':
8210 if (ASSEMBLER_DIALECT == ASM_ATT)
8211 putc ('t', file);
8212 return;
8213
8214 case 'z':
8215 /* 387 opcodes don't get size suffixes if the operands are
8216 registers. */
8217 if (STACK_REG_P (x))
8218 return;
8219
8220 /* Likewise if using Intel opcodes. */
8221 if (ASSEMBLER_DIALECT == ASM_INTEL)
8222 return;
8223
8224 /* This is the size of op from size of operand. */
8225 switch (GET_MODE_SIZE (GET_MODE (x)))
8226 {
8227 case 1:
8228 putc ('b', file);
8229 return;
8230
8231 case 2:
8232 #ifdef HAVE_GAS_FILDS_FISTS
8233 putc ('s', file);
8234 #endif
8235 return;
8236
8237 case 4:
8238 if (GET_MODE (x) == SFmode)
8239 {
8240 putc ('s', file);
8241 return;
8242 }
8243 else
8244 putc ('l', file);
8245 return;
8246
8247 case 12:
8248 case 16:
8249 putc ('t', file);
8250 return;
8251
8252 case 8:
8253 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8254 {
8255 #ifdef GAS_MNEMONICS
8256 putc ('q', file);
8257 #else
8258 putc ('l', file);
8259 putc ('l', file);
8260 #endif
8261 }
8262 else
8263 putc ('l', file);
8264 return;
8265
8266 default:
8267 gcc_unreachable ();
8268 }
8269
8270 case 'b':
8271 case 'w':
8272 case 'k':
8273 case 'q':
8274 case 'h':
8275 case 'y':
8276 case 'X':
8277 case 'P':
8278 break;
8279
8280 case 's':
8281 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8282 {
8283 PRINT_OPERAND (file, x, 0);
8284 putc (',', file);
8285 }
8286 return;
8287
8288 case 'D':
8289 /* A little bit of braindamage here. The SSE compare instructions
8290 use completely different names for the comparisons than the
8291 fp conditional moves do. */
8292 switch (GET_CODE (x))
8293 {
8294 case EQ:
8295 case UNEQ:
8296 fputs ("eq", file);
8297 break;
8298 case LT:
8299 case UNLT:
8300 fputs ("lt", file);
8301 break;
8302 case LE:
8303 case UNLE:
8304 fputs ("le", file);
8305 break;
8306 case UNORDERED:
8307 fputs ("unord", file);
8308 break;
8309 case NE:
8310 case LTGT:
8311 fputs ("neq", file);
8312 break;
8313 case UNGE:
8314 case GE:
8315 fputs ("nlt", file);
8316 break;
8317 case UNGT:
8318 case GT:
8319 fputs ("nle", file);
8320 break;
8321 case ORDERED:
8322 fputs ("ord", file);
8323 break;
8324 default:
8325 gcc_unreachable ();
8326 }
8327 return;
8328 case 'O':
8329 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8330 if (ASSEMBLER_DIALECT == ASM_ATT)
8331 {
8332 switch (GET_MODE (x))
8333 {
8334 case HImode: putc ('w', file); break;
8335 case SImode:
8336 case SFmode: putc ('l', file); break;
8337 case DImode:
8338 case DFmode: putc ('q', file); break;
8339 default: gcc_unreachable ();
8340 }
8341 putc ('.', file);
8342 }
8343 #endif
8344 return;
8345 case 'C':
8346 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8347 return;
8348 case 'F':
8349 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8350 if (ASSEMBLER_DIALECT == ASM_ATT)
8351 putc ('.', file);
8352 #endif
8353 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8354 return;
8355
8356 /* Like above, but reverse condition */
8357 case 'c':
8358 /* Check to see if argument to %c is really a constant
8359 and not a condition code which needs to be reversed. */
8360 if (!COMPARISON_P (x))
8361 {
8362 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8363 return;
8364 }
8365 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8366 return;
8367 case 'f':
8368 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8369 if (ASSEMBLER_DIALECT == ASM_ATT)
8370 putc ('.', file);
8371 #endif
8372 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8373 return;
8374
8375 case 'H':
8376 /* It doesn't actually matter what mode we use here, as we're
8377 only going to use this for printing. */
8378 x = adjust_address_nv (x, DImode, 8);
8379 break;
8380
8381 case '+':
8382 {
8383 rtx x;
8384
8385 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8386 return;
8387
8388 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8389 if (x)
8390 {
8391 int pred_val = INTVAL (XEXP (x, 0));
8392
8393 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8394 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8395 {
8396 int taken = pred_val > REG_BR_PROB_BASE / 2;
8397 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8398
8399 /* Emit hints only when the default branch prediction
8400 heuristics would fail. */
8401 if (taken != cputaken)
8402 {
8403 /* We use 3e (DS) prefix for taken branches and
8404 2e (CS) prefix for not taken branches. */
8405 if (taken)
8406 fputs ("ds ; ", file);
8407 else
8408 fputs ("cs ; ", file);
8409 }
8410 }
8411 }
8412 return;
8413 }
8414 default:
8415 output_operand_lossage ("invalid operand code '%c'", code);
8416 }
8417 }
8418
8419 if (REG_P (x))
8420 print_reg (x, code, file);
8421
8422 else if (MEM_P (x))
8423 {
8424 /* No `byte ptr' prefix for call instructions. */
8425 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8426 {
8427 const char * size;
8428 switch (GET_MODE_SIZE (GET_MODE (x)))
8429 {
8430 case 1: size = "BYTE"; break;
8431 case 2: size = "WORD"; break;
8432 case 4: size = "DWORD"; break;
8433 case 8: size = "QWORD"; break;
8434 case 12: size = "XWORD"; break;
8435 case 16: size = "XMMWORD"; break;
8436 default:
8437 gcc_unreachable ();
8438 }
8439
8440 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8441 if (code == 'b')
8442 size = "BYTE";
8443 else if (code == 'w')
8444 size = "WORD";
8445 else if (code == 'k')
8446 size = "DWORD";
8447
8448 fputs (size, file);
8449 fputs (" PTR ", file);
8450 }
8451
8452 x = XEXP (x, 0);
8453 /* Avoid (%rip) for call operands. */
8454 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8455 && !CONST_INT_P (x))
8456 output_addr_const (file, x);
8457 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8458 output_operand_lossage ("invalid constraints for operand");
8459 else
8460 output_address (x);
8461 }
8462
8463 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8464 {
8465 REAL_VALUE_TYPE r;
8466 long l;
8467
8468 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8469 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8470
8471 if (ASSEMBLER_DIALECT == ASM_ATT)
8472 putc ('$', file);
8473 fprintf (file, "0x%08lx", l);
8474 }
8475
8476 /* These float cases don't actually occur as immediate operands. */
8477 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8478 {
8479 char dstr[30];
8480
8481 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8482 fprintf (file, "%s", dstr);
8483 }
8484
8485 else if (GET_CODE (x) == CONST_DOUBLE
8486 && GET_MODE (x) == XFmode)
8487 {
8488 char dstr[30];
8489
8490 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8491 fprintf (file, "%s", dstr);
8492 }
8493
8494 else
8495 {
8496 /* We have patterns that allow zero sets of memory, for instance.
8497 In 64-bit mode, we should probably support all 8-byte vectors,
8498 since we can in fact encode that into an immediate. */
8499 if (GET_CODE (x) == CONST_VECTOR)
8500 {
8501 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8502 x = const0_rtx;
8503 }
8504
8505 if (code != 'P')
8506 {
8507 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8508 {
8509 if (ASSEMBLER_DIALECT == ASM_ATT)
8510 putc ('$', file);
8511 }
8512 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8513 || GET_CODE (x) == LABEL_REF)
8514 {
8515 if (ASSEMBLER_DIALECT == ASM_ATT)
8516 putc ('$', file);
8517 else
8518 fputs ("OFFSET FLAT:", file);
8519 }
8520 }
8521 if (CONST_INT_P (x))
8522 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8523 else if (flag_pic)
8524 output_pic_addr_const (file, x, code);
8525 else
8526 output_addr_const (file, x);
8527 }
8528 }
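
/* A couple of hypothetical template fragments illustrating the codes
   documented above:

     %z0  on an SImode register operand emits the 'l' size suffix,
          so "add%z0" becomes "addl" in ATT syntax;
     %C1  on an (eq ...) comparison operand emits "e", so "set%C1"
          becomes "sete";
     %k2 / %b2 print the SImode / QImode name of operand 2,
          e.g. "%eax" / "%al".  */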
8529 \f
8530 /* Print a memory operand whose address is ADDR. */
8531
8532 void
8533 print_operand_address (FILE *file, rtx addr)
8534 {
8535 struct ix86_address parts;
8536 rtx base, index, disp;
8537 int scale;
8538 int ok = ix86_decompose_address (addr, &parts);
8539
8540 gcc_assert (ok);
8541
8542 base = parts.base;
8543 index = parts.index;
8544 disp = parts.disp;
8545 scale = parts.scale;
8546
8547 switch (parts.seg)
8548 {
8549 case SEG_DEFAULT:
8550 break;
8551 case SEG_FS:
8552 case SEG_GS:
8553 if (USER_LABEL_PREFIX[0] == 0)
8554 putc ('%', file);
8555 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8556 break;
8557 default:
8558 gcc_unreachable ();
8559 }
8560
8561 if (!base && !index)
8562 {
8563 /* A displacement-only address requires special attention. */
8564
8565 if (CONST_INT_P (disp))
8566 {
8567 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8568 {
8569 if (USER_LABEL_PREFIX[0] == 0)
8570 putc ('%', file);
8571 fputs ("ds:", file);
8572 }
8573 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8574 }
8575 else if (flag_pic)
8576 output_pic_addr_const (file, disp, 0);
8577 else
8578 output_addr_const (file, disp);
8579
8580 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8581 if (TARGET_64BIT)
8582 {
8583 if (GET_CODE (disp) == CONST
8584 && GET_CODE (XEXP (disp, 0)) == PLUS
8585 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8586 disp = XEXP (XEXP (disp, 0), 0);
8587 if (GET_CODE (disp) == LABEL_REF
8588 || (GET_CODE (disp) == SYMBOL_REF
8589 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8590 fputs ("(%rip)", file);
8591 }
8592 }
8593 else
8594 {
8595 if (ASSEMBLER_DIALECT == ASM_ATT)
8596 {
8597 if (disp)
8598 {
8599 if (flag_pic)
8600 output_pic_addr_const (file, disp, 0);
8601 else if (GET_CODE (disp) == LABEL_REF)
8602 output_asm_label (disp);
8603 else
8604 output_addr_const (file, disp);
8605 }
8606
8607 putc ('(', file);
8608 if (base)
8609 print_reg (base, 0, file);
8610 if (index)
8611 {
8612 putc (',', file);
8613 print_reg (index, 0, file);
8614 if (scale != 1)
8615 fprintf (file, ",%d", scale);
8616 }
8617 putc (')', file);
8618 }
8619 else
8620 {
8621 rtx offset = NULL_RTX;
8622
8623 if (disp)
8624 {
8625 /* Pull out the offset of a symbol; print any symbol itself. */
8626 if (GET_CODE (disp) == CONST
8627 && GET_CODE (XEXP (disp, 0)) == PLUS
8628 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8629 {
8630 offset = XEXP (XEXP (disp, 0), 1);
8631 disp = gen_rtx_CONST (VOIDmode,
8632 XEXP (XEXP (disp, 0), 0));
8633 }
8634
8635 if (flag_pic)
8636 output_pic_addr_const (file, disp, 0);
8637 else if (GET_CODE (disp) == LABEL_REF)
8638 output_asm_label (disp);
8639 else if (CONST_INT_P (disp))
8640 offset = disp;
8641 else
8642 output_addr_const (file, disp);
8643 }
8644
8645 putc ('[', file);
8646 if (base)
8647 {
8648 print_reg (base, 0, file);
8649 if (offset)
8650 {
8651 if (INTVAL (offset) >= 0)
8652 putc ('+', file);
8653 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8654 }
8655 }
8656 else if (offset)
8657 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8658 else
8659 putc ('0', file);
8660
8661 if (index)
8662 {
8663 putc ('+', file);
8664 print_reg (index, 0, file);
8665 if (scale != 1)
8666 fprintf (file, "*%d", scale);
8667 }
8668 putc (']', file);
8669 }
8670 }
8671 }
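
/* For example, an address decomposed as base = %ebp, index = %ecx,
   scale = 2, disp = -4 is printed as

     ATT syntax:    -4(%ebp,%ecx,2)
     Intel syntax:  [ebp-4+ecx*2]

   and a bare symbolic displacement gets the (%rip) suffix in 64bit mode
   so the shorter RIP-relative encoding is used.  */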
8672
8673 bool
8674 output_addr_const_extra (FILE *file, rtx x)
8675 {
8676 rtx op;
8677
8678 if (GET_CODE (x) != UNSPEC)
8679 return false;
8680
8681 op = XVECEXP (x, 0, 0);
8682 switch (XINT (x, 1))
8683 {
8684 case UNSPEC_GOTTPOFF:
8685 output_addr_const (file, op);
8686 /* FIXME: This might be @TPOFF in Sun ld. */
8687 fputs ("@GOTTPOFF", file);
8688 break;
8689 case UNSPEC_TPOFF:
8690 output_addr_const (file, op);
8691 fputs ("@TPOFF", file);
8692 break;
8693 case UNSPEC_NTPOFF:
8694 output_addr_const (file, op);
8695 if (TARGET_64BIT)
8696 fputs ("@TPOFF", file);
8697 else
8698 fputs ("@NTPOFF", file);
8699 break;
8700 case UNSPEC_DTPOFF:
8701 output_addr_const (file, op);
8702 fputs ("@DTPOFF", file);
8703 break;
8704 case UNSPEC_GOTNTPOFF:
8705 output_addr_const (file, op);
8706 if (TARGET_64BIT)
8707 fputs ("@GOTTPOFF(%rip)", file);
8708 else
8709 fputs ("@GOTNTPOFF", file);
8710 break;
8711 case UNSPEC_INDNTPOFF:
8712 output_addr_const (file, op);
8713 fputs ("@INDNTPOFF", file);
8714 break;
8715
8716 default:
8717 return false;
8718 }
8719
8720 return true;
8721 }
8722 \f
8723 /* Split one or more DImode RTL references into pairs of SImode
8724 references. The RTL can be REG, offsettable MEM, integer constant, or
8725 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8726 split and "num" is its length. lo_half and hi_half are output arrays
8727 that parallel "operands". */
8728
8729 void
8730 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8731 {
8732 while (num--)
8733 {
8734 rtx op = operands[num];
8735
8736 /* simplify_subreg refuses to split volatile memory addresses,
8737 but we still have to handle them. */
8738 if (MEM_P (op))
8739 {
8740 lo_half[num] = adjust_address (op, SImode, 0);
8741 hi_half[num] = adjust_address (op, SImode, 4);
8742 }
8743 else
8744 {
8745 lo_half[num] = simplify_gen_subreg (SImode, op,
8746 GET_MODE (op) == VOIDmode
8747 ? DImode : GET_MODE (op), 0);
8748 hi_half[num] = simplify_gen_subreg (SImode, op,
8749 GET_MODE (op) == VOIDmode
8750 ? DImode : GET_MODE (op), 4);
8751 }
8752 }
8753 }
8754 /* Split one or more TImode RTL references into pairs of DImode
8755 references. The RTL can be REG, offsettable MEM, integer constant, or
8756 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8757 split and "num" is its length. lo_half and hi_half are output arrays
8758 that parallel "operands". */
8759
8760 void
8761 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8762 {
8763 while (num--)
8764 {
8765 rtx op = operands[num];
8766
8767 /* simplify_subreg refuses to split volatile memory addresses, but we
8768 still have to handle them. */
8769 if (MEM_P (op))
8770 {
8771 lo_half[num] = adjust_address (op, DImode, 0);
8772 hi_half[num] = adjust_address (op, DImode, 8);
8773 }
8774 else
8775 {
8776 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8777 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8778 }
8779 }
8780 }
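
/* As a small example of what the splitters above produce: for a DImode
   constant with the value 0x100000002 (a CONST_DOUBLE when the host wide
   int is only 32 bits), split_di returns lo_half = (const_int 2) and
   hi_half = (const_int 1), and a (reg:DI) operand is split into two
   SImode subregs at byte offsets 0 and 4.  split_ti works the same way
   with DImode halves at offsets 0 and 8.  */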
8781 \f
8782 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8783 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8784 is the expression of the binary operation. The output may either be
8785 emitted here, or returned to the caller, like all output_* functions.
8786
8787 There is no guarantee that the operands are the same mode, as they
8788 might be within FLOAT or FLOAT_EXTEND expressions. */
8789
8790 #ifndef SYSV386_COMPAT
8791 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8792 wants to fix the assemblers because that causes incompatibility
8793 with gcc. No-one wants to fix gcc because that causes
8794 incompatibility with assemblers... You can use the option of
8795 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8796 #define SYSV386_COMPAT 1
8797 #endif
8798
8799 const char *
8800 output_387_binary_op (rtx insn, rtx *operands)
8801 {
8802 static char buf[30];
8803 const char *p;
8804 const char *ssep;
8805 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8806
8807 #ifdef ENABLE_CHECKING
8808 /* Even if we do not want to check the inputs, this documents the input
8809 constraints, which helps in understanding the following code. */
8810 if (STACK_REG_P (operands[0])
8811 && ((REG_P (operands[1])
8812 && REGNO (operands[0]) == REGNO (operands[1])
8813 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8814 || (REG_P (operands[2])
8815 && REGNO (operands[0]) == REGNO (operands[2])
8816 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8817 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8818 ; /* ok */
8819 else
8820 gcc_assert (is_sse);
8821 #endif
8822
8823 switch (GET_CODE (operands[3]))
8824 {
8825 case PLUS:
8826 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8827 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8828 p = "fiadd";
8829 else
8830 p = "fadd";
8831 ssep = "add";
8832 break;
8833
8834 case MINUS:
8835 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8836 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8837 p = "fisub";
8838 else
8839 p = "fsub";
8840 ssep = "sub";
8841 break;
8842
8843 case MULT:
8844 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8845 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8846 p = "fimul";
8847 else
8848 p = "fmul";
8849 ssep = "mul";
8850 break;
8851
8852 case DIV:
8853 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8854 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8855 p = "fidiv";
8856 else
8857 p = "fdiv";
8858 ssep = "div";
8859 break;
8860
8861 default:
8862 gcc_unreachable ();
8863 }
8864
8865 if (is_sse)
8866 {
8867 strcpy (buf, ssep);
8868 if (GET_MODE (operands[0]) == SFmode)
8869 strcat (buf, "ss\t{%2, %0|%0, %2}");
8870 else
8871 strcat (buf, "sd\t{%2, %0|%0, %2}");
8872 return buf;
8873 }
8874 strcpy (buf, p);
8875
8876 switch (GET_CODE (operands[3]))
8877 {
8878 case MULT:
8879 case PLUS:
8880 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8881 {
8882 rtx temp = operands[2];
8883 operands[2] = operands[1];
8884 operands[1] = temp;
8885 }
8886
8887       /* We now know that operands[0] == operands[1].  */
8888
8889 if (MEM_P (operands[2]))
8890 {
8891 p = "%z2\t%2";
8892 break;
8893 }
8894
8895 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8896 {
8897 if (STACK_TOP_P (operands[0]))
8898 /* How is it that we are storing to a dead operand[2]?
8899 Well, presumably operands[1] is dead too. We can't
8900 store the result to st(0) as st(0) gets popped on this
8901 instruction. Instead store to operands[2] (which I
8902 think has to be st(1)). st(1) will be popped later.
8903 gcc <= 2.8.1 didn't have this check and generated
8904 assembly code that the Unixware assembler rejected. */
8905 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8906 else
8907 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8908 break;
8909 }
8910
8911 if (STACK_TOP_P (operands[0]))
8912 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8913 else
8914 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8915 break;
8916
8917 case MINUS:
8918 case DIV:
8919 if (MEM_P (operands[1]))
8920 {
8921 p = "r%z1\t%1";
8922 break;
8923 }
8924
8925 if (MEM_P (operands[2]))
8926 {
8927 p = "%z2\t%2";
8928 break;
8929 }
8930
8931 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8932 {
8933 #if SYSV386_COMPAT
8934 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8935 derived assemblers, confusingly reverse the direction of
8936 the operation for fsub{r} and fdiv{r} when the
8937 destination register is not st(0). The Intel assembler
8938 doesn't have this brain damage. Read !SYSV386_COMPAT to
8939 figure out what the hardware really does. */
8940 if (STACK_TOP_P (operands[0]))
8941 p = "{p\t%0, %2|rp\t%2, %0}";
8942 else
8943 p = "{rp\t%2, %0|p\t%0, %2}";
8944 #else
8945 if (STACK_TOP_P (operands[0]))
8946 /* As above for fmul/fadd, we can't store to st(0). */
8947 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8948 else
8949 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8950 #endif
8951 break;
8952 }
8953
8954 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8955 {
8956 #if SYSV386_COMPAT
8957 if (STACK_TOP_P (operands[0]))
8958 p = "{rp\t%0, %1|p\t%1, %0}";
8959 else
8960 p = "{p\t%1, %0|rp\t%0, %1}";
8961 #else
8962 if (STACK_TOP_P (operands[0]))
8963 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8964 else
8965 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8966 #endif
8967 break;
8968 }
8969
8970 if (STACK_TOP_P (operands[0]))
8971 {
8972 if (STACK_TOP_P (operands[1]))
8973 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8974 else
8975 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8976 break;
8977 }
8978 else if (STACK_TOP_P (operands[1]))
8979 {
8980 #if SYSV386_COMPAT
8981 p = "{\t%1, %0|r\t%0, %1}";
8982 #else
8983 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8984 #endif
8985 }
8986 else
8987 {
8988 #if SYSV386_COMPAT
8989 p = "{r\t%2, %0|\t%0, %2}";
8990 #else
8991 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8992 #endif
8993 }
8994 break;
8995
8996 default:
8997 gcc_unreachable ();
8998 }
8999
9000 strcat (buf, p);
9001 return buf;
9002 }
9003
9004 /* Return needed mode for entity in optimize_mode_switching pass. */
9005
9006 int
9007 ix86_mode_needed (int entity, rtx insn)
9008 {
9009 enum attr_i387_cw mode;
9010
9011   /* The mode UNINITIALIZED is used to store the control word after a
9012      function call or ASM pattern.  The mode ANY specifies that the function
9013      has no requirements on the control word and makes no changes in the
9014      bits we are interested in.  */
9015
9016 if (CALL_P (insn)
9017 || (NONJUMP_INSN_P (insn)
9018 && (asm_noperands (PATTERN (insn)) >= 0
9019 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9020 return I387_CW_UNINITIALIZED;
9021
9022 if (recog_memoized (insn) < 0)
9023 return I387_CW_ANY;
9024
9025 mode = get_attr_i387_cw (insn);
9026
9027 switch (entity)
9028 {
9029 case I387_TRUNC:
9030 if (mode == I387_CW_TRUNC)
9031 return mode;
9032 break;
9033
9034 case I387_FLOOR:
9035 if (mode == I387_CW_FLOOR)
9036 return mode;
9037 break;
9038
9039 case I387_CEIL:
9040 if (mode == I387_CW_CEIL)
9041 return mode;
9042 break;
9043
9044 case I387_MASK_PM:
9045 if (mode == I387_CW_MASK_PM)
9046 return mode;
9047 break;
9048
9049 default:
9050 gcc_unreachable ();
9051 }
9052
9053 return I387_CW_ANY;
9054 }
9055
9056 /* Output code to initialize the control word copies used by the trunc?f?i
9057    and rounding patterns.  MODE selects the variant to prepare: the current
9058    control word is saved and a modified copy is written to a stack slot.  */
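/* In the 387 control word, bits 10-11 form the rounding-control field
   (00 = to nearest, 01 = down toward -inf, 10 = up toward +inf,
   11 = toward zero) and bit 5 is the precision-exception mask, which is
   why the code below uses the masks 0x0c00, 0x0400, 0x0800 and 0x0020.  */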
9059
9060 void
9061 emit_i387_cw_initialization (int mode)
9062 {
9063 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9064 rtx new_mode;
9065
9066 int slot;
9067
9068 rtx reg = gen_reg_rtx (HImode);
9069
9070 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9071 emit_move_insn (reg, copy_rtx (stored_mode));
9072
9073 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9074 {
9075 switch (mode)
9076 {
9077 case I387_CW_TRUNC:
9078 /* round toward zero (truncate) */
9079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9080 slot = SLOT_CW_TRUNC;
9081 break;
9082
9083 case I387_CW_FLOOR:
9084 /* round down toward -oo */
9085 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9087 slot = SLOT_CW_FLOOR;
9088 break;
9089
9090 case I387_CW_CEIL:
9091 /* round up toward +oo */
9092 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9093 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9094 slot = SLOT_CW_CEIL;
9095 break;
9096
9097 case I387_CW_MASK_PM:
9098 /* mask precision exception for nearbyint() */
9099 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9100 slot = SLOT_CW_MASK_PM;
9101 break;
9102
9103 default:
9104 gcc_unreachable ();
9105 }
9106 }
9107 else
9108 {
9109 switch (mode)
9110 {
9111 case I387_CW_TRUNC:
9112 /* round toward zero (truncate) */
9113 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9114 slot = SLOT_CW_TRUNC;
9115 break;
9116
9117 case I387_CW_FLOOR:
9118 /* round down toward -oo */
9119 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9120 slot = SLOT_CW_FLOOR;
9121 break;
9122
9123 case I387_CW_CEIL:
9124 /* round up toward +oo */
9125 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9126 slot = SLOT_CW_CEIL;
9127 break;
9128
9129 case I387_CW_MASK_PM:
9130 /* mask precision exception for nearbyint() */
9131 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9132 slot = SLOT_CW_MASK_PM;
9133 break;
9134
9135 default:
9136 gcc_unreachable ();
9137 }
9138 }
9139
9140 gcc_assert (slot < MAX_386_STACK_LOCALS);
9141
9142 new_mode = assign_386_stack_local (HImode, slot);
9143 emit_move_insn (new_mode, reg);
9144 }
9145
9146 /* Output code for INSN to convert a float to a signed int. OPERANDS
9147 are the insn operands. The output may be [HSD]Imode and the input
9148 operand may be [SDX]Fmode. */
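/* When a non-default rounding mode is needed, the emitted sequence is
   roughly:

	fldcw	<new control word>	(operand 3)
	fistp	<mem>			(with the proper size suffix)
	fldcw	<saved control word>	(operand 2)

   with a leading "fld %y1" to duplicate the value first when it must
   survive the popping store (DImode or fisttp with a live stack top).  */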
9149
9150 const char *
9151 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9152 {
9153 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9154 int dimode_p = GET_MODE (operands[0]) == DImode;
9155 int round_mode = get_attr_i387_cw (insn);
9156
9157 /* Jump through a hoop or two for DImode, since the hardware has no
9158 non-popping instruction. We used to do this a different way, but
9159 that was somewhat fragile and broke with post-reload splitters. */
9160 if ((dimode_p || fisttp) && !stack_top_dies)
9161 output_asm_insn ("fld\t%y1", operands);
9162
9163 gcc_assert (STACK_TOP_P (operands[1]));
9164 gcc_assert (MEM_P (operands[0]));
9165
9166 if (fisttp)
9167 output_asm_insn ("fisttp%z0\t%0", operands);
9168 else
9169 {
9170 if (round_mode != I387_CW_ANY)
9171 output_asm_insn ("fldcw\t%3", operands);
9172 if (stack_top_dies || dimode_p)
9173 output_asm_insn ("fistp%z0\t%0", operands);
9174 else
9175 output_asm_insn ("fist%z0\t%0", operands);
9176 if (round_mode != I387_CW_ANY)
9177 output_asm_insn ("fldcw\t%2", operands);
9178 }
9179
9180 return "";
9181 }
9182
9183 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9184 have the values zero or one, indicates the ffreep insn's operand
9185 from the OPERANDS array. */
9186
9187 static const char *
9188 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9189 {
9190 if (TARGET_USE_FFREEP)
9191 #if HAVE_AS_IX86_FFREEP
9192 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9193 #else
9194 {
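      /* The assembler does not know the ffreep mnemonic here, so we emit
	 its opcode bytes directly: ffreep %st(i) encodes as DF C0+i, and on
	 this little-endian target ".word 0xc0df" assembles to the bytes
	 DF C0.  The digit below is patched for the actual stack register.  */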
9195 static char retval[] = ".word\t0xc_df";
9196 int regno = REGNO (operands[opno]);
9197
9198 gcc_assert (FP_REGNO_P (regno));
9199
9200 retval[9] = '0' + (regno - FIRST_STACK_REG);
9201 return retval;
9202 }
9203 #endif
9204
9205 return opno ? "fstp\t%y1" : "fstp\t%y0";
9206 }
9207
9208
9209 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9210 should be used. UNORDERED_P is true when fucom should be used. */
9211
9212 const char *
9213 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9214 {
9215 int stack_top_dies;
9216 rtx cmp_op0, cmp_op1;
9217 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9218
9219 if (eflags_p)
9220 {
9221 cmp_op0 = operands[0];
9222 cmp_op1 = operands[1];
9223 }
9224 else
9225 {
9226 cmp_op0 = operands[1];
9227 cmp_op1 = operands[2];
9228 }
9229
9230 if (is_sse)
9231 {
9232 if (GET_MODE (operands[0]) == SFmode)
9233 if (unordered_p)
9234 return "ucomiss\t{%1, %0|%0, %1}";
9235 else
9236 return "comiss\t{%1, %0|%0, %1}";
9237 else
9238 if (unordered_p)
9239 return "ucomisd\t{%1, %0|%0, %1}";
9240 else
9241 return "comisd\t{%1, %0|%0, %1}";
9242 }
9243
9244 gcc_assert (STACK_TOP_P (cmp_op0));
9245
9246 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9247
9248 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9249 {
9250 if (stack_top_dies)
9251 {
9252 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9253 return output_387_ffreep (operands, 1);
9254 }
9255 else
9256 return "ftst\n\tfnstsw\t%0";
9257 }
9258
9259 if (STACK_REG_P (cmp_op1)
9260 && stack_top_dies
9261 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9262 && REGNO (cmp_op1) != FIRST_STACK_REG)
9263 {
9264       /* If the top of the 387 stack dies, and the other operand
9265          is also a stack register that dies, then this must be a
9266          `fcompp' float compare.  */
9267
9268 if (eflags_p)
9269 {
9270 /* There is no double popping fcomi variant. Fortunately,
9271 eflags is immune from the fstp's cc clobbering. */
9272 if (unordered_p)
9273 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9274 else
9275 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9276 return output_387_ffreep (operands, 0);
9277 }
9278 else
9279 {
9280 if (unordered_p)
9281 return "fucompp\n\tfnstsw\t%0";
9282 else
9283 return "fcompp\n\tfnstsw\t%0";
9284 }
9285 }
9286 else
9287 {
9288 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9289
9290 static const char * const alt[16] =
9291 {
9292 "fcom%z2\t%y2\n\tfnstsw\t%0",
9293 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9294 "fucom%z2\t%y2\n\tfnstsw\t%0",
9295 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9296
9297 "ficom%z2\t%y2\n\tfnstsw\t%0",
9298 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9299 NULL,
9300 NULL,
9301
9302 "fcomi\t{%y1, %0|%0, %y1}",
9303 "fcomip\t{%y1, %0|%0, %y1}",
9304 "fucomi\t{%y1, %0|%0, %y1}",
9305 "fucomip\t{%y1, %0|%0, %y1}",
9306
9307 NULL,
9308 NULL,
9309 NULL,
9310 NULL
9311 };
9312
9313 int mask;
9314 const char *ret;
9315
9316 mask = eflags_p << 3;
9317 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9318 mask |= unordered_p << 1;
9319 mask |= stack_top_dies;
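      /* For example, an ordered fcomi compare where the top of the stack
	 dies gives mask 0b1001 = 9 and selects "fcomip".  */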
9320
9321 gcc_assert (mask < 16);
9322 ret = alt[mask];
9323 gcc_assert (ret);
9324
9325 return ret;
9326 }
9327 }
9328
9329 void
9330 ix86_output_addr_vec_elt (FILE *file, int value)
9331 {
9332 const char *directive = ASM_LONG;
9333
9334 #ifdef ASM_QUAD
9335 if (TARGET_64BIT)
9336 directive = ASM_QUAD;
9337 #else
9338 gcc_assert (!TARGET_64BIT);
9339 #endif
9340
9341 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9342 }
9343
9344 void
9345 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9346 {
9347 if (TARGET_64BIT)
9348 fprintf (file, "%s%s%d-%s%d\n",
9349 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9350 else if (HAVE_AS_GOTOFF_IN_DATA)
9351 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9352 #if TARGET_MACHO
9353 else if (TARGET_MACHO)
9354 {
9355 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9356 machopic_output_function_base_name (file);
9357 fprintf(file, "\n");
9358 }
9359 #endif
9360 else
9361 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9362 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9363 }
9364 \f
9365 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9366 for the target. */
9367
9368 void
9369 ix86_expand_clear (rtx dest)
9370 {
9371 rtx tmp;
9372
9373 /* We play register width games, which are only valid after reload. */
9374 gcc_assert (reload_completed);
9375
9376 /* Avoid HImode and its attendant prefix byte. */
9377 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9378 dest = gen_rtx_REG (SImode, REGNO (dest));
9379
9380 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9381
9382 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9383 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9384 {
9385 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9386 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9387 }
9388
9389 emit_insn (tmp);
9390 }
9391
9392 /* X is an unchanging MEM. If it is a constant pool reference, return
9393 the constant pool rtx, else NULL. */
9394
9395 rtx
9396 maybe_get_pool_constant (rtx x)
9397 {
9398 x = ix86_delegitimize_address (XEXP (x, 0));
9399
9400 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9401 return get_pool_constant (x);
9402
9403 return NULL_RTX;
9404 }
9405
9406 void
9407 ix86_expand_move (enum machine_mode mode, rtx operands[])
9408 {
9409 int strict = (reload_in_progress || reload_completed);
9410 rtx op0, op1;
9411 enum tls_model model;
9412
9413 op0 = operands[0];
9414 op1 = operands[1];
9415
9416 if (GET_CODE (op1) == SYMBOL_REF)
9417 {
9418 model = SYMBOL_REF_TLS_MODEL (op1);
9419 if (model)
9420 {
9421 op1 = legitimize_tls_address (op1, model, true);
9422 op1 = force_operand (op1, op0);
9423 if (op1 == op0)
9424 return;
9425 }
9426 }
9427 else if (GET_CODE (op1) == CONST
9428 && GET_CODE (XEXP (op1, 0)) == PLUS
9429 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9430 {
9431 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9432 if (model)
9433 {
9434 rtx addend = XEXP (XEXP (op1, 0), 1);
9435 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9436 op1 = force_operand (op1, NULL);
9437 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9438 op0, 1, OPTAB_DIRECT);
9439 if (op1 == op0)
9440 return;
9441 }
9442 }
9443
9444 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9445 {
9446 if (TARGET_MACHO && !TARGET_64BIT)
9447 {
9448 #if TARGET_MACHO
9449 if (MACHOPIC_PURE)
9450 {
9451 rtx temp = ((reload_in_progress
9452 || ((op0 && REG_P (op0))
9453 && mode == Pmode))
9454 ? op0 : gen_reg_rtx (Pmode));
9455 op1 = machopic_indirect_data_reference (op1, temp);
9456 op1 = machopic_legitimize_pic_address (op1, mode,
9457 temp == op1 ? 0 : temp);
9458 }
9459 else if (MACHOPIC_INDIRECT)
9460 op1 = machopic_indirect_data_reference (op1, 0);
9461 if (op0 == op1)
9462 return;
9463 #endif
9464 }
9465 else
9466 {
9467 if (MEM_P (op0))
9468 op1 = force_reg (Pmode, op1);
9469 else
9470 op1 = legitimize_address (op1, op1, Pmode);
9471 }
9472 }
9473 else
9474 {
9475 if (MEM_P (op0)
9476 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9477 || !push_operand (op0, mode))
9478 && MEM_P (op1))
9479 op1 = force_reg (mode, op1);
9480
9481 if (push_operand (op0, mode)
9482 && ! general_no_elim_operand (op1, mode))
9483 op1 = copy_to_mode_reg (mode, op1);
9484
9485       /* In 64-bit compilation, force large constants into a register
9486          to get them CSEd.  */
9487 if (TARGET_64BIT && mode == DImode
9488 && immediate_operand (op1, mode)
9489 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9490 && !register_operand (op0, mode)
9491 && optimize && !reload_completed && !reload_in_progress)
9492 op1 = copy_to_mode_reg (mode, op1);
9493
9494 if (FLOAT_MODE_P (mode))
9495 {
9496           /* If we are loading a floating point constant to a register,
9497              force the value to memory now, since we'll get better code
9498              out of the back end.  */
9499
9500 if (strict)
9501 ;
9502 else if (GET_CODE (op1) == CONST_DOUBLE)
9503 {
9504 op1 = validize_mem (force_const_mem (mode, op1));
9505 if (!register_operand (op0, mode))
9506 {
9507 rtx temp = gen_reg_rtx (mode);
9508 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9509 emit_move_insn (op0, temp);
9510 return;
9511 }
9512 }
9513 }
9514 }
9515
9516 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9517 }
9518
9519 void
9520 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9521 {
9522 rtx op0 = operands[0], op1 = operands[1];
9523
9524   /* Force constants other than zero into memory.  We do not know how
9525      the instructions used to build constants modify the upper 64 bits
9526      of the register; once we have that information we may be able
9527      to handle some of them more efficiently.  */
9528 if ((reload_in_progress | reload_completed) == 0
9529 && register_operand (op0, mode)
9530 && CONSTANT_P (op1)
9531 && standard_sse_constant_p (op1) <= 0)
9532 op1 = validize_mem (force_const_mem (mode, op1));
9533
9534 /* Make operand1 a register if it isn't already. */
9535 if (!no_new_pseudos
9536 && !register_operand (op0, mode)
9537 && !register_operand (op1, mode))
9538 {
9539 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9540 return;
9541 }
9542
9543 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9544 }
9545
9546 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9547 straight to ix86_expand_vector_move. */
9548 /* Code generation for scalar reg-reg moves of single and double precision data:
9549 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9550 movaps reg, reg
9551 else
9552 movss reg, reg
9553 if (x86_sse_partial_reg_dependency == true)
9554 movapd reg, reg
9555 else
9556 movsd reg, reg
9557
9558 Code generation for scalar loads of double precision data:
9559 if (x86_sse_split_regs == true)
9560 movlpd mem, reg (gas syntax)
9561 else
9562 movsd mem, reg
9563
9564 Code generation for unaligned packed loads of single precision data
9565 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9566 if (x86_sse_unaligned_move_optimal)
9567 movups mem, reg
9568
9569 if (x86_sse_partial_reg_dependency == true)
9570 {
9571 xorps reg, reg
9572 movlps mem, reg
9573 movhps mem+8, reg
9574 }
9575 else
9576 {
9577 movlps mem, reg
9578 movhps mem+8, reg
9579 }
9580
9581 Code generation for unaligned packed loads of double precision data
9582 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9583 if (x86_sse_unaligned_move_optimal)
9584 movupd mem, reg
9585
9586 if (x86_sse_split_regs == true)
9587 {
9588 movlpd mem, reg
9589 movhpd mem+8, reg
9590 }
9591 else
9592 {
9593 movsd mem, reg
9594 movhpd mem+8, reg
9595 }
9596 */
9597
9598 void
9599 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9600 {
9601 rtx op0, op1, m;
9602
9603 op0 = operands[0];
9604 op1 = operands[1];
9605
9606 if (MEM_P (op1))
9607 {
9608 /* If we're optimizing for size, movups is the smallest. */
9609 if (optimize_size)
9610 {
9611 op0 = gen_lowpart (V4SFmode, op0);
9612 op1 = gen_lowpart (V4SFmode, op1);
9613 emit_insn (gen_sse_movups (op0, op1));
9614 return;
9615 }
9616
9617 /* ??? If we have typed data, then it would appear that using
9618 movdqu is the only way to get unaligned data loaded with
9619 integer type. */
9620 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9621 {
9622 op0 = gen_lowpart (V16QImode, op0);
9623 op1 = gen_lowpart (V16QImode, op1);
9624 emit_insn (gen_sse2_movdqu (op0, op1));
9625 return;
9626 }
9627
9628 if (TARGET_SSE2 && mode == V2DFmode)
9629 {
9630 rtx zero;
9631
9632 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9633 {
9634 op0 = gen_lowpart (V2DFmode, op0);
9635 op1 = gen_lowpart (V2DFmode, op1);
9636 emit_insn (gen_sse2_movupd (op0, op1));
9637 return;
9638 }
9639
9640 /* When SSE registers are split into halves, we can avoid
9641 writing to the top half twice. */
9642 if (TARGET_SSE_SPLIT_REGS)
9643 {
9644 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9645 zero = op0;
9646 }
9647 else
9648 {
9649 /* ??? Not sure about the best option for the Intel chips.
9650 The following would seem to satisfy; the register is
9651 entirely cleared, breaking the dependency chain. We
9652 then store to the upper half, with a dependency depth
9653 of one. A rumor has it that Intel recommends two movsd
9654 followed by an unpacklpd, but this is unconfirmed. And
9655 given that the dependency depth of the unpacklpd would
9656 still be one, I'm not sure why this would be better. */
9657 zero = CONST0_RTX (V2DFmode);
9658 }
9659
9660 m = adjust_address (op1, DFmode, 0);
9661 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9662 m = adjust_address (op1, DFmode, 8);
9663 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9664 }
9665 else
9666 {
9667 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9668 {
9669 op0 = gen_lowpart (V4SFmode, op0);
9670 op1 = gen_lowpart (V4SFmode, op1);
9671 emit_insn (gen_sse_movups (op0, op1));
9672 return;
9673 }
9674
9675 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9676 emit_move_insn (op0, CONST0_RTX (mode));
9677 else
9678 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9679
9680 if (mode != V4SFmode)
9681 op0 = gen_lowpart (V4SFmode, op0);
9682 m = adjust_address (op1, V2SFmode, 0);
9683 emit_insn (gen_sse_loadlps (op0, op0, m));
9684 m = adjust_address (op1, V2SFmode, 8);
9685 emit_insn (gen_sse_loadhps (op0, op0, m));
9686 }
9687 }
9688 else if (MEM_P (op0))
9689 {
9690 /* If we're optimizing for size, movups is the smallest. */
9691 if (optimize_size)
9692 {
9693 op0 = gen_lowpart (V4SFmode, op0);
9694 op1 = gen_lowpart (V4SFmode, op1);
9695 emit_insn (gen_sse_movups (op0, op1));
9696 return;
9697 }
9698
9699       /* ??? Similar to above, only less clear because of "typeless
9700          stores".  */
9701 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9702 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9703 {
9704 op0 = gen_lowpart (V16QImode, op0);
9705 op1 = gen_lowpart (V16QImode, op1);
9706 emit_insn (gen_sse2_movdqu (op0, op1));
9707 return;
9708 }
9709
9710 if (TARGET_SSE2 && mode == V2DFmode)
9711 {
9712 m = adjust_address (op0, DFmode, 0);
9713 emit_insn (gen_sse2_storelpd (m, op1));
9714 m = adjust_address (op0, DFmode, 8);
9715 emit_insn (gen_sse2_storehpd (m, op1));
9716 }
9717 else
9718 {
9719 if (mode != V4SFmode)
9720 op1 = gen_lowpart (V4SFmode, op1);
9721 m = adjust_address (op0, V2SFmode, 0);
9722 emit_insn (gen_sse_storelps (m, op1));
9723 m = adjust_address (op0, V2SFmode, 8);
9724 emit_insn (gen_sse_storehps (m, op1));
9725 }
9726 }
9727 else
9728 gcc_unreachable ();
9729 }
9730
9731 /* Expand a push in MODE. This is some mode for which we do not support
9732 proper push instructions, at least from the registers that we expect
9733 the value to live in. */
9734
9735 void
9736 ix86_expand_push (enum machine_mode mode, rtx x)
9737 {
9738 rtx tmp;
9739
9740 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9741 GEN_INT (-GET_MODE_SIZE (mode)),
9742 stack_pointer_rtx, 1, OPTAB_DIRECT);
9743 if (tmp != stack_pointer_rtx)
9744 emit_move_insn (stack_pointer_rtx, tmp);
9745
9746 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9747 emit_move_insn (tmp, x);
9748 }
9749
9750 /* Helper function of ix86_fixup_binary_operands to canonicalize
9751 operand order. Returns true if the operands should be swapped. */
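/* For example, for a commutative (plus (const_int 4) (reg)) the operands
   are swapped so that the immediate ends up as the second source, and for
   (plus (mem) (reg)) so that the memory reference comes second.  */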
9752
9753 static bool
9754 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9755 rtx operands[])
9756 {
9757 rtx dst = operands[0];
9758 rtx src1 = operands[1];
9759 rtx src2 = operands[2];
9760
9761 /* If the operation is not commutative, we can't do anything. */
9762 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9763 return false;
9764
9765 /* Highest priority is that src1 should match dst. */
9766 if (rtx_equal_p (dst, src1))
9767 return false;
9768 if (rtx_equal_p (dst, src2))
9769 return true;
9770
9771 /* Next highest priority is that immediate constants come second. */
9772 if (immediate_operand (src2, mode))
9773 return false;
9774 if (immediate_operand (src1, mode))
9775 return true;
9776
9777 /* Lowest priority is that memory references should come second. */
9778 if (MEM_P (src2))
9779 return false;
9780 if (MEM_P (src1))
9781 return true;
9782
9783 return false;
9784 }
9785
9786
9787 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9788 destination to use for the operation. If different from the true
9789 destination in operands[0], a copy operation will be required. */
9790
9791 rtx
9792 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9793 rtx operands[])
9794 {
9795 rtx dst = operands[0];
9796 rtx src1 = operands[1];
9797 rtx src2 = operands[2];
9798
9799 /* Canonicalize operand order. */
9800 if (ix86_swap_binary_operands_p (code, mode, operands))
9801 {
9802 rtx temp = src1;
9803 src1 = src2;
9804 src2 = temp;
9805 }
9806
9807 /* Both source operands cannot be in memory. */
9808 if (MEM_P (src1) && MEM_P (src2))
9809 {
9810 /* Optimization: Only read from memory once. */
9811 if (rtx_equal_p (src1, src2))
9812 {
9813 src2 = force_reg (mode, src2);
9814 src1 = src2;
9815 }
9816 else
9817 src2 = force_reg (mode, src2);
9818 }
9819
9820 /* If the destination is memory, and we do not have matching source
9821 operands, do things in registers. */
9822 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9823 dst = gen_reg_rtx (mode);
9824
9825 /* Source 1 cannot be a constant. */
9826 if (CONSTANT_P (src1))
9827 src1 = force_reg (mode, src1);
9828
9829 /* Source 1 cannot be a non-matching memory. */
9830 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9831 src1 = force_reg (mode, src1);
9832
9833 operands[1] = src1;
9834 operands[2] = src2;
9835 return dst;
9836 }
9837
9838 /* Similarly, but assume that the destination has already been
9839 set up properly. */
9840
9841 void
9842 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9843 enum machine_mode mode, rtx operands[])
9844 {
9845 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9846 gcc_assert (dst == operands[0]);
9847 }
9848
9849 /* Attempt to expand a binary operator.  Make the expansion closer to the
9850    actual machine than just general_operand, which would allow 3 separate
9851    memory references (one output, two input) in a single insn.  */
9852
9853 void
9854 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9855 rtx operands[])
9856 {
9857 rtx src1, src2, dst, op, clob;
9858
9859 dst = ix86_fixup_binary_operands (code, mode, operands);
9860 src1 = operands[1];
9861 src2 = operands[2];
9862
9863 /* Emit the instruction. */
9864
9865 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9866 if (reload_in_progress)
9867 {
9868 /* Reload doesn't know about the flags register, and doesn't know that
9869 it doesn't want to clobber it. We can only do this with PLUS. */
9870 gcc_assert (code == PLUS);
9871 emit_insn (op);
9872 }
9873 else
9874 {
9875 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9876 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9877 }
9878
9879 /* Fix up the destination if needed. */
9880 if (dst != operands[0])
9881 emit_move_insn (operands[0], dst);
9882 }
9883
9884 /* Return TRUE or FALSE depending on whether the binary operator meets the
9885 appropriate constraints. */
9886
9887 int
9888 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9889 rtx operands[3])
9890 {
9891 rtx dst = operands[0];
9892 rtx src1 = operands[1];
9893 rtx src2 = operands[2];
9894
9895 /* Both source operands cannot be in memory. */
9896 if (MEM_P (src1) && MEM_P (src2))
9897 return 0;
9898
9899 /* Canonicalize operand order for commutative operators. */
9900 if (ix86_swap_binary_operands_p (code, mode, operands))
9901 {
9902 rtx temp = src1;
9903 src1 = src2;
9904 src2 = temp;
9905 }
9906
9907 /* If the destination is memory, we must have a matching source operand. */
9908 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9909 return 0;
9910
9911 /* Source 1 cannot be a constant. */
9912 if (CONSTANT_P (src1))
9913 return 0;
9914
9915 /* Source 1 cannot be a non-matching memory. */
9916 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9917 return 0;
9918
9919 return 1;
9920 }
9921
9922 /* Attempt to expand a unary operator.  Make the expansion closer to the
9923    actual machine than just general_operand, which would allow 2 separate
9924    memory references (one output, one input) in a single insn.  */
9925
9926 void
9927 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9928 rtx operands[])
9929 {
9930 int matching_memory;
9931 rtx src, dst, op, clob;
9932
9933 dst = operands[0];
9934 src = operands[1];
9935
9936 /* If the destination is memory, and we do not have matching source
9937 operands, do things in registers. */
9938 matching_memory = 0;
9939 if (MEM_P (dst))
9940 {
9941 if (rtx_equal_p (dst, src))
9942 matching_memory = 1;
9943 else
9944 dst = gen_reg_rtx (mode);
9945 }
9946
9947 /* When source operand is memory, destination must match. */
9948 if (MEM_P (src) && !matching_memory)
9949 src = force_reg (mode, src);
9950
9951 /* Emit the instruction. */
9952
9953 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9954 if (reload_in_progress || code == NOT)
9955 {
9956 /* Reload doesn't know about the flags register, and doesn't know that
9957 it doesn't want to clobber it. */
9958 gcc_assert (code == NOT);
9959 emit_insn (op);
9960 }
9961 else
9962 {
9963 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9964 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9965 }
9966
9967 /* Fix up the destination if needed. */
9968 if (dst != operands[0])
9969 emit_move_insn (operands[0], dst);
9970 }
9971
9972 /* Return TRUE or FALSE depending on whether the unary operator meets the
9973 appropriate constraints. */
9974
9975 int
9976 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9977 enum machine_mode mode ATTRIBUTE_UNUSED,
9978 rtx operands[2] ATTRIBUTE_UNUSED)
9979 {
9980 /* If one of operands is memory, source and destination must match. */
9981 if ((MEM_P (operands[0])
9982 || MEM_P (operands[1]))
9983 && ! rtx_equal_p (operands[0], operands[1]))
9984 return FALSE;
9985 return TRUE;
9986 }
9987
9988 /* Post-reload splitter for converting an SF or DFmode value in an
9989 SSE register into an unsigned SImode. */
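/* In outline: the input is compared against 2**31; where it is at least
   that large, 2**31.0 is subtracted before the (signed) cvtt conversion
   and the sign bit is restored afterwards with an XOR of 0x80000000, so
   the whole [0, 2**32) range converts correctly.  */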
9990
9991 void
9992 ix86_split_convert_uns_si_sse (rtx operands[])
9993 {
9994 enum machine_mode vecmode;
9995 rtx value, large, zero_or_two31, input, two31, x;
9996
9997 large = operands[1];
9998 zero_or_two31 = operands[2];
9999 input = operands[3];
10000 two31 = operands[4];
10001 vecmode = GET_MODE (large);
10002 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10003
10004 /* Load up the value into the low element. We must ensure that the other
10005 elements are valid floats -- zero is the easiest such value. */
10006 if (MEM_P (input))
10007 {
10008 if (vecmode == V4SFmode)
10009 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10010 else
10011 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10012 }
10013 else
10014 {
10015 input = gen_rtx_REG (vecmode, REGNO (input));
10016 emit_move_insn (value, CONST0_RTX (vecmode));
10017 if (vecmode == V4SFmode)
10018 emit_insn (gen_sse_movss (value, value, input));
10019 else
10020 emit_insn (gen_sse2_movsd (value, value, input));
10021 }
10022
10023 emit_move_insn (large, two31);
10024 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10025
10026 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10027 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10028
10029 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10030 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10031
10032 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10033 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10034
10035 large = gen_rtx_REG (V4SImode, REGNO (large));
10036 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10037
10038 x = gen_rtx_REG (V4SImode, REGNO (value));
10039 if (vecmode == V4SFmode)
10040 emit_insn (gen_sse2_cvttps2dq (x, value));
10041 else
10042 emit_insn (gen_sse2_cvttpd2dq (x, value));
10043 value = x;
10044
10045 emit_insn (gen_xorv4si3 (value, value, large));
10046 }
10047
10048 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10049 Expects the 64-bit DImode to be supplied in a pair of integral
10050 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10051 -mfpmath=sse, !optimize_size only. */
10052
10053 void
10054 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10055 {
10056 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10057 rtx int_xmm, fp_xmm;
10058 rtx biases, exponents;
10059 rtx x;
10060
10061 int_xmm = gen_reg_rtx (V4SImode);
10062 if (TARGET_INTER_UNIT_MOVES)
10063 emit_insn (gen_movdi_to_sse (int_xmm, input));
10064 else if (TARGET_SSE_SPLIT_REGS)
10065 {
10066 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10067 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10068 }
10069 else
10070 {
10071 x = gen_reg_rtx (V2DImode);
10072 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10073 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10074 }
10075
10076 x = gen_rtx_CONST_VECTOR (V4SImode,
10077 gen_rtvec (4, GEN_INT (0x43300000UL),
10078 GEN_INT (0x45300000UL),
10079 const0_rtx, const0_rtx));
10080 exponents = validize_mem (force_const_mem (V4SImode, x));
10081
10082 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10083 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10084
10085 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10086 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10087 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10088 (0x1.0p84 + double(fp_value_hi_xmm)).
10089 Note these exponents differ by 32. */
10090
10091 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10092
10093 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10094 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10095 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10096 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10097 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10098 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10099 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10100 biases = validize_mem (force_const_mem (V2DFmode, biases));
10101 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10102
10103 /* Add the upper and lower DFmode values together. */
10104 if (TARGET_SSE3)
10105 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10106 else
10107 {
10108 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10109 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10110 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10111 }
10112
10113 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10114 }
10115
10116 /* Convert an unsigned SImode value into a DFmode. Only currently used
10117 for SSE, but applicable anywhere. */
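/* The sequence below computes (double) (x - 2**31) + 2**31.0: the biased
   value fits a signed SImode conversion, and the final addition is exact
   because DFmode has a 53-bit mantissa.  */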
10118
10119 void
10120 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10121 {
10122 REAL_VALUE_TYPE TWO31r;
10123 rtx x, fp;
10124
10125 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10126 NULL, 1, OPTAB_DIRECT);
10127
10128 fp = gen_reg_rtx (DFmode);
10129 emit_insn (gen_floatsidf2 (fp, x));
10130
10131 real_ldexp (&TWO31r, &dconst1, 31);
10132 x = const_double_from_real_value (TWO31r, DFmode);
10133
10134 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10135 if (x != target)
10136 emit_move_insn (target, x);
10137 }
10138
10139 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10140 32-bit mode; otherwise we have a direct convert instruction. */
10141
10142 void
10143 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10144 {
10145 REAL_VALUE_TYPE TWO32r;
10146 rtx fp_lo, fp_hi, x;
10147
10148 fp_lo = gen_reg_rtx (DFmode);
10149 fp_hi = gen_reg_rtx (DFmode);
10150
10151 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10152
10153 real_ldexp (&TWO32r, &dconst1, 32);
10154 x = const_double_from_real_value (TWO32r, DFmode);
10155 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10156
10157 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10158
10159 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10160 0, OPTAB_DIRECT);
10161 if (x != target)
10162 emit_move_insn (target, x);
10163 }
10164
10165 /* Convert an unsigned SImode value into an SFmode value, using only SSE.
10166 For x86_32, -mfpmath=sse, !optimize_size only. */
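/* The value is split into 16-bit halves and recombined in SFmode as
   (float) (x >> 16) * 0x1p16 + (float) (x & 0xffff); each half converts
   exactly, so only the final addition can round.  */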
10167 void
10168 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10169 {
10170 REAL_VALUE_TYPE ONE16r;
10171 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10172
10173 real_ldexp (&ONE16r, &dconst1, 16);
10174 x = const_double_from_real_value (ONE16r, SFmode);
10175 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10176 NULL, 0, OPTAB_DIRECT);
10177 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10178 NULL, 0, OPTAB_DIRECT);
10179 fp_hi = gen_reg_rtx (SFmode);
10180 fp_lo = gen_reg_rtx (SFmode);
10181 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10182 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10183 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10184 0, OPTAB_DIRECT);
10185 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10186 0, OPTAB_DIRECT);
10187 if (!rtx_equal_p (target, fp_hi))
10188 emit_move_insn (target, fp_hi);
10189 }
10190
10191 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10192    then replicate the value for all elements of the vector
10193    register.  */
10194
10195 rtx
10196 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10197 {
10198 rtvec v;
10199 switch (mode)
10200 {
10201 case SFmode:
10202 if (vect)
10203 v = gen_rtvec (4, value, value, value, value);
10204 else
10205 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10206 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10207 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10208
10209 case DFmode:
10210 if (vect)
10211 v = gen_rtvec (2, value, value);
10212 else
10213 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10214 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10215
10216 default:
10217 gcc_unreachable ();
10218 }
10219 }
10220
10221 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10222 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10223 true, then replicate the mask for all elements of the vector register.
10224 If INVERT is true, then create a mask excluding the sign bit. */
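/* For SFmode the per-element mask is 0x80000000 (0x7fffffff when INVERT),
   and for DFmode it is bit 63 set (or its complement).  */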
10225
10226 rtx
10227 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10228 {
10229 enum machine_mode vec_mode;
10230 HOST_WIDE_INT hi, lo;
10231 int shift = 63;
10232 rtx v;
10233 rtx mask;
10234
10235 /* Find the sign bit, sign extended to 2*HWI. */
10236 if (mode == SFmode)
10237 lo = 0x80000000, hi = lo < 0;
10238 else if (HOST_BITS_PER_WIDE_INT >= 64)
10239 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10240 else
10241 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10242
10243 if (invert)
10244 lo = ~lo, hi = ~hi;
10245
10246 /* Force this value into the low part of a fp vector constant. */
10247 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10248 mask = gen_lowpart (mode, mask);
10249
10250 v = ix86_build_const_vector (mode, vect, mask);
10251 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10252 return force_reg (vec_mode, v);
10253 }
10254
10255 /* Generate code for floating point ABS or NEG. */
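/* With SSE, NEG is implemented as an XOR with the sign-bit mask and ABS as
   an AND with the complemented mask; the x87 path emits the plain NEG/ABS
   rtx and lets the machine description pick fchs/fabs.  */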
10256
10257 void
10258 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10259 rtx operands[])
10260 {
10261 rtx mask, set, use, clob, dst, src;
10262 bool matching_memory;
10263 bool use_sse = false;
10264 bool vector_mode = VECTOR_MODE_P (mode);
10265 enum machine_mode elt_mode = mode;
10266
10267 if (vector_mode)
10268 {
10269 elt_mode = GET_MODE_INNER (mode);
10270 use_sse = true;
10271 }
10272 else if (TARGET_SSE_MATH)
10273 use_sse = SSE_FLOAT_MODE_P (mode);
10274
10275 /* NEG and ABS performed with SSE use bitwise mask operations.
10276 Create the appropriate mask now. */
10277 if (use_sse)
10278 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10279 else
10280 mask = NULL_RTX;
10281
10282 dst = operands[0];
10283 src = operands[1];
10284
10285 /* If the destination is memory, and we don't have matching source
10286 operands or we're using the x87, do things in registers. */
10287 matching_memory = false;
10288 if (MEM_P (dst))
10289 {
10290 if (use_sse && rtx_equal_p (dst, src))
10291 matching_memory = true;
10292 else
10293 dst = gen_reg_rtx (mode);
10294 }
10295 if (MEM_P (src) && !matching_memory)
10296 src = force_reg (mode, src);
10297
10298 if (vector_mode)
10299 {
10300 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10301 set = gen_rtx_SET (VOIDmode, dst, set);
10302 emit_insn (set);
10303 }
10304 else
10305 {
10306 set = gen_rtx_fmt_e (code, mode, src);
10307 set = gen_rtx_SET (VOIDmode, dst, set);
10308 if (mask)
10309 {
10310 use = gen_rtx_USE (VOIDmode, mask);
10311 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10312 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10313 gen_rtvec (3, set, use, clob)));
10314 }
10315 else
10316 emit_insn (set);
10317 }
10318
10319 if (dst != operands[0])
10320 emit_move_insn (operands[0], dst);
10321 }
10322
10323 /* Expand a copysign operation. Special case operand 0 being a constant. */
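/* The expansion uses the usual bit-twiddling form of copysign:
   result = (x & ~SIGNMASK) | (y & SIGNMASK), taking the magnitude from the
   first input and the sign from the second; the masks are built by
   ix86_build_signbit_mask.  */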
10324
10325 void
10326 ix86_expand_copysign (rtx operands[])
10327 {
10328 enum machine_mode mode, vmode;
10329 rtx dest, op0, op1, mask, nmask;
10330
10331 dest = operands[0];
10332 op0 = operands[1];
10333 op1 = operands[2];
10334
10335 mode = GET_MODE (dest);
10336 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10337
10338 if (GET_CODE (op0) == CONST_DOUBLE)
10339 {
10340 rtvec v;
10341
10342 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10343 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10344
10345 if (op0 == CONST0_RTX (mode))
10346 op0 = CONST0_RTX (vmode);
10347 else
10348 {
10349 if (mode == SFmode)
10350 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10351 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10352 else
10353 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10354 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10355 }
10356
10357 mask = ix86_build_signbit_mask (mode, 0, 0);
10358
10359 if (mode == SFmode)
10360 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10361 else
10362 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10363 }
10364 else
10365 {
10366 nmask = ix86_build_signbit_mask (mode, 0, 1);
10367 mask = ix86_build_signbit_mask (mode, 0, 0);
10368
10369 if (mode == SFmode)
10370 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10371 else
10372 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10373 }
10374 }
10375
10376 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10377 be a constant, and so has already been expanded into a vector constant. */
10378
10379 void
10380 ix86_split_copysign_const (rtx operands[])
10381 {
10382 enum machine_mode mode, vmode;
10383 rtx dest, op0, op1, mask, x;
10384
10385 dest = operands[0];
10386 op0 = operands[1];
10387 op1 = operands[2];
10388 mask = operands[3];
10389
10390 mode = GET_MODE (dest);
10391 vmode = GET_MODE (mask);
10392
10393 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10394 x = gen_rtx_AND (vmode, dest, mask);
10395 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10396
10397 if (op0 != CONST0_RTX (vmode))
10398 {
10399 x = gen_rtx_IOR (vmode, dest, op0);
10400 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10401 }
10402 }
10403
10404 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10405 so we have to do two masks. */
10406
10407 void
10408 ix86_split_copysign_var (rtx operands[])
10409 {
10410 enum machine_mode mode, vmode;
10411 rtx dest, scratch, op0, op1, mask, nmask, x;
10412
10413 dest = operands[0];
10414 scratch = operands[1];
10415 op0 = operands[2];
10416 op1 = operands[3];
10417 nmask = operands[4];
10418 mask = operands[5];
10419
10420 mode = GET_MODE (dest);
10421 vmode = GET_MODE (mask);
10422
10423 if (rtx_equal_p (op0, op1))
10424 {
10425 /* Shouldn't happen often (it's useless, obviously), but when it does
10426 we'd generate incorrect code if we continue below. */
10427 emit_move_insn (dest, op0);
10428 return;
10429 }
10430
10431 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10432 {
10433 gcc_assert (REGNO (op1) == REGNO (scratch));
10434
10435 x = gen_rtx_AND (vmode, scratch, mask);
10436 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10437
10438 dest = mask;
10439 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10440 x = gen_rtx_NOT (vmode, dest);
10441 x = gen_rtx_AND (vmode, x, op0);
10442 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10443 }
10444 else
10445 {
10446 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10447 {
10448 x = gen_rtx_AND (vmode, scratch, mask);
10449 }
10450 else /* alternative 2,4 */
10451 {
10452 gcc_assert (REGNO (mask) == REGNO (scratch));
10453 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10454 x = gen_rtx_AND (vmode, scratch, op1);
10455 }
10456 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10457
10458 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10459 {
10460 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10461 x = gen_rtx_AND (vmode, dest, nmask);
10462 }
10463 else /* alternative 3,4 */
10464 {
10465 gcc_assert (REGNO (nmask) == REGNO (dest));
10466 dest = nmask;
10467 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10468 x = gen_rtx_AND (vmode, dest, op0);
10469 }
10470 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10471 }
10472
10473 x = gen_rtx_IOR (vmode, dest, scratch);
10474 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10475 }
10476
10477 /* Return TRUE or FALSE depending on whether the first SET in INSN
10478    has source and destination with matching CC modes, and whether that
10479    CC mode is at least as constrained as REQ_MODE.  */
10480
10481 int
10482 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10483 {
10484 rtx set;
10485 enum machine_mode set_mode;
10486
10487 set = PATTERN (insn);
10488 if (GET_CODE (set) == PARALLEL)
10489 set = XVECEXP (set, 0, 0);
10490 gcc_assert (GET_CODE (set) == SET);
10491 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10492
10493 set_mode = GET_MODE (SET_DEST (set));
10494 switch (set_mode)
10495 {
10496 case CCNOmode:
10497 if (req_mode != CCNOmode
10498 && (req_mode != CCmode
10499 || XEXP (SET_SRC (set), 1) != const0_rtx))
10500 return 0;
10501 break;
10502 case CCmode:
10503 if (req_mode == CCGCmode)
10504 return 0;
10505 /* FALLTHRU */
10506 case CCGCmode:
10507 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10508 return 0;
10509 /* FALLTHRU */
10510 case CCGOCmode:
10511 if (req_mode == CCZmode)
10512 return 0;
10513 /* FALLTHRU */
10514 case CCZmode:
10515 break;
10516
10517 default:
10518 gcc_unreachable ();
10519 }
10520
10521 return (GET_MODE (SET_SRC (set)) == set_mode);
10522 }
10523
10524 /* Generate insn patterns to do an integer compare of OPERANDS. */
10525
10526 static rtx
10527 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10528 {
10529 enum machine_mode cmpmode;
10530 rtx tmp, flags;
10531
10532 cmpmode = SELECT_CC_MODE (code, op0, op1);
10533 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10534
10535 /* This is very simple, but making the interface the same as in the
10536 FP case makes the rest of the code easier. */
10537 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10538 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10539
10540 /* Return the test that should be put into the flags user, i.e.
10541 the bcc, scc, or cmov instruction. */
10542 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10543 }
10544
10545 /* Figure out whether to use ordered or unordered fp comparisons.
10546 Return the appropriate mode to use. */
10547
10548 enum machine_mode
10549 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10550 {
10551   /* ??? In order to make all comparisons reversible, we do all comparisons
10552      non-trapping when compiling for IEEE.  Once gcc is able to distinguish
10553      all forms of trapping and nontrapping comparisons, we can make inequality
10554      comparisons trapping again, since that results in better code when using
10555      FCOM based compares.  */
10556 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10557 }
10558
10559 enum machine_mode
10560 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10561 {
10562 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10563 return ix86_fp_compare_mode (code);
10564 switch (code)
10565 {
10566 /* Only zero flag is needed. */
10567 case EQ: /* ZF=0 */
10568 case NE: /* ZF!=0 */
10569 return CCZmode;
10570 /* Codes needing carry flag. */
10571 case GEU: /* CF=0 */
10572 case GTU: /* CF=0 & ZF=0 */
10573 case LTU: /* CF=1 */
10574 case LEU: /* CF=1 | ZF=1 */
10575 return CCmode;
10576 /* Codes possibly doable only with sign flag when
10577 comparing against zero. */
10578 case GE: /* SF=OF or SF=0 */
10579 case LT: /* SF<>OF or SF=1 */
10580 if (op1 == const0_rtx)
10581 return CCGOCmode;
10582 else
10583 /* For other cases Carry flag is not required. */
10584 return CCGCmode;
10585       /* Codes doable only with the sign flag when comparing
10586          against zero, but we lack a jump instruction for that,
10587          so we need to use relational tests against overflow,
10588          which thus needs to be zero.  */
10589 case GT: /* ZF=0 & SF=OF */
10590 case LE: /* ZF=1 | SF<>OF */
10591 if (op1 == const0_rtx)
10592 return CCNOmode;
10593 else
10594 return CCGCmode;
10595       /* The strcmp pattern does a (use flags), and combine may ask us
10596          for a proper mode.  */
10597 case USE:
10598 return CCmode;
10599 default:
10600 gcc_unreachable ();
10601 }
10602 }
10603
10604 /* Return the fixed registers used for condition codes. */
10605
10606 static bool
10607 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10608 {
10609 *p1 = FLAGS_REG;
10610 *p2 = FPSR_REG;
10611 return true;
10612 }
10613
10614 /* If two condition code modes are compatible, return a condition code
10615 mode which is compatible with both. Otherwise, return
10616 VOIDmode. */
10617
10618 static enum machine_mode
10619 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10620 {
10621 if (m1 == m2)
10622 return m1;
10623
10624 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10625 return VOIDmode;
10626
10627 if ((m1 == CCGCmode && m2 == CCGOCmode)
10628 || (m1 == CCGOCmode && m2 == CCGCmode))
10629 return CCGCmode;
10630
10631 switch (m1)
10632 {
10633 default:
10634 gcc_unreachable ();
10635
10636 case CCmode:
10637 case CCGCmode:
10638 case CCGOCmode:
10639 case CCNOmode:
10640 case CCZmode:
10641 switch (m2)
10642 {
10643 default:
10644 return VOIDmode;
10645
10646 case CCmode:
10647 case CCGCmode:
10648 case CCGOCmode:
10649 case CCNOmode:
10650 case CCZmode:
10651 return CCmode;
10652 }
10653
10654 case CCFPmode:
10655 case CCFPUmode:
10656 /* These are only compatible with themselves, which we already
10657 checked above. */
10658 return VOIDmode;
10659 }
10660 }
10661
10662 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10663
10664 int
10665 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10666 {
10667 enum rtx_code swapped_code = swap_condition (code);
10668 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10669 || (ix86_fp_comparison_cost (swapped_code)
10670 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10671 }
10672
10673 /* Swap, force into registers, or otherwise massage the two operands
10674    of an fp comparison.  The operands are updated in place; the new
10675 comparison code is returned. */
10676
10677 static enum rtx_code
10678 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10679 {
10680 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10681 rtx op0 = *pop0, op1 = *pop1;
10682 enum machine_mode op_mode = GET_MODE (op0);
10683 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10684
10685 /* All of the unordered compare instructions only work on registers.
10686 The same is true of the fcomi compare instructions. The XFmode
10687 compare instructions require registers except when comparing
10688 against zero or when converting operand 1 from fixed point to
10689 floating point. */
10690
10691 if (!is_sse
10692 && (fpcmp_mode == CCFPUmode
10693 || (op_mode == XFmode
10694 && ! (standard_80387_constant_p (op0) == 1
10695 || standard_80387_constant_p (op1) == 1)
10696 && GET_CODE (op1) != FLOAT)
10697 || ix86_use_fcomi_compare (code)))
10698 {
10699 op0 = force_reg (op_mode, op0);
10700 op1 = force_reg (op_mode, op1);
10701 }
10702 else
10703 {
10704 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10705 things around if they appear profitable, otherwise force op0
10706 into a register. */
10707
10708 if (standard_80387_constant_p (op0) == 0
10709 || (MEM_P (op0)
10710 && ! (standard_80387_constant_p (op1) == 0
10711 || MEM_P (op1))))
10712 {
10713 rtx tmp;
10714 tmp = op0, op0 = op1, op1 = tmp;
10715 code = swap_condition (code);
10716 }
10717
10718 if (!REG_P (op0))
10719 op0 = force_reg (op_mode, op0);
10720
10721 if (CONSTANT_P (op1))
10722 {
10723 int tmp = standard_80387_constant_p (op1);
10724 if (tmp == 0)
10725 op1 = validize_mem (force_const_mem (op_mode, op1));
10726 else if (tmp == 1)
10727 {
10728 if (TARGET_CMOVE)
10729 op1 = force_reg (op_mode, op1);
10730 }
10731 else
10732 op1 = force_reg (op_mode, op1);
10733 }
10734 }
10735
10736 /* Try to rearrange the comparison to make it cheaper. */
10737 if (ix86_fp_comparison_cost (code)
10738 > ix86_fp_comparison_cost (swap_condition (code))
10739 && (REG_P (op1) || !no_new_pseudos))
10740 {
10741 rtx tmp;
10742 tmp = op0, op0 = op1, op1 = tmp;
10743 code = swap_condition (code);
10744 if (!REG_P (op0))
10745 op0 = force_reg (op_mode, op0);
10746 }
10747
10748 *pop0 = op0;
10749 *pop1 = op1;
10750 return code;
10751 }
10752
10753 /* Convert the comparison codes we use to represent FP comparisons to the
10754    integer code that will result in a proper branch.  Return UNKNOWN if no
10755    such code is available.  */
10756
10757 enum rtx_code
10758 ix86_fp_compare_code_to_integer (enum rtx_code code)
10759 {
10760 switch (code)
10761 {
10762 case GT:
10763 return GTU;
10764 case GE:
10765 return GEU;
10766 case ORDERED:
10767 case UNORDERED:
10768 return code;
10769 break;
10770 case UNEQ:
10771 return EQ;
10772 break;
10773 case UNLT:
10774 return LTU;
10775 break;
10776 case UNLE:
10777 return LEU;
10778 break;
10779 case LTGT:
10780 return NE;
10781 break;
10782 default:
10783 return UNKNOWN;
10784 }
10785 }
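/* For illustration: after an fcomi (or fnstsw + sahf) the C0/C2/C3
   condition bits land in CF/PF/ZF, so an ordered FP "op0 > op1" is
   CF=0 && ZF=0 -- exactly the unsigned integer "above" test, which is
   why GT maps to GTU above.  Schematically:

	fcomi			compare op0 against op1
	ja	.L_greater	taken iff op0 > op1 and ordered  */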
10786
10787 /* Split comparison code CODE into comparisons we can do using branch
10788 instructions. BYPASS_CODE is comparison code for branch that will
10789 branch around FIRST_CODE and SECOND_CODE. If some of branches
10790 is not required, set value to UNKNOWN.
10791 We never require more than two branches. */
10792
10793 void
10794 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10795 enum rtx_code *first_code,
10796 enum rtx_code *second_code)
10797 {
10798 *first_code = code;
10799 *bypass_code = UNKNOWN;
10800 *second_code = UNKNOWN;
10801
10802 /* The fcomi comparison sets flags as follows:
10803
10804 cmp ZF PF CF
10805 > 0 0 0
10806 < 0 0 1
10807 = 1 0 0
10808 un 1 1 1 */
10809
10810 switch (code)
10811 {
10812 case GT: /* GTU - CF=0 & ZF=0 */
10813 case GE: /* GEU - CF=0 */
10814 case ORDERED: /* PF=0 */
10815 case UNORDERED: /* PF=1 */
10816 case UNEQ: /* EQ - ZF=1 */
10817 case UNLT: /* LTU - CF=1 */
10818 case UNLE: /* LEU - CF=1 | ZF=1 */
10819 case LTGT: /* EQ - ZF=0 */
10820 break;
10821 case LT: /* LTU - CF=1 - fails on unordered */
10822 *first_code = UNLT;
10823 *bypass_code = UNORDERED;
10824 break;
10825 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10826 *first_code = UNLE;
10827 *bypass_code = UNORDERED;
10828 break;
10829 case EQ: /* EQ - ZF=1 - fails on unordered */
10830 *first_code = UNEQ;
10831 *bypass_code = UNORDERED;
10832 break;
10833 case NE: /* NE - ZF=0 - fails on unordered */
10834 *first_code = LTGT;
10835 *second_code = UNORDERED;
10836 break;
10837 case UNGE: /* GEU - CF=0 - fails on unordered */
10838 *first_code = GE;
10839 *second_code = UNORDERED;
10840 break;
10841 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10842 *first_code = GT;
10843 *second_code = UNORDERED;
10844 break;
10845 default:
10846 gcc_unreachable ();
10847 }
10848 if (!TARGET_IEEE_FP)
10849 {
10850 *second_code = UNKNOWN;
10851 *bypass_code = UNKNOWN;
10852 }
10853 }
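/* A worked example of the splitting above, assuming TARGET_IEEE_FP:
   a branch on LT cannot use "jb" alone because CF is also set for
   unordered operands, so it is split as

	first_code  = UNLT	 (jb, taken when CF=1)
	bypass_code = UNORDERED	 (jp, branches around the jb when PF=1)
	second_code = UNKNOWN

   whereas NE needs a second branch that is also taken when unordered:
   first_code = LTGT, second_code = UNORDERED.  */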
10854
10855 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10856 All of the following functions use the number of instructions as the cost metric.
10857 In the future this should be tweaked to compute bytes for optimize_size and
10858 to take into account the performance of various instructions on various CPUs. */
10859 static int
10860 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10861 {
10862 if (!TARGET_IEEE_FP)
10863 return 4;
10864 /* The cost of code output by ix86_expand_fp_compare. */
10865 switch (code)
10866 {
10867 case UNLE:
10868 case UNLT:
10869 case LTGT:
10870 case GT:
10871 case GE:
10872 case UNORDERED:
10873 case ORDERED:
10874 case UNEQ:
10875 return 4;
10876 break;
10877 case LT:
10878 case NE:
10879 case EQ:
10880 case UNGE:
10881 return 5;
10882 break;
10883 case LE:
10884 case UNGT:
10885 return 6;
10886 break;
10887 default:
10888 gcc_unreachable ();
10889 }
10890 }
10891
10892 /* Return cost of comparison done using fcomi operation.
10893 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10894 static int
10895 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10896 {
10897 enum rtx_code bypass_code, first_code, second_code;
10898 /* Return arbitrarily high cost when instruction is not supported - this
10899 prevents gcc from using it. */
10900 if (!TARGET_CMOVE)
10901 return 1024;
10902 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10903 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10904 }
10905
10906 /* Return cost of comparison done using sahf operation.
10907 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10908 static int
10909 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10910 {
10911 enum rtx_code bypass_code, first_code, second_code;
10912 /* Return arbitrarily high cost when instruction is not preferred - this
10913 prevents gcc from using it. */
10914 if (!TARGET_USE_SAHF && !optimize_size)
10915 return 1024;
10916 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10917 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10918 }
10919
10920 /* Compute cost of the comparison done using any method.
10921 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10922 static int
10923 ix86_fp_comparison_cost (enum rtx_code code)
10924 {
10925 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10926 int min;
10927
10928 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10929 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10930
10931 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10932 if (min > sahf_cost)
10933 min = sahf_cost;
10934 if (min > fcomi_cost)
10935 min = fcomi_cost;
10936 return min;
10937 }
10938
10939 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10940
10941 static rtx
10942 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10943 rtx *second_test, rtx *bypass_test)
10944 {
10945 enum machine_mode fpcmp_mode, intcmp_mode;
10946 rtx tmp, tmp2;
10947 int cost = ix86_fp_comparison_cost (code);
10948 enum rtx_code bypass_code, first_code, second_code;
10949
10950 fpcmp_mode = ix86_fp_compare_mode (code);
10951 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10952
10953 if (second_test)
10954 *second_test = NULL_RTX;
10955 if (bypass_test)
10956 *bypass_test = NULL_RTX;
10957
10958 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10959
10960 /* Do fcomi/sahf based test when profitable. */
10961 if ((bypass_code == UNKNOWN || bypass_test)
10962 && (second_code == UNKNOWN || second_test)
10963 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10964 {
10965 if (TARGET_CMOVE)
10966 {
10967 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10968 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10969 tmp);
10970 emit_insn (tmp);
10971 }
10972 else
10973 {
10974 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10975 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10976 if (!scratch)
10977 scratch = gen_reg_rtx (HImode);
10978 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10979 emit_insn (gen_x86_sahf_1 (scratch));
10980 }
10981
10982 /* The FP codes work out to act like unsigned. */
10983 intcmp_mode = fpcmp_mode;
10984 code = first_code;
10985 if (bypass_code != UNKNOWN)
10986 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10987 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10988 const0_rtx);
10989 if (second_code != UNKNOWN)
10990 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10991 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10992 const0_rtx);
10993 }
10994 else
10995 {
10996 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10997 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10998 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10999 if (!scratch)
11000 scratch = gen_reg_rtx (HImode);
11001 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11002
11003 /* In the unordered case, we have to check C2 for NaN's, which
11004 doesn't happen to work out to anything nice combination-wise.
11005 So do some bit twiddling on the value we've got in AH to come
11006 up with an appropriate set of condition codes. */
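	  /* The magic masks used below follow from the layout of the FPU
	     status word as copied into AH by fnstsw: C0 is bit 0 (0x01),
	     C2 is bit 2 (0x04) and C3 is bit 6 (0x40), so 0x45 tests
	     C0|C2|C3 at once.  For example, for "greater than",
	     "testb $0x45, %ah" followed by an EQ test accepts only the
	     case where all three bits are clear, i.e. op0 > op1 and the
	     operands are ordered.  */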
11007
11008 intcmp_mode = CCNOmode;
11009 switch (code)
11010 {
11011 case GT:
11012 case UNGT:
11013 if (code == GT || !TARGET_IEEE_FP)
11014 {
11015 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11016 code = EQ;
11017 }
11018 else
11019 {
11020 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11021 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11022 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11023 intcmp_mode = CCmode;
11024 code = GEU;
11025 }
11026 break;
11027 case LT:
11028 case UNLT:
11029 if (code == LT && TARGET_IEEE_FP)
11030 {
11031 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11032 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11033 intcmp_mode = CCmode;
11034 code = EQ;
11035 }
11036 else
11037 {
11038 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11039 code = NE;
11040 }
11041 break;
11042 case GE:
11043 case UNGE:
11044 if (code == GE || !TARGET_IEEE_FP)
11045 {
11046 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11047 code = EQ;
11048 }
11049 else
11050 {
11051 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11052 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11053 GEN_INT (0x01)));
11054 code = NE;
11055 }
11056 break;
11057 case LE:
11058 case UNLE:
11059 if (code == LE && TARGET_IEEE_FP)
11060 {
11061 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11062 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11063 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11064 intcmp_mode = CCmode;
11065 code = LTU;
11066 }
11067 else
11068 {
11069 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11070 code = NE;
11071 }
11072 break;
11073 case EQ:
11074 case UNEQ:
11075 if (code == EQ && TARGET_IEEE_FP)
11076 {
11077 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11078 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11079 intcmp_mode = CCmode;
11080 code = EQ;
11081 }
11082 else
11083 {
11084 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11085 code = NE;
11086 break;
11087 }
11088 break;
11089 case NE:
11090 case LTGT:
11091 if (code == NE && TARGET_IEEE_FP)
11092 {
11093 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11094 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11095 GEN_INT (0x40)));
11096 code = NE;
11097 }
11098 else
11099 {
11100 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11101 code = EQ;
11102 }
11103 break;
11104
11105 case UNORDERED:
11106 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11107 code = NE;
11108 break;
11109 case ORDERED:
11110 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11111 code = EQ;
11112 break;
11113
11114 default:
11115 gcc_unreachable ();
11116 }
11117 }
11118
11119 /* Return the test that should be put into the flags user, i.e.
11120 the bcc, scc, or cmov instruction. */
11121 return gen_rtx_fmt_ee (code, VOIDmode,
11122 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11123 const0_rtx);
11124 }
11125
11126 rtx
11127 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11128 {
11129 rtx op0, op1, ret;
11130 op0 = ix86_compare_op0;
11131 op1 = ix86_compare_op1;
11132
11133 if (second_test)
11134 *second_test = NULL_RTX;
11135 if (bypass_test)
11136 *bypass_test = NULL_RTX;
11137
11138 if (ix86_compare_emitted)
11139 {
11140 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11141 ix86_compare_emitted = NULL_RTX;
11142 }
11143 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11144 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11145 second_test, bypass_test);
11146 else
11147 ret = ix86_expand_int_compare (code, op0, op1);
11148
11149 return ret;
11150 }
11151
11152 /* Return true if CODE will result in a nontrivial jump sequence. */
11153 bool
11154 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11155 {
11156 enum rtx_code bypass_code, first_code, second_code;
11157 if (!TARGET_CMOVE)
11158 return true;
11159 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11160 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11161 }
11162
11163 void
11164 ix86_expand_branch (enum rtx_code code, rtx label)
11165 {
11166 rtx tmp;
11167
11168 /* If we have emitted a compare insn, go straight to simple.
11169 ix86_expand_compare won't emit anything if ix86_compare_emitted
11170 is non NULL. */
11171 if (ix86_compare_emitted)
11172 goto simple;
11173
11174 switch (GET_MODE (ix86_compare_op0))
11175 {
11176 case QImode:
11177 case HImode:
11178 case SImode:
11179 simple:
11180 tmp = ix86_expand_compare (code, NULL, NULL);
11181 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11182 gen_rtx_LABEL_REF (VOIDmode, label),
11183 pc_rtx);
11184 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11185 return;
11186
11187 case SFmode:
11188 case DFmode:
11189 case XFmode:
11190 {
11191 rtvec vec;
11192 int use_fcomi;
11193 enum rtx_code bypass_code, first_code, second_code;
11194
11195 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11196 &ix86_compare_op1);
11197
11198 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11199
11200 /* Check whether we will use the natural sequence with one jump. If
11201 so, we can expand the jump early. Otherwise delay expansion by
11202 creating a compound insn so as not to confuse the optimizers. */
11203 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11204 && TARGET_CMOVE)
11205 {
11206 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11207 gen_rtx_LABEL_REF (VOIDmode, label),
11208 pc_rtx, NULL_RTX, NULL_RTX);
11209 }
11210 else
11211 {
11212 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11213 ix86_compare_op0, ix86_compare_op1);
11214 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11215 gen_rtx_LABEL_REF (VOIDmode, label),
11216 pc_rtx);
11217 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11218
11219 use_fcomi = ix86_use_fcomi_compare (code);
11220 vec = rtvec_alloc (3 + !use_fcomi);
11221 RTVEC_ELT (vec, 0) = tmp;
11222 RTVEC_ELT (vec, 1)
11223 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11224 RTVEC_ELT (vec, 2)
11225 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11226 if (! use_fcomi)
11227 RTVEC_ELT (vec, 3)
11228 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11229
11230 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11231 }
11232 return;
11233 }
11234
11235 case DImode:
11236 if (TARGET_64BIT)
11237 goto simple;
11238 case TImode:
11239 /* Expand DImode branch into multiple compare+branch. */
11240 {
11241 rtx lo[2], hi[2], label2;
11242 enum rtx_code code1, code2, code3;
11243 enum machine_mode submode;
11244
11245 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11246 {
11247 tmp = ix86_compare_op0;
11248 ix86_compare_op0 = ix86_compare_op1;
11249 ix86_compare_op1 = tmp;
11250 code = swap_condition (code);
11251 }
11252 if (GET_MODE (ix86_compare_op0) == DImode)
11253 {
11254 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11255 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11256 submode = SImode;
11257 }
11258 else
11259 {
11260 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11261 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11262 submode = DImode;
11263 }
11264
11265 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11266 avoid two branches. This costs one extra insn, so disable when
11267 optimizing for size. */
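	/* As a sketch, in C terms the transformation is

		(a == b)  <=>  ((hi(a) ^ hi(b)) | (lo(a) ^ lo(b))) == 0

	   leaving a single compare against zero and a single branch.  */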
11268
11269 if ((code == EQ || code == NE)
11270 && (!optimize_size
11271 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11272 {
11273 rtx xor0, xor1;
11274
11275 xor1 = hi[0];
11276 if (hi[1] != const0_rtx)
11277 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11278 NULL_RTX, 0, OPTAB_WIDEN);
11279
11280 xor0 = lo[0];
11281 if (lo[1] != const0_rtx)
11282 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11283 NULL_RTX, 0, OPTAB_WIDEN);
11284
11285 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11286 NULL_RTX, 0, OPTAB_WIDEN);
11287
11288 ix86_compare_op0 = tmp;
11289 ix86_compare_op1 = const0_rtx;
11290 ix86_expand_branch (code, label);
11291 return;
11292 }
11293
11294 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11295 op1 is a constant and the low word is zero, then we can just
11296 examine the high word. */
11297
11298 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11299 switch (code)
11300 {
11301 case LT: case LTU: case GE: case GEU:
11302 ix86_compare_op0 = hi[0];
11303 ix86_compare_op1 = hi[1];
11304 ix86_expand_branch (code, label);
11305 return;
11306 default:
11307 break;
11308 }
11309
11310 /* Otherwise, we need two or three jumps. */
11311
11312 label2 = gen_label_rtx ();
11313
11314 code1 = code;
11315 code2 = swap_condition (code);
11316 code3 = unsigned_condition (code);
11317
11318 switch (code)
11319 {
11320 case LT: case GT: case LTU: case GTU:
11321 break;
11322
11323 case LE: code1 = LT; code2 = GT; break;
11324 case GE: code1 = GT; code2 = LT; break;
11325 case LEU: code1 = LTU; code2 = GTU; break;
11326 case GEU: code1 = GTU; code2 = LTU; break;
11327
11328 case EQ: code1 = UNKNOWN; code2 = NE; break;
11329 case NE: code2 = UNKNOWN; break;
11330
11331 default:
11332 gcc_unreachable ();
11333 }
11334
11335 /*
11336 * a < b =>
11337 * if (hi(a) < hi(b)) goto true;
11338 * if (hi(a) > hi(b)) goto false;
11339 * if (lo(a) < lo(b)) goto true;
11340 * false:
11341 */
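	/* For example, a signed "a <= b" (LE) gets code1 = LT, code2 = GT
	   and code3 = LEU, i.e. roughly:

		if (hi(a) < hi(b)) goto true;
		if (hi(a) > hi(b)) goto false;
		if (lo(a) <= lo(b)) goto true;	(unsigned compare)
	   false:  */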
11342
11343 ix86_compare_op0 = hi[0];
11344 ix86_compare_op1 = hi[1];
11345
11346 if (code1 != UNKNOWN)
11347 ix86_expand_branch (code1, label);
11348 if (code2 != UNKNOWN)
11349 ix86_expand_branch (code2, label2);
11350
11351 ix86_compare_op0 = lo[0];
11352 ix86_compare_op1 = lo[1];
11353 ix86_expand_branch (code3, label);
11354
11355 if (code2 != UNKNOWN)
11356 emit_label (label2);
11357 return;
11358 }
11359
11360 default:
11361 gcc_unreachable ();
11362 }
11363 }
11364
11365 /* Split branch based on floating point condition. */
11366 void
11367 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11368 rtx target1, rtx target2, rtx tmp, rtx pushed)
11369 {
11370 rtx second, bypass;
11371 rtx label = NULL_RTX;
11372 rtx condition;
11373 int bypass_probability = -1, second_probability = -1, probability = -1;
11374 rtx i;
11375
11376 if (target2 != pc_rtx)
11377 {
11378 rtx tmp = target2;
11379 code = reverse_condition_maybe_unordered (code);
11380 target2 = target1;
11381 target1 = tmp;
11382 }
11383
11384 condition = ix86_expand_fp_compare (code, op1, op2,
11385 tmp, &second, &bypass);
11386
11387 /* Remove pushed operand from stack. */
11388 if (pushed)
11389 ix86_free_from_memory (GET_MODE (pushed));
11390
11391 if (split_branch_probability >= 0)
11392 {
11393 /* Distribute the probabilities across the jumps.
11394 Assume that the BYPASS and SECOND branches always
11395 test for UNORDERED. */
11396 probability = split_branch_probability;
11397
11398 /* A value of 1 is low enough that there is no need for the
11399 probability to be updated. Later we may run some experiments and
11400 see whether unordered values are more frequent in practice. */
11401 if (bypass)
11402 bypass_probability = 1;
11403 if (second)
11404 second_probability = 1;
11405 }
11406 if (bypass != NULL_RTX)
11407 {
11408 label = gen_label_rtx ();
11409 i = emit_jump_insn (gen_rtx_SET
11410 (VOIDmode, pc_rtx,
11411 gen_rtx_IF_THEN_ELSE (VOIDmode,
11412 bypass,
11413 gen_rtx_LABEL_REF (VOIDmode,
11414 label),
11415 pc_rtx)));
11416 if (bypass_probability >= 0)
11417 REG_NOTES (i)
11418 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11419 GEN_INT (bypass_probability),
11420 REG_NOTES (i));
11421 }
11422 i = emit_jump_insn (gen_rtx_SET
11423 (VOIDmode, pc_rtx,
11424 gen_rtx_IF_THEN_ELSE (VOIDmode,
11425 condition, target1, target2)));
11426 if (probability >= 0)
11427 REG_NOTES (i)
11428 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11429 GEN_INT (probability),
11430 REG_NOTES (i));
11431 if (second != NULL_RTX)
11432 {
11433 i = emit_jump_insn (gen_rtx_SET
11434 (VOIDmode, pc_rtx,
11435 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11436 target2)));
11437 if (second_probability >= 0)
11438 REG_NOTES (i)
11439 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11440 GEN_INT (second_probability),
11441 REG_NOTES (i));
11442 }
11443 if (label != NULL_RTX)
11444 emit_label (label);
11445 }
11446
11447 int
11448 ix86_expand_setcc (enum rtx_code code, rtx dest)
11449 {
11450 rtx ret, tmp, tmpreg, equiv;
11451 rtx second_test, bypass_test;
11452
11453 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11454 return 0; /* FAIL */
11455
11456 gcc_assert (GET_MODE (dest) == QImode);
11457
11458 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11459 PUT_MODE (ret, QImode);
11460
11461 tmp = dest;
11462 tmpreg = dest;
11463
11464 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11465 if (bypass_test || second_test)
11466 {
11467 rtx test = second_test;
11468 int bypass = 0;
11469 rtx tmp2 = gen_reg_rtx (QImode);
11470 if (bypass_test)
11471 {
11472 gcc_assert (!second_test);
11473 test = bypass_test;
11474 bypass = 1;
11475 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11476 }
11477 PUT_MODE (test, QImode);
11478 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11479
11480 if (bypass)
11481 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11482 else
11483 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11484 }
11485
11486 /* Attach a REG_EQUAL note describing the comparison result. */
11487 if (ix86_compare_op0 && ix86_compare_op1)
11488 {
11489 equiv = simplify_gen_relational (code, QImode,
11490 GET_MODE (ix86_compare_op0),
11491 ix86_compare_op0, ix86_compare_op1);
11492 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11493 }
11494
11495 return 1; /* DONE */
11496 }
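/* A note on the second/bypass handling above, by way of illustration:
   when a SECOND test is produced, the final value is (first | second);
   when a BYPASS test is produced, it is (first & !bypass).  The bypass
   condition is reversed with reverse_condition_maybe_unordered so that
   a plain AND suffices.  */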
11497
11498 /* Expand comparison setting or clearing carry flag. Return true when
11499 successful and set pop for the operation. */
11500 static bool
11501 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11502 {
11503 enum machine_mode mode =
11504 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11505
11506 /* Do not handle DImode compares, which go through a special path. Also we
11507 can't deal with FP compares yet. This would be possible to add. */
11508 if (mode == (TARGET_64BIT ? TImode : DImode))
11509 return false;
11510 if (FLOAT_MODE_P (mode))
11511 {
11512 rtx second_test = NULL, bypass_test = NULL;
11513 rtx compare_op, compare_seq;
11514
11515 /* Shortcut: the following common codes never translate into carry flag compares. */
11516 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11517 || code == ORDERED || code == UNORDERED)
11518 return false;
11519
11520 /* These comparisons require zero flag; swap operands so they won't. */
11521 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11522 && !TARGET_IEEE_FP)
11523 {
11524 rtx tmp = op0;
11525 op0 = op1;
11526 op1 = tmp;
11527 code = swap_condition (code);
11528 }
11529
11530 /* Try to expand the comparison and verify that we end up with carry flag
11531 based comparison. This fails to be true only when we decide to expand the
11532 comparison using arithmetic, which is not a common scenario. */
11533 start_sequence ();
11534 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11535 &second_test, &bypass_test);
11536 compare_seq = get_insns ();
11537 end_sequence ();
11538
11539 if (second_test || bypass_test)
11540 return false;
11541 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11542 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11543 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11544 else
11545 code = GET_CODE (compare_op);
11546 if (code != LTU && code != GEU)
11547 return false;
11548 emit_insn (compare_seq);
11549 *pop = compare_op;
11550 return true;
11551 }
11552 if (!INTEGRAL_MODE_P (mode))
11553 return false;
11554 switch (code)
11555 {
11556 case LTU:
11557 case GEU:
11558 break;
11559
11560 /* Convert a==0 into (unsigned)a<1. */
11561 case EQ:
11562 case NE:
11563 if (op1 != const0_rtx)
11564 return false;
11565 op1 = const1_rtx;
11566 code = (code == EQ ? LTU : GEU);
11567 break;
11568
11569 /* Convert a>b into b<a or a>=b+1. */
11570 case GTU:
11571 case LEU:
11572 if (CONST_INT_P (op1))
11573 {
11574 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11575 /* Bail out on overflow. We could still swap the operands, but that
11576 would force loading the constant into a register. */
11577 if (op1 == const0_rtx
11578 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11579 return false;
11580 code = (code == GTU ? GEU : LTU);
11581 }
11582 else
11583 {
11584 rtx tmp = op1;
11585 op1 = op0;
11586 op0 = tmp;
11587 code = (code == GTU ? LTU : GEU);
11588 }
11589 break;
11590
11591 /* Convert a>=0 into (unsigned)a<0x80000000. */
11592 case LT:
11593 case GE:
11594 if (mode == DImode || op1 != const0_rtx)
11595 return false;
11596 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11597 code = (code == LT ? GEU : LTU);
11598 break;
11599 case LE:
11600 case GT:
11601 if (mode == DImode || op1 != constm1_rtx)
11602 return false;
11603 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11604 code = (code == LE ? GEU : LTU);
11605 break;
11606
11607 default:
11608 return false;
11609 }
11610 /* Swapping operands may cause a constant to appear as the first operand. */
11611 if (!nonimmediate_operand (op0, VOIDmode))
11612 {
11613 if (no_new_pseudos)
11614 return false;
11615 op0 = force_reg (mode, op0);
11616 }
11617 ix86_compare_op0 = op0;
11618 ix86_compare_op1 = op1;
11619 *pop = ix86_expand_compare (code, NULL, NULL);
11620 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11621 return true;
11622 }
11623
11624 int
11625 ix86_expand_int_movcc (rtx operands[])
11626 {
11627 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11628 rtx compare_seq, compare_op;
11629 rtx second_test, bypass_test;
11630 enum machine_mode mode = GET_MODE (operands[0]);
11631 bool sign_bit_compare_p = false;
11632
11633 start_sequence ();
11634 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11635 compare_seq = get_insns ();
11636 end_sequence ();
11637
11638 compare_code = GET_CODE (compare_op);
11639
11640 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11641 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11642 sign_bit_compare_p = true;
11643
11644 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11645 HImode insns, we'd be swallowed in word prefix ops. */
11646
11647 if ((mode != HImode || TARGET_FAST_PREFIX)
11648 && (mode != (TARGET_64BIT ? TImode : DImode))
11649 && CONST_INT_P (operands[2])
11650 && CONST_INT_P (operands[3]))
11651 {
11652 rtx out = operands[0];
11653 HOST_WIDE_INT ct = INTVAL (operands[2]);
11654 HOST_WIDE_INT cf = INTVAL (operands[3]);
11655 HOST_WIDE_INT diff;
11656
11657 diff = ct - cf;
11658 /* Sign bit compares are better done using shifts than by using
11659 sbb. */
11660 if (sign_bit_compare_p
11661 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11662 ix86_compare_op1, &compare_op))
11663 {
11664 /* Detect overlap between destination and compare sources. */
11665 rtx tmp = out;
11666
11667 if (!sign_bit_compare_p)
11668 {
11669 bool fpcmp = false;
11670
11671 compare_code = GET_CODE (compare_op);
11672
11673 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11674 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11675 {
11676 fpcmp = true;
11677 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11678 }
11679
11680 /* To simplify the rest of the code, restrict to the GEU case. */
11681 if (compare_code == LTU)
11682 {
11683 HOST_WIDE_INT tmp = ct;
11684 ct = cf;
11685 cf = tmp;
11686 compare_code = reverse_condition (compare_code);
11687 code = reverse_condition (code);
11688 }
11689 else
11690 {
11691 if (fpcmp)
11692 PUT_CODE (compare_op,
11693 reverse_condition_maybe_unordered
11694 (GET_CODE (compare_op)));
11695 else
11696 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11697 }
11698 diff = ct - cf;
11699
11700 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11701 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11702 tmp = gen_reg_rtx (mode);
11703
11704 if (mode == DImode)
11705 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11706 else
11707 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11708 }
11709 else
11710 {
11711 if (code == GT || code == GE)
11712 code = reverse_condition (code);
11713 else
11714 {
11715 HOST_WIDE_INT tmp = ct;
11716 ct = cf;
11717 cf = tmp;
11718 diff = ct - cf;
11719 }
11720 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11721 ix86_compare_op1, VOIDmode, 0, -1);
11722 }
11723
11724 if (diff == 1)
11725 {
11726 /*
11727 * cmpl op0,op1
11728 * sbbl dest,dest
11729 * [addl dest, ct]
11730 *
11731 * Size 5 - 8.
11732 */
11733 if (ct)
11734 tmp = expand_simple_binop (mode, PLUS,
11735 tmp, GEN_INT (ct),
11736 copy_rtx (tmp), 1, OPTAB_DIRECT);
11737 }
11738 else if (cf == -1)
11739 {
11740 /*
11741 * cmpl op0,op1
11742 * sbbl dest,dest
11743 * orl $ct, dest
11744 *
11745 * Size 8.
11746 */
11747 tmp = expand_simple_binop (mode, IOR,
11748 tmp, GEN_INT (ct),
11749 copy_rtx (tmp), 1, OPTAB_DIRECT);
11750 }
11751 else if (diff == -1 && ct)
11752 {
11753 /*
11754 * cmpl op0,op1
11755 * sbbl dest,dest
11756 * notl dest
11757 * [addl dest, cf]
11758 *
11759 * Size 8 - 11.
11760 */
11761 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11762 if (cf)
11763 tmp = expand_simple_binop (mode, PLUS,
11764 copy_rtx (tmp), GEN_INT (cf),
11765 copy_rtx (tmp), 1, OPTAB_DIRECT);
11766 }
11767 else
11768 {
11769 /*
11770 * cmpl op0,op1
11771 * sbbl dest,dest
11772 * [notl dest]
11773 * andl cf - ct, dest
11774 * [addl dest, ct]
11775 *
11776 * Size 8 - 11.
11777 */
11778
11779 if (cf == 0)
11780 {
11781 cf = ct;
11782 ct = 0;
11783 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11784 }
11785
11786 tmp = expand_simple_binop (mode, AND,
11787 copy_rtx (tmp),
11788 gen_int_mode (cf - ct, mode),
11789 copy_rtx (tmp), 1, OPTAB_DIRECT);
11790 if (ct)
11791 tmp = expand_simple_binop (mode, PLUS,
11792 copy_rtx (tmp), GEN_INT (ct),
11793 copy_rtx (tmp), 1, OPTAB_DIRECT);
11794 }
11795
11796 if (!rtx_equal_p (tmp, out))
11797 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11798
11799 return 1; /* DONE */
11800 }
11801
11802 if (diff < 0)
11803 {
11804 HOST_WIDE_INT tmp;
11805 tmp = ct, ct = cf, cf = tmp;
11806 diff = -diff;
11807 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11808 {
11809 /* We may be reversing an unordered compare to a normal compare, which
11810 is not valid in general (we may convert a non-trapping condition
11811 to a trapping one); however, on i386 we currently emit all
11812 comparisons unordered. */
11813 compare_code = reverse_condition_maybe_unordered (compare_code);
11814 code = reverse_condition_maybe_unordered (code);
11815 }
11816 else
11817 {
11818 compare_code = reverse_condition (compare_code);
11819 code = reverse_condition (code);
11820 }
11821 }
11822
11823 compare_code = UNKNOWN;
11824 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11825 && CONST_INT_P (ix86_compare_op1))
11826 {
11827 if (ix86_compare_op1 == const0_rtx
11828 && (code == LT || code == GE))
11829 compare_code = code;
11830 else if (ix86_compare_op1 == constm1_rtx)
11831 {
11832 if (code == LE)
11833 compare_code = LT;
11834 else if (code == GT)
11835 compare_code = GE;
11836 }
11837 }
11838
11839 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11840 if (compare_code != UNKNOWN
11841 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11842 && (cf == -1 || ct == -1))
11843 {
11844 /* If lea code below could be used, only optimize
11845 if it results in a 2 insn sequence. */
11846
11847 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11848 || diff == 3 || diff == 5 || diff == 9)
11849 || (compare_code == LT && ct == -1)
11850 || (compare_code == GE && cf == -1))
11851 {
11852 /*
11853 * notl op1 (if necessary)
11854 * sarl $31, op1
11855 * orl cf, op1
11856 */
11857 if (ct != -1)
11858 {
11859 cf = ct;
11860 ct = -1;
11861 code = reverse_condition (code);
11862 }
11863
11864 out = emit_store_flag (out, code, ix86_compare_op0,
11865 ix86_compare_op1, VOIDmode, 0, -1);
11866
11867 out = expand_simple_binop (mode, IOR,
11868 out, GEN_INT (cf),
11869 out, 1, OPTAB_DIRECT);
11870 if (out != operands[0])
11871 emit_move_insn (operands[0], out);
11872
11873 return 1; /* DONE */
11874 }
11875 }
11876
11877
11878 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11879 || diff == 3 || diff == 5 || diff == 9)
11880 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11881 && (mode != DImode
11882 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11883 {
11884 /*
11885 * xorl dest,dest
11886 * cmpl op1,op2
11887 * setcc dest
11888 * lea cf(dest*(ct-cf)),dest
11889 *
11890 * Size 14.
11891 *
11892 * This also catches the degenerate setcc-only case.
11893 */
11894
11895 rtx tmp;
11896 int nops;
11897
11898 out = emit_store_flag (out, code, ix86_compare_op0,
11899 ix86_compare_op1, VOIDmode, 0, 1);
11900
11901 nops = 0;
11902 /* On x86_64 the lea instruction operates on Pmode, so we need
11903 to get the arithmetic done in the proper mode to match. */
11904 if (diff == 1)
11905 tmp = copy_rtx (out);
11906 else
11907 {
11908 rtx out1;
11909 out1 = copy_rtx (out);
11910 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11911 nops++;
11912 if (diff & 1)
11913 {
11914 tmp = gen_rtx_PLUS (mode, tmp, out1);
11915 nops++;
11916 }
11917 }
11918 if (cf != 0)
11919 {
11920 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11921 nops++;
11922 }
11923 if (!rtx_equal_p (tmp, out))
11924 {
11925 if (nops == 1)
11926 out = force_operand (tmp, copy_rtx (out));
11927 else
11928 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11929 }
11930 if (!rtx_equal_p (out, operands[0]))
11931 emit_move_insn (operands[0], copy_rtx (out));
11932
11933 return 1; /* DONE */
11934 }
11935
11936 /*
11937 * General case: Jumpful:
11938 * xorl dest,dest cmpl op1, op2
11939 * cmpl op1, op2 movl ct, dest
11940 * setcc dest jcc 1f
11941 * decl dest movl cf, dest
11942 * andl (cf-ct),dest 1:
11943 * addl ct,dest
11944 *
11945 * Size 20. Size 14.
11946 *
11947 * This is reasonably steep, but branch mispredict costs are
11948 * high on modern cpus, so consider failing only if optimizing
11949 * for space.
11950 */
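	/* As a branch-free sketch in C, the "General case" column computes

		t  = (cond ? 1 : 0) - 1;	(0 or -1)
		t &= cf - ct;
		t += ct;			(yields cond ? ct : cf)

	   which is the sequence the store-flag/PLUS/AND/PLUS code below
	   builds.  */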
11951
11952 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11953 && BRANCH_COST >= 2)
11954 {
11955 if (cf == 0)
11956 {
11957 cf = ct;
11958 ct = 0;
11959 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11960 /* We may be reversing an unordered compare to a normal compare,
11961 which is not valid in general (we may convert a non-trapping
11962 condition to a trapping one); however, on i386 we currently
11963 emit all comparisons unordered. */
11964 code = reverse_condition_maybe_unordered (code);
11965 else
11966 {
11967 code = reverse_condition (code);
11968 if (compare_code != UNKNOWN)
11969 compare_code = reverse_condition (compare_code);
11970 }
11971 }
11972
11973 if (compare_code != UNKNOWN)
11974 {
11975 /* notl op1 (if needed)
11976 sarl $31, op1
11977 andl (cf-ct), op1
11978 addl ct, op1
11979
11980 For x < 0 (resp. x <= -1) there will be no notl,
11981 so if possible swap the constants to get rid of the
11982 complement.
11983 True/false will be -1/0 while code below (store flag
11984 followed by decrement) is 0/-1, so the constants need
11985 to be exchanged once more. */
11986
11987 if (compare_code == GE || !cf)
11988 {
11989 code = reverse_condition (code);
11990 compare_code = LT;
11991 }
11992 else
11993 {
11994 HOST_WIDE_INT tmp = cf;
11995 cf = ct;
11996 ct = tmp;
11997 }
11998
11999 out = emit_store_flag (out, code, ix86_compare_op0,
12000 ix86_compare_op1, VOIDmode, 0, -1);
12001 }
12002 else
12003 {
12004 out = emit_store_flag (out, code, ix86_compare_op0,
12005 ix86_compare_op1, VOIDmode, 0, 1);
12006
12007 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12008 copy_rtx (out), 1, OPTAB_DIRECT);
12009 }
12010
12011 out = expand_simple_binop (mode, AND, copy_rtx (out),
12012 gen_int_mode (cf - ct, mode),
12013 copy_rtx (out), 1, OPTAB_DIRECT);
12014 if (ct)
12015 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12016 copy_rtx (out), 1, OPTAB_DIRECT);
12017 if (!rtx_equal_p (out, operands[0]))
12018 emit_move_insn (operands[0], copy_rtx (out));
12019
12020 return 1; /* DONE */
12021 }
12022 }
12023
12024 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12025 {
12026 /* Try a few things more with specific constants and a variable. */
12027
12028 optab op;
12029 rtx var, orig_out, out, tmp;
12030
12031 if (BRANCH_COST <= 2)
12032 return 0; /* FAIL */
12033
12034 /* If one of the two operands is an interesting constant, load a
12035 constant with the above and mask it in with a logical operation. */
12036
12037 if (CONST_INT_P (operands[2]))
12038 {
12039 var = operands[3];
12040 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12041 operands[3] = constm1_rtx, op = and_optab;
12042 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12043 operands[3] = const0_rtx, op = ior_optab;
12044 else
12045 return 0; /* FAIL */
12046 }
12047 else if (CONST_INT_P (operands[3]))
12048 {
12049 var = operands[2];
12050 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12051 operands[2] = constm1_rtx, op = and_optab;
12052 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
12053 operands[2] = const0_rtx, op = ior_optab;
12054 else
12055 return 0; /* FAIL */
12056 }
12057 else
12058 return 0; /* FAIL */
12059
12060 orig_out = operands[0];
12061 tmp = gen_reg_rtx (mode);
12062 operands[0] = tmp;
12063
12064 /* Recurse to get the constant loaded. */
12065 if (ix86_expand_int_movcc (operands) == 0)
12066 return 0; /* FAIL */
12067
12068 /* Mask in the interesting variable. */
12069 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12070 OPTAB_WIDEN);
12071 if (!rtx_equal_p (out, orig_out))
12072 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12073
12074 return 1; /* DONE */
12075 }
12076
12077 /*
12078 * For comparison with above,
12079 *
12080 * movl cf,dest
12081 * movl ct,tmp
12082 * cmpl op1,op2
12083 * cmovcc tmp,dest
12084 *
12085 * Size 15.
12086 */
12087
12088 if (! nonimmediate_operand (operands[2], mode))
12089 operands[2] = force_reg (mode, operands[2]);
12090 if (! nonimmediate_operand (operands[3], mode))
12091 operands[3] = force_reg (mode, operands[3]);
12092
12093 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12094 {
12095 rtx tmp = gen_reg_rtx (mode);
12096 emit_move_insn (tmp, operands[3]);
12097 operands[3] = tmp;
12098 }
12099 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12100 {
12101 rtx tmp = gen_reg_rtx (mode);
12102 emit_move_insn (tmp, operands[2]);
12103 operands[2] = tmp;
12104 }
12105
12106 if (! register_operand (operands[2], VOIDmode)
12107 && (mode == QImode
12108 || ! register_operand (operands[3], VOIDmode)))
12109 operands[2] = force_reg (mode, operands[2]);
12110
12111 if (mode == QImode
12112 && ! register_operand (operands[3], VOIDmode))
12113 operands[3] = force_reg (mode, operands[3]);
12114
12115 emit_insn (compare_seq);
12116 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12117 gen_rtx_IF_THEN_ELSE (mode,
12118 compare_op, operands[2],
12119 operands[3])));
12120 if (bypass_test)
12121 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12122 gen_rtx_IF_THEN_ELSE (mode,
12123 bypass_test,
12124 copy_rtx (operands[3]),
12125 copy_rtx (operands[0]))));
12126 if (second_test)
12127 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12128 gen_rtx_IF_THEN_ELSE (mode,
12129 second_test,
12130 copy_rtx (operands[2]),
12131 copy_rtx (operands[0]))));
12132
12133 return 1; /* DONE */
12134 }
12135
12136 /* Swap, force into registers, or otherwise massage the two operands
12137 to an sse comparison with a mask result. Thus we differ a bit from
12138 ix86_prepare_fp_compare_args which expects to produce a flags result.
12139
12140 The DEST operand exists to help determine whether to commute commutative
12141 operators. The POP0/POP1 operands are updated in place. The new
12142 comparison code is returned, or UNKNOWN if not implementable. */
12143
12144 static enum rtx_code
12145 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12146 rtx *pop0, rtx *pop1)
12147 {
12148 rtx tmp;
12149
12150 switch (code)
12151 {
12152 case LTGT:
12153 case UNEQ:
12154 /* We have no LTGT as an operator. We could implement it with
12155 NE & ORDERED, but this requires an extra temporary. It's
12156 not clear that it's worth it. */
12157 return UNKNOWN;
12158
12159 case LT:
12160 case LE:
12161 case UNGT:
12162 case UNGE:
12163 /* These are supported directly. */
12164 break;
12165
12166 case EQ:
12167 case NE:
12168 case UNORDERED:
12169 case ORDERED:
12170 /* For commutative operators, try to canonicalize the destination
12171 operand to be first in the comparison - this helps reload to
12172 avoid extra moves. */
12173 if (!dest || !rtx_equal_p (dest, *pop1))
12174 break;
12175 /* FALLTHRU */
12176
12177 case GE:
12178 case GT:
12179 case UNLE:
12180 case UNLT:
12181 /* These are not supported directly. Swap the comparison operands
12182 to transform into something that is supported. */
12183 tmp = *pop0;
12184 *pop0 = *pop1;
12185 *pop1 = tmp;
12186 code = swap_condition (code);
12187 break;
12188
12189 default:
12190 gcc_unreachable ();
12191 }
12192
12193 return code;
12194 }
12195
12196 /* Detect conditional moves that exactly match min/max operational
12197 semantics. Note that this is IEEE safe, as long as we don't
12198 interchange the operands.
12199
12200 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12201 and TRUE if the operation is successful and instructions are emitted. */
12202
12203 static bool
12204 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12205 rtx cmp_op1, rtx if_true, rtx if_false)
12206 {
12207 enum machine_mode mode;
12208 bool is_min;
12209 rtx tmp;
12210
12211 if (code == LT)
12212 ;
12213 else if (code == UNGE)
12214 {
12215 tmp = if_true;
12216 if_true = if_false;
12217 if_false = tmp;
12218 }
12219 else
12220 return false;
12221
12222 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12223 is_min = true;
12224 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12225 is_min = false;
12226 else
12227 return false;
12228
12229 mode = GET_MODE (dest);
12230
12231 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12232 but MODE may be a vector mode and thus not appropriate. */
12233 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12234 {
12235 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12236 rtvec v;
12237
12238 if_true = force_reg (mode, if_true);
12239 v = gen_rtvec (2, if_true, if_false);
12240 tmp = gen_rtx_UNSPEC (mode, v, u);
12241 }
12242 else
12243 {
12244 code = is_min ? SMIN : SMAX;
12245 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12246 }
12247
12248 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12249 return true;
12250 }
12251
12252 /* Expand an sse vector comparison. Return the register with the result. */
12253
12254 static rtx
12255 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12256 rtx op_true, rtx op_false)
12257 {
12258 enum machine_mode mode = GET_MODE (dest);
12259 rtx x;
12260
12261 cmp_op0 = force_reg (mode, cmp_op0);
12262 if (!nonimmediate_operand (cmp_op1, mode))
12263 cmp_op1 = force_reg (mode, cmp_op1);
12264
12265 if (optimize
12266 || reg_overlap_mentioned_p (dest, op_true)
12267 || reg_overlap_mentioned_p (dest, op_false))
12268 dest = gen_reg_rtx (mode);
12269
12270 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12271 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12272
12273 return dest;
12274 }
12275
12276 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12277 operations. This is used for both scalar and vector conditional moves. */
12278
12279 static void
12280 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12281 {
12282 enum machine_mode mode = GET_MODE (dest);
12283 rtx t2, t3, x;
12284
12285 if (op_false == CONST0_RTX (mode))
12286 {
12287 op_true = force_reg (mode, op_true);
12288 x = gen_rtx_AND (mode, cmp, op_true);
12289 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12290 }
12291 else if (op_true == CONST0_RTX (mode))
12292 {
12293 op_false = force_reg (mode, op_false);
12294 x = gen_rtx_NOT (mode, cmp);
12295 x = gen_rtx_AND (mode, x, op_false);
12296 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12297 }
12298 else
12299 {
12300 op_true = force_reg (mode, op_true);
12301 op_false = force_reg (mode, op_false);
12302
12303 t2 = gen_reg_rtx (mode);
12304 if (optimize)
12305 t3 = gen_reg_rtx (mode);
12306 else
12307 t3 = dest;
12308
12309 x = gen_rtx_AND (mode, op_true, cmp);
12310 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12311
12312 x = gen_rtx_NOT (mode, cmp);
12313 x = gen_rtx_AND (mode, x, op_false);
12314 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12315
12316 x = gen_rtx_IOR (mode, t3, t2);
12317 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12318 }
12319 }
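/* In other words, with CMP being an all-ones/all-zeros mask per element
   (as produced by ix86_expand_sse_cmp), the general case above computes

	dest = (cmp & op_true) | (~cmp & op_false)

   which is the usual SSE select-by-mask idiom.  */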
12320
12321 /* Expand a floating-point conditional move. Return true if successful. */
12322
12323 int
12324 ix86_expand_fp_movcc (rtx operands[])
12325 {
12326 enum machine_mode mode = GET_MODE (operands[0]);
12327 enum rtx_code code = GET_CODE (operands[1]);
12328 rtx tmp, compare_op, second_test, bypass_test;
12329
12330 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12331 {
12332 enum machine_mode cmode;
12333
12334 /* Since we've no cmove for sse registers, don't force bad register
12335 allocation just to gain access to it. Deny movcc when the
12336 comparison mode doesn't match the move mode. */
12337 cmode = GET_MODE (ix86_compare_op0);
12338 if (cmode == VOIDmode)
12339 cmode = GET_MODE (ix86_compare_op1);
12340 if (cmode != mode)
12341 return 0;
12342
12343 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12344 &ix86_compare_op0,
12345 &ix86_compare_op1);
12346 if (code == UNKNOWN)
12347 return 0;
12348
12349 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12350 ix86_compare_op1, operands[2],
12351 operands[3]))
12352 return 1;
12353
12354 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12355 ix86_compare_op1, operands[2], operands[3]);
12356 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12357 return 1;
12358 }
12359
12360 /* The floating point conditional move instructions don't directly
12361 support conditions resulting from a signed integer comparison. */
12362
12363 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12364
12365 /* If the comparison is not supported directly by fcmov, materialize
12366 the result with setcc and test that against zero instead. */
12367
12368 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12369 {
12370 gcc_assert (!second_test && !bypass_test);
12371 tmp = gen_reg_rtx (QImode);
12372 ix86_expand_setcc (code, tmp);
12373 code = NE;
12374 ix86_compare_op0 = tmp;
12375 ix86_compare_op1 = const0_rtx;
12376 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12377 }
12378 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12379 {
12380 tmp = gen_reg_rtx (mode);
12381 emit_move_insn (tmp, operands[3]);
12382 operands[3] = tmp;
12383 }
12384 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12385 {
12386 tmp = gen_reg_rtx (mode);
12387 emit_move_insn (tmp, operands[2]);
12388 operands[2] = tmp;
12389 }
12390
12391 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12392 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12393 operands[2], operands[3])));
12394 if (bypass_test)
12395 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12396 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12397 operands[3], operands[0])));
12398 if (second_test)
12399 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12400 gen_rtx_IF_THEN_ELSE (mode, second_test,
12401 operands[2], operands[0])));
12402
12403 return 1;
12404 }
12405
12406 /* Expand a floating-point vector conditional move; a vcond operation
12407 rather than a movcc operation. */
12408
12409 bool
12410 ix86_expand_fp_vcond (rtx operands[])
12411 {
12412 enum rtx_code code = GET_CODE (operands[3]);
12413 rtx cmp;
12414
12415 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12416 &operands[4], &operands[5]);
12417 if (code == UNKNOWN)
12418 return false;
12419
12420 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12421 operands[5], operands[1], operands[2]))
12422 return true;
12423
12424 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12425 operands[1], operands[2]);
12426 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12427 return true;
12428 }
12429
12430 /* Expand a signed integral vector conditional move. */
12431
12432 bool
12433 ix86_expand_int_vcond (rtx operands[])
12434 {
12435 enum machine_mode mode = GET_MODE (operands[0]);
12436 enum rtx_code code = GET_CODE (operands[3]);
12437 bool negate = false;
12438 rtx x, cop0, cop1;
12439
12440 cop0 = operands[4];
12441 cop1 = operands[5];
12442
12443 /* Canonicalize the comparison to EQ, GT, GTU. */
12444 switch (code)
12445 {
12446 case EQ:
12447 case GT:
12448 case GTU:
12449 break;
12450
12451 case NE:
12452 case LE:
12453 case LEU:
12454 code = reverse_condition (code);
12455 negate = true;
12456 break;
12457
12458 case GE:
12459 case GEU:
12460 code = reverse_condition (code);
12461 negate = true;
12462 /* FALLTHRU */
12463
12464 case LT:
12465 case LTU:
12466 code = swap_condition (code);
12467 x = cop0, cop0 = cop1, cop1 = x;
12468 break;
12469
12470 default:
12471 gcc_unreachable ();
12472 }
12473
12474 /* Unsigned parallel compare is not supported by the hardware. Play some
12475 tricks to turn this into a signed comparison against 0. */
12476 if (code == GTU)
12477 {
12478 cop0 = force_reg (mode, cop0);
12479
12480 switch (mode)
12481 {
12482 case V4SImode:
12483 {
12484 rtx t1, t2, mask;
12485
12486 /* Perform a parallel modulo subtraction. */
12487 t1 = gen_reg_rtx (mode);
12488 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12489
12490 /* Extract the original sign bit of op0. */
12491 mask = GEN_INT (-0x80000000);
12492 mask = gen_rtx_CONST_VECTOR (mode,
12493 gen_rtvec (4, mask, mask, mask, mask));
12494 mask = force_reg (mode, mask);
12495 t2 = gen_reg_rtx (mode);
12496 emit_insn (gen_andv4si3 (t2, cop0, mask));
12497
12498 /* XOR it back into the result of the subtraction. This results
12499 in the sign bit set iff we saw unsigned underflow. */
12500 x = gen_reg_rtx (mode);
12501 emit_insn (gen_xorv4si3 (x, t1, t2));
12502
12503 code = GT;
12504 }
12505 break;
12506
12507 case V16QImode:
12508 case V8HImode:
12509 /* Perform a parallel unsigned saturating subtraction. */
12510 x = gen_reg_rtx (mode);
12511 emit_insn (gen_rtx_SET (VOIDmode, x,
12512 gen_rtx_US_MINUS (mode, cop0, cop1)));
12513
12514 code = EQ;
12515 negate = !negate;
12516 break;
12517
12518 default:
12519 gcc_unreachable ();
12520 }
12521
12522 cop0 = x;
12523 cop1 = CONST0_RTX (mode);
12524 }
12525
12526 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12527 operands[1+negate], operands[2-negate]);
12528
12529 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12530 operands[2-negate]);
12531 return true;
12532 }
12533
12534 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12535 true if we should do zero extension, else sign extension. HIGH_P is
12536 true if we want the N/2 high elements, else the low elements. */
12537
12538 void
12539 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12540 {
12541 enum machine_mode imode = GET_MODE (operands[1]);
12542 rtx (*unpack)(rtx, rtx, rtx);
12543 rtx se, dest;
12544
12545 switch (imode)
12546 {
12547 case V16QImode:
12548 if (high_p)
12549 unpack = gen_vec_interleave_highv16qi;
12550 else
12551 unpack = gen_vec_interleave_lowv16qi;
12552 break;
12553 case V8HImode:
12554 if (high_p)
12555 unpack = gen_vec_interleave_highv8hi;
12556 else
12557 unpack = gen_vec_interleave_lowv8hi;
12558 break;
12559 case V4SImode:
12560 if (high_p)
12561 unpack = gen_vec_interleave_highv4si;
12562 else
12563 unpack = gen_vec_interleave_lowv4si;
12564 break;
12565 default:
12566 gcc_unreachable ();
12567 }
12568
12569 dest = gen_lowpart (imode, operands[0]);
12570
12571 if (unsigned_p)
12572 se = force_reg (imode, CONST0_RTX (imode));
12573 else
12574 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12575 operands[1], pc_rtx, pc_rtx);
12576
12577 emit_insn (unpack (dest, operands[1], se));
12578 }
12579
12580 /* Expand conditional increment or decrement using adc/sbb instructions.
12581 The default case using setcc followed by the conditional move can be
12582 done by generic code. */
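/* As a rough sketch, with a in %eax, b in %ebx and r in %ecx, an
   unsigned "r += (a < b)" can come out as

	cmpl	%ebx, %eax
	adcl	$0, %ecx

   since the compare leaves CF set exactly when a < b; the decrement
   variants use sbb in the same way.  */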
12583 int
12584 ix86_expand_int_addcc (rtx operands[])
12585 {
12586 enum rtx_code code = GET_CODE (operands[1]);
12587 rtx compare_op;
12588 rtx val = const0_rtx;
12589 bool fpcmp = false;
12590 enum machine_mode mode = GET_MODE (operands[0]);
12591
12592 if (operands[3] != const1_rtx
12593 && operands[3] != constm1_rtx)
12594 return 0;
12595 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12596 ix86_compare_op1, &compare_op))
12597 return 0;
12598 code = GET_CODE (compare_op);
12599
12600 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12601 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12602 {
12603 fpcmp = true;
12604 code = ix86_fp_compare_code_to_integer (code);
12605 }
12606
12607 if (code != LTU)
12608 {
12609 val = constm1_rtx;
12610 if (fpcmp)
12611 PUT_CODE (compare_op,
12612 reverse_condition_maybe_unordered
12613 (GET_CODE (compare_op)));
12614 else
12615 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12616 }
12617 PUT_MODE (compare_op, mode);
12618
12619 /* Construct either adc or sbb insn. */
12620 if ((code == LTU) == (operands[3] == constm1_rtx))
12621 {
12622 switch (GET_MODE (operands[0]))
12623 {
12624 case QImode:
12625 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12626 break;
12627 case HImode:
12628 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12629 break;
12630 case SImode:
12631 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12632 break;
12633 case DImode:
12634 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12635 break;
12636 default:
12637 gcc_unreachable ();
12638 }
12639 }
12640 else
12641 {
12642 switch (GET_MODE (operands[0]))
12643 {
12644 case QImode:
12645 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12646 break;
12647 case HImode:
12648 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12649 break;
12650 case SImode:
12651 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12652 break;
12653 case DImode:
12654 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12655 break;
12656 default:
12657 gcc_unreachable ();
12658 }
12659 }
12660 return 1; /* DONE */
12661 }
12662
12663
12664 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12665 works for floating point parameters and non-offsettable memories.
12666 For pushes, it returns just stack offsets; the values will be saved
12667 in the right order. At most three parts are generated. */
12668
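/* For example, on !TARGET_64BIT a DFmode constant such as 1.0
   (0x3ff0000000000000) is split into two SImode immediates,
   parts[0] = 0x00000000 and parts[1] = 0x3ff00000, while an XFmode
   value yields three SImode parts.  */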
12669 static int
12670 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12671 {
12672 int size;
12673
12674 if (!TARGET_64BIT)
12675 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12676 else
12677 size = (GET_MODE_SIZE (mode) + 4) / 8;
12678
12679 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12680 gcc_assert (size >= 2 && size <= 3);
12681
12682 /* Optimize constant pool reference to immediates. This is used by fp
12683 moves, which force all constants to memory to allow combining. */
12684 if (MEM_P (operand) && MEM_READONLY_P (operand))
12685 {
12686 rtx tmp = maybe_get_pool_constant (operand);
12687 if (tmp)
12688 operand = tmp;
12689 }
12690
12691 if (MEM_P (operand) && !offsettable_memref_p (operand))
12692 {
12693 /* The only non-offsettable memories we handle are pushes. */
12694 int ok = push_operand (operand, VOIDmode);
12695
12696 gcc_assert (ok);
12697
12698 operand = copy_rtx (operand);
12699 PUT_MODE (operand, Pmode);
12700 parts[0] = parts[1] = parts[2] = operand;
12701 return size;
12702 }
12703
12704 if (GET_CODE (operand) == CONST_VECTOR)
12705 {
12706 enum machine_mode imode = int_mode_for_mode (mode);
12707 /* Caution: if we looked through a constant pool memory above,
12708 the operand may actually have a different mode now. That's
12709 ok, since we want to pun this all the way back to an integer. */
12710 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12711 gcc_assert (operand != NULL);
12712 mode = imode;
12713 }
12714
12715 if (!TARGET_64BIT)
12716 {
12717 if (mode == DImode)
12718 split_di (&operand, 1, &parts[0], &parts[1]);
12719 else
12720 {
12721 if (REG_P (operand))
12722 {
12723 gcc_assert (reload_completed);
12724 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12725 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12726 if (size == 3)
12727 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12728 }
12729 else if (offsettable_memref_p (operand))
12730 {
12731 operand = adjust_address (operand, SImode, 0);
12732 parts[0] = operand;
12733 parts[1] = adjust_address (operand, SImode, 4);
12734 if (size == 3)
12735 parts[2] = adjust_address (operand, SImode, 8);
12736 }
12737 else if (GET_CODE (operand) == CONST_DOUBLE)
12738 {
12739 REAL_VALUE_TYPE r;
12740 long l[4];
12741
12742 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12743 switch (mode)
12744 {
12745 case XFmode:
12746 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12747 parts[2] = gen_int_mode (l[2], SImode);
12748 break;
12749 case DFmode:
12750 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12751 break;
12752 default:
12753 gcc_unreachable ();
12754 }
12755 parts[1] = gen_int_mode (l[1], SImode);
12756 parts[0] = gen_int_mode (l[0], SImode);
12757 }
12758 else
12759 gcc_unreachable ();
12760 }
12761 }
12762 else
12763 {
12764 if (mode == TImode)
12765 split_ti (&operand, 1, &parts[0], &parts[1]);
12766 if (mode == XFmode || mode == TFmode)
12767 {
12768 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12769 if (REG_P (operand))
12770 {
12771 gcc_assert (reload_completed);
12772 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12773 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12774 }
12775 else if (offsettable_memref_p (operand))
12776 {
12777 operand = adjust_address (operand, DImode, 0);
12778 parts[0] = operand;
12779 parts[1] = adjust_address (operand, upper_mode, 8);
12780 }
12781 else if (GET_CODE (operand) == CONST_DOUBLE)
12782 {
12783 REAL_VALUE_TYPE r;
12784 long l[4];
12785
12786 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12787 real_to_target (l, &r, mode);
12788
12789 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12790 if (HOST_BITS_PER_WIDE_INT >= 64)
12791 parts[0]
12792 = gen_int_mode
12793 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12794 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12795 DImode);
12796 else
12797 parts[0] = immed_double_const (l[0], l[1], DImode);
12798
12799 if (upper_mode == SImode)
12800 parts[1] = gen_int_mode (l[2], SImode);
12801 else if (HOST_BITS_PER_WIDE_INT >= 64)
12802 parts[1]
12803 = gen_int_mode
12804 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12805 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12806 DImode);
12807 else
12808 parts[1] = immed_double_const (l[2], l[3], DImode);
12809 }
12810 else
12811 gcc_unreachable ();
12812 }
12813 }
12814
12815 return size;
12816 }
12817
12818 /* Emit insns to perform a move or push of DI, DF, and XF values.
12819 The value is split into word-sized parts; operands 2-4 receive the
12820 destination parts and operands 5-7 the corresponding source parts,
12821 in the order in which the moves must be emitted. */
12822
12823 void
12824 ix86_split_long_move (rtx operands[])
12825 {
12826 rtx part[2][3];
12827 int nparts;
12828 int push = 0;
12829 int collisions = 0;
12830 enum machine_mode mode = GET_MODE (operands[0]);
12831
12832 /* The DFmode expanders may ask us to move a double.
12833 For a 64-bit target this is a single move. By hiding the fact
12834 here we simplify the i386.md splitters. */
12835 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12836 {
12837 /* Optimize constant pool references to immediates. This is used by
12838 fp moves, which force all constants to memory to allow combining. */
12839
12840 if (MEM_P (operands[1])
12841 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12842 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12843 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12844 if (push_operand (operands[0], VOIDmode))
12845 {
12846 operands[0] = copy_rtx (operands[0]);
12847 PUT_MODE (operands[0], Pmode);
12848 }
12849 else
12850 operands[0] = gen_lowpart (DImode, operands[0]);
12851 operands[1] = gen_lowpart (DImode, operands[1]);
12852 emit_move_insn (operands[0], operands[1]);
12853 return;
12854 }
12855
12856 /* The only non-offsettable memory we handle is push. */
12857 if (push_operand (operands[0], VOIDmode))
12858 push = 1;
12859 else
12860 gcc_assert (!MEM_P (operands[0])
12861 || offsettable_memref_p (operands[0]));
12862
12863 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12864 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12865
12866 /* When emitting push, take care for source operands on the stack. */
12867 if (push && MEM_P (operands[1])
12868 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12869 {
12870 if (nparts == 3)
12871 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12872 XEXP (part[1][2], 0));
12873 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12874 XEXP (part[1][1], 0));
12875 }
12876
12877 /* We need to do the copy in the right order in case an address register
12878 of the source overlaps the destination. */
12879 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12880 {
12881 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12882 collisions++;
12883 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12884 collisions++;
12885 if (nparts == 3
12886 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12887 collisions++;
12888
12889 /* Collision in the middle part can be handled by reordering. */
12890 if (collisions == 1 && nparts == 3
12891 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12892 {
12893 rtx tmp;
12894 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12895 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12896 }
12897
12898 /* If there are more collisions, we can't handle it by reordering.
12899 Do an lea to the last part and use only one colliding move. */
12900 else if (collisions > 1)
12901 {
12902 rtx base;
12903
12904 collisions = 1;
12905
12906 base = part[0][nparts - 1];
12907
12908 /* Handle the case when the last part isn't valid for lea.
12909 Happens in 64-bit mode storing the 12-byte XFmode. */
12910 if (GET_MODE (base) != Pmode)
12911 base = gen_rtx_REG (Pmode, REGNO (base));
12912
12913 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12914 part[1][0] = replace_equiv_address (part[1][0], base);
12915 part[1][1] = replace_equiv_address (part[1][1],
12916 plus_constant (base, UNITS_PER_WORD));
12917 if (nparts == 3)
12918 part[1][2] = replace_equiv_address (part[1][2],
12919 plus_constant (base, 8));
12920 }
12921 }
12922
12923 if (push)
12924 {
12925 if (!TARGET_64BIT)
12926 {
12927 if (nparts == 3)
12928 {
12929 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12930 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12931 emit_move_insn (part[0][2], part[1][2]);
12932 }
12933 }
12934 else
12935 {
12936 /* In 64-bit mode we don't have a 32-bit push available. If this is a
12937 register, it is OK - we simply use the larger counterpart. We also
12938 retype memory operands - these come from the attempt to avoid a REX
12939 prefix when moving the second half of a TFmode value. */
12940 if (GET_MODE (part[1][1]) == SImode)
12941 {
12942 switch (GET_CODE (part[1][1]))
12943 {
12944 case MEM:
12945 part[1][1] = adjust_address (part[1][1], DImode, 0);
12946 break;
12947
12948 case REG:
12949 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12950 break;
12951
12952 default:
12953 gcc_unreachable ();
12954 }
12955
12956 if (GET_MODE (part[1][0]) == SImode)
12957 part[1][0] = part[1][1];
12958 }
12959 }
12960 emit_move_insn (part[0][1], part[1][1]);
12961 emit_move_insn (part[0][0], part[1][0]);
12962 return;
12963 }
12964
12965 /* Choose the correct order so the source is not overwritten before it is copied. */
12966 if ((REG_P (part[0][0])
12967 && REG_P (part[1][1])
12968 && (REGNO (part[0][0]) == REGNO (part[1][1])
12969 || (nparts == 3
12970 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12971 || (collisions > 0
12972 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12973 {
12974 if (nparts == 3)
12975 {
12976 operands[2] = part[0][2];
12977 operands[3] = part[0][1];
12978 operands[4] = part[0][0];
12979 operands[5] = part[1][2];
12980 operands[6] = part[1][1];
12981 operands[7] = part[1][0];
12982 }
12983 else
12984 {
12985 operands[2] = part[0][1];
12986 operands[3] = part[0][0];
12987 operands[5] = part[1][1];
12988 operands[6] = part[1][0];
12989 }
12990 }
12991 else
12992 {
12993 if (nparts == 3)
12994 {
12995 operands[2] = part[0][0];
12996 operands[3] = part[0][1];
12997 operands[4] = part[0][2];
12998 operands[5] = part[1][0];
12999 operands[6] = part[1][1];
13000 operands[7] = part[1][2];
13001 }
13002 else
13003 {
13004 operands[2] = part[0][0];
13005 operands[3] = part[0][1];
13006 operands[5] = part[1][0];
13007 operands[6] = part[1][1];
13008 }
13009 }
13010
13011 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13012 if (optimize_size)
13013 {
13014 if (CONST_INT_P (operands[5])
13015 && operands[5] != const0_rtx
13016 && REG_P (operands[2]))
13017 {
13018 if (CONST_INT_P (operands[6])
13019 && INTVAL (operands[6]) == INTVAL (operands[5]))
13020 operands[6] = operands[2];
13021
13022 if (nparts == 3
13023 && CONST_INT_P (operands[7])
13024 && INTVAL (operands[7]) == INTVAL (operands[5]))
13025 operands[7] = operands[2];
13026 }
13027
13028 if (nparts == 3
13029 && CONST_INT_P (operands[6])
13030 && operands[6] != const0_rtx
13031 && REG_P (operands[3])
13032 && CONST_INT_P (operands[7])
13033 && INTVAL (operands[7]) == INTVAL (operands[6]))
13034 operands[7] = operands[3];
13035 }
13036
13037 emit_move_insn (operands[2], operands[5]);
13038 emit_move_insn (operands[3], operands[6]);
13039 if (nparts == 3)
13040 emit_move_insn (operands[4], operands[7]);
13041
13042 return;
13043 }
13044
13045 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13046 left shift by a constant, either using a single shift or
13047 a sequence of add instructions. */
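/* For instance, a left shift by 2 of register R can be emitted as two
   "add R, R" instructions when two additions cost no more than a single
   constant shift and we are not optimizing for size. */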
13048
13049 static void
13050 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13051 {
13052 if (count == 1)
13053 {
13054 emit_insn ((mode == DImode
13055 ? gen_addsi3
13056 : gen_adddi3) (operand, operand, operand));
13057 }
13058 else if (!optimize_size
13059 && count * ix86_cost->add <= ix86_cost->shift_const)
13060 {
13061 int i;
13062 for (i=0; i<count; i++)
13063 {
13064 emit_insn ((mode == DImode
13065 ? gen_addsi3
13066 : gen_adddi3) (operand, operand, operand));
13067 }
13068 }
13069 else
13070 emit_insn ((mode == DImode
13071 ? gen_ashlsi3
13072 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13073 }
13074
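/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit targets) into word-sized operations. OPERANDS[0] is the
   destination, OPERANDS[1] the source and OPERANDS[2] the shift count;
   SCRATCH, if non-NULL and cmove is available, lets the non-constant
   count case adjust the halves without a branch. */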
13075 void
13076 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13077 {
13078 rtx low[2], high[2];
13079 int count;
13080 const int single_width = mode == DImode ? 32 : 64;
13081
13082 if (CONST_INT_P (operands[2]))
13083 {
13084 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13085 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13086
13087 if (count >= single_width)
13088 {
13089 emit_move_insn (high[0], low[1]);
13090 emit_move_insn (low[0], const0_rtx);
13091
13092 if (count > single_width)
13093 ix86_expand_ashl_const (high[0], count - single_width, mode);
13094 }
13095 else
13096 {
13097 if (!rtx_equal_p (operands[0], operands[1]))
13098 emit_move_insn (operands[0], operands[1]);
13099 emit_insn ((mode == DImode
13100 ? gen_x86_shld_1
13101 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13102 ix86_expand_ashl_const (low[0], count, mode);
13103 }
13104 return;
13105 }
13106
13107 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13108
13109 if (operands[1] == const1_rtx)
13110 {
13111 /* Assuming we've chosen QImode-capable registers, 1 << N
13112 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13113 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13114 {
13115 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13116
13117 ix86_expand_clear (low[0]);
13118 ix86_expand_clear (high[0]);
13119 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13120
13121 d = gen_lowpart (QImode, low[0]);
13122 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13123 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13124 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13125
13126 d = gen_lowpart (QImode, high[0]);
13127 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13128 s = gen_rtx_NE (QImode, flags, const0_rtx);
13129 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13130 }
13131
13132 /* Otherwise, we can get the same results by manually performing
13133 a bit extract operation on bit 5/6, and then performing the two
13134 shifts. The two methods of getting 0/1 into low/high are exactly
13135 the same size. Avoiding the shift in the bit extract case helps
13136 pentium4 a bit; no one else seems to care much either way. */
13137 else
13138 {
13139 rtx x;
13140
13141 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13142 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13143 else
13144 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13145 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13146
13147 emit_insn ((mode == DImode
13148 ? gen_lshrsi3
13149 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13150 emit_insn ((mode == DImode
13151 ? gen_andsi3
13152 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13153 emit_move_insn (low[0], high[0]);
13154 emit_insn ((mode == DImode
13155 ? gen_xorsi3
13156 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13157 }
13158
13159 emit_insn ((mode == DImode
13160 ? gen_ashlsi3
13161 : gen_ashldi3) (low[0], low[0], operands[2]));
13162 emit_insn ((mode == DImode
13163 ? gen_ashlsi3
13164 : gen_ashldi3) (high[0], high[0], operands[2]));
13165 return;
13166 }
13167
13168 if (operands[1] == constm1_rtx)
13169 {
13170 /* For -1 << N, we can avoid the shld instruction, because we
13171 know that we're shifting 0...31/63 ones into a -1. */
13172 emit_move_insn (low[0], constm1_rtx);
13173 if (optimize_size)
13174 emit_move_insn (high[0], low[0]);
13175 else
13176 emit_move_insn (high[0], constm1_rtx);
13177 }
13178 else
13179 {
13180 if (!rtx_equal_p (operands[0], operands[1]))
13181 emit_move_insn (operands[0], operands[1]);
13182
13183 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13184 emit_insn ((mode == DImode
13185 ? gen_x86_shld_1
13186 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13187 }
13188
13189 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13190
13191 if (TARGET_CMOVE && scratch)
13192 {
13193 ix86_expand_clear (scratch);
13194 emit_insn ((mode == DImode
13195 ? gen_x86_shift_adj_1
13196 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13197 }
13198 else
13199 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13200 }
13201
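/* Split a double-word arithmetic right shift into word-sized operations.
   Operands and SCRATCH are as for ix86_split_ashl above. */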
13202 void
13203 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13204 {
13205 rtx low[2], high[2];
13206 int count;
13207 const int single_width = mode == DImode ? 32 : 64;
13208
13209 if (CONST_INT_P (operands[2]))
13210 {
13211 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13212 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13213
13214 if (count == single_width * 2 - 1)
13215 {
13216 emit_move_insn (high[0], high[1]);
13217 emit_insn ((mode == DImode
13218 ? gen_ashrsi3
13219 : gen_ashrdi3) (high[0], high[0],
13220 GEN_INT (single_width - 1)));
13221 emit_move_insn (low[0], high[0]);
13222
13223 }
13224 else if (count >= single_width)
13225 {
13226 emit_move_insn (low[0], high[1]);
13227 emit_move_insn (high[0], low[0]);
13228 emit_insn ((mode == DImode
13229 ? gen_ashrsi3
13230 : gen_ashrdi3) (high[0], high[0],
13231 GEN_INT (single_width - 1)));
13232 if (count > single_width)
13233 emit_insn ((mode == DImode
13234 ? gen_ashrsi3
13235 : gen_ashrdi3) (low[0], low[0],
13236 GEN_INT (count - single_width)));
13237 }
13238 else
13239 {
13240 if (!rtx_equal_p (operands[0], operands[1]))
13241 emit_move_insn (operands[0], operands[1]);
13242 emit_insn ((mode == DImode
13243 ? gen_x86_shrd_1
13244 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13245 emit_insn ((mode == DImode
13246 ? gen_ashrsi3
13247 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13248 }
13249 }
13250 else
13251 {
13252 if (!rtx_equal_p (operands[0], operands[1]))
13253 emit_move_insn (operands[0], operands[1]);
13254
13255 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13256
13257 emit_insn ((mode == DImode
13258 ? gen_x86_shrd_1
13259 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13260 emit_insn ((mode == DImode
13261 ? gen_ashrsi3
13262 : gen_ashrdi3) (high[0], high[0], operands[2]));
13263
13264 if (TARGET_CMOVE && scratch)
13265 {
13266 emit_move_insn (scratch, high[0]);
13267 emit_insn ((mode == DImode
13268 ? gen_ashrsi3
13269 : gen_ashrdi3) (scratch, scratch,
13270 GEN_INT (single_width - 1)));
13271 emit_insn ((mode == DImode
13272 ? gen_x86_shift_adj_1
13273 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13274 scratch));
13275 }
13276 else
13277 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13278 }
13279 }
13280
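/* Split a double-word logical right shift into word-sized operations.
   Operands and SCRATCH are as for ix86_split_ashl above. */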
13281 void
13282 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13283 {
13284 rtx low[2], high[2];
13285 int count;
13286 const int single_width = mode == DImode ? 32 : 64;
13287
13288 if (CONST_INT_P (operands[2]))
13289 {
13290 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13291 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13292
13293 if (count >= single_width)
13294 {
13295 emit_move_insn (low[0], high[1]);
13296 ix86_expand_clear (high[0]);
13297
13298 if (count > single_width)
13299 emit_insn ((mode == DImode
13300 ? gen_lshrsi3
13301 : gen_lshrdi3) (low[0], low[0],
13302 GEN_INT (count - single_width)));
13303 }
13304 else
13305 {
13306 if (!rtx_equal_p (operands[0], operands[1]))
13307 emit_move_insn (operands[0], operands[1]);
13308 emit_insn ((mode == DImode
13309 ? gen_x86_shrd_1
13310 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13311 emit_insn ((mode == DImode
13312 ? gen_lshrsi3
13313 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13314 }
13315 }
13316 else
13317 {
13318 if (!rtx_equal_p (operands[0], operands[1]))
13319 emit_move_insn (operands[0], operands[1]);
13320
13321 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13322
13323 emit_insn ((mode == DImode
13324 ? gen_x86_shrd_1
13325 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13326 emit_insn ((mode == DImode
13327 ? gen_lshrsi3
13328 : gen_lshrdi3) (high[0], high[0], operands[2]));
13329
13330 /* Heh. By reversing the arguments, we can reuse this pattern. */
13331 if (TARGET_CMOVE && scratch)
13332 {
13333 ix86_expand_clear (scratch);
13334 emit_insn ((mode == DImode
13335 ? gen_x86_shift_adj_1
13336 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13337 scratch));
13338 }
13339 else
13340 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13341 }
13342 }
13343
13344 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13345 static void
13346 predict_jump (int prob)
13347 {
13348 rtx insn = get_last_insn ();
13349 gcc_assert (JUMP_P (insn));
13350 REG_NOTES (insn)
13351 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13352 GEN_INT (prob),
13353 REG_NOTES (insn));
13354 }
13355
13356 /* Helper function for the string operations below. Test whether VARIABLE
13357 is aligned to VALUE bytes; if so, jump to the returned label. */
13358 static rtx
13359 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13360 {
13361 rtx label = gen_label_rtx ();
13362 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13363 if (GET_MODE (variable) == DImode)
13364 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13365 else
13366 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13367 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13368 1, label);
13369 if (epilogue)
13370 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13371 else
13372 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13373 return label;
13374 }
13375
13376 /* Decrease COUNTREG by VALUE. */
13377 static void
13378 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13379 {
13380 if (GET_MODE (countreg) == DImode)
13381 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13382 else
13383 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13384 }
13385
13386 /* Zero-extend the possibly SImode EXP to a Pmode register. */
13387 rtx
13388 ix86_zero_extend_to_Pmode (rtx exp)
13389 {
13390 rtx r;
13391 if (GET_MODE (exp) == VOIDmode)
13392 return force_reg (Pmode, exp);
13393 if (GET_MODE (exp) == Pmode)
13394 return copy_to_mode_reg (Pmode, exp);
13395 r = gen_reg_rtx (Pmode);
13396 emit_insn (gen_zero_extendsidi2 (r, exp));
13397 return r;
13398 }
13399
13400 /* Divide COUNTREG by SCALE. */
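/* For example, scaling a constant count of 37 by 4 yields 9; the low
   bits (count % scale) are left for the epilogue code to handle. */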
13401 static rtx
13402 scale_counter (rtx countreg, int scale)
13403 {
13404 rtx sc;
13405 rtx piece_size_mask;
13406
13407 if (scale == 1)
13408 return countreg;
13409 if (CONST_INT_P (countreg))
13410 return GEN_INT (INTVAL (countreg) / scale);
13411 gcc_assert (REG_P (countreg));
13412
13413 piece_size_mask = GEN_INT (scale - 1);
13414 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13415 GEN_INT (exact_log2 (scale)),
13416 NULL, 1, OPTAB_DIRECT);
13417 return sc;
13418 }
13419
13420 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over
13421 DImode for constant loop counts. */
13422
13423 static enum machine_mode
13424 counter_mode (rtx count_exp)
13425 {
13426 if (GET_MODE (count_exp) != VOIDmode)
13427 return GET_MODE (count_exp);
13428 if (GET_CODE (count_exp) != CONST_INT)
13429 return Pmode;
13430 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13431 return DImode;
13432 return SImode;
13433 }
13434
13435 /* When SRCPTR is non-NULL, output a simple loop that moves the memory
13436 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
13437 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
13438 the equivalent loop that sets memory to VALUE (expected to be in MODE).
13439
13440 The size is rounded down to a whole number of chunks moved at once.
13441 SRCMEM and DESTMEM provide the MEM rtx used to feed proper aliasing info. */
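/* For example, with MODE == SImode and UNROLL == 4 each iteration moves
   16 bytes, so a COUNT of 70 is rounded down to 64 bytes handled by the
   loop, leaving the remaining 6 bytes to the epilogue. */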
13442
13443
13444 static void
13445 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13446 rtx destptr, rtx srcptr, rtx value,
13447 rtx count, enum machine_mode mode, int unroll,
13448 int expected_size)
13449 {
13450 rtx out_label, top_label, iter, tmp;
13451 enum machine_mode iter_mode = counter_mode (count);
13452 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13453 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13454 rtx size;
13455 rtx x_addr;
13456 rtx y_addr;
13457 int i;
13458
13459 top_label = gen_label_rtx ();
13460 out_label = gen_label_rtx ();
13461 iter = gen_reg_rtx (iter_mode);
13462
13463 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13464 NULL, 1, OPTAB_DIRECT);
13465 /* Those two should combine. */
13466 if (piece_size == const1_rtx)
13467 {
13468 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13469 true, out_label);
13470 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13471 }
13472 emit_move_insn (iter, const0_rtx);
13473
13474 emit_label (top_label);
13475
13476 tmp = convert_modes (Pmode, iter_mode, iter, true);
13477 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13478 destmem = change_address (destmem, mode, x_addr);
13479
13480 if (srcmem)
13481 {
13482 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13483 srcmem = change_address (srcmem, mode, y_addr);
13484
13485 /* When unrolling for chips that reorder memory reads and writes,
13486 we can save registers by using a single temporary.
13487 Using 4 temporaries is also overkill in 32-bit mode. */
13488 if (!TARGET_64BIT && 0)
13489 {
13490 for (i = 0; i < unroll; i++)
13491 {
13492 if (i)
13493 {
13494 destmem =
13495 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13496 srcmem =
13497 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13498 }
13499 emit_move_insn (destmem, srcmem);
13500 }
13501 }
13502 else
13503 {
13504 rtx tmpreg[4];
13505 gcc_assert (unroll <= 4);
13506 for (i = 0; i < unroll; i++)
13507 {
13508 tmpreg[i] = gen_reg_rtx (mode);
13509 if (i)
13510 {
13511 srcmem =
13512 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13513 }
13514 emit_move_insn (tmpreg[i], srcmem);
13515 }
13516 for (i = 0; i < unroll; i++)
13517 {
13518 if (i)
13519 {
13520 destmem =
13521 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13522 }
13523 emit_move_insn (destmem, tmpreg[i]);
13524 }
13525 }
13526 }
13527 else
13528 for (i = 0; i < unroll; i++)
13529 {
13530 if (i)
13531 destmem =
13532 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13533 emit_move_insn (destmem, value);
13534 }
13535
13536 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13537 true, OPTAB_LIB_WIDEN);
13538 if (tmp != iter)
13539 emit_move_insn (iter, tmp);
13540
13541 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13542 true, top_label);
13543 if (expected_size != -1)
13544 {
13545 expected_size /= GET_MODE_SIZE (mode) * unroll;
13546 if (expected_size == 0)
13547 predict_jump (0);
13548 else if (expected_size > REG_BR_PROB_BASE)
13549 predict_jump (REG_BR_PROB_BASE - 1);
13550 else
13551 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13552 }
13553 else
13554 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13555 iter = ix86_zero_extend_to_Pmode (iter);
13556 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13557 true, OPTAB_LIB_WIDEN);
13558 if (tmp != destptr)
13559 emit_move_insn (destptr, tmp);
13560 if (srcptr)
13561 {
13562 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13563 true, OPTAB_LIB_WIDEN);
13564 if (tmp != srcptr)
13565 emit_move_insn (srcptr, tmp);
13566 }
13567 emit_label (out_label);
13568 }
13569
13570 /* Output a "rep; mov" instruction.
13571 Arguments have the same meaning as for the previous function. */
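/* DESTEXP and SRCEXP computed below are the final pointer values
   (pointer plus countreg scaled by the chunk size); the rep_mov pattern
   stores them back into the pointer registers so the pointer updates
   stay visible to the rest of the compiler. */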
13572 static void
13573 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13574 rtx destptr, rtx srcptr,
13575 rtx count,
13576 enum machine_mode mode)
13577 {
13578 rtx destexp;
13579 rtx srcexp;
13580 rtx countreg;
13581
13582 /* If the size is known to be a multiple of 4, it is shorter to use SImode rep movs. */
13583 if (mode == QImode && CONST_INT_P (count)
13584 && !(INTVAL (count) & 3))
13585 mode = SImode;
13586
13587 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13588 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13589 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13590 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13591 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13592 if (mode != QImode)
13593 {
13594 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13595 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13596 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13597 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13598 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13599 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13600 }
13601 else
13602 {
13603 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13604 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13605 }
13606 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13607 destexp, srcexp));
13608 }
13609
13610 /* Output a "rep; stos" instruction.
13611 Arguments have the same meaning as for the previous function. */
13612 static void
13613 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13614 rtx count,
13615 enum machine_mode mode)
13616 {
13617 rtx destexp;
13618 rtx countreg;
13619
13620 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13621 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13622 value = force_reg (mode, gen_lowpart (mode, value));
13623 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13624 if (mode != QImode)
13625 {
13626 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13627 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13628 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13629 }
13630 else
13631 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13632 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13633 }
13634
13635 static void
13636 emit_strmov (rtx destmem, rtx srcmem,
13637 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13638 {
13639 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13640 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13641 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13642 }
13643
13644 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
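/* For example, a constant count of 13 with max_size 16 emits an 8-byte,
   a 4-byte and a 1-byte move (13 = 8 + 4 + 1); on 32-bit targets the
   8-byte move is done as two SImode moves. */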
13645 static void
13646 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13647 rtx destptr, rtx srcptr, rtx count, int max_size)
13648 {
13649 rtx src, dest;
13650 if (CONST_INT_P (count))
13651 {
13652 HOST_WIDE_INT countval = INTVAL (count);
13653 int offset = 0;
13654
13655 if ((countval & 0x10) && max_size > 16)
13656 {
13657 if (TARGET_64BIT)
13658 {
13659 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13660 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13661 }
13662 else
13663 gcc_unreachable ();
13664 offset += 16;
13665 }
13666 if ((countval & 0x08) && max_size > 8)
13667 {
13668 if (TARGET_64BIT)
13669 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13670 else
13671 {
13672 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13673 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13674 }
13675 offset += 8;
13676 }
13677 if ((countval & 0x04) && max_size > 4)
13678 {
13679 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13680 offset += 4;
13681 }
13682 if ((countval & 0x02) && max_size > 2)
13683 {
13684 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13685 offset += 2;
13686 }
13687 if ((countval & 0x01) && max_size > 1)
13688 {
13689 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13690 offset += 1;
13691 }
13692 return;
13693 }
13694 if (max_size > 8)
13695 {
13696 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13697 count, 1, OPTAB_DIRECT);
13698 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13699 count, QImode, 1, 4);
13700 return;
13701 }
13702
13703 /* When single stringop instructions are available, we can cheaply advance
13704 the dest and src pointers. Otherwise we save code size by maintaining an
13705 offset register (zero is readily available from the preceding rep
13706 operation) and using x86 addressing modes. */
13707 if (TARGET_SINGLE_STRINGOP)
13708 {
13709 if (max_size > 4)
13710 {
13711 rtx label = ix86_expand_aligntest (count, 4, true);
13712 src = change_address (srcmem, SImode, srcptr);
13713 dest = change_address (destmem, SImode, destptr);
13714 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13715 emit_label (label);
13716 LABEL_NUSES (label) = 1;
13717 }
13718 if (max_size > 2)
13719 {
13720 rtx label = ix86_expand_aligntest (count, 2, true);
13721 src = change_address (srcmem, HImode, srcptr);
13722 dest = change_address (destmem, HImode, destptr);
13723 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13724 emit_label (label);
13725 LABEL_NUSES (label) = 1;
13726 }
13727 if (max_size > 1)
13728 {
13729 rtx label = ix86_expand_aligntest (count, 1, true);
13730 src = change_address (srcmem, QImode, srcptr);
13731 dest = change_address (destmem, QImode, destptr);
13732 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13733 emit_label (label);
13734 LABEL_NUSES (label) = 1;
13735 }
13736 }
13737 else
13738 {
13739 rtx offset = force_reg (Pmode, const0_rtx);
13740 rtx tmp;
13741
13742 if (max_size > 4)
13743 {
13744 rtx label = ix86_expand_aligntest (count, 4, true);
13745 src = change_address (srcmem, SImode, srcptr);
13746 dest = change_address (destmem, SImode, destptr);
13747 emit_move_insn (dest, src);
13748 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13749 true, OPTAB_LIB_WIDEN);
13750 if (tmp != offset)
13751 emit_move_insn (offset, tmp);
13752 emit_label (label);
13753 LABEL_NUSES (label) = 1;
13754 }
13755 if (max_size > 2)
13756 {
13757 rtx label = ix86_expand_aligntest (count, 2, true);
13758 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13759 src = change_address (srcmem, HImode, tmp);
13760 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13761 dest = change_address (destmem, HImode, tmp);
13762 emit_move_insn (dest, src);
13763 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13764 true, OPTAB_LIB_WIDEN);
13765 if (tmp != offset)
13766 emit_move_insn (offset, tmp);
13767 emit_label (label);
13768 LABEL_NUSES (label) = 1;
13769 }
13770 if (max_size > 1)
13771 {
13772 rtx label = ix86_expand_aligntest (count, 1, true);
13773 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13774 src = change_address (srcmem, QImode, tmp);
13775 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13776 dest = change_address (destmem, QImode, tmp);
13777 emit_move_insn (dest, src);
13778 emit_label (label);
13779 LABEL_NUSES (label) = 1;
13780 }
13781 }
13782 }
13783
13784 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13785 static void
13786 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13787 rtx count, int max_size)
13788 {
13789 count =
13790 expand_simple_binop (counter_mode (count), AND, count,
13791 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13792 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13793 gen_lowpart (QImode, value), count, QImode,
13794 1, max_size / 2);
13795 }
13796
13797 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13798 static void
13799 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13800 {
13801 rtx dest;
13802
13803 if (CONST_INT_P (count))
13804 {
13805 HOST_WIDE_INT countval = INTVAL (count);
13806 int offset = 0;
13807
13808 if ((countval & 0x10) && max_size > 16)
13809 {
13810 if (TARGET_64BIT)
13811 {
13812 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13813 emit_insn (gen_strset (destptr, dest, value));
13814 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13815 emit_insn (gen_strset (destptr, dest, value));
13816 }
13817 else
13818 gcc_unreachable ();
13819 offset += 16;
13820 }
13821 if ((countval & 0x08) && max_size > 8)
13822 {
13823 if (TARGET_64BIT)
13824 {
13825 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13826 emit_insn (gen_strset (destptr, dest, value));
13827 }
13828 else
13829 {
13830 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13831 emit_insn (gen_strset (destptr, dest, value));
13832 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13833 emit_insn (gen_strset (destptr, dest, value));
13834 }
13835 offset += 8;
13836 }
13837 if ((countval & 0x04) && max_size > 4)
13838 {
13839 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13840 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13841 offset += 4;
13842 }
13843 if ((countval & 0x02) && max_size > 2)
13844 {
13845 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13846 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13847 offset += 2;
13848 }
13849 if ((countval & 0x01) && max_size > 1)
13850 {
13851 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13852 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13853 offset += 1;
13854 }
13855 return;
13856 }
13857 if (max_size > 32)
13858 {
13859 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13860 return;
13861 }
13862 if (max_size > 16)
13863 {
13864 rtx label = ix86_expand_aligntest (count, 16, true);
13865 if (TARGET_64BIT)
13866 {
13867 dest = change_address (destmem, DImode, destptr);
13868 emit_insn (gen_strset (destptr, dest, value));
13869 emit_insn (gen_strset (destptr, dest, value));
13870 }
13871 else
13872 {
13873 dest = change_address (destmem, SImode, destptr);
13874 emit_insn (gen_strset (destptr, dest, value));
13875 emit_insn (gen_strset (destptr, dest, value));
13876 emit_insn (gen_strset (destptr, dest, value));
13877 emit_insn (gen_strset (destptr, dest, value));
13878 }
13879 emit_label (label);
13880 LABEL_NUSES (label) = 1;
13881 }
13882 if (max_size > 8)
13883 {
13884 rtx label = ix86_expand_aligntest (count, 8, true);
13885 if (TARGET_64BIT)
13886 {
13887 dest = change_address (destmem, DImode, destptr);
13888 emit_insn (gen_strset (destptr, dest, value));
13889 }
13890 else
13891 {
13892 dest = change_address (destmem, SImode, destptr);
13893 emit_insn (gen_strset (destptr, dest, value));
13894 emit_insn (gen_strset (destptr, dest, value));
13895 }
13896 emit_label (label);
13897 LABEL_NUSES (label) = 1;
13898 }
13899 if (max_size > 4)
13900 {
13901 rtx label = ix86_expand_aligntest (count, 4, true);
13902 dest = change_address (destmem, SImode, destptr);
13903 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13904 emit_label (label);
13905 LABEL_NUSES (label) = 1;
13906 }
13907 if (max_size > 2)
13908 {
13909 rtx label = ix86_expand_aligntest (count, 2, true);
13910 dest = change_address (destmem, HImode, destptr);
13911 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13912 emit_label (label);
13913 LABEL_NUSES (label) = 1;
13914 }
13915 if (max_size > 1)
13916 {
13917 rtx label = ix86_expand_aligntest (count, 1, true);
13918 dest = change_address (destmem, QImode, destptr);
13919 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13920 emit_label (label);
13921 LABEL_NUSES (label) = 1;
13922 }
13923 }
13924
13925 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
13926 to DESIRED_ALIGNMENT. */
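/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits a
   conditional 1-byte, 2-byte and 4-byte copy, each guarded by a test of
   the corresponding low bit of DESTPTR, adjusting COUNT as it goes. */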
13927 static void
13928 expand_movmem_prologue (rtx destmem, rtx srcmem,
13929 rtx destptr, rtx srcptr, rtx count,
13930 int align, int desired_alignment)
13931 {
13932 if (align <= 1 && desired_alignment > 1)
13933 {
13934 rtx label = ix86_expand_aligntest (destptr, 1, false);
13935 srcmem = change_address (srcmem, QImode, srcptr);
13936 destmem = change_address (destmem, QImode, destptr);
13937 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13938 ix86_adjust_counter (count, 1);
13939 emit_label (label);
13940 LABEL_NUSES (label) = 1;
13941 }
13942 if (align <= 2 && desired_alignment > 2)
13943 {
13944 rtx label = ix86_expand_aligntest (destptr, 2, false);
13945 srcmem = change_address (srcmem, HImode, srcptr);
13946 destmem = change_address (destmem, HImode, destptr);
13947 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13948 ix86_adjust_counter (count, 2);
13949 emit_label (label);
13950 LABEL_NUSES (label) = 1;
13951 }
13952 if (align <= 4 && desired_alignment > 4)
13953 {
13954 rtx label = ix86_expand_aligntest (destptr, 4, false);
13955 srcmem = change_address (srcmem, SImode, srcptr);
13956 destmem = change_address (destmem, SImode, destptr);
13957 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13958 ix86_adjust_counter (count, 4);
13959 emit_label (label);
13960 LABEL_NUSES (label) = 1;
13961 }
13962 gcc_assert (desired_alignment <= 8);
13963 }
13964
13965 /* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
13966 to DESIRED_ALIGNMENT. */
13967 static void
13968 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13969 int align, int desired_alignment)
13970 {
13971 if (align <= 1 && desired_alignment > 1)
13972 {
13973 rtx label = ix86_expand_aligntest (destptr, 1, false);
13974 destmem = change_address (destmem, QImode, destptr);
13975 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13976 ix86_adjust_counter (count, 1);
13977 emit_label (label);
13978 LABEL_NUSES (label) = 1;
13979 }
13980 if (align <= 2 && desired_alignment > 2)
13981 {
13982 rtx label = ix86_expand_aligntest (destptr, 2, false);
13983 destmem = change_address (destmem, HImode, destptr);
13984 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13985 ix86_adjust_counter (count, 2);
13986 emit_label (label);
13987 LABEL_NUSES (label) = 1;
13988 }
13989 if (align <= 4 && desired_alignment > 4)
13990 {
13991 rtx label = ix86_expand_aligntest (destptr, 4, false);
13992 destmem = change_address (destmem, SImode, destptr);
13993 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13994 ix86_adjust_counter (count, 4);
13995 emit_label (label);
13996 LABEL_NUSES (label) = 1;
13997 }
13998 gcc_assert (desired_alignment <= 8);
13999 }
14000
14001 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
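/* The decision walks the per-CPU stringop_algs cost table: the first
   entry whose max size covers the expected block size supplies the
   algorithm, with special casing for -Os and for the
   TARGET_INLINE_ALL_STRINGOPS and -minline-stringops-dynamically cases. */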
14002 static enum stringop_alg
14003 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14004 int *dynamic_check)
14005 {
14006 const struct stringop_algs * algs;
14007
14008 *dynamic_check = -1;
14009 if (memset)
14010 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14011 else
14012 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14013 if (stringop_alg != no_stringop)
14014 return stringop_alg;
14015 /* rep; movq or rep; movl is the smallest variant. */
14016 else if (optimize_size)
14017 {
14018 if (!count || (count & 3))
14019 return rep_prefix_1_byte;
14020 else
14021 return rep_prefix_4_byte;
14022 }
14023 /* Very tiny blocks are best handled via the loop; REP is expensive to
14024 set up. */
14025 else if (expected_size != -1 && expected_size < 4)
14026 return loop_1_byte;
14027 else if (expected_size != -1)
14028 {
14029 unsigned int i;
14030 enum stringop_alg alg = libcall;
14031 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14032 {
14033 gcc_assert (algs->size[i].max);
14034 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14035 {
14036 if (algs->size[i].alg != libcall)
14037 alg = algs->size[i].alg;
14038 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14039 last non-libcall inline algorithm. */
14040 if (TARGET_INLINE_ALL_STRINGOPS)
14041 {
14042 /* When the current size is best copied by a libcall, but we are
14043 still forced to inline, run the heuristic below that will pick
14044 code for medium-sized blocks. */
14045 if (alg != libcall)
14046 return alg;
14047 break;
14048 }
14049 else
14050 return algs->size[i].alg;
14051 }
14052 }
14053 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14054 }
14055 /* When asked to inline the call anyway, try to pick a meaningful choice.
14056 We look for the maximal size of block that is faster to copy by hand and
14057 take blocks of at most that size, guessing that the average size will
14058 be roughly half of the block.
14059
14060 If this turns out to be bad, we might simply specify the preferred
14061 choice in ix86_costs. */
14062 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14063 && algs->unknown_size == libcall)
14064 {
14065 int max = -1;
14066 enum stringop_alg alg;
14067 int i;
14068
14069 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14070 if (algs->size[i].alg != libcall && algs->size[i].alg)
14071 max = algs->size[i].max;
14072 if (max == -1)
14073 max = 4096;
14074 alg = decide_alg (count, max / 2, memset, dynamic_check);
14075 gcc_assert (*dynamic_check == -1);
14076 gcc_assert (alg != libcall);
14077 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14078 *dynamic_check = max;
14079 return alg;
14080 }
14081 return algs->unknown_size;
14082 }
14083
14084 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14085 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14086 static int
14087 decide_alignment (int align,
14088 enum stringop_alg alg,
14089 int expected_size)
14090 {
14091 int desired_align = 0;
14092 switch (alg)
14093 {
14094 case no_stringop:
14095 gcc_unreachable ();
14096 case loop:
14097 case unrolled_loop:
14098 desired_align = GET_MODE_SIZE (Pmode);
14099 break;
14100 case rep_prefix_8_byte:
14101 desired_align = 8;
14102 break;
14103 case rep_prefix_4_byte:
14104 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14105 copying a whole cache line at once. */
14106 if (TARGET_PENTIUMPRO)
14107 desired_align = 8;
14108 else
14109 desired_align = 4;
14110 break;
14111 case rep_prefix_1_byte:
14112 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14113 copying a whole cache line at once. */
14114 if (TARGET_PENTIUMPRO)
14115 desired_align = 8;
14116 else
14117 desired_align = 1;
14118 break;
14119 case loop_1_byte:
14120 desired_align = 1;
14121 break;
14122 case libcall:
14123 return 0;
14124 }
14125
14126 if (optimize_size)
14127 desired_align = 1;
14128 if (desired_align < align)
14129 desired_align = align;
14130 if (expected_size != -1 && expected_size < 4)
14131 desired_align = align;
14132 return desired_align;
14133 }
14134
14135 /* Return the smallest power of 2 greater than VAL. */
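/* E.g. smallest_pow2_greater_than (5) == 8 and, because the comparison
   is strict, smallest_pow2_greater_than (8) == 16. */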
14136 static int
14137 smallest_pow2_greater_than (int val)
14138 {
14139 int ret = 1;
14140 while (ret <= val)
14141 ret <<= 1;
14142 return ret;
14143 }
14144
14145 /* Expand string move (memcpy) operation. Use i386 string operations when
14146 profitable. ix86_expand_setmem contains similar code. The code depends upon
14147 architecture, block size and alignment, but always has the same
14148 overall structure:
14149
14150 1) Prologue guard: a conditional that jumps to the epilogue for small
14151 blocks that can be handled by the epilogue alone. This is faster, but
14152 also needed for correctness, since the prologue assumes the block is
14153 larger than the desired alignment.
14154
14155 Optional dynamic check for size and libcall for large
14156 blocks is emitted here too, with -minline-stringops-dynamically.
14157
14158 2) Prologue: copy first few bytes in order to get destination aligned
14159 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14160 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14161 We emit either a jump tree on power of two sized blocks, or a byte loop.
14162
14163 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14164 with specified algorithm.
14165
14166 4) Epilogue: code copying tail of the block that is too small to be
14167 handled by main body (or up to size guarded by prologue guard). */
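/* As a concrete example, with ALG == rep_prefix_4_byte and a constant
   COUNT of 259 bytes at sufficient alignment, the main body issues a
   rep-prefixed SImode move of 64 longwords and the epilogue copies the
   remaining 3 (259 & 3) bytes. */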
14168
14169 int
14170 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14171 rtx expected_align_exp, rtx expected_size_exp)
14172 {
14173 rtx destreg;
14174 rtx srcreg;
14175 rtx label = NULL;
14176 rtx tmp;
14177 rtx jump_around_label = NULL;
14178 HOST_WIDE_INT align = 1;
14179 unsigned HOST_WIDE_INT count = 0;
14180 HOST_WIDE_INT expected_size = -1;
14181 int size_needed = 0, epilogue_size_needed;
14182 int desired_align = 0;
14183 enum stringop_alg alg;
14184 int dynamic_check;
14185
14186 if (CONST_INT_P (align_exp))
14187 align = INTVAL (align_exp);
14188 /* i386 can do misaligned access at a reasonably increased cost. */
14189 if (CONST_INT_P (expected_align_exp)
14190 && INTVAL (expected_align_exp) > align)
14191 align = INTVAL (expected_align_exp);
14192 if (CONST_INT_P (count_exp))
14193 count = expected_size = INTVAL (count_exp);
14194 if (CONST_INT_P (expected_size_exp) && count == 0)
14195 expected_size = INTVAL (expected_size_exp);
14196
14197 /* Step 0: Decide on preferred algorithm, desired alignment and
14198 size of chunks to be copied by main loop. */
14199
14200 alg = decide_alg (count, expected_size, false, &dynamic_check);
14201 desired_align = decide_alignment (align, alg, expected_size);
14202
14203 if (!TARGET_ALIGN_STRINGOPS)
14204 align = desired_align;
14205
14206 if (alg == libcall)
14207 return 0;
14208 gcc_assert (alg != no_stringop);
14209 if (!count)
14210 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14211 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14212 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14213 switch (alg)
14214 {
14215 case libcall:
14216 case no_stringop:
14217 gcc_unreachable ();
14218 case loop:
14219 size_needed = GET_MODE_SIZE (Pmode);
14220 break;
14221 case unrolled_loop:
14222 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14223 break;
14224 case rep_prefix_8_byte:
14225 size_needed = 8;
14226 break;
14227 case rep_prefix_4_byte:
14228 size_needed = 4;
14229 break;
14230 case rep_prefix_1_byte:
14231 case loop_1_byte:
14232 size_needed = 1;
14233 break;
14234 }
14235
14236 epilogue_size_needed = size_needed;
14237
14238 /* Step 1: Prologue guard. */
14239
14240 /* Alignment code needs count to be in register. */
14241 if (CONST_INT_P (count_exp) && desired_align > align)
14242 {
14243 enum machine_mode mode = SImode;
14244 if (TARGET_64BIT && (count & ~0xffffffff))
14245 mode = DImode;
14246 count_exp = force_reg (mode, count_exp);
14247 }
14248 gcc_assert (desired_align >= 1 && align >= 1);
14249
14250 /* Ensure that alignment prologue won't copy past end of block. */
14251 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14252 {
14253 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14254 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14255 Make sure it is a power of 2. */
14256 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14257
14258 label = gen_label_rtx ();
14259 emit_cmp_and_jump_insns (count_exp,
14260 GEN_INT (epilogue_size_needed),
14261 LTU, 0, counter_mode (count_exp), 1, label);
14262 if (GET_CODE (count_exp) == CONST_INT)
14263 ;
14264 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14265 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14266 else
14267 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14268 }
14269 /* Emit code to decide on runtime whether library call or inline should be
14270 used. */
14271 if (dynamic_check != -1)
14272 {
14273 rtx hot_label = gen_label_rtx ();
14274 jump_around_label = gen_label_rtx ();
14275 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14276 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14277 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14278 emit_block_move_via_libcall (dst, src, count_exp, false);
14279 emit_jump (jump_around_label);
14280 emit_label (hot_label);
14281 }
14282
14283 /* Step 2: Alignment prologue. */
14284
14285 if (desired_align > align)
14286 {
14287 /* Except for the first move in the epilogue, we no longer know the
14288 constant offset in the aliasing info. It doesn't seem worth the
14289 pain to maintain it for the first move, so throw away the info
14290 early. */
14291 src = change_address (src, BLKmode, srcreg);
14292 dst = change_address (dst, BLKmode, destreg);
14293 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14294 desired_align);
14295 }
14296 if (label && size_needed == 1)
14297 {
14298 emit_label (label);
14299 LABEL_NUSES (label) = 1;
14300 label = NULL;
14301 }
14302
14303 /* Step 3: Main loop. */
14304
14305 switch (alg)
14306 {
14307 case libcall:
14308 case no_stringop:
14309 gcc_unreachable ();
14310 case loop_1_byte:
14311 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14312 count_exp, QImode, 1, expected_size);
14313 break;
14314 case loop:
14315 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14316 count_exp, Pmode, 1, expected_size);
14317 break;
14318 case unrolled_loop:
14319 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14320 registers for 4 temporaries anyway. */
14321 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14322 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14323 expected_size);
14324 break;
14325 case rep_prefix_8_byte:
14326 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14327 DImode);
14328 break;
14329 case rep_prefix_4_byte:
14330 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14331 SImode);
14332 break;
14333 case rep_prefix_1_byte:
14334 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14335 QImode);
14336 break;
14337 }
14338 /* Adjust properly the offset of src and dest memory for aliasing. */
14339 if (CONST_INT_P (count_exp))
14340 {
14341 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14342 (count / size_needed) * size_needed);
14343 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14344 (count / size_needed) * size_needed);
14345 }
14346 else
14347 {
14348 src = change_address (src, BLKmode, srcreg);
14349 dst = change_address (dst, BLKmode, destreg);
14350 }
14351
14352 /* Step 4: Epilogue to copy the remaining bytes. */
14353
14354 if (label)
14355 {
14356 /* When the main loop is done, COUNT_EXP might hold the original count,
14357 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14358 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14359 bytes. Compensate if needed. */
14360
14361 if (size_needed < epilogue_size_needed)
14362 {
14363 tmp =
14364 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14365 GEN_INT (size_needed - 1), count_exp, 1,
14366 OPTAB_DIRECT);
14367 if (tmp != count_exp)
14368 emit_move_insn (count_exp, tmp);
14369 }
14370 emit_label (label);
14371 LABEL_NUSES (label) = 1;
14372 }
14373
14374 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14375 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14376 epilogue_size_needed);
14377 if (jump_around_label)
14378 emit_label (jump_around_label);
14379 return 1;
14380 }
14381
14382 /* Helper function for memset. For the QImode value 0xXY produce
14383 0xXYXYXYXY of the width specified by MODE. This is essentially
14384 a * 0x10101010, but we can do slightly better than
14385 synth_mult by unwinding the sequence by hand on CPUs with
14386 a slow multiply. */
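/* For example, promoting the constant 0x41 to SImode yields 0x41414141.
   For a non-constant value the code below either multiplies by
   0x01010101 (0x0101010101010101 for DImode) or builds the value with
   insv/shift/or sequences, whichever the cost tables say is cheaper. */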
14387 static rtx
14388 promote_duplicated_reg (enum machine_mode mode, rtx val)
14389 {
14390 enum machine_mode valmode = GET_MODE (val);
14391 rtx tmp;
14392 int nops = mode == DImode ? 3 : 2;
14393
14394 gcc_assert (mode == SImode || mode == DImode);
14395 if (val == const0_rtx)
14396 return copy_to_mode_reg (mode, const0_rtx);
14397 if (CONST_INT_P (val))
14398 {
14399 HOST_WIDE_INT v = INTVAL (val) & 255;
14400
14401 v |= v << 8;
14402 v |= v << 16;
14403 if (mode == DImode)
14404 v |= (v << 16) << 16;
14405 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14406 }
14407
14408 if (valmode == VOIDmode)
14409 valmode = QImode;
14410 if (valmode != QImode)
14411 val = gen_lowpart (QImode, val);
14412 if (mode == QImode)
14413 return val;
14414 if (!TARGET_PARTIAL_REG_STALL)
14415 nops--;
14416 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14417 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14418 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14419 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14420 {
14421 rtx reg = convert_modes (mode, QImode, val, true);
14422 tmp = promote_duplicated_reg (mode, const1_rtx);
14423 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14424 OPTAB_DIRECT);
14425 }
14426 else
14427 {
14428 rtx reg = convert_modes (mode, QImode, val, true);
14429
14430 if (!TARGET_PARTIAL_REG_STALL)
14431 if (mode == SImode)
14432 emit_insn (gen_movsi_insv_1 (reg, reg));
14433 else
14434 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14435 else
14436 {
14437 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14438 NULL, 1, OPTAB_DIRECT);
14439 reg =
14440 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14441 }
14442 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14443 NULL, 1, OPTAB_DIRECT);
14444 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14445 if (mode == SImode)
14446 return reg;
14447 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14448 NULL, 1, OPTAB_DIRECT);
14449 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14450 return reg;
14451 }
14452 }
14453
14454 /* Duplicate value VAL using promote_duplicated_reg into the maximal size
14455 that will be needed by the main loop copying SIZE_NEEDED chunks and by
14456 the prologue getting alignment from ALIGN to DESIRED_ALIGN. */
14457 static rtx
14458 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14459 {
14460 rtx promoted_val;
14461
14462 if (TARGET_64BIT
14463 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14464 promoted_val = promote_duplicated_reg (DImode, val);
14465 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14466 promoted_val = promote_duplicated_reg (SImode, val);
14467 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14468 promoted_val = promote_duplicated_reg (HImode, val);
14469 else
14470 promoted_val = val;
14471
14472 return promoted_val;
14473 }
14474
14475 /* Expand string clear operation (bzero). Use i386 string operations when
14476 profitable. See the ix86_expand_movmem comment for an explanation of the
14477 individual steps performed. */
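/* As a rough illustration, a call such as memset (p, 0, 100) normally
   reaches this expander (through the setmem named pattern) with DST a
   BLKmode MEM based at p, VAL_EXP the constant 0, COUNT_EXP the constant
   100 and ALIGN_EXP describing whatever alignment of p could be proven. */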
14478 int
14479 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14480 rtx expected_align_exp, rtx expected_size_exp)
14481 {
14482 rtx destreg;
14483 rtx label = NULL;
14484 rtx tmp;
14485 rtx jump_around_label = NULL;
14486 HOST_WIDE_INT align = 1;
14487 unsigned HOST_WIDE_INT count = 0;
14488 HOST_WIDE_INT expected_size = -1;
14489 int size_needed = 0, epilogue_size_needed;
14490 int desired_align = 0;
14491 enum stringop_alg alg;
14492 rtx promoted_val = NULL;
14493 bool force_loopy_epilogue = false;
14494 int dynamic_check;
14495
14496 if (CONST_INT_P (align_exp))
14497 align = INTVAL (align_exp);
14498 /* i386 can do misaligned access at reasonably increased cost. */
14499 if (CONST_INT_P (expected_align_exp)
14500 && INTVAL (expected_align_exp) > align)
14501 align = INTVAL (expected_align_exp);
14502 if (CONST_INT_P (count_exp))
14503 count = expected_size = INTVAL (count_exp);
14504 if (CONST_INT_P (expected_size_exp) && count == 0)
14505 expected_size = INTVAL (expected_size_exp);
14506
14507 /* Step 0: Decide on preferred algorithm, desired alignment and
14508 size of chunks to be copied by main loop. */
14509
14510 alg = decide_alg (count, expected_size, true, &dynamic_check);
14511 desired_align = decide_alignment (align, alg, expected_size);
14512
14513 if (!TARGET_ALIGN_STRINGOPS)
14514 align = desired_align;
14515
14516 if (alg == libcall)
14517 return 0;
14518 gcc_assert (alg != no_stringop);
14519 if (!count)
14520 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14521 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14522 switch (alg)
14523 {
14524 case libcall:
14525 case no_stringop:
14526 gcc_unreachable ();
14527 case loop:
14528 size_needed = GET_MODE_SIZE (Pmode);
14529 break;
14530 case unrolled_loop:
14531 size_needed = GET_MODE_SIZE (Pmode) * 4;
14532 break;
14533 case rep_prefix_8_byte:
14534 size_needed = 8;
14535 break;
14536 case rep_prefix_4_byte:
14537 size_needed = 4;
14538 break;
14539 case rep_prefix_1_byte:
14540 case loop_1_byte:
14541 size_needed = 1;
14542 break;
14543 }
14544 epilogue_size_needed = size_needed;
14545
14546 /* Step 1: Prologue guard. */
14547
14548 /* Alignment code needs count to be in register. */
14549 if (CONST_INT_P (count_exp) && desired_align > align)
14550 {
14551 enum machine_mode mode = SImode;
14552 if (TARGET_64BIT && (count & ~0xffffffff))
14553 mode = DImode;
14554 count_exp = force_reg (mode, count_exp);
14555 }
14556 /* Do the cheap promotion to allow better CSE across the
14557 main loop and epilogue (i.e. one load of the big constant in
14558 front of all the code). */
14559 if (CONST_INT_P (val_exp))
14560 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14561 desired_align, align);
14562 /* Ensure that alignment prologue won't copy past end of block. */
14563 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14564 {
14565 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14566 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14567 Make sure it is power of 2. */
14568 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
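/* E.g. SIZE_NEEDED == 16, DESIRED_ALIGN == 8 and ALIGN == 1 give
   MAX (15, 7) == 15, which is then rounded up to 16. */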
14569
14570 /* To improve performance of small blocks, we jump around the VAL
14571 promoting code. This means that if the promoted VAL is not constant,
14572 we might not use it in the epilogue and have to use the byte
14573 loop variant instead. */
14574 if (epilogue_size_needed > 2 && !promoted_val)
14575 force_loopy_epilogue = true;
14576 label = gen_label_rtx ();
14577 emit_cmp_and_jump_insns (count_exp,
14578 GEN_INT (epilogue_size_needed),
14579 LTU, 0, counter_mode (count_exp), 1, label);
14580 if (GET_CODE (count_exp) == CONST_INT)
14581 ;
14582 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14583 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14584 else
14585 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14586 }
14587 if (dynamic_check != -1)
14588 {
14589 rtx hot_label = gen_label_rtx ();
14590 jump_around_label = gen_label_rtx ();
14591 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14592 LEU, 0, counter_mode (count_exp), 1, hot_label);
14593 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14594 set_storage_via_libcall (dst, count_exp, val_exp, false);
14595 emit_jump (jump_around_label);
14596 emit_label (hot_label);
14597 }
14598
14599 /* Step 2: Alignment prologue. */
14600
14601 /* Do the expensive promotion once we branched off the small blocks. */
14602 if (!promoted_val)
14603 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14604 desired_align, align);
14605 gcc_assert (desired_align >= 1 && align >= 1);
14606
14607 if (desired_align > align)
14608 {
14609 /* Except for the first move in the epilogue, we no longer know
14610 the constant offset in the aliasing info. It doesn't seem worth
14611 the pain to maintain it for the first move, so throw away
14612 the info early. */
14613 dst = change_address (dst, BLKmode, destreg);
14614 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14615 desired_align);
14616 }
14617 if (label && size_needed == 1)
14618 {
14619 emit_label (label);
14620 LABEL_NUSES (label) = 1;
14621 label = NULL;
14622 }
14623
14624 /* Step 3: Main loop. */
14625
14626 switch (alg)
14627 {
14628 case libcall:
14629 case no_stringop:
14630 gcc_unreachable ();
14631 case loop_1_byte:
14632 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14633 count_exp, QImode, 1, expected_size);
14634 break;
14635 case loop:
14636 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14637 count_exp, Pmode, 1, expected_size);
14638 break;
14639 case unrolled_loop:
14640 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14641 count_exp, Pmode, 4, expected_size);
14642 break;
14643 case rep_prefix_8_byte:
14644 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14645 DImode);
14646 break;
14647 case rep_prefix_4_byte:
14648 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14649 SImode);
14650 break;
14651 case rep_prefix_1_byte:
14652 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14653 QImode);
14654 break;
14655 }
14656 /* Properly adjust the offset of the destination memory for aliasing. */
14657 if (CONST_INT_P (count_exp))
14658 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14659 (count / size_needed) * size_needed);
14660 else
14661 dst = change_address (dst, BLKmode, destreg);
14662
14663 /* Step 4: Epilogue to copy the remaining bytes. */
14664
14665 if (label)
14666 {
14667 /* When the main loop is done, COUNT_EXP might hold original count,
14668 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14669 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14670 bytes. Compensate if needed. */
14671
14672 if (size_needed < desired_align - align)
14673 {
14674 tmp =
14675 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14676 GEN_INT (size_needed - 1), count_exp, 1,
14677 OPTAB_DIRECT);
14678 size_needed = desired_align - align + 1;
14679 if (tmp != count_exp)
14680 emit_move_insn (count_exp, tmp);
14681 }
14682 emit_label (label);
14683 LABEL_NUSES (label) = 1;
14684 }
14685 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14686 {
14687 if (force_loopy_epilogue)
14688 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14689 size_needed);
14690 else
14691 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14692 size_needed);
14693 }
14694 if (jump_around_label)
14695 emit_label (jump_around_label);
14696 return 1;
14697 }
14698
14699 /* Expand strlen. */
14700 int
14701 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14702 {
14703 rtx addr, scratch1, scratch2, scratch3, scratch4;
14704
14705 /* The generic case of the strlen expander is long. Avoid expanding
14706 it unless TARGET_INLINE_ALL_STRINGOPS. */
14707
14708 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14709 && !TARGET_INLINE_ALL_STRINGOPS
14710 && !optimize_size
14711 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14712 return 0;
14713
14714 addr = force_reg (Pmode, XEXP (src, 0));
14715 scratch1 = gen_reg_rtx (Pmode);
14716
14717 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14718 && !optimize_size)
14719 {
14720 /* Well, it seems that some optimizer does not combine a call like
14721 foo(strlen(bar), strlen(bar));
14722 when the move and the subtraction are done here. It does calculate
14723 the length just once when these instructions are done inside
14724 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
14725 often used and I use one fewer register for the lifetime of
14726 output_strlen_unroll() this is better. */
14727
14728 emit_move_insn (out, addr);
14729
14730 ix86_expand_strlensi_unroll_1 (out, src, align);
14731
14732 /* strlensi_unroll_1 returns the address of the zero at the end of
14733 the string, like memchr(), so compute the length by subtracting
14734 the start address. */
14735 if (TARGET_64BIT)
14736 emit_insn (gen_subdi3 (out, out, addr));
14737 else
14738 emit_insn (gen_subsi3 (out, out, addr));
14739 }
14740 else
14741 {
14742 rtx unspec;
14743 scratch2 = gen_reg_rtx (Pmode);
14744 scratch3 = gen_reg_rtx (Pmode);
14745 scratch4 = force_reg (Pmode, constm1_rtx);
14746
14747 emit_move_insn (scratch3, addr);
14748 eoschar = force_reg (QImode, eoschar);
14749
14750 src = replace_equiv_address_nv (src, scratch3);
14751
14752 /* If .md starts supporting :P, this can be done in .md. */
14753 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14754 scratch4), UNSPEC_SCAS);
14755 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14756 if (TARGET_64BIT)
14757 {
14758 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14759 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14760 }
14761 else
14762 {
14763 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14764 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14765 }
14766 }
14767 return 1;
14768 }
14769
14770 /* Expand the appropriate insns for doing strlen if not just doing
14771 repnz; scasb
14772
14773 out = result, initialized with the start address
14774 align_rtx = alignment of the address.
14775 scratch = scratch register, initialized with the start address when
14776 not aligned, otherwise undefined
14777
14778 This is just the body. It needs the initializations mentioned above and
14779 some address computing at the end. These things are done in i386.md. */
14780
14781 static void
14782 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14783 {
14784 int align;
14785 rtx tmp;
14786 rtx align_2_label = NULL_RTX;
14787 rtx align_3_label = NULL_RTX;
14788 rtx align_4_label = gen_label_rtx ();
14789 rtx end_0_label = gen_label_rtx ();
14790 rtx mem;
14791 rtx tmpreg = gen_reg_rtx (SImode);
14792 rtx scratch = gen_reg_rtx (SImode);
14793 rtx cmp;
14794
14795 align = 0;
14796 if (CONST_INT_P (align_rtx))
14797 align = INTVAL (align_rtx);
14798
14799 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14800
14801 /* Is there a known alignment and is it less than 4? */
14802 if (align < 4)
14803 {
14804 rtx scratch1 = gen_reg_rtx (Pmode);
14805 emit_move_insn (scratch1, out);
14806 /* Is there a known alignment and is it not 2? */
14807 if (align != 2)
14808 {
14809 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14810 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14811
14812 /* Leave just the 3 lower bits. */
14813 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14814 NULL_RTX, 0, OPTAB_WIDEN);
14815
14816 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14817 Pmode, 1, align_4_label);
14818 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14819 Pmode, 1, align_2_label);
14820 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14821 Pmode, 1, align_3_label);
14822 }
14823 else
14824 {
14825 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14826 check whether it is aligned to a 4-byte boundary. */
14827
14828 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14829 NULL_RTX, 0, OPTAB_WIDEN);
14830
14831 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14832 Pmode, 1, align_4_label);
14833 }
14834
14835 mem = change_address (src, QImode, out);
14836
14837 /* Now compare the bytes. */
14838
14839 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14840 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14841 QImode, 1, end_0_label);
14842
14843 /* Increment the address. */
14844 if (TARGET_64BIT)
14845 emit_insn (gen_adddi3 (out, out, const1_rtx));
14846 else
14847 emit_insn (gen_addsi3 (out, out, const1_rtx));
14848
14849 /* Not needed with an alignment of 2 */
14850 if (align != 2)
14851 {
14852 emit_label (align_2_label);
14853
14854 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14855 end_0_label);
14856
14857 if (TARGET_64BIT)
14858 emit_insn (gen_adddi3 (out, out, const1_rtx));
14859 else
14860 emit_insn (gen_addsi3 (out, out, const1_rtx));
14861
14862 emit_label (align_3_label);
14863 }
14864
14865 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14866 end_0_label);
14867
14868 if (TARGET_64BIT)
14869 emit_insn (gen_adddi3 (out, out, const1_rtx));
14870 else
14871 emit_insn (gen_addsi3 (out, out, const1_rtx));
14872 }
14873
14874 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14875 align this loop; that only makes the program larger and does not
14876 speed it up. */
14877 emit_label (align_4_label);
14878
14879 mem = change_address (src, SImode, out);
14880 emit_move_insn (scratch, mem);
14881 if (TARGET_64BIT)
14882 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14883 else
14884 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14885
14886 /* This formula yields a nonzero result iff one of the bytes is zero.
14887 This saves three branches inside the loop and many cycles. */
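/* The sequence below implements the classic zero-byte test
   (x - 0x01010101) & ~x & 0x80808080. For example, x == 0x12340078
   gives 0x1132FF77 & 0xEDCBFF87 & 0x80808080 == 0x00008000, flagging
   the zero byte, while a word with no zero byte yields 0. */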
14888
14889 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14890 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14891 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14892 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14893 gen_int_mode (0x80808080, SImode)));
14894 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14895 align_4_label);
14896
14897 if (TARGET_CMOVE)
14898 {
14899 rtx reg = gen_reg_rtx (SImode);
14900 rtx reg2 = gen_reg_rtx (Pmode);
14901 emit_move_insn (reg, tmpreg);
14902 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14903
14904 /* If zero is not in the first two bytes, move two bytes forward. */
14905 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14906 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14907 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14908 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14909 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14910 reg,
14911 tmpreg)));
14912 /* Emit lea manually to avoid clobbering of flags. */
14913 emit_insn (gen_rtx_SET (SImode, reg2,
14914 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14915
14916 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14917 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14918 emit_insn (gen_rtx_SET (VOIDmode, out,
14919 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14920 reg2,
14921 out)));
14922
14923 }
14924 else
14925 {
14926 rtx end_2_label = gen_label_rtx ();
14927 /* Is zero in the first two bytes? */
14928
14929 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14930 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14931 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14932 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14933 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14934 pc_rtx);
14935 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14936 JUMP_LABEL (tmp) = end_2_label;
14937
14938 /* Not in the first two. Move two bytes forward. */
14939 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14940 if (TARGET_64BIT)
14941 emit_insn (gen_adddi3 (out, out, const2_rtx));
14942 else
14943 emit_insn (gen_addsi3 (out, out, const2_rtx));
14944
14945 emit_label (end_2_label);
14946
14947 }
14948
14949 /* Avoid branch in fixing the byte. */
14950 tmpreg = gen_lowpart (QImode, tmpreg);
14951 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14952 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14953 if (TARGET_64BIT)
14954 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14955 else
14956 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14957
14958 emit_label (end_0_label);
14959 }
14960
14961 void
14962 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14963 rtx callarg2 ATTRIBUTE_UNUSED,
14964 rtx pop, int sibcall)
14965 {
14966 rtx use = NULL, call;
14967
14968 if (pop == const0_rtx)
14969 pop = NULL;
14970 gcc_assert (!TARGET_64BIT || !pop);
14971
14972 if (TARGET_MACHO && !TARGET_64BIT)
14973 {
14974 #if TARGET_MACHO
14975 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14976 fnaddr = machopic_indirect_call_target (fnaddr);
14977 #endif
14978 }
14979 else
14980 {
14981 /* Static functions and indirect calls don't need the pic register. */
14982 if (! TARGET_64BIT && flag_pic
14983 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14984 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14985 use_reg (&use, pic_offset_table_rtx);
14986 }
14987
14988 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14989 {
14990 rtx al = gen_rtx_REG (QImode, 0);
14991 emit_move_insn (al, callarg2);
14992 use_reg (&use, al);
14993 }
14994
14995 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14996 {
14997 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14998 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14999 }
15000 if (sibcall && TARGET_64BIT
15001 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15002 {
15003 rtx addr;
15004 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15005 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15006 emit_move_insn (fnaddr, addr);
15007 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15008 }
15009
15010 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15011 if (retval)
15012 call = gen_rtx_SET (VOIDmode, retval, call);
15013 if (pop)
15014 {
15015 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15016 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15017 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15018 }
15019
15020 call = emit_call_insn (call);
15021 if (use)
15022 CALL_INSN_FUNCTION_USAGE (call) = use;
15023 }
15024
15025 \f
15026 /* Clear stack slot assignments remembered from previous functions.
15027 This is called from INIT_EXPANDERS once before RTL is emitted for each
15028 function. */
15029
15030 static struct machine_function *
15031 ix86_init_machine_status (void)
15032 {
15033 struct machine_function *f;
15034
15035 f = ggc_alloc_cleared (sizeof (struct machine_function));
15036 f->use_fast_prologue_epilogue_nregs = -1;
15037 f->tls_descriptor_call_expanded_p = 0;
15038
15039 return f;
15040 }
15041
15042 /* Return a MEM corresponding to a stack slot with mode MODE.
15043 Allocate a new slot if necessary.
15044
15045 The RTL for a function can have several slots available: N is
15046 which slot to use. */
15047
15048 rtx
15049 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15050 {
15051 struct stack_local_entry *s;
15052
15053 gcc_assert (n < MAX_386_STACK_LOCALS);
15054
15055 for (s = ix86_stack_locals; s; s = s->next)
15056 if (s->mode == mode && s->n == n)
15057 return copy_rtx (s->rtl);
15058
15059 s = (struct stack_local_entry *)
15060 ggc_alloc (sizeof (struct stack_local_entry));
15061 s->n = n;
15062 s->mode = mode;
15063 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15064
15065 s->next = ix86_stack_locals;
15066 ix86_stack_locals = s;
15067 return s->rtl;
15068 }
15069
15070 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15071
15072 static GTY(()) rtx ix86_tls_symbol;
15073 rtx
15074 ix86_tls_get_addr (void)
15075 {
15076
15077 if (!ix86_tls_symbol)
15078 {
15079 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15080 (TARGET_ANY_GNU_TLS
15081 && !TARGET_64BIT)
15082 ? "___tls_get_addr"
15083 : "__tls_get_addr");
15084 }
15085
15086 return ix86_tls_symbol;
15087 }
15088
15089 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15090
15091 static GTY(()) rtx ix86_tls_module_base_symbol;
15092 rtx
15093 ix86_tls_module_base (void)
15094 {
15095
15096 if (!ix86_tls_module_base_symbol)
15097 {
15098 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15099 "_TLS_MODULE_BASE_");
15100 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15101 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15102 }
15103
15104 return ix86_tls_module_base_symbol;
15105 }
15106 \f
15107 /* Calculate the length of the memory address in the instruction
15108 encoding. Does not include the one-byte modrm, opcode, or prefix. */
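/* For example, (%eax) needs 0 extra bytes, (%esp) needs 1 (a SIB byte),
   8(%ebp) needs 1 (a disp8), a bare symbol or constant address needs 4
   (a disp32), and 8(%eax,%ebx,2) needs 2 (SIB + disp8). */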
15109
15110 int
15111 memory_address_length (rtx addr)
15112 {
15113 struct ix86_address parts;
15114 rtx base, index, disp;
15115 int len;
15116 int ok;
15117
15118 if (GET_CODE (addr) == PRE_DEC
15119 || GET_CODE (addr) == POST_INC
15120 || GET_CODE (addr) == PRE_MODIFY
15121 || GET_CODE (addr) == POST_MODIFY)
15122 return 0;
15123
15124 ok = ix86_decompose_address (addr, &parts);
15125 gcc_assert (ok);
15126
15127 if (parts.base && GET_CODE (parts.base) == SUBREG)
15128 parts.base = SUBREG_REG (parts.base);
15129 if (parts.index && GET_CODE (parts.index) == SUBREG)
15130 parts.index = SUBREG_REG (parts.index);
15131
15132 base = parts.base;
15133 index = parts.index;
15134 disp = parts.disp;
15135 len = 0;
15136
15137 /* Rule of thumb:
15138 - esp as the base always wants an index,
15139 - ebp as the base always wants a displacement. */
15140
15141 /* Register Indirect. */
15142 if (base && !index && !disp)
15143 {
15144 /* esp (for its index) and ebp (for its displacement) need
15145 the two-byte modrm form. */
15146 if (addr == stack_pointer_rtx
15147 || addr == arg_pointer_rtx
15148 || addr == frame_pointer_rtx
15149 || addr == hard_frame_pointer_rtx)
15150 len = 1;
15151 }
15152
15153 /* Direct Addressing. */
15154 else if (disp && !base && !index)
15155 len = 4;
15156
15157 else
15158 {
15159 /* Find the length of the displacement constant. */
15160 if (disp)
15161 {
15162 if (base && satisfies_constraint_K (disp))
15163 len = 1;
15164 else
15165 len = 4;
15166 }
15167 /* ebp always wants a displacement. */
15168 else if (base == hard_frame_pointer_rtx)
15169 len = 1;
15170
15171 /* An index requires the two-byte modrm form.... */
15172 if (index
15173 /* ...like esp, which always wants an index. */
15174 || base == stack_pointer_rtx
15175 || base == arg_pointer_rtx
15176 || base == frame_pointer_rtx)
15177 len += 1;
15178 }
15179
15180 return len;
15181 }
15182
15183 /* Compute the default value for the "length_immediate" attribute. When
15184 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
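/* For instance, "addl $4, %eax" can use the sign-extended 8-bit immediate
   form, so with SHORTFORM the attribute is 1, whereas "addl $100000, %eax"
   needs a full 32-bit immediate and the attribute is 4. */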
15185 int
15186 ix86_attr_length_immediate_default (rtx insn, int shortform)
15187 {
15188 int len = 0;
15189 int i;
15190 extract_insn_cached (insn);
15191 for (i = recog_data.n_operands - 1; i >= 0; --i)
15192 if (CONSTANT_P (recog_data.operand[i]))
15193 {
15194 gcc_assert (!len);
15195 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15196 len = 1;
15197 else
15198 {
15199 switch (get_attr_mode (insn))
15200 {
15201 case MODE_QI:
15202 len+=1;
15203 break;
15204 case MODE_HI:
15205 len+=2;
15206 break;
15207 case MODE_SI:
15208 len+=4;
15209 break;
15210 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15211 case MODE_DI:
15212 len+=4;
15213 break;
15214 default:
15215 fatal_insn ("unknown insn mode", insn);
15216 }
15217 }
15218 }
15219 return len;
15220 }
15221 /* Compute default value for "length_address" attribute. */
15222 int
15223 ix86_attr_length_address_default (rtx insn)
15224 {
15225 int i;
15226
15227 if (get_attr_type (insn) == TYPE_LEA)
15228 {
15229 rtx set = PATTERN (insn);
15230
15231 if (GET_CODE (set) == PARALLEL)
15232 set = XVECEXP (set, 0, 0);
15233
15234 gcc_assert (GET_CODE (set) == SET);
15235
15236 return memory_address_length (SET_SRC (set));
15237 }
15238
15239 extract_insn_cached (insn);
15240 for (i = recog_data.n_operands - 1; i >= 0; --i)
15241 if (MEM_P (recog_data.operand[i]))
15242 {
15243 return memory_address_length (XEXP (recog_data.operand[i], 0));
15245 }
15246 return 0;
15247 }
15248 \f
15249 /* Return the maximum number of instructions a cpu can issue. */
15250
15251 static int
15252 ix86_issue_rate (void)
15253 {
15254 switch (ix86_tune)
15255 {
15256 case PROCESSOR_PENTIUM:
15257 case PROCESSOR_K6:
15258 return 2;
15259
15260 case PROCESSOR_PENTIUMPRO:
15261 case PROCESSOR_PENTIUM4:
15262 case PROCESSOR_ATHLON:
15263 case PROCESSOR_K8:
15264 case PROCESSOR_AMDFAM10:
15265 case PROCESSOR_NOCONA:
15266 case PROCESSOR_GENERIC32:
15267 case PROCESSOR_GENERIC64:
15268 return 3;
15269
15270 case PROCESSOR_CORE2:
15271 return 4;
15272
15273 default:
15274 return 1;
15275 }
15276 }
15277
15278 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
15279 set by DEP_INSN, and nothing else that DEP_INSN sets. */
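/* A typical case is DEP_INSN being a compare that only sets the flags and
   INSN the conditional jump or setcc that consumes them; on Pentium such
   pairs issue together, which is why ix86_adjust_cost drops the cost to 0. */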
15280
15281 static int
15282 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15283 {
15284 rtx set, set2;
15285
15286 /* Simplify the test for uninteresting insns. */
15287 if (insn_type != TYPE_SETCC
15288 && insn_type != TYPE_ICMOV
15289 && insn_type != TYPE_FCMOV
15290 && insn_type != TYPE_IBR)
15291 return 0;
15292
15293 if ((set = single_set (dep_insn)) != 0)
15294 {
15295 set = SET_DEST (set);
15296 set2 = NULL_RTX;
15297 }
15298 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15299 && XVECLEN (PATTERN (dep_insn), 0) == 2
15300 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15301 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15302 {
15303 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15304 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15305 }
15306 else
15307 return 0;
15308
15309 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15310 return 0;
15311
15312 /* This test is true if the dependent insn reads the flags but
15313 not any other potentially set register. */
15314 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15315 return 0;
15316
15317 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15318 return 0;
15319
15320 return 1;
15321 }
15322
15323 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15324 address with operands set by DEP_INSN. */
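/* For example, DEP_INSN writing %eax followed by INSN loading from (%eax)
   is such a dependency; on Pentium this address generation interlock costs
   an extra cycle (see ix86_adjust_cost). */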
15325
15326 static int
15327 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15328 {
15329 rtx addr;
15330
15331 if (insn_type == TYPE_LEA
15332 && TARGET_PENTIUM)
15333 {
15334 addr = PATTERN (insn);
15335
15336 if (GET_CODE (addr) == PARALLEL)
15337 addr = XVECEXP (addr, 0, 0);
15338
15339 gcc_assert (GET_CODE (addr) == SET);
15340
15341 addr = SET_SRC (addr);
15342 }
15343 else
15344 {
15345 int i;
15346 extract_insn_cached (insn);
15347 for (i = recog_data.n_operands - 1; i >= 0; --i)
15348 if (MEM_P (recog_data.operand[i]))
15349 {
15350 addr = XEXP (recog_data.operand[i], 0);
15351 goto found;
15352 }
15353 return 0;
15354 found:;
15355 }
15356
15357 return modified_in_p (addr, dep_insn);
15358 }
15359
15360 static int
15361 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15362 {
15363 enum attr_type insn_type, dep_insn_type;
15364 enum attr_memory memory;
15365 rtx set, set2;
15366 int dep_insn_code_number;
15367
15368 /* Anti and output dependencies have zero cost on all CPUs. */
15369 if (REG_NOTE_KIND (link) != 0)
15370 return 0;
15371
15372 dep_insn_code_number = recog_memoized (dep_insn);
15373
15374 /* If we can't recognize the insns, we can't really do anything. */
15375 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15376 return cost;
15377
15378 insn_type = get_attr_type (insn);
15379 dep_insn_type = get_attr_type (dep_insn);
15380
15381 switch (ix86_tune)
15382 {
15383 case PROCESSOR_PENTIUM:
15384 /* Address Generation Interlock adds a cycle of latency. */
15385 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15386 cost += 1;
15387
15388 /* ??? Compares pair with jump/setcc. */
15389 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15390 cost = 0;
15391
15392 /* Floating point stores require value to be ready one cycle earlier. */
15393 if (insn_type == TYPE_FMOV
15394 && get_attr_memory (insn) == MEMORY_STORE
15395 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15396 cost += 1;
15397 break;
15398
15399 case PROCESSOR_PENTIUMPRO:
15400 memory = get_attr_memory (insn);
15401
15402 /* INT->FP conversion is expensive. */
15403 if (get_attr_fp_int_src (dep_insn))
15404 cost += 5;
15405
15406 /* There is one cycle extra latency between an FP op and a store. */
15407 if (insn_type == TYPE_FMOV
15408 && (set = single_set (dep_insn)) != NULL_RTX
15409 && (set2 = single_set (insn)) != NULL_RTX
15410 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15411 && MEM_P (SET_DEST (set2)))
15412 cost += 1;
15413
15414 /* Model the ability of the reorder buffer to hide the latency of a load
15415 by executing it in parallel with the previous instruction, when the
15416 previous instruction is not needed to compute the address. */
15417 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15418 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15419 {
15420 /* Claim moves to take one cycle, as the core can issue one load
15421 at a time and the next load can start a cycle later. */
15422 if (dep_insn_type == TYPE_IMOV
15423 || dep_insn_type == TYPE_FMOV)
15424 cost = 1;
15425 else if (cost > 1)
15426 cost--;
15427 }
15428 break;
15429
15430 case PROCESSOR_K6:
15431 memory = get_attr_memory (insn);
15432
15433 /* The esp dependency is resolved before the instruction is really
15434 finished. */
15435 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15436 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15437 return 1;
15438
15439 /* INT->FP conversion is expensive. */
15440 if (get_attr_fp_int_src (dep_insn))
15441 cost += 5;
15442
15443 /* Model the ability of the reorder buffer to hide the latency of a load
15444 by executing it in parallel with the previous instruction, when the
15445 previous instruction is not needed to compute the address. */
15446 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15447 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15448 {
15449 /* Claim moves to take one cycle, as the core can issue one load
15450 at a time and the next load can start a cycle later. */
15451 if (dep_insn_type == TYPE_IMOV
15452 || dep_insn_type == TYPE_FMOV)
15453 cost = 1;
15454 else if (cost > 2)
15455 cost -= 2;
15456 else
15457 cost = 1;
15458 }
15459 break;
15460
15461 case PROCESSOR_ATHLON:
15462 case PROCESSOR_K8:
15463 case PROCESSOR_AMDFAM10:
15464 case PROCESSOR_GENERIC32:
15465 case PROCESSOR_GENERIC64:
15466 memory = get_attr_memory (insn);
15467
15468 /* Model the ability of the reorder buffer to hide the latency of a load
15469 by executing it in parallel with the previous instruction, when the
15470 previous instruction is not needed to compute the address. */
15471 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15472 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15473 {
15474 enum attr_unit unit = get_attr_unit (insn);
15475 int loadcost = 3;
15476
15477 /* Because of the difference between the length of integer and
15478 floating unit pipeline preparation stages, the memory operands
15479 for floating point are cheaper.
15480
15481 ??? For Athlon the difference is most probably 2. */
15482 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15483 loadcost = 3;
15484 else
15485 loadcost = TARGET_ATHLON ? 2 : 0;
15486
15487 if (cost >= loadcost)
15488 cost -= loadcost;
15489 else
15490 cost = 0;
15491 }
15492
15493 default:
15494 break;
15495 }
15496
15497 return cost;
15498 }
15499
15500 /* How many alternative schedules to try. This should be as wide as the
15501 scheduling freedom in the DFA, but no wider. Making this value too
15502 large results in extra work for the scheduler. */
15503
15504 static int
15505 ia32_multipass_dfa_lookahead (void)
15506 {
15507 if (ix86_tune == PROCESSOR_PENTIUM)
15508 return 2;
15509
15510 if (ix86_tune == PROCESSOR_PENTIUMPRO
15511 || ix86_tune == PROCESSOR_K6)
15512 return 1;
15513
15514 else
15515 return 0;
15516 }
15517
15518 \f
15519 /* Compute the alignment given to a constant that is being placed in memory.
15520 EXP is the constant and ALIGN is the alignment that the object would
15521 ordinarily have.
15522 The value of this function is used instead of that alignment to align
15523 the object. */
15524
15525 int
15526 ix86_constant_alignment (tree exp, int align)
15527 {
15528 if (TREE_CODE (exp) == REAL_CST)
15529 {
15530 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15531 return 64;
15532 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15533 return 128;
15534 }
15535 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15536 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15537 return BITS_PER_WORD;
15538
15539 return align;
15540 }
15541
15542 /* Compute the alignment for a static variable.
15543 TYPE is the data type, and ALIGN is the alignment that
15544 the object would ordinarily have. The value of this function is used
15545 instead of that alignment to align the object. */
15546
15547 int
15548 ix86_data_alignment (tree type, int align)
15549 {
15550 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15551
15552 if (AGGREGATE_TYPE_P (type)
15553 && TYPE_SIZE (type)
15554 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15555 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15556 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15557 && align < max_align)
15558 align = max_align;
15559
15560 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15561 to a 16-byte boundary. */
15562 if (TARGET_64BIT)
15563 {
15564 if (AGGREGATE_TYPE_P (type)
15565 && TYPE_SIZE (type)
15566 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15567 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15568 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15569 return 128;
15570 }
15571
15572 if (TREE_CODE (type) == ARRAY_TYPE)
15573 {
15574 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15575 return 64;
15576 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15577 return 128;
15578 }
15579 else if (TREE_CODE (type) == COMPLEX_TYPE)
15580 {
15581
15582 if (TYPE_MODE (type) == DCmode && align < 64)
15583 return 64;
15584 if (TYPE_MODE (type) == XCmode && align < 128)
15585 return 128;
15586 }
15587 else if ((TREE_CODE (type) == RECORD_TYPE
15588 || TREE_CODE (type) == UNION_TYPE
15589 || TREE_CODE (type) == QUAL_UNION_TYPE)
15590 && TYPE_FIELDS (type))
15591 {
15592 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15593 return 64;
15594 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15595 return 128;
15596 }
15597 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15598 || TREE_CODE (type) == INTEGER_TYPE)
15599 {
15600 if (TYPE_MODE (type) == DFmode && align < 64)
15601 return 64;
15602 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15603 return 128;
15604 }
15605
15606 return align;
15607 }
15608
15609 /* Compute the alignment for a local variable.
15610 TYPE is the data type, and ALIGN is the alignment that
15611 the object would ordinarily have. The value of this macro is used
15612 instead of that alignment to align the object. */
15613
15614 int
15615 ix86_local_alignment (tree type, int align)
15616 {
15617 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15618 to a 16-byte boundary. */
15619 if (TARGET_64BIT)
15620 {
15621 if (AGGREGATE_TYPE_P (type)
15622 && TYPE_SIZE (type)
15623 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15624 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15625 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15626 return 128;
15627 }
15628 if (TREE_CODE (type) == ARRAY_TYPE)
15629 {
15630 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15631 return 64;
15632 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15633 return 128;
15634 }
15635 else if (TREE_CODE (type) == COMPLEX_TYPE)
15636 {
15637 if (TYPE_MODE (type) == DCmode && align < 64)
15638 return 64;
15639 if (TYPE_MODE (type) == XCmode && align < 128)
15640 return 128;
15641 }
15642 else if ((TREE_CODE (type) == RECORD_TYPE
15643 || TREE_CODE (type) == UNION_TYPE
15644 || TREE_CODE (type) == QUAL_UNION_TYPE)
15645 && TYPE_FIELDS (type))
15646 {
15647 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15648 return 64;
15649 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15650 return 128;
15651 }
15652 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15653 || TREE_CODE (type) == INTEGER_TYPE)
15654 {
15655
15656 if (TYPE_MODE (type) == DFmode && align < 64)
15657 return 64;
15658 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15659 return 128;
15660 }
15661 return align;
15662 }
15663 \f
15664 /* Emit RTL insns to initialize the variable parts of a trampoline.
15665 FNADDR is an RTX for the address of the function's pure code.
15666 CXT is an RTX for the static chain value for the function. */
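/* For illustration, the 32-bit trampoline laid out below is roughly:
   b9 <cxt:4>          movl   $CXT, %ecx
   e9 <disp:4>         jmp    FNADDR   (displacement relative to TRAMP + 10)
   and the 64-bit one:
   49 bb <fnaddr:8>    movabs $FNADDR, %r11   (or 41 bb <fnaddr:4>)
   49 ba <cxt:8>       movabs $CXT, %r10
   49 ff e3            jmp    *%r11 */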
15667 void
15668 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15669 {
15670 if (!TARGET_64BIT)
15671 {
15672 /* Compute offset from the end of the jmp to the target function. */
15673 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15674 plus_constant (tramp, 10),
15675 NULL_RTX, 1, OPTAB_DIRECT);
15676 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15677 gen_int_mode (0xb9, QImode));
15678 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15679 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15680 gen_int_mode (0xe9, QImode));
15681 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15682 }
15683 else
15684 {
15685 int offset = 0;
15686 /* Try to load the address using the shorter movl instead of movabs.
15687 We may want to support movq for kernel mode, but the kernel does not
15688 use trampolines at the moment. */
15689 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15690 {
15691 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15692 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15693 gen_int_mode (0xbb41, HImode));
15694 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15695 gen_lowpart (SImode, fnaddr));
15696 offset += 6;
15697 }
15698 else
15699 {
15700 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15701 gen_int_mode (0xbb49, HImode));
15702 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15703 fnaddr);
15704 offset += 10;
15705 }
15706 /* Load static chain using movabs to r10. */
15707 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15708 gen_int_mode (0xba49, HImode));
15709 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15710 cxt);
15711 offset += 10;
15712 /* Jump to r11. */
15713 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15714 gen_int_mode (0xff49, HImode));
15715 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15716 gen_int_mode (0xe3, QImode));
15717 offset += 3;
15718 gcc_assert (offset <= TRAMPOLINE_SIZE);
15719 }
15720
15721 #ifdef ENABLE_EXECUTE_STACK
15722 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15723 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15724 #endif
15725 }
15726 \f
15727 /* Codes for all the SSE/MMX builtins. */
15728 enum ix86_builtins
15729 {
15730 IX86_BUILTIN_ADDPS,
15731 IX86_BUILTIN_ADDSS,
15732 IX86_BUILTIN_DIVPS,
15733 IX86_BUILTIN_DIVSS,
15734 IX86_BUILTIN_MULPS,
15735 IX86_BUILTIN_MULSS,
15736 IX86_BUILTIN_SUBPS,
15737 IX86_BUILTIN_SUBSS,
15738
15739 IX86_BUILTIN_CMPEQPS,
15740 IX86_BUILTIN_CMPLTPS,
15741 IX86_BUILTIN_CMPLEPS,
15742 IX86_BUILTIN_CMPGTPS,
15743 IX86_BUILTIN_CMPGEPS,
15744 IX86_BUILTIN_CMPNEQPS,
15745 IX86_BUILTIN_CMPNLTPS,
15746 IX86_BUILTIN_CMPNLEPS,
15747 IX86_BUILTIN_CMPNGTPS,
15748 IX86_BUILTIN_CMPNGEPS,
15749 IX86_BUILTIN_CMPORDPS,
15750 IX86_BUILTIN_CMPUNORDPS,
15751 IX86_BUILTIN_CMPEQSS,
15752 IX86_BUILTIN_CMPLTSS,
15753 IX86_BUILTIN_CMPLESS,
15754 IX86_BUILTIN_CMPNEQSS,
15755 IX86_BUILTIN_CMPNLTSS,
15756 IX86_BUILTIN_CMPNLESS,
15757 IX86_BUILTIN_CMPNGTSS,
15758 IX86_BUILTIN_CMPNGESS,
15759 IX86_BUILTIN_CMPORDSS,
15760 IX86_BUILTIN_CMPUNORDSS,
15761
15762 IX86_BUILTIN_COMIEQSS,
15763 IX86_BUILTIN_COMILTSS,
15764 IX86_BUILTIN_COMILESS,
15765 IX86_BUILTIN_COMIGTSS,
15766 IX86_BUILTIN_COMIGESS,
15767 IX86_BUILTIN_COMINEQSS,
15768 IX86_BUILTIN_UCOMIEQSS,
15769 IX86_BUILTIN_UCOMILTSS,
15770 IX86_BUILTIN_UCOMILESS,
15771 IX86_BUILTIN_UCOMIGTSS,
15772 IX86_BUILTIN_UCOMIGESS,
15773 IX86_BUILTIN_UCOMINEQSS,
15774
15775 IX86_BUILTIN_CVTPI2PS,
15776 IX86_BUILTIN_CVTPS2PI,
15777 IX86_BUILTIN_CVTSI2SS,
15778 IX86_BUILTIN_CVTSI642SS,
15779 IX86_BUILTIN_CVTSS2SI,
15780 IX86_BUILTIN_CVTSS2SI64,
15781 IX86_BUILTIN_CVTTPS2PI,
15782 IX86_BUILTIN_CVTTSS2SI,
15783 IX86_BUILTIN_CVTTSS2SI64,
15784
15785 IX86_BUILTIN_MAXPS,
15786 IX86_BUILTIN_MAXSS,
15787 IX86_BUILTIN_MINPS,
15788 IX86_BUILTIN_MINSS,
15789
15790 IX86_BUILTIN_LOADUPS,
15791 IX86_BUILTIN_STOREUPS,
15792 IX86_BUILTIN_MOVSS,
15793
15794 IX86_BUILTIN_MOVHLPS,
15795 IX86_BUILTIN_MOVLHPS,
15796 IX86_BUILTIN_LOADHPS,
15797 IX86_BUILTIN_LOADLPS,
15798 IX86_BUILTIN_STOREHPS,
15799 IX86_BUILTIN_STORELPS,
15800
15801 IX86_BUILTIN_MASKMOVQ,
15802 IX86_BUILTIN_MOVMSKPS,
15803 IX86_BUILTIN_PMOVMSKB,
15804
15805 IX86_BUILTIN_MOVNTPS,
15806 IX86_BUILTIN_MOVNTQ,
15807
15808 IX86_BUILTIN_LOADDQU,
15809 IX86_BUILTIN_STOREDQU,
15810
15811 IX86_BUILTIN_PACKSSWB,
15812 IX86_BUILTIN_PACKSSDW,
15813 IX86_BUILTIN_PACKUSWB,
15814
15815 IX86_BUILTIN_PADDB,
15816 IX86_BUILTIN_PADDW,
15817 IX86_BUILTIN_PADDD,
15818 IX86_BUILTIN_PADDQ,
15819 IX86_BUILTIN_PADDSB,
15820 IX86_BUILTIN_PADDSW,
15821 IX86_BUILTIN_PADDUSB,
15822 IX86_BUILTIN_PADDUSW,
15823 IX86_BUILTIN_PSUBB,
15824 IX86_BUILTIN_PSUBW,
15825 IX86_BUILTIN_PSUBD,
15826 IX86_BUILTIN_PSUBQ,
15827 IX86_BUILTIN_PSUBSB,
15828 IX86_BUILTIN_PSUBSW,
15829 IX86_BUILTIN_PSUBUSB,
15830 IX86_BUILTIN_PSUBUSW,
15831
15832 IX86_BUILTIN_PAND,
15833 IX86_BUILTIN_PANDN,
15834 IX86_BUILTIN_POR,
15835 IX86_BUILTIN_PXOR,
15836
15837 IX86_BUILTIN_PAVGB,
15838 IX86_BUILTIN_PAVGW,
15839
15840 IX86_BUILTIN_PCMPEQB,
15841 IX86_BUILTIN_PCMPEQW,
15842 IX86_BUILTIN_PCMPEQD,
15843 IX86_BUILTIN_PCMPGTB,
15844 IX86_BUILTIN_PCMPGTW,
15845 IX86_BUILTIN_PCMPGTD,
15846
15847 IX86_BUILTIN_PMADDWD,
15848
15849 IX86_BUILTIN_PMAXSW,
15850 IX86_BUILTIN_PMAXUB,
15851 IX86_BUILTIN_PMINSW,
15852 IX86_BUILTIN_PMINUB,
15853
15854 IX86_BUILTIN_PMULHUW,
15855 IX86_BUILTIN_PMULHW,
15856 IX86_BUILTIN_PMULLW,
15857
15858 IX86_BUILTIN_PSADBW,
15859 IX86_BUILTIN_PSHUFW,
15860
15861 IX86_BUILTIN_PSLLW,
15862 IX86_BUILTIN_PSLLD,
15863 IX86_BUILTIN_PSLLQ,
15864 IX86_BUILTIN_PSRAW,
15865 IX86_BUILTIN_PSRAD,
15866 IX86_BUILTIN_PSRLW,
15867 IX86_BUILTIN_PSRLD,
15868 IX86_BUILTIN_PSRLQ,
15869 IX86_BUILTIN_PSLLWI,
15870 IX86_BUILTIN_PSLLDI,
15871 IX86_BUILTIN_PSLLQI,
15872 IX86_BUILTIN_PSRAWI,
15873 IX86_BUILTIN_PSRADI,
15874 IX86_BUILTIN_PSRLWI,
15875 IX86_BUILTIN_PSRLDI,
15876 IX86_BUILTIN_PSRLQI,
15877
15878 IX86_BUILTIN_PUNPCKHBW,
15879 IX86_BUILTIN_PUNPCKHWD,
15880 IX86_BUILTIN_PUNPCKHDQ,
15881 IX86_BUILTIN_PUNPCKLBW,
15882 IX86_BUILTIN_PUNPCKLWD,
15883 IX86_BUILTIN_PUNPCKLDQ,
15884
15885 IX86_BUILTIN_SHUFPS,
15886
15887 IX86_BUILTIN_RCPPS,
15888 IX86_BUILTIN_RCPSS,
15889 IX86_BUILTIN_RSQRTPS,
15890 IX86_BUILTIN_RSQRTSS,
15891 IX86_BUILTIN_SQRTPS,
15892 IX86_BUILTIN_SQRTSS,
15893
15894 IX86_BUILTIN_UNPCKHPS,
15895 IX86_BUILTIN_UNPCKLPS,
15896
15897 IX86_BUILTIN_ANDPS,
15898 IX86_BUILTIN_ANDNPS,
15899 IX86_BUILTIN_ORPS,
15900 IX86_BUILTIN_XORPS,
15901
15902 IX86_BUILTIN_EMMS,
15903 IX86_BUILTIN_LDMXCSR,
15904 IX86_BUILTIN_STMXCSR,
15905 IX86_BUILTIN_SFENCE,
15906
15907 /* 3DNow! Original */
15908 IX86_BUILTIN_FEMMS,
15909 IX86_BUILTIN_PAVGUSB,
15910 IX86_BUILTIN_PF2ID,
15911 IX86_BUILTIN_PFACC,
15912 IX86_BUILTIN_PFADD,
15913 IX86_BUILTIN_PFCMPEQ,
15914 IX86_BUILTIN_PFCMPGE,
15915 IX86_BUILTIN_PFCMPGT,
15916 IX86_BUILTIN_PFMAX,
15917 IX86_BUILTIN_PFMIN,
15918 IX86_BUILTIN_PFMUL,
15919 IX86_BUILTIN_PFRCP,
15920 IX86_BUILTIN_PFRCPIT1,
15921 IX86_BUILTIN_PFRCPIT2,
15922 IX86_BUILTIN_PFRSQIT1,
15923 IX86_BUILTIN_PFRSQRT,
15924 IX86_BUILTIN_PFSUB,
15925 IX86_BUILTIN_PFSUBR,
15926 IX86_BUILTIN_PI2FD,
15927 IX86_BUILTIN_PMULHRW,
15928
15929 /* 3DNow! Athlon Extensions */
15930 IX86_BUILTIN_PF2IW,
15931 IX86_BUILTIN_PFNACC,
15932 IX86_BUILTIN_PFPNACC,
15933 IX86_BUILTIN_PI2FW,
15934 IX86_BUILTIN_PSWAPDSI,
15935 IX86_BUILTIN_PSWAPDSF,
15936
15937 /* SSE2 */
15938 IX86_BUILTIN_ADDPD,
15939 IX86_BUILTIN_ADDSD,
15940 IX86_BUILTIN_DIVPD,
15941 IX86_BUILTIN_DIVSD,
15942 IX86_BUILTIN_MULPD,
15943 IX86_BUILTIN_MULSD,
15944 IX86_BUILTIN_SUBPD,
15945 IX86_BUILTIN_SUBSD,
15946
15947 IX86_BUILTIN_CMPEQPD,
15948 IX86_BUILTIN_CMPLTPD,
15949 IX86_BUILTIN_CMPLEPD,
15950 IX86_BUILTIN_CMPGTPD,
15951 IX86_BUILTIN_CMPGEPD,
15952 IX86_BUILTIN_CMPNEQPD,
15953 IX86_BUILTIN_CMPNLTPD,
15954 IX86_BUILTIN_CMPNLEPD,
15955 IX86_BUILTIN_CMPNGTPD,
15956 IX86_BUILTIN_CMPNGEPD,
15957 IX86_BUILTIN_CMPORDPD,
15958 IX86_BUILTIN_CMPUNORDPD,
15959 IX86_BUILTIN_CMPNEPD,
15960 IX86_BUILTIN_CMPEQSD,
15961 IX86_BUILTIN_CMPLTSD,
15962 IX86_BUILTIN_CMPLESD,
15963 IX86_BUILTIN_CMPNEQSD,
15964 IX86_BUILTIN_CMPNLTSD,
15965 IX86_BUILTIN_CMPNLESD,
15966 IX86_BUILTIN_CMPORDSD,
15967 IX86_BUILTIN_CMPUNORDSD,
15968 IX86_BUILTIN_CMPNESD,
15969
15970 IX86_BUILTIN_COMIEQSD,
15971 IX86_BUILTIN_COMILTSD,
15972 IX86_BUILTIN_COMILESD,
15973 IX86_BUILTIN_COMIGTSD,
15974 IX86_BUILTIN_COMIGESD,
15975 IX86_BUILTIN_COMINEQSD,
15976 IX86_BUILTIN_UCOMIEQSD,
15977 IX86_BUILTIN_UCOMILTSD,
15978 IX86_BUILTIN_UCOMILESD,
15979 IX86_BUILTIN_UCOMIGTSD,
15980 IX86_BUILTIN_UCOMIGESD,
15981 IX86_BUILTIN_UCOMINEQSD,
15982
15983 IX86_BUILTIN_MAXPD,
15984 IX86_BUILTIN_MAXSD,
15985 IX86_BUILTIN_MINPD,
15986 IX86_BUILTIN_MINSD,
15987
15988 IX86_BUILTIN_ANDPD,
15989 IX86_BUILTIN_ANDNPD,
15990 IX86_BUILTIN_ORPD,
15991 IX86_BUILTIN_XORPD,
15992
15993 IX86_BUILTIN_SQRTPD,
15994 IX86_BUILTIN_SQRTSD,
15995
15996 IX86_BUILTIN_UNPCKHPD,
15997 IX86_BUILTIN_UNPCKLPD,
15998
15999 IX86_BUILTIN_SHUFPD,
16000
16001 IX86_BUILTIN_LOADUPD,
16002 IX86_BUILTIN_STOREUPD,
16003 IX86_BUILTIN_MOVSD,
16004
16005 IX86_BUILTIN_LOADHPD,
16006 IX86_BUILTIN_LOADLPD,
16007
16008 IX86_BUILTIN_CVTDQ2PD,
16009 IX86_BUILTIN_CVTDQ2PS,
16010
16011 IX86_BUILTIN_CVTPD2DQ,
16012 IX86_BUILTIN_CVTPD2PI,
16013 IX86_BUILTIN_CVTPD2PS,
16014 IX86_BUILTIN_CVTTPD2DQ,
16015 IX86_BUILTIN_CVTTPD2PI,
16016
16017 IX86_BUILTIN_CVTPI2PD,
16018 IX86_BUILTIN_CVTSI2SD,
16019 IX86_BUILTIN_CVTSI642SD,
16020
16021 IX86_BUILTIN_CVTSD2SI,
16022 IX86_BUILTIN_CVTSD2SI64,
16023 IX86_BUILTIN_CVTSD2SS,
16024 IX86_BUILTIN_CVTSS2SD,
16025 IX86_BUILTIN_CVTTSD2SI,
16026 IX86_BUILTIN_CVTTSD2SI64,
16027
16028 IX86_BUILTIN_CVTPS2DQ,
16029 IX86_BUILTIN_CVTPS2PD,
16030 IX86_BUILTIN_CVTTPS2DQ,
16031
16032 IX86_BUILTIN_MOVNTI,
16033 IX86_BUILTIN_MOVNTPD,
16034 IX86_BUILTIN_MOVNTDQ,
16035
16036 /* SSE2 MMX */
16037 IX86_BUILTIN_MASKMOVDQU,
16038 IX86_BUILTIN_MOVMSKPD,
16039 IX86_BUILTIN_PMOVMSKB128,
16040
16041 IX86_BUILTIN_PACKSSWB128,
16042 IX86_BUILTIN_PACKSSDW128,
16043 IX86_BUILTIN_PACKUSWB128,
16044
16045 IX86_BUILTIN_PADDB128,
16046 IX86_BUILTIN_PADDW128,
16047 IX86_BUILTIN_PADDD128,
16048 IX86_BUILTIN_PADDQ128,
16049 IX86_BUILTIN_PADDSB128,
16050 IX86_BUILTIN_PADDSW128,
16051 IX86_BUILTIN_PADDUSB128,
16052 IX86_BUILTIN_PADDUSW128,
16053 IX86_BUILTIN_PSUBB128,
16054 IX86_BUILTIN_PSUBW128,
16055 IX86_BUILTIN_PSUBD128,
16056 IX86_BUILTIN_PSUBQ128,
16057 IX86_BUILTIN_PSUBSB128,
16058 IX86_BUILTIN_PSUBSW128,
16059 IX86_BUILTIN_PSUBUSB128,
16060 IX86_BUILTIN_PSUBUSW128,
16061
16062 IX86_BUILTIN_PAND128,
16063 IX86_BUILTIN_PANDN128,
16064 IX86_BUILTIN_POR128,
16065 IX86_BUILTIN_PXOR128,
16066
16067 IX86_BUILTIN_PAVGB128,
16068 IX86_BUILTIN_PAVGW128,
16069
16070 IX86_BUILTIN_PCMPEQB128,
16071 IX86_BUILTIN_PCMPEQW128,
16072 IX86_BUILTIN_PCMPEQD128,
16073 IX86_BUILTIN_PCMPGTB128,
16074 IX86_BUILTIN_PCMPGTW128,
16075 IX86_BUILTIN_PCMPGTD128,
16076
16077 IX86_BUILTIN_PMADDWD128,
16078
16079 IX86_BUILTIN_PMAXSW128,
16080 IX86_BUILTIN_PMAXUB128,
16081 IX86_BUILTIN_PMINSW128,
16082 IX86_BUILTIN_PMINUB128,
16083
16084 IX86_BUILTIN_PMULUDQ,
16085 IX86_BUILTIN_PMULUDQ128,
16086 IX86_BUILTIN_PMULHUW128,
16087 IX86_BUILTIN_PMULHW128,
16088 IX86_BUILTIN_PMULLW128,
16089
16090 IX86_BUILTIN_PSADBW128,
16091 IX86_BUILTIN_PSHUFHW,
16092 IX86_BUILTIN_PSHUFLW,
16093 IX86_BUILTIN_PSHUFD,
16094
16095 IX86_BUILTIN_PSLLW128,
16096 IX86_BUILTIN_PSLLD128,
16097 IX86_BUILTIN_PSLLQ128,
16098 IX86_BUILTIN_PSRAW128,
16099 IX86_BUILTIN_PSRAD128,
16100 IX86_BUILTIN_PSRLW128,
16101 IX86_BUILTIN_PSRLD128,
16102 IX86_BUILTIN_PSRLQ128,
16103 IX86_BUILTIN_PSLLDQI128,
16104 IX86_BUILTIN_PSLLWI128,
16105 IX86_BUILTIN_PSLLDI128,
16106 IX86_BUILTIN_PSLLQI128,
16107 IX86_BUILTIN_PSRAWI128,
16108 IX86_BUILTIN_PSRADI128,
16109 IX86_BUILTIN_PSRLDQI128,
16110 IX86_BUILTIN_PSRLWI128,
16111 IX86_BUILTIN_PSRLDI128,
16112 IX86_BUILTIN_PSRLQI128,
16113
16114 IX86_BUILTIN_PUNPCKHBW128,
16115 IX86_BUILTIN_PUNPCKHWD128,
16116 IX86_BUILTIN_PUNPCKHDQ128,
16117 IX86_BUILTIN_PUNPCKHQDQ128,
16118 IX86_BUILTIN_PUNPCKLBW128,
16119 IX86_BUILTIN_PUNPCKLWD128,
16120 IX86_BUILTIN_PUNPCKLDQ128,
16121 IX86_BUILTIN_PUNPCKLQDQ128,
16122
16123 IX86_BUILTIN_CLFLUSH,
16124 IX86_BUILTIN_MFENCE,
16125 IX86_BUILTIN_LFENCE,
16126
16127 /* Prescott New Instructions. */
16128 IX86_BUILTIN_ADDSUBPS,
16129 IX86_BUILTIN_HADDPS,
16130 IX86_BUILTIN_HSUBPS,
16131 IX86_BUILTIN_MOVSHDUP,
16132 IX86_BUILTIN_MOVSLDUP,
16133 IX86_BUILTIN_ADDSUBPD,
16134 IX86_BUILTIN_HADDPD,
16135 IX86_BUILTIN_HSUBPD,
16136 IX86_BUILTIN_LDDQU,
16137
16138 IX86_BUILTIN_MONITOR,
16139 IX86_BUILTIN_MWAIT,
16140
16141 /* SSSE3. */
16142 IX86_BUILTIN_PHADDW,
16143 IX86_BUILTIN_PHADDD,
16144 IX86_BUILTIN_PHADDSW,
16145 IX86_BUILTIN_PHSUBW,
16146 IX86_BUILTIN_PHSUBD,
16147 IX86_BUILTIN_PHSUBSW,
16148 IX86_BUILTIN_PMADDUBSW,
16149 IX86_BUILTIN_PMULHRSW,
16150 IX86_BUILTIN_PSHUFB,
16151 IX86_BUILTIN_PSIGNB,
16152 IX86_BUILTIN_PSIGNW,
16153 IX86_BUILTIN_PSIGND,
16154 IX86_BUILTIN_PALIGNR,
16155 IX86_BUILTIN_PABSB,
16156 IX86_BUILTIN_PABSW,
16157 IX86_BUILTIN_PABSD,
16158
16159 IX86_BUILTIN_PHADDW128,
16160 IX86_BUILTIN_PHADDD128,
16161 IX86_BUILTIN_PHADDSW128,
16162 IX86_BUILTIN_PHSUBW128,
16163 IX86_BUILTIN_PHSUBD128,
16164 IX86_BUILTIN_PHSUBSW128,
16165 IX86_BUILTIN_PMADDUBSW128,
16166 IX86_BUILTIN_PMULHRSW128,
16167 IX86_BUILTIN_PSHUFB128,
16168 IX86_BUILTIN_PSIGNB128,
16169 IX86_BUILTIN_PSIGNW128,
16170 IX86_BUILTIN_PSIGND128,
16171 IX86_BUILTIN_PALIGNR128,
16172 IX86_BUILTIN_PABSB128,
16173 IX86_BUILTIN_PABSW128,
16174 IX86_BUILTIN_PABSD128,
16175
16176 /* AMDFAM10 - SSE4A New Instructions. */
16177 IX86_BUILTIN_MOVNTSD,
16178 IX86_BUILTIN_MOVNTSS,
16179 IX86_BUILTIN_EXTRQI,
16180 IX86_BUILTIN_EXTRQ,
16181 IX86_BUILTIN_INSERTQI,
16182 IX86_BUILTIN_INSERTQ,
16183
16184 IX86_BUILTIN_VEC_INIT_V2SI,
16185 IX86_BUILTIN_VEC_INIT_V4HI,
16186 IX86_BUILTIN_VEC_INIT_V8QI,
16187 IX86_BUILTIN_VEC_EXT_V2DF,
16188 IX86_BUILTIN_VEC_EXT_V2DI,
16189 IX86_BUILTIN_VEC_EXT_V4SF,
16190 IX86_BUILTIN_VEC_EXT_V4SI,
16191 IX86_BUILTIN_VEC_EXT_V8HI,
16192 IX86_BUILTIN_VEC_EXT_V2SI,
16193 IX86_BUILTIN_VEC_EXT_V4HI,
16194 IX86_BUILTIN_VEC_SET_V8HI,
16195 IX86_BUILTIN_VEC_SET_V4HI,
16196
16197 IX86_BUILTIN_MAX
16198 };
16199
16200 /* Table for the ix86 builtin decls. */
16201 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16202
16203 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16204 * only if target_flags includes one of the bits in MASK. Stores the
16205 * function decl in the ix86_builtins array.
16206 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16207
16208 static inline tree
16209 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16210 {
16211 tree decl = NULL_TREE;
16212
16213 if (mask & target_flags
16214 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16215 {
16216 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16217 NULL, NULL_TREE);
16218 ix86_builtins[(int) code] = decl;
16219 }
16220
16221 return decl;
16222 }
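/* Illustrative use (the type node named here is hypothetical; the real ones
   are built in ix86_init_mmx_sse_builtins):
   def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned,
   IX86_BUILTIN_LDMXCSR);
   registers the builtin only when the SSE target flag is enabled. */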
16223
16224 /* Like def_builtin, but also marks the function decl "const". */
16225
16226 static inline tree
16227 def_builtin_const (int mask, const char *name, tree type,
16228 enum ix86_builtins code)
16229 {
16230 tree decl = def_builtin (mask, name, type, code);
16231 if (decl)
16232 TREE_READONLY (decl) = 1;
16233 return decl;
16234 }
16235
16236 /* Bits for builtin_description.flag. */
16237
16238 /* Set when we don't support the comparison natively, and should
16239 swap_comparison in order to support it. */
16240 #define BUILTIN_DESC_SWAP_OPERANDS 1
16241
16242 struct builtin_description
16243 {
16244 const unsigned int mask;
16245 const enum insn_code icode;
16246 const char *const name;
16247 const enum ix86_builtins code;
16248 const enum rtx_code comparison;
16249 const unsigned int flag;
16250 };
16251
16252 static const struct builtin_description bdesc_comi[] =
16253 {
16254 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16255 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16256 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16257 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16258 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16259 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16260 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16261 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16262 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16263 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16264 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16265 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16266 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16267 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16268 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16269 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16270 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16271 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16272 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16273 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16274 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16275 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16276 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16277 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16278 };
16279
16280 static const struct builtin_description bdesc_2arg[] =
16281 {
16282 /* SSE */
16283 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16284 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16285 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16286 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16287 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16288 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16289 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16290 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16291
16292 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16293 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16294 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16295 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16296 BUILTIN_DESC_SWAP_OPERANDS },
16297 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16298 BUILTIN_DESC_SWAP_OPERANDS },
16299 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16300 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16301 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16302 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16303 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16304 BUILTIN_DESC_SWAP_OPERANDS },
16305 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16306 BUILTIN_DESC_SWAP_OPERANDS },
16307 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16308 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16309 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16310 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16311 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16312 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16313 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16314 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16315 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16316 BUILTIN_DESC_SWAP_OPERANDS },
16317 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16318 BUILTIN_DESC_SWAP_OPERANDS },
16319 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16320
16321 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16322 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16323 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16324 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16325
16326 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16327 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16328 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16329 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16330
16331 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16332 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16333 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16334 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16335 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16336
16337 /* MMX */
16338 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16339 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16340 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16341 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16342 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16343 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16344 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16345 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16346
16347 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16348 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16349 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16350 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16351 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16352 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16353 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16354 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16355
16356 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16357 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16358 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16359
16360 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16361 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16362 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16363 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16364
16365 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16366 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16367
16368 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16369 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16370 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16371 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16372 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16373 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16374
16375 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16376 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16377 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16378 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16379
16380 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16381 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16382 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16383 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16384 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16385 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16386
16387 /* Special. */
16388 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16389 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16390 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16391
16392 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16393 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16394 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16395
16396 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16397 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16398 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16399 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16400 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16401 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16402
16403 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16404 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16405 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16406 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16407 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16408 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16409
16410 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16411 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16412 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16413 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16414
16415 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16416 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16417
16418 /* SSE2 */
16419 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16420 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16421 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16422 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16423 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16424 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16425 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16426 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16427
16428 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16429 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16430 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16431 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16432 BUILTIN_DESC_SWAP_OPERANDS },
16433 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16434 BUILTIN_DESC_SWAP_OPERANDS },
16435 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16436 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16437 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16438 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16439 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16440 BUILTIN_DESC_SWAP_OPERANDS },
16441 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16442 BUILTIN_DESC_SWAP_OPERANDS },
16443 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16444 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16445 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16446 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16448 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16449 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16450 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16451 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16452
16453 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16454 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16455 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16456 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16457
16458 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16459 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16460 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16461 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16462
16463 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16464 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16465 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16466
16467 /* SSE2 MMX */
16468 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16469 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16470 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16471 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16472 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16473 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16474 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16475 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16476
16477 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16478 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16479 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16480 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16481 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16482 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16483 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16484 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16485
16486 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16487 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16488
16489 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16490 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16491 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16492 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16493
16494 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16495 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16496
16497 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16498 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16499 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16500 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16501 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16502 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16503
16504 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16505 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16506 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16507 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16508
16509 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16510 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16511 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16512 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16513 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16514 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16515 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16516 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16517
16518 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16519 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16520 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16521
16522 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16523 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16524
16525 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16526 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16527
16528 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16529 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16530 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16531
16532 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16533 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16534 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16535
16536 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16537 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16538
16539 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16540
16541 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16542 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16543 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16544 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16545
16546 /* SSE3 */
16547 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16548 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16549 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16550 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16551 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16552 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16553
16554 /* SSSE3 */
16555 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16556 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16557 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16558 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16559 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16560 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16561 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16562 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16563 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16564 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16565 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16566 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16567 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16568 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16569 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16570 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16571 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16572 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16573 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16574 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16575 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16576 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16577 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16578 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16579 };
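
/* The bdesc_2arg table is walked in ix86_init_mmx_sse_builtins below;
   the function type of each builtin is chosen from the mode of the
   insn's first input operand (e.g. a V4SFmode operand yields
   v4sf_ftype_v4sf_v4sf), and the mask-generating compares are overridden
   to return an integer vector.  Entries with a null name are not defined
   from this loop; they are given more specific types further below.
   A typical use from user code is, roughly (illustrative only; the
   __v4sf typedef comes from xmmintrin.h):

     __v4sf a, b, c;
     c = __builtin_ia32_addps (a, b);  */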
16580
16581 static const struct builtin_description bdesc_1arg[] =
16582 {
16583 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16584 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16585
16586 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16587 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16588 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16589
16590 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16591 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16592 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16593 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16594 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16595 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16596
16597 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16599
16600 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16601
16602 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16603 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16604
16605 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16606 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16607 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16608 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16610
16611 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16612
16613 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16614 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16615 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16616 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16617
16618 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16619 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16620 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16621
16622 /* SSE3 */
16623 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16624 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16625
16626 /* SSSE3 */
16627 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16628 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16629 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16630 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16631 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16632 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16633 };
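
/* As with bdesc_2arg, the one-operand builtins above are registered in a
   loop below, with the function type selected from the mode of the insn's
   input operand; entries with a null name (for instance the movmskps and
   cvt* conversions) are instead defined explicitly with their exact
   argument and return types.  */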
16634
16635 static void
16636 ix86_init_builtins (void)
16637 {
16638 if (TARGET_MMX)
16639 ix86_init_mmx_sse_builtins ();
16640 }
16641
16642 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16643    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
16644    builtins are defined.  */
16645 static void
16646 ix86_init_mmx_sse_builtins (void)
16647 {
16648 const struct builtin_description * d;
16649 size_t i;
16650
16651 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16652 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16653 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16654 tree V2DI_type_node
16655 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16656 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16657 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16658 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16659 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16660 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16661 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16662
16663 tree pchar_type_node = build_pointer_type (char_type_node);
16664 tree pcchar_type_node = build_pointer_type (
16665 build_type_variant (char_type_node, 1, 0));
16666 tree pfloat_type_node = build_pointer_type (float_type_node);
16667 tree pcfloat_type_node = build_pointer_type (
16668 build_type_variant (float_type_node, 1, 0));
16669 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16670 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16671 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16672
16673 /* Comparisons. */
16674 tree int_ftype_v4sf_v4sf
16675 = build_function_type_list (integer_type_node,
16676 V4SF_type_node, V4SF_type_node, NULL_TREE);
16677 tree v4si_ftype_v4sf_v4sf
16678 = build_function_type_list (V4SI_type_node,
16679 V4SF_type_node, V4SF_type_node, NULL_TREE);
16680 /* MMX/SSE/integer conversions. */
16681 tree int_ftype_v4sf
16682 = build_function_type_list (integer_type_node,
16683 V4SF_type_node, NULL_TREE);
16684 tree int64_ftype_v4sf
16685 = build_function_type_list (long_long_integer_type_node,
16686 V4SF_type_node, NULL_TREE);
16687 tree int_ftype_v8qi
16688 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16689 tree v4sf_ftype_v4sf_int
16690 = build_function_type_list (V4SF_type_node,
16691 V4SF_type_node, integer_type_node, NULL_TREE);
16692 tree v4sf_ftype_v4sf_int64
16693 = build_function_type_list (V4SF_type_node,
16694 V4SF_type_node, long_long_integer_type_node,
16695 NULL_TREE);
16696 tree v4sf_ftype_v4sf_v2si
16697 = build_function_type_list (V4SF_type_node,
16698 V4SF_type_node, V2SI_type_node, NULL_TREE);
16699
16700 /* Miscellaneous. */
16701 tree v8qi_ftype_v4hi_v4hi
16702 = build_function_type_list (V8QI_type_node,
16703 V4HI_type_node, V4HI_type_node, NULL_TREE);
16704 tree v4hi_ftype_v2si_v2si
16705 = build_function_type_list (V4HI_type_node,
16706 V2SI_type_node, V2SI_type_node, NULL_TREE);
16707 tree v4sf_ftype_v4sf_v4sf_int
16708 = build_function_type_list (V4SF_type_node,
16709 V4SF_type_node, V4SF_type_node,
16710 integer_type_node, NULL_TREE);
16711 tree v2si_ftype_v4hi_v4hi
16712 = build_function_type_list (V2SI_type_node,
16713 V4HI_type_node, V4HI_type_node, NULL_TREE);
16714 tree v4hi_ftype_v4hi_int
16715 = build_function_type_list (V4HI_type_node,
16716 V4HI_type_node, integer_type_node, NULL_TREE);
16717 tree v4hi_ftype_v4hi_di
16718 = build_function_type_list (V4HI_type_node,
16719 V4HI_type_node, long_long_unsigned_type_node,
16720 NULL_TREE);
16721 tree v2si_ftype_v2si_di
16722 = build_function_type_list (V2SI_type_node,
16723 V2SI_type_node, long_long_unsigned_type_node,
16724 NULL_TREE);
16725 tree void_ftype_void
16726 = build_function_type (void_type_node, void_list_node);
16727 tree void_ftype_unsigned
16728 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16729 tree void_ftype_unsigned_unsigned
16730 = build_function_type_list (void_type_node, unsigned_type_node,
16731 unsigned_type_node, NULL_TREE);
16732 tree void_ftype_pcvoid_unsigned_unsigned
16733 = build_function_type_list (void_type_node, const_ptr_type_node,
16734 unsigned_type_node, unsigned_type_node,
16735 NULL_TREE);
16736 tree unsigned_ftype_void
16737 = build_function_type (unsigned_type_node, void_list_node);
16738 tree v2si_ftype_v4sf
16739 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16740 /* Loads/stores. */
16741 tree void_ftype_v8qi_v8qi_pchar
16742 = build_function_type_list (void_type_node,
16743 V8QI_type_node, V8QI_type_node,
16744 pchar_type_node, NULL_TREE);
16745 tree v4sf_ftype_pcfloat
16746 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16747 /* @@@ the type is bogus */
16748 tree v4sf_ftype_v4sf_pv2si
16749 = build_function_type_list (V4SF_type_node,
16750 V4SF_type_node, pv2si_type_node, NULL_TREE);
16751 tree void_ftype_pv2si_v4sf
16752 = build_function_type_list (void_type_node,
16753 pv2si_type_node, V4SF_type_node, NULL_TREE);
16754 tree void_ftype_pfloat_v4sf
16755 = build_function_type_list (void_type_node,
16756 pfloat_type_node, V4SF_type_node, NULL_TREE);
16757 tree void_ftype_pdi_di
16758 = build_function_type_list (void_type_node,
16759 pdi_type_node, long_long_unsigned_type_node,
16760 NULL_TREE);
16761 tree void_ftype_pv2di_v2di
16762 = build_function_type_list (void_type_node,
16763 pv2di_type_node, V2DI_type_node, NULL_TREE);
16764 /* Normal vector unops. */
16765 tree v4sf_ftype_v4sf
16766 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16767 tree v16qi_ftype_v16qi
16768 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16769 tree v8hi_ftype_v8hi
16770 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16771 tree v4si_ftype_v4si
16772 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16773 tree v8qi_ftype_v8qi
16774 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16775 tree v4hi_ftype_v4hi
16776 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16777
16778 /* Normal vector binops. */
16779 tree v4sf_ftype_v4sf_v4sf
16780 = build_function_type_list (V4SF_type_node,
16781 V4SF_type_node, V4SF_type_node, NULL_TREE);
16782 tree v8qi_ftype_v8qi_v8qi
16783 = build_function_type_list (V8QI_type_node,
16784 V8QI_type_node, V8QI_type_node, NULL_TREE);
16785 tree v4hi_ftype_v4hi_v4hi
16786 = build_function_type_list (V4HI_type_node,
16787 V4HI_type_node, V4HI_type_node, NULL_TREE);
16788 tree v2si_ftype_v2si_v2si
16789 = build_function_type_list (V2SI_type_node,
16790 V2SI_type_node, V2SI_type_node, NULL_TREE);
16791 tree di_ftype_di_di
16792 = build_function_type_list (long_long_unsigned_type_node,
16793 long_long_unsigned_type_node,
16794 long_long_unsigned_type_node, NULL_TREE);
16795
16796 tree di_ftype_di_di_int
16797 = build_function_type_list (long_long_unsigned_type_node,
16798 long_long_unsigned_type_node,
16799 long_long_unsigned_type_node,
16800 integer_type_node, NULL_TREE);
16801
16802 tree v2si_ftype_v2sf
16803 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16804 tree v2sf_ftype_v2si
16805 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16806 tree v2si_ftype_v2si
16807 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16808 tree v2sf_ftype_v2sf
16809 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16810 tree v2sf_ftype_v2sf_v2sf
16811 = build_function_type_list (V2SF_type_node,
16812 V2SF_type_node, V2SF_type_node, NULL_TREE);
16813 tree v2si_ftype_v2sf_v2sf
16814 = build_function_type_list (V2SI_type_node,
16815 V2SF_type_node, V2SF_type_node, NULL_TREE);
16816 tree pint_type_node = build_pointer_type (integer_type_node);
16817 tree pdouble_type_node = build_pointer_type (double_type_node);
16818 tree pcdouble_type_node = build_pointer_type (
16819 build_type_variant (double_type_node, 1, 0));
16820 tree int_ftype_v2df_v2df
16821 = build_function_type_list (integer_type_node,
16822 V2DF_type_node, V2DF_type_node, NULL_TREE);
16823
16824 tree void_ftype_pcvoid
16825 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16826 tree v4sf_ftype_v4si
16827 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16828 tree v4si_ftype_v4sf
16829 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16830 tree v2df_ftype_v4si
16831 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16832 tree v4si_ftype_v2df
16833 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16834 tree v2si_ftype_v2df
16835 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16836 tree v4sf_ftype_v2df
16837 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16838 tree v2df_ftype_v2si
16839 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16840 tree v2df_ftype_v4sf
16841 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16842 tree int_ftype_v2df
16843 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16844 tree int64_ftype_v2df
16845 = build_function_type_list (long_long_integer_type_node,
16846 V2DF_type_node, NULL_TREE);
16847 tree v2df_ftype_v2df_int
16848 = build_function_type_list (V2DF_type_node,
16849 V2DF_type_node, integer_type_node, NULL_TREE);
16850 tree v2df_ftype_v2df_int64
16851 = build_function_type_list (V2DF_type_node,
16852 V2DF_type_node, long_long_integer_type_node,
16853 NULL_TREE);
16854 tree v4sf_ftype_v4sf_v2df
16855 = build_function_type_list (V4SF_type_node,
16856 V4SF_type_node, V2DF_type_node, NULL_TREE);
16857 tree v2df_ftype_v2df_v4sf
16858 = build_function_type_list (V2DF_type_node,
16859 V2DF_type_node, V4SF_type_node, NULL_TREE);
16860 tree v2df_ftype_v2df_v2df_int
16861 = build_function_type_list (V2DF_type_node,
16862 V2DF_type_node, V2DF_type_node,
16863 integer_type_node,
16864 NULL_TREE);
16865 tree v2df_ftype_v2df_pcdouble
16866 = build_function_type_list (V2DF_type_node,
16867 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16868 tree void_ftype_pdouble_v2df
16869 = build_function_type_list (void_type_node,
16870 pdouble_type_node, V2DF_type_node, NULL_TREE);
16871 tree void_ftype_pint_int
16872 = build_function_type_list (void_type_node,
16873 pint_type_node, integer_type_node, NULL_TREE);
16874 tree void_ftype_v16qi_v16qi_pchar
16875 = build_function_type_list (void_type_node,
16876 V16QI_type_node, V16QI_type_node,
16877 pchar_type_node, NULL_TREE);
16878 tree v2df_ftype_pcdouble
16879 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16880 tree v2df_ftype_v2df_v2df
16881 = build_function_type_list (V2DF_type_node,
16882 V2DF_type_node, V2DF_type_node, NULL_TREE);
16883 tree v16qi_ftype_v16qi_v16qi
16884 = build_function_type_list (V16QI_type_node,
16885 V16QI_type_node, V16QI_type_node, NULL_TREE);
16886 tree v8hi_ftype_v8hi_v8hi
16887 = build_function_type_list (V8HI_type_node,
16888 V8HI_type_node, V8HI_type_node, NULL_TREE);
16889 tree v4si_ftype_v4si_v4si
16890 = build_function_type_list (V4SI_type_node,
16891 V4SI_type_node, V4SI_type_node, NULL_TREE);
16892 tree v2di_ftype_v2di_v2di
16893 = build_function_type_list (V2DI_type_node,
16894 V2DI_type_node, V2DI_type_node, NULL_TREE);
16895 tree v2di_ftype_v2df_v2df
16896 = build_function_type_list (V2DI_type_node,
16897 V2DF_type_node, V2DF_type_node, NULL_TREE);
16898 tree v2df_ftype_v2df
16899 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16900 tree v2di_ftype_v2di_int
16901 = build_function_type_list (V2DI_type_node,
16902 V2DI_type_node, integer_type_node, NULL_TREE);
16903 tree v2di_ftype_v2di_v2di_int
16904 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16905 V2DI_type_node, integer_type_node, NULL_TREE);
16906 tree v4si_ftype_v4si_int
16907 = build_function_type_list (V4SI_type_node,
16908 V4SI_type_node, integer_type_node, NULL_TREE);
16909 tree v8hi_ftype_v8hi_int
16910 = build_function_type_list (V8HI_type_node,
16911 V8HI_type_node, integer_type_node, NULL_TREE);
16912 tree v8hi_ftype_v8hi_v2di
16913 = build_function_type_list (V8HI_type_node,
16914 V8HI_type_node, V2DI_type_node, NULL_TREE);
16915 tree v4si_ftype_v4si_v2di
16916 = build_function_type_list (V4SI_type_node,
16917 V4SI_type_node, V2DI_type_node, NULL_TREE);
16918 tree v4si_ftype_v8hi_v8hi
16919 = build_function_type_list (V4SI_type_node,
16920 V8HI_type_node, V8HI_type_node, NULL_TREE);
16921 tree di_ftype_v8qi_v8qi
16922 = build_function_type_list (long_long_unsigned_type_node,
16923 V8QI_type_node, V8QI_type_node, NULL_TREE);
16924 tree di_ftype_v2si_v2si
16925 = build_function_type_list (long_long_unsigned_type_node,
16926 V2SI_type_node, V2SI_type_node, NULL_TREE);
16927 tree v2di_ftype_v16qi_v16qi
16928 = build_function_type_list (V2DI_type_node,
16929 V16QI_type_node, V16QI_type_node, NULL_TREE);
16930 tree v2di_ftype_v4si_v4si
16931 = build_function_type_list (V2DI_type_node,
16932 V4SI_type_node, V4SI_type_node, NULL_TREE);
16933 tree int_ftype_v16qi
16934 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16935 tree v16qi_ftype_pcchar
16936 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16937 tree void_ftype_pchar_v16qi
16938 = build_function_type_list (void_type_node,
16939 pchar_type_node, V16QI_type_node, NULL_TREE);
16940
16941 tree v2di_ftype_v2di_unsigned_unsigned
16942 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16943 unsigned_type_node, unsigned_type_node,
16944 NULL_TREE);
16945 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16946 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16947 unsigned_type_node, unsigned_type_node,
16948 NULL_TREE);
16949 tree v2di_ftype_v2di_v16qi
16950 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16951 NULL_TREE);
16952
16953 tree float80_type;
16954 tree float128_type;
16955 tree ftype;
16956
16957 /* The __float80 type. */
16958 if (TYPE_MODE (long_double_type_node) == XFmode)
16959 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16960 "__float80");
16961 else
16962 {
16963 /* The __float80 type. */
16964 float80_type = make_node (REAL_TYPE);
16965 TYPE_PRECISION (float80_type) = 80;
16966 layout_type (float80_type);
16967 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16968 }
16969
16970 if (TARGET_64BIT)
16971 {
16972 float128_type = make_node (REAL_TYPE);
16973 TYPE_PRECISION (float128_type) = 128;
16974 layout_type (float128_type);
16975 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16976 }
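
/* Once registered, user code can declare objects of these types, e.g.
   "__float80 e;" and, on 64-bit targets only, "__float128 q;"
   (illustrative).  When long double already has XFmode, __float80 is
   simply another name for long double.  */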
16977
16978 /* Add all builtins that are more or less simple operations on two
16979 operands. */
16980 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16981 {
16982 /* Use one of the operands; the target can have a different mode for
16983 mask-generating compares. */
16984 enum machine_mode mode;
16985 tree type;
16986
16987 if (d->name == 0)
16988 continue;
16989 mode = insn_data[d->icode].operand[1].mode;
16990
16991 switch (mode)
16992 {
16993 case V16QImode:
16994 type = v16qi_ftype_v16qi_v16qi;
16995 break;
16996 case V8HImode:
16997 type = v8hi_ftype_v8hi_v8hi;
16998 break;
16999 case V4SImode:
17000 type = v4si_ftype_v4si_v4si;
17001 break;
17002 case V2DImode:
17003 type = v2di_ftype_v2di_v2di;
17004 break;
17005 case V2DFmode:
17006 type = v2df_ftype_v2df_v2df;
17007 break;
17008 case V4SFmode:
17009 type = v4sf_ftype_v4sf_v4sf;
17010 break;
17011 case V8QImode:
17012 type = v8qi_ftype_v8qi_v8qi;
17013 break;
17014 case V4HImode:
17015 type = v4hi_ftype_v4hi_v4hi;
17016 break;
17017 case V2SImode:
17018 type = v2si_ftype_v2si_v2si;
17019 break;
17020 case DImode:
17021 type = di_ftype_di_di;
17022 break;
17023
17024 default:
17025 gcc_unreachable ();
17026 }
17027
17028 /* Override for comparisons. */
17029 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17030 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17031 type = v4si_ftype_v4sf_v4sf;
17032
17033 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17034 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17035 type = v2di_ftype_v2df_v2df;
17036
17037 def_builtin (d->mask, d->name, type, d->code);
17038 }
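
/* def_builtin only creates a decl when d->mask matches the enabled ISA
   flags (see its definition above), so at this point only the entries
   whose extensions are active (e.g. the SSSE3 rows only under -mssse3)
   have actually been turned into FUNCTION_DECLs.  */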
17039
17040 /* Add all builtins that are more or less simple operations on 1 operand. */
17041 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17042 {
17043 enum machine_mode mode;
17044 tree type;
17045
17046 if (d->name == 0)
17047 continue;
17048 mode = insn_data[d->icode].operand[1].mode;
17049
17050 switch (mode)
17051 {
17052 case V16QImode:
17053 type = v16qi_ftype_v16qi;
17054 break;
17055 case V8HImode:
17056 type = v8hi_ftype_v8hi;
17057 break;
17058 case V4SImode:
17059 type = v4si_ftype_v4si;
17060 break;
17061 case V2DFmode:
17062 type = v2df_ftype_v2df;
17063 break;
17064 case V4SFmode:
17065 type = v4sf_ftype_v4sf;
17066 break;
17067 case V8QImode:
17068 type = v8qi_ftype_v8qi;
17069 break;
17070 case V4HImode:
17071 type = v4hi_ftype_v4hi;
17072 break;
17073 case V2SImode:
17074 type = v2si_ftype_v2si;
17075 break;
17076
17077 default:
17078 gcc_unreachable ();
17079 }
17080
17081 def_builtin (d->mask, d->name, type, d->code);
17082 }
17083
17084 /* Add the remaining MMX insns with somewhat more complicated types. */
17085 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17086 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17087 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17088 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17089
17090 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17091 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17092 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17093
17094 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17095 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17096
17097 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17098 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17099
17100 /* comi/ucomi insns. */
17101 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17102 if (d->mask == MASK_SSE2)
17103 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17104 else
17105 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
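
/* Note the test above is "== MASK_SSE2", not a bitwise AND: every entry
   in bdesc_comi carries exactly MASK_SSE or MASK_SSE2, so an equality
   test suffices to pick the double-precision function type.  */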
17106
17107 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17108 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17109 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17110
17111 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17112 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17113 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17114 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17115 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17116 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17117 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17118 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17119 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17120 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17121 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17122
17123 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17124
17125 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17126 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17127
17128 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17129 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17130 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17131 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17132
17133 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17134 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17135 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17136 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17137
17138 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17139
17140 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17141
17142 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17143 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17144 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17145 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17146 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17147 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17148
17149 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17150
17151 /* Original 3DNow! */
17152 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17153 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17154 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17155 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17156 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17157 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17158 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17159 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17160 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17161 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17162 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17163 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17164 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17165 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17166 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17167 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17168 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17169 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17170 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17171 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17172
17173 /* 3DNow! extension as used in the Athlon CPU. */
17174 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17175 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17176 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17177 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17178 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17179 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17180
17181 /* SSE2 */
17182 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17183
17184 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17185 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17186
17187 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17188 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17189
17190 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17191 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17192 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17193 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17194 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17195
17196 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17197 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17198 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17199 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17200
17201 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17202 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17203
17204 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17205
17206 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17207 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17208
17209 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17210 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17211 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17212 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17213 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17214
17215 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17216
17217 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17218 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17219 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17220 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17221
17222 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17223 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17224 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17225
17226 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17227 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17228 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17229 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17230
17231 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17232 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17233 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17234
17235 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17236 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17237
17238 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17239 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17240
17241 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17242 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17243 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17244
17245 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17246 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17247 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17248
17249 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17250 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17251
17252 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17253 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17254 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17255 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17256
17257 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17258 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17259 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17260 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17261
17262 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17263 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17264
17265 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17266
17267 /* Prescott New Instructions. */
17268 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17269 void_ftype_pcvoid_unsigned_unsigned,
17270 IX86_BUILTIN_MONITOR);
17271 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17272 void_ftype_unsigned_unsigned,
17273 IX86_BUILTIN_MWAIT);
17274 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17275 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17276
17277 /* SSSE3. */
17278 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17279 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17280 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17281 IX86_BUILTIN_PALIGNR);
17282
17283 /* AMDFAM10 SSE4A new built-ins.  */
17284 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17285 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17286 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17287 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17288 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17289 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17290 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17291 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17292 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17293 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17294 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17295 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17296
17297 /* Access to the vec_init patterns. */
17298 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17299 integer_type_node, NULL_TREE);
17300 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17301 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17302
17303 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17304 short_integer_type_node,
17305 short_integer_type_node,
17306 short_integer_type_node, NULL_TREE);
17307 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17308 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17309
17310 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17311 char_type_node, char_type_node,
17312 char_type_node, char_type_node,
17313 char_type_node, char_type_node,
17314 char_type_node, NULL_TREE);
17315 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17316 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17317
17318 /* Access to the vec_extract patterns. */
17319 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17320 integer_type_node, NULL_TREE);
17321 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17322 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17323
17324 ftype = build_function_type_list (long_long_integer_type_node,
17325 V2DI_type_node, integer_type_node,
17326 NULL_TREE);
17327 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17328 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17329
17330 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17331 integer_type_node, NULL_TREE);
17332 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17333 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17334
17335 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17336 integer_type_node, NULL_TREE);
17337 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17338 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17339
17340 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17341 integer_type_node, NULL_TREE);
17342 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17343 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17344
17345 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17346 integer_type_node, NULL_TREE);
17347 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17348 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17349
17350 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17351 integer_type_node, NULL_TREE);
17352 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17353 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17354
17355 /* Access to the vec_set patterns. */
17356 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17357 intHI_type_node,
17358 integer_type_node, NULL_TREE);
17359 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17360 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17361
17362 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17363 intHI_type_node,
17364 integer_type_node, NULL_TREE);
17365 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17366 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17367 }
17368
17369 /* Errors in the source file can cause expand_expr to return const0_rtx
17370 where we expect a vector. To avoid crashing, use one of the vector
17371 clear instructions. */
17372 static rtx
17373 safe_vector_operand (rtx x, enum machine_mode mode)
17374 {
17375 if (x == const0_rtx)
17376 x = CONST0_RTX (mode);
17377 return x;
17378 }
17379
17380 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17381
17382 static rtx
17383 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17384 {
17385 rtx pat, xops[3];
17386 tree arg0 = CALL_EXPR_ARG (exp, 0);
17387 tree arg1 = CALL_EXPR_ARG (exp, 1);
17388 rtx op0 = expand_normal (arg0);
17389 rtx op1 = expand_normal (arg1);
17390 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17391 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17392 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17393
17394 if (VECTOR_MODE_P (mode0))
17395 op0 = safe_vector_operand (op0, mode0);
17396 if (VECTOR_MODE_P (mode1))
17397 op1 = safe_vector_operand (op1, mode1);
17398
17399 if (optimize || !target
17400 || GET_MODE (target) != tmode
17401 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17402 target = gen_reg_rtx (tmode);
17403
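/* If the pattern wants a TImode operand but the argument was expanded as an
   SImode value, load it into the low element of a V4SI register and view
   that register as TImode.  */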
17404 if (GET_MODE (op1) == SImode && mode1 == TImode)
17405 {
17406 rtx x = gen_reg_rtx (V4SImode);
17407 emit_insn (gen_sse2_loadd (x, op1));
17408 op1 = gen_lowpart (TImode, x);
17409 }
17410
17411 /* The insn must want input operands in the same modes as the
17412 result. */
17413 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17414 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17415
17416 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17417 op0 = copy_to_mode_reg (mode0, op0);
17418 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17419 op1 = copy_to_mode_reg (mode1, op1);
17420
17421 /* ??? Using ix86_fixup_binary_operands is problematic when
17422 we've got mismatched modes. Fake it. */
17423
17424 xops[0] = target;
17425 xops[1] = op0;
17426 xops[2] = op1;
17427
17428 if (tmode == mode0 && tmode == mode1)
17429 {
17430 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17431 op0 = xops[1];
17432 op1 = xops[2];
17433 }
17434 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17435 {
17436 op0 = force_reg (mode0, op0);
17437 op1 = force_reg (mode1, op1);
17438 target = gen_reg_rtx (tmode);
17439 }
17440
17441 pat = GEN_FCN (icode) (target, op0, op1);
17442 if (! pat)
17443 return 0;
17444 emit_insn (pat);
17445 return target;
17446 }
17447
17448 /* Subroutine of ix86_expand_builtin to take care of stores. */
17449
17450 static rtx
17451 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17452 {
17453 rtx pat;
17454 tree arg0 = CALL_EXPR_ARG (exp, 0);
17455 tree arg1 = CALL_EXPR_ARG (exp, 1);
17456 rtx op0 = expand_normal (arg0);
17457 rtx op1 = expand_normal (arg1);
17458 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17459 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17460
17461 if (VECTOR_MODE_P (mode1))
17462 op1 = safe_vector_operand (op1, mode1);
17463
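/* The first argument is the destination address; wrap it in a MEM of the
   mode the store pattern expects.  */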
17464 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17465 op1 = copy_to_mode_reg (mode1, op1);
17466
17467 pat = GEN_FCN (icode) (op0, op1);
17468 if (pat)
17469 emit_insn (pat);
17470 return 0;
17471 }
17472
17473 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17474
17475 static rtx
17476 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17477 rtx target, int do_load)
17478 {
17479 rtx pat;
17480 tree arg0 = CALL_EXPR_ARG (exp, 0);
17481 rtx op0 = expand_normal (arg0);
17482 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17483 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17484
17485 if (optimize || !target
17486 || GET_MODE (target) != tmode
17487 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17488 target = gen_reg_rtx (tmode);
17489 if (do_load)
17490 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17491 else
17492 {
17493 if (VECTOR_MODE_P (mode0))
17494 op0 = safe_vector_operand (op0, mode0);
17495
17496 if ((optimize && !register_operand (op0, mode0))
17497 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17498 op0 = copy_to_mode_reg (mode0, op0);
17499 }
17500
17501 pat = GEN_FCN (icode) (target, op0);
17502 if (! pat)
17503 return 0;
17504 emit_insn (pat);
17505 return target;
17506 }
17507
17508 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17509 sqrtss, rsqrtss, rcpss. */
17510
17511 static rtx
17512 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17513 {
17514 rtx pat;
17515 tree arg0 = CALL_EXPR_ARG (exp, 0);
17516 rtx op1, op0 = expand_normal (arg0);
17517 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17518 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17519
17520 if (optimize || !target
17521 || GET_MODE (target) != tmode
17522 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17523 target = gen_reg_rtx (tmode);
17524
17525 if (VECTOR_MODE_P (mode0))
17526 op0 = safe_vector_operand (op0, mode0);
17527
17528 if ((optimize && !register_operand (op0, mode0))
17529 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17530 op0 = copy_to_mode_reg (mode0, op0);
17531
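/* These scalar patterns take a second input operand that supplies the
   untouched upper elements of the result; reuse the single argument for
   both inputs.  */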
17532 op1 = op0;
17533 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17534 op1 = copy_to_mode_reg (mode0, op1);
17535
17536 pat = GEN_FCN (icode) (target, op0, op1);
17537 if (! pat)
17538 return 0;
17539 emit_insn (pat);
17540 return target;
17541 }
17542
17543 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17544
17545 static rtx
17546 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17547 rtx target)
17548 {
17549 rtx pat;
17550 tree arg0 = CALL_EXPR_ARG (exp, 0);
17551 tree arg1 = CALL_EXPR_ARG (exp, 1);
17552 rtx op0 = expand_normal (arg0);
17553 rtx op1 = expand_normal (arg1);
17554 rtx op2;
17555 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17556 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17557 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17558 enum rtx_code comparison = d->comparison;
17559
17560 if (VECTOR_MODE_P (mode0))
17561 op0 = safe_vector_operand (op0, mode0);
17562 if (VECTOR_MODE_P (mode1))
17563 op1 = safe_vector_operand (op1, mode1);
17564
17565 /* Swap operands if we have a comparison that isn't available in
17566 hardware. */
17567 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17568 {
17569 rtx tmp = gen_reg_rtx (mode1);
17570 emit_move_insn (tmp, op1);
17571 op1 = op0;
17572 op0 = tmp;
17573 }
17574
17575 if (optimize || !target
17576 || GET_MODE (target) != tmode
17577 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17578 target = gen_reg_rtx (tmode);
17579
17580 if ((optimize && !register_operand (op0, mode0))
17581 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17582 op0 = copy_to_mode_reg (mode0, op0);
17583 if ((optimize && !register_operand (op1, mode1))
17584 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17585 op1 = copy_to_mode_reg (mode1, op1);
17586
17587 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17588 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17589 if (! pat)
17590 return 0;
17591 emit_insn (pat);
17592 return target;
17593 }
17594
17595 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17596
17597 static rtx
17598 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17599 rtx target)
17600 {
17601 rtx pat;
17602 tree arg0 = CALL_EXPR_ARG (exp, 0);
17603 tree arg1 = CALL_EXPR_ARG (exp, 1);
17604 rtx op0 = expand_normal (arg0);
17605 rtx op1 = expand_normal (arg1);
17606 rtx op2;
17607 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17608 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17609 enum rtx_code comparison = d->comparison;
17610
17611 if (VECTOR_MODE_P (mode0))
17612 op0 = safe_vector_operand (op0, mode0);
17613 if (VECTOR_MODE_P (mode1))
17614 op1 = safe_vector_operand (op1, mode1);
17615
17616 /* Swap operands if we have a comparison that isn't available in
17617 hardware. */
17618 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17619 {
17620 rtx tmp = op1;
17621 op1 = op0;
17622 op0 = tmp;
17623 }
17624
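/* The comi instruction only sets the flags; materialize the result as a
   0/1 value in the low byte of a fresh SImode register.  */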
17625 target = gen_reg_rtx (SImode);
17626 emit_move_insn (target, const0_rtx);
17627 target = gen_rtx_SUBREG (QImode, target, 0);
17628
17629 if ((optimize && !register_operand (op0, mode0))
17630 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17631 op0 = copy_to_mode_reg (mode0, op0);
17632 if ((optimize && !register_operand (op1, mode1))
17633 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17634 op1 = copy_to_mode_reg (mode1, op1);
17635
17636 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17637 pat = GEN_FCN (d->icode) (op0, op1);
17638 if (! pat)
17639 return 0;
17640 emit_insn (pat);
17641 emit_insn (gen_rtx_SET (VOIDmode,
17642 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17643 gen_rtx_fmt_ee (comparison, QImode,
17644 SET_DEST (pat),
17645 const0_rtx)));
17646
17647 return SUBREG_REG (target);
17648 }
17649
17650 /* Return the integer constant in ARG. Constrain it to be in the range
17651 of the subparts of VEC_TYPE; issue an error if not. */
17652
17653 static int
17654 get_element_number (tree vec_type, tree arg)
17655 {
17656 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17657
17658 if (!host_integerp (arg, 1)
17659 || (elt = tree_low_cst (arg, 1), elt > max))
17660 {
17661 error ("selector must be an integer constant in the range 0..%wi", max);
17662 return 0;
17663 }
17664
17665 return elt;
17666 }
17667
17668 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17669 ix86_expand_vector_init. We DO have language-level syntax for this, in
17670 the form of (type){ init-list }. Except that since we can't place emms
17671 instructions from inside the compiler, we can't allow the use of MMX
17672 registers unless the user explicitly asks for it. So we do *not* define
17673 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17674 we have builtins invoked by mmintrin.h that give us license to emit
17675 these sorts of instructions. */
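/* For example, mmintrin.h implements its _mm_set_* intrinsics on top of the
   __builtin_ia32_vec_init_* builtins defined above.  */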
17676
17677 static rtx
17678 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17679 {
17680 enum machine_mode tmode = TYPE_MODE (type);
17681 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17682 int i, n_elt = GET_MODE_NUNITS (tmode);
17683 rtvec v = rtvec_alloc (n_elt);
17684
17685 gcc_assert (VECTOR_MODE_P (tmode));
17686 gcc_assert (call_expr_nargs (exp) == n_elt);
17687
17688 for (i = 0; i < n_elt; ++i)
17689 {
17690 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17691 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17692 }
17693
17694 if (!target || !register_operand (target, tmode))
17695 target = gen_reg_rtx (tmode);
17696
17697 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17698 return target;
17699 }
17700
17701 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17702 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17703 had a language-level syntax for referencing vector elements. */
17704
17705 static rtx
17706 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17707 {
17708 enum machine_mode tmode, mode0;
17709 tree arg0, arg1;
17710 int elt;
17711 rtx op0;
17712
17713 arg0 = CALL_EXPR_ARG (exp, 0);
17714 arg1 = CALL_EXPR_ARG (exp, 1);
17715
17716 op0 = expand_normal (arg0);
17717 elt = get_element_number (TREE_TYPE (arg0), arg1);
17718
17719 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17720 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17721 gcc_assert (VECTOR_MODE_P (mode0));
17722
17723 op0 = force_reg (mode0, op0);
17724
17725 if (optimize || !target || !register_operand (target, tmode))
17726 target = gen_reg_rtx (tmode);
17727
17728 ix86_expand_vector_extract (true, target, op0, elt);
17729
17730 return target;
17731 }
17732
17733 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17734 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17735 a language-level syntax for referencing vector elements. */
17736
17737 static rtx
17738 ix86_expand_vec_set_builtin (tree exp)
17739 {
17740 enum machine_mode tmode, mode1;
17741 tree arg0, arg1, arg2;
17742 int elt;
17743 rtx op0, op1;
17744
17745 arg0 = CALL_EXPR_ARG (exp, 0);
17746 arg1 = CALL_EXPR_ARG (exp, 1);
17747 arg2 = CALL_EXPR_ARG (exp, 2);
17748
17749 tmode = TYPE_MODE (TREE_TYPE (arg0));
17750 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17751 gcc_assert (VECTOR_MODE_P (tmode));
17752
17753 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17754 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17755 elt = get_element_number (TREE_TYPE (arg0), arg2);
17756
17757 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17758 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17759
17760 op0 = force_reg (tmode, op0);
17761 op1 = force_reg (mode1, op1);
17762
17763 ix86_expand_vector_set (true, op0, op1, elt);
17764
17765 return op0;
17766 }
17767
17768 /* Expand an expression EXP that calls a built-in function,
17769 with result going to TARGET if that's convenient
17770 (and in mode MODE if that's convenient).
17771 SUBTARGET may be used as the target for computing one of EXP's operands.
17772 IGNORE is nonzero if the value is to be ignored. */
17773
17774 static rtx
17775 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17776 enum machine_mode mode ATTRIBUTE_UNUSED,
17777 int ignore ATTRIBUTE_UNUSED)
17778 {
17779 const struct builtin_description *d;
17780 size_t i;
17781 enum insn_code icode;
17782 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17783 tree arg0, arg1, arg2, arg3;
17784 rtx op0, op1, op2, op3, pat;
17785 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17786 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17787
17788 switch (fcode)
17789 {
17790 case IX86_BUILTIN_EMMS:
17791 emit_insn (gen_mmx_emms ());
17792 return 0;
17793
17794 case IX86_BUILTIN_SFENCE:
17795 emit_insn (gen_sse_sfence ());
17796 return 0;
17797
17798 case IX86_BUILTIN_MASKMOVQ:
17799 case IX86_BUILTIN_MASKMOVDQU:
17800 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17801 ? CODE_FOR_mmx_maskmovq
17802 : CODE_FOR_sse2_maskmovdqu);
17803 /* Note the arg order is different from the operand order. */
17804 arg1 = CALL_EXPR_ARG (exp, 0);
17805 arg2 = CALL_EXPR_ARG (exp, 1);
17806 arg0 = CALL_EXPR_ARG (exp, 2);
17807 op0 = expand_normal (arg0);
17808 op1 = expand_normal (arg1);
17809 op2 = expand_normal (arg2);
17810 mode0 = insn_data[icode].operand[0].mode;
17811 mode1 = insn_data[icode].operand[1].mode;
17812 mode2 = insn_data[icode].operand[2].mode;
17813
17814 op0 = force_reg (Pmode, op0);
17815 op0 = gen_rtx_MEM (mode1, op0);
17816
17817 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17818 op0 = copy_to_mode_reg (mode0, op0);
17819 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17820 op1 = copy_to_mode_reg (mode1, op1);
17821 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17822 op2 = copy_to_mode_reg (mode2, op2);
17823 pat = GEN_FCN (icode) (op0, op1, op2);
17824 if (! pat)
17825 return 0;
17826 emit_insn (pat);
17827 return 0;
17828
17829 case IX86_BUILTIN_SQRTSS:
17830 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17831 case IX86_BUILTIN_RSQRTSS:
17832 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17833 case IX86_BUILTIN_RCPSS:
17834 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17835
17836 case IX86_BUILTIN_LOADUPS:
17837 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17838
17839 case IX86_BUILTIN_STOREUPS:
17840 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17841
17842 case IX86_BUILTIN_LOADHPS:
17843 case IX86_BUILTIN_LOADLPS:
17844 case IX86_BUILTIN_LOADHPD:
17845 case IX86_BUILTIN_LOADLPD:
17846 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17847 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17848 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17849 : CODE_FOR_sse2_loadlpd);
17850 arg0 = CALL_EXPR_ARG (exp, 0);
17851 arg1 = CALL_EXPR_ARG (exp, 1);
17852 op0 = expand_normal (arg0);
17853 op1 = expand_normal (arg1);
17854 tmode = insn_data[icode].operand[0].mode;
17855 mode0 = insn_data[icode].operand[1].mode;
17856 mode1 = insn_data[icode].operand[2].mode;
17857
17858 op0 = force_reg (mode0, op0);
17859 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17860 if (optimize || target == 0
17861 || GET_MODE (target) != tmode
17862 || !register_operand (target, tmode))
17863 target = gen_reg_rtx (tmode);
17864 pat = GEN_FCN (icode) (target, op0, op1);
17865 if (! pat)
17866 return 0;
17867 emit_insn (pat);
17868 return target;
17869
17870 case IX86_BUILTIN_STOREHPS:
17871 case IX86_BUILTIN_STORELPS:
17872 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17873 : CODE_FOR_sse_storelps);
17874 arg0 = CALL_EXPR_ARG (exp, 0);
17875 arg1 = CALL_EXPR_ARG (exp, 1);
17876 op0 = expand_normal (arg0);
17877 op1 = expand_normal (arg1);
17878 mode0 = insn_data[icode].operand[0].mode;
17879 mode1 = insn_data[icode].operand[1].mode;
17880
17881 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17882 op1 = force_reg (mode1, op1);
17883
17884 pat = GEN_FCN (icode) (op0, op1);
17885 if (! pat)
17886 return 0;
17887 emit_insn (pat);
17888 return const0_rtx;
17889
17890 case IX86_BUILTIN_MOVNTPS:
17891 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17892 case IX86_BUILTIN_MOVNTQ:
17893 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17894
17895 case IX86_BUILTIN_LDMXCSR:
17896 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17897 target = assign_386_stack_local (SImode, SLOT_TEMP);
17898 emit_move_insn (target, op0);
17899 emit_insn (gen_sse_ldmxcsr (target));
17900 return 0;
17901
17902 case IX86_BUILTIN_STMXCSR:
17903 target = assign_386_stack_local (SImode, SLOT_TEMP);
17904 emit_insn (gen_sse_stmxcsr (target));
17905 return copy_to_mode_reg (SImode, target);
17906
17907 case IX86_BUILTIN_SHUFPS:
17908 case IX86_BUILTIN_SHUFPD:
17909 icode = (fcode == IX86_BUILTIN_SHUFPS
17910 ? CODE_FOR_sse_shufps
17911 : CODE_FOR_sse2_shufpd);
17912 arg0 = CALL_EXPR_ARG (exp, 0);
17913 arg1 = CALL_EXPR_ARG (exp, 1);
17914 arg2 = CALL_EXPR_ARG (exp, 2);
17915 op0 = expand_normal (arg0);
17916 op1 = expand_normal (arg1);
17917 op2 = expand_normal (arg2);
17918 tmode = insn_data[icode].operand[0].mode;
17919 mode0 = insn_data[icode].operand[1].mode;
17920 mode1 = insn_data[icode].operand[2].mode;
17921 mode2 = insn_data[icode].operand[3].mode;
17922
17923 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17924 op0 = copy_to_mode_reg (mode0, op0);
17925 if ((optimize && !register_operand (op1, mode1))
17926 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17927 op1 = copy_to_mode_reg (mode1, op1);
17928 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17929 {
17930 /* @@@ better error message */
17931 error ("mask must be an immediate");
17932 return gen_reg_rtx (tmode);
17933 }
17934 if (optimize || target == 0
17935 || GET_MODE (target) != tmode
17936 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17937 target = gen_reg_rtx (tmode);
17938 pat = GEN_FCN (icode) (target, op0, op1, op2);
17939 if (! pat)
17940 return 0;
17941 emit_insn (pat);
17942 return target;
17943
17944 case IX86_BUILTIN_PSHUFW:
17945 case IX86_BUILTIN_PSHUFD:
17946 case IX86_BUILTIN_PSHUFHW:
17947 case IX86_BUILTIN_PSHUFLW:
17948 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17949 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17950 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17951 : CODE_FOR_mmx_pshufw);
17952 arg0 = CALL_EXPR_ARG (exp, 0);
17953 arg1 = CALL_EXPR_ARG (exp, 1);
17954 op0 = expand_normal (arg0);
17955 op1 = expand_normal (arg1);
17956 tmode = insn_data[icode].operand[0].mode;
17957 mode1 = insn_data[icode].operand[1].mode;
17958 mode2 = insn_data[icode].operand[2].mode;
17959
17960 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17961 op0 = copy_to_mode_reg (mode1, op0);
17962 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17963 {
17964 /* @@@ better error message */
17965 error ("mask must be an immediate");
17966 return const0_rtx;
17967 }
17968 if (target == 0
17969 || GET_MODE (target) != tmode
17970 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17971 target = gen_reg_rtx (tmode);
17972 pat = GEN_FCN (icode) (target, op0, op1);
17973 if (! pat)
17974 return 0;
17975 emit_insn (pat);
17976 return target;
17977
17978 case IX86_BUILTIN_PSLLDQI128:
17979 case IX86_BUILTIN_PSRLDQI128:
17980 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17981 : CODE_FOR_sse2_lshrti3);
17982 arg0 = CALL_EXPR_ARG (exp, 0);
17983 arg1 = CALL_EXPR_ARG (exp, 1);
17984 op0 = expand_normal (arg0);
17985 op1 = expand_normal (arg1);
17986 tmode = insn_data[icode].operand[0].mode;
17987 mode1 = insn_data[icode].operand[1].mode;
17988 mode2 = insn_data[icode].operand[2].mode;
17989
17990 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17991 {
17992 op0 = copy_to_reg (op0);
17993 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17994 }
17995 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17996 {
17997 error ("shift must be an immediate");
17998 return const0_rtx;
17999 }
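/* The sse2_ashlti3/sse2_lshrti3 patterns operate on TImode; allocate the
   result in V2DImode and access it through a TImode subreg.  */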
18000 target = gen_reg_rtx (V2DImode);
18001 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18002 if (! pat)
18003 return 0;
18004 emit_insn (pat);
18005 return target;
18006
18007 case IX86_BUILTIN_FEMMS:
18008 emit_insn (gen_mmx_femms ());
18009 return NULL_RTX;
18010
18011 case IX86_BUILTIN_PAVGUSB:
18012 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18013
18014 case IX86_BUILTIN_PF2ID:
18015 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18016
18017 case IX86_BUILTIN_PFACC:
18018 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18019
18020 case IX86_BUILTIN_PFADD:
18021 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18022
18023 case IX86_BUILTIN_PFCMPEQ:
18024 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18025
18026 case IX86_BUILTIN_PFCMPGE:
18027 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18028
18029 case IX86_BUILTIN_PFCMPGT:
18030 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18031
18032 case IX86_BUILTIN_PFMAX:
18033 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18034
18035 case IX86_BUILTIN_PFMIN:
18036 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18037
18038 case IX86_BUILTIN_PFMUL:
18039 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18040
18041 case IX86_BUILTIN_PFRCP:
18042 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18043
18044 case IX86_BUILTIN_PFRCPIT1:
18045 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18046
18047 case IX86_BUILTIN_PFRCPIT2:
18048 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18049
18050 case IX86_BUILTIN_PFRSQIT1:
18051 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18052
18053 case IX86_BUILTIN_PFRSQRT:
18054 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18055
18056 case IX86_BUILTIN_PFSUB:
18057 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18058
18059 case IX86_BUILTIN_PFSUBR:
18060 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18061
18062 case IX86_BUILTIN_PI2FD:
18063 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18064
18065 case IX86_BUILTIN_PMULHRW:
18066 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18067
18068 case IX86_BUILTIN_PF2IW:
18069 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18070
18071 case IX86_BUILTIN_PFNACC:
18072 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18073
18074 case IX86_BUILTIN_PFPNACC:
18075 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18076
18077 case IX86_BUILTIN_PI2FW:
18078 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18079
18080 case IX86_BUILTIN_PSWAPDSI:
18081 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18082
18083 case IX86_BUILTIN_PSWAPDSF:
18084 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18085
18086 case IX86_BUILTIN_SQRTSD:
18087 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18088 case IX86_BUILTIN_LOADUPD:
18089 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18090 case IX86_BUILTIN_STOREUPD:
18091 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18092
18093 case IX86_BUILTIN_MFENCE:
18094 emit_insn (gen_sse2_mfence ());
18095 return 0;
18096 case IX86_BUILTIN_LFENCE:
18097 emit_insn (gen_sse2_lfence ());
18098 return 0;
18099
18100 case IX86_BUILTIN_CLFLUSH:
18101 arg0 = CALL_EXPR_ARG (exp, 0);
18102 op0 = expand_normal (arg0);
18103 icode = CODE_FOR_sse2_clflush;
18104 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18105 op0 = copy_to_mode_reg (Pmode, op0);
18106
18107 emit_insn (gen_sse2_clflush (op0));
18108 return 0;
18109
18110 case IX86_BUILTIN_MOVNTPD:
18111 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18112 case IX86_BUILTIN_MOVNTDQ:
18113 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18114 case IX86_BUILTIN_MOVNTI:
18115 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18116
18117 case IX86_BUILTIN_LOADDQU:
18118 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18119 case IX86_BUILTIN_STOREDQU:
18120 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18121
18122 case IX86_BUILTIN_MONITOR:
18123 arg0 = CALL_EXPR_ARG (exp, 0);
18124 arg1 = CALL_EXPR_ARG (exp, 1);
18125 arg2 = CALL_EXPR_ARG (exp, 2);
18126 op0 = expand_normal (arg0);
18127 op1 = expand_normal (arg1);
18128 op2 = expand_normal (arg2);
18129 if (!REG_P (op0))
18130 op0 = copy_to_mode_reg (Pmode, op0);
18131 if (!REG_P (op1))
18132 op1 = copy_to_mode_reg (SImode, op1);
18133 if (!REG_P (op2))
18134 op2 = copy_to_mode_reg (SImode, op2);
18135 if (!TARGET_64BIT)
18136 emit_insn (gen_sse3_monitor (op0, op1, op2));
18137 else
18138 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18139 return 0;
18140
18141 case IX86_BUILTIN_MWAIT:
18142 arg0 = CALL_EXPR_ARG (exp, 0);
18143 arg1 = CALL_EXPR_ARG (exp, 1);
18144 op0 = expand_normal (arg0);
18145 op1 = expand_normal (arg1);
18146 if (!REG_P (op0))
18147 op0 = copy_to_mode_reg (SImode, op0);
18148 if (!REG_P (op1))
18149 op1 = copy_to_mode_reg (SImode, op1);
18150 emit_insn (gen_sse3_mwait (op0, op1));
18151 return 0;
18152
18153 case IX86_BUILTIN_LDDQU:
18154 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18155 target, 1);
18156
18157 case IX86_BUILTIN_PALIGNR:
18158 case IX86_BUILTIN_PALIGNR128:
18159 if (fcode == IX86_BUILTIN_PALIGNR)
18160 {
18161 icode = CODE_FOR_ssse3_palignrdi;
18162 mode = DImode;
18163 }
18164 else
18165 {
18166 icode = CODE_FOR_ssse3_palignrti;
18167 mode = V2DImode;
18168 }
18169 arg0 = CALL_EXPR_ARG (exp, 0);
18170 arg1 = CALL_EXPR_ARG (exp, 1);
18171 arg2 = CALL_EXPR_ARG (exp, 2);
18172 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18173 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18174 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18175 tmode = insn_data[icode].operand[0].mode;
18176 mode1 = insn_data[icode].operand[1].mode;
18177 mode2 = insn_data[icode].operand[2].mode;
18178 mode3 = insn_data[icode].operand[3].mode;
18179
18180 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18181 {
18182 op0 = copy_to_reg (op0);
18183 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18184 }
18185 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18186 {
18187 op1 = copy_to_reg (op1);
18188 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18189 }
18190 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18191 {
18192 error ("shift must be an immediate");
18193 return const0_rtx;
18194 }
18195 target = gen_reg_rtx (mode);
18196 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18197 op0, op1, op2);
18198 if (! pat)
18199 return 0;
18200 emit_insn (pat);
18201 return target;
18202
18203 case IX86_BUILTIN_MOVNTSD:
18204 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18205
18206 case IX86_BUILTIN_MOVNTSS:
18207 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18208
18209 case IX86_BUILTIN_INSERTQ:
18210 case IX86_BUILTIN_EXTRQ:
18211 icode = (fcode == IX86_BUILTIN_EXTRQ
18212 ? CODE_FOR_sse4a_extrq
18213 : CODE_FOR_sse4a_insertq);
18214 arg0 = CALL_EXPR_ARG (exp, 0);
18215 arg1 = CALL_EXPR_ARG (exp, 1);
18216 op0 = expand_normal (arg0);
18217 op1 = expand_normal (arg1);
18218 tmode = insn_data[icode].operand[0].mode;
18219 mode1 = insn_data[icode].operand[1].mode;
18220 mode2 = insn_data[icode].operand[2].mode;
18221 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18222 op0 = copy_to_mode_reg (mode1, op0);
18223 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18224 op1 = copy_to_mode_reg (mode2, op1);
18225 if (optimize || target == 0
18226 || GET_MODE (target) != tmode
18227 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18228 target = gen_reg_rtx (tmode);
18229 pat = GEN_FCN (icode) (target, op0, op1);
18230 if (! pat)
18231 return NULL_RTX;
18232 emit_insn (pat);
18233 return target;
18234
18235 case IX86_BUILTIN_EXTRQI:
18236 icode = CODE_FOR_sse4a_extrqi;
18237 arg0 = CALL_EXPR_ARG (exp, 0);
18238 arg1 = CALL_EXPR_ARG (exp, 1);
18239 arg2 = CALL_EXPR_ARG (exp, 2);
18240 op0 = expand_normal (arg0);
18241 op1 = expand_normal (arg1);
18242 op2 = expand_normal (arg2);
18243 tmode = insn_data[icode].operand[0].mode;
18244 mode1 = insn_data[icode].operand[1].mode;
18245 mode2 = insn_data[icode].operand[2].mode;
18246 mode3 = insn_data[icode].operand[3].mode;
18247 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18248 op0 = copy_to_mode_reg (mode1, op0);
18249 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18250 {
18251 error ("index mask must be an immediate");
18252 return gen_reg_rtx (tmode);
18253 }
18254 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18255 {
18256 error ("length mask must be an immediate");
18257 return gen_reg_rtx (tmode);
18258 }
18259 if (optimize || target == 0
18260 || GET_MODE (target) != tmode
18261 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18262 target = gen_reg_rtx (tmode);
18263 pat = GEN_FCN (icode) (target, op0, op1, op2);
18264 if (! pat)
18265 return NULL_RTX;
18266 emit_insn (pat);
18267 return target;
18268
18269 case IX86_BUILTIN_INSERTQI:
18270 icode = CODE_FOR_sse4a_insertqi;
18271 arg0 = CALL_EXPR_ARG (exp, 0);
18272 arg1 = CALL_EXPR_ARG (exp, 1);
18273 arg2 = CALL_EXPR_ARG (exp, 2);
18274 arg3 = CALL_EXPR_ARG (exp, 3);
18275 op0 = expand_normal (arg0);
18276 op1 = expand_normal (arg1);
18277 op2 = expand_normal (arg2);
18278 op3 = expand_normal (arg3);
18279 tmode = insn_data[icode].operand[0].mode;
18280 mode1 = insn_data[icode].operand[1].mode;
18281 mode2 = insn_data[icode].operand[2].mode;
18282 mode3 = insn_data[icode].operand[3].mode;
18283 mode4 = insn_data[icode].operand[4].mode;
18284
18285 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18286 op0 = copy_to_mode_reg (mode1, op0);
18287
18288 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18289 op1 = copy_to_mode_reg (mode2, op1);
18290
18291 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18292 {
18293 error ("index mask must be an immediate");
18294 return gen_reg_rtx (tmode);
18295 }
18296 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18297 {
18298 error ("length mask must be an immediate");
18299 return gen_reg_rtx (tmode);
18300 }
18301 if (optimize || target == 0
18302 || GET_MODE (target) != tmode
18303 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18304 target = gen_reg_rtx (tmode);
18305 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18306 if (! pat)
18307 return NULL_RTX;
18308 emit_insn (pat);
18309 return target;
18310
18311 case IX86_BUILTIN_VEC_INIT_V2SI:
18312 case IX86_BUILTIN_VEC_INIT_V4HI:
18313 case IX86_BUILTIN_VEC_INIT_V8QI:
18314 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18315
18316 case IX86_BUILTIN_VEC_EXT_V2DF:
18317 case IX86_BUILTIN_VEC_EXT_V2DI:
18318 case IX86_BUILTIN_VEC_EXT_V4SF:
18319 case IX86_BUILTIN_VEC_EXT_V4SI:
18320 case IX86_BUILTIN_VEC_EXT_V8HI:
18321 case IX86_BUILTIN_VEC_EXT_V2SI:
18322 case IX86_BUILTIN_VEC_EXT_V4HI:
18323 return ix86_expand_vec_ext_builtin (exp, target);
18324
18325 case IX86_BUILTIN_VEC_SET_V8HI:
18326 case IX86_BUILTIN_VEC_SET_V4HI:
18327 return ix86_expand_vec_set_builtin (exp);
18328
18329 default:
18330 break;
18331 }
18332
18333 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18334 if (d->code == fcode)
18335 {
18336 /* Compares are treated specially. */
18337 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18338 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18339 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18340 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18341 return ix86_expand_sse_compare (d, exp, target);
18342
18343 return ix86_expand_binop_builtin (d->icode, exp, target);
18344 }
18345
18346 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18347 if (d->code == fcode)
18348 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18349
18350 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18351 if (d->code == fcode)
18352 return ix86_expand_sse_comi (d, exp, target);
18353
18354 gcc_unreachable ();
18355 }
18356
18357 /* Returns a function decl for a vectorized version of the builtin function
18358 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18359 if it is not available. */
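/* For example, a vectorized call to sqrt on V2DFmode data maps to
   IX86_BUILTIN_SQRTPD below.  */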
18360
18361 static tree
18362 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18363 tree type_in)
18364 {
18365 enum machine_mode in_mode, out_mode;
18366 int in_n, out_n;
18367
18368 if (TREE_CODE (type_out) != VECTOR_TYPE
18369 || TREE_CODE (type_in) != VECTOR_TYPE)
18370 return NULL_TREE;
18371
18372 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18373 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18374 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18375 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18376
18377 switch (fn)
18378 {
18379 case BUILT_IN_SQRT:
18380 if (out_mode == DFmode && out_n == 2
18381 && in_mode == DFmode && in_n == 2)
18382 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18383 return NULL_TREE;
18384
18385 case BUILT_IN_SQRTF:
18386 if (out_mode == SFmode && out_n == 4
18387 && in_mode == SFmode && in_n == 4)
18388 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18389 return NULL_TREE;
18390
18391 case BUILT_IN_LRINTF:
18392 if (out_mode == SImode && out_n == 4
18393 && in_mode == SFmode && in_n == 4)
18394 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18395 return NULL_TREE;
18396
18397 default:
18398 ;
18399 }
18400
18401 return NULL_TREE;
18402 }
18403
18404 /* Returns a decl of a function that implements conversion of the
18405 input vector of type TYPE, or NULL_TREE if it is not available. */
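/* For example, a vectorized int-to-float conversion (FLOAT_EXPR) on
   V4SImode data maps to IX86_BUILTIN_CVTDQ2PS below.  */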
18406
18407 static tree
18408 ix86_builtin_conversion (enum tree_code code, tree type)
18409 {
18410 if (TREE_CODE (type) != VECTOR_TYPE)
18411 return NULL_TREE;
18412
18413 switch (code)
18414 {
18415 case FLOAT_EXPR:
18416 switch (TYPE_MODE (type))
18417 {
18418 case V4SImode:
18419 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18420 default:
18421 return NULL_TREE;
18422 }
18423
18424 case FIX_TRUNC_EXPR:
18425 switch (TYPE_MODE (type))
18426 {
18427 case V4SFmode:
18428 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18429 default:
18430 return NULL_TREE;
18431 }
18432 default:
18433 return NULL_TREE;
18434
18435 }
18436 }
18437
18438 /* Store OPERAND to memory after reload has completed. This means
18439 that we can't easily use assign_stack_local. */
18440 rtx
18441 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18442 {
18443 rtx result;
18444
18445 gcc_assert (reload_completed);
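/* With a red zone we can simply store below the stack pointer without
   adjusting it; otherwise we push the operand onto the stack.  */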
18446 if (TARGET_RED_ZONE)
18447 {
18448 result = gen_rtx_MEM (mode,
18449 gen_rtx_PLUS (Pmode,
18450 stack_pointer_rtx,
18451 GEN_INT (-RED_ZONE_SIZE)));
18452 emit_move_insn (result, operand);
18453 }
18454 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18455 {
18456 switch (mode)
18457 {
18458 case HImode:
18459 case SImode:
18460 operand = gen_lowpart (DImode, operand);
18461 /* FALLTHRU */
18462 case DImode:
18463 emit_insn (
18464 gen_rtx_SET (VOIDmode,
18465 gen_rtx_MEM (DImode,
18466 gen_rtx_PRE_DEC (DImode,
18467 stack_pointer_rtx)),
18468 operand));
18469 break;
18470 default:
18471 gcc_unreachable ();
18472 }
18473 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18474 }
18475 else
18476 {
18477 switch (mode)
18478 {
18479 case DImode:
18480 {
18481 rtx operands[2];
18482 split_di (&operand, 1, operands, operands + 1);
18483 emit_insn (
18484 gen_rtx_SET (VOIDmode,
18485 gen_rtx_MEM (SImode,
18486 gen_rtx_PRE_DEC (Pmode,
18487 stack_pointer_rtx)),
18488 operands[1]));
18489 emit_insn (
18490 gen_rtx_SET (VOIDmode,
18491 gen_rtx_MEM (SImode,
18492 gen_rtx_PRE_DEC (Pmode,
18493 stack_pointer_rtx)),
18494 operands[0]));
18495 }
18496 break;
18497 case HImode:
18498 /* Store HImodes as SImodes. */
18499 operand = gen_lowpart (SImode, operand);
18500 /* FALLTHRU */
18501 case SImode:
18502 emit_insn (
18503 gen_rtx_SET (VOIDmode,
18504 gen_rtx_MEM (GET_MODE (operand),
18505 gen_rtx_PRE_DEC (SImode,
18506 stack_pointer_rtx)),
18507 operand));
18508 break;
18509 default:
18510 gcc_unreachable ();
18511 }
18512 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18513 }
18514 return result;
18515 }
18516
18517 /* Free operand from the memory. */
18518 void
18519 ix86_free_from_memory (enum machine_mode mode)
18520 {
18521 if (!TARGET_RED_ZONE)
18522 {
18523 int size;
18524
18525 if (mode == DImode || TARGET_64BIT)
18526 size = 8;
18527 else
18528 size = 4;
18529 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18530 to a pop or add instruction if registers are available. */
18531 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18532 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18533 GEN_INT (size))));
18534 }
18535 }
18536
18537 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18538 QImode must go into class Q_REGS.
18539 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and movdf
18540 do mem-to-mem moves through integer regs. */
18541 enum reg_class
18542 ix86_preferred_reload_class (rtx x, enum reg_class class)
18543 {
18544 enum machine_mode mode = GET_MODE (x);
18545
18546 /* We're only allowed to return a subclass of CLASS. Many of the
18547 following checks fail for NO_REGS, so eliminate that early. */
18548 if (class == NO_REGS)
18549 return NO_REGS;
18550
18551 /* All classes can load zeros. */
18552 if (x == CONST0_RTX (mode))
18553 return class;
18554
18555 /* Force constants into memory if we are loading a (nonzero) constant into
18556 an MMX or SSE register. This is because there are no MMX/SSE instructions
18557 that can load a constant directly. */
18558 if (CONSTANT_P (x)
18559 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18560 return NO_REGS;
18561
18562 /* Prefer SSE regs only, if we can use them for math. */
18563 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18564 return SSE_CLASS_P (class) ? class : NO_REGS;
18565
18566 /* Floating-point constants need more complex checks. */
18567 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18568 {
18569 /* General regs can load everything. */
18570 if (reg_class_subset_p (class, GENERAL_REGS))
18571 return class;
18572
18573 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18574 zero above. We only want to wind up preferring 80387 registers if
18575 we plan on doing computation with them. */
18576 if (TARGET_80387
18577 && standard_80387_constant_p (x))
18578 {
18579 /* Limit class to non-sse. */
18580 if (class == FLOAT_SSE_REGS)
18581 return FLOAT_REGS;
18582 if (class == FP_TOP_SSE_REGS)
18583 return FP_TOP_REG;
18584 if (class == FP_SECOND_SSE_REGS)
18585 return FP_SECOND_REG;
18586 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18587 return class;
18588 }
18589
18590 return NO_REGS;
18591 }
18592
18593 /* Generally when we see PLUS here, it's the function invariant
18594 (plus soft-fp const_int), which can only be computed into general
18595 regs. */
18596 if (GET_CODE (x) == PLUS)
18597 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18598
18599 /* QImode constants are easy to load, but non-constant QImode data
18600 must go into Q_REGS. */
18601 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18602 {
18603 if (reg_class_subset_p (class, Q_REGS))
18604 return class;
18605 if (reg_class_subset_p (Q_REGS, class))
18606 return Q_REGS;
18607 return NO_REGS;
18608 }
18609
18610 return class;
18611 }
18612
18613 /* Discourage putting floating-point values in SSE registers unless
18614 SSE math is being used, and likewise for the 387 registers. */
18615 enum reg_class
18616 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18617 {
18618 enum machine_mode mode = GET_MODE (x);
18619
18620 /* Restrict the output reload class to the register bank that we are doing
18621 math on. Rather than return a class that is not a subset of CLASS, reject
18622 this alternative: if reload cannot do this, it will still use its choice. */
18623 mode = GET_MODE (x);
18624 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18625 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18626
18627 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18628 {
18629 if (class == FP_TOP_SSE_REGS)
18630 return FP_TOP_REG;
18631 else if (class == FP_SECOND_SSE_REGS)
18632 return FP_SECOND_REG;
18633 else
18634 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18635 }
18636
18637 return class;
18638 }
18639
18640 /* If we are copying between general and FP registers, we need a memory
18641 location. The same is true for SSE and MMX registers.
18642
18643 The macro can't work reliably when one of the CLASSES is a class containing
18644 registers from multiple units (SSE, MMX, integer). We avoid this by never
18645 combining those units in a single alternative in the machine description.
18646 Ensure that this constraint holds to avoid unexpected surprises.
18647
18648 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18649 enforce these sanity checks. */
18650
18651 int
18652 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18653 enum machine_mode mode, int strict)
18654 {
18655 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18656 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18657 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18658 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18659 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18660 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18661 {
18662 gcc_assert (!strict);
18663 return true;
18664 }
18665
18666 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18667 return true;
18668
18669 /* ??? This is a lie. We do have moves between mmx/general, and between
18670 mmx/sse2. But by saying we need secondary memory we discourage the
18671 register allocator from using the mmx registers unless needed. */
18672 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18673 return true;
18674
18675 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18676 {
18677 /* SSE1 doesn't have any direct moves from other classes. */
18678 if (!TARGET_SSE2)
18679 return true;
18680
18681 /* If the target says that inter-unit moves are more expensive
18682 than moving through memory, then don't generate them. */
18683 if (!TARGET_INTER_UNIT_MOVES)
18684 return true;
18685
18686 /* Between SSE and general, we have moves no larger than word size. */
18687 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18688 return true;
18689 }
18690
18691 return false;
18692 }
18693
18694 /* Return true if the registers in CLASS cannot represent the change from
18695 modes FROM to TO. */
18696
18697 bool
18698 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18699 enum reg_class class)
18700 {
18701 if (from == to)
18702 return false;
18703
18704 /* x87 registers can't do subreg at all, as all values are reformatted
18705 to extended precision. */
18706 if (MAYBE_FLOAT_CLASS_P (class))
18707 return true;
18708
18709 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18710 {
18711 /* Vector registers do not support QI or HImode loads. If we don't
18712 disallow a change to these modes, reload will assume it's ok to
18713 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18714 the vec_dupv4hi pattern. */
18715 if (GET_MODE_SIZE (from) < 4)
18716 return true;
18717
18718 /* Vector registers do not support subreg with nonzero offsets, which
18719 are otherwise valid for integer registers. Since we can't see
18720 whether we have a nonzero offset from here, prohibit all
18721 nonparadoxical subregs changing size. */
18722 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18723 return true;
18724 }
18725
18726 return false;
18727 }
18728
18729 /* Return the cost of moving data from a register in class CLASS1 to
18730 one in class CLASS2.
18731
18732 It is not required that the cost always equal 2 when FROM is the same as TO;
18733 on some machines it is expensive to move between registers if they are not
18734 general registers. */
18735
18736 int
18737 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18738 enum reg_class class2)
18739 {
18740 /* In case we require secondary memory, compute the cost of the store
18741 followed by the load. To avoid bad register allocation choices, we need
18742 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18743
18744 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18745 {
18746 int cost = 1;
18747
18748 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18749 MEMORY_MOVE_COST (mode, class1, 1));
18750 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18751 MEMORY_MOVE_COST (mode, class2, 1));
18752
18753 /* When copying from a general purpose register we may emit multiple
18754 stores followed by a single load, causing a memory size mismatch stall.
18755 Count this as an arbitrarily high cost of 20. */
18756 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18757 cost += 20;
18758
18759 /* In the case of FP/MMX moves, the registers actually overlap, and we
18760 have to switch modes in order to treat them differently. */
18761 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18762 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18763 cost += 20;
18764
18765 return cost;
18766 }
18767
18768 /* Moves between SSE/MMX and integer unit are expensive. */
18769 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18770 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18771 return ix86_cost->mmxsse_to_integer;
18772 if (MAYBE_FLOAT_CLASS_P (class1))
18773 return ix86_cost->fp_move;
18774 if (MAYBE_SSE_CLASS_P (class1))
18775 return ix86_cost->sse_move;
18776 if (MAYBE_MMX_CLASS_P (class1))
18777 return ix86_cost->mmx_move;
18778 return 2;
18779 }
18780
18781 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18782
18783 bool
18784 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18785 {
18786 /* Flags and only flags can only hold CCmode values. */
18787 if (CC_REGNO_P (regno))
18788 return GET_MODE_CLASS (mode) == MODE_CC;
18789 if (GET_MODE_CLASS (mode) == MODE_CC
18790 || GET_MODE_CLASS (mode) == MODE_RANDOM
18791 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18792 return 0;
18793 if (FP_REGNO_P (regno))
18794 return VALID_FP_MODE_P (mode);
18795 if (SSE_REGNO_P (regno))
18796 {
18797 /* We implement the move patterns for all vector modes into and
18798 out of SSE registers, even when no operation instructions
18799 are available. */
18800 return (VALID_SSE_REG_MODE (mode)
18801 || VALID_SSE2_REG_MODE (mode)
18802 || VALID_MMX_REG_MODE (mode)
18803 || VALID_MMX_REG_MODE_3DNOW (mode));
18804 }
18805 if (MMX_REGNO_P (regno))
18806 {
18807 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18808 so if the register is available at all, then we can move data of
18809 the given mode into or out of it. */
18810 return (VALID_MMX_REG_MODE (mode)
18811 || VALID_MMX_REG_MODE_3DNOW (mode));
18812 }
18813
18814 if (mode == QImode)
18815 {
18816 /* Take care with QImode values - they can live in non-QI regs,
18817 but then they do cause partial register stalls. */
18818 if (regno < 4 || TARGET_64BIT)
18819 return 1;
18820 if (!TARGET_PARTIAL_REG_STALL)
18821 return 1;
18822 return reload_in_progress || reload_completed;
18823 }
18824 /* We handle both integers and floats in the general purpose registers. */
18825 else if (VALID_INT_MODE_P (mode))
18826 return 1;
18827 else if (VALID_FP_MODE_P (mode))
18828 return 1;
18829 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18830 on to use that value in smaller contexts, this can easily force a
18831 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18832 supporting DImode, allow it. */
18833 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18834 return 1;
18835
18836 return 0;
18837 }
18838
18839 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18840 tieable integer mode. */
18841
18842 static bool
18843 ix86_tieable_integer_mode_p (enum machine_mode mode)
18844 {
18845 switch (mode)
18846 {
18847 case HImode:
18848 case SImode:
18849 return true;
18850
18851 case QImode:
18852 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18853
18854 case DImode:
18855 return TARGET_64BIT;
18856
18857 default:
18858 return false;
18859 }
18860 }
18861
18862 /* Return true if MODE1 is accessible in a register that can hold MODE2
18863 without copying. That is, all register classes that can hold MODE2
18864 can also hold MODE1. */
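/* For example, per the checks below: SFmode ties with XFmode, since every
   register class that can hold XFmode (x87 stack or general regs) can also
   hold SFmode, while DImode ties with SImode only for TARGET_64BIT, because
   DImode is a tieable integer mode only there.  */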
18865
18866 bool
18867 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18868 {
18869 if (mode1 == mode2)
18870 return true;
18871
18872 if (ix86_tieable_integer_mode_p (mode1)
18873 && ix86_tieable_integer_mode_p (mode2))
18874 return true;
18875
18876 /* MODE2 being XFmode implies fp stack or general regs, which means we
18877 can tie any smaller floating point modes to it. Note that we do not
18878 tie this with TFmode. */
18879 if (mode2 == XFmode)
18880 return mode1 == SFmode || mode1 == DFmode;
18881
18882 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18883 that we can tie it with SFmode. */
18884 if (mode2 == DFmode)
18885 return mode1 == SFmode;
18886
18887 /* If MODE2 is only appropriate for an SSE register, then tie with
18888 any other mode acceptable to SSE registers. */
18889 if (GET_MODE_SIZE (mode2) == 16
18890 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18891 return (GET_MODE_SIZE (mode1) == 16
18892 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18893
18894 /* If MODE2 is appropriate for an MMX register, then tie
18895 with any other mode acceptable to MMX registers. */
18896 if (GET_MODE_SIZE (mode2) == 8
18897 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18898 return (GET_MODE_SIZE (mode1) == 8
18899 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18900
18901 return false;
18902 }
18903
18904 /* Return the cost of moving data of mode M between a
18905 register and memory. A value of 2 is the default; this cost is
18906 relative to those in `REGISTER_MOVE_COST'.
18907
18908 If moving between registers and memory is more expensive than
18909 between two registers, you should define this macro to express the
18910 relative cost.
18911
18912 Also model the increased cost of moving QImode registers in
18913 non-Q_REGS classes.
18914 */
18915 int
18916 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18917 {
18918 if (FLOAT_CLASS_P (class))
18919 {
18920 int index;
18921 switch (mode)
18922 {
18923 case SFmode:
18924 index = 0;
18925 break;
18926 case DFmode:
18927 index = 1;
18928 break;
18929 case XFmode:
18930 index = 2;
18931 break;
18932 default:
18933 return 100;
18934 }
18935 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18936 }
18937 if (SSE_CLASS_P (class))
18938 {
18939 int index;
18940 switch (GET_MODE_SIZE (mode))
18941 {
18942 case 4:
18943 index = 0;
18944 break;
18945 case 8:
18946 index = 1;
18947 break;
18948 case 16:
18949 index = 2;
18950 break;
18951 default:
18952 return 100;
18953 }
18954 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18955 }
18956 if (MMX_CLASS_P (class))
18957 {
18958 int index;
18959 switch (GET_MODE_SIZE (mode))
18960 {
18961 case 4:
18962 index = 0;
18963 break;
18964 case 8:
18965 index = 1;
18966 break;
18967 default:
18968 return 100;
18969 }
18970 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18971 }
18972 switch (GET_MODE_SIZE (mode))
18973 {
18974 case 1:
18975 if (in)
18976 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18977 : ix86_cost->movzbl_load);
18978 else
18979 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18980 : ix86_cost->int_store[0] + 4);
18981 break;
18982 case 2:
18983 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18984 default:
18985 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18986 if (mode == TFmode)
18987 mode = XFmode;
18988 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18989 * (((int) GET_MODE_SIZE (mode)
18990 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18991 }
18992 }
18993
18994 /* Compute a (partial) cost for rtx X. Return true if the complete
18995 cost has been computed, and false if subexpressions should be
18996 scanned. In either case, *TOTAL contains the cost result. */
18997
18998 static bool
18999 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19000 {
19001 enum machine_mode mode = GET_MODE (x);
19002
19003 switch (code)
19004 {
19005 case CONST_INT:
19006 case CONST:
19007 case LABEL_REF:
19008 case SYMBOL_REF:
19009 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19010 *total = 3;
19011 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19012 *total = 2;
19013 else if (flag_pic && SYMBOLIC_CONST (x)
19014 && (!TARGET_64BIT
19015 || (GET_CODE (x) != LABEL_REF
19016 && (GET_CODE (x) != SYMBOL_REF
19017 || !SYMBOL_REF_LOCAL_P (x)))))
19018 *total = 1;
19019 else
19020 *total = 0;
19021 return true;
19022
19023 case CONST_DOUBLE:
19024 if (mode == VOIDmode)
19025 *total = 0;
19026 else
19027 switch (standard_80387_constant_p (x))
19028 {
19029 case 1: /* 0.0 */
19030 *total = 1;
19031 break;
19032 default: /* Other constants */
19033 *total = 2;
19034 break;
19035 case 0:
19036 case -1:
19037 /* Start with (MEM (SYMBOL_REF)), since that's where
19038 it'll probably end up. Add a penalty for size. */
19039 *total = (COSTS_N_INSNS (1)
19040 + (flag_pic != 0 && !TARGET_64BIT)
19041 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19042 break;
19043 }
19044 return true;
19045
19046 case ZERO_EXTEND:
19047 /* The zero extension is often completely free on x86_64, so make
19048 it as cheap as possible. */
19049 if (TARGET_64BIT && mode == DImode
19050 && GET_MODE (XEXP (x, 0)) == SImode)
19051 *total = 1;
19052 else if (TARGET_ZERO_EXTEND_WITH_AND)
19053 *total = ix86_cost->add;
19054 else
19055 *total = ix86_cost->movzx;
19056 return false;
19057
19058 case SIGN_EXTEND:
19059 *total = ix86_cost->movsx;
19060 return false;
19061
19062 case ASHIFT:
19063 if (CONST_INT_P (XEXP (x, 1))
19064 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19065 {
19066 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19067 if (value == 1)
19068 {
19069 *total = ix86_cost->add;
19070 return false;
19071 }
19072 if ((value == 2 || value == 3)
19073 && ix86_cost->lea <= ix86_cost->shift_const)
19074 {
19075 *total = ix86_cost->lea;
19076 return false;
19077 }
19078 }
19079 /* FALLTHRU */
19080
19081 case ROTATE:
19082 case ASHIFTRT:
19083 case LSHIFTRT:
19084 case ROTATERT:
19085 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19086 {
19087 if (CONST_INT_P (XEXP (x, 1)))
19088 {
19089 if (INTVAL (XEXP (x, 1)) > 32)
19090 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19091 else
19092 *total = ix86_cost->shift_const * 2;
19093 }
19094 else
19095 {
19096 if (GET_CODE (XEXP (x, 1)) == AND)
19097 *total = ix86_cost->shift_var * 2;
19098 else
19099 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19100 }
19101 }
19102 else
19103 {
19104 if (CONST_INT_P (XEXP (x, 1)))
19105 *total = ix86_cost->shift_const;
19106 else
19107 *total = ix86_cost->shift_var;
19108 }
19109 return false;
19110
19111 case MULT:
19112 if (FLOAT_MODE_P (mode))
19113 {
19114 *total = ix86_cost->fmul;
19115 return false;
19116 }
19117 else
19118 {
19119 rtx op0 = XEXP (x, 0);
19120 rtx op1 = XEXP (x, 1);
19121 int nbits;
19122 if (CONST_INT_P (XEXP (x, 1)))
19123 {
19124 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19125 for (nbits = 0; value != 0; value &= value - 1)
19126 nbits++;
19127 }
19128 else
19129 /* This is arbitrary. */
19130 nbits = 7;
19131
19132 /* Compute costs correctly for widening multiplication. */
19133 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19134 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19135 == GET_MODE_SIZE (mode))
19136 {
19137 int is_mulwiden = 0;
19138 enum machine_mode inner_mode = GET_MODE (op0);
19139
19140 if (GET_CODE (op0) == GET_CODE (op1))
19141 is_mulwiden = 1, op1 = XEXP (op1, 0);
19142 else if (CONST_INT_P (op1))
19143 {
19144 if (GET_CODE (op0) == SIGN_EXTEND)
19145 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19146 == INTVAL (op1);
19147 else
19148 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19149 }
19150
19151 if (is_mulwiden)
19152 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19153 }
19154
19155 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19156 + nbits * ix86_cost->mult_bit
19157 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19158
19159 return true;
19160 }
19161
19162 case DIV:
19163 case UDIV:
19164 case MOD:
19165 case UMOD:
19166 if (FLOAT_MODE_P (mode))
19167 *total = ix86_cost->fdiv;
19168 else
19169 *total = ix86_cost->divide[MODE_INDEX (mode)];
19170 return false;
19171
19172 case PLUS:
19173 if (FLOAT_MODE_P (mode))
19174 *total = ix86_cost->fadd;
19175 else if (GET_MODE_CLASS (mode) == MODE_INT
19176 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19177 {
19178 if (GET_CODE (XEXP (x, 0)) == PLUS
19179 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19180 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19181 && CONSTANT_P (XEXP (x, 1)))
19182 {
19183 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19184 if (val == 2 || val == 4 || val == 8)
19185 {
19186 *total = ix86_cost->lea;
19187 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19188 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19189 outer_code);
19190 *total += rtx_cost (XEXP (x, 1), outer_code);
19191 return true;
19192 }
19193 }
19194 else if (GET_CODE (XEXP (x, 0)) == MULT
19195 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19196 {
19197 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19198 if (val == 2 || val == 4 || val == 8)
19199 {
19200 *total = ix86_cost->lea;
19201 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19202 *total += rtx_cost (XEXP (x, 1), outer_code);
19203 return true;
19204 }
19205 }
19206 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19207 {
19208 *total = ix86_cost->lea;
19209 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19210 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19211 *total += rtx_cost (XEXP (x, 1), outer_code);
19212 return true;
19213 }
19214 }
19215 /* FALLTHRU */
19216
19217 case MINUS:
19218 if (FLOAT_MODE_P (mode))
19219 {
19220 *total = ix86_cost->fadd;
19221 return false;
19222 }
19223 /* FALLTHRU */
19224
19225 case AND:
19226 case IOR:
19227 case XOR:
19228 if (!TARGET_64BIT && mode == DImode)
19229 {
19230 *total = (ix86_cost->add * 2
19231 + (rtx_cost (XEXP (x, 0), outer_code)
19232 << (GET_MODE (XEXP (x, 0)) != DImode))
19233 + (rtx_cost (XEXP (x, 1), outer_code)
19234 << (GET_MODE (XEXP (x, 1)) != DImode)));
19235 return true;
19236 }
19237 /* FALLTHRU */
19238
19239 case NEG:
19240 if (FLOAT_MODE_P (mode))
19241 {
19242 *total = ix86_cost->fchs;
19243 return false;
19244 }
19245 /* FALLTHRU */
19246
19247 case NOT:
19248 if (!TARGET_64BIT && mode == DImode)
19249 *total = ix86_cost->add * 2;
19250 else
19251 *total = ix86_cost->add;
19252 return false;
19253
19254 case COMPARE:
19255 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19256 && XEXP (XEXP (x, 0), 1) == const1_rtx
19257 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19258 && XEXP (x, 1) == const0_rtx)
19259 {
19260 /* This kind of construct is implemented using test[bwl].
19261 Treat it as if we had an AND. */
19262 *total = (ix86_cost->add
19263 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19264 + rtx_cost (const1_rtx, outer_code));
19265 return true;
19266 }
19267 return false;
19268
19269 case FLOAT_EXTEND:
19270 if (!TARGET_SSE_MATH
19271 || mode == XFmode
19272 || (mode == DFmode && !TARGET_SSE2))
19273 *total = 0;
19274 return false;
19275
19276 case ABS:
19277 if (FLOAT_MODE_P (mode))
19278 *total = ix86_cost->fabs;
19279 return false;
19280
19281 case SQRT:
19282 if (FLOAT_MODE_P (mode))
19283 *total = ix86_cost->fsqrt;
19284 return false;
19285
19286 case UNSPEC:
19287 if (XINT (x, 1) == UNSPEC_TP)
19288 *total = 0;
19289 return false;
19290
19291 default:
19292 return false;
19293 }
19294 }
19295
19296 #if TARGET_MACHO
19297
19298 static int current_machopic_label_num;
19299
19300 /* Given a symbol name and its associated stub, write out the
19301 definition of the stub. */
19302
19303 void
19304 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19305 {
19306 unsigned int length;
19307 char *binder_name, *symbol_name, lazy_ptr_name[32];
19308 int label = ++current_machopic_label_num;
19309
19310 /* For 64-bit we shouldn't get here. */
19311 gcc_assert (!TARGET_64BIT);
19312
19313 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19314 symb = (*targetm.strip_name_encoding) (symb);
19315
19316 length = strlen (stub);
19317 binder_name = alloca (length + 32);
19318 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19319
19320 length = strlen (symb);
19321 symbol_name = alloca (length + 32);
19322 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19323
19324 sprintf (lazy_ptr_name, "L%d$lz", label);
19325
19326 if (MACHOPIC_PURE)
19327 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19328 else
19329 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19330
19331 fprintf (file, "%s:\n", stub);
19332 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19333
19334 if (MACHOPIC_PURE)
19335 {
19336 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19337 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19338 fprintf (file, "\tjmp\t*%%edx\n");
19339 }
19340 else
19341 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19342
19343 fprintf (file, "%s:\n", binder_name);
19344
19345 if (MACHOPIC_PURE)
19346 {
19347 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19348 fprintf (file, "\tpushl\t%%eax\n");
19349 }
19350 else
19351 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19352
19353 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19354
19355 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19356 fprintf (file, "%s:\n", lazy_ptr_name);
19357 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19358 fprintf (file, "\t.long %s\n", binder_name);
19359 }
19360
19361 void
19362 darwin_x86_file_end (void)
19363 {
19364 darwin_file_end ();
19365 ix86_file_end ();
19366 }
19367 #endif /* TARGET_MACHO */
19368
19369 /* Order the registers for register allocator. */
19370
19371 void
19372 x86_order_regs_for_local_alloc (void)
19373 {
19374 int pos = 0;
19375 int i;
19376
19377 /* First allocate the local general purpose registers. */
19378 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19379 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19380 reg_alloc_order [pos++] = i;
19381
19382 /* Global general purpose registers. */
19383 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19384 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19385 reg_alloc_order [pos++] = i;
19386
19387 /* x87 registers come first in case we are doing FP math
19388 using them. */
19389 if (!TARGET_SSE_MATH)
19390 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19391 reg_alloc_order [pos++] = i;
19392
19393 /* SSE registers. */
19394 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19395 reg_alloc_order [pos++] = i;
19396 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19397 reg_alloc_order [pos++] = i;
19398
19399 /* x87 registers. */
19400 if (TARGET_SSE_MATH)
19401 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19402 reg_alloc_order [pos++] = i;
19403
19404 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19405 reg_alloc_order [pos++] = i;
19406
19407 /* Initialize the rest of the array, as we do not allocate some
19408 registers at all. */
19409 while (pos < FIRST_PSEUDO_REGISTER)
19410 reg_alloc_order [pos++] = 0;
19411 }
19412
19413 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19414 struct attribute_spec.handler. */
19415 static tree
19416 ix86_handle_struct_attribute (tree *node, tree name,
19417 tree args ATTRIBUTE_UNUSED,
19418 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19419 {
19420 tree *type = NULL;
19421 if (DECL_P (*node))
19422 {
19423 if (TREE_CODE (*node) == TYPE_DECL)
19424 type = &TREE_TYPE (*node);
19425 }
19426 else
19427 type = node;
19428
19429 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19430 || TREE_CODE (*type) == UNION_TYPE)))
19431 {
19432 warning (OPT_Wattributes, "%qs attribute ignored",
19433 IDENTIFIER_POINTER (name));
19434 *no_add_attrs = true;
19435 }
19436
19437 else if ((is_attribute_p ("ms_struct", name)
19438 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19439 || ((is_attribute_p ("gcc_struct", name)
19440 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19441 {
19442 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19443 IDENTIFIER_POINTER (name));
19444 *no_add_attrs = true;
19445 }
19446
19447 return NULL_TREE;
19448 }
19449
19450 static bool
19451 ix86_ms_bitfield_layout_p (tree record_type)
19452 {
19453 return (TARGET_MS_BITFIELD_LAYOUT &&
19454 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19455 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19456 }
19457
19458 /* Returns an expression indicating where the this parameter is
19459 located on entry to the FUNCTION. */
19460
19461 static rtx
19462 x86_this_parameter (tree function)
19463 {
19464 tree type = TREE_TYPE (function);
19465
19466 if (TARGET_64BIT)
19467 {
19468 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19469 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19470 }
19471
19472 if (ix86_function_regparm (type, function) > 0)
19473 {
19474 tree parm;
19475
19476 parm = TYPE_ARG_TYPES (type);
19477 /* Figure out whether or not the function has a variable number of
19478 arguments. */
19479 for (; parm; parm = TREE_CHAIN (parm))
19480 if (TREE_VALUE (parm) == void_type_node)
19481 break;
19482 /* If not, the this parameter is in the first argument. */
19483 if (parm)
19484 {
19485 int regno = 0;
19486 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19487 regno = 2;
19488 return gen_rtx_REG (SImode, regno);
19489 }
19490 }
19491
19492 if (aggregate_value_p (TREE_TYPE (type), type))
19493 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19494 else
19495 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19496 }
19497
19498 /* Determine whether x86_output_mi_thunk can succeed. */
19499
19500 static bool
19501 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19502 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19503 HOST_WIDE_INT vcall_offset, tree function)
19504 {
19505 /* 64-bit can handle anything. */
19506 if (TARGET_64BIT)
19507 return true;
19508
19509 /* For 32-bit, everything's fine if we have one free register. */
19510 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19511 return true;
19512
19513 /* Need a free register for vcall_offset. */
19514 if (vcall_offset)
19515 return false;
19516
19517 /* Need a free register for GOT references. */
19518 if (flag_pic && !(*targetm.binds_local_p) (function))
19519 return false;
19520
19521 /* Otherwise ok. */
19522 return true;
19523 }
19524
19525 /* Output the assembler code for a thunk function. THUNK_DECL is the
19526 declaration for the thunk function itself, FUNCTION is the decl for
19527 the target function. DELTA is an immediate constant offset to be
19528 added to THIS. If VCALL_OFFSET is nonzero, the word at
19529 *(*this + vcall_offset) should be added to THIS. */
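/* Conceptually, the emitted code behaves like the following sketch
   (hypothetical C, using the parameter names from the comment above):

     this += DELTA;
     if (VCALL_OFFSET != 0)
       this += *(*this + VCALL_OFFSET);
     tail call FUNCTION;
*/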
19530
19531 static void
19532 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19533 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19534 HOST_WIDE_INT vcall_offset, tree function)
19535 {
19536 rtx xops[3];
19537 rtx this = x86_this_parameter (function);
19538 rtx this_reg, tmp;
19539
19540 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19541 pull it in now and let DELTA benefit. */
19542 if (REG_P (this))
19543 this_reg = this;
19544 else if (vcall_offset)
19545 {
19546 /* Put the this parameter into %eax. */
19547 xops[0] = this;
19548 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19549 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19550 }
19551 else
19552 this_reg = NULL_RTX;
19553
19554 /* Adjust the this parameter by a fixed constant. */
19555 if (delta)
19556 {
19557 xops[0] = GEN_INT (delta);
19558 xops[1] = this_reg ? this_reg : this;
19559 if (TARGET_64BIT)
19560 {
19561 if (!x86_64_general_operand (xops[0], DImode))
19562 {
19563 tmp = gen_rtx_REG (DImode, R10_REG);
19564 xops[1] = tmp;
19565 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19566 xops[0] = tmp;
19567 xops[1] = this;
19568 }
19569 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19570 }
19571 else
19572 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19573 }
19574
19575 /* Adjust the this parameter by a value stored in the vtable. */
19576 if (vcall_offset)
19577 {
19578 if (TARGET_64BIT)
19579 tmp = gen_rtx_REG (DImode, R10_REG);
19580 else
19581 {
19582 int tmp_regno = 2 /* ECX */;
19583 if (lookup_attribute ("fastcall",
19584 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19585 tmp_regno = 0 /* EAX */;
19586 tmp = gen_rtx_REG (SImode, tmp_regno);
19587 }
19588
19589 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19590 xops[1] = tmp;
19591 if (TARGET_64BIT)
19592 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19593 else
19594 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19595
19596 /* Adjust the this parameter. */
19597 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19598 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19599 {
19600 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19601 xops[0] = GEN_INT (vcall_offset);
19602 xops[1] = tmp2;
19603 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19604 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19605 }
19606 xops[1] = this_reg;
19607 if (TARGET_64BIT)
19608 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19609 else
19610 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19611 }
19612
19613 /* If necessary, drop THIS back to its stack slot. */
19614 if (this_reg && this_reg != this)
19615 {
19616 xops[0] = this_reg;
19617 xops[1] = this;
19618 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19619 }
19620
19621 xops[0] = XEXP (DECL_RTL (function), 0);
19622 if (TARGET_64BIT)
19623 {
19624 if (!flag_pic || (*targetm.binds_local_p) (function))
19625 output_asm_insn ("jmp\t%P0", xops);
19626 else
19627 {
19628 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19629 tmp = gen_rtx_CONST (Pmode, tmp);
19630 tmp = gen_rtx_MEM (QImode, tmp);
19631 xops[0] = tmp;
19632 output_asm_insn ("jmp\t%A0", xops);
19633 }
19634 }
19635 else
19636 {
19637 if (!flag_pic || (*targetm.binds_local_p) (function))
19638 output_asm_insn ("jmp\t%P0", xops);
19639 else
19640 #if TARGET_MACHO
19641 if (TARGET_MACHO)
19642 {
19643 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19644 tmp = (gen_rtx_SYMBOL_REF
19645 (Pmode,
19646 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19647 tmp = gen_rtx_MEM (QImode, tmp);
19648 xops[0] = tmp;
19649 output_asm_insn ("jmp\t%0", xops);
19650 }
19651 else
19652 #endif /* TARGET_MACHO */
19653 {
19654 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19655 output_set_got (tmp, NULL_RTX);
19656
19657 xops[1] = tmp;
19658 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19659 output_asm_insn ("jmp\t{*}%1", xops);
19660 }
19661 }
19662 }
19663
19664 static void
19665 x86_file_start (void)
19666 {
19667 default_file_start ();
19668 #if TARGET_MACHO
19669 darwin_file_start ();
19670 #endif
19671 if (X86_FILE_START_VERSION_DIRECTIVE)
19672 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19673 if (X86_FILE_START_FLTUSED)
19674 fputs ("\t.global\t__fltused\n", asm_out_file);
19675 if (ix86_asm_dialect == ASM_INTEL)
19676 fputs ("\t.intel_syntax\n", asm_out_file);
19677 }
19678
19679 int
19680 x86_field_alignment (tree field, int computed)
19681 {
19682 enum machine_mode mode;
19683 tree type = TREE_TYPE (field);
19684
19685 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19686 return computed;
19687 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19688 ? get_inner_array_type (type) : type);
19689 if (mode == DFmode || mode == DCmode
19690 || GET_MODE_CLASS (mode) == MODE_INT
19691 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19692 return MIN (32, computed);
19693 return computed;
19694 }
19695
19696 /* Output assembler code to FILE to increment profiler label # LABELNO
19697 for profiling a function entry. */
19698 void
19699 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19700 {
19701 if (TARGET_64BIT)
19702 if (flag_pic)
19703 {
19704 #ifndef NO_PROFILE_COUNTERS
19705 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19706 #endif
19707 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19708 }
19709 else
19710 {
19711 #ifndef NO_PROFILE_COUNTERS
19712 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19713 #endif
19714 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19715 }
19716 else if (flag_pic)
19717 {
19718 #ifndef NO_PROFILE_COUNTERS
19719 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19720 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19721 #endif
19722 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19723 }
19724 else
19725 {
19726 #ifndef NO_PROFILE_COUNTERS
19727 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19728 PROFILE_COUNT_REGISTER);
19729 #endif
19730 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19731 }
19732 }
19733
19734 /* We don't have exact information about the insn sizes, but we can
19735 quite safely assume that we are informed about all 1 byte insns and
19736 memory address sizes. This is enough to eliminate unnecessary padding
19737 in 99% of cases. */
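/* Rough examples of the estimate below: a direct call to a symbol counts
   as 5 bytes; an insn already known to be a single byte counts as 1; a
   non-jump insn whose address needs 4 bytes of encoding counts as
   1 + 4 = 5; anything else falls back to 2.  */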
19738
19739 static int
19740 min_insn_size (rtx insn)
19741 {
19742 int l = 0;
19743
19744 if (!INSN_P (insn) || !active_insn_p (insn))
19745 return 0;
19746
19747 /* Discard alignments we've emitted, and jump instructions. */
19748 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19749 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19750 return 0;
19751 if (JUMP_P (insn)
19752 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19753 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19754 return 0;
19755
19756 /* Important case - calls are always 5 bytes.
19757 It is common to have many calls in a row. */
19758 if (CALL_P (insn)
19759 && symbolic_reference_mentioned_p (PATTERN (insn))
19760 && !SIBLING_CALL_P (insn))
19761 return 5;
19762 if (get_attr_length (insn) <= 1)
19763 return 1;
19764
19765 /* For normal instructions we may rely on the sizes of addresses
19766 and the presence of a symbol to require 4 bytes of encoding.
19767 This is not the case for jumps, where references are PC relative. */
19768 if (!JUMP_P (insn))
19769 {
19770 l = get_attr_length_address (insn);
19771 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19772 l = 4;
19773 }
19774 if (l)
19775 return 1+l;
19776 else
19777 return 2;
19778 }
19779
19780 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a
19781 16 byte window. */
19782
19783 static void
19784 ix86_avoid_jump_misspredicts (void)
19785 {
19786 rtx insn, start = get_insns ();
19787 int nbytes = 0, njumps = 0;
19788 int isjump = 0;
19789
19790 /* Look for all minimal intervals of instructions containing 4 jumps.
19791 The intervals are bounded by START and INSN. NBYTES is the total
19792 size of instructions in the interval including INSN and not including
19793 START. When NBYTES is smaller than 16 bytes, it is possible
19794 that the ends of START and INSN land in the same 16 byte page.
19795
19796 The smallest offset in the page at which INSN can start is the case where
19797 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19798 We add p2align to the 16 byte window with maxskip 17 - NBYTES + sizeof (INSN).
19799 */
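/* A worked example of the padding arithmetic (hypothetical numbers):
   suppose the current interval holds four jumps, NBYTES is 12 and
   min_insn_size (INSN) is 3.  Then padsize = 15 - 12 + 3 = 6, and the
   p2align emitted before INSN keeps the fourth jump out of the 16 byte
   window already containing the previous three.  */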
19800 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19801 {
19802
19803 nbytes += min_insn_size (insn);
19804 if (dump_file)
19805 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19806 INSN_UID (insn), min_insn_size (insn));
19807 if ((JUMP_P (insn)
19808 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19809 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19810 || CALL_P (insn))
19811 njumps++;
19812 else
19813 continue;
19814
19815 while (njumps > 3)
19816 {
19817 start = NEXT_INSN (start);
19818 if ((JUMP_P (start)
19819 && GET_CODE (PATTERN (start)) != ADDR_VEC
19820 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19821 || CALL_P (start))
19822 njumps--, isjump = 1;
19823 else
19824 isjump = 0;
19825 nbytes -= min_insn_size (start);
19826 }
19827 gcc_assert (njumps >= 0);
19828 if (dump_file)
19829 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19830 INSN_UID (start), INSN_UID (insn), nbytes);
19831
19832 if (njumps == 3 && isjump && nbytes < 16)
19833 {
19834 int padsize = 15 - nbytes + min_insn_size (insn);
19835
19836 if (dump_file)
19837 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19838 INSN_UID (insn), padsize);
19839 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19840 }
19841 }
19842 }
19843
19844 /* AMD Athlon works faster
19845 when RET is not the destination of a conditional jump or directly preceded
19846 by another jump instruction. We avoid the penalty by inserting a NOP just
19847 before the RET instruction in such cases. */
19848 static void
19849 ix86_pad_returns (void)
19850 {
19851 edge e;
19852 edge_iterator ei;
19853
19854 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19855 {
19856 basic_block bb = e->src;
19857 rtx ret = BB_END (bb);
19858 rtx prev;
19859 bool replace = false;
19860
19861 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19862 || !maybe_hot_bb_p (bb))
19863 continue;
19864 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19865 if (active_insn_p (prev) || LABEL_P (prev))
19866 break;
19867 if (prev && LABEL_P (prev))
19868 {
19869 edge e;
19870 edge_iterator ei;
19871
19872 FOR_EACH_EDGE (e, ei, bb->preds)
19873 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19874 && !(e->flags & EDGE_FALLTHRU))
19875 replace = true;
19876 }
19877 if (!replace)
19878 {
19879 prev = prev_active_insn (ret);
19880 if (prev
19881 && ((JUMP_P (prev) && any_condjump_p (prev))
19882 || CALL_P (prev)))
19883 replace = true;
19884 /* Empty functions get a branch mispredict even when the jump destination
19885 is not visible to us. */
19886 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19887 replace = true;
19888 }
19889 if (replace)
19890 {
19891 emit_insn_before (gen_return_internal_long (), ret);
19892 delete_insn (ret);
19893 }
19894 }
19895 }
19896
19897 /* Implement machine specific optimizations. We implement padding of returns
19898 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
19899 static void
19900 ix86_reorg (void)
19901 {
19902 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19903 ix86_pad_returns ();
19904 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19905 ix86_avoid_jump_misspredicts ();
19906 }
19907
19908 /* Return nonzero when a QImode register that must be represented via a REX
19909 prefix is used. */
19910 bool
19911 x86_extended_QIreg_mentioned_p (rtx insn)
19912 {
19913 int i;
19914 extract_insn_cached (insn);
19915 for (i = 0; i < recog_data.n_operands; i++)
19916 if (REG_P (recog_data.operand[i])
19917 && REGNO (recog_data.operand[i]) >= 4)
19918 return true;
19919 return false;
19920 }
19921
19922 /* Return nonzero when P points to a register encoded via a REX prefix.
19923 Called via for_each_rtx. */
19924 static int
19925 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19926 {
19927 unsigned int regno;
19928 if (!REG_P (*p))
19929 return 0;
19930 regno = REGNO (*p);
19931 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19932 }
19933
19934 /* Return true when INSN mentions a register that must be encoded using a
19935 REX prefix. */
19936 bool
19937 x86_extended_reg_mentioned_p (rtx insn)
19938 {
19939 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19940 }
19941
19942 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19943 optabs would emit if we didn't have TFmode patterns. */
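/* For the negative (high-bit-set) path below: the input is logically
   shifted right by one, the discarded low bit is IORed back in so the
   final rounding is unaffected, the halved value is converted, and the
   result is doubled with a single addition.  */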
19944
19945 void
19946 x86_emit_floatuns (rtx operands[2])
19947 {
19948 rtx neglab, donelab, i0, i1, f0, in, out;
19949 enum machine_mode mode, inmode;
19950
19951 inmode = GET_MODE (operands[1]);
19952 gcc_assert (inmode == SImode || inmode == DImode);
19953
19954 out = operands[0];
19955 in = force_reg (inmode, operands[1]);
19956 mode = GET_MODE (out);
19957 neglab = gen_label_rtx ();
19958 donelab = gen_label_rtx ();
19959 f0 = gen_reg_rtx (mode);
19960
19961 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19962
19963 expand_float (out, in, 0);
19964
19965 emit_jump_insn (gen_jump (donelab));
19966 emit_barrier ();
19967
19968 emit_label (neglab);
19969
19970 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19971 1, OPTAB_DIRECT);
19972 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19973 1, OPTAB_DIRECT);
19974 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19975
19976 expand_float (f0, i0, 0);
19977
19978 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19979
19980 emit_label (donelab);
19981 }
19982 \f
19983 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19984 with all elements equal to VAR. Return true if successful. */
19985
19986 static bool
19987 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19988 rtx target, rtx val)
19989 {
19990 enum machine_mode smode, wsmode, wvmode;
19991 rtx x;
19992
19993 switch (mode)
19994 {
19995 case V2SImode:
19996 case V2SFmode:
19997 if (!mmx_ok)
19998 return false;
19999 /* FALLTHRU */
20000
20001 case V2DFmode:
20002 case V2DImode:
20003 case V4SFmode:
20004 case V4SImode:
20005 val = force_reg (GET_MODE_INNER (mode), val);
20006 x = gen_rtx_VEC_DUPLICATE (mode, val);
20007 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20008 return true;
20009
20010 case V4HImode:
20011 if (!mmx_ok)
20012 return false;
20013 if (TARGET_SSE || TARGET_3DNOW_A)
20014 {
20015 val = gen_lowpart (SImode, val);
20016 x = gen_rtx_TRUNCATE (HImode, val);
20017 x = gen_rtx_VEC_DUPLICATE (mode, x);
20018 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20019 return true;
20020 }
20021 else
20022 {
20023 smode = HImode;
20024 wsmode = SImode;
20025 wvmode = V2SImode;
20026 goto widen;
20027 }
20028
20029 case V8QImode:
20030 if (!mmx_ok)
20031 return false;
20032 smode = QImode;
20033 wsmode = HImode;
20034 wvmode = V4HImode;
20035 goto widen;
20036 case V8HImode:
20037 if (TARGET_SSE2)
20038 {
20039 rtx tmp1, tmp2;
20040 /* Extend HImode to SImode using a paradoxical SUBREG. */
20041 tmp1 = gen_reg_rtx (SImode);
20042 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20043 /* Insert the SImode value as low element of V4SImode vector. */
20044 tmp2 = gen_reg_rtx (V4SImode);
20045 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20046 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20047 CONST0_RTX (V4SImode),
20048 const1_rtx);
20049 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20050 /* Cast the V4SImode vector back to a V8HImode vector. */
20051 tmp1 = gen_reg_rtx (V8HImode);
20052 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20053 /* Duplicate the low short through the whole low SImode word. */
20054 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20055 /* Cast the V8HImode vector back to a V4SImode vector. */
20056 tmp2 = gen_reg_rtx (V4SImode);
20057 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20058 /* Replicate the low element of the V4SImode vector. */
20059 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20060 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20061 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20062 return true;
20063 }
20064 smode = HImode;
20065 wsmode = SImode;
20066 wvmode = V4SImode;
20067 goto widen;
20068 case V16QImode:
20069 if (TARGET_SSE2)
20070 {
20071 rtx tmp1, tmp2;
20072 /* Extend QImode to SImode using a paradoxical SUBREG. */
20073 tmp1 = gen_reg_rtx (SImode);
20074 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20075 /* Insert the SImode value as low element of V4SImode vector. */
20076 tmp2 = gen_reg_rtx (V4SImode);
20077 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20078 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20079 CONST0_RTX (V4SImode),
20080 const1_rtx);
20081 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20082 /* Cast the V4SImode vector back to a V16QImode vector. */
20083 tmp1 = gen_reg_rtx (V16QImode);
20084 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20085 /* Duplicate the low byte through the whole low SImode word. */
20086 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20087 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20088 /* Cast the V16QImode vector back to a V4SImode vector. */
20089 tmp2 = gen_reg_rtx (V4SImode);
20090 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20091 /* Replicate the low element of the V4SImode vector. */
20092 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20093 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20094 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20095 return true;
20096 }
20097 smode = QImode;
20098 wsmode = HImode;
20099 wvmode = V8HImode;
20100 goto widen;
20101 widen:
20102 /* Replicate the value once into the next wider mode and recurse. */
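/* E.g. (hypothetical value): widening QImode 0x12 shifts a copy left by
   8 bits (0x1200) and IORs the original back in, giving the HImode value
   0x1212, which the recursive call broadcasts in the wider vector mode.  */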
20103 val = convert_modes (wsmode, smode, val, true);
20104 x = expand_simple_binop (wsmode, ASHIFT, val,
20105 GEN_INT (GET_MODE_BITSIZE (smode)),
20106 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20107 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20108
20109 x = gen_reg_rtx (wvmode);
20110 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20111 gcc_unreachable ();
20112 emit_move_insn (target, gen_lowpart (mode, x));
20113 return true;
20114
20115 default:
20116 return false;
20117 }
20118 }
20119
20120 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20121 whose ONE_VAR element is VAR, and other elements are zero. Return true
20122 if successful. */
20123
20124 static bool
20125 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20126 rtx target, rtx var, int one_var)
20127 {
20128 enum machine_mode vsimode;
20129 rtx new_target;
20130 rtx x, tmp;
20131
20132 switch (mode)
20133 {
20134 case V2SFmode:
20135 case V2SImode:
20136 if (!mmx_ok)
20137 return false;
20138 /* FALLTHRU */
20139
20140 case V2DFmode:
20141 case V2DImode:
20142 if (one_var != 0)
20143 return false;
20144 var = force_reg (GET_MODE_INNER (mode), var);
20145 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20146 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20147 return true;
20148
20149 case V4SFmode:
20150 case V4SImode:
20151 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20152 new_target = gen_reg_rtx (mode);
20153 else
20154 new_target = target;
20155 var = force_reg (GET_MODE_INNER (mode), var);
20156 x = gen_rtx_VEC_DUPLICATE (mode, var);
20157 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20158 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20159 if (one_var != 0)
20160 {
20161 /* We need to shuffle the value to the correct position, so
20162 create a new pseudo to store the intermediate result. */
20163
20164 /* With SSE2, we can use the integer shuffle insns. */
20165 if (mode != V4SFmode && TARGET_SSE2)
20166 {
20167 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20168 GEN_INT (1),
20169 GEN_INT (one_var == 1 ? 0 : 1),
20170 GEN_INT (one_var == 2 ? 0 : 1),
20171 GEN_INT (one_var == 3 ? 0 : 1)));
20172 if (target != new_target)
20173 emit_move_insn (target, new_target);
20174 return true;
20175 }
20176
20177 /* Otherwise convert the intermediate result to V4SFmode and
20178 use the SSE1 shuffle instructions. */
20179 if (mode != V4SFmode)
20180 {
20181 tmp = gen_reg_rtx (V4SFmode);
20182 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20183 }
20184 else
20185 tmp = new_target;
20186
20187 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20188 GEN_INT (1),
20189 GEN_INT (one_var == 1 ? 0 : 1),
20190 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20191 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20192
20193 if (mode != V4SFmode)
20194 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20195 else if (tmp != target)
20196 emit_move_insn (target, tmp);
20197 }
20198 else if (target != new_target)
20199 emit_move_insn (target, new_target);
20200 return true;
20201
20202 case V8HImode:
20203 case V16QImode:
20204 vsimode = V4SImode;
20205 goto widen;
20206 case V4HImode:
20207 case V8QImode:
20208 if (!mmx_ok)
20209 return false;
20210 vsimode = V2SImode;
20211 goto widen;
20212 widen:
20213 if (one_var != 0)
20214 return false;
20215
20216 /* Zero extend the variable element to SImode and recurse. */
20217 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20218
20219 x = gen_reg_rtx (vsimode);
20220 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20221 var, one_var))
20222 gcc_unreachable ();
20223
20224 emit_move_insn (target, gen_lowpart (mode, x));
20225 return true;
20226
20227 default:
20228 return false;
20229 }
20230 }
20231
20232 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20233 consisting of the values in VALS. It is known that all elements
20234 except ONE_VAR are constants. Return true if successful. */
20235
20236 static bool
20237 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20238 rtx target, rtx vals, int one_var)
20239 {
20240 rtx var = XVECEXP (vals, 0, one_var);
20241 enum machine_mode wmode;
20242 rtx const_vec, x;
20243
20244 const_vec = copy_rtx (vals);
20245 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20246 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20247
20248 switch (mode)
20249 {
20250 case V2DFmode:
20251 case V2DImode:
20252 case V2SFmode:
20253 case V2SImode:
20254 /* For the two element vectors, it's just as easy to use
20255 the general case. */
20256 return false;
20257
20258 case V4SFmode:
20259 case V4SImode:
20260 case V8HImode:
20261 case V4HImode:
20262 break;
20263
20264 case V16QImode:
20265 wmode = V8HImode;
20266 goto widen;
20267 case V8QImode:
20268 wmode = V4HImode;
20269 goto widen;
20270 widen:
20271 /* There's no way to set one QImode entry easily. Combine
20272 the variable value with its adjacent constant value, and
20273 promote to an HImode set. */
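/* E.g. (hypothetical index): for V16QImode with one_var == 5, the variable
   byte is combined with constant element 4 and the pair is stored as
   element 5 >> 1 == 2 of the corresponding V8HImode vector.  */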
20274 x = XVECEXP (vals, 0, one_var ^ 1);
20275 if (one_var & 1)
20276 {
20277 var = convert_modes (HImode, QImode, var, true);
20278 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20279 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20280 x = GEN_INT (INTVAL (x) & 0xff);
20281 }
20282 else
20283 {
20284 var = convert_modes (HImode, QImode, var, true);
20285 x = gen_int_mode (INTVAL (x) << 8, HImode);
20286 }
20287 if (x != const0_rtx)
20288 var = expand_simple_binop (HImode, IOR, var, x, var,
20289 1, OPTAB_LIB_WIDEN);
20290
20291 x = gen_reg_rtx (wmode);
20292 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20293 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20294
20295 emit_move_insn (target, gen_lowpart (mode, x));
20296 return true;
20297
20298 default:
20299 return false;
20300 }
20301
20302 emit_move_insn (target, const_vec);
20303 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20304 return true;
20305 }
20306
20307 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20308 all values variable, and none identical. */
20309
20310 static void
20311 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20312 rtx target, rtx vals)
20313 {
20314 enum machine_mode half_mode = GET_MODE_INNER (mode);
20315 rtx op0 = NULL, op1 = NULL;
20316 bool use_vec_concat = false;
20317
20318 switch (mode)
20319 {
20320 case V2SFmode:
20321 case V2SImode:
20322 if (!mmx_ok && !TARGET_SSE)
20323 break;
20324 /* FALLTHRU */
20325
20326 case V2DFmode:
20327 case V2DImode:
20328 /* For the two element vectors, we always implement VEC_CONCAT. */
20329 op0 = XVECEXP (vals, 0, 0);
20330 op1 = XVECEXP (vals, 0, 1);
20331 use_vec_concat = true;
20332 break;
20333
20334 case V4SFmode:
20335 half_mode = V2SFmode;
20336 goto half;
20337 case V4SImode:
20338 half_mode = V2SImode;
20339 goto half;
20340 half:
20341 {
20342 rtvec v;
20343
20344 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20345 Recurse to load the two halves. */
20346
20347 op0 = gen_reg_rtx (half_mode);
20348 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20349 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20350
20351 op1 = gen_reg_rtx (half_mode);
20352 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20353 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20354
20355 use_vec_concat = true;
20356 }
20357 break;
20358
20359 case V8HImode:
20360 case V16QImode:
20361 case V4HImode:
20362 case V8QImode:
20363 break;
20364
20365 default:
20366 gcc_unreachable ();
20367 }
20368
20369 if (use_vec_concat)
20370 {
20371 if (!register_operand (op0, half_mode))
20372 op0 = force_reg (half_mode, op0);
20373 if (!register_operand (op1, half_mode))
20374 op1 = force_reg (half_mode, op1);
20375
20376 emit_insn (gen_rtx_SET (VOIDmode, target,
20377 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20378 }
20379 else
20380 {
20381 int i, j, n_elts, n_words, n_elt_per_word;
20382 enum machine_mode inner_mode;
20383 rtx words[4], shift;
20384
20385 inner_mode = GET_MODE_INNER (mode);
20386 n_elts = GET_MODE_NUNITS (mode);
20387 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20388 n_elt_per_word = n_elts / n_words;
20389 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20390
20391 for (i = 0; i < n_words; ++i)
20392 {
20393 rtx word = NULL_RTX;
20394
20395 for (j = 0; j < n_elt_per_word; ++j)
20396 {
20397 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20398 elt = convert_modes (word_mode, inner_mode, elt, true);
20399
20400 if (j == 0)
20401 word = elt;
20402 else
20403 {
20404 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20405 word, 1, OPTAB_LIB_WIDEN);
20406 word = expand_simple_binop (word_mode, IOR, word, elt,
20407 word, 1, OPTAB_LIB_WIDEN);
20408 }
20409 }
20410
20411 words[i] = word;
20412 }
20413
20414 if (n_words == 1)
20415 emit_move_insn (target, gen_lowpart (mode, words[0]));
20416 else if (n_words == 2)
20417 {
20418 rtx tmp = gen_reg_rtx (mode);
20419 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20420 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20421 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20422 emit_move_insn (target, tmp);
20423 }
20424 else if (n_words == 4)
20425 {
20426 rtx tmp = gen_reg_rtx (V4SImode);
20427 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20428 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20429 emit_move_insn (target, gen_lowpart (mode, tmp));
20430 }
20431 else
20432 gcc_unreachable ();
20433 }
20434 }
20435
20436 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20437 instructions unless MMX_OK is true. */
20438
20439 void
20440 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20441 {
20442 enum machine_mode mode = GET_MODE (target);
20443 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20444 int n_elts = GET_MODE_NUNITS (mode);
20445 int n_var = 0, one_var = -1;
20446 bool all_same = true, all_const_zero = true;
20447 int i;
20448 rtx x;
20449
20450 for (i = 0; i < n_elts; ++i)
20451 {
20452 x = XVECEXP (vals, 0, i);
20453 if (!CONSTANT_P (x))
20454 n_var++, one_var = i;
20455 else if (x != CONST0_RTX (inner_mode))
20456 all_const_zero = false;
20457 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20458 all_same = false;
20459 }
20460
20461 /* Constants are best loaded from the constant pool. */
20462 if (n_var == 0)
20463 {
20464 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20465 return;
20466 }
20467
20468 /* If all values are identical, broadcast the value. */
20469 if (all_same
20470 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20471 XVECEXP (vals, 0, 0)))
20472 return;
20473
20474 /* Values where only one field is non-constant are best loaded from
20475 the pool and overwritten via move later. */
20476 if (n_var == 1)
20477 {
20478 if (all_const_zero
20479 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20480 XVECEXP (vals, 0, one_var),
20481 one_var))
20482 return;
20483
20484 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20485 return;
20486 }
20487
20488 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20489 }
20490
20491 void
20492 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20493 {
20494 enum machine_mode mode = GET_MODE (target);
20495 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20496 bool use_vec_merge = false;
20497 rtx tmp;
20498
20499 switch (mode)
20500 {
20501 case V2SFmode:
20502 case V2SImode:
20503 if (mmx_ok)
20504 {
20505 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20506 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20507 if (elt == 0)
20508 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20509 else
20510 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20511 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20512 return;
20513 }
20514 break;
20515
20516 case V2DFmode:
20517 case V2DImode:
20518 {
20519 rtx op0, op1;
20520
20521 /* For the two element vectors, we implement a VEC_CONCAT with
20522 the extraction of the other element. */
20523
20524 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20525 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20526
20527 if (elt == 0)
20528 op0 = val, op1 = tmp;
20529 else
20530 op0 = tmp, op1 = val;
20531
20532 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20533 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20534 }
20535 return;
20536
20537 case V4SFmode:
20538 switch (elt)
20539 {
20540 case 0:
20541 use_vec_merge = true;
20542 break;
20543
20544 case 1:
20545 /* tmp = target = A B C D */
20546 tmp = copy_to_reg (target);
20547 /* target = A A B B */
20548 emit_insn (gen_sse_unpcklps (target, target, target));
20549 /* target = X A B B */
20550 ix86_expand_vector_set (false, target, val, 0);
20551 /* target = A X C D */
20552 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20553 GEN_INT (1), GEN_INT (0),
20554 GEN_INT (2+4), GEN_INT (3+4)));
20555 return;
20556
20557 case 2:
20558 /* tmp = target = A B C D */
20559 tmp = copy_to_reg (target);
20560 /* tmp = X B C D */
20561 ix86_expand_vector_set (false, tmp, val, 0);
20562 /* target = A B X D */
20563 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20564 GEN_INT (0), GEN_INT (1),
20565 GEN_INT (0+4), GEN_INT (3+4)));
20566 return;
20567
20568 case 3:
20569 /* tmp = target = A B C D */
20570 tmp = copy_to_reg (target);
20571 /* tmp = X B C D */
20572 ix86_expand_vector_set (false, tmp, val, 0);
20573 /* target = A B X D */
20574 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20575 GEN_INT (0), GEN_INT (1),
20576 GEN_INT (2+4), GEN_INT (0+4)));
20577 return;
20578
20579 default:
20580 gcc_unreachable ();
20581 }
20582 break;
20583
20584 case V4SImode:
20585 /* Element 0 handled by vec_merge below. */
20586 if (elt == 0)
20587 {
20588 use_vec_merge = true;
20589 break;
20590 }
20591
20592 if (TARGET_SSE2)
20593 {
20594 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20595 store into element 0, then shuffle them back. */
20596
20597 rtx order[4];
20598
20599 order[0] = GEN_INT (elt);
20600 order[1] = const1_rtx;
20601 order[2] = const2_rtx;
20602 order[3] = GEN_INT (3);
20603 order[elt] = const0_rtx;
20604
20605 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20606 order[1], order[2], order[3]));
20607
20608 ix86_expand_vector_set (false, target, val, 0);
20609
20610 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20611 order[1], order[2], order[3]));
20612 }
20613 else
20614 {
20615 /* For SSE1, we have to reuse the V4SF code. */
20616 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20617 gen_lowpart (SFmode, val), elt);
20618 }
20619 return;
20620
20621 case V8HImode:
20622 use_vec_merge = TARGET_SSE2;
20623 break;
20624 case V4HImode:
20625 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20626 break;
20627
20628 case V16QImode:
20629 case V8QImode:
20630 default:
20631 break;
20632 }
20633
20634 if (use_vec_merge)
20635 {
20636 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20637 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20638 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20639 }
20640 else
20641 {
20642 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20643
20644 emit_move_insn (mem, target);
20645
20646 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20647 emit_move_insn (tmp, val);
20648
20649 emit_move_insn (target, mem);
20650 }
20651 }
20652
20653 void
20654 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20655 {
20656 enum machine_mode mode = GET_MODE (vec);
20657 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20658 bool use_vec_extr = false;
20659 rtx tmp;
20660
20661 switch (mode)
20662 {
20663 case V2SImode:
20664 case V2SFmode:
20665 if (!mmx_ok)
20666 break;
20667 /* FALLTHRU */
20668
20669 case V2DFmode:
20670 case V2DImode:
20671 use_vec_extr = true;
20672 break;
20673
20674 case V4SFmode:
20675 switch (elt)
20676 {
20677 case 0:
20678 tmp = vec;
20679 break;
20680
20681 case 1:
20682 case 3:
20683 tmp = gen_reg_rtx (mode);
20684 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20685 GEN_INT (elt), GEN_INT (elt),
20686 GEN_INT (elt+4), GEN_INT (elt+4)));
20687 break;
20688
20689 case 2:
20690 tmp = gen_reg_rtx (mode);
20691 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20692 break;
20693
20694 default:
20695 gcc_unreachable ();
20696 }
20697 vec = tmp;
20698 use_vec_extr = true;
20699 elt = 0;
20700 break;
20701
20702 case V4SImode:
20703 if (TARGET_SSE2)
20704 {
20705 switch (elt)
20706 {
20707 case 0:
20708 tmp = vec;
20709 break;
20710
20711 case 1:
20712 case 3:
20713 tmp = gen_reg_rtx (mode);
20714 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20715 GEN_INT (elt), GEN_INT (elt),
20716 GEN_INT (elt), GEN_INT (elt)));
20717 break;
20718
20719 case 2:
20720 tmp = gen_reg_rtx (mode);
20721 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20722 break;
20723
20724 default:
20725 gcc_unreachable ();
20726 }
20727 vec = tmp;
20728 use_vec_extr = true;
20729 elt = 0;
20730 }
20731 else
20732 {
20733 /* For SSE1, we have to reuse the V4SF code. */
20734 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20735 gen_lowpart (V4SFmode, vec), elt);
20736 return;
20737 }
20738 break;
20739
20740 case V8HImode:
20741 use_vec_extr = TARGET_SSE2;
20742 break;
20743 case V4HImode:
20744 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20745 break;
20746
20747 case V16QImode:
20748 case V8QImode:
20749 /* ??? Could extract the appropriate HImode element and shift. */
20750 default:
20751 break;
20752 }
20753
20754 if (use_vec_extr)
20755 {
20756 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20757 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20758
20759 /* Let the rtl optimizers know about the zero extension performed. */
20760 if (inner_mode == HImode)
20761 {
20762 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20763 target = gen_lowpart (SImode, target);
20764 }
20765
20766 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20767 }
20768 else
20769 {
20770 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20771
20772 emit_move_insn (mem, vec);
20773
20774 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20775 emit_move_insn (target, tmp);
20776 }
20777 }
20778
20779 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20780 pattern to reduce; DEST is the destination; IN is the input vector. */
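/* For a commutative FN the net effect is, informally,
   DEST[0] = FN (FN (IN[0], IN[2]), FN (IN[1], IN[3])),
   computed with one movhlps, one shufps and two applications of FN.  */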
20781
20782 void
20783 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20784 {
20785 rtx tmp1, tmp2, tmp3;
20786
20787 tmp1 = gen_reg_rtx (V4SFmode);
20788 tmp2 = gen_reg_rtx (V4SFmode);
20789 tmp3 = gen_reg_rtx (V4SFmode);
20790
20791 emit_insn (gen_sse_movhlps (tmp1, in, in));
20792 emit_insn (fn (tmp2, tmp1, in));
20793
20794 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20795 GEN_INT (1), GEN_INT (1),
20796 GEN_INT (1+4), GEN_INT (1+4)));
20797 emit_insn (fn (dest, tmp2, tmp3));
20798 }
20799 \f
20800 /* Target hook for scalar_mode_supported_p. */
20801 static bool
20802 ix86_scalar_mode_supported_p (enum machine_mode mode)
20803 {
20804 if (DECIMAL_FLOAT_MODE_P (mode))
20805 return true;
20806 else
20807 return default_scalar_mode_supported_p (mode);
20808 }
20809
20810 /* Implements target hook vector_mode_supported_p. */
20811 static bool
20812 ix86_vector_mode_supported_p (enum machine_mode mode)
20813 {
20814 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20815 return true;
20816 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20817 return true;
20818 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20819 return true;
20820 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20821 return true;
20822 return false;
20823 }
20824
20825 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20826
20827 We do this in the new i386 backend to maintain source compatibility
20828 with the old cc0-based compiler. */
20829
20830 static tree
20831 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20832 tree inputs ATTRIBUTE_UNUSED,
20833 tree clobbers)
20834 {
20835 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20836 clobbers);
20837 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20838 clobbers);
20839 return clobbers;
20840 }
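
/* Illustrative usage, not part of the backend: because the hook above
   appends "flags" and "fpsr" to every asm statement's clobber list, user
   code on i386 need not name the condition codes explicitly.  The
   (hypothetical) fragment below modifies EFLAGS yet declares no clobbers;
   the backend records them anyway:  */

static int
asm_clobbers_example (int x)
{
  /* The compare updates EFLAGS; "flags" is clobbered implicitly.  */
  __asm__ __volatile__ ("cmpl $0, %0" : : "r" (x));
  return x;
}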
20841
20842 /* Return true if this goes in large data/bss. */
20843
20844 static bool
20845 ix86_in_large_data_p (tree exp)
20846 {
20847 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20848 return false;
20849
20850 /* Functions are never large data. */
20851 if (TREE_CODE (exp) == FUNCTION_DECL)
20852 return false;
20853
20854 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20855 {
20856 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20857 if (strcmp (section, ".ldata") == 0
20858 || strcmp (section, ".lbss") == 0)
20859 return true;
20860 return false;
20861 }
20862 else
20863 {
20864 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20865
20866 /* If this is an incomplete type with size 0, then we can't put it
20867 in the regular data section because it might be too big when completed. */
20868 if (!size || size > ix86_section_threshold)
20869 return true;
20870 }
20871
20872 return false;
20873 }
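
/* Mark variables that ix86_in_large_data_p places in the large data or bss
   sections so that a far address is used when referencing them.  */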
20874 static void
20875 ix86_encode_section_info (tree decl, rtx rtl, int first)
20876 {
20877 default_encode_section_info (decl, rtl, first);
20878
20879 if (TREE_CODE (decl) == VAR_DECL
20880 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20881 && ix86_in_large_data_p (decl))
20882 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20883 }
20884
20885 /* Worker function for REVERSE_CONDITION. */
20886
20887 enum rtx_code
20888 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20889 {
20890 return (mode != CCFPmode && mode != CCFPUmode
20891 ? reverse_condition (code)
20892 : reverse_condition_maybe_unordered (code));
20893 }
20894
20895 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20896 to OPERANDS[0]. */
20897
20898 const char *
20899 output_387_reg_move (rtx insn, rtx *operands)
20900 {
20901 if (REG_P (operands[1])
20902 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20903 {
20904 if (REGNO (operands[0]) == FIRST_STACK_REG)
20905 return output_387_ffreep (operands, 0);
20906 return "fstp\t%y0";
20907 }
20908 if (STACK_TOP_P (operands[0]))
20909 return "fld%z1\t%y1";
20910 return "fst\t%y0";
20911 }
20912
20913 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20914 FP status register is set. */
20915
20916 void
20917 ix86_emit_fp_unordered_jump (rtx label)
20918 {
20919 rtx reg = gen_reg_rtx (HImode);
20920 rtx temp;
20921
20922 emit_insn (gen_x86_fnstsw_1 (reg));
20923
20924 if (TARGET_USE_SAHF)
20925 {
20926 emit_insn (gen_x86_sahf_1 (reg));
20927
20928 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20929 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20930 }
20931 else
20932 {
20933 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20934
20935 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20936 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20937 }
20938
20939 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20940 gen_rtx_LABEL_REF (VOIDmode, label),
20941 pc_rtx);
20942 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20943 emit_jump_insn (temp);
20944 }
20945
20946 /* Output code to perform a log1p XFmode calculation. */
20947
20948 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20949 {
20950 rtx label1 = gen_label_rtx ();
20951 rtx label2 = gen_label_rtx ();
20952
20953 rtx tmp = gen_reg_rtx (XFmode);
20954 rtx tmp2 = gen_reg_rtx (XFmode);
20955
20956 emit_insn (gen_absxf2 (tmp, op1));
20957 emit_insn (gen_cmpxf (tmp,
20958 CONST_DOUBLE_FROM_REAL_VALUE (
20959 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20960 XFmode)));
20961 emit_jump_insn (gen_bge (label1));
20962
20963 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20964 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20965 emit_jump (label2);
20966
20967 emit_label (label1);
20968 emit_move_insn (tmp, CONST1_RTX (XFmode));
20969 emit_insn (gen_addxf3 (tmp, op1, tmp));
20970 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20971 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20972
20973 emit_label (label2);
20974 }
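
/* Illustrative sketch, not part of the backend: the sequence above uses the
   identity log1p (x) = ln (2) * log2 (1 + x), with ln (2) supplied by
   fldln2.  fyl2xp1 is only specified for small arguments, so the cutoff
   1 - sqrt (2) / 2 ~= 0.29289 chooses between the two x87 log instructions.
   A behavioral C model (assumes a C99 <math.h>):  */

static long double
i387_log1p_sketch (long double x)
{
  const long double ln2 = 0.6931471805599453094L;       /* fldln2 */

  if (fabsl (x) < 0.29289321881345247561810596348408353L)
    /* fyl2xp1: ln2 * log2 (x + 1) without explicitly forming 1 + x.  */
    return ln2 * log2l (x + 1.0L);

  /* fld1; faddp; fyl2x: form 1 + x, then take its base-2 log.  */
  return ln2 * log2l (1.0L + x);
}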
20975
20976 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20977
20978 static void
20979 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20980 tree decl)
20981 {
20982 /* With Binutils 2.15, the "@unwind" marker must be specified on
20983 every occurrence of the ".eh_frame" section, not just the first
20984 one. */
20985 if (TARGET_64BIT
20986 && strcmp (name, ".eh_frame") == 0)
20987 {
20988 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20989 flags & SECTION_WRITE ? "aw" : "a");
20990 return;
20991 }
20992 default_elf_asm_named_section (name, flags, decl);
20993 }
20994
20995 /* Return the mangling of TYPE if it is an extended fundamental type. */
20996
20997 static const char *
20998 ix86_mangle_fundamental_type (tree type)
20999 {
21000 switch (TYPE_MODE (type))
21001 {
21002 case TFmode:
21003 /* __float128 is "g". */
21004 return "g";
21005 case XFmode:
21006 /* "long double" or __float80 is "e". */
21007 return "e";
21008 default:
21009 return NULL;
21010 }
21011 }
21012
21013 /* For 32-bit code we can save PIC register setup by using
21014 the __stack_chk_fail_local hidden function instead of calling
21015 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21016 register, so it is better to call __stack_chk_fail directly. */
21017
21018 static tree
21019 ix86_stack_protect_fail (void)
21020 {
21021 return TARGET_64BIT
21022 ? default_external_stack_protect_fail ()
21023 : default_hidden_stack_protect_fail ();
21024 }
21025
21026 /* Select a format to encode pointers in exception handling data. CODE
21027 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21028 true if the symbol may be affected by dynamic relocations.
21029
21030 ??? All x86 object file formats are capable of representing this.
21031 After all, the relocation needed is the same as for the call insn.
21032 Whether a particular assembler will actually accept such an encoding
21033 remains to be seen. */
21034 int
21035 asm_preferred_eh_data_format (int code, int global)
21036 {
21037 if (flag_pic)
21038 {
21039 int type = DW_EH_PE_sdata8;
21040 if (!TARGET_64BIT
21041 || ix86_cmodel == CM_SMALL_PIC
21042 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21043 type = DW_EH_PE_sdata4;
21044 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21045 }
21046 if (ix86_cmodel == CM_SMALL
21047 || (ix86_cmodel == CM_MEDIUM && code))
21048 return DW_EH_PE_udata4;
21049 return DW_EH_PE_absptr;
21050 }
21051 \f
21052 /* Expand copysign: copy the sign bit of SIGN onto the non-negative value
21053 ABS_VALUE, storing the result in RESULT. If MASK is non-null, it must be
21054 a mask with every bit except the sign bit set (as built by ix86_expand_sse_fabs). */
21055 static void
21056 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21057 {
21058 enum machine_mode mode = GET_MODE (sign);
21059 rtx sgn = gen_reg_rtx (mode);
21060 if (mask == NULL_RTX)
21061 {
21062 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21063 if (!VECTOR_MODE_P (mode))
21064 {
21065 /* We need to generate a scalar mode mask in this case. */
21066 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21067 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21068 mask = gen_reg_rtx (mode);
21069 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21070 }
21071 }
21072 else
21073 mask = gen_rtx_NOT (mode, mask);
21074 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21075 gen_rtx_AND (mode, mask, sign)));
21076 emit_insn (gen_rtx_SET (VOIDmode, result,
21077 gen_rtx_IOR (mode, abs_value, sgn)));
21078 }
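
/* Illustrative sketch, not part of the backend: the mask/AND/IOR sequence
   above is a bitwise copysign -- the sign bit of SIGN is extracted and OR'd
   into the already non-negative ABS_VALUE.  A C model for DFmode (assumes
   <stdint.h> and <string.h>):  */

static double
copysign_to_positive_sketch (double abs_value, double sign)
{
  uint64_t a, s;

  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a |= s & 0x8000000000000000ULL;        /* AND with sign mask, then IOR */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}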
21079
21080 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21081 mask for masking out the sign-bit is stored in *SMASK, if that is
21082 non-null. */
21083 static rtx
21084 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21085 {
21086 enum machine_mode mode = GET_MODE (op0);
21087 rtx xa, mask;
21088
21089 xa = gen_reg_rtx (mode);
21090 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21091 if (!VECTOR_MODE_P (mode))
21092 {
21093 /* We need to generate a scalar mode mask in this case. */
21094 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21095 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21096 mask = gen_reg_rtx (mode);
21097 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21098 }
21099 emit_insn (gen_rtx_SET (VOIDmode, xa,
21100 gen_rtx_AND (mode, op0, mask)));
21101
21102 if (smask)
21103 *smask = mask;
21104
21105 return xa;
21106 }
21107
21108 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21109 swapping the operands if SWAP_OPERANDS is true. The expanded
21110 code is a forward jump to a newly created label in case the
21111 comparison is true. The generated label rtx is returned. */
21112 static rtx
21113 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21114 bool swap_operands)
21115 {
21116 rtx label, tmp;
21117
21118 if (swap_operands)
21119 {
21120 tmp = op0;
21121 op0 = op1;
21122 op1 = tmp;
21123 }
21124
21125 label = gen_label_rtx ();
21126 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21127 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21128 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21129 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21130 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21131 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21132 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21133 JUMP_LABEL (tmp) = label;
21134
21135 return label;
21136 }
21137
21138 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21139 using comparison code CODE. Operands are swapped for the comparison if
21140 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21141 static rtx
21142 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21143 bool swap_operands)
21144 {
21145 enum machine_mode mode = GET_MODE (op0);
21146 rtx mask = gen_reg_rtx (mode);
21147
21148 if (swap_operands)
21149 {
21150 rtx tmp = op0;
21151 op0 = op1;
21152 op1 = tmp;
21153 }
21154
21155 if (mode == DFmode)
21156 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21157 gen_rtx_fmt_ee (code, mode, op0, op1)));
21158 else
21159 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21160 gen_rtx_fmt_ee (code, mode, op0, op1)));
21161
21162 return mask;
21163 }
21164
21165 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21166 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21167 static rtx
21168 ix86_gen_TWO52 (enum machine_mode mode)
21169 {
21170 REAL_VALUE_TYPE TWO52r;
21171 rtx TWO52;
21172
21173 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21174 TWO52 = const_double_from_real_value (TWO52r, mode);
21175 TWO52 = force_reg (mode, TWO52);
21176
21177 return TWO52;
21178 }
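
/* Illustrative sketch, not part of the backend: the rounding expanders below
   all rely on the fact that, in round-to-nearest mode and for |x| < 2**52,
   adding and then subtracting 2**52 (2**23 for SFmode) leaves no fraction
   bits, i.e. it rounds x to the nearest integer.  A minimal C model (the
   volatile store keeps the intermediate from being folded away or held with
   x87 excess precision):  */

static double
round_to_nearest_via_two52_sketch (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  volatile double t = x + two52;
  return t - two52;
}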
21179
21180 /* Expand SSE sequence for computing lround from OP1 storing
21181 into OP0. */
21182 void
21183 ix86_expand_lround (rtx op0, rtx op1)
21184 {
21185 /* C code for the stuff we're doing below:
21186 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
21187 return (long)tmp;
21188 */
21189 enum machine_mode mode = GET_MODE (op1);
21190 const struct real_format *fmt;
21191 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21192 rtx adj;
21193
21194 /* load nextafter (0.5, 0.0) */
21195 fmt = REAL_MODE_FORMAT (mode);
21196 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21197 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21198
21199 /* adj = copysign (0.5, op1) */
21200 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21201 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21202
21203 /* adj = op1 + adj */
21204 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21205
21206 /* op0 = (imode)adj */
21207 expand_fix (op0, adj, 0);
21208 }
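
/* Illustrative note, not part of the backend: the addend loaded above is
   nextafter (0.5, 0.0) = 0.5 - 2**-(p+1) (2**-54 for DFmode) rather than
   0.5 itself.  With a plain 0.5 the largest double below one half would
   round up -- 0.49999999999999994 + 0.5 rounds to 1.0, giving lround = 1
   instead of 0 -- whereas 0.49999999999999994 + (0.5 - 2**-54) stays just
   below 1.0.  A scalar C model of the whole expansion (assumes a C99
   <math.h>):  */

static long
lround_sketch (double x)
{
  double pred_half = nextafter (0.5, 0.0);
  return (long) (x + copysign (pred_half, x));
}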
21209
21210 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
21211 storing into OPERAND0. */
21212 void
21213 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21214 {
21215 /* C code for the stuff we're doing below (for do_floor):
21216 xi = (long)op1;
21217 xi -= (double)xi > op1 ? 1 : 0;
21218 return xi;
21219 */
21220 enum machine_mode fmode = GET_MODE (op1);
21221 enum machine_mode imode = GET_MODE (op0);
21222 rtx ireg, freg, label, tmp;
21223
21224 /* reg = (long)op1 */
21225 ireg = gen_reg_rtx (imode);
21226 expand_fix (ireg, op1, 0);
21227
21228 /* freg = (double)reg */
21229 freg = gen_reg_rtx (fmode);
21230 expand_float (freg, ireg, 0);
21231
21232 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21233 label = ix86_expand_sse_compare_and_jump (UNLE,
21234 freg, op1, !do_floor);
21235 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21236 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21237 emit_move_insn (ireg, tmp);
21238
21239 emit_label (label);
21240 LABEL_NUSES (label) = 1;
21241
21242 emit_move_insn (op0, ireg);
21243 }
21244
21245 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21246 result in OPERAND0. */
21247 void
21248 ix86_expand_rint (rtx operand0, rtx operand1)
21249 {
21250 /* C code for the stuff we're doing below:
21251 xa = fabs (operand1);
21252 if (!isless (xa, 2**52))
21253 return operand1;
21254 xa = xa + 2**52 - 2**52;
21255 return copysign (xa, operand1);
21256 */
21257 enum machine_mode mode = GET_MODE (operand0);
21258 rtx res, xa, label, TWO52, mask;
21259
21260 res = gen_reg_rtx (mode);
21261 emit_move_insn (res, operand1);
21262
21263 /* xa = abs (operand1) */
21264 xa = ix86_expand_sse_fabs (res, &mask);
21265
21266 /* if (!isless (xa, TWO52)) goto label; */
21267 TWO52 = ix86_gen_TWO52 (mode);
21268 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21269
21270 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21271 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21272
21273 ix86_sse_copysign_to_positive (res, xa, res, mask);
21274
21275 emit_label (label);
21276 LABEL_NUSES (label) = 1;
21277
21278 emit_move_insn (operand0, res);
21279 }
21280
21281 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21282 into OPERAND0. */
21283 void
21284 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21285 {
21286 /* C code for the stuff we expand below.
21287 double xa = fabs (x), x2;
21288 if (!isless (xa, TWO52))
21289 return x;
21290 xa = xa + TWO52 - TWO52;
21291 x2 = copysign (xa, x);
21292 Compensate. Floor:
21293 if (x2 > x)
21294 x2 -= 1;
21295 Compensate. Ceil:
21296 if (x2 < x)
21297 x2 -= -1;
21298 return x2;
21299 */
21300 enum machine_mode mode = GET_MODE (operand0);
21301 rtx xa, TWO52, tmp, label, one, res, mask;
21302
21303 TWO52 = ix86_gen_TWO52 (mode);
21304
21305 /* Temporary for holding the result, initialized to the input
21306 operand to ease control flow. */
21307 res = gen_reg_rtx (mode);
21308 emit_move_insn (res, operand1);
21309
21310 /* xa = abs (operand1) */
21311 xa = ix86_expand_sse_fabs (res, &mask);
21312
21313 /* if (!isless (xa, TWO52)) goto label; */
21314 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21315
21316 /* xa = xa + TWO52 - TWO52; */
21317 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21318 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21319
21320 /* xa = copysign (xa, operand1) */
21321 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21322
21323 /* generate 1.0 or -1.0 */
21324 one = force_reg (mode,
21325 const_double_from_real_value (do_floor
21326 ? dconst1 : dconstm1, mode));
21327
21328 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21329 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21330 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21331 gen_rtx_AND (mode, one, tmp)));
21332 /* We always need to subtract here to preserve signed zero. */
21333 tmp = expand_simple_binop (mode, MINUS,
21334 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21335 emit_move_insn (res, tmp);
21336
21337 emit_label (label);
21338 LABEL_NUSES (label) = 1;
21339
21340 emit_move_insn (operand0, res);
21341 }
21342
21343 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21344 into OPERAND0. */
21345 void
21346 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21347 {
21348 /* C code for the stuff we expand below.
21349 double xa = fabs (x), x2;
21350 if (!isless (xa, TWO52))
21351 return x;
21352 x2 = (double)(long)x;
21353 Compensate. Floor:
21354 if (x2 > x)
21355 x2 -= 1;
21356 Compensate. Ceil:
21357 if (x2 < x)
21358 x2 += 1;
21359 if (HONOR_SIGNED_ZEROS (mode))
21360 return copysign (x2, x);
21361 return x2;
21362 */
21363 enum machine_mode mode = GET_MODE (operand0);
21364 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21365
21366 TWO52 = ix86_gen_TWO52 (mode);
21367
21368 /* Temporary for holding the result, initialized to the input
21369 operand to ease control flow. */
21370 res = gen_reg_rtx (mode);
21371 emit_move_insn (res, operand1);
21372
21373 /* xa = abs (operand1) */
21374 xa = ix86_expand_sse_fabs (res, &mask);
21375
21376 /* if (!isless (xa, TWO52)) goto label; */
21377 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21378
21379 /* xa = (double)(long)x */
21380 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21381 expand_fix (xi, res, 0);
21382 expand_float (xa, xi, 0);
21383
21384 /* generate 1.0 */
21385 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21386
21387 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21388 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21389 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21390 gen_rtx_AND (mode, one, tmp)));
21391 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21392 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21393 emit_move_insn (res, tmp);
21394
21395 if (HONOR_SIGNED_ZEROS (mode))
21396 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21397
21398 emit_label (label);
21399 LABEL_NUSES (label) = 1;
21400
21401 emit_move_insn (operand0, res);
21402 }
21403
21404 /* Expand SSE sequence for computing round from OPERAND1 storing
21405 into OPERAND0. Sequence that works without relying on DImode truncation
21406 via cvttsd2siq, which is only available on 64-bit targets. */
21407 void
21408 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21409 {
21410 /* C code for the stuff we expand below.
21411 double xa = fabs (x), xa2, x2;
21412 if (!isless (xa, TWO52))
21413 return x;
21414 Using the absolute value and copying back sign makes
21415 -0.0 -> -0.0 correct.
21416 xa2 = xa + TWO52 - TWO52;
21417 Compensate.
21418 dxa = xa2 - xa;
21419 if (dxa <= -0.5)
21420 xa2 += 1;
21421 else if (dxa > 0.5)
21422 xa2 -= 1;
21423 x2 = copysign (xa2, x);
21424 return x2;
21425 */
21426 enum machine_mode mode = GET_MODE (operand0);
21427 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21428
21429 TWO52 = ix86_gen_TWO52 (mode);
21430
21431 /* Temporary for holding the result, initialized to the input
21432 operand to ease control flow. */
21433 res = gen_reg_rtx (mode);
21434 emit_move_insn (res, operand1);
21435
21436 /* xa = abs (operand1) */
21437 xa = ix86_expand_sse_fabs (res, &mask);
21438
21439 /* if (!isless (xa, TWO52)) goto label; */
21440 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21441
21442 /* xa2 = xa + TWO52 - TWO52; */
21443 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21444 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21445
21446 /* dxa = xa2 - xa; */
21447 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21448
21449 /* generate 0.5, 1.0 and -0.5 */
21450 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21451 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21452 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21453 0, OPTAB_DIRECT);
21454
21455 /* Compensate. */
21456 tmp = gen_reg_rtx (mode);
21457 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21458 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21459 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21460 gen_rtx_AND (mode, one, tmp)));
21461 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21462 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21463 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21464 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21465 gen_rtx_AND (mode, one, tmp)));
21466 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21467
21468 /* res = copysign (xa2, operand1) */
21469 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21470
21471 emit_label (label);
21472 LABEL_NUSES (label) = 1;
21473
21474 emit_move_insn (operand0, res);
21475 }
21476
21477 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21478 into OPERAND0. */
21479 void
21480 ix86_expand_trunc (rtx operand0, rtx operand1)
21481 {
21482 /* C code for SSE variant we expand below.
21483 double xa = fabs (x), x2;
21484 if (!isless (xa, TWO52))
21485 return x;
21486 x2 = (double)(long)x;
21487 if (HONOR_SIGNED_ZEROS (mode))
21488 return copysign (x2, x);
21489 return x2;
21490 */
21491 enum machine_mode mode = GET_MODE (operand0);
21492 rtx xa, xi, TWO52, label, res, mask;
21493
21494 TWO52 = ix86_gen_TWO52 (mode);
21495
21496 /* Temporary for holding the result, initialized to the input
21497 operand to ease control flow. */
21498 res = gen_reg_rtx (mode);
21499 emit_move_insn (res, operand1);
21500
21501 /* xa = abs (operand1) */
21502 xa = ix86_expand_sse_fabs (res, &mask);
21503
21504 /* if (!isless (xa, TWO52)) goto label; */
21505 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21506
21507 /* x = (double)(long)x */
21508 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21509 expand_fix (xi, res, 0);
21510 expand_float (res, xi, 0);
21511
21512 if (HONOR_SIGNED_ZEROS (mode))
21513 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21514
21515 emit_label (label);
21516 LABEL_NUSES (label) = 1;
21517
21518 emit_move_insn (operand0, res);
21519 }
21520
21521 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21522 into OPERAND0. */
21523 void
21524 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21525 {
21526 enum machine_mode mode = GET_MODE (operand0);
21527 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21528
21529 /* C code for SSE variant we expand below.
21530 double xa = fabs (x), x2;
21531 if (!isless (xa, TWO52))
21532 return x;
21533 xa2 = xa + TWO52 - TWO52;
21534 Compensate:
21535 if (xa2 > xa)
21536 xa2 -= 1.0;
21537 x2 = copysign (xa2, x);
21538 return x2;
21539 */
21540
21541 TWO52 = ix86_gen_TWO52 (mode);
21542
21543 /* Temporary for holding the result, initialized to the input
21544 operand to ease control flow. */
21545 res = gen_reg_rtx (mode);
21546 emit_move_insn (res, operand1);
21547
21548 /* xa = abs (operand1) */
21549 xa = ix86_expand_sse_fabs (res, &smask);
21550
21551 /* if (!isless (xa, TWO52)) goto label; */
21552 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21553
21554 /* res = xa + TWO52 - TWO52; */
21555 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21556 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21557 emit_move_insn (res, tmp);
21558
21559 /* generate 1.0 */
21560 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21561
21562 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21563 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21564 emit_insn (gen_rtx_SET (VOIDmode, mask,
21565 gen_rtx_AND (mode, mask, one)));
21566 tmp = expand_simple_binop (mode, MINUS,
21567 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21568 emit_move_insn (res, tmp);
21569
21570 /* res = copysign (res, operand1) */
21571 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21572
21573 emit_label (label);
21574 LABEL_NUSES (label) = 1;
21575
21576 emit_move_insn (operand0, res);
21577 }
21578
21579 /* Expand SSE sequence for computing round from OPERAND1 storing
21580 into OPERAND0. */
21581 void
21582 ix86_expand_round (rtx operand0, rtx operand1)
21583 {
21584 /* C code for the stuff we're doing below:
21585 double xa = fabs (x);
21586 if (!isless (xa, TWO52))
21587 return x;
21588 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21589 return copysign (xa, x);
21590 */
21591 enum machine_mode mode = GET_MODE (operand0);
21592 rtx res, TWO52, xa, label, xi, half, mask;
21593 const struct real_format *fmt;
21594 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21595
21596 /* Temporary for holding the result, initialized to the input
21597 operand to ease control flow. */
21598 res = gen_reg_rtx (mode);
21599 emit_move_insn (res, operand1);
21600
21601 TWO52 = ix86_gen_TWO52 (mode);
21602 xa = ix86_expand_sse_fabs (res, &mask);
21603 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21604
21605 /* load nextafter (0.5, 0.0) */
21606 fmt = REAL_MODE_FORMAT (mode);
21607 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21608 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21609
21610 /* xa = xa + 0.5 */
21611 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21612 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21613
21614 /* xa = (double)(int64_t)xa */
21615 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21616 expand_fix (xi, xa, 0);
21617 expand_float (xa, xi, 0);
21618
21619 /* res = copysign (xa, operand1) */
21620 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21621
21622 emit_label (label);
21623 LABEL_NUSES (label) = 1;
21624
21625 emit_move_insn (operand0, res);
21626 }
21627
21628 #include "gt-i386.h"