1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
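/* Usage sketch (added for illustration; the field names are assumed from the
   processor_costs declaration in i386.h): the per-mode cost arrays below are
   indexed with this macro, e.g. ix86_cost->mult_init[MODE_INDEX (SImode)] or
   ix86_cost->divide[MODE_INDEX (DImode)]; index 4 ("other") catches any
   remaining mode.  */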
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
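/* Worked example (a sketch of the intended arithmetic, assuming
   COSTS_N_INSNS (N) expands to (N) * 4 as noted above): an add is
   COSTS_N_INSNS (1) == 4 in the speed-tuned tables, and the same add is
   COSTS_N_BYTES (2) == 4 in size_cost, i.e. its assumed 2-byte encoding,
   so the two cost scales remain roughly comparable.  */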
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
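/* Reading the stringop tables that close each cost structure (an illustrative
   sketch, assuming the stringop_algs layout declared in i386.h): an entry is
   {alg-for-unknown-size, {{max, alg}, ...}}, where each {max, alg} pair
   selects an algorithm for known block sizes up to MAX, and a max of -1 means
   "any larger size".  For example,
   {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}} uses rep movsl for
   known sizes up to 256 bytes and a library call otherwise.  The first table
   of each pair is used for 32-bit code and the second for 64-bit code, which
   is why DUMMY_STRINGOP_ALGS fills the 64-bit slot for 32-bit-only CPUs.  */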
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks the inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea not
587 to limit the number of prefetches at all, as their execution also takes some
588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea not
660 to limit the number of prefetches at all, as their execution also takes some
661 time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_GEODE (1<<PROCESSOR_GEODE)
988 #define m_K6_GEODE (m_K6 | m_GEODE)
989 #define m_K6 (1<<PROCESSOR_K6)
990 #define m_ATHLON (1<<PROCESSOR_ATHLON)
991 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
992 #define m_K8 (1<<PROCESSOR_K8)
993 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
994 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
995 #define m_NOCONA (1<<PROCESSOR_NOCONA)
996 #define m_CORE2 (1<<PROCESSOR_CORE2)
997 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
998 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
999 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1000 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1001
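/* How these masks are consumed (an illustrative sketch; the TARGET_* macro
   names are assumed from i386.h): each tuning flag below is tested against
   the currently selected processor roughly as

     #define TUNEMASK (1 << ix86_tune)
     #define TARGET_USE_LEAVE (x86_use_leave & TUNEMASK)

   so code generation queries TARGET_USE_LEAVE rather than reading
   x86_use_leave directly.  */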
1002 /* Generic instruction choice should be a common subset of the supported CPUs
1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1004
1005 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
1006 Generic64 seems like a good code size tradeoff. We can't enable it for 32-bit
1007 generic because it does not work well with PPro-based chips. */
1008 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
1009 | m_GENERIC64;
1010 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1011 | m_NOCONA | m_CORE2 | m_GENERIC;
1012 const int x86_zero_extend_with_and = m_486 | m_PENT;
1013 /* Enable to zero extend integer registers to avoid partial dependencies */
1014 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1015 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
1016 const int x86_double_with_add = ~m_386;
1017 const int x86_use_bit_test = m_386;
1018 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
1019 | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1021 | m_NOCONA;
1022 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
1023 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
1024 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1025 /* Branch hints were put in P4 based on simulation results. But
1026 after P4 was made, no performance benefit was observed with
1027 branch hints. They also increase the code size. As a result,
1028 icc never generates branch hints. */
1029 const int x86_branch_hints = 0;
1030 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
1032 /* We probably ought to watch for partial register stalls on the Generic32
1033 compilation setting as well. However, in the current implementation the
1034 partial register stalls are not eliminated very well - they can
1035 be introduced via subregs synthesized by combine and can happen
1036 in caller/callee saving sequences.
1037 Because this option pays back little on PPro-based chips and conflicts
1038 with the partial register dependencies used by Athlon/P4-based chips, it is better
1039 to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall = m_PPRO;
1041 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
1042 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
1043 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
1044 | m_CORE2 | m_GENERIC);
1045 const int x86_use_mov0 = m_K6;
1046 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
1047 const int x86_read_modify_write = ~m_PENT;
1048 const int x86_read_modify = ~(m_PENT | m_PPRO);
1049 const int x86_split_long_moves = m_PPRO;
1050 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
1051 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1052 /* m_PENT4 ? */
1053 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
1054 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
1055 const int x86_qimode_math = ~(0);
1056 const int x86_promote_qi_regs = 0;
1057 /* On PPro this flag is meant to avoid partial register stalls. Just like
1058 the x86_partial_reg_stall this option might be considered for Generic32
1059 if our scheme for avoiding partial stalls was more effective. */
1060 const int x86_himode_math = ~(m_PPRO);
1061 const int x86_promote_hi_regs = m_PPRO;
1062 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1063 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1064 | m_CORE2 | m_GENERIC;
1065 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1066 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1067 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
1068 | m_CORE2 | m_GENERIC;
1069 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1070 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1071 /* Enable if integer moves are preferred for DFmode copies */
1072 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1073 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
1074 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1075 | m_CORE2 | m_GENERIC;
1076 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1077 | m_CORE2 | m_GENERIC;
1078 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1079 for outgoing arguments will be computed and placed into the variable
1080 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1081 for each call; instead, the function prologue should increase the stack frame
1082 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1083 not proper. */
1084 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
1085 | m_NOCONA | m_PPRO | m_CORE2
1086 | m_GENERIC;
1087 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1088 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1089 const int x86_shift1 = ~m_486;
1090 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
1091 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1092 | m_NOCONA | m_CORE2 | m_GENERIC;
1093 /* In the Generic model we have a conflict here between PPro/Pentium4-based chips
1094 that treat 128-bit SSE registers as single units and K8-based chips that
1095 divide SSE registers into two 64-bit halves.
1096 x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
1097 to allow register renaming on 128-bit SSE units, but usually results in one
1098 extra microop on 64-bit SSE units. Experimental results show that disabling
1099 this option on P4 brings over a 20% SPECfp regression, while enabling it on
1100 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
1101 of moves. */
1102 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1103 | m_GENERIC | m_AMDFAM10;
1104 /* Set for machines where the type and dependencies are resolved on SSE
1105 register parts instead of whole registers, so we may maintain just the
1106 lower part of scalar values in the proper format, leaving the upper part
1107 undefined. */
1108 const int x86_sse_split_regs = m_ATHLON_K8;
1109 /* Code generation for scalar reg-reg moves of single and double precision data:
1110 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1111 movaps reg, reg
1112 else
1113 movss reg, reg
1114 if (x86_sse_partial_reg_dependency == true)
1115 movapd reg, reg
1116 else
1117 movsd reg, reg
1118
1119 Code generation for scalar loads of double precision data:
1120 if (x86_sse_split_regs == true)
1121 movlpd mem, reg (gas syntax)
1122 else
1123 movsd mem, reg
1124
1125 Code generation for unaligned packed loads of single precision data
1126 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1127 if (x86_sse_unaligned_move_optimal)
1128 movups mem, reg
1129
1130 if (x86_sse_partial_reg_dependency == true)
1131 {
1132 xorps reg, reg
1133 movlps mem, reg
1134 movhps mem+8, reg
1135 }
1136 else
1137 {
1138 movlps mem, reg
1139 movhps mem+8, reg
1140 }
1141
1142 Code generation for unaligned packed loads of double precision data
1143 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1144 if (x86_sse_unaligned_move_optimal)
1145 movupd mem, reg
1146
1147 if (x86_sse_split_regs == true)
1148 {
1149 movlpd mem, reg
1150 movhpd mem+8, reg
1151 }
1152 else
1153 {
1154 movsd mem, reg
1155 movhpd mem+8, reg
1156 }
1157 */
1158 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1159 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1160 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1161 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1162 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1163
1164 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
1165
1166 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1167 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1168 /* Some CPU cores are not able to predict more than 4 branch instructions in
1169 the 16 byte window. */
1170 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1171 | m_NOCONA | m_CORE2 | m_GENERIC;
1172 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1173 | m_CORE2 | m_GENERIC;
1174 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1175 /* Compare and exchange was added for 80486. */
1176 const int x86_cmpxchg = ~m_386;
1177 /* Compare and exchange 8 bytes was added for pentium. */
1178 const int x86_cmpxchg8b = ~(m_386 | m_486);
1179 /* Exchange and add was added for 80486. */
1180 const int x86_xadd = ~m_386;
1181 /* Byteswap was added for 80486. */
1182 const int x86_bswap = ~m_386;
1183 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1184
1185 static enum stringop_alg stringop_alg = no_stringop;
1186
1187 /* In case the average insn count for single function invocation is
1188 lower than this constant, emit fast (but longer) prologue and
1189 epilogue code. */
1190 #define FAST_PROLOGUE_INSN_COUNT 20
1191
1192 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1193 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1194 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1195 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1196
1197 /* Array of the smallest class containing reg number REGNO, indexed by
1198 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1199
1200 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1201 {
1202 /* ax, dx, cx, bx */
1203 AREG, DREG, CREG, BREG,
1204 /* si, di, bp, sp */
1205 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1206 /* FP registers */
1207 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1208 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1209 /* arg pointer */
1210 NON_Q_REGS,
1211 /* flags, fpsr, fpcr, frame */
1212 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1213 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1214 SSE_REGS, SSE_REGS,
1215 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1216 MMX_REGS, MMX_REGS,
1217 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1218 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1219 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1220 SSE_REGS, SSE_REGS,
1221 };
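/* Usage sketch (for illustration; REGNO_REG_CLASS in i386.h simply indexes
   this array): REGNO_REG_CLASS (0) is AREG (%eax), REGNO_REG_CLASS (1) is
   DREG (%edx), and REGNO_REG_CLASS (4) is SIREG (%esi), matching the
   ax, dx, cx, bx, si, di, bp, sp ordering above.  */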
1222
1223 /* The "default" register map used in 32bit mode. */
1224
1225 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1226 {
1227 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1228 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1229 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1230 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1231 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1232 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1233 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1234 };
1235
1236 static int const x86_64_int_parameter_registers[6] =
1237 {
1238 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1239 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1240 };
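/* Example (per the x86-64 SysV calling convention this table encodes): for
   int f (int a, int b, int c), a arrives in %edi, b in %esi and c in %edx,
   i.e. gcc register numbers 5, 4 and 1, in that order.  */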
1241
1242 static int const x86_64_int_return_registers[4] =
1243 {
1244 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1245 };
1246
1247 /* The "default" register map used in 64bit mode. */
1248 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1249 {
1250 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1251 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1252 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1253 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1254 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1255 8,9,10,11,12,13,14,15, /* extended integer registers */
1256 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1257 };
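/* Selection sketch (assumed from the usual DBX_REGISTER_NUMBER definitions in
   the target headers): debug output picks between these maps roughly as
   TARGET_64BIT ? dbx64_register_map[n] : dbx_register_map[n], with
   svr4_dbx_register_map (below) substituted for the 32-bit map on
   SVR4/DWARF-style targets.  */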
1258
1259 /* Define the register numbers to be used in Dwarf debugging information.
1260 The SVR4 reference port C compiler uses the following register numbers
1261 in its Dwarf output code:
1262 0 for %eax (gcc regno = 0)
1263 1 for %ecx (gcc regno = 2)
1264 2 for %edx (gcc regno = 1)
1265 3 for %ebx (gcc regno = 3)
1266 4 for %esp (gcc regno = 7)
1267 5 for %ebp (gcc regno = 6)
1268 6 for %esi (gcc regno = 4)
1269 7 for %edi (gcc regno = 5)
1270 The following three DWARF register numbers are never generated by
1271 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1272 believes these numbers have these meanings.
1273 8 for %eip (no gcc equivalent)
1274 9 for %eflags (gcc regno = 17)
1275 10 for %trapno (no gcc equivalent)
1276 It is not at all clear how we should number the FP stack registers
1277 for the x86 architecture. If the version of SDB on x86/svr4 were
1278 a bit less brain dead with respect to floating-point then we would
1279 have a precedent to follow with respect to DWARF register numbers
1280 for x86 FP registers, but the SDB on x86/svr4 is so completely
1281 broken with respect to FP registers that it is hardly worth thinking
1282 of it as something to strive for compatibility with.
1283 The version of x86/svr4 SDB I have at the moment does (partially)
1284 seem to believe that DWARF register number 11 is associated with
1285 the x86 register %st(0), but that's about all. Higher DWARF
1286 register numbers don't seem to be associated with anything in
1287 particular, and even for DWARF regno 11, SDB only seems to under-
1288 stand that it should say that a variable lives in %st(0) (when
1289 asked via an `=' command) if we said it was in DWARF regno 11,
1290 but SDB still prints garbage when asked for the value of the
1291 variable in question (via a `/' command).
1292 (Also note that the labels SDB prints for various FP stack regs
1293 when doing an `x' command are all wrong.)
1294 Note that these problems generally don't affect the native SVR4
1295 C compiler because it doesn't allow the use of -O with -g and
1296 because when it is *not* optimizing, it allocates a memory
1297 location for each floating-point variable, and the memory
1298 location is what gets described in the DWARF AT_location
1299 attribute for the variable in question.
1300 Regardless of the severe mental illness of the x86/svr4 SDB, we
1301 do something sensible here and we use the following DWARF
1302 register numbers. Note that these are all stack-top-relative
1303 numbers.
1304 11 for %st(0) (gcc regno = 8)
1305 12 for %st(1) (gcc regno = 9)
1306 13 for %st(2) (gcc regno = 10)
1307 14 for %st(3) (gcc regno = 11)
1308 15 for %st(4) (gcc regno = 12)
1309 16 for %st(5) (gcc regno = 13)
1310 17 for %st(6) (gcc regno = 14)
1311 18 for %st(7) (gcc regno = 15)
1312 */
1313 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1314 {
1315 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1316 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1317 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1319 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1320 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1321 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1322 };
1323
1324 /* Test and compare insns in i386.md store the information needed to
1325 generate branch and scc insns here. */
1326
1327 rtx ix86_compare_op0 = NULL_RTX;
1328 rtx ix86_compare_op1 = NULL_RTX;
1329 rtx ix86_compare_emitted = NULL_RTX;
1330
1331 /* Size of the register save area. */
1332 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
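/* With the usual 64-bit values (REGPARM_MAX == 6, SSE_REGPARM_MAX == 8 and
   UNITS_PER_WORD == 8) this comes to 6*8 + 8*16 == 176 bytes. */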
1333
1334 /* Define the structure for the machine field in struct function. */
1335
1336 struct stack_local_entry GTY(())
1337 {
1338 unsigned short mode;
1339 unsigned short n;
1340 rtx rtl;
1341 struct stack_local_entry *next;
1342 };
1343
1344 /* Structure describing stack frame layout.
1345 Stack grows downward:
1346
1347 [arguments]
1348 <- ARG_POINTER
1349 saved pc
1350
1351 saved frame pointer if frame_pointer_needed
1352 <- HARD_FRAME_POINTER
1353 [saved regs]
1354
1355 [padding1] \
1356 )
1357 [va_arg registers] (
1358 > to_allocate <- FRAME_POINTER
1359 [frame] (
1360 )
1361 [padding2] /
1362 */
1363 struct ix86_frame
1364 {
1365 int nregs;
1366 int padding1;
1367 int va_arg_size;
1368 HOST_WIDE_INT frame;
1369 int padding2;
1370 int outgoing_arguments_size;
1371 int red_zone_size;
1372
1373 HOST_WIDE_INT to_allocate;
1374 /* The offsets relative to ARG_POINTER. */
1375 HOST_WIDE_INT frame_pointer_offset;
1376 HOST_WIDE_INT hard_frame_pointer_offset;
1377 HOST_WIDE_INT stack_pointer_offset;
1378
1379 /* When save_regs_using_mov is set, emit prologue using
1380 move instead of push instructions. */
1381 bool save_regs_using_mov;
1382 };
1383
1384 /* Code model option. */
1385 enum cmodel ix86_cmodel;
1386 /* Asm dialect. */
1387 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1388 /* TLS dialects. */
1389 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1390
1391 /* Which unit we are generating floating point math for. */
1392 enum fpmath_unit ix86_fpmath;
1393
1394 /* Which CPU we are scheduling for. */
1395 enum processor_type ix86_tune;
1396 /* Which instruction set architecture to use. */
1397 enum processor_type ix86_arch;
1398
1399 /* True if the SSE prefetch instruction is not a NOP. */
1400 int x86_prefetch_sse;
1401
1402 /* True if cmpxchg16b is supported. */
1403 int x86_cmpxchg16b;
1404
1405 /* ix86_regparm_string as a number */
1406 static int ix86_regparm;
1407
1408 /* -mstackrealign option */
1409 extern int ix86_force_align_arg_pointer;
1410 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1411
1412 /* Preferred alignment for stack boundary in bits. */
1413 unsigned int ix86_preferred_stack_boundary;
1414
1415 /* Values 1-5: see jump.c */
1416 int ix86_branch_cost;
1417
1418 /* Variables this size or smaller go in the normal data/bss sections;
1419 larger ones go in the ldata/lbss large-data sections (medium model). */
1420
1421 int ix86_section_threshold = 65536;
1422
1423 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1424 char internal_label_prefix[16];
1425 int internal_label_prefix_len;
1426 \f
1427 static bool ix86_handle_option (size_t, const char *, int);
1428 static void output_pic_addr_const (FILE *, rtx, int);
1429 static void put_condition_code (enum rtx_code, enum machine_mode,
1430 int, int, FILE *);
1431 static const char *get_some_local_dynamic_name (void);
1432 static int get_some_local_dynamic_name_1 (rtx *, void *);
1433 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1434 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1435 rtx *);
1436 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1437 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1438 enum machine_mode);
1439 static rtx get_thread_pointer (int);
1440 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1441 static void get_pc_thunk_name (char [32], unsigned int);
1442 static rtx gen_push (rtx);
1443 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1444 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1445 static struct machine_function * ix86_init_machine_status (void);
1446 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1447 static int ix86_nsaved_regs (void);
1448 static void ix86_emit_save_regs (void);
1449 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1450 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1451 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1452 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1453 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1454 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1455 static int ix86_issue_rate (void);
1456 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1457 static int ia32_multipass_dfa_lookahead (void);
1458 static void ix86_init_mmx_sse_builtins (void);
1459 static rtx x86_this_parameter (tree);
1460 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1461 HOST_WIDE_INT, tree);
1462 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1463 static void x86_file_start (void);
1464 static void ix86_reorg (void);
1465 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1466 static tree ix86_build_builtin_va_list (void);
1467 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1468 tree, int *, int);
1469 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1470 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1471 static bool ix86_vector_mode_supported_p (enum machine_mode);
1472
1473 static int ix86_address_cost (rtx);
1474 static bool ix86_cannot_force_const_mem (rtx);
1475 static rtx ix86_delegitimize_address (rtx);
1476
1477 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1478
1479 struct builtin_description;
1480 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1481 tree, rtx);
1482 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1483 tree, rtx);
1484 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1485 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1486 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1487 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1488 static rtx safe_vector_operand (rtx, enum machine_mode);
1489 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1490 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1491 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1492 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1493 static int ix86_fp_comparison_cost (enum rtx_code code);
1494 static unsigned int ix86_select_alt_pic_regnum (void);
1495 static int ix86_save_reg (unsigned int, int);
1496 static void ix86_compute_frame_layout (struct ix86_frame *);
1497 static int ix86_comp_type_attributes (tree, tree);
1498 static int ix86_function_regparm (tree, tree);
1499 const struct attribute_spec ix86_attribute_table[];
1500 static bool ix86_function_ok_for_sibcall (tree, tree);
1501 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1502 static int ix86_value_regno (enum machine_mode, tree, tree);
1503 static bool contains_128bit_aligned_vector_p (tree);
1504 static rtx ix86_struct_value_rtx (tree, int);
1505 static bool ix86_ms_bitfield_layout_p (tree);
1506 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1507 static int extended_reg_mentioned_1 (rtx *, void *);
1508 static bool ix86_rtx_costs (rtx, int, int, int *);
1509 static int min_insn_size (rtx);
1510 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1511 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1512 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1513 tree, bool);
1514 static void ix86_init_builtins (void);
1515 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1516 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1517 static tree ix86_builtin_conversion (enum tree_code, tree);
1518 static const char *ix86_mangle_fundamental_type (tree);
1519 static tree ix86_stack_protect_fail (void);
1520 static rtx ix86_internal_arg_pointer (void);
1521 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1522 static rtx ix86_build_const_vector (enum machine_mode, bool, rtx);
1523 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1524 rtx, rtx, int);
1525
1526 /* This function is only used on Solaris. */
1527 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1528 ATTRIBUTE_UNUSED;
1529
1530 /* Register class used for passing given 64bit part of the argument.
1531 These represent classes as documented by the PS ABI, with the exception
1532 of SSESF, SSEDF classes, that are basically SSE class, just gcc will
1533 use SF or DFmode move instead of DImode to avoid reformatting penalties.
1534
1535 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1536 whenever possible (upper half does contain padding).
1537 */
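/* As a rough example under this scheme, a structure holding a double
   followed by an int would typically be classified SSEDF for its first
   eightbyte and INTEGERSI for its second. */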
1538 enum x86_64_reg_class
1539 {
1540 X86_64_NO_CLASS,
1541 X86_64_INTEGER_CLASS,
1542 X86_64_INTEGERSI_CLASS,
1543 X86_64_SSE_CLASS,
1544 X86_64_SSESF_CLASS,
1545 X86_64_SSEDF_CLASS,
1546 X86_64_SSEUP_CLASS,
1547 X86_64_X87_CLASS,
1548 X86_64_X87UP_CLASS,
1549 X86_64_COMPLEX_X87_CLASS,
1550 X86_64_MEMORY_CLASS
1551 };
1552 static const char * const x86_64_reg_class_name[] = {
1553 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1554 "sseup", "x87", "x87up", "cplx87", "no"
1555 };
1556
1557 #define MAX_CLASSES 4
1558
1559 /* Table of constants used by fldpi, fldln2, etc.... */
1560 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1561 static bool ext_80387_constants_init = 0;
1562 static void init_ext_80387_constants (void);
1563 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1564 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1565 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1566 static section *x86_64_elf_select_section (tree decl, int reloc,
1567 unsigned HOST_WIDE_INT align)
1568 ATTRIBUTE_UNUSED;
1569 \f
1570 /* Initialize the GCC target structure. */
1571 #undef TARGET_ATTRIBUTE_TABLE
1572 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1573 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1574 # undef TARGET_MERGE_DECL_ATTRIBUTES
1575 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1576 #endif
1577
1578 #undef TARGET_COMP_TYPE_ATTRIBUTES
1579 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1580
1581 #undef TARGET_INIT_BUILTINS
1582 #define TARGET_INIT_BUILTINS ix86_init_builtins
1583 #undef TARGET_EXPAND_BUILTIN
1584 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1585
1586 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1587 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1588 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1589 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1590
1591 #undef TARGET_ASM_FUNCTION_EPILOGUE
1592 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1593
1594 #undef TARGET_ENCODE_SECTION_INFO
1595 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1596 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1597 #else
1598 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1599 #endif
1600
1601 #undef TARGET_ASM_OPEN_PAREN
1602 #define TARGET_ASM_OPEN_PAREN ""
1603 #undef TARGET_ASM_CLOSE_PAREN
1604 #define TARGET_ASM_CLOSE_PAREN ""
1605
1606 #undef TARGET_ASM_ALIGNED_HI_OP
1607 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1608 #undef TARGET_ASM_ALIGNED_SI_OP
1609 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1610 #ifdef ASM_QUAD
1611 #undef TARGET_ASM_ALIGNED_DI_OP
1612 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1613 #endif
1614
1615 #undef TARGET_ASM_UNALIGNED_HI_OP
1616 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1617 #undef TARGET_ASM_UNALIGNED_SI_OP
1618 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1619 #undef TARGET_ASM_UNALIGNED_DI_OP
1620 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1621
1622 #undef TARGET_SCHED_ADJUST_COST
1623 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1624 #undef TARGET_SCHED_ISSUE_RATE
1625 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1626 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1627 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1628 ia32_multipass_dfa_lookahead
1629
1630 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1631 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1632
1633 #ifdef HAVE_AS_TLS
1634 #undef TARGET_HAVE_TLS
1635 #define TARGET_HAVE_TLS true
1636 #endif
1637 #undef TARGET_CANNOT_FORCE_CONST_MEM
1638 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1639 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1640 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1641
1642 #undef TARGET_DELEGITIMIZE_ADDRESS
1643 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1644
1645 #undef TARGET_MS_BITFIELD_LAYOUT_P
1646 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1647
1648 #if TARGET_MACHO
1649 #undef TARGET_BINDS_LOCAL_P
1650 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1651 #endif
1652
1653 #undef TARGET_ASM_OUTPUT_MI_THUNK
1654 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1655 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1656 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1657
1658 #undef TARGET_ASM_FILE_START
1659 #define TARGET_ASM_FILE_START x86_file_start
1660
1661 #undef TARGET_DEFAULT_TARGET_FLAGS
1662 #define TARGET_DEFAULT_TARGET_FLAGS \
1663 (TARGET_DEFAULT \
1664 | TARGET_64BIT_DEFAULT \
1665 | TARGET_SUBTARGET_DEFAULT \
1666 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1667
1668 #undef TARGET_HANDLE_OPTION
1669 #define TARGET_HANDLE_OPTION ix86_handle_option
1670
1671 #undef TARGET_RTX_COSTS
1672 #define TARGET_RTX_COSTS ix86_rtx_costs
1673 #undef TARGET_ADDRESS_COST
1674 #define TARGET_ADDRESS_COST ix86_address_cost
1675
1676 #undef TARGET_FIXED_CONDITION_CODE_REGS
1677 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1678 #undef TARGET_CC_MODES_COMPATIBLE
1679 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1680
1681 #undef TARGET_MACHINE_DEPENDENT_REORG
1682 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1683
1684 #undef TARGET_BUILD_BUILTIN_VA_LIST
1685 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1686
1687 #undef TARGET_MD_ASM_CLOBBERS
1688 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1689
1690 #undef TARGET_PROMOTE_PROTOTYPES
1691 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1692 #undef TARGET_STRUCT_VALUE_RTX
1693 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1694 #undef TARGET_SETUP_INCOMING_VARARGS
1695 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1696 #undef TARGET_MUST_PASS_IN_STACK
1697 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1698 #undef TARGET_PASS_BY_REFERENCE
1699 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1700 #undef TARGET_INTERNAL_ARG_POINTER
1701 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1702 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1703 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1704
1705 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1706 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1707
1708 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1709 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1710
1711 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1712 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1713
1714 #ifdef HAVE_AS_TLS
1715 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1716 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1717 #endif
1718
1719 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1720 #undef TARGET_INSERT_ATTRIBUTES
1721 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1722 #endif
1723
1724 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1725 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1726
1727 #undef TARGET_STACK_PROTECT_FAIL
1728 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1729
1730 #undef TARGET_FUNCTION_VALUE
1731 #define TARGET_FUNCTION_VALUE ix86_function_value
1732
1733 struct gcc_target targetm = TARGET_INITIALIZER;
1734
1735 \f
1736 /* The svr4 ABI for the i386 says that records and unions are returned
1737 in memory. */
1738 #ifndef DEFAULT_PCC_STRUCT_RETURN
1739 #define DEFAULT_PCC_STRUCT_RETURN 1
1740 #endif
1741
1742 /* Implement TARGET_HANDLE_OPTION. */
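/* Note that explicitly disabling an ISA extension also disables the
   extensions that depend on it; e.g. -mno-sse below also clears the SSE2,
   SSE3 and SSE4A masks. */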
1743
1744 static bool
1745 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1746 {
1747 switch (code)
1748 {
1749 case OPT_m3dnow:
1750 if (!value)
1751 {
1752 target_flags &= ~MASK_3DNOW_A;
1753 target_flags_explicit |= MASK_3DNOW_A;
1754 }
1755 return true;
1756
1757 case OPT_mmmx:
1758 if (!value)
1759 {
1760 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1761 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1762 }
1763 return true;
1764
1765 case OPT_msse:
1766 if (!value)
1767 {
1768 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1769 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1770 }
1771 return true;
1772
1773 case OPT_msse2:
1774 if (!value)
1775 {
1776 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1777 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1778 }
1779 return true;
1780
1781 case OPT_msse3:
1782 if (!value)
1783 {
1784 target_flags &= ~MASK_SSE4A;
1785 target_flags_explicit |= MASK_SSE4A;
1786 }
1787 return true;
1788
1789 default:
1790 return true;
1791 }
1792 }
1793
1794 /* Sometimes certain combinations of command options do not make
1795 sense on a particular target machine. You can define a macro
1796 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1797 defined, is executed once just after all the command options have
1798 been parsed.
1799
1800 Don't use this macro to turn on various extra optimizations for
1801 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1802
1803 void
1804 override_options (void)
1805 {
1806 int i;
1807 int ix86_tune_defaulted = 0;
1808
1809 /* Comes from final.c -- no real reason to change it. */
1810 #define MAX_CODE_ALIGN 16
1811
1812 static struct ptt
1813 {
1814 const struct processor_costs *cost; /* Processor costs */
1815 const int target_enable; /* Target flags to enable. */
1816 const int target_disable; /* Target flags to disable. */
1817 const int align_loop; /* Default alignments. */
1818 const int align_loop_max_skip;
1819 const int align_jump;
1820 const int align_jump_max_skip;
1821 const int align_func;
1822 }
1823 const processor_target_table[PROCESSOR_max] =
1824 {
1825 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1826 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1827 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1828 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1829 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1830 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1831 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1832 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1833 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1834 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1835 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1836 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1837 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1838 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1839 };
1840
1841 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1842 static struct pta
1843 {
1844 const char *const name; /* processor name or nickname. */
1845 const enum processor_type processor;
1846 const enum pta_flags
1847 {
1848 PTA_SSE = 1,
1849 PTA_SSE2 = 2,
1850 PTA_SSE3 = 4,
1851 PTA_MMX = 8,
1852 PTA_PREFETCH_SSE = 16,
1853 PTA_3DNOW = 32,
1854 PTA_3DNOW_A = 64,
1855 PTA_64BIT = 128,
1856 PTA_SSSE3 = 256,
1857 PTA_CX16 = 512,
1858 PTA_POPCNT = 1024,
1859 PTA_ABM = 2048,
1860 PTA_SSE4A = 4096
1861 } flags;
1862 }
1863 const processor_alias_table[] =
1864 {
1865 {"i386", PROCESSOR_I386, 0},
1866 {"i486", PROCESSOR_I486, 0},
1867 {"i586", PROCESSOR_PENTIUM, 0},
1868 {"pentium", PROCESSOR_PENTIUM, 0},
1869 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1870 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1871 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1872 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1873 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1874 {"i686", PROCESSOR_PENTIUMPRO, 0},
1875 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1876 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1877 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1878 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1879 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1880 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1881 | PTA_MMX | PTA_PREFETCH_SSE},
1882 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1883 | PTA_MMX | PTA_PREFETCH_SSE},
1884 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1885 | PTA_MMX | PTA_PREFETCH_SSE},
1886 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1887 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1888 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1889 | PTA_64BIT | PTA_MMX
1890 | PTA_PREFETCH_SSE | PTA_CX16},
1891 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1892 | PTA_3DNOW_A},
1893 {"k6", PROCESSOR_K6, PTA_MMX},
1894 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1895 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1896 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1897 | PTA_3DNOW_A},
1898 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1899 | PTA_3DNOW | PTA_3DNOW_A},
1900 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1901 | PTA_3DNOW_A | PTA_SSE},
1902 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1903 | PTA_3DNOW_A | PTA_SSE},
1904 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1905 | PTA_3DNOW_A | PTA_SSE},
1906 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1907 | PTA_SSE | PTA_SSE2 },
1908 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1909 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1910 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1911 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1912 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1913 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1914 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1915 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1916 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1917 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1918 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1919 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1920 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1921 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1922 };
1923
1924 int const pta_size = ARRAY_SIZE (processor_alias_table);
1925
1926 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1927 SUBTARGET_OVERRIDE_OPTIONS;
1928 #endif
1929
1930 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1931 SUBSUBTARGET_OVERRIDE_OPTIONS;
1932 #endif
1933
1934 /* -fPIC is the default for x86_64. */
1935 if (TARGET_MACHO && TARGET_64BIT)
1936 flag_pic = 2;
1937
1938 /* Set the default values for switches whose default depends on TARGET_64BIT
1939 in case they weren't overwritten by command line options. */
1940 if (TARGET_64BIT)
1941 {
1942 /* Mach-O doesn't support omitting the frame pointer for now. */
1943 if (flag_omit_frame_pointer == 2)
1944 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1945 if (flag_asynchronous_unwind_tables == 2)
1946 flag_asynchronous_unwind_tables = 1;
1947 if (flag_pcc_struct_return == 2)
1948 flag_pcc_struct_return = 0;
1949 }
1950 else
1951 {
1952 if (flag_omit_frame_pointer == 2)
1953 flag_omit_frame_pointer = 0;
1954 if (flag_asynchronous_unwind_tables == 2)
1955 flag_asynchronous_unwind_tables = 0;
1956 if (flag_pcc_struct_return == 2)
1957 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1958 }
1959
1960 /* Need to check -mtune=generic first. */
1961 if (ix86_tune_string)
1962 {
1963 if (!strcmp (ix86_tune_string, "generic")
1964 || !strcmp (ix86_tune_string, "i686")
1965 /* As special support for cross compilers we read -mtune=native
1966 as -mtune=generic. With native compilers we won't see the
1967 -mtune=native, as it was changed by the driver. */
1968 || !strcmp (ix86_tune_string, "native"))
1969 {
1970 if (TARGET_64BIT)
1971 ix86_tune_string = "generic64";
1972 else
1973 ix86_tune_string = "generic32";
1974 }
1975 else if (!strncmp (ix86_tune_string, "generic", 7))
1976 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1977 }
1978 else
1979 {
1980 if (ix86_arch_string)
1981 ix86_tune_string = ix86_arch_string;
1982 if (!ix86_tune_string)
1983 {
1984 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1985 ix86_tune_defaulted = 1;
1986 }
1987
1988 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1989 need to use a sensible tune option. */
1990 if (!strcmp (ix86_tune_string, "generic")
1991 || !strcmp (ix86_tune_string, "x86-64")
1992 || !strcmp (ix86_tune_string, "i686"))
1993 {
1994 if (TARGET_64BIT)
1995 ix86_tune_string = "generic64";
1996 else
1997 ix86_tune_string = "generic32";
1998 }
1999 }
2000 if (ix86_stringop_string)
2001 {
2002 if (!strcmp (ix86_stringop_string, "rep_byte"))
2003 stringop_alg = rep_prefix_1_byte;
2004 else if (!strcmp (ix86_stringop_string, "libcall"))
2005 stringop_alg = libcall;
2006 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2007 stringop_alg = rep_prefix_4_byte;
2008 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2009 stringop_alg = rep_prefix_8_byte;
2010 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2011 stringop_alg = loop_1_byte;
2012 else if (!strcmp (ix86_stringop_string, "loop"))
2013 stringop_alg = loop;
2014 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2015 stringop_alg = unrolled_loop;
2016 else
2017 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2018 }
2019 if (!strcmp (ix86_tune_string, "x86-64"))
2020 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2021 "-mtune=generic instead as appropriate.");
2022
2023 if (!ix86_arch_string)
2024 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2025 if (!strcmp (ix86_arch_string, "generic"))
2026 error ("generic CPU can be used only for -mtune= switch");
2027 if (!strncmp (ix86_arch_string, "generic", 7))
2028 error ("bad value (%s) for -march= switch", ix86_arch_string);
2029
2030 if (ix86_cmodel_string != 0)
2031 {
2032 if (!strcmp (ix86_cmodel_string, "small"))
2033 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2034 else if (!strcmp (ix86_cmodel_string, "medium"))
2035 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2036 else if (flag_pic)
2037 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2038 else if (!strcmp (ix86_cmodel_string, "32"))
2039 ix86_cmodel = CM_32;
2040 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2041 ix86_cmodel = CM_KERNEL;
2042 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2043 ix86_cmodel = CM_LARGE;
2044 else
2045 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2046 }
2047 else
2048 {
2049 ix86_cmodel = CM_32;
2050 if (TARGET_64BIT)
2051 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2052 }
2053 if (ix86_asm_string != 0)
2054 {
2055 if (! TARGET_MACHO
2056 && !strcmp (ix86_asm_string, "intel"))
2057 ix86_asm_dialect = ASM_INTEL;
2058 else if (!strcmp (ix86_asm_string, "att"))
2059 ix86_asm_dialect = ASM_ATT;
2060 else
2061 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2062 }
2063 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2064 error ("code model %qs not supported in the %s bit mode",
2065 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2066 if (ix86_cmodel == CM_LARGE)
2067 sorry ("code model %<large%> not supported yet");
2068 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2069 sorry ("%i-bit mode not compiled in",
2070 (target_flags & MASK_64BIT) ? 64 : 32);
2071
2072 for (i = 0; i < pta_size; i++)
2073 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2074 {
2075 ix86_arch = processor_alias_table[i].processor;
2076 /* Default cpu tuning to the architecture. */
2077 ix86_tune = ix86_arch;
2078 if (processor_alias_table[i].flags & PTA_MMX
2079 && !(target_flags_explicit & MASK_MMX))
2080 target_flags |= MASK_MMX;
2081 if (processor_alias_table[i].flags & PTA_3DNOW
2082 && !(target_flags_explicit & MASK_3DNOW))
2083 target_flags |= MASK_3DNOW;
2084 if (processor_alias_table[i].flags & PTA_3DNOW_A
2085 && !(target_flags_explicit & MASK_3DNOW_A))
2086 target_flags |= MASK_3DNOW_A;
2087 if (processor_alias_table[i].flags & PTA_SSE
2088 && !(target_flags_explicit & MASK_SSE))
2089 target_flags |= MASK_SSE;
2090 if (processor_alias_table[i].flags & PTA_SSE2
2091 && !(target_flags_explicit & MASK_SSE2))
2092 target_flags |= MASK_SSE2;
2093 if (processor_alias_table[i].flags & PTA_SSE3
2094 && !(target_flags_explicit & MASK_SSE3))
2095 target_flags |= MASK_SSE3;
2096 if (processor_alias_table[i].flags & PTA_SSSE3
2097 && !(target_flags_explicit & MASK_SSSE3))
2098 target_flags |= MASK_SSSE3;
2099 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2100 x86_prefetch_sse = true;
2101 if (processor_alias_table[i].flags & PTA_CX16)
2102 x86_cmpxchg16b = true;
2103 if (processor_alias_table[i].flags & PTA_POPCNT
2104 && !(target_flags_explicit & MASK_POPCNT))
2105 target_flags |= MASK_POPCNT;
2106 if (processor_alias_table[i].flags & PTA_ABM
2107 && !(target_flags_explicit & MASK_ABM))
2108 target_flags |= MASK_ABM;
2109 if (processor_alias_table[i].flags & PTA_SSE4A
2110 && !(target_flags_explicit & MASK_SSE4A))
2111 target_flags |= MASK_SSE4A;
2112 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2113 error ("CPU you selected does not support x86-64 "
2114 "instruction set");
2115 break;
2116 }
2117
2118 if (i == pta_size)
2119 error ("bad value (%s) for -march= switch", ix86_arch_string);
2120
2121 for (i = 0; i < pta_size; i++)
2122 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2123 {
2124 ix86_tune = processor_alias_table[i].processor;
2125 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2126 {
2127 if (ix86_tune_defaulted)
2128 {
2129 ix86_tune_string = "x86-64";
2130 for (i = 0; i < pta_size; i++)
2131 if (! strcmp (ix86_tune_string,
2132 processor_alias_table[i].name))
2133 break;
2134 ix86_tune = processor_alias_table[i].processor;
2135 }
2136 else
2137 error ("CPU you selected does not support x86-64 "
2138 "instruction set");
2139 }
2140 /* Intel CPUs have always interpreted SSE prefetch instructions as
2141 NOPs; so, we can enable SSE prefetch instructions even when
2142 -mtune (rather than -march) points us to a processor that has them.
2143 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2144 higher processors. */
2145 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2146 x86_prefetch_sse = true;
2147 break;
2148 }
2149 if (i == pta_size)
2150 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2151
2152 if (optimize_size)
2153 ix86_cost = &size_cost;
2154 else
2155 ix86_cost = processor_target_table[ix86_tune].cost;
2156 target_flags |= processor_target_table[ix86_tune].target_enable;
2157 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2158
2159 /* Arrange to set up i386_stack_locals for all functions. */
2160 init_machine_status = ix86_init_machine_status;
2161
2162 /* Validate -mregparm= value. */
2163 if (ix86_regparm_string)
2164 {
2165 i = atoi (ix86_regparm_string);
2166 if (i < 0 || i > REGPARM_MAX)
2167 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2168 else
2169 ix86_regparm = i;
2170 }
2171 else
2172 if (TARGET_64BIT)
2173 ix86_regparm = REGPARM_MAX;
2174
2175 /* If the user has provided any of the -malign-* options,
2176 warn and use that value only if -falign-* is not set.
2177 Remove this code in GCC 3.2 or later. */
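/* The -malign-* values are log2 amounts; e.g. -malign-loops=4 requests
   1 << 4 == 16 byte alignment, matching what the code below computes. */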
2178 if (ix86_align_loops_string)
2179 {
2180 warning (0, "-malign-loops is obsolete, use -falign-loops");
2181 if (align_loops == 0)
2182 {
2183 i = atoi (ix86_align_loops_string);
2184 if (i < 0 || i > MAX_CODE_ALIGN)
2185 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2186 else
2187 align_loops = 1 << i;
2188 }
2189 }
2190
2191 if (ix86_align_jumps_string)
2192 {
2193 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2194 if (align_jumps == 0)
2195 {
2196 i = atoi (ix86_align_jumps_string);
2197 if (i < 0 || i > MAX_CODE_ALIGN)
2198 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2199 else
2200 align_jumps = 1 << i;
2201 }
2202 }
2203
2204 if (ix86_align_funcs_string)
2205 {
2206 warning (0, "-malign-functions is obsolete, use -falign-functions");
2207 if (align_functions == 0)
2208 {
2209 i = atoi (ix86_align_funcs_string);
2210 if (i < 0 || i > MAX_CODE_ALIGN)
2211 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2212 else
2213 align_functions = 1 << i;
2214 }
2215 }
2216
2217 /* Default align_* from the processor table. */
2218 if (align_loops == 0)
2219 {
2220 align_loops = processor_target_table[ix86_tune].align_loop;
2221 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2222 }
2223 if (align_jumps == 0)
2224 {
2225 align_jumps = processor_target_table[ix86_tune].align_jump;
2226 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2227 }
2228 if (align_functions == 0)
2229 {
2230 align_functions = processor_target_table[ix86_tune].align_func;
2231 }
2232
2233 /* Validate -mbranch-cost= value, or provide default. */
2234 ix86_branch_cost = ix86_cost->branch_cost;
2235 if (ix86_branch_cost_string)
2236 {
2237 i = atoi (ix86_branch_cost_string);
2238 if (i < 0 || i > 5)
2239 error ("-mbranch-cost=%d is not between 0 and 5", i);
2240 else
2241 ix86_branch_cost = i;
2242 }
2243 if (ix86_section_threshold_string)
2244 {
2245 i = atoi (ix86_section_threshold_string);
2246 if (i < 0)
2247 error ("-mlarge-data-threshold=%d is negative", i);
2248 else
2249 ix86_section_threshold = i;
2250 }
2251
2252 if (ix86_tls_dialect_string)
2253 {
2254 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2255 ix86_tls_dialect = TLS_DIALECT_GNU;
2256 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2257 ix86_tls_dialect = TLS_DIALECT_GNU2;
2258 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2259 ix86_tls_dialect = TLS_DIALECT_SUN;
2260 else
2261 error ("bad value (%s) for -mtls-dialect= switch",
2262 ix86_tls_dialect_string);
2263 }
2264
2265 /* Keep nonleaf frame pointers. */
2266 if (flag_omit_frame_pointer)
2267 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2268 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2269 flag_omit_frame_pointer = 1;
2270
2271 /* If we're doing fast math, we don't care about comparison order
2272 wrt NaNs. This lets us use a shorter comparison sequence. */
2273 if (flag_finite_math_only)
2274 target_flags &= ~MASK_IEEE_FP;
2275
2276 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2277 since the insns won't need emulation. */
2278 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2279 target_flags &= ~MASK_NO_FANCY_MATH_387;
2280
2281 /* Likewise, if the target doesn't have a 387, or we've specified
2282 software floating point, don't use 387 inline intrinsics. */
2283 if (!TARGET_80387)
2284 target_flags |= MASK_NO_FANCY_MATH_387;
2285
2286 /* Turn on SSE3 builtins for -mssse3. */
2287 if (TARGET_SSSE3)
2288 target_flags |= MASK_SSE3;
2289
2290 /* Turn on SSE3 builtins for -msse4a. */
2291 if (TARGET_SSE4A)
2292 target_flags |= MASK_SSE3;
2293
2294 /* Turn on SSE2 builtins for -msse3. */
2295 if (TARGET_SSE3)
2296 target_flags |= MASK_SSE2;
2297
2298 /* Turn on SSE builtins for -msse2. */
2299 if (TARGET_SSE2)
2300 target_flags |= MASK_SSE;
2301
2302 /* Turn on MMX builtins for -msse. */
2303 if (TARGET_SSE)
2304 {
2305 target_flags |= MASK_MMX & ~target_flags_explicit;
2306 x86_prefetch_sse = true;
2307 }
2308
2309 /* Turn on MMX builtins for 3Dnow. */
2310 if (TARGET_3DNOW)
2311 target_flags |= MASK_MMX;
2312
2313 /* Turn on POPCNT builtins for -mabm. */
2314 if (TARGET_ABM)
2315 target_flags |= MASK_POPCNT;
2316
2317 if (TARGET_64BIT)
2318 {
2319 if (TARGET_ALIGN_DOUBLE)
2320 error ("-malign-double makes no sense in the 64bit mode");
2321 if (TARGET_RTD)
2322 error ("-mrtd calling convention not supported in the 64bit mode");
2323
2324 /* Enable by default the SSE and MMX builtins. Do allow the user to
2325 explicitly disable any of these. In particular, disabling SSE and
2326 MMX for kernel code is extremely useful. */
2327 target_flags
2328 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2329 & ~target_flags_explicit);
2330 }
2331 else
2332 {
2333 /* The i386 ABI does not specify a red zone. It still makes sense to use
2334 one when the programmer takes care to keep the stack from being destroyed. */
2335 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2336 target_flags |= MASK_NO_RED_ZONE;
2337 }
2338
2339 /* Validate -mpreferred-stack-boundary= value, or provide default.
2340 The default of 128 bits is for the Pentium III's SSE __m128 type. We
2341 don't lower it for optimize_size, since otherwise object files compiled
2342 with -Os and -On could not be mixed. */
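/* As an example, -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT == 128 bits, which is also the default. */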
2343 ix86_preferred_stack_boundary = 128;
2344 if (ix86_preferred_stack_boundary_string)
2345 {
2346 i = atoi (ix86_preferred_stack_boundary_string);
2347 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2348 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2349 TARGET_64BIT ? 4 : 2);
2350 else
2351 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2352 }
2353
2354 /* Accept -msseregparm only if at least SSE support is enabled. */
2355 if (TARGET_SSEREGPARM
2356 && ! TARGET_SSE)
2357 error ("-msseregparm used without SSE enabled");
2358
2359 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2360
2361 if (ix86_fpmath_string != 0)
2362 {
2363 if (! strcmp (ix86_fpmath_string, "387"))
2364 ix86_fpmath = FPMATH_387;
2365 else if (! strcmp (ix86_fpmath_string, "sse"))
2366 {
2367 if (!TARGET_SSE)
2368 {
2369 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2370 ix86_fpmath = FPMATH_387;
2371 }
2372 else
2373 ix86_fpmath = FPMATH_SSE;
2374 }
2375 else if (! strcmp (ix86_fpmath_string, "387,sse")
2376 || ! strcmp (ix86_fpmath_string, "sse,387"))
2377 {
2378 if (!TARGET_SSE)
2379 {
2380 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2381 ix86_fpmath = FPMATH_387;
2382 }
2383 else if (!TARGET_80387)
2384 {
2385 warning (0, "387 instruction set disabled, using SSE arithmetics");
2386 ix86_fpmath = FPMATH_SSE;
2387 }
2388 else
2389 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2390 }
2391 else
2392 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2393 }
2394
2395 /* If the i387 is disabled, then do not return values in it. */
2396 if (!TARGET_80387)
2397 target_flags &= ~MASK_FLOAT_RETURNS;
2398
2399 if ((x86_accumulate_outgoing_args & TUNEMASK)
2400 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2401 && !optimize_size)
2402 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2403
2404 /* ??? Unwind info is not correct around the CFG unless either a frame
2405 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2406 unwind info generation to be aware of the CFG and propagating states
2407 around edges. */
2408 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2409 || flag_exceptions || flag_non_call_exceptions)
2410 && flag_omit_frame_pointer
2411 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2412 {
2413 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2414 warning (0, "unwind tables currently require either a frame pointer "
2415 "or -maccumulate-outgoing-args for correctness");
2416 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2417 }
2418
2419 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2420 {
2421 char *p;
2422 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2423 p = strchr (internal_label_prefix, 'X');
2424 internal_label_prefix_len = p - internal_label_prefix;
2425 *p = '\0';
2426 }
2427
2428 /* When the scheduling description is not available, disable the scheduler
2429 pass so it won't slow down compilation and make x87 code slower. */
2430 if (!TARGET_SCHEDULE)
2431 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2432
2433 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2434 set_param_value ("simultaneous-prefetches",
2435 ix86_cost->simultaneous_prefetches);
2436 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2437 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2438 }
2439 \f
2440 /* Switch to the appropriate section for output of DECL.
2441 DECL is either a `VAR_DECL' node or a constant of some sort.
2442 RELOC indicates whether forming the initial value of DECL requires
2443 link-time relocations. */
2444
2445 static section *
2446 x86_64_elf_select_section (tree decl, int reloc,
2447 unsigned HOST_WIDE_INT align)
2448 {
2449 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2450 && ix86_in_large_data_p (decl))
2451 {
2452 const char *sname = NULL;
2453 unsigned int flags = SECTION_WRITE;
2454 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2455 {
2456 case SECCAT_DATA:
2457 sname = ".ldata";
2458 break;
2459 case SECCAT_DATA_REL:
2460 sname = ".ldata.rel";
2461 break;
2462 case SECCAT_DATA_REL_LOCAL:
2463 sname = ".ldata.rel.local";
2464 break;
2465 case SECCAT_DATA_REL_RO:
2466 sname = ".ldata.rel.ro";
2467 break;
2468 case SECCAT_DATA_REL_RO_LOCAL:
2469 sname = ".ldata.rel.ro.local";
2470 break;
2471 case SECCAT_BSS:
2472 sname = ".lbss";
2473 flags |= SECTION_BSS;
2474 break;
2475 case SECCAT_RODATA:
2476 case SECCAT_RODATA_MERGE_STR:
2477 case SECCAT_RODATA_MERGE_STR_INIT:
2478 case SECCAT_RODATA_MERGE_CONST:
2479 sname = ".lrodata";
2480 flags = 0;
2481 break;
2482 case SECCAT_SRODATA:
2483 case SECCAT_SDATA:
2484 case SECCAT_SBSS:
2485 gcc_unreachable ();
2486 case SECCAT_TEXT:
2487 case SECCAT_TDATA:
2488 case SECCAT_TBSS:
2489 /* We don't split these for the medium model. Place them into
2490 default sections and hope for the best. */
2491 break;
2492 }
2493 if (sname)
2494 {
2495 /* We might get called with string constants, but get_named_section
2496 doesn't like them as they are not DECLs. Also, we need to set
2497 flags in that case. */
2498 if (!DECL_P (decl))
2499 return get_section (sname, flags, NULL);
2500 return get_named_section (decl, sname, reloc);
2501 }
2502 }
2503 return default_elf_select_section (decl, reloc, align);
2504 }
2505
2506 /* Build up a unique section name, expressed as a
2507 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2508 RELOC indicates whether the initial value of EXP requires
2509 link-time relocations. */
2510
2511 static void
2512 x86_64_elf_unique_section (tree decl, int reloc)
2513 {
2514 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2515 && ix86_in_large_data_p (decl))
2516 {
2517 const char *prefix = NULL;
2518 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2519 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2520
2521 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2522 {
2523 case SECCAT_DATA:
2524 case SECCAT_DATA_REL:
2525 case SECCAT_DATA_REL_LOCAL:
2526 case SECCAT_DATA_REL_RO:
2527 case SECCAT_DATA_REL_RO_LOCAL:
2528 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2529 break;
2530 case SECCAT_BSS:
2531 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2532 break;
2533 case SECCAT_RODATA:
2534 case SECCAT_RODATA_MERGE_STR:
2535 case SECCAT_RODATA_MERGE_STR_INIT:
2536 case SECCAT_RODATA_MERGE_CONST:
2537 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2538 break;
2539 case SECCAT_SRODATA:
2540 case SECCAT_SDATA:
2541 case SECCAT_SBSS:
2542 gcc_unreachable ();
2543 case SECCAT_TEXT:
2544 case SECCAT_TDATA:
2545 case SECCAT_TBSS:
2546 /* We don't split these for the medium model. Place them into
2547 default sections and hope for the best. */
2548 break;
2549 }
2550 if (prefix)
2551 {
2552 const char *name;
2553 size_t nlen, plen;
2554 char *string;
2555 plen = strlen (prefix);
2556
2557 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2558 name = targetm.strip_name_encoding (name);
2559 nlen = strlen (name);
2560
2561 string = alloca (nlen + plen + 1);
2562 memcpy (string, prefix, plen);
2563 memcpy (string + plen, name, nlen + 1);
2564
2565 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2566 return;
2567 }
2568 }
2569 default_unique_section (decl, reloc);
2570 }
2571
2572 #ifdef COMMON_ASM_OP
2573 /* This says how to output assembler code to declare an
2574 uninitialized external linkage data object.
2575
2576 For medium model x86-64 we need to use the .largecomm directive for
2577 large objects. */
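/* For such an object the emitted directive looks roughly like
      .largecomm foo,262144,32
   (hypothetical symbol, size and alignment); smaller objects fall back to
   the regular COMMON_ASM_OP below. */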
2578 void
2579 x86_elf_aligned_common (FILE *file,
2580 const char *name, unsigned HOST_WIDE_INT size,
2581 int align)
2582 {
2583 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2584 && size > (unsigned int)ix86_section_threshold)
2585 fprintf (file, ".largecomm\t");
2586 else
2587 fprintf (file, "%s", COMMON_ASM_OP);
2588 assemble_name (file, name);
2589 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2590 size, align / BITS_PER_UNIT);
2591 }
2592 #endif
2593 /* Utility function for targets to use in implementing
2594 ASM_OUTPUT_ALIGNED_BSS. */
2595
2596 void
2597 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2598 const char *name, unsigned HOST_WIDE_INT size,
2599 int align)
2600 {
2601 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2602 && size > (unsigned int)ix86_section_threshold)
2603 switch_to_section (get_named_section (decl, ".lbss", 0));
2604 else
2605 switch_to_section (bss_section);
2606 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2607 #ifdef ASM_DECLARE_OBJECT_NAME
2608 last_assemble_variable_decl = decl;
2609 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2610 #else
2611 /* The standard thing is to just output a label for the object. */
2612 ASM_OUTPUT_LABEL (file, name);
2613 #endif /* ASM_DECLARE_OBJECT_NAME */
2614 ASM_OUTPUT_SKIP (file, size ? size : 1);
2615 }
2616 \f
2617 void
2618 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2619 {
2620 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2621 make the problem with not enough registers even worse. */
2622 #ifdef INSN_SCHEDULING
2623 if (level > 1)
2624 flag_schedule_insns = 0;
2625 #endif
2626
2627 if (TARGET_MACHO)
2628 /* The Darwin libraries never set errno, so we might as well
2629 avoid calling them when that's the only reason we would. */
2630 flag_errno_math = 0;
2631
2632 /* The default values of these switches depend on TARGET_64BIT,
2633 which is not known at this point. Mark these values with 2 and
2634 let the user override them. If there is no command line option
2635 specifying them, we will set the defaults in override_options. */
2636 if (optimize >= 1)
2637 flag_omit_frame_pointer = 2;
2638 flag_pcc_struct_return = 2;
2639 flag_asynchronous_unwind_tables = 2;
2640 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2641 SUBTARGET_OPTIMIZATION_OPTIONS;
2642 #endif
2643 }
2644 \f
2645 /* Table of valid machine attributes. */
2646 const struct attribute_spec ix86_attribute_table[] =
2647 {
2648 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2649 /* Stdcall attribute says callee is responsible for popping arguments
2650 if they are not variable. */
2651 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2652 /* Fastcall attribute says callee is responsible for popping arguments
2653 if they are not variable. */
2654 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2655 /* Cdecl attribute says the callee is a normal C declaration */
2656 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2657 /* Regparm attribute specifies how many integer arguments are to be
2658 passed in registers. */
2659 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2660 /* Sseregparm attribute says we are using x86_64 calling conventions
2661 for FP arguments. */
2662 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2663 /* force_align_arg_pointer says this function realigns the stack at entry. */
2664 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2665 false, true, true, ix86_handle_cconv_attribute },
2666 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2667 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2668 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2669 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2670 #endif
2671 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2672 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2673 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2674 SUBTARGET_ATTRIBUTE_TABLE,
2675 #endif
2676 { NULL, 0, 0, false, false, false, NULL }
2677 };
2678
2679 /* Decide whether we can make a sibling call to a function. DECL is the
2680 declaration of the function being targeted by the call and EXP is the
2681 CALL_EXPR representing the call. */
2682
2683 static bool
2684 ix86_function_ok_for_sibcall (tree decl, tree exp)
2685 {
2686 tree func;
2687 rtx a, b;
2688
2689 /* If we are generating position-independent code, we cannot sibcall
2690 optimize any indirect call, or a direct call to a global function,
2691 as the PLT requires %ebx be live. */
2692 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2693 return false;
2694
2695 if (decl)
2696 func = decl;
2697 else
2698 {
2699 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2700 if (POINTER_TYPE_P (func))
2701 func = TREE_TYPE (func);
2702 }
2703
2704 /* Check that the return value locations are the same. For example,
2705 if we are returning floats on the 80387 register stack, we cannot
2706 make a sibcall from a function that doesn't return a float to a
2707 function that does or, conversely, from a function that does return
2708 a float to a function that doesn't; the necessary stack adjustment
2709 would not be executed. This is also the place we notice
2710 differences in the return value ABI. Note that it is ok for one
2711 of the functions to have void return type as long as the return
2712 value of the other is passed in a register. */
2713 a = ix86_function_value (TREE_TYPE (exp), func, false);
2714 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2715 cfun->decl, false);
2716 if (STACK_REG_P (a) || STACK_REG_P (b))
2717 {
2718 if (!rtx_equal_p (a, b))
2719 return false;
2720 }
2721 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2722 ;
2723 else if (!rtx_equal_p (a, b))
2724 return false;
2725
2726 /* If this call is indirect, we'll need to be able to use a call-clobbered
2727 register for the address of the target function. Make sure that all
2728 such registers are not used for passing parameters. */
2729 if (!decl && !TARGET_64BIT)
2730 {
2731 tree type;
2732
2733 /* We're looking at the CALL_EXPR, we need the type of the function. */
2734 type = TREE_OPERAND (exp, 0); /* pointer expression */
2735 type = TREE_TYPE (type); /* pointer type */
2736 type = TREE_TYPE (type); /* function type */
2737
2738 if (ix86_function_regparm (type, NULL) >= 3)
2739 {
2740 /* ??? Need to count the actual number of registers to be used,
2741 not the possible number of registers. Fix later. */
2742 return false;
2743 }
2744 }
2745
2746 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2747 /* Dllimport'd functions are also called indirectly. */
2748 if (decl && DECL_DLLIMPORT_P (decl)
2749 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2750 return false;
2751 #endif
2752
2753 /* If we force-aligned the stack, then sibcalling would unalign the
2754 stack, which may break the called function. */
2755 if (cfun->machine->force_align_arg_pointer)
2756 return false;
2757
2758 /* Otherwise okay. That also includes certain types of indirect calls. */
2759 return true;
2760 }
2761
2762 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2763 calling convention attributes;
2764 arguments as in struct attribute_spec.handler. */
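/* For example, fastcall cannot be combined with cdecl, stdcall or regparm
   on the same function type, while sseregparm may be combined with any of
   them; the checks below enforce this. */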
2765
2766 static tree
2767 ix86_handle_cconv_attribute (tree *node, tree name,
2768 tree args,
2769 int flags ATTRIBUTE_UNUSED,
2770 bool *no_add_attrs)
2771 {
2772 if (TREE_CODE (*node) != FUNCTION_TYPE
2773 && TREE_CODE (*node) != METHOD_TYPE
2774 && TREE_CODE (*node) != FIELD_DECL
2775 && TREE_CODE (*node) != TYPE_DECL)
2776 {
2777 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2778 IDENTIFIER_POINTER (name));
2779 *no_add_attrs = true;
2780 return NULL_TREE;
2781 }
2782
2783 /* Can combine regparm with all attributes but fastcall. */
2784 if (is_attribute_p ("regparm", name))
2785 {
2786 tree cst;
2787
2788 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2789 {
2790 error ("fastcall and regparm attributes are not compatible");
2791 }
2792
2793 cst = TREE_VALUE (args);
2794 if (TREE_CODE (cst) != INTEGER_CST)
2795 {
2796 warning (OPT_Wattributes,
2797 "%qs attribute requires an integer constant argument",
2798 IDENTIFIER_POINTER (name));
2799 *no_add_attrs = true;
2800 }
2801 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2802 {
2803 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2804 IDENTIFIER_POINTER (name), REGPARM_MAX);
2805 *no_add_attrs = true;
2806 }
2807
2808 if (!TARGET_64BIT
2809 && lookup_attribute (ix86_force_align_arg_pointer_string,
2810 TYPE_ATTRIBUTES (*node))
2811 && compare_tree_int (cst, REGPARM_MAX-1))
2812 {
2813 error ("%s functions limited to %d register parameters",
2814 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2815 }
2816
2817 return NULL_TREE;
2818 }
2819
2820 if (TARGET_64BIT)
2821 {
2822 warning (OPT_Wattributes, "%qs attribute ignored",
2823 IDENTIFIER_POINTER (name));
2824 *no_add_attrs = true;
2825 return NULL_TREE;
2826 }
2827
2828 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2829 if (is_attribute_p ("fastcall", name))
2830 {
2831 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2832 {
2833 error ("fastcall and cdecl attributes are not compatible");
2834 }
2835 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2836 {
2837 error ("fastcall and stdcall attributes are not compatible");
2838 }
2839 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2840 {
2841 error ("fastcall and regparm attributes are not compatible");
2842 }
2843 }
2844
2845 /* Can combine stdcall with fastcall (redundant), regparm and
2846 sseregparm. */
2847 else if (is_attribute_p ("stdcall", name))
2848 {
2849 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2850 {
2851 error ("stdcall and cdecl attributes are not compatible");
2852 }
2853 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2854 {
2855 error ("stdcall and fastcall attributes are not compatible");
2856 }
2857 }
2858
2859 /* Can combine cdecl with regparm and sseregparm. */
2860 else if (is_attribute_p ("cdecl", name))
2861 {
2862 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2863 {
2864 error ("stdcall and cdecl attributes are not compatible");
2865 }
2866 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2867 {
2868 error ("fastcall and cdecl attributes are not compatible");
2869 }
2870 }
2871
2872 /* Can combine sseregparm with all attributes. */
2873
2874 return NULL_TREE;
2875 }
2876
2877 /* Return 0 if the attributes for two types are incompatible, 1 if they
2878 are compatible, and 2 if they are nearly compatible (which causes a
2879 warning to be generated). */
2880
2881 static int
2882 ix86_comp_type_attributes (tree type1, tree type2)
2883 {
2884 /* Check for mismatch of non-default calling convention. */
2885 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2886
2887 if (TREE_CODE (type1) != FUNCTION_TYPE)
2888 return 1;
2889
2890 /* Check for mismatched fastcall/regparm types. */
2891 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2892 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2893 || (ix86_function_regparm (type1, NULL)
2894 != ix86_function_regparm (type2, NULL)))
2895 return 0;
2896
2897 /* Check for mismatched sseregparm types. */
2898 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2899 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2900 return 0;
2901
2902 /* Check for mismatched return types (cdecl vs stdcall). */
2903 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2904 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2905 return 0;
2906
2907 return 1;
2908 }
2909 \f
2910 /* Return the regparm value for a function with the indicated TYPE and DECL.
2911 DECL may be NULL when calling the function indirectly
2912 or considering a libcall. */
2913
2914 static int
2915 ix86_function_regparm (tree type, tree decl)
2916 {
2917 tree attr;
2918 int regparm = ix86_regparm;
2919 bool user_convention = false;
2920
2921 if (!TARGET_64BIT)
2922 {
2923 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2924 if (attr)
2925 {
2926 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2927 user_convention = true;
2928 }
2929
2930 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2931 {
2932 regparm = 2;
2933 user_convention = true;
2934 }
2935
2936 /* Use register calling convention for local functions when possible. */
2937 if (!TARGET_64BIT && !user_convention && decl
2938 && flag_unit_at_a_time && !profile_flag)
2939 {
2940 struct cgraph_local_info *i = cgraph_local_info (decl);
2941 if (i && i->local)
2942 {
2943 int local_regparm, globals = 0, regno;
2944
2945 /* Make sure no regparm register is taken by a global register
2946 variable. */
2947 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2948 if (global_regs[local_regparm])
2949 break;
2950 /* We can't use regparm(3) for nested functions as these use
2951 the static chain pointer in the third argument. */
2952 if (local_regparm == 3
2953 && decl_function_context (decl)
2954 && !DECL_NO_STATIC_CHAIN (decl))
2955 local_regparm = 2;
2956 /* If the function realigns its stack pointer, the
2957 prologue will clobber %ecx. If we've already
2958 generated code for the callee, the callee
2959 DECL_STRUCT_FUNCTION is gone, so we fall back to
2960 scanning the attributes for the self-realigning
2961 property. */
2962 if ((DECL_STRUCT_FUNCTION (decl)
2963 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2964 || (!DECL_STRUCT_FUNCTION (decl)
2965 && lookup_attribute (ix86_force_align_arg_pointer_string,
2966 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2967 local_regparm = 2;
2968 /* Each global register variable increases register pressure, so
2969 the more global register variables there are, the less useful the
2970 regparm optimization becomes, unless the user requests it explicitly. */
2971 for (regno = 0; regno < 6; regno++)
2972 if (global_regs[regno])
2973 globals++;
2974 local_regparm
2975 = globals < local_regparm ? local_regparm - globals : 0;
2976
2977 if (local_regparm > regparm)
2978 regparm = local_regparm;
2979 }
2980 }
2981 }
2982 return regparm;
2983 }
2984
2985 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2986 DFmode (2) arguments in SSE registers for a function with the
2987 indicated TYPE and DECL. DECL may be NULL when calling the function
2988 indirectly or considering a libcall. Otherwise return 0. */
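/* Illustrative example (not part of the original sources): for

     float __attribute__ ((sseregparm)) dot2 (float x, float y);

   on ia32 with SSE enabled this function returns 2, so the SFmode and
   DFmode arguments of dot2 are passed in SSE registers; dot2 is a
   hypothetical declaration used only for illustration.  */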
2989
2990 static int
2991 ix86_function_sseregparm (tree type, tree decl)
2992 {
2993 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2994 by the sseregparm attribute. */
2995 if (TARGET_SSEREGPARM
2996 || (type
2997 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2998 {
2999 if (!TARGET_SSE)
3000 {
3001 if (decl)
3002 error ("Calling %qD with attribute sseregparm without "
3003 "SSE/SSE2 enabled", decl);
3004 else
3005 error ("Calling %qT with attribute sseregparm without "
3006 "SSE/SSE2 enabled", type);
3007 return 0;
3008 }
3009
3010 return 2;
3011 }
3012
3013 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3014 (and DFmode for SSE2) arguments in SSE registers,
3015 even for 32-bit targets. */
3016 if (!TARGET_64BIT && decl
3017 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3018 {
3019 struct cgraph_local_info *i = cgraph_local_info (decl);
3020 if (i && i->local)
3021 return TARGET_SSE2 ? 2 : 1;
3022 }
3023
3024 return 0;
3025 }
3026
3027 /* Return true if EAX is live at the start of the function. Used by
3028 ix86_expand_prologue to determine if we need special help before
3029 calling allocate_stack_worker. */
3030
3031 static bool
3032 ix86_eax_live_at_start_p (void)
3033 {
3034 /* Cheat. Don't bother working forward from ix86_function_regparm
3035 to the function type to whether an actual argument is located in
3036 eax. Instead just look at cfg info, which is still close enough
3037 to correct at this point. This gives false positives for broken
3038 functions that might use uninitialized data that happens to be
3039 allocated in eax, but who cares? */
3040 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3041 }
3042
3043 /* Value is the number of bytes of arguments automatically
3044 popped when returning from a subroutine call.
3045 FUNDECL is the declaration node of the function (as a tree),
3046 FUNTYPE is the data type of the function (as a tree),
3047 or for a library call it is an identifier node for the subroutine name.
3048 SIZE is the number of bytes of arguments passed on the stack.
3049
3050 On the 80386, the RTD insn may be used to pop them if the number
3051 of args is fixed, but if the number is variable then the caller
3052 must pop them all. RTD can't be used for library calls now
3053 because the library is compiled with the Unix compiler.
3054 Use of RTD is a selectable option, since it is incompatible with
3055 standard Unix calling sequences. If the option is not selected,
3056 the caller must always pop the args.
3057
3058 The attribute stdcall is equivalent to RTD on a per module basis. */
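/* Illustrative sketch (not part of the original sources): for

     int __attribute__ ((stdcall)) f (int a, int b);

   the callee pops its 8 bytes of arguments on return, so this function
   returns 8 for SIZE == 8; for a cdecl or variadic function it returns
   0 and the caller pops the arguments itself.  */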
3059
3060 int
3061 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3062 {
3063 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3064
3065 /* Cdecl functions override -mrtd, and never pop the stack. */
3066 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3067
3068 /* Stdcall and fastcall functions will pop the stack if not
3069 variable args. */
3070 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3071 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3072 rtd = 1;
3073
3074 if (rtd
3075 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3076 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3077 == void_type_node)))
3078 return size;
3079 }
3080
3081 /* Lose any fake structure return argument if it is passed on the stack. */
3082 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3083 && !TARGET_64BIT
3084 && !KEEP_AGGREGATE_RETURN_POINTER)
3085 {
3086 int nregs = ix86_function_regparm (funtype, fundecl);
3087
3088 if (!nregs)
3089 return GET_MODE_SIZE (Pmode);
3090 }
3091
3092 return 0;
3093 }
3094 \f
3095 /* Argument support functions. */
3096
3097 /* Return true when register may be used to pass function parameters. */
3098 bool
3099 ix86_function_arg_regno_p (int regno)
3100 {
3101 int i;
3102 if (!TARGET_64BIT)
3103 {
3104 if (TARGET_MACHO)
3105 return (regno < REGPARM_MAX
3106 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3107 else
3108 return (regno < REGPARM_MAX
3109 || (TARGET_MMX && MMX_REGNO_P (regno)
3110 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3111 || (TARGET_SSE && SSE_REGNO_P (regno)
3112 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3113 }
3114
3115 if (TARGET_MACHO)
3116 {
3117 if (SSE_REGNO_P (regno) && TARGET_SSE)
3118 return true;
3119 }
3120 else
3121 {
3122 if (TARGET_SSE && SSE_REGNO_P (regno)
3123 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3124 return true;
3125 }
3126 /* RAX is used as hidden argument to va_arg functions. */
3127 if (!regno)
3128 return true;
3129 for (i = 0; i < REGPARM_MAX; i++)
3130 if (regno == x86_64_int_parameter_registers[i])
3131 return true;
3132 return false;
3133 }
3134
3135 /* Return true if we do not know how to pass TYPE solely in registers. */
3136
3137 static bool
3138 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3139 {
3140 if (must_pass_in_stack_var_size_or_pad (mode, type))
3141 return true;
3142
3143 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3144 The layout_type routine is crafty and tries to trick us into passing
3145 currently unsupported vector types on the stack by using TImode. */
3146 return (!TARGET_64BIT && mode == TImode
3147 && type && TREE_CODE (type) != VECTOR_TYPE);
3148 }
3149
3150 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3151 for a call to a function whose data type is FNTYPE.
3152 For a library call, FNTYPE is 0. */
3153
3154 void
3155 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3156 tree fntype, /* tree ptr for function decl */
3157 rtx libname, /* SYMBOL_REF of library name or 0 */
3158 tree fndecl)
3159 {
3160 static CUMULATIVE_ARGS zero_cum;
3161 tree param, next_param;
3162
3163 if (TARGET_DEBUG_ARG)
3164 {
3165 fprintf (stderr, "\ninit_cumulative_args (");
3166 if (fntype)
3167 fprintf (stderr, "fntype code = %s, ret code = %s",
3168 tree_code_name[(int) TREE_CODE (fntype)],
3169 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3170 else
3171 fprintf (stderr, "no fntype");
3172
3173 if (libname)
3174 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3175 }
3176
3177 *cum = zero_cum;
3178
3179 /* Set up the number of registers to use for passing arguments. */
3180 cum->nregs = ix86_regparm;
3181 if (TARGET_SSE)
3182 cum->sse_nregs = SSE_REGPARM_MAX;
3183 if (TARGET_MMX)
3184 cum->mmx_nregs = MMX_REGPARM_MAX;
3185 cum->warn_sse = true;
3186 cum->warn_mmx = true;
3187 cum->maybe_vaarg = false;
3188
3189 /* Use the ecx and edx registers if the function has the fastcall
3190 attribute, otherwise look for regparm information. */
3191 if (fntype && !TARGET_64BIT)
3192 {
3193 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3194 {
3195 cum->nregs = 2;
3196 cum->fastcall = 1;
3197 }
3198 else
3199 cum->nregs = ix86_function_regparm (fntype, fndecl);
3200 }
3201
3202 /* Set up the number of SSE registers used for passing SFmode
3203 and DFmode arguments. Warn for mismatching ABI. */
3204 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3205
3206 /* Determine if this function has variable arguments. This is
3207 indicated by the last argument being 'void_type_node' if there
3208 are no variable arguments. If there are variable arguments, then
3209 we won't pass anything in registers in 32-bit mode. */
3210
3211 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3212 {
3213 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3214 param != 0; param = next_param)
3215 {
3216 next_param = TREE_CHAIN (param);
3217 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3218 {
3219 if (!TARGET_64BIT)
3220 {
3221 cum->nregs = 0;
3222 cum->sse_nregs = 0;
3223 cum->mmx_nregs = 0;
3224 cum->warn_sse = 0;
3225 cum->warn_mmx = 0;
3226 cum->fastcall = 0;
3227 cum->float_in_sse = 0;
3228 }
3229 cum->maybe_vaarg = true;
3230 }
3231 }
3232 }
3233 if ((!fntype && !libname)
3234 || (fntype && !TYPE_ARG_TYPES (fntype)))
3235 cum->maybe_vaarg = true;
3236
3237 if (TARGET_DEBUG_ARG)
3238 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3239
3240 return;
3241 }
3242
3243 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3244 But in the case of vector types, it is some vector mode.
3245
3246 When we have only some of our vector isa extensions enabled, then there
3247 are some modes for which vector_mode_supported_p is false. For these
3248 modes, the generic vector support in gcc will choose some non-vector mode
3249 in order to implement the type. By computing the natural mode, we'll
3250 select the proper ABI location for the operand and not depend on whatever
3251 the middle-end decides to do with these vector types. */
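/* A hypothetical example of the above: for

     typedef float v2sf __attribute__ ((vector_size (8)));

   type_natural_mode returns V2SFmode even when the MMX/3dNOW! vector
   support is disabled and TYPE_MODE has fallen back to a non-vector
   mode, so the argument still gets the ABI-mandated location.  */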
3252
3253 static enum machine_mode
3254 type_natural_mode (tree type)
3255 {
3256 enum machine_mode mode = TYPE_MODE (type);
3257
3258 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3259 {
3260 HOST_WIDE_INT size = int_size_in_bytes (type);
3261 if ((size == 8 || size == 16)
3262 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3263 && TYPE_VECTOR_SUBPARTS (type) > 1)
3264 {
3265 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3266
3267 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3268 mode = MIN_MODE_VECTOR_FLOAT;
3269 else
3270 mode = MIN_MODE_VECTOR_INT;
3271
3272 /* Get the mode which has this inner mode and number of units. */
3273 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3274 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3275 && GET_MODE_INNER (mode) == innermode)
3276 return mode;
3277
3278 gcc_unreachable ();
3279 }
3280 }
3281
3282 return mode;
3283 }
3284
3285 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3286 this may not agree with the mode that the type system has chosen for the
3287 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3288 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3289
3290 static rtx
3291 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3292 unsigned int regno)
3293 {
3294 rtx tmp;
3295
3296 if (orig_mode != BLKmode)
3297 tmp = gen_rtx_REG (orig_mode, regno);
3298 else
3299 {
3300 tmp = gen_rtx_REG (mode, regno);
3301 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3302 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3303 }
3304
3305 return tmp;
3306 }
3307
3308 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3309 The goal of this code is to classify each eightbyte of the incoming argument
3310 by register class and assign registers accordingly. */
3311
3312 /* Return the union class of CLASS1 and CLASS2.
3313 See the x86-64 PS ABI for details. */
3314
3315 static enum x86_64_reg_class
3316 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3317 {
3318 /* Rule #1: If both classes are equal, this is the resulting class. */
3319 if (class1 == class2)
3320 return class1;
3321
3322 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3323 the other class. */
3324 if (class1 == X86_64_NO_CLASS)
3325 return class2;
3326 if (class2 == X86_64_NO_CLASS)
3327 return class1;
3328
3329 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3330 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3331 return X86_64_MEMORY_CLASS;
3332
3333 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3334 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3335 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3336 return X86_64_INTEGERSI_CLASS;
3337 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3338 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3339 return X86_64_INTEGER_CLASS;
3340
3341 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3342 MEMORY is used. */
3343 if (class1 == X86_64_X87_CLASS
3344 || class1 == X86_64_X87UP_CLASS
3345 || class1 == X86_64_COMPLEX_X87_CLASS
3346 || class2 == X86_64_X87_CLASS
3347 || class2 == X86_64_X87UP_CLASS
3348 || class2 == X86_64_COMPLEX_X87_CLASS)
3349 return X86_64_MEMORY_CLASS;
3350
3351 /* Rule #6: Otherwise class SSE is used. */
3352 return X86_64_SSE_CLASS;
3353 }
3354
3355 /* Classify the argument of type TYPE and mode MODE.
3356 CLASSES will be filled by the register class used to pass each word
3357 of the operand. The number of words is returned. In case the parameter
3358 should be passed in memory, 0 is returned. As a special case for zero
3359 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3360
3361 BIT_OFFSET is used internally for handling records; it specifies the
3362 offset of the value in bits, modulo 256, to avoid overflow cases.
3363
3364 See the x86-64 PS ABI for details.
3365 */
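/* A worked example of the classification (illustrative only):

     struct s { double d; int i; };

   occupies 16 bytes, i.e. two eightbytes; the first classifies as
   X86_64_SSEDF_CLASS and the second as X86_64_INTEGER_CLASS, so the
   struct is passed in one SSE register and one integer register.  */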
3366
3367 static int
3368 classify_argument (enum machine_mode mode, tree type,
3369 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3370 {
3371 HOST_WIDE_INT bytes =
3372 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3373 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3374
3375 /* Variable sized entities are always passed/returned in memory. */
3376 if (bytes < 0)
3377 return 0;
3378
3379 if (mode != VOIDmode
3380 && targetm.calls.must_pass_in_stack (mode, type))
3381 return 0;
3382
3383 if (type && AGGREGATE_TYPE_P (type))
3384 {
3385 int i;
3386 tree field;
3387 enum x86_64_reg_class subclasses[MAX_CLASSES];
3388
3389 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3390 if (bytes > 16)
3391 return 0;
3392
3393 for (i = 0; i < words; i++)
3394 classes[i] = X86_64_NO_CLASS;
3395
3396 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3397 signal the memory class, so handle them as a special case. */
3398 if (!words)
3399 {
3400 classes[0] = X86_64_NO_CLASS;
3401 return 1;
3402 }
3403
3404 /* Classify each field of record and merge classes. */
3405 switch (TREE_CODE (type))
3406 {
3407 case RECORD_TYPE:
3408 /* And now merge the fields of structure. */
3409 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3410 {
3411 if (TREE_CODE (field) == FIELD_DECL)
3412 {
3413 int num;
3414
3415 if (TREE_TYPE (field) == error_mark_node)
3416 continue;
3417
3418 /* Bitfields are always classified as integer. Handle them
3419 early, since later code would consider them to be
3420 misaligned integers. */
3421 if (DECL_BIT_FIELD (field))
3422 {
3423 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3424 i < ((int_bit_position (field) + (bit_offset % 64))
3425 + tree_low_cst (DECL_SIZE (field), 0)
3426 + 63) / 8 / 8; i++)
3427 classes[i] =
3428 merge_classes (X86_64_INTEGER_CLASS,
3429 classes[i]);
3430 }
3431 else
3432 {
3433 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3434 TREE_TYPE (field), subclasses,
3435 (int_bit_position (field)
3436 + bit_offset) % 256);
3437 if (!num)
3438 return 0;
3439 for (i = 0; i < num; i++)
3440 {
3441 int pos =
3442 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3443 classes[i + pos] =
3444 merge_classes (subclasses[i], classes[i + pos]);
3445 }
3446 }
3447 }
3448 }
3449 break;
3450
3451 case ARRAY_TYPE:
3452 /* Arrays are handled as small records. */
3453 {
3454 int num;
3455 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3456 TREE_TYPE (type), subclasses, bit_offset);
3457 if (!num)
3458 return 0;
3459
3460 /* The partial classes are now full classes. */
3461 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3462 subclasses[0] = X86_64_SSE_CLASS;
3463 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3464 subclasses[0] = X86_64_INTEGER_CLASS;
3465
3466 for (i = 0; i < words; i++)
3467 classes[i] = subclasses[i % num];
3468
3469 break;
3470 }
3471 case UNION_TYPE:
3472 case QUAL_UNION_TYPE:
3473 /* Unions are similar to RECORD_TYPE but offset is always 0.
3474 */
3475 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3476 {
3477 if (TREE_CODE (field) == FIELD_DECL)
3478 {
3479 int num;
3480
3481 if (TREE_TYPE (field) == error_mark_node)
3482 continue;
3483
3484 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3485 TREE_TYPE (field), subclasses,
3486 bit_offset);
3487 if (!num)
3488 return 0;
3489 for (i = 0; i < num; i++)
3490 classes[i] = merge_classes (subclasses[i], classes[i]);
3491 }
3492 }
3493 break;
3494
3495 default:
3496 gcc_unreachable ();
3497 }
3498
3499 /* Final merger cleanup. */
3500 for (i = 0; i < words; i++)
3501 {
3502 /* If one class is MEMORY, everything should be passed in
3503 memory. */
3504 if (classes[i] == X86_64_MEMORY_CLASS)
3505 return 0;
3506
3507 /* The X86_64_SSEUP_CLASS should always be preceded by
3508 X86_64_SSE_CLASS. */
3509 if (classes[i] == X86_64_SSEUP_CLASS
3510 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3511 classes[i] = X86_64_SSE_CLASS;
3512
3513 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3514 if (classes[i] == X86_64_X87UP_CLASS
3515 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3516 classes[i] = X86_64_SSE_CLASS;
3517 }
3518 return words;
3519 }
3520
3521 /* Compute the alignment needed. We align all types to their natural
3522 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3523 if (mode != VOIDmode && mode != BLKmode)
3524 {
3525 int mode_alignment = GET_MODE_BITSIZE (mode);
3526
3527 if (mode == XFmode)
3528 mode_alignment = 128;
3529 else if (mode == XCmode)
3530 mode_alignment = 256;
3531 if (COMPLEX_MODE_P (mode))
3532 mode_alignment /= 2;
3533 /* Misaligned fields are always returned in memory. */
3534 if (bit_offset % mode_alignment)
3535 return 0;
3536 }
3537
3538 /* For V1xx modes, just use the base mode. */
3539 if (VECTOR_MODE_P (mode)
3540 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3541 mode = GET_MODE_INNER (mode);
3542
3543 /* Classification of atomic types. */
3544 switch (mode)
3545 {
3546 case SDmode:
3547 case DDmode:
3548 classes[0] = X86_64_SSE_CLASS;
3549 return 1;
3550 case TDmode:
3551 classes[0] = X86_64_SSE_CLASS;
3552 classes[1] = X86_64_SSEUP_CLASS;
3553 return 2;
3554 case DImode:
3555 case SImode:
3556 case HImode:
3557 case QImode:
3558 case CSImode:
3559 case CHImode:
3560 case CQImode:
3561 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3562 classes[0] = X86_64_INTEGERSI_CLASS;
3563 else
3564 classes[0] = X86_64_INTEGER_CLASS;
3565 return 1;
3566 case CDImode:
3567 case TImode:
3568 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3569 return 2;
3570 case CTImode:
3571 return 0;
3572 case SFmode:
3573 if (!(bit_offset % 64))
3574 classes[0] = X86_64_SSESF_CLASS;
3575 else
3576 classes[0] = X86_64_SSE_CLASS;
3577 return 1;
3578 case DFmode:
3579 classes[0] = X86_64_SSEDF_CLASS;
3580 return 1;
3581 case XFmode:
3582 classes[0] = X86_64_X87_CLASS;
3583 classes[1] = X86_64_X87UP_CLASS;
3584 return 2;
3585 case TFmode:
3586 classes[0] = X86_64_SSE_CLASS;
3587 classes[1] = X86_64_SSEUP_CLASS;
3588 return 2;
3589 case SCmode:
3590 classes[0] = X86_64_SSE_CLASS;
3591 return 1;
3592 case DCmode:
3593 classes[0] = X86_64_SSEDF_CLASS;
3594 classes[1] = X86_64_SSEDF_CLASS;
3595 return 2;
3596 case XCmode:
3597 classes[0] = X86_64_COMPLEX_X87_CLASS;
3598 return 1;
3599 case TCmode:
3600 /* This mode is larger than 16 bytes. */
3601 return 0;
3602 case V4SFmode:
3603 case V4SImode:
3604 case V16QImode:
3605 case V8HImode:
3606 case V2DFmode:
3607 case V2DImode:
3608 classes[0] = X86_64_SSE_CLASS;
3609 classes[1] = X86_64_SSEUP_CLASS;
3610 return 2;
3611 case V2SFmode:
3612 case V2SImode:
3613 case V4HImode:
3614 case V8QImode:
3615 classes[0] = X86_64_SSE_CLASS;
3616 return 1;
3617 case BLKmode:
3618 case VOIDmode:
3619 return 0;
3620 default:
3621 gcc_assert (VECTOR_MODE_P (mode));
3622
3623 if (bytes > 16)
3624 return 0;
3625
3626 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3627
3628 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3629 classes[0] = X86_64_INTEGERSI_CLASS;
3630 else
3631 classes[0] = X86_64_INTEGER_CLASS;
3632 classes[1] = X86_64_INTEGER_CLASS;
3633 return 1 + (bytes > 8);
3634 }
3635 }
3636
3637 /* Examine the argument and set the number of registers required in each
3638 class in *INT_NREGS and *SSE_NREGS. Return 0 iff the parameter should be passed in memory. */
3639 static int
3640 examine_argument (enum machine_mode mode, tree type, int in_return,
3641 int *int_nregs, int *sse_nregs)
3642 {
3643 enum x86_64_reg_class class[MAX_CLASSES];
3644 int n = classify_argument (mode, type, class, 0);
3645
3646 *int_nregs = 0;
3647 *sse_nregs = 0;
3648 if (!n)
3649 return 0;
3650 for (n--; n >= 0; n--)
3651 switch (class[n])
3652 {
3653 case X86_64_INTEGER_CLASS:
3654 case X86_64_INTEGERSI_CLASS:
3655 (*int_nregs)++;
3656 break;
3657 case X86_64_SSE_CLASS:
3658 case X86_64_SSESF_CLASS:
3659 case X86_64_SSEDF_CLASS:
3660 (*sse_nregs)++;
3661 break;
3662 case X86_64_NO_CLASS:
3663 case X86_64_SSEUP_CLASS:
3664 break;
3665 case X86_64_X87_CLASS:
3666 case X86_64_X87UP_CLASS:
3667 if (!in_return)
3668 return 0;
3669 break;
3670 case X86_64_COMPLEX_X87_CLASS:
3671 return in_return ? 2 : 0;
3672 case X86_64_MEMORY_CLASS:
3673 gcc_unreachable ();
3674 }
3675 return 1;
3676 }
3677
3678 /* Construct a container for the argument as used by the GCC interface. See
3679 FUNCTION_ARG for the detailed description. */
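/* As an illustration (not taken from the original sources), a 16-byte
   struct classified as { X86_64_INTEGER_CLASS, X86_64_SSEDF_CLASS }
   yields a PARALLEL roughly of the form

     (parallel [(expr_list (reg:DI di)   (const_int 0))
                (expr_list (reg:DF xmm0) (const_int 8))])

   recording which piece of the argument lives in which register.  */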
3680
3681 static rtx
3682 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3683 tree type, int in_return, int nintregs, int nsseregs,
3684 const int *intreg, int sse_regno)
3685 {
3686 /* The following variables hold the static issued_error state. */
3687 static bool issued_sse_arg_error;
3688 static bool issued_sse_ret_error;
3689 static bool issued_x87_ret_error;
3690
3691 enum machine_mode tmpmode;
3692 int bytes =
3693 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3694 enum x86_64_reg_class class[MAX_CLASSES];
3695 int n;
3696 int i;
3697 int nexps = 0;
3698 int needed_sseregs, needed_intregs;
3699 rtx exp[MAX_CLASSES];
3700 rtx ret;
3701
3702 n = classify_argument (mode, type, class, 0);
3703 if (TARGET_DEBUG_ARG)
3704 {
3705 if (!n)
3706 fprintf (stderr, "Memory class\n");
3707 else
3708 {
3709 fprintf (stderr, "Classes:");
3710 for (i = 0; i < n; i++)
3711 {
3712 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3713 }
3714 fprintf (stderr, "\n");
3715 }
3716 }
3717 if (!n)
3718 return NULL;
3719 if (!examine_argument (mode, type, in_return, &needed_intregs,
3720 &needed_sseregs))
3721 return NULL;
3722 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3723 return NULL;
3724
3725 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3726 some less clueful developer tries to use floating-point anyway. */
3727 if (needed_sseregs && !TARGET_SSE)
3728 {
3729 if (in_return)
3730 {
3731 if (!issued_sse_ret_error)
3732 {
3733 error ("SSE register return with SSE disabled");
3734 issued_sse_ret_error = true;
3735 }
3736 }
3737 else if (!issued_sse_arg_error)
3738 {
3739 error ("SSE register argument with SSE disabled");
3740 issued_sse_arg_error = true;
3741 }
3742 return NULL;
3743 }
3744
3745 /* Likewise, error if the ABI requires us to return values in the
3746 x87 registers and the user specified -mno-80387. */
3747 if (!TARGET_80387 && in_return)
3748 for (i = 0; i < n; i++)
3749 if (class[i] == X86_64_X87_CLASS
3750 || class[i] == X86_64_X87UP_CLASS
3751 || class[i] == X86_64_COMPLEX_X87_CLASS)
3752 {
3753 if (!issued_x87_ret_error)
3754 {
3755 error ("x87 register return with x87 disabled");
3756 issued_x87_ret_error = true;
3757 }
3758 return NULL;
3759 }
3760
3761 /* First construct the simple cases. Avoid SCmode, since we want to use
3762 a single register to pass this type. */
3763 if (n == 1 && mode != SCmode)
3764 switch (class[0])
3765 {
3766 case X86_64_INTEGER_CLASS:
3767 case X86_64_INTEGERSI_CLASS:
3768 return gen_rtx_REG (mode, intreg[0]);
3769 case X86_64_SSE_CLASS:
3770 case X86_64_SSESF_CLASS:
3771 case X86_64_SSEDF_CLASS:
3772 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3773 case X86_64_X87_CLASS:
3774 case X86_64_COMPLEX_X87_CLASS:
3775 return gen_rtx_REG (mode, FIRST_STACK_REG);
3776 case X86_64_NO_CLASS:
3777 /* Zero sized array, struct or class. */
3778 return NULL;
3779 default:
3780 gcc_unreachable ();
3781 }
3782 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3783 && mode != BLKmode)
3784 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3785 if (n == 2
3786 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3787 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3788 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3789 && class[1] == X86_64_INTEGER_CLASS
3790 && (mode == CDImode || mode == TImode || mode == TFmode)
3791 && intreg[0] + 1 == intreg[1])
3792 return gen_rtx_REG (mode, intreg[0]);
3793
3794 /* Otherwise figure out the entries of the PARALLEL. */
3795 for (i = 0; i < n; i++)
3796 {
3797 switch (class[i])
3798 {
3799 case X86_64_NO_CLASS:
3800 break;
3801 case X86_64_INTEGER_CLASS:
3802 case X86_64_INTEGERSI_CLASS:
3803 /* Merge TImodes on aligned occasions here too. */
3804 if (i * 8 + 8 > bytes)
3805 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3806 else if (class[i] == X86_64_INTEGERSI_CLASS)
3807 tmpmode = SImode;
3808 else
3809 tmpmode = DImode;
3810 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3811 if (tmpmode == BLKmode)
3812 tmpmode = DImode;
3813 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3814 gen_rtx_REG (tmpmode, *intreg),
3815 GEN_INT (i*8));
3816 intreg++;
3817 break;
3818 case X86_64_SSESF_CLASS:
3819 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3820 gen_rtx_REG (SFmode,
3821 SSE_REGNO (sse_regno)),
3822 GEN_INT (i*8));
3823 sse_regno++;
3824 break;
3825 case X86_64_SSEDF_CLASS:
3826 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3827 gen_rtx_REG (DFmode,
3828 SSE_REGNO (sse_regno)),
3829 GEN_INT (i*8));
3830 sse_regno++;
3831 break;
3832 case X86_64_SSE_CLASS:
3833 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3834 tmpmode = TImode;
3835 else
3836 tmpmode = DImode;
3837 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3838 gen_rtx_REG (tmpmode,
3839 SSE_REGNO (sse_regno)),
3840 GEN_INT (i*8));
3841 if (tmpmode == TImode)
3842 i++;
3843 sse_regno++;
3844 break;
3845 default:
3846 gcc_unreachable ();
3847 }
3848 }
3849
3850 /* Empty aligned struct, union or class. */
3851 if (nexps == 0)
3852 return NULL;
3853
3854 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3855 for (i = 0; i < nexps; i++)
3856 XVECEXP (ret, 0, i) = exp [i];
3857 return ret;
3858 }
3859
3860 /* Update the data in CUM to advance over an argument
3861 of mode MODE and data type TYPE.
3862 (TYPE is null for libcalls where that information may not be available.) */
3863
3864 void
3865 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3866 tree type, int named)
3867 {
3868 int bytes =
3869 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3870 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3871
3872 if (type)
3873 mode = type_natural_mode (type);
3874
3875 if (TARGET_DEBUG_ARG)
3876 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3877 "mode=%s, named=%d)\n\n",
3878 words, cum->words, cum->nregs, cum->sse_nregs,
3879 GET_MODE_NAME (mode), named);
3880
3881 if (TARGET_64BIT)
3882 {
3883 int int_nregs, sse_nregs;
3884 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3885 cum->words += words;
3886 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3887 {
3888 cum->nregs -= int_nregs;
3889 cum->sse_nregs -= sse_nregs;
3890 cum->regno += int_nregs;
3891 cum->sse_regno += sse_nregs;
3892 }
3893 else
3894 cum->words += words;
3895 }
3896 else
3897 {
3898 switch (mode)
3899 {
3900 default:
3901 break;
3902
3903 case BLKmode:
3904 if (bytes < 0)
3905 break;
3906 /* FALLTHRU */
3907
3908 case DImode:
3909 case SImode:
3910 case HImode:
3911 case QImode:
3912 cum->words += words;
3913 cum->nregs -= words;
3914 cum->regno += words;
3915
3916 if (cum->nregs <= 0)
3917 {
3918 cum->nregs = 0;
3919 cum->regno = 0;
3920 }
3921 break;
3922
3923 case DFmode:
3924 if (cum->float_in_sse < 2)
3925 break;
3926 case SFmode:
3927 if (cum->float_in_sse < 1)
3928 break;
3929 /* FALLTHRU */
3930
3931 case TImode:
3932 case V16QImode:
3933 case V8HImode:
3934 case V4SImode:
3935 case V2DImode:
3936 case V4SFmode:
3937 case V2DFmode:
3938 if (!type || !AGGREGATE_TYPE_P (type))
3939 {
3940 cum->sse_words += words;
3941 cum->sse_nregs -= 1;
3942 cum->sse_regno += 1;
3943 if (cum->sse_nregs <= 0)
3944 {
3945 cum->sse_nregs = 0;
3946 cum->sse_regno = 0;
3947 }
3948 }
3949 break;
3950
3951 case V8QImode:
3952 case V4HImode:
3953 case V2SImode:
3954 case V2SFmode:
3955 if (!type || !AGGREGATE_TYPE_P (type))
3956 {
3957 cum->mmx_words += words;
3958 cum->mmx_nregs -= 1;
3959 cum->mmx_regno += 1;
3960 if (cum->mmx_nregs <= 0)
3961 {
3962 cum->mmx_nregs = 0;
3963 cum->mmx_regno = 0;
3964 }
3965 }
3966 break;
3967 }
3968 }
3969 }
3970
3971 /* Define where to put the arguments to a function.
3972 Value is zero to push the argument on the stack,
3973 or a hard register in which to store the argument.
3974
3975 MODE is the argument's machine mode.
3976 TYPE is the data type of the argument (as a tree).
3977 This is null for libcalls where that information may
3978 not be available.
3979 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3980 the preceding args and about the function being called.
3981 NAMED is nonzero if this argument is a named parameter
3982 (otherwise it is an extra parameter matching an ellipsis). */
3983
3984 rtx
3985 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3986 tree type, int named)
3987 {
3988 enum machine_mode mode = orig_mode;
3989 rtx ret = NULL_RTX;
3990 int bytes =
3991 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3992 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3993 static bool warnedsse, warnedmmx;
3994
3995 /* To simplify the code below, represent vector types with a vector mode
3996 even if MMX/SSE are not active. */
3997 if (type && TREE_CODE (type) == VECTOR_TYPE)
3998 mode = type_natural_mode (type);
3999
4000 /* Handle a hidden AL argument containing the number of SSE registers for
4001 varargs x86-64 functions. For the i386 ABI just return constm1_rtx to
4002 avoid any AL settings. */
4003 if (mode == VOIDmode)
4004 {
4005 if (TARGET_64BIT)
4006 return GEN_INT (cum->maybe_vaarg
4007 ? (cum->sse_nregs < 0
4008 ? SSE_REGPARM_MAX
4009 : cum->sse_regno)
4010 : -1);
4011 else
4012 return constm1_rtx;
4013 }
4014 if (TARGET_64BIT)
4015 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4016 cum->sse_nregs,
4017 &x86_64_int_parameter_registers [cum->regno],
4018 cum->sse_regno);
4019 else
4020 switch (mode)
4021 {
4022 /* For now, pass fp/complex values on the stack. */
4023 default:
4024 break;
4025
4026 case BLKmode:
4027 if (bytes < 0)
4028 break;
4029 /* FALLTHRU */
4030 case DImode:
4031 case SImode:
4032 case HImode:
4033 case QImode:
4034 if (words <= cum->nregs)
4035 {
4036 int regno = cum->regno;
4037
4038 /* Fastcall allocates the first two DWORD (SImode) or
4039 smaller arguments to ECX and EDX. */
4040 if (cum->fastcall)
4041 {
4042 if (mode == BLKmode || mode == DImode)
4043 break;
4044
4045 /* ECX, not EAX, is the first allocated register. */
4046 if (regno == 0)
4047 regno = 2;
4048 }
4049 ret = gen_rtx_REG (mode, regno);
4050 }
4051 break;
4052 case DFmode:
4053 if (cum->float_in_sse < 2)
4054 break;
4055 case SFmode:
4056 if (cum->float_in_sse < 1)
4057 break;
4058 /* FALLTHRU */
4059 case TImode:
4060 case V16QImode:
4061 case V8HImode:
4062 case V4SImode:
4063 case V2DImode:
4064 case V4SFmode:
4065 case V2DFmode:
4066 if (!type || !AGGREGATE_TYPE_P (type))
4067 {
4068 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4069 {
4070 warnedsse = true;
4071 warning (0, "SSE vector argument without SSE enabled "
4072 "changes the ABI");
4073 }
4074 if (cum->sse_nregs)
4075 ret = gen_reg_or_parallel (mode, orig_mode,
4076 cum->sse_regno + FIRST_SSE_REG);
4077 }
4078 break;
4079 case V8QImode:
4080 case V4HImode:
4081 case V2SImode:
4082 case V2SFmode:
4083 if (!type || !AGGREGATE_TYPE_P (type))
4084 {
4085 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4086 {
4087 warnedmmx = true;
4088 warning (0, "MMX vector argument without MMX enabled "
4089 "changes the ABI");
4090 }
4091 if (cum->mmx_nregs)
4092 ret = gen_reg_or_parallel (mode, orig_mode,
4093 cum->mmx_regno + FIRST_MMX_REG);
4094 }
4095 break;
4096 }
4097
4098 if (TARGET_DEBUG_ARG)
4099 {
4100 fprintf (stderr,
4101 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4102 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4103
4104 if (ret)
4105 print_simple_rtl (stderr, ret);
4106 else
4107 fprintf (stderr, ", stack");
4108
4109 fprintf (stderr, " )\n");
4110 }
4111
4112 return ret;
4113 }
4114
4115 /* A C expression that indicates when an argument must be passed by
4116 reference. If nonzero for an argument, a copy of that argument is
4117 made in memory and a pointer to the argument is passed instead of
4118 the argument itself. The pointer is passed in whatever way is
4119 appropriate for passing a pointer to that type. */
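/* For example (illustrative): on x86-64 a parameter whose size is not
   known at compile time, such as a C99 variable length array, has
   int_size_in_bytes == -1 and is therefore passed by reference;
   fixed-size aggregates are passed by value (possibly in memory).  */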
4120
4121 static bool
4122 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4123 enum machine_mode mode ATTRIBUTE_UNUSED,
4124 tree type, bool named ATTRIBUTE_UNUSED)
4125 {
4126 if (!TARGET_64BIT)
4127 return 0;
4128
4129 if (type && int_size_in_bytes (type) == -1)
4130 {
4131 if (TARGET_DEBUG_ARG)
4132 fprintf (stderr, "function_arg_pass_by_reference\n");
4133 return 1;
4134 }
4135
4136 return 0;
4137 }
4138
4139 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4140 passing ABI. Only called if TARGET_SSE. */
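/* For example (illustrative): struct { __m128 v; int tail; } contains a
   128-bit aligned vector, so ia32 arguments of this type get 128-bit
   stack alignment, while struct { int a[4]; } does not.  */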
4141 static bool
4142 contains_128bit_aligned_vector_p (tree type)
4143 {
4144 enum machine_mode mode = TYPE_MODE (type);
4145 if (SSE_REG_MODE_P (mode)
4146 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4147 return true;
4148 if (TYPE_ALIGN (type) < 128)
4149 return false;
4150
4151 if (AGGREGATE_TYPE_P (type))
4152 {
4153 /* Walk the aggregates recursively. */
4154 switch (TREE_CODE (type))
4155 {
4156 case RECORD_TYPE:
4157 case UNION_TYPE:
4158 case QUAL_UNION_TYPE:
4159 {
4160 tree field;
4161
4162 /* Walk all the structure fields. */
4163 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4164 {
4165 if (TREE_CODE (field) == FIELD_DECL
4166 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4167 return true;
4168 }
4169 break;
4170 }
4171
4172 case ARRAY_TYPE:
4173 /* Just in case some language passes arrays by value. */
4174 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4175 return true;
4176 break;
4177
4178 default:
4179 gcc_unreachable ();
4180 }
4181 }
4182 return false;
4183 }
4184
4185 /* Gives the alignment boundary, in bits, of an argument with the
4186 specified mode and type. */
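/* For example (illustrative): on ia32 a plain int argument gets
   PARM_BOUNDARY (32 bits), while a __m128 argument, or an aggregate
   containing one, gets 128-bit alignment when SSE is enabled.  */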
4187
4188 int
4189 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4190 {
4191 int align;
4192 if (type)
4193 align = TYPE_ALIGN (type);
4194 else
4195 align = GET_MODE_ALIGNMENT (mode);
4196 if (align < PARM_BOUNDARY)
4197 align = PARM_BOUNDARY;
4198 if (!TARGET_64BIT)
4199 {
4200 /* The i386 ABI defines all arguments to be 4 byte aligned. We have to
4201 make an exception for SSE modes since these require 128-bit
4202 alignment.
4203
4204 The handling here differs from field_alignment. ICC aligns MMX
4205 arguments to 4 byte boundaries, while structure fields are aligned
4206 to 8 byte boundaries. */
4207 if (!TARGET_SSE)
4208 align = PARM_BOUNDARY;
4209 else if (!type)
4210 {
4211 if (!SSE_REG_MODE_P (mode))
4212 align = PARM_BOUNDARY;
4213 }
4214 else
4215 {
4216 if (!contains_128bit_aligned_vector_p (type))
4217 align = PARM_BOUNDARY;
4218 }
4219 }
4220 if (align > 128)
4221 align = 128;
4222 return align;
4223 }
4224
4225 /* Return true if N is a possible register number of function value. */
4226 bool
4227 ix86_function_value_regno_p (int regno)
4228 {
4229 if (TARGET_MACHO)
4230 {
4231 if (!TARGET_64BIT)
4232 {
4233 return ((regno) == 0
4234 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4235 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4236 }
4237 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4238 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4239 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4240 }
4241 else
4242 {
4243 if (regno == 0
4244 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4245 || (regno == FIRST_SSE_REG && TARGET_SSE))
4246 return true;
4247
4248 if (!TARGET_64BIT
4249 && (regno == FIRST_MMX_REG && TARGET_MMX))
4250 return true;
4251
4252 return false;
4253 }
4254 }
4255
4256 /* Define how to find the value returned by a function.
4257 VALTYPE is the data type of the value (as a tree).
4258 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4259 otherwise, FUNC is 0. */
4260 rtx
4261 ix86_function_value (tree valtype, tree fntype_or_decl,
4262 bool outgoing ATTRIBUTE_UNUSED)
4263 {
4264 enum machine_mode natmode = type_natural_mode (valtype);
4265
4266 if (TARGET_64BIT)
4267 {
4268 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4269 1, REGPARM_MAX, SSE_REGPARM_MAX,
4270 x86_64_int_return_registers, 0);
4271 /* For zero sized structures, construct_container returns NULL, but we
4272 need to keep the rest of the compiler happy by returning a meaningful value. */
4273 if (!ret)
4274 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4275 return ret;
4276 }
4277 else
4278 {
4279 tree fn = NULL_TREE, fntype;
4280 if (fntype_or_decl
4281 && DECL_P (fntype_or_decl))
4282 fn = fntype_or_decl;
4283 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4284 return gen_rtx_REG (TYPE_MODE (valtype),
4285 ix86_value_regno (natmode, fn, fntype));
4286 }
4287 }
4288
4289 /* Return true iff type is returned in memory. */
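/* Some illustrative ia32 cases (not from the original sources): a
   16-byte struct is returned in memory (size > 12), long double
   (XFmode) is returned in %st(0), and an 8-byte vector is returned in
   %mm0 when MMX is enabled but in memory when it is not.  */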
4290 int
4291 ix86_return_in_memory (tree type)
4292 {
4293 int needed_intregs, needed_sseregs, size;
4294 enum machine_mode mode = type_natural_mode (type);
4295
4296 if (TARGET_64BIT)
4297 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4298
4299 if (mode == BLKmode)
4300 return 1;
4301
4302 size = int_size_in_bytes (type);
4303
4304 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4305 return 0;
4306
4307 if (VECTOR_MODE_P (mode) || mode == TImode)
4308 {
4309 /* User-created vectors small enough to fit in EAX. */
4310 if (size < 8)
4311 return 0;
4312
4313 /* MMX/3dNow values are returned in MM0,
4314 except when it doesn't exist. */
4315 if (size == 8)
4316 return (TARGET_MMX ? 0 : 1);
4317
4318 /* SSE values are returned in XMM0, except when it doesn't exist. */
4319 if (size == 16)
4320 return (TARGET_SSE ? 0 : 1);
4321 }
4322
4323 if (mode == XFmode)
4324 return 0;
4325
4326 if (mode == TDmode)
4327 return 1;
4328
4329 if (size > 12)
4330 return 1;
4331 return 0;
4332 }
4333
4334 /* When returning SSE vector types, we have a choice of either
4335 (1) being ABI incompatible with a -march switch, or
4336 (2) generating an error.
4337 Given no good solution, I think the safest thing is one warning.
4338 The user won't be able to use -Werror, but....
4339
4340 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4341 called in response to actually generating a caller or callee that
4342 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4343 via aggregate_value_p for general type probing from tree-ssa. */
4344
4345 static rtx
4346 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4347 {
4348 static bool warnedsse, warnedmmx;
4349
4350 if (type)
4351 {
4352 /* Look at the return type of the function, not the function type. */
4353 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4354
4355 if (!TARGET_SSE && !warnedsse)
4356 {
4357 if (mode == TImode
4358 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4359 {
4360 warnedsse = true;
4361 warning (0, "SSE vector return without SSE enabled "
4362 "changes the ABI");
4363 }
4364 }
4365
4366 if (!TARGET_MMX && !warnedmmx)
4367 {
4368 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4369 {
4370 warnedmmx = true;
4371 warning (0, "MMX vector return without MMX enabled "
4372 "changes the ABI");
4373 }
4374 }
4375 }
4376
4377 return NULL;
4378 }
4379
4380 /* Define how to find the value returned by a library function
4381 assuming the value has mode MODE. */
4382 rtx
4383 ix86_libcall_value (enum machine_mode mode)
4384 {
4385 if (TARGET_64BIT)
4386 {
4387 switch (mode)
4388 {
4389 case SFmode:
4390 case SCmode:
4391 case DFmode:
4392 case DCmode:
4393 case TFmode:
4394 case SDmode:
4395 case DDmode:
4396 case TDmode:
4397 return gen_rtx_REG (mode, FIRST_SSE_REG);
4398 case XFmode:
4399 case XCmode:
4400 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4401 case TCmode:
4402 return NULL;
4403 default:
4404 return gen_rtx_REG (mode, 0);
4405 }
4406 }
4407 else
4408 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4409 }
4410
4411 /* Given a mode, return the register to use for a return value. */
4412
4413 static int
4414 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4415 {
4416 gcc_assert (!TARGET_64BIT);
4417
4418 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4419 we normally prevent this case when mmx is not available. However
4420 some ABIs may require the result to be returned like DImode. */
4421 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4422 return TARGET_MMX ? FIRST_MMX_REG : 0;
4423
4424 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4425 we prevent this case when sse is not available. However some ABIs
4426 may require the result to be returned like integer TImode. */
4427 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4428 return TARGET_SSE ? FIRST_SSE_REG : 0;
4429
4430 /* Decimal floating point values can go in %eax, unlike other float modes. */
4431 if (DECIMAL_FLOAT_MODE_P (mode))
4432 return 0;
4433
4434 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4435 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4436 return 0;
4437
4438 /* Floating point return values in %st(0), except for local functions when
4439 SSE math is enabled or for functions with sseregparm attribute. */
4440 if ((func || fntype)
4441 && (mode == SFmode || mode == DFmode))
4442 {
4443 int sse_level = ix86_function_sseregparm (fntype, func);
4444 if ((sse_level >= 1 && mode == SFmode)
4445 || (sse_level == 2 && mode == DFmode))
4446 return FIRST_SSE_REG;
4447 }
4448
4449 return FIRST_FLOAT_REG;
4450 }
4451 \f
4452 /* Create the va_list data type. */
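/* The record built below corresponds to the familiar x86-64 va_list
   layout, shown here only as an illustration:

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];  */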
4453
4454 static tree
4455 ix86_build_builtin_va_list (void)
4456 {
4457 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4458
4459 /* For i386 we use a plain pointer to the argument area. */
4460 if (!TARGET_64BIT)
4461 return build_pointer_type (char_type_node);
4462
4463 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4464 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4465
4466 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4467 unsigned_type_node);
4468 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4469 unsigned_type_node);
4470 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4471 ptr_type_node);
4472 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4473 ptr_type_node);
4474
4475 va_list_gpr_counter_field = f_gpr;
4476 va_list_fpr_counter_field = f_fpr;
4477
4478 DECL_FIELD_CONTEXT (f_gpr) = record;
4479 DECL_FIELD_CONTEXT (f_fpr) = record;
4480 DECL_FIELD_CONTEXT (f_ovf) = record;
4481 DECL_FIELD_CONTEXT (f_sav) = record;
4482
4483 TREE_CHAIN (record) = type_decl;
4484 TYPE_NAME (record) = type_decl;
4485 TYPE_FIELDS (record) = f_gpr;
4486 TREE_CHAIN (f_gpr) = f_fpr;
4487 TREE_CHAIN (f_fpr) = f_ovf;
4488 TREE_CHAIN (f_ovf) = f_sav;
4489
4490 layout_type (record);
4491
4492 /* The correct type is an array type of one element. */
4493 return build_array_type (record, build_index_type (size_zero_node));
4494 }
4495
4496 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4497
4498 static void
4499 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4500 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4501 int no_rtl)
4502 {
4503 CUMULATIVE_ARGS next_cum;
4504 rtx save_area = NULL_RTX, mem;
4505 rtx label;
4506 rtx label_ref;
4507 rtx tmp_reg;
4508 rtx nsse_reg;
4509 int set;
4510 tree fntype;
4511 int stdarg_p;
4512 int i;
4513
4514 if (!TARGET_64BIT)
4515 return;
4516
4517 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4518 return;
4519
4520 /* Indicate to allocate space on the stack for varargs save area. */
4521 ix86_save_varrargs_registers = 1;
4522
4523 cfun->stack_alignment_needed = 128;
4524
4525 fntype = TREE_TYPE (current_function_decl);
4526 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4527 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4528 != void_type_node));
4529
4530 /* For varargs, we do not want to skip the dummy va_dcl argument.
4531 For stdargs, we do want to skip the last named argument. */
4532 next_cum = *cum;
4533 if (stdarg_p)
4534 function_arg_advance (&next_cum, mode, type, 1);
4535
4536 if (!no_rtl)
4537 save_area = frame_pointer_rtx;
4538
4539 set = get_varargs_alias_set ();
4540
4541 for (i = next_cum.regno;
4542 i < ix86_regparm
4543 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4544 i++)
4545 {
4546 mem = gen_rtx_MEM (Pmode,
4547 plus_constant (save_area, i * UNITS_PER_WORD));
4548 MEM_NOTRAP_P (mem) = 1;
4549 set_mem_alias_set (mem, set);
4550 emit_move_insn (mem, gen_rtx_REG (Pmode,
4551 x86_64_int_parameter_registers[i]));
4552 }
4553
4554 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4555 {
4556 /* Now emit code to save SSE registers. The AX parameter contains the
4557 number of SSE parameter registers used to call this function. We use
4558 the sse_prologue_save insn template, which produces a computed jump
4559 across the SSE saves. We need some preparation work to get this working. */
4560
4561 label = gen_label_rtx ();
4562 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4563
4564 /* Compute the address to jump to:
4565 label - 4*eax + nnamed_sse_arguments*4 */
4566 tmp_reg = gen_reg_rtx (Pmode);
4567 nsse_reg = gen_reg_rtx (Pmode);
4568 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4569 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4570 gen_rtx_MULT (Pmode, nsse_reg,
4571 GEN_INT (4))));
4572 if (next_cum.sse_regno)
4573 emit_move_insn
4574 (nsse_reg,
4575 gen_rtx_CONST (DImode,
4576 gen_rtx_PLUS (DImode,
4577 label_ref,
4578 GEN_INT (next_cum.sse_regno * 4))));
4579 else
4580 emit_move_insn (nsse_reg, label_ref);
4581 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4582
4583 /* Compute the address of the memory block we save into. We always use a
4584 pointer pointing 127 bytes after the first byte to store - this is needed
4585 to keep the instruction size limited to 4 bytes. */
4586 tmp_reg = gen_reg_rtx (Pmode);
4587 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4588 plus_constant (save_area,
4589 8 * REGPARM_MAX + 127)));
4590 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4591 MEM_NOTRAP_P (mem) = 1;
4592 set_mem_alias_set (mem, set);
4593 set_mem_align (mem, BITS_PER_WORD);
4594
4595 /* And finally do the dirty job! */
4596 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4597 GEN_INT (next_cum.sse_regno), label));
4598 }
4599
4600 }
4601
4602 /* Implement va_start. */
4603
4604 void
4605 ix86_va_start (tree valist, rtx nextarg)
4606 {
4607 HOST_WIDE_INT words, n_gpr, n_fpr;
4608 tree f_gpr, f_fpr, f_ovf, f_sav;
4609 tree gpr, fpr, ovf, sav, t;
4610 tree type;
4611
4612 /* Only 64bit target needs something special. */
4613 if (!TARGET_64BIT)
4614 {
4615 std_expand_builtin_va_start (valist, nextarg);
4616 return;
4617 }
4618
4619 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4620 f_fpr = TREE_CHAIN (f_gpr);
4621 f_ovf = TREE_CHAIN (f_fpr);
4622 f_sav = TREE_CHAIN (f_ovf);
4623
4624 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4625 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4626 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4627 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4628 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4629
4630 /* Count number of gp and fp argument registers used. */
4631 words = current_function_args_info.words;
4632 n_gpr = current_function_args_info.regno;
4633 n_fpr = current_function_args_info.sse_regno;
4634
4635 if (TARGET_DEBUG_ARG)
4636 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4637 (int) words, (int) n_gpr, (int) n_fpr);
4638
4639 if (cfun->va_list_gpr_size)
4640 {
4641 type = TREE_TYPE (gpr);
4642 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4643 build_int_cst (type, n_gpr * 8));
4644 TREE_SIDE_EFFECTS (t) = 1;
4645 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4646 }
4647
4648 if (cfun->va_list_fpr_size)
4649 {
4650 type = TREE_TYPE (fpr);
4651 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4652 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4653 TREE_SIDE_EFFECTS (t) = 1;
4654 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4655 }
4656
4657 /* Find the overflow area. */
4658 type = TREE_TYPE (ovf);
4659 t = make_tree (type, virtual_incoming_args_rtx);
4660 if (words != 0)
4661 t = build2 (PLUS_EXPR, type, t,
4662 build_int_cst (type, words * UNITS_PER_WORD));
4663 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4664 TREE_SIDE_EFFECTS (t) = 1;
4665 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4666
4667 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4668 {
4669 /* Find the register save area.
4670 The function prologue saves it right above the stack frame. */
4671 type = TREE_TYPE (sav);
4672 t = make_tree (type, frame_pointer_rtx);
4673 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4674 TREE_SIDE_EFFECTS (t) = 1;
4675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4676 }
4677 }
4678
4679 /* Implement va_arg. */
4680
4681 tree
4682 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4683 {
4684 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4685 tree f_gpr, f_fpr, f_ovf, f_sav;
4686 tree gpr, fpr, ovf, sav, t;
4687 int size, rsize;
4688 tree lab_false, lab_over = NULL_TREE;
4689 tree addr, t2;
4690 rtx container;
4691 int indirect_p = 0;
4692 tree ptrtype;
4693 enum machine_mode nat_mode;
4694
4695 /* Only 64bit target needs something special. */
4696 if (!TARGET_64BIT)
4697 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4698
4699 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4700 f_fpr = TREE_CHAIN (f_gpr);
4701 f_ovf = TREE_CHAIN (f_fpr);
4702 f_sav = TREE_CHAIN (f_ovf);
4703
4704 valist = build_va_arg_indirect_ref (valist);
4705 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4706 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4707 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4708 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4709
4710 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4711 if (indirect_p)
4712 type = build_pointer_type (type);
4713 size = int_size_in_bytes (type);
4714 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4715
4716 nat_mode = type_natural_mode (type);
4717 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4718 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4719
4720 /* Pull the value out of the saved registers. */
4721
4722 addr = create_tmp_var (ptr_type_node, "addr");
4723 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4724
4725 if (container)
4726 {
4727 int needed_intregs, needed_sseregs;
4728 bool need_temp;
4729 tree int_addr, sse_addr;
4730
4731 lab_false = create_artificial_label ();
4732 lab_over = create_artificial_label ();
4733
4734 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4735
4736 need_temp = (!REG_P (container)
4737 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4738 || TYPE_ALIGN (type) > 128));
4739
4740 /* In case we are passing a structure, verify that it is a consecutive block
4741 in the register save area. If not, we need to do moves. */
4742 if (!need_temp && !REG_P (container))
4743 {
4744 /* Verify that all registers are strictly consecutive */
4745 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4746 {
4747 int i;
4748
4749 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4750 {
4751 rtx slot = XVECEXP (container, 0, i);
4752 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4753 || INTVAL (XEXP (slot, 1)) != i * 16)
4754 need_temp = 1;
4755 }
4756 }
4757 else
4758 {
4759 int i;
4760
4761 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4762 {
4763 rtx slot = XVECEXP (container, 0, i);
4764 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4765 || INTVAL (XEXP (slot, 1)) != i * 8)
4766 need_temp = 1;
4767 }
4768 }
4769 }
4770 if (!need_temp)
4771 {
4772 int_addr = addr;
4773 sse_addr = addr;
4774 }
4775 else
4776 {
4777 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4778 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4779 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4780 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4781 }
4782
4783 /* First ensure that we fit completely in registers. */
4784 if (needed_intregs)
4785 {
4786 t = build_int_cst (TREE_TYPE (gpr),
4787 (REGPARM_MAX - needed_intregs + 1) * 8);
4788 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4789 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4790 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4791 gimplify_and_add (t, pre_p);
4792 }
4793 if (needed_sseregs)
4794 {
4795 t = build_int_cst (TREE_TYPE (fpr),
4796 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4797 + REGPARM_MAX * 8);
4798 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4799 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4800 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4801 gimplify_and_add (t, pre_p);
4802 }
4803
4804 /* Compute index to start of area used for integer regs. */
4805 if (needed_intregs)
4806 {
4807 /* int_addr = gpr + sav; */
4808 t = fold_convert (ptr_type_node, gpr);
4809 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4810 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4811 gimplify_and_add (t, pre_p);
4812 }
4813 if (needed_sseregs)
4814 {
4815 /* sse_addr = fpr + sav; */
4816 t = fold_convert (ptr_type_node, fpr);
4817 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4818 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4819 gimplify_and_add (t, pre_p);
4820 }
4821 if (need_temp)
4822 {
4823 int i;
4824 tree temp = create_tmp_var (type, "va_arg_tmp");
4825
4826 /* addr = &temp; */
4827 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4828 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4829 gimplify_and_add (t, pre_p);
4830
4831 for (i = 0; i < XVECLEN (container, 0); i++)
4832 {
4833 rtx slot = XVECEXP (container, 0, i);
4834 rtx reg = XEXP (slot, 0);
4835 enum machine_mode mode = GET_MODE (reg);
4836 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4837 tree addr_type = build_pointer_type (piece_type);
4838 tree src_addr, src;
4839 int src_offset;
4840 tree dest_addr, dest;
4841
4842 if (SSE_REGNO_P (REGNO (reg)))
4843 {
4844 src_addr = sse_addr;
4845 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4846 }
4847 else
4848 {
4849 src_addr = int_addr;
4850 src_offset = REGNO (reg) * 8;
4851 }
4852 src_addr = fold_convert (addr_type, src_addr);
4853 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4854 size_int (src_offset)));
4855 src = build_va_arg_indirect_ref (src_addr);
4856
4857 dest_addr = fold_convert (addr_type, addr);
4858 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4859 size_int (INTVAL (XEXP (slot, 1)))));
4860 dest = build_va_arg_indirect_ref (dest_addr);
4861
4862 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4863 gimplify_and_add (t, pre_p);
4864 }
4865 }
4866
4867 if (needed_intregs)
4868 {
4869 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4870 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4871 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4872 gimplify_and_add (t, pre_p);
4873 }
4874 if (needed_sseregs)
4875 {
4876 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4877 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4878 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4879 gimplify_and_add (t, pre_p);
4880 }
4881
4882 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4883 gimplify_and_add (t, pre_p);
4884
4885 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4886 append_to_statement_list (t, pre_p);
4887 }
4888
4889 /* ... otherwise out of the overflow area. */
4890
4891 /* Care for on-stack alignment if needed. */
4892 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4893 || integer_zerop (TYPE_SIZE (type)))
4894 t = ovf;
4895 else
4896 {
4897 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4898 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4899 build_int_cst (TREE_TYPE (ovf), align - 1));
4900 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4901 build_int_cst (TREE_TYPE (t), -align));
4902 }
4903 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4904
4905 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4906 gimplify_and_add (t2, pre_p);
4907
4908 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4909 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4910 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4911 gimplify_and_add (t, pre_p);
4912
4913 if (container)
4914 {
4915 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4916 append_to_statement_list (t, pre_p);
4917 }
4918
4919 ptrtype = build_pointer_type (type);
4920 addr = fold_convert (ptrtype, addr);
4921
4922 if (indirect_p)
4923 addr = build_va_arg_indirect_ref (addr);
4924 return build_va_arg_indirect_ref (addr);
4925 }
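
/* A minimal illustrative sketch, not part of the original source: the four
   fields walked via f_gpr/f_fpr/f_ovf/f_sav above correspond to the x86-64
   psABI va_list layout, roughly

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __builtin_va_list[1];

   gp_offset/fp_offset are byte offsets into reg_save_area of the next
   unconsumed integer/SSE register slot, overflow_arg_area points at the next
   stack-passed argument, and reg_save_area holds REGPARM_MAX 8-byte integer
   slots followed by SSE_REGPARM_MAX 16-byte SSE slots, which is what the
   GE_EXPR overflow checks above compare gp_offset and fp_offset against.  */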
4926 \f
4927 /* Return nonzero if OPNUM's MEM should be matched
4928 in movabs* patterns. */
4929
4930 int
4931 ix86_check_movabs (rtx insn, int opnum)
4932 {
4933 rtx set, mem;
4934
4935 set = PATTERN (insn);
4936 if (GET_CODE (set) == PARALLEL)
4937 set = XVECEXP (set, 0, 0);
4938 gcc_assert (GET_CODE (set) == SET);
4939 mem = XEXP (set, opnum);
4940 while (GET_CODE (mem) == SUBREG)
4941 mem = SUBREG_REG (mem);
4942 gcc_assert (MEM_P (mem));
4943 return (volatile_ok || !MEM_VOLATILE_P (mem));
4944 }
4945 \f
4946 /* Initialize the table of extra 80387 mathematical constants. */
4947
4948 static void
4949 init_ext_80387_constants (void)
4950 {
4951 static const char * cst[5] =
4952 {
4953 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4954 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4955 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4956 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4957 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4958 };
4959 int i;
4960
4961 for (i = 0; i < 5; i++)
4962 {
4963 real_from_string (&ext_80387_constants_table[i], cst[i]);
4964 /* Ensure each constant is rounded to XFmode precision. */
4965 real_convert (&ext_80387_constants_table[i],
4966 XFmode, &ext_80387_constants_table[i]);
4967 }
4968
4969 ext_80387_constants_init = 1;
4970 }
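
/* For reference: the constants above are, in order, log10(2) (fldlg2),
   ln(2) (fldln2), log2(e) (fldl2e), log2(10) (fldl2t) and pi (fldpi),
   each rounded to XFmode precision by the loop above.  */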
4971
4972 /* Return true if the constant is something that can be loaded with
4973 a special instruction. */
4974
4975 int
4976 standard_80387_constant_p (rtx x)
4977 {
4978 REAL_VALUE_TYPE r;
4979
4980 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4981 return -1;
4982
4983 if (x == CONST0_RTX (GET_MODE (x)))
4984 return 1;
4985 if (x == CONST1_RTX (GET_MODE (x)))
4986 return 2;
4987
4988 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4989
4990 /* For XFmode constants, try to find a special 80387 instruction when
4991 optimizing for size or on those CPUs that benefit from them. */
4992 if (GET_MODE (x) == XFmode
4993 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4994 {
4995 int i;
4996
4997 if (! ext_80387_constants_init)
4998 init_ext_80387_constants ();
4999
5000 for (i = 0; i < 5; i++)
5001 if (real_identical (&r, &ext_80387_constants_table[i]))
5002 return i + 3;
5003 }
5004
5005 /* A load of the constant -0.0 or -1.0 will be split into an
5006 fldz;fchs or fld1;fchs sequence. */
5007 if (real_isnegzero (&r))
5008 return 8;
5009 if (real_identical (&r, &dconstm1))
5010 return 9;
5011
5012 return 0;
5013 }
5014
5015 /* Return the opcode of the special instruction to be used to load
5016 the constant X. */
5017
5018 const char *
5019 standard_80387_constant_opcode (rtx x)
5020 {
5021 switch (standard_80387_constant_p (x))
5022 {
5023 case 1:
5024 return "fldz";
5025 case 2:
5026 return "fld1";
5027 case 3:
5028 return "fldlg2";
5029 case 4:
5030 return "fldln2";
5031 case 5:
5032 return "fldl2e";
5033 case 6:
5034 return "fldl2t";
5035 case 7:
5036 return "fldpi";
5037 case 8:
5038 case 9:
5039 return "#";
5040 default:
5041 gcc_unreachable ();
5042 }
5043 }
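
/* A hedged usage sketch (not taken from i386.md itself): an FP move output
   template would typically pair the two routines above along these lines,

       if (standard_80387_constant_p (operands[1]) > 0)
         return standard_80387_constant_opcode (operands[1]);

   so codes 1-7 turn into a single fld* instruction, while codes 8 and 9
   return "#" and the insn is later split into fldz/fld1 followed by fchs.  */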
5044
5045 /* Return the CONST_DOUBLE representing the 80387 constant that is
5046 loaded by the specified special instruction. The argument IDX
5047 matches the return value from standard_80387_constant_p. */
5048
5049 rtx
5050 standard_80387_constant_rtx (int idx)
5051 {
5052 int i;
5053
5054 if (! ext_80387_constants_init)
5055 init_ext_80387_constants ();
5056
5057 switch (idx)
5058 {
5059 case 3:
5060 case 4:
5061 case 5:
5062 case 6:
5063 case 7:
5064 i = idx - 3;
5065 break;
5066
5067 default:
5068 gcc_unreachable ();
5069 }
5070
5071 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5072 XFmode);
5073 }
5074
5075 /* Return 1 if MODE is a valid mode for SSE. */
5076 static int
5077 standard_sse_mode_p (enum machine_mode mode)
5078 {
5079 switch (mode)
5080 {
5081 case V16QImode:
5082 case V8HImode:
5083 case V4SImode:
5084 case V2DImode:
5085 case V4SFmode:
5086 case V2DFmode:
5087 return 1;
5088
5089 default:
5090 return 0;
5091 }
5092 }
5093
5094 /* Return 1 if X is an FP constant that we can load into an SSE register
5095 without using memory. */
5096 int
5097 standard_sse_constant_p (rtx x)
5098 {
5099 enum machine_mode mode = GET_MODE (x);
5100
5101 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5102 return 1;
5103 if (vector_all_ones_operand (x, mode)
5104 && standard_sse_mode_p (mode))
5105 return TARGET_SSE2 ? 2 : -1;
5106
5107 return 0;
5108 }
5109
5110 /* Return the opcode of the special instruction to be used to load
5111 the constant X. */
5112
5113 const char *
5114 standard_sse_constant_opcode (rtx insn, rtx x)
5115 {
5116 switch (standard_sse_constant_p (x))
5117 {
5118 case 1:
5119 if (get_attr_mode (insn) == MODE_V4SF)
5120 return "xorps\t%0, %0";
5121 else if (get_attr_mode (insn) == MODE_V2DF)
5122 return "xorpd\t%0, %0";
5123 else
5124 return "pxor\t%0, %0";
5125 case 2:
5126 return "pcmpeqd\t%0, %0";
5127 }
5128 gcc_unreachable ();
5129 }
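
/* For illustration (not part of the original source): the templates above
   rely on two standard SSE idioms - xorps/xorpd/pxor of a register with
   itself produces all-zero bits without touching memory, and
   pcmpeqd %xmm0, %xmm0 compares a register with itself and therefore sets
   every bit, giving the all-ones vector (hence the TARGET_SSE2 requirement
   reflected in standard_sse_constant_p above).  */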
5130
5131 /* Return 1 if OP contains a symbol reference. */
5132
5133 int
5134 symbolic_reference_mentioned_p (rtx op)
5135 {
5136 const char *fmt;
5137 int i;
5138
5139 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5140 return 1;
5141
5142 fmt = GET_RTX_FORMAT (GET_CODE (op));
5143 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5144 {
5145 if (fmt[i] == 'E')
5146 {
5147 int j;
5148
5149 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5150 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5151 return 1;
5152 }
5153
5154 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5155 return 1;
5156 }
5157
5158 return 0;
5159 }
5160
5161 /* Return 1 if it is appropriate to emit `ret' instructions in the
5162 body of a function. Do this only if the epilogue is simple, needing a
5163 couple of insns. Prior to reloading, we can't tell how many registers
5164 must be saved, so return 0 then. Return 0 if there is no frame
5165 marker to de-allocate. */
5166
5167 int
5168 ix86_can_use_return_insn_p (void)
5169 {
5170 struct ix86_frame frame;
5171
5172 if (! reload_completed || frame_pointer_needed)
5173 return 0;
5174
5175 /* Don't allow more than 32K bytes of pop, since that's all we allow
5176 with one instruction. */
5177 if (current_function_pops_args
5178 && current_function_args_size >= 32768)
5179 return 0;
5180
5181 ix86_compute_frame_layout (&frame);
5182 return frame.to_allocate == 0 && frame.nregs == 0;
5183 }
5184 \f
5185 /* Value should be nonzero if functions must have frame pointers.
5186 Zero means the frame pointer need not be set up (and parms may
5187 be accessed via the stack pointer) in functions that seem suitable. */
5188
5189 int
5190 ix86_frame_pointer_required (void)
5191 {
5192 /* If we accessed previous frames, then the generated code expects
5193 to be able to access the saved ebp value in our frame. */
5194 if (cfun->machine->accesses_prev_frame)
5195 return 1;
5196
5197 /* Several x86 OSes need a frame pointer for other reasons,
5198 usually pertaining to setjmp. */
5199 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5200 return 1;
5201
5202 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5203 the frame pointer by default. Turn it back on now if we've not
5204 got a leaf function. */
5205 if (TARGET_OMIT_LEAF_FRAME_POINTER
5206 && (!current_function_is_leaf
5207 || ix86_current_function_calls_tls_descriptor))
5208 return 1;
5209
5210 if (current_function_profile)
5211 return 1;
5212
5213 return 0;
5214 }
5215
5216 /* Record that the current function accesses previous call frames. */
5217
5218 void
5219 ix86_setup_frame_addresses (void)
5220 {
5221 cfun->machine->accesses_prev_frame = 1;
5222 }
5223 \f
5224 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5225 # define USE_HIDDEN_LINKONCE 1
5226 #else
5227 # define USE_HIDDEN_LINKONCE 0
5228 #endif
5229
5230 static int pic_labels_used;
5231
5232 /* Fills in the label name that should be used for a pc thunk for
5233 the given register. */
5234
5235 static void
5236 get_pc_thunk_name (char name[32], unsigned int regno)
5237 {
5238 gcc_assert (!TARGET_64BIT);
5239
5240 if (USE_HIDDEN_LINKONCE)
5241 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5242 else
5243 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5244 }
5245
5246
5247 /* For -fpic, emit, for each PIC register used, a thunk that loads that
5248 register with the return address of the caller and then returns. */
5249
5250 void
5251 ix86_file_end (void)
5252 {
5253 rtx xops[2];
5254 int regno;
5255
5256 for (regno = 0; regno < 8; ++regno)
5257 {
5258 char name[32];
5259
5260 if (! ((pic_labels_used >> regno) & 1))
5261 continue;
5262
5263 get_pc_thunk_name (name, regno);
5264
5265 #if TARGET_MACHO
5266 if (TARGET_MACHO)
5267 {
5268 switch_to_section (darwin_sections[text_coal_section]);
5269 fputs ("\t.weak_definition\t", asm_out_file);
5270 assemble_name (asm_out_file, name);
5271 fputs ("\n\t.private_extern\t", asm_out_file);
5272 assemble_name (asm_out_file, name);
5273 fputs ("\n", asm_out_file);
5274 ASM_OUTPUT_LABEL (asm_out_file, name);
5275 }
5276 else
5277 #endif
5278 if (USE_HIDDEN_LINKONCE)
5279 {
5280 tree decl;
5281
5282 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5283 error_mark_node);
5284 TREE_PUBLIC (decl) = 1;
5285 TREE_STATIC (decl) = 1;
5286 DECL_ONE_ONLY (decl) = 1;
5287
5288 (*targetm.asm_out.unique_section) (decl, 0);
5289 switch_to_section (get_named_section (decl, NULL, 0));
5290
5291 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5292 fputs ("\t.hidden\t", asm_out_file);
5293 assemble_name (asm_out_file, name);
5294 fputc ('\n', asm_out_file);
5295 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5296 }
5297 else
5298 {
5299 switch_to_section (text_section);
5300 ASM_OUTPUT_LABEL (asm_out_file, name);
5301 }
5302
5303 xops[0] = gen_rtx_REG (SImode, regno);
5304 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5305 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5306 output_asm_insn ("ret", xops);
5307 }
5308
5309 if (NEED_INDICATE_EXEC_STACK)
5310 file_end_indicate_exec_stack ();
5311 }
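
/* Illustrative output (a sketch, assuming the USE_HIDDEN_LINKONCE naming
   above): for the %ebx thunk the loop above emits, in effect,

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies the caller's return address - the address of the insn
   following the call - into the requested register.  */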
5312
5313 /* Emit code for the SET_GOT patterns. */
5314
5315 const char *
5316 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5317 {
5318 rtx xops[3];
5319
5320 xops[0] = dest;
5321 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5322
5323 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5324 {
5325 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5326
5327 if (!flag_pic)
5328 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5329 else
5330 output_asm_insn ("call\t%a2", xops);
5331
5332 #if TARGET_MACHO
5333 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5334 is what will be referenced by the Mach-O PIC subsystem. */
5335 if (!label)
5336 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5337 #endif
5338
5339 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5340 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5341
5342 if (flag_pic)
5343 output_asm_insn ("pop{l}\t%0", xops);
5344 }
5345 else
5346 {
5347 char name[32];
5348 get_pc_thunk_name (name, REGNO (dest));
5349 pic_labels_used |= 1 << REGNO (dest);
5350
5351 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5352 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5353 output_asm_insn ("call\t%X2", xops);
5354 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5355 is what will be referenced by the Mach-O PIC subsystem. */
5356 #if TARGET_MACHO
5357 if (!label)
5358 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5359 else
5360 targetm.asm_out.internal_label (asm_out_file, "L",
5361 CODE_LABEL_NUMBER (label));
5362 #endif
5363 }
5364
5365 if (TARGET_MACHO)
5366 return "";
5367
5368 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5369 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5370 else
5371 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5372
5373 return "";
5374 }
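
/* Illustrative output for -fpic code (a sketch; exact symbols depend on the
   target): with TARGET_DEEP_BRANCH_PREDICTION the function above emits
   something like

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   and otherwise the inline form

       call    .L2
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   either way leaving the address of the GOT in the destination register.  */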
5375
5376 /* Generate a "push" pattern for input ARG. */
5377
5378 static rtx
5379 gen_push (rtx arg)
5380 {
5381 return gen_rtx_SET (VOIDmode,
5382 gen_rtx_MEM (Pmode,
5383 gen_rtx_PRE_DEC (Pmode,
5384 stack_pointer_rtx)),
5385 arg);
5386 }
5387
5388 /* Return the number of an unused call-clobbered register if one is available
5389 for the entire function, otherwise return INVALID_REGNUM. */
5390
5391 static unsigned int
5392 ix86_select_alt_pic_regnum (void)
5393 {
5394 if (current_function_is_leaf && !current_function_profile
5395 && !ix86_current_function_calls_tls_descriptor)
5396 {
5397 int i;
5398 for (i = 2; i >= 0; --i)
5399 if (!regs_ever_live[i])
5400 return i;
5401 }
5402
5403 return INVALID_REGNUM;
5404 }
5405
5406 /* Return 1 if we need to save REGNO. */
5407 static int
5408 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5409 {
5410 if (pic_offset_table_rtx
5411 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5412 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5413 || current_function_profile
5414 || current_function_calls_eh_return
5415 || current_function_uses_const_pool))
5416 {
5417 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5418 return 0;
5419 return 1;
5420 }
5421
5422 if (current_function_calls_eh_return && maybe_eh_return)
5423 {
5424 unsigned i;
5425 for (i = 0; ; i++)
5426 {
5427 unsigned test = EH_RETURN_DATA_REGNO (i);
5428 if (test == INVALID_REGNUM)
5429 break;
5430 if (test == regno)
5431 return 1;
5432 }
5433 }
5434
5435 if (cfun->machine->force_align_arg_pointer
5436 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5437 return 1;
5438
5439 return (regs_ever_live[regno]
5440 && !call_used_regs[regno]
5441 && !fixed_regs[regno]
5442 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5443 }
5444
5445 /* Return number of registers to be saved on the stack. */
5446
5447 static int
5448 ix86_nsaved_regs (void)
5449 {
5450 int nregs = 0;
5451 int regno;
5452
5453 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5454 if (ix86_save_reg (regno, true))
5455 nregs++;
5456 return nregs;
5457 }
5458
5459 /* Return the offset between two registers, one to be eliminated, and the other
5460 its replacement, at the start of a routine. */
5461
5462 HOST_WIDE_INT
5463 ix86_initial_elimination_offset (int from, int to)
5464 {
5465 struct ix86_frame frame;
5466 ix86_compute_frame_layout (&frame);
5467
5468 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5469 return frame.hard_frame_pointer_offset;
5470 else if (from == FRAME_POINTER_REGNUM
5471 && to == HARD_FRAME_POINTER_REGNUM)
5472 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5473 else
5474 {
5475 gcc_assert (to == STACK_POINTER_REGNUM);
5476
5477 if (from == ARG_POINTER_REGNUM)
5478 return frame.stack_pointer_offset;
5479
5480 gcc_assert (from == FRAME_POINTER_REGNUM);
5481 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5482 }
5483 }
5484
5485 /* Fill the ix86_frame structure describing the frame of the current function. */
5486
5487 static void
5488 ix86_compute_frame_layout (struct ix86_frame *frame)
5489 {
5490 HOST_WIDE_INT total_size;
5491 unsigned int stack_alignment_needed;
5492 HOST_WIDE_INT offset;
5493 unsigned int preferred_alignment;
5494 HOST_WIDE_INT size = get_frame_size ();
5495
5496 frame->nregs = ix86_nsaved_regs ();
5497 total_size = size;
5498
5499 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5500 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5501
5502 /* During reload iteration the number of registers saved can change.
5503 Recompute the value as needed. Do not recompute when the number of registers
5504 didn't change, as reload calls this function multiple times and does not
5505 expect the decision to change within a single iteration. */
5506 if (!optimize_size
5507 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5508 {
5509 int count = frame->nregs;
5510
5511 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5512 /* The fast prologue uses moves instead of pushes to save registers. This
5513 is significantly longer, but it also executes faster, as modern hardware
5514 can execute the moves in parallel but cannot do that for push/pop.
5515
5516 Be careful about choosing which prologue to emit: when the function takes
5517 many instructions to execute, we may use the slow version, as well as when
5518 the function is known to be outside a hot spot (this is known only with
5519 profile feedback). Weight the size of the function by the number of
5520 registers to save, as it is cheap to use one or two push instructions but
5521 very slow to use many of them. */
5522 if (count)
5523 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5524 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5525 || (flag_branch_probabilities
5526 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5527 cfun->machine->use_fast_prologue_epilogue = false;
5528 else
5529 cfun->machine->use_fast_prologue_epilogue
5530 = !expensive_function_p (count);
5531 }
5532 if (TARGET_PROLOGUE_USING_MOVE
5533 && cfun->machine->use_fast_prologue_epilogue)
5534 frame->save_regs_using_mov = true;
5535 else
5536 frame->save_regs_using_mov = false;
5537
5538
5539 /* Skip return address and saved base pointer. */
5540 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5541
5542 frame->hard_frame_pointer_offset = offset;
5543
5544 /* Do some sanity checking of stack_alignment_needed and
5545 preferred_alignment, since the i386 port is the only one using these
5546 features, and they may break easily. */
5547
5548 gcc_assert (!size || stack_alignment_needed);
5549 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5550 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5551 gcc_assert (stack_alignment_needed
5552 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5553
5554 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5555 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5556
5557 /* Register save area */
5558 offset += frame->nregs * UNITS_PER_WORD;
5559
5560 /* Va-arg area */
5561 if (ix86_save_varrargs_registers)
5562 {
5563 offset += X86_64_VARARGS_SIZE;
5564 frame->va_arg_size = X86_64_VARARGS_SIZE;
5565 }
5566 else
5567 frame->va_arg_size = 0;
5568
5569 /* Align start of frame for local function. */
5570 frame->padding1 = ((offset + stack_alignment_needed - 1)
5571 & -stack_alignment_needed) - offset;
5572
5573 offset += frame->padding1;
5574
5575 /* Frame pointer points here. */
5576 frame->frame_pointer_offset = offset;
5577
5578 offset += size;
5579
5580 /* Add outgoing arguments area. Can be skipped if we eliminated
5581 all the function calls as dead code.
5582 Skipping is, however, impossible when the function calls alloca, as the
5583 alloca expander assumes that the last current_function_outgoing_args_size
5584 bytes of the stack frame are unused. */
5585 if (ACCUMULATE_OUTGOING_ARGS
5586 && (!current_function_is_leaf || current_function_calls_alloca
5587 || ix86_current_function_calls_tls_descriptor))
5588 {
5589 offset += current_function_outgoing_args_size;
5590 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5591 }
5592 else
5593 frame->outgoing_arguments_size = 0;
5594
5595 /* Align stack boundary. Only needed if we're calling another function
5596 or using alloca. */
5597 if (!current_function_is_leaf || current_function_calls_alloca
5598 || ix86_current_function_calls_tls_descriptor)
5599 frame->padding2 = ((offset + preferred_alignment - 1)
5600 & -preferred_alignment) - offset;
5601 else
5602 frame->padding2 = 0;
5603
5604 offset += frame->padding2;
5605
5606 /* We've reached end of stack frame. */
5607 frame->stack_pointer_offset = offset;
5608
5609 /* Size the prologue needs to allocate. */
5610 frame->to_allocate =
5611 (size + frame->padding1 + frame->padding2
5612 + frame->outgoing_arguments_size + frame->va_arg_size);
5613
5614 if ((!frame->to_allocate && frame->nregs <= 1)
5615 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5616 frame->save_regs_using_mov = false;
5617
5618 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5619 && current_function_is_leaf
5620 && !ix86_current_function_calls_tls_descriptor)
5621 {
5622 frame->red_zone_size = frame->to_allocate;
5623 if (frame->save_regs_using_mov)
5624 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5625 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5626 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5627 }
5628 else
5629 frame->red_zone_size = 0;
5630 frame->to_allocate -= frame->red_zone_size;
5631 frame->stack_pointer_offset -= frame->red_zone_size;
5632 #if 0
5633 fprintf (stderr, "\n");
5634 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5635 fprintf (stderr, "size: %ld\n", (long)size);
5636 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5637 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5638 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5639 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5640 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5641 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5642 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5643 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5644 (long)frame->hard_frame_pointer_offset);
5645 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5646 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5647 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5648 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5649 #endif
5650 }
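
/* A rough picture of the layout computed above (a sketch, not part of the
   original source), from higher to lower addresses:

       return address
       saved %ebp (if frame_pointer_needed)     <- hard frame pointer
       saved registers (nregs words)
       va-arg register save area (if any)
       padding1
       local variables (size bytes)             <- frame_pointer_offset
       outgoing argument area
       padding2                                 <- stack_pointer_offset
       [red zone, below the stack pointer, on x86-64 leaf functions]

   to_allocate is the portion below the saved registers (less any red zone)
   that the prologue obtains by adjusting the stack pointer rather than by
   pushes.  */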
5651
5652 /* Emit code to save registers in the prologue. */
5653
5654 static void
5655 ix86_emit_save_regs (void)
5656 {
5657 unsigned int regno;
5658 rtx insn;
5659
5660 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5661 if (ix86_save_reg (regno, true))
5662 {
5663 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5664 RTX_FRAME_RELATED_P (insn) = 1;
5665 }
5666 }
5667
5668 /* Emit code to save registers using MOV insns. The first register
5669 is saved at POINTER + OFFSET. */
5670 static void
5671 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5672 {
5673 unsigned int regno;
5674 rtx insn;
5675
5676 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5677 if (ix86_save_reg (regno, true))
5678 {
5679 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5680 Pmode, offset),
5681 gen_rtx_REG (Pmode, regno));
5682 RTX_FRAME_RELATED_P (insn) = 1;
5683 offset += UNITS_PER_WORD;
5684 }
5685 }
5686
5687 /* Expand prologue or epilogue stack adjustment.
5688 The pattern exists to put a dependency on all ebp-based memory accesses.
5689 STYLE should be negative if instructions should be marked as frame related,
5690 zero if %r11 register is live and cannot be freely used and positive
5691 otherwise. */
5692
5693 static void
5694 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5695 {
5696 rtx insn;
5697
5698 if (! TARGET_64BIT)
5699 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5700 else if (x86_64_immediate_operand (offset, DImode))
5701 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5702 else
5703 {
5704 rtx r11;
5705 /* r11 is used by indirect sibcall return as well, set before the
5706 epilogue and used after the epilogue. ATM indirect sibcall
5707 shouldn't be used together with huge frame sizes in one
5708 function because of the frame_size check in sibcall.c. */
5709 gcc_assert (style);
5710 r11 = gen_rtx_REG (DImode, R11_REG);
5711 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5712 if (style < 0)
5713 RTX_FRAME_RELATED_P (insn) = 1;
5714 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5715 offset));
5716 }
5717 if (style < 0)
5718 RTX_FRAME_RELATED_P (insn) = 1;
5719 }
5720
5721 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5722
5723 static rtx
5724 ix86_internal_arg_pointer (void)
5725 {
5726 bool has_force_align_arg_pointer =
5727 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5728 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5729 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5730 && DECL_NAME (current_function_decl)
5731 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5732 && DECL_FILE_SCOPE_P (current_function_decl))
5733 || ix86_force_align_arg_pointer
5734 || has_force_align_arg_pointer)
5735 {
5736 /* Nested functions can't realign the stack due to a register
5737 conflict. */
5738 if (DECL_CONTEXT (current_function_decl)
5739 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5740 {
5741 if (ix86_force_align_arg_pointer)
5742 warning (0, "-mstackrealign ignored for nested functions");
5743 if (has_force_align_arg_pointer)
5744 error ("%s not supported for nested functions",
5745 ix86_force_align_arg_pointer_string);
5746 return virtual_incoming_args_rtx;
5747 }
5748 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5749 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5750 }
5751 else
5752 return virtual_incoming_args_rtx;
5753 }
5754
5755 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5756 This is called from dwarf2out.c to emit call frame instructions
5757 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5758 static void
5759 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5760 {
5761 rtx unspec = SET_SRC (pattern);
5762 gcc_assert (GET_CODE (unspec) == UNSPEC);
5763
5764 switch (index)
5765 {
5766 case UNSPEC_REG_SAVE:
5767 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5768 SET_DEST (pattern));
5769 break;
5770 case UNSPEC_DEF_CFA:
5771 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5772 INTVAL (XVECEXP (unspec, 0, 0)));
5773 break;
5774 default:
5775 gcc_unreachable ();
5776 }
5777 }
5778
5779 /* Expand the prologue into a bunch of separate insns. */
5780
5781 void
5782 ix86_expand_prologue (void)
5783 {
5784 rtx insn;
5785 bool pic_reg_used;
5786 struct ix86_frame frame;
5787 HOST_WIDE_INT allocate;
5788
5789 ix86_compute_frame_layout (&frame);
5790
5791 if (cfun->machine->force_align_arg_pointer)
5792 {
5793 rtx x, y;
5794
5795 /* Grab the argument pointer. */
5796 x = plus_constant (stack_pointer_rtx, 4);
5797 y = cfun->machine->force_align_arg_pointer;
5798 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5799 RTX_FRAME_RELATED_P (insn) = 1;
5800
5801 /* The unwind info consists of two parts: install the fafp as the cfa,
5802 and record the fafp as the "save register" of the stack pointer.
5803 The latter is there so that the unwinder can see where it
5804 should restore the stack pointer across the and insn. */
5805 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5806 x = gen_rtx_SET (VOIDmode, y, x);
5807 RTX_FRAME_RELATED_P (x) = 1;
5808 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5809 UNSPEC_REG_SAVE);
5810 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5811 RTX_FRAME_RELATED_P (y) = 1;
5812 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5813 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5814 REG_NOTES (insn) = x;
5815
5816 /* Align the stack. */
5817 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5818 GEN_INT (-16)));
5819
5820 /* And here we cheat like madmen with the unwind info. We force the
5821 cfa register back to sp+4, which is exactly what it was at the
5822 start of the function. Re-pushing the return address results in
5823 the return at the same spot relative to the cfa, and thus is
5824 correct wrt the unwind info. */
5825 x = cfun->machine->force_align_arg_pointer;
5826 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5827 insn = emit_insn (gen_push (x));
5828 RTX_FRAME_RELATED_P (insn) = 1;
5829
5830 x = GEN_INT (4);
5831 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5832 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5833 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5834 REG_NOTES (insn) = x;
5835 }
5836
5837 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5838 slower on all targets. Also sdb doesn't like it. */
5839
5840 if (frame_pointer_needed)
5841 {
5842 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5843 RTX_FRAME_RELATED_P (insn) = 1;
5844
5845 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5846 RTX_FRAME_RELATED_P (insn) = 1;
5847 }
5848
5849 allocate = frame.to_allocate;
5850
5851 if (!frame.save_regs_using_mov)
5852 ix86_emit_save_regs ();
5853 else
5854 allocate += frame.nregs * UNITS_PER_WORD;
5855
5856 /* When using the red zone we may start register saving before allocating
5857 the stack frame, saving one cycle of the prologue. */
5858 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5859 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5860 : stack_pointer_rtx,
5861 -frame.nregs * UNITS_PER_WORD);
5862
5863 if (allocate == 0)
5864 ;
5865 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5866 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5867 GEN_INT (-allocate), -1);
5868 else
5869 {
5870 /* Only valid for Win32. */
5871 rtx eax = gen_rtx_REG (SImode, 0);
5872 bool eax_live = ix86_eax_live_at_start_p ();
5873 rtx t;
5874
5875 gcc_assert (!TARGET_64BIT);
5876
5877 if (eax_live)
5878 {
5879 emit_insn (gen_push (eax));
5880 allocate -= 4;
5881 }
5882
5883 emit_move_insn (eax, GEN_INT (allocate));
5884
5885 insn = emit_insn (gen_allocate_stack_worker (eax));
5886 RTX_FRAME_RELATED_P (insn) = 1;
5887 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5888 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5889 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5890 t, REG_NOTES (insn));
5891
5892 if (eax_live)
5893 {
5894 if (frame_pointer_needed)
5895 t = plus_constant (hard_frame_pointer_rtx,
5896 allocate
5897 - frame.to_allocate
5898 - frame.nregs * UNITS_PER_WORD);
5899 else
5900 t = plus_constant (stack_pointer_rtx, allocate);
5901 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5902 }
5903 }
5904
5905 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5906 {
5907 if (!frame_pointer_needed || !frame.to_allocate)
5908 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5909 else
5910 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5911 -frame.nregs * UNITS_PER_WORD);
5912 }
5913
5914 pic_reg_used = false;
5915 if (pic_offset_table_rtx
5916 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5917 || current_function_profile))
5918 {
5919 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5920
5921 if (alt_pic_reg_used != INVALID_REGNUM)
5922 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5923
5924 pic_reg_used = true;
5925 }
5926
5927 if (pic_reg_used)
5928 {
5929 if (TARGET_64BIT)
5930 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5931 else
5932 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5933
5934 /* Even with accurate pre-reload life analysis, we can wind up
5935 deleting all references to the pic register after reload.
5936 Consider the case where cross-jumping unifies two sides of a branch
5937 controlled by a comparison vs. the only read from a global.
5938 In that case, allow the set_got to be deleted, though we're
5939 too late to do anything about the ebx save in the prologue. */
5940 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5941 }
5942
5943 /* Prevent function calls from being scheduled before the call to mcount.
5944 In the pic_reg_used case, make sure that the got load isn't deleted. */
5945 if (current_function_profile)
5946 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5947 }
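
/* A sketch of the code typically emitted by the function above for a 32-bit
   function with a frame pointer and a couple of saved registers (assumed
   register choices; the exact sequence depends on the heuristics above):

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx
       pushl   %esi
       subl    $N, %esp            N == frame.to_allocate

   With save_regs_using_mov the pushes are replaced by moves into the newly
   allocated area, and with TARGET_STACK_PROBE large allocations go through
   the allocate_stack_worker pattern with the size in %eax instead of the
   sub.  */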
5948
5949 /* Emit code to restore saved registers using MOV insns. First register
5950 is restored from POINTER + OFFSET. */
5951 static void
5952 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5953 int maybe_eh_return)
5954 {
5955 int regno;
5956 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5957
5958 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5959 if (ix86_save_reg (regno, maybe_eh_return))
5960 {
5961 /* Ensure that adjust_address won't be forced to produce a pointer
5962 outside the range allowed by the x86-64 instruction set. */
5963 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5964 {
5965 rtx r11;
5966
5967 r11 = gen_rtx_REG (DImode, R11_REG);
5968 emit_move_insn (r11, GEN_INT (offset));
5969 emit_insn (gen_adddi3 (r11, r11, pointer));
5970 base_address = gen_rtx_MEM (Pmode, r11);
5971 offset = 0;
5972 }
5973 emit_move_insn (gen_rtx_REG (Pmode, regno),
5974 adjust_address (base_address, Pmode, offset));
5975 offset += UNITS_PER_WORD;
5976 }
5977 }
5978
5979 /* Restore function stack, frame, and registers. */
5980
5981 void
5982 ix86_expand_epilogue (int style)
5983 {
5984 int regno;
5985 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5986 struct ix86_frame frame;
5987 HOST_WIDE_INT offset;
5988
5989 ix86_compute_frame_layout (&frame);
5990
5991 /* Calculate start of saved registers relative to ebp. Special care
5992 must be taken for the normal return case of a function using
5993 eh_return: the eax and edx registers are marked as saved, but not
5994 restored along this path. */
5995 offset = frame.nregs;
5996 if (current_function_calls_eh_return && style != 2)
5997 offset -= 2;
5998 offset *= -UNITS_PER_WORD;
5999
6000 /* If we're only restoring one register and sp is not valid, then
6001 use a move instruction to restore the register, since it's
6002 less work than reloading sp and popping the register.
6003
6004 The default code results in a stack adjustment using an add/lea instruction,
6005 while this code results in a LEAVE instruction (or discrete equivalent),
6006 so it is profitable in some other cases as well, especially when there
6007 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6008 and there is exactly one register to pop. This heuristic may need some
6009 tuning in the future. */
6010 if ((!sp_valid && frame.nregs <= 1)
6011 || (TARGET_EPILOGUE_USING_MOVE
6012 && cfun->machine->use_fast_prologue_epilogue
6013 && (frame.nregs > 1 || frame.to_allocate))
6014 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6015 || (frame_pointer_needed && TARGET_USE_LEAVE
6016 && cfun->machine->use_fast_prologue_epilogue
6017 && frame.nregs == 1)
6018 || current_function_calls_eh_return)
6019 {
6020 /* Restore registers. We can use ebp or esp to address the memory
6021 locations. If both are available, default to ebp, since offsets
6022 are known to be small. The only exception is esp pointing directly to the
6023 end of the block of saved registers, where we may simplify the addressing
6024 mode. */
6025
6026 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6027 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6028 frame.to_allocate, style == 2);
6029 else
6030 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6031 offset, style == 2);
6032
6033 /* eh_return epilogues need %ecx added to the stack pointer. */
6034 if (style == 2)
6035 {
6036 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6037
6038 if (frame_pointer_needed)
6039 {
6040 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6041 tmp = plus_constant (tmp, UNITS_PER_WORD);
6042 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6043
6044 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6045 emit_move_insn (hard_frame_pointer_rtx, tmp);
6046
6047 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6048 const0_rtx, style);
6049 }
6050 else
6051 {
6052 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6053 tmp = plus_constant (tmp, (frame.to_allocate
6054 + frame.nregs * UNITS_PER_WORD));
6055 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6056 }
6057 }
6058 else if (!frame_pointer_needed)
6059 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6060 GEN_INT (frame.to_allocate
6061 + frame.nregs * UNITS_PER_WORD),
6062 style);
6063 /* If not an i386, mov & pop is faster than "leave". */
6064 else if (TARGET_USE_LEAVE || optimize_size
6065 || !cfun->machine->use_fast_prologue_epilogue)
6066 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6067 else
6068 {
6069 pro_epilogue_adjust_stack (stack_pointer_rtx,
6070 hard_frame_pointer_rtx,
6071 const0_rtx, style);
6072 if (TARGET_64BIT)
6073 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6074 else
6075 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6076 }
6077 }
6078 else
6079 {
6080 /* First step is to deallocate the stack frame so that we can
6081 pop the registers. */
6082 if (!sp_valid)
6083 {
6084 gcc_assert (frame_pointer_needed);
6085 pro_epilogue_adjust_stack (stack_pointer_rtx,
6086 hard_frame_pointer_rtx,
6087 GEN_INT (offset), style);
6088 }
6089 else if (frame.to_allocate)
6090 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6091 GEN_INT (frame.to_allocate), style);
6092
6093 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6094 if (ix86_save_reg (regno, false))
6095 {
6096 if (TARGET_64BIT)
6097 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6098 else
6099 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6100 }
6101 if (frame_pointer_needed)
6102 {
6103 /* Leave results in shorter dependency chains on CPUs that are
6104 able to grok it fast. */
6105 if (TARGET_USE_LEAVE)
6106 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6107 else if (TARGET_64BIT)
6108 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6109 else
6110 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6111 }
6112 }
6113
6114 if (cfun->machine->force_align_arg_pointer)
6115 {
6116 emit_insn (gen_addsi3 (stack_pointer_rtx,
6117 cfun->machine->force_align_arg_pointer,
6118 GEN_INT (-4)));
6119 }
6120
6121 /* Sibcall epilogues don't want a return instruction. */
6122 if (style == 0)
6123 return;
6124
6125 if (current_function_pops_args && current_function_args_size)
6126 {
6127 rtx popc = GEN_INT (current_function_pops_args);
6128
6129 /* i386 can only pop 64K bytes. If asked to pop more, pop
6130 return address, do explicit add, and jump indirectly to the
6131 caller. */
6132
6133 if (current_function_pops_args >= 65536)
6134 {
6135 rtx ecx = gen_rtx_REG (SImode, 2);
6136
6137 /* There is no "pascal" calling convention in 64bit ABI. */
6138 gcc_assert (!TARGET_64BIT);
6139
6140 emit_insn (gen_popsi1 (ecx));
6141 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6142 emit_jump_insn (gen_return_indirect_internal (ecx));
6143 }
6144 else
6145 emit_jump_insn (gen_return_pop_internal (popc));
6146 }
6147 else
6148 emit_jump_insn (gen_return_internal ());
6149 }
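
/* A sketch of typical epilogues produced above (assumed frame shapes): with
   a frame pointer and TARGET_USE_LEAVE the whole teardown is

       leave
       ret

   while the pop-based path looks like

       addl    $N, %esp            N == frame.to_allocate
       popl    %ebx
       popl    %ebp
       ret     $M                  only when the function pops its own args

   Sibcall epilogues (style == 0) emit the stack adjustment but no return
   insn.  */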
6150
6151 /* Undo any modifications made to global state while compiling the function. */
6152
6153 static void
6154 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6155 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6156 {
6157 if (pic_offset_table_rtx)
6158 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6159 #if TARGET_MACHO
6160 /* Mach-O doesn't support labels at the end of objects, so if
6161 it looks like we might want one, insert a NOP. */
6162 {
6163 rtx insn = get_last_insn ();
6164 while (insn
6165 && NOTE_P (insn)
6166 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6167 insn = PREV_INSN (insn);
6168 if (insn
6169 && (LABEL_P (insn)
6170 || (NOTE_P (insn)
6171 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6172 fputs ("\tnop\n", file);
6173 }
6174 #endif
6175
6176 }
6177 \f
6178 /* Extract the parts of an RTL expression that is a valid memory address
6179 for an instruction. Return 0 if the structure of the address is
6180 grossly off. Return -1 if the address contains ASHIFT, so it is not
6181 strictly valid, but is still used for computing the length of the lea instruction. */
6182
6183 int
6184 ix86_decompose_address (rtx addr, struct ix86_address *out)
6185 {
6186 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6187 rtx base_reg, index_reg;
6188 HOST_WIDE_INT scale = 1;
6189 rtx scale_rtx = NULL_RTX;
6190 int retval = 1;
6191 enum ix86_address_seg seg = SEG_DEFAULT;
6192
6193 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6194 base = addr;
6195 else if (GET_CODE (addr) == PLUS)
6196 {
6197 rtx addends[4], op;
6198 int n = 0, i;
6199
6200 op = addr;
6201 do
6202 {
6203 if (n >= 4)
6204 return 0;
6205 addends[n++] = XEXP (op, 1);
6206 op = XEXP (op, 0);
6207 }
6208 while (GET_CODE (op) == PLUS);
6209 if (n >= 4)
6210 return 0;
6211 addends[n] = op;
6212
6213 for (i = n; i >= 0; --i)
6214 {
6215 op = addends[i];
6216 switch (GET_CODE (op))
6217 {
6218 case MULT:
6219 if (index)
6220 return 0;
6221 index = XEXP (op, 0);
6222 scale_rtx = XEXP (op, 1);
6223 break;
6224
6225 case UNSPEC:
6226 if (XINT (op, 1) == UNSPEC_TP
6227 && TARGET_TLS_DIRECT_SEG_REFS
6228 && seg == SEG_DEFAULT)
6229 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6230 else
6231 return 0;
6232 break;
6233
6234 case REG:
6235 case SUBREG:
6236 if (!base)
6237 base = op;
6238 else if (!index)
6239 index = op;
6240 else
6241 return 0;
6242 break;
6243
6244 case CONST:
6245 case CONST_INT:
6246 case SYMBOL_REF:
6247 case LABEL_REF:
6248 if (disp)
6249 return 0;
6250 disp = op;
6251 break;
6252
6253 default:
6254 return 0;
6255 }
6256 }
6257 }
6258 else if (GET_CODE (addr) == MULT)
6259 {
6260 index = XEXP (addr, 0); /* index*scale */
6261 scale_rtx = XEXP (addr, 1);
6262 }
6263 else if (GET_CODE (addr) == ASHIFT)
6264 {
6265 rtx tmp;
6266
6267 /* We're called for lea too, which implements ashift on occasion. */
6268 index = XEXP (addr, 0);
6269 tmp = XEXP (addr, 1);
6270 if (!CONST_INT_P (tmp))
6271 return 0;
6272 scale = INTVAL (tmp);
6273 if ((unsigned HOST_WIDE_INT) scale > 3)
6274 return 0;
6275 scale = 1 << scale;
6276 retval = -1;
6277 }
6278 else
6279 disp = addr; /* displacement */
6280
6281 /* Extract the integral value of scale. */
6282 if (scale_rtx)
6283 {
6284 if (!CONST_INT_P (scale_rtx))
6285 return 0;
6286 scale = INTVAL (scale_rtx);
6287 }
6288
6289 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6290 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6291
6292 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6293 if (base_reg && index_reg && scale == 1
6294 && (index_reg == arg_pointer_rtx
6295 || index_reg == frame_pointer_rtx
6296 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6297 {
6298 rtx tmp;
6299 tmp = base, base = index, index = tmp;
6300 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6301 }
6302
6303 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6304 if ((base_reg == hard_frame_pointer_rtx
6305 || base_reg == frame_pointer_rtx
6306 || base_reg == arg_pointer_rtx) && !disp)
6307 disp = const0_rtx;
6308
6309 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6310 Avoid this by transforming to [%esi+0]. */
6311 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6312 && base_reg && !index_reg && !disp
6313 && REG_P (base_reg)
6314 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6315 disp = const0_rtx;
6316
6317 /* Special case: encode reg+reg instead of reg*2. */
6318 if (!base && index && scale && scale == 2)
6319 base = index, base_reg = index_reg, scale = 1;
6320
6321 /* Special case: scaling cannot be encoded without base or displacement. */
6322 if (!base && !disp && index && scale != 1)
6323 disp = const0_rtx;
6324
6325 out->base = base;
6326 out->index = index;
6327 out->disp = disp;
6328 out->scale = scale;
6329 out->seg = seg;
6330
6331 return retval;
6332 }
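
/* A worked example for the routine above (illustrative, not from the
   sources): the address of "12(%ebx,%eax,4)" arrives as

       (plus:SI (plus:SI (mult:SI (reg:SI ax) (const_int 4))
                         (reg:SI bx))
                (const_int 12))

   and decomposes into base = %ebx, index = %eax, scale = 4, disp = 12,
   seg = SEG_DEFAULT, with retval == 1; an address that is an ASHIFT
   (as lea sometimes uses) yields retval == -1 instead.  */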
6333 \f
6334 /* Return cost of the memory address x.
6335 For i386, it is better to use a complex address than let gcc copy
6336 the address into a reg and make a new pseudo. But not if the address
6337 requires two regs - that would mean more pseudos with longer
6338 lifetimes. */
6339 static int
6340 ix86_address_cost (rtx x)
6341 {
6342 struct ix86_address parts;
6343 int cost = 1;
6344 int ok = ix86_decompose_address (x, &parts);
6345
6346 gcc_assert (ok);
6347
6348 if (parts.base && GET_CODE (parts.base) == SUBREG)
6349 parts.base = SUBREG_REG (parts.base);
6350 if (parts.index && GET_CODE (parts.index) == SUBREG)
6351 parts.index = SUBREG_REG (parts.index);
6352
6353 /* More complex memory references are better. */
6354 if (parts.disp && parts.disp != const0_rtx)
6355 cost--;
6356 if (parts.seg != SEG_DEFAULT)
6357 cost--;
6358
6359 /* Attempt to minimize number of registers in the address. */
6360 if ((parts.base
6361 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6362 || (parts.index
6363 && (!REG_P (parts.index)
6364 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6365 cost++;
6366
6367 if (parts.base
6368 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6369 && parts.index
6370 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6371 && parts.base != parts.index)
6372 cost++;
6373
6374 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6375 since its predecode logic can't detect the length of such instructions
6376 and decoding degenerates to vector decoding. Increase cost of such
6377 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6378 to split such addresses or even refuse such addresses at all.
6379
6380 The following addressing modes are affected:
6381 [base+scale*index]
6382 [scale*index+disp]
6383 [base+index]
6384
6385 The first and last cases may be avoidable by explicitly coding the zero in the
6386 memory address, but I don't have an AMD K6 machine handy to check this
6387 theory. */
6388
6389 if (TARGET_K6
6390 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6391 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6392 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6393 cost += 10;
6394
6395 return cost;
6396 }
6397 \f
6398 /* If X is a machine specific address (i.e. a symbol or label being
6399 referenced as a displacement from the GOT implemented using an
6400 UNSPEC), then return the base term. Otherwise return X. */
6401
6402 rtx
6403 ix86_find_base_term (rtx x)
6404 {
6405 rtx term;
6406
6407 if (TARGET_64BIT)
6408 {
6409 if (GET_CODE (x) != CONST)
6410 return x;
6411 term = XEXP (x, 0);
6412 if (GET_CODE (term) == PLUS
6413 && (CONST_INT_P (XEXP (term, 1))
6414 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6415 term = XEXP (term, 0);
6416 if (GET_CODE (term) != UNSPEC
6417 || XINT (term, 1) != UNSPEC_GOTPCREL)
6418 return x;
6419
6420 term = XVECEXP (term, 0, 0);
6421
6422 if (GET_CODE (term) != SYMBOL_REF
6423 && GET_CODE (term) != LABEL_REF)
6424 return x;
6425
6426 return term;
6427 }
6428
6429 term = ix86_delegitimize_address (x);
6430
6431 if (GET_CODE (term) != SYMBOL_REF
6432 && GET_CODE (term) != LABEL_REF)
6433 return x;
6434
6435 return term;
6436 }
6437
6438 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6439 this is used to form addresses of local data when -fPIC is in
6440 use. */
6441
6442 static bool
6443 darwin_local_data_pic (rtx disp)
6444 {
6445 if (GET_CODE (disp) == MINUS)
6446 {
6447 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6448 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6449 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6450 {
6451 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6452 if (! strcmp (sym_name, "<pic base>"))
6453 return true;
6454 }
6455 }
6456
6457 return false;
6458 }
6459 \f
6460 /* Determine if a given RTX is a valid constant. We already know this
6461 satisfies CONSTANT_P. */
6462
6463 bool
6464 legitimate_constant_p (rtx x)
6465 {
6466 switch (GET_CODE (x))
6467 {
6468 case CONST:
6469 x = XEXP (x, 0);
6470
6471 if (GET_CODE (x) == PLUS)
6472 {
6473 if (!CONST_INT_P (XEXP (x, 1)))
6474 return false;
6475 x = XEXP (x, 0);
6476 }
6477
6478 if (TARGET_MACHO && darwin_local_data_pic (x))
6479 return true;
6480
6481 /* Only some unspecs are valid as "constants". */
6482 if (GET_CODE (x) == UNSPEC)
6483 switch (XINT (x, 1))
6484 {
6485 case UNSPEC_GOTOFF:
6486 return TARGET_64BIT;
6487 case UNSPEC_TPOFF:
6488 case UNSPEC_NTPOFF:
6489 x = XVECEXP (x, 0, 0);
6490 return (GET_CODE (x) == SYMBOL_REF
6491 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6492 case UNSPEC_DTPOFF:
6493 x = XVECEXP (x, 0, 0);
6494 return (GET_CODE (x) == SYMBOL_REF
6495 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6496 default:
6497 return false;
6498 }
6499
6500 /* We must have drilled down to a symbol. */
6501 if (GET_CODE (x) == LABEL_REF)
6502 return true;
6503 if (GET_CODE (x) != SYMBOL_REF)
6504 return false;
6505 /* FALLTHRU */
6506
6507 case SYMBOL_REF:
6508 /* TLS symbols are never valid. */
6509 if (SYMBOL_REF_TLS_MODEL (x))
6510 return false;
6511 break;
6512
6513 case CONST_DOUBLE:
6514 if (GET_MODE (x) == TImode
6515 && x != CONST0_RTX (TImode)
6516 && !TARGET_64BIT)
6517 return false;
6518 break;
6519
6520 case CONST_VECTOR:
6521 if (x == CONST0_RTX (GET_MODE (x)))
6522 return true;
6523 return false;
6524
6525 default:
6526 break;
6527 }
6528
6529 /* Otherwise we handle everything else in the move patterns. */
6530 return true;
6531 }
6532
6533 /* Determine if it's legal to put X into the constant pool. This
6534 is not possible for the address of thread-local symbols, which
6535 is checked above. */
6536
6537 static bool
6538 ix86_cannot_force_const_mem (rtx x)
6539 {
6540 /* We can always put integral constants and vectors in memory. */
6541 switch (GET_CODE (x))
6542 {
6543 case CONST_INT:
6544 case CONST_DOUBLE:
6545 case CONST_VECTOR:
6546 return false;
6547
6548 default:
6549 break;
6550 }
6551 return !legitimate_constant_p (x);
6552 }
6553
6554 /* Determine if a given RTX is a valid constant address. */
6555
6556 bool
6557 constant_address_p (rtx x)
6558 {
6559 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6560 }
6561
6562 /* Nonzero if the constant value X is a legitimate general operand
6563 when generating PIC code. It is given that flag_pic is on and
6564 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6565
6566 bool
6567 legitimate_pic_operand_p (rtx x)
6568 {
6569 rtx inner;
6570
6571 switch (GET_CODE (x))
6572 {
6573 case CONST:
6574 inner = XEXP (x, 0);
6575 if (GET_CODE (inner) == PLUS
6576 && CONST_INT_P (XEXP (inner, 1)))
6577 inner = XEXP (inner, 0);
6578
6579 /* Only some unspecs are valid as "constants". */
6580 if (GET_CODE (inner) == UNSPEC)
6581 switch (XINT (inner, 1))
6582 {
6583 case UNSPEC_GOTOFF:
6584 return TARGET_64BIT;
6585 case UNSPEC_TPOFF:
6586 x = XVECEXP (inner, 0, 0);
6587 return (GET_CODE (x) == SYMBOL_REF
6588 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6589 default:
6590 return false;
6591 }
6592 /* FALLTHRU */
6593
6594 case SYMBOL_REF:
6595 case LABEL_REF:
6596 return legitimate_pic_address_disp_p (x);
6597
6598 default:
6599 return true;
6600 }
6601 }
6602
6603 /* Determine if a given CONST RTX is a valid memory displacement
6604 in PIC mode. */
6605
6606 int
6607 legitimate_pic_address_disp_p (rtx disp)
6608 {
6609 bool saw_plus;
6610
6611 /* In 64bit mode we can allow direct addresses of symbols and labels
6612 when they are not dynamic symbols. */
6613 if (TARGET_64BIT)
6614 {
6615 rtx op0 = disp, op1;
6616
6617 switch (GET_CODE (disp))
6618 {
6619 case LABEL_REF:
6620 return true;
6621
6622 case CONST:
6623 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6624 break;
6625 op0 = XEXP (XEXP (disp, 0), 0);
6626 op1 = XEXP (XEXP (disp, 0), 1);
6627 if (!CONST_INT_P (op1)
6628 || INTVAL (op1) >= 16*1024*1024
6629 || INTVAL (op1) < -16*1024*1024)
6630 break;
6631 if (GET_CODE (op0) == LABEL_REF)
6632 return true;
6633 if (GET_CODE (op0) != SYMBOL_REF)
6634 break;
6635 /* FALLTHRU */
6636
6637 case SYMBOL_REF:
6638 /* TLS references should always be enclosed in UNSPEC. */
6639 if (SYMBOL_REF_TLS_MODEL (op0))
6640 return false;
6641 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6642 return true;
6643 break;
6644
6645 default:
6646 break;
6647 }
6648 }
6649 if (GET_CODE (disp) != CONST)
6650 return 0;
6651 disp = XEXP (disp, 0);
6652
6653 if (TARGET_64BIT)
6654 {
6655 /* It is unsafe to allow PLUS expressions here; this restriction limits the
6656 allowed distance into the GOT table. We should not need these anyway. */
6657 if (GET_CODE (disp) != UNSPEC
6658 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6659 && XINT (disp, 1) != UNSPEC_GOTOFF))
6660 return 0;
6661
6662 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6663 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6664 return 0;
6665 return 1;
6666 }
6667
6668 saw_plus = false;
6669 if (GET_CODE (disp) == PLUS)
6670 {
6671 if (!CONST_INT_P (XEXP (disp, 1)))
6672 return 0;
6673 disp = XEXP (disp, 0);
6674 saw_plus = true;
6675 }
6676
6677 if (TARGET_MACHO && darwin_local_data_pic (disp))
6678 return 1;
6679
6680 if (GET_CODE (disp) != UNSPEC)
6681 return 0;
6682
6683 switch (XINT (disp, 1))
6684 {
6685 case UNSPEC_GOT:
6686 if (saw_plus)
6687 return false;
6688 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6689 case UNSPEC_GOTOFF:
6690 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6691 While the ABI also specifies a 32bit relocation, we don't produce it
6692 in the small PIC model at all. */
6693 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6694 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6695 && !TARGET_64BIT)
6696 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6697 return false;
6698 case UNSPEC_GOTTPOFF:
6699 case UNSPEC_GOTNTPOFF:
6700 case UNSPEC_INDNTPOFF:
6701 if (saw_plus)
6702 return false;
6703 disp = XVECEXP (disp, 0, 0);
6704 return (GET_CODE (disp) == SYMBOL_REF
6705 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6706 case UNSPEC_NTPOFF:
6707 disp = XVECEXP (disp, 0, 0);
6708 return (GET_CODE (disp) == SYMBOL_REF
6709 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6710 case UNSPEC_DTPOFF:
6711 disp = XVECEXP (disp, 0, 0);
6712 return (GET_CODE (disp) == SYMBOL_REF
6713 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6714 }
6715
6716 return 0;
6717 }
6718
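/* For example, the canonical 32bit PIC displacement for a local symbol,
(const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)), optionally wrapped in a
(plus ... (const_int N)), is accepted here, whereas a @GOT reference
(UNSPEC_GOT) is rejected as soon as an offset has been added to it. */
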
6719 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6720 memory address for an instruction. The MODE argument is the machine mode
6721 for the MEM expression that wants to use this address.
6722
6723 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should
6724 convert common non-canonical forms to canonical form so that they will
6725 be recognized. */
6726
6727 int
6728 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6729 {
6730 struct ix86_address parts;
6731 rtx base, index, disp;
6732 HOST_WIDE_INT scale;
6733 const char *reason = NULL;
6734 rtx reason_rtx = NULL_RTX;
6735
6736 if (TARGET_DEBUG_ADDR)
6737 {
6738 fprintf (stderr,
6739 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6740 GET_MODE_NAME (mode), strict);
6741 debug_rtx (addr);
6742 }
6743
6744 if (ix86_decompose_address (addr, &parts) <= 0)
6745 {
6746 reason = "decomposition failed";
6747 goto report_error;
6748 }
6749
6750 base = parts.base;
6751 index = parts.index;
6752 disp = parts.disp;
6753 scale = parts.scale;
6754
6755 /* Validate base register.
6756
6757 Don't allow SUBREGs that span more than a word here; they can lead to spill
6758 failures when the base is one word out of a two word structure, which is
6759 represented internally as a DImode int. */
6760
6761 if (base)
6762 {
6763 rtx reg;
6764 reason_rtx = base;
6765
6766 if (REG_P (base))
6767 reg = base;
6768 else if (GET_CODE (base) == SUBREG
6769 && REG_P (SUBREG_REG (base))
6770 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6771 <= UNITS_PER_WORD)
6772 reg = SUBREG_REG (base);
6773 else
6774 {
6775 reason = "base is not a register";
6776 goto report_error;
6777 }
6778
6779 if (GET_MODE (base) != Pmode)
6780 {
6781 reason = "base is not in Pmode";
6782 goto report_error;
6783 }
6784
6785 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6786 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6787 {
6788 reason = "base is not valid";
6789 goto report_error;
6790 }
6791 }
6792
6793 /* Validate index register.
6794
6795 Don't allow SUBREGs that span more than a word here -- same as above. */
6796
6797 if (index)
6798 {
6799 rtx reg;
6800 reason_rtx = index;
6801
6802 if (REG_P (index))
6803 reg = index;
6804 else if (GET_CODE (index) == SUBREG
6805 && REG_P (SUBREG_REG (index))
6806 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6807 <= UNITS_PER_WORD)
6808 reg = SUBREG_REG (index);
6809 else
6810 {
6811 reason = "index is not a register";
6812 goto report_error;
6813 }
6814
6815 if (GET_MODE (index) != Pmode)
6816 {
6817 reason = "index is not in Pmode";
6818 goto report_error;
6819 }
6820
6821 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6822 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6823 {
6824 reason = "index is not valid";
6825 goto report_error;
6826 }
6827 }
6828
6829 /* Validate scale factor. */
6830 if (scale != 1)
6831 {
6832 reason_rtx = GEN_INT (scale);
6833 if (!index)
6834 {
6835 reason = "scale without index";
6836 goto report_error;
6837 }
6838
6839 if (scale != 2 && scale != 4 && scale != 8)
6840 {
6841 reason = "scale is not a valid multiplier";
6842 goto report_error;
6843 }
6844 }
6845
6846 /* Validate displacement. */
6847 if (disp)
6848 {
6849 reason_rtx = disp;
6850
6851 if (GET_CODE (disp) == CONST
6852 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6853 switch (XINT (XEXP (disp, 0), 1))
6854 {
6855 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6856 used. While the ABI also specifies 32bit relocations, we don't
6857 produce them at all and use IP-relative addressing instead. */
6858 case UNSPEC_GOT:
6859 case UNSPEC_GOTOFF:
6860 gcc_assert (flag_pic);
6861 if (!TARGET_64BIT)
6862 goto is_legitimate_pic;
6863 reason = "64bit address unspec";
6864 goto report_error;
6865
6866 case UNSPEC_GOTPCREL:
6867 gcc_assert (flag_pic);
6868 goto is_legitimate_pic;
6869
6870 case UNSPEC_GOTTPOFF:
6871 case UNSPEC_GOTNTPOFF:
6872 case UNSPEC_INDNTPOFF:
6873 case UNSPEC_NTPOFF:
6874 case UNSPEC_DTPOFF:
6875 break;
6876
6877 default:
6878 reason = "invalid address unspec";
6879 goto report_error;
6880 }
6881
6882 else if (SYMBOLIC_CONST (disp)
6883 && (flag_pic
6884 || (TARGET_MACHO
6885 #if TARGET_MACHO
6886 && MACHOPIC_INDIRECT
6887 && !machopic_operand_p (disp)
6888 #endif
6889 )))
6890 {
6891
6892 is_legitimate_pic:
6893 if (TARGET_64BIT && (index || base))
6894 {
6895 /* foo@dtpoff(%rX) is ok. */
6896 if (GET_CODE (disp) != CONST
6897 || GET_CODE (XEXP (disp, 0)) != PLUS
6898 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6899 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6900 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6901 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6902 {
6903 reason = "non-constant pic memory reference";
6904 goto report_error;
6905 }
6906 }
6907 else if (! legitimate_pic_address_disp_p (disp))
6908 {
6909 reason = "displacement is an invalid pic construct";
6910 goto report_error;
6911 }
6912
6913 /* This code used to verify that a symbolic pic displacement
6914 includes the pic_offset_table_rtx register.
6915 
6916 While this is a good idea, unfortunately these constructs may
6917 be created by the "adds using lea" optimization for incorrect
6918 code like:
6919 
6920 int a;
6921 int foo(int i)
6922 {
6923 return *(&a+i);
6924 }
6925 
6926 This code is nonsensical, but results in addressing the
6927 GOT table with a pic_offset_table_rtx base. We can't
6928 just refuse it easily, since it gets matched by the
6929 "addsi3" pattern, which later gets split to lea when the
6930 output register differs from the input. While this
6931 could be handled by a separate addsi pattern for this case
6932 that never results in lea, disabling this test seems to be
6933 the easier and correct fix for the crash. */
6934 }
6935 else if (GET_CODE (disp) != LABEL_REF
6936 && !CONST_INT_P (disp)
6937 && (GET_CODE (disp) != CONST
6938 || !legitimate_constant_p (disp))
6939 && (GET_CODE (disp) != SYMBOL_REF
6940 || !legitimate_constant_p (disp)))
6941 {
6942 reason = "displacement is not constant";
6943 goto report_error;
6944 }
6945 else if (TARGET_64BIT
6946 && !x86_64_immediate_operand (disp, VOIDmode))
6947 {
6948 reason = "displacement is out of range";
6949 goto report_error;
6950 }
6951 }
6952
6953 /* Everything looks valid. */
6954 if (TARGET_DEBUG_ADDR)
6955 fprintf (stderr, "Success.\n");
6956 return TRUE;
6957
6958 report_error:
6959 if (TARGET_DEBUG_ADDR)
6960 {
6961 fprintf (stderr, "Error: %s\n", reason);
6962 debug_rtx (reason_rtx);
6963 }
6964 return FALSE;
6965 }
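
/* For example, the address
(plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))
decomposes into base %ebx, index %eax, scale 4 and displacement 12,
i.e. 12(%ebx,%eax,4), and is accepted; the same address with a scale of 3
would be rejected as "scale is not a valid multiplier". */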
6966 \f
6967 /* Return a unique alias set for the GOT. */
6968
6969 static HOST_WIDE_INT
6970 ix86_GOT_alias_set (void)
6971 {
6972 static HOST_WIDE_INT set = -1;
6973 if (set == -1)
6974 set = new_alias_set ();
6975 return set;
6976 }
6977
6978 /* Return a legitimate reference for ORIG (an address) using the
6979 register REG. If REG is 0, a new pseudo is generated.
6980
6981 There are two types of references that must be handled:
6982
6983 1. Global data references must load the address from the GOT, via
6984 the PIC reg. An insn is emitted to do this load, and the reg is
6985 returned.
6986
6987 2. Static data references, constant pool addresses, and code labels
6988 compute the address as an offset from the GOT, whose base is in
6989 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6990 differentiate them from global data objects. The returned
6991 address is the PIC reg + an unspec constant.
6992
6993 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6994 reg also appears in the address. */
6995
6996 static rtx
6997 legitimize_pic_address (rtx orig, rtx reg)
6998 {
6999 rtx addr = orig;
7000 rtx new = orig;
7001 rtx base;
7002
7003 #if TARGET_MACHO
7004 if (TARGET_MACHO && !TARGET_64BIT)
7005 {
7006 if (reg == 0)
7007 reg = gen_reg_rtx (Pmode);
7008 /* Use the generic Mach-O PIC machinery. */
7009 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7010 }
7011 #endif
7012
7013 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7014 new = addr;
7015 else if (TARGET_64BIT
7016 && ix86_cmodel != CM_SMALL_PIC
7017 && local_symbolic_operand (addr, Pmode))
7018 {
7019 rtx tmpreg;
7020 /* This symbol may be referenced via a displacement from the PIC
7021 base address (@GOTOFF). */
7022
7023 if (reload_in_progress)
7024 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7025 if (GET_CODE (addr) == CONST)
7026 addr = XEXP (addr, 0);
7027 if (GET_CODE (addr) == PLUS)
7028 {
7029 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7030 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7031 }
7032 else
7033 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7034 new = gen_rtx_CONST (Pmode, new);
7035 if (!reg)
7036 tmpreg = gen_reg_rtx (Pmode);
7037 else
7038 tmpreg = reg;
7039 emit_move_insn (tmpreg, new);
7040
7041 if (reg != 0)
7042 {
7043 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7044 tmpreg, 1, OPTAB_DIRECT);
7045 new = reg;
7046 }
7047 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7048 }
7049 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7050 {
7051 /* This symbol may be referenced via a displacement from the PIC
7052 base address (@GOTOFF). */
7053
7054 if (reload_in_progress)
7055 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7056 if (GET_CODE (addr) == CONST)
7057 addr = XEXP (addr, 0);
7058 if (GET_CODE (addr) == PLUS)
7059 {
7060 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7061 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7062 }
7063 else
7064 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7065 new = gen_rtx_CONST (Pmode, new);
7066 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7067
7068 if (reg != 0)
7069 {
7070 emit_move_insn (reg, new);
7071 new = reg;
7072 }
7073 }
7074 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7075 {
7076 if (TARGET_64BIT)
7077 {
7078 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7079 new = gen_rtx_CONST (Pmode, new);
7080 new = gen_const_mem (Pmode, new);
7081 set_mem_alias_set (new, ix86_GOT_alias_set ());
7082
7083 if (reg == 0)
7084 reg = gen_reg_rtx (Pmode);
7085 /* Use gen_movsi directly; otherwise the address is loaded
7086 into a register for CSE. We don't want to CSE these addresses;
7087 instead we CSE addresses from the GOT table, so skip this. */
7088 emit_insn (gen_movsi (reg, new));
7089 new = reg;
7090 }
7091 else
7092 {
7093 /* This symbol must be referenced via a load from the
7094 Global Offset Table (@GOT). */
7095
7096 if (reload_in_progress)
7097 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7098 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7099 new = gen_rtx_CONST (Pmode, new);
7100 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7101 new = gen_const_mem (Pmode, new);
7102 set_mem_alias_set (new, ix86_GOT_alias_set ());
7103
7104 if (reg == 0)
7105 reg = gen_reg_rtx (Pmode);
7106 emit_move_insn (reg, new);
7107 new = reg;
7108 }
7109 }
7110 else
7111 {
7112 if (CONST_INT_P (addr)
7113 && !x86_64_immediate_operand (addr, VOIDmode))
7114 {
7115 if (reg)
7116 {
7117 emit_move_insn (reg, addr);
7118 new = reg;
7119 }
7120 else
7121 new = force_reg (Pmode, addr);
7122 }
7123 else if (GET_CODE (addr) == CONST)
7124 {
7125 addr = XEXP (addr, 0);
7126
7127 /* We must match stuff we generated before. Assume the only
7128 unspecs that can get here are ours. Not that we could do
7129 anything with them anyway.... */
7130 if (GET_CODE (addr) == UNSPEC
7131 || (GET_CODE (addr) == PLUS
7132 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7133 return orig;
7134 gcc_assert (GET_CODE (addr) == PLUS);
7135 }
7136 if (GET_CODE (addr) == PLUS)
7137 {
7138 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7139
7140 /* Check first to see if this is a constant offset from a @GOTOFF
7141 symbol reference. */
7142 if (local_symbolic_operand (op0, Pmode)
7143 && CONST_INT_P (op1))
7144 {
7145 if (!TARGET_64BIT)
7146 {
7147 if (reload_in_progress)
7148 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7149 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7150 UNSPEC_GOTOFF);
7151 new = gen_rtx_PLUS (Pmode, new, op1);
7152 new = gen_rtx_CONST (Pmode, new);
7153 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7154
7155 if (reg != 0)
7156 {
7157 emit_move_insn (reg, new);
7158 new = reg;
7159 }
7160 }
7161 else
7162 {
7163 if (INTVAL (op1) < -16*1024*1024
7164 || INTVAL (op1) >= 16*1024*1024)
7165 {
7166 if (!x86_64_immediate_operand (op1, Pmode))
7167 op1 = force_reg (Pmode, op1);
7168 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7169 }
7170 }
7171 }
7172 else
7173 {
7174 base = legitimize_pic_address (XEXP (addr, 0), reg);
7175 new = legitimize_pic_address (XEXP (addr, 1),
7176 base == reg ? NULL_RTX : reg);
7177
7178 if (CONST_INT_P (new))
7179 new = plus_constant (base, INTVAL (new));
7180 else
7181 {
7182 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7183 {
7184 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7185 new = XEXP (new, 1);
7186 }
7187 new = gen_rtx_PLUS (Pmode, base, new);
7188 }
7189 }
7190 }
7191 }
7192 return new;
7193 }
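
/* For example, with -fpic on IA-32 a reference to a global symbol `bar'
becomes a load from the GOT,
(mem (plus (reg pic) (const (unspec [(symbol_ref "bar")] UNSPEC_GOT)))),
i.e. movl bar@GOT(%ebx), %reg, while a local symbol `foo' becomes the
GOT-relative sum
(plus (reg pic) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))). */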
7194 \f
7195 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7196
7197 static rtx
7198 get_thread_pointer (int to_reg)
7199 {
7200 rtx tp, reg, insn;
7201
7202 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7203 if (!to_reg)
7204 return tp;
7205
7206 reg = gen_reg_rtx (Pmode);
7207 insn = gen_rtx_SET (VOIDmode, reg, tp);
7208 insn = emit_insn (insn);
7209
7210 return reg;
7211 }
7212
7213 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7214 false if we expect this to be used for a memory address and true if
7215 we expect to load the address into a register. */
7216
7217 static rtx
7218 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7219 {
7220 rtx dest, base, off, pic, tp;
7221 int type;
7222
7223 switch (model)
7224 {
7225 case TLS_MODEL_GLOBAL_DYNAMIC:
7226 dest = gen_reg_rtx (Pmode);
7227 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7228
7229 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7230 {
7231 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7232
7233 start_sequence ();
7234 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7235 insns = get_insns ();
7236 end_sequence ();
7237
7238 emit_libcall_block (insns, dest, rax, x);
7239 }
7240 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7241 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7242 else
7243 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7244
7245 if (TARGET_GNU2_TLS)
7246 {
7247 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7248
7249 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7250 }
7251 break;
7252
7253 case TLS_MODEL_LOCAL_DYNAMIC:
7254 base = gen_reg_rtx (Pmode);
7255 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7256
7257 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7258 {
7259 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7260
7261 start_sequence ();
7262 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7263 insns = get_insns ();
7264 end_sequence ();
7265
7266 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7267 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7268 emit_libcall_block (insns, base, rax, note);
7269 }
7270 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7271 emit_insn (gen_tls_local_dynamic_base_64 (base));
7272 else
7273 emit_insn (gen_tls_local_dynamic_base_32 (base));
7274
7275 if (TARGET_GNU2_TLS)
7276 {
7277 rtx x = ix86_tls_module_base ();
7278
7279 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7280 gen_rtx_MINUS (Pmode, x, tp));
7281 }
7282
7283 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7284 off = gen_rtx_CONST (Pmode, off);
7285
7286 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7287
7288 if (TARGET_GNU2_TLS)
7289 {
7290 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7291
7292 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7293 }
7294
7295 break;
7296
7297 case TLS_MODEL_INITIAL_EXEC:
7298 if (TARGET_64BIT)
7299 {
7300 pic = NULL;
7301 type = UNSPEC_GOTNTPOFF;
7302 }
7303 else if (flag_pic)
7304 {
7305 if (reload_in_progress)
7306 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7307 pic = pic_offset_table_rtx;
7308 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7309 }
7310 else if (!TARGET_ANY_GNU_TLS)
7311 {
7312 pic = gen_reg_rtx (Pmode);
7313 emit_insn (gen_set_got (pic));
7314 type = UNSPEC_GOTTPOFF;
7315 }
7316 else
7317 {
7318 pic = NULL;
7319 type = UNSPEC_INDNTPOFF;
7320 }
7321
7322 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7323 off = gen_rtx_CONST (Pmode, off);
7324 if (pic)
7325 off = gen_rtx_PLUS (Pmode, pic, off);
7326 off = gen_const_mem (Pmode, off);
7327 set_mem_alias_set (off, ix86_GOT_alias_set ());
7328
7329 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7330 {
7331 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7332 off = force_reg (Pmode, off);
7333 return gen_rtx_PLUS (Pmode, base, off);
7334 }
7335 else
7336 {
7337 base = get_thread_pointer (true);
7338 dest = gen_reg_rtx (Pmode);
7339 emit_insn (gen_subsi3 (dest, base, off));
7340 }
7341 break;
7342
7343 case TLS_MODEL_LOCAL_EXEC:
7344 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7345 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7346 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7347 off = gen_rtx_CONST (Pmode, off);
7348
7349 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7350 {
7351 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7352 return gen_rtx_PLUS (Pmode, base, off);
7353 }
7354 else
7355 {
7356 base = get_thread_pointer (true);
7357 dest = gen_reg_rtx (Pmode);
7358 emit_insn (gen_subsi3 (dest, base, off));
7359 }
7360 break;
7361
7362 default:
7363 gcc_unreachable ();
7364 }
7365
7366 return dest;
7367 }
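
/* For example, in the local-exec model with GNU TLS the address of a
thread-local variable `foo' is built as
(plus (thread pointer) (const (unspec [(symbol_ref "foo")] UNSPEC_NTPOFF))),
which, together with TARGET_TLS_DIRECT_SEG_REFS, allows accesses of the
form movl %gs:foo@ntpoff, %eax on IA-32. */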
7368
7369 /* Try machine-dependent ways of modifying an illegitimate address
7370 to be legitimate. If we find one, return the new, valid address.
7371 This macro is used in only one place: `memory_address' in explow.c.
7372
7373 OLDX is the address as it was before break_out_memory_refs was called.
7374 In some cases it is useful to look at this to decide what needs to be done.
7375
7376 MODE and WIN are passed so that this macro can use
7377 GO_IF_LEGITIMATE_ADDRESS.
7378
7379 It is always safe for this macro to do nothing. It exists to recognize
7380 opportunities to optimize the output.
7381
7382 For the 80386, we handle X+REG by loading X into a register R and
7383 using R+REG. R will go in a general reg and indexing will be used.
7384 However, if REG is a broken-out memory address or multiplication,
7385 nothing needs to be done because REG can certainly go in a general reg.
7386
7387 When -fpic is used, special handling is needed for symbolic references.
7388 See comments by legitimize_pic_address in i386.c for details. */
7389
7390 rtx
7391 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7392 {
7393 int changed = 0;
7394 unsigned log;
7395
7396 if (TARGET_DEBUG_ADDR)
7397 {
7398 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7399 GET_MODE_NAME (mode));
7400 debug_rtx (x);
7401 }
7402
7403 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7404 if (log)
7405 return legitimize_tls_address (x, log, false);
7406 if (GET_CODE (x) == CONST
7407 && GET_CODE (XEXP (x, 0)) == PLUS
7408 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7409 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7410 {
7411 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7412 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7413 }
7414
7415 if (flag_pic && SYMBOLIC_CONST (x))
7416 return legitimize_pic_address (x, 0);
7417
7418 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7419 if (GET_CODE (x) == ASHIFT
7420 && CONST_INT_P (XEXP (x, 1))
7421 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7422 {
7423 changed = 1;
7424 log = INTVAL (XEXP (x, 1));
7425 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7426 GEN_INT (1 << log));
7427 }
7428
7429 if (GET_CODE (x) == PLUS)
7430 {
7431 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7432
7433 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7434 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7435 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7436 {
7437 changed = 1;
7438 log = INTVAL (XEXP (XEXP (x, 0), 1));
7439 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7440 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7441 GEN_INT (1 << log));
7442 }
7443
7444 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7445 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7446 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7447 {
7448 changed = 1;
7449 log = INTVAL (XEXP (XEXP (x, 1), 1));
7450 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7451 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7452 GEN_INT (1 << log));
7453 }
7454
7455 /* Put multiply first if it isn't already. */
7456 if (GET_CODE (XEXP (x, 1)) == MULT)
7457 {
7458 rtx tmp = XEXP (x, 0);
7459 XEXP (x, 0) = XEXP (x, 1);
7460 XEXP (x, 1) = tmp;
7461 changed = 1;
7462 }
7463
7464 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7465 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7466 created by virtual register instantiation, register elimination, and
7467 similar optimizations. */
7468 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7469 {
7470 changed = 1;
7471 x = gen_rtx_PLUS (Pmode,
7472 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7473 XEXP (XEXP (x, 1), 0)),
7474 XEXP (XEXP (x, 1), 1));
7475 }
7476
7477 /* Canonicalize
7478 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7479 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7480 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7481 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7482 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7483 && CONSTANT_P (XEXP (x, 1)))
7484 {
7485 rtx constant;
7486 rtx other = NULL_RTX;
7487
7488 if (CONST_INT_P (XEXP (x, 1)))
7489 {
7490 constant = XEXP (x, 1);
7491 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7492 }
7493 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7494 {
7495 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7496 other = XEXP (x, 1);
7497 }
7498 else
7499 constant = 0;
7500
7501 if (constant)
7502 {
7503 changed = 1;
7504 x = gen_rtx_PLUS (Pmode,
7505 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7506 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7507 plus_constant (other, INTVAL (constant)));
7508 }
7509 }
7510
7511 if (changed && legitimate_address_p (mode, x, FALSE))
7512 return x;
7513
7514 if (GET_CODE (XEXP (x, 0)) == MULT)
7515 {
7516 changed = 1;
7517 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7518 }
7519
7520 if (GET_CODE (XEXP (x, 1)) == MULT)
7521 {
7522 changed = 1;
7523 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7524 }
7525
7526 if (changed
7527 && REG_P (XEXP (x, 1))
7528 && REG_P (XEXP (x, 0)))
7529 return x;
7530
7531 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7532 {
7533 changed = 1;
7534 x = legitimize_pic_address (x, 0);
7535 }
7536
7537 if (changed && legitimate_address_p (mode, x, FALSE))
7538 return x;
7539
7540 if (REG_P (XEXP (x, 0)))
7541 {
7542 rtx temp = gen_reg_rtx (Pmode);
7543 rtx val = force_operand (XEXP (x, 1), temp);
7544 if (val != temp)
7545 emit_move_insn (temp, val);
7546
7547 XEXP (x, 1) = temp;
7548 return x;
7549 }
7550
7551 else if (REG_P (XEXP (x, 1)))
7552 {
7553 rtx temp = gen_reg_rtx (Pmode);
7554 rtx val = force_operand (XEXP (x, 0), temp);
7555 if (val != temp)
7556 emit_move_insn (temp, val);
7557
7558 XEXP (x, 0) = temp;
7559 return x;
7560 }
7561 }
7562
7563 return x;
7564 }
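
/* For example, (plus (reg A) (ashift (reg B) (const_int 2))), with A and B
standing for arbitrary registers, is rewritten above into
(plus (mult (reg B) (const_int 4)) (reg A)), which matches the scaled-index
addressing mode directly. */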
7565 \f
7566 /* Print an integer constant expression in assembler syntax. Addition
7567 and subtraction are the only arithmetic that may appear in these
7568 expressions. FILE is the stdio stream to write to, X is the rtx, and
7569 CODE is the operand print code from the output string. */
7570
7571 static void
7572 output_pic_addr_const (FILE *file, rtx x, int code)
7573 {
7574 char buf[256];
7575
7576 switch (GET_CODE (x))
7577 {
7578 case PC:
7579 gcc_assert (flag_pic);
7580 putc ('.', file);
7581 break;
7582
7583 case SYMBOL_REF:
7584 output_addr_const (file, x);
7585 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7586 fputs ("@PLT", file);
7587 break;
7588
7589 case LABEL_REF:
7590 x = XEXP (x, 0);
7591 /* FALLTHRU */
7592 case CODE_LABEL:
7593 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7594 assemble_name (asm_out_file, buf);
7595 break;
7596
7597 case CONST_INT:
7598 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7599 break;
7600
7601 case CONST:
7602 /* This used to output parentheses around the expression,
7603 but that does not work on the 386 (either ATT or BSD assembler). */
7604 output_pic_addr_const (file, XEXP (x, 0), code);
7605 break;
7606
7607 case CONST_DOUBLE:
7608 if (GET_MODE (x) == VOIDmode)
7609 {
7610 /* We can use %d if the number is <32 bits and positive. */
7611 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7612 fprintf (file, "0x%lx%08lx",
7613 (unsigned long) CONST_DOUBLE_HIGH (x),
7614 (unsigned long) CONST_DOUBLE_LOW (x));
7615 else
7616 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7617 }
7618 else
7619 /* We can't handle floating point constants;
7620 PRINT_OPERAND must handle them. */
7621 output_operand_lossage ("floating constant misused");
7622 break;
7623
7624 case PLUS:
7625 /* Some assemblers need integer constants to appear first. */
7626 if (CONST_INT_P (XEXP (x, 0)))
7627 {
7628 output_pic_addr_const (file, XEXP (x, 0), code);
7629 putc ('+', file);
7630 output_pic_addr_const (file, XEXP (x, 1), code);
7631 }
7632 else
7633 {
7634 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7635 output_pic_addr_const (file, XEXP (x, 1), code);
7636 putc ('+', file);
7637 output_pic_addr_const (file, XEXP (x, 0), code);
7638 }
7639 break;
7640
7641 case MINUS:
7642 if (!TARGET_MACHO)
7643 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7644 output_pic_addr_const (file, XEXP (x, 0), code);
7645 putc ('-', file);
7646 output_pic_addr_const (file, XEXP (x, 1), code);
7647 if (!TARGET_MACHO)
7648 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7649 break;
7650
7651 case UNSPEC:
7652 gcc_assert (XVECLEN (x, 0) == 1);
7653 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7654 switch (XINT (x, 1))
7655 {
7656 case UNSPEC_GOT:
7657 fputs ("@GOT", file);
7658 break;
7659 case UNSPEC_GOTOFF:
7660 fputs ("@GOTOFF", file);
7661 break;
7662 case UNSPEC_GOTPCREL:
7663 fputs ("@GOTPCREL(%rip)", file);
7664 break;
7665 case UNSPEC_GOTTPOFF:
7666 /* FIXME: This might be @TPOFF in Sun ld too. */
7667 fputs ("@GOTTPOFF", file);
7668 break;
7669 case UNSPEC_TPOFF:
7670 fputs ("@TPOFF", file);
7671 break;
7672 case UNSPEC_NTPOFF:
7673 if (TARGET_64BIT)
7674 fputs ("@TPOFF", file);
7675 else
7676 fputs ("@NTPOFF", file);
7677 break;
7678 case UNSPEC_DTPOFF:
7679 fputs ("@DTPOFF", file);
7680 break;
7681 case UNSPEC_GOTNTPOFF:
7682 if (TARGET_64BIT)
7683 fputs ("@GOTTPOFF(%rip)", file);
7684 else
7685 fputs ("@GOTNTPOFF", file);
7686 break;
7687 case UNSPEC_INDNTPOFF:
7688 fputs ("@INDNTPOFF", file);
7689 break;
7690 default:
7691 output_operand_lossage ("invalid UNSPEC as operand");
7692 break;
7693 }
7694 break;
7695
7696 default:
7697 output_operand_lossage ("invalid expression as operand");
7698 }
7699 }
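
/* For example, (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is
printed as foo@GOTOFF, and an UNSPEC_GOTPCREL reference is printed as
foo@GOTPCREL(%rip). */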
7700
7701 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7702 We need to emit DTP-relative relocations. */
7703
7704 static void
7705 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7706 {
7707 fputs (ASM_LONG, file);
7708 output_addr_const (file, x);
7709 fputs ("@DTPOFF", file);
7710 switch (size)
7711 {
7712 case 4:
7713 break;
7714 case 8:
7715 fputs (", 0", file);
7716 break;
7717 default:
7718 gcc_unreachable ();
7719 }
7720 }
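
/* For a 4-byte entry this emits something like ".long foo@DTPOFF"; the
8-byte case appends ", 0" for the upper half. */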
7721
7722 /* In the name of slightly smaller debug output, and to cater to
7723 general assembler lossage, recognize PIC+GOTOFF and turn it back
7724 into a direct symbol reference.
7725
7726 On Darwin, this is necessary to avoid a crash, because Darwin
7727 has a different PIC label for each routine but the DWARF debugging
7728 information is not associated with any particular routine, so it's
7729 necessary to remove references to the PIC label from RTL stored by
7730 the DWARF output code. */
7731
7732 static rtx
7733 ix86_delegitimize_address (rtx orig_x)
7734 {
7735 rtx x = orig_x;
7736 /* reg_addend is NULL or a multiple of some register. */
7737 rtx reg_addend = NULL_RTX;
7738 /* const_addend is NULL or a const_int. */
7739 rtx const_addend = NULL_RTX;
7740 /* This is the result, or NULL. */
7741 rtx result = NULL_RTX;
7742
7743 if (MEM_P (x))
7744 x = XEXP (x, 0);
7745
7746 if (TARGET_64BIT)
7747 {
7748 if (GET_CODE (x) != CONST
7749 || GET_CODE (XEXP (x, 0)) != UNSPEC
7750 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7751 || !MEM_P (orig_x))
7752 return orig_x;
7753 return XVECEXP (XEXP (x, 0), 0, 0);
7754 }
7755
7756 if (GET_CODE (x) != PLUS
7757 || GET_CODE (XEXP (x, 1)) != CONST)
7758 return orig_x;
7759
7760 if (REG_P (XEXP (x, 0))
7761 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7762 /* %ebx + GOT/GOTOFF */
7763 ;
7764 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7765 {
7766 /* %ebx + %reg * scale + GOT/GOTOFF */
7767 reg_addend = XEXP (x, 0);
7768 if (REG_P (XEXP (reg_addend, 0))
7769 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7770 reg_addend = XEXP (reg_addend, 1);
7771 else if (REG_P (XEXP (reg_addend, 1))
7772 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7773 reg_addend = XEXP (reg_addend, 0);
7774 else
7775 return orig_x;
7776 if (!REG_P (reg_addend)
7777 && GET_CODE (reg_addend) != MULT
7778 && GET_CODE (reg_addend) != ASHIFT)
7779 return orig_x;
7780 }
7781 else
7782 return orig_x;
7783
7784 x = XEXP (XEXP (x, 1), 0);
7785 if (GET_CODE (x) == PLUS
7786 && CONST_INT_P (XEXP (x, 1)))
7787 {
7788 const_addend = XEXP (x, 1);
7789 x = XEXP (x, 0);
7790 }
7791
7792 if (GET_CODE (x) == UNSPEC
7793 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7794 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7795 result = XVECEXP (x, 0, 0);
7796
7797 if (TARGET_MACHO && darwin_local_data_pic (x)
7798 && !MEM_P (orig_x))
7799 result = XEXP (x, 0);
7800
7801 if (! result)
7802 return orig_x;
7803
7804 if (const_addend)
7805 result = gen_rtx_PLUS (Pmode, result, const_addend);
7806 if (reg_addend)
7807 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7808 return result;
7809 }
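
/* For example, (plus (reg %ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
is turned back into the plain (symbol_ref "x") when it does not appear
inside a MEM. */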
7810 \f
7811 static void
7812 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7813 int fp, FILE *file)
7814 {
7815 const char *suffix;
7816
7817 if (mode == CCFPmode || mode == CCFPUmode)
7818 {
7819 enum rtx_code second_code, bypass_code;
7820 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7821 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7822 code = ix86_fp_compare_code_to_integer (code);
7823 mode = CCmode;
7824 }
7825 if (reverse)
7826 code = reverse_condition (code);
7827
7828 switch (code)
7829 {
7830 case EQ:
7831 suffix = "e";
7832 break;
7833 case NE:
7834 suffix = "ne";
7835 break;
7836 case GT:
7837 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7838 suffix = "g";
7839 break;
7840 case GTU:
7841 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7842 Those same assemblers have the same but opposite lossage on cmov. */
7843 gcc_assert (mode == CCmode);
7844 suffix = fp ? "nbe" : "a";
7845 break;
7846 case LT:
7847 switch (mode)
7848 {
7849 case CCNOmode:
7850 case CCGOCmode:
7851 suffix = "s";
7852 break;
7853
7854 case CCmode:
7855 case CCGCmode:
7856 suffix = "l";
7857 break;
7858
7859 default:
7860 gcc_unreachable ();
7861 }
7862 break;
7863 case LTU:
7864 gcc_assert (mode == CCmode);
7865 suffix = "b";
7866 break;
7867 case GE:
7868 switch (mode)
7869 {
7870 case CCNOmode:
7871 case CCGOCmode:
7872 suffix = "ns";
7873 break;
7874
7875 case CCmode:
7876 case CCGCmode:
7877 suffix = "ge";
7878 break;
7879
7880 default:
7881 gcc_unreachable ();
7882 }
7883 break;
7884 case GEU:
7885 /* ??? As above. */
7886 gcc_assert (mode == CCmode);
7887 suffix = fp ? "nb" : "ae";
7888 break;
7889 case LE:
7890 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7891 suffix = "le";
7892 break;
7893 case LEU:
7894 gcc_assert (mode == CCmode);
7895 suffix = "be";
7896 break;
7897 case UNORDERED:
7898 suffix = fp ? "u" : "p";
7899 break;
7900 case ORDERED:
7901 suffix = fp ? "nu" : "np";
7902 break;
7903 default:
7904 gcc_unreachable ();
7905 }
7906 fputs (suffix, file);
7907 }
7908
7909 /* Print the name of register X to FILE based on its machine mode and number.
7910 If CODE is 'w', pretend the mode is HImode.
7911 If CODE is 'b', pretend the mode is QImode.
7912 If CODE is 'k', pretend the mode is SImode.
7913 If CODE is 'q', pretend the mode is DImode.
7914 If CODE is 'h', pretend the reg is the 'high' byte register.
7915 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
7916
7917 void
7918 print_reg (rtx x, int code, FILE *file)
7919 {
7920 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7921 && REGNO (x) != FRAME_POINTER_REGNUM
7922 && REGNO (x) != FLAGS_REG
7923 && REGNO (x) != FPSR_REG
7924 && REGNO (x) != FPCR_REG);
7925
7926 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7927 putc ('%', file);
7928
7929 if (code == 'w' || MMX_REG_P (x))
7930 code = 2;
7931 else if (code == 'b')
7932 code = 1;
7933 else if (code == 'k')
7934 code = 4;
7935 else if (code == 'q')
7936 code = 8;
7937 else if (code == 'y')
7938 code = 3;
7939 else if (code == 'h')
7940 code = 0;
7941 else
7942 code = GET_MODE_SIZE (GET_MODE (x));
7943
7944 /* Irritatingly, AMD extended registers use a different naming convention
7945 from the normal registers. */
7946 if (REX_INT_REG_P (x))
7947 {
7948 gcc_assert (TARGET_64BIT);
7949 switch (code)
7950 {
7951 case 0:
7952 error ("extended registers have no high halves");
7953 break;
7954 case 1:
7955 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7956 break;
7957 case 2:
7958 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7959 break;
7960 case 4:
7961 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7962 break;
7963 case 8:
7964 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7965 break;
7966 default:
7967 error ("unsupported operand size for extended register");
7968 break;
7969 }
7970 return;
7971 }
7972 switch (code)
7973 {
7974 case 3:
7975 if (STACK_TOP_P (x))
7976 {
7977 fputs ("st(0)", file);
7978 break;
7979 }
7980 /* FALLTHRU */
7981 case 8:
7982 case 4:
7983 case 12:
7984 if (! ANY_FP_REG_P (x))
7985 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7986 /* FALLTHRU */
7987 case 16:
7988 case 2:
7989 normal:
7990 fputs (hi_reg_name[REGNO (x)], file);
7991 break;
7992 case 1:
7993 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7994 goto normal;
7995 fputs (qi_reg_name[REGNO (x)], file);
7996 break;
7997 case 0:
7998 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7999 goto normal;
8000 fputs (qi_high_reg_name[REGNO (x)], file);
8001 break;
8002 default:
8003 gcc_unreachable ();
8004 }
8005 }
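
/* For example, for hard register 0 code 'b' prints %al, 'k' prints %eax and,
in 64-bit mode, 'q' prints %rax; a REX register such as r10 is printed as
%r10d for 'k' and %r10 for 'q'. */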
8006
8007 /* Locate some local-dynamic symbol still in use by this function
8008 so that we can print its name in some tls_local_dynamic_base
8009 pattern. */
8010
8011 static const char *
8012 get_some_local_dynamic_name (void)
8013 {
8014 rtx insn;
8015
8016 if (cfun->machine->some_ld_name)
8017 return cfun->machine->some_ld_name;
8018
8019 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8020 if (INSN_P (insn)
8021 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8022 return cfun->machine->some_ld_name;
8023
8024 gcc_unreachable ();
8025 }
8026
8027 static int
8028 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8029 {
8030 rtx x = *px;
8031
8032 if (GET_CODE (x) == SYMBOL_REF
8033 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8034 {
8035 cfun->machine->some_ld_name = XSTR (x, 0);
8036 return 1;
8037 }
8038
8039 return 0;
8040 }
8041
8042 /* Meaning of CODE:
8043 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8044 C -- print opcode suffix for set/cmov insn.
8045 c -- like C, but print reversed condition
8046 F,f -- likewise, but for floating-point.
8047 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8048 otherwise nothing
8049 R -- print the prefix for register names.
8050 z -- print the opcode suffix for the size of the current operand.
8051 * -- print a star (in certain assembler syntax)
8052 A -- print an absolute memory reference.
8053 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8054 s -- print a shift double count, followed by the assembler's argument
8055 delimiter.
8056 b -- print the QImode name of the register for the indicated operand.
8057 %b0 would print %al if operands[0] is reg 0.
8058 w -- likewise, print the HImode name of the register.
8059 k -- likewise, print the SImode name of the register.
8060 q -- likewise, print the DImode name of the register.
8061 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8062 y -- print "st(0)" instead of "st" as a register.
8063 D -- print condition for SSE cmp instruction.
8064 P -- if PIC, print an @PLT suffix.
8065 X -- don't print any sort of PIC '@' suffix for a symbol.
8066 & -- print some in-use local-dynamic symbol name.
8067 H -- print a memory address offset by 8; used for sse high-parts
8068 */
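
/* For example, in an insn template "%z0" expands to the opcode suffix for
the size of operand 0, "%b1" prints the QImode register name of operand 1
(e.g. %al), and "%h1" would print the matching high-byte register
(e.g. %ah). */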
8069
8070 void
8071 print_operand (FILE *file, rtx x, int code)
8072 {
8073 if (code)
8074 {
8075 switch (code)
8076 {
8077 case '*':
8078 if (ASSEMBLER_DIALECT == ASM_ATT)
8079 putc ('*', file);
8080 return;
8081
8082 case '&':
8083 assemble_name (file, get_some_local_dynamic_name ());
8084 return;
8085
8086 case 'A':
8087 switch (ASSEMBLER_DIALECT)
8088 {
8089 case ASM_ATT:
8090 putc ('*', file);
8091 break;
8092
8093 case ASM_INTEL:
8094 /* Intel syntax. For absolute addresses, registers should not
8095 be surrounded by brackets. */
8096 if (!REG_P (x))
8097 {
8098 putc ('[', file);
8099 PRINT_OPERAND (file, x, 0);
8100 putc (']', file);
8101 return;
8102 }
8103 break;
8104
8105 default:
8106 gcc_unreachable ();
8107 }
8108
8109 PRINT_OPERAND (file, x, 0);
8110 return;
8111
8112
8113 case 'L':
8114 if (ASSEMBLER_DIALECT == ASM_ATT)
8115 putc ('l', file);
8116 return;
8117
8118 case 'W':
8119 if (ASSEMBLER_DIALECT == ASM_ATT)
8120 putc ('w', file);
8121 return;
8122
8123 case 'B':
8124 if (ASSEMBLER_DIALECT == ASM_ATT)
8125 putc ('b', file);
8126 return;
8127
8128 case 'Q':
8129 if (ASSEMBLER_DIALECT == ASM_ATT)
8130 putc ('l', file);
8131 return;
8132
8133 case 'S':
8134 if (ASSEMBLER_DIALECT == ASM_ATT)
8135 putc ('s', file);
8136 return;
8137
8138 case 'T':
8139 if (ASSEMBLER_DIALECT == ASM_ATT)
8140 putc ('t', file);
8141 return;
8142
8143 case 'z':
8144 /* 387 opcodes don't get size suffixes if the operands are
8145 registers. */
8146 if (STACK_REG_P (x))
8147 return;
8148
8149 /* Likewise if using Intel opcodes. */
8150 if (ASSEMBLER_DIALECT == ASM_INTEL)
8151 return;
8152
8153 /* Derive the opcode suffix from the size of the operand. */
8154 switch (GET_MODE_SIZE (GET_MODE (x)))
8155 {
8156 case 1:
8157 putc ('b', file);
8158 return;
8159
8160 case 2:
8161 #ifdef HAVE_GAS_FILDS_FISTS
8162 putc ('s', file);
8163 #endif
8164 return;
8165
8166 case 4:
8167 if (GET_MODE (x) == SFmode)
8168 {
8169 putc ('s', file);
8170 return;
8171 }
8172 else
8173 putc ('l', file);
8174 return;
8175
8176 case 12:
8177 case 16:
8178 putc ('t', file);
8179 return;
8180
8181 case 8:
8182 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8183 {
8184 #ifdef GAS_MNEMONICS
8185 putc ('q', file);
8186 #else
8187 putc ('l', file);
8188 putc ('l', file);
8189 #endif
8190 }
8191 else
8192 putc ('l', file);
8193 return;
8194
8195 default:
8196 gcc_unreachable ();
8197 }
8198
8199 case 'b':
8200 case 'w':
8201 case 'k':
8202 case 'q':
8203 case 'h':
8204 case 'y':
8205 case 'X':
8206 case 'P':
8207 break;
8208
8209 case 's':
8210 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8211 {
8212 PRINT_OPERAND (file, x, 0);
8213 putc (',', file);
8214 }
8215 return;
8216
8217 case 'D':
8218 /* A little bit of brain damage here. The SSE compare instructions
8219 use completely different names for the comparisons than the
8220 fp conditional moves do. */
8221 switch (GET_CODE (x))
8222 {
8223 case EQ:
8224 case UNEQ:
8225 fputs ("eq", file);
8226 break;
8227 case LT:
8228 case UNLT:
8229 fputs ("lt", file);
8230 break;
8231 case LE:
8232 case UNLE:
8233 fputs ("le", file);
8234 break;
8235 case UNORDERED:
8236 fputs ("unord", file);
8237 break;
8238 case NE:
8239 case LTGT:
8240 fputs ("neq", file);
8241 break;
8242 case UNGE:
8243 case GE:
8244 fputs ("nlt", file);
8245 break;
8246 case UNGT:
8247 case GT:
8248 fputs ("nle", file);
8249 break;
8250 case ORDERED:
8251 fputs ("ord", file);
8252 break;
8253 default:
8254 gcc_unreachable ();
8255 }
8256 return;
8257 case 'O':
8258 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8259 if (ASSEMBLER_DIALECT == ASM_ATT)
8260 {
8261 switch (GET_MODE (x))
8262 {
8263 case HImode: putc ('w', file); break;
8264 case SImode:
8265 case SFmode: putc ('l', file); break;
8266 case DImode:
8267 case DFmode: putc ('q', file); break;
8268 default: gcc_unreachable ();
8269 }
8270 putc ('.', file);
8271 }
8272 #endif
8273 return;
8274 case 'C':
8275 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8276 return;
8277 case 'F':
8278 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8279 if (ASSEMBLER_DIALECT == ASM_ATT)
8280 putc ('.', file);
8281 #endif
8282 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8283 return;
8284
8285 /* Like above, but reverse condition */
8286 case 'c':
8287 /* Check to see if argument to %c is really a constant
8288 and not a condition code which needs to be reversed. */
8289 if (!COMPARISON_P (x))
8290 {
8291 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8292 return;
8293 }
8294 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8295 return;
8296 case 'f':
8297 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8298 if (ASSEMBLER_DIALECT == ASM_ATT)
8299 putc ('.', file);
8300 #endif
8301 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8302 return;
8303
8304 case 'H':
8305 /* It doesn't actually matter what mode we use here, as we're
8306 only going to use this for printing. */
8307 x = adjust_address_nv (x, DImode, 8);
8308 break;
8309
8310 case '+':
8311 {
8312 rtx x;
8313
8314 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8315 return;
8316
8317 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8318 if (x)
8319 {
8320 int pred_val = INTVAL (XEXP (x, 0));
8321
8322 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8323 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8324 {
8325 int taken = pred_val > REG_BR_PROB_BASE / 2;
8326 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8327
8328 /* Emit hints only when the default branch prediction
8329 heuristics would fail. */
8330 if (taken != cputaken)
8331 {
8332 /* We use 3e (DS) prefix for taken branches and
8333 2e (CS) prefix for not taken branches. */
8334 if (taken)
8335 fputs ("ds ; ", file);
8336 else
8337 fputs ("cs ; ", file);
8338 }
8339 }
8340 }
8341 return;
8342 }
8343 default:
8344 output_operand_lossage ("invalid operand code '%c'", code);
8345 }
8346 }
8347
8348 if (REG_P (x))
8349 print_reg (x, code, file);
8350
8351 else if (MEM_P (x))
8352 {
8353 /* No `byte ptr' prefix for call instructions. */
8354 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8355 {
8356 const char * size;
8357 switch (GET_MODE_SIZE (GET_MODE (x)))
8358 {
8359 case 1: size = "BYTE"; break;
8360 case 2: size = "WORD"; break;
8361 case 4: size = "DWORD"; break;
8362 case 8: size = "QWORD"; break;
8363 case 12: size = "XWORD"; break;
8364 case 16: size = "XMMWORD"; break;
8365 default:
8366 gcc_unreachable ();
8367 }
8368
8369 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8370 if (code == 'b')
8371 size = "BYTE";
8372 else if (code == 'w')
8373 size = "WORD";
8374 else if (code == 'k')
8375 size = "DWORD";
8376
8377 fputs (size, file);
8378 fputs (" PTR ", file);
8379 }
8380
8381 x = XEXP (x, 0);
8382 /* Avoid (%rip) for call operands. */
8383 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8384 && !CONST_INT_P (x))
8385 output_addr_const (file, x);
8386 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8387 output_operand_lossage ("invalid constraints for operand");
8388 else
8389 output_address (x);
8390 }
8391
8392 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8393 {
8394 REAL_VALUE_TYPE r;
8395 long l;
8396
8397 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8398 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8399
8400 if (ASSEMBLER_DIALECT == ASM_ATT)
8401 putc ('$', file);
8402 fprintf (file, "0x%08lx", l);
8403 }
8404
8405 /* These float cases don't actually occur as immediate operands. */
8406 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8407 {
8408 char dstr[30];
8409
8410 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8411 fprintf (file, "%s", dstr);
8412 }
8413
8414 else if (GET_CODE (x) == CONST_DOUBLE
8415 && GET_MODE (x) == XFmode)
8416 {
8417 char dstr[30];
8418
8419 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8420 fprintf (file, "%s", dstr);
8421 }
8422
8423 else
8424 {
8425 /* We have patterns that allow zero sets of memory, for instance.
8426 In 64-bit mode, we should probably support all 8-byte vectors,
8427 since we can in fact encode that into an immediate. */
8428 if (GET_CODE (x) == CONST_VECTOR)
8429 {
8430 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8431 x = const0_rtx;
8432 }
8433
8434 if (code != 'P')
8435 {
8436 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8437 {
8438 if (ASSEMBLER_DIALECT == ASM_ATT)
8439 putc ('$', file);
8440 }
8441 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8442 || GET_CODE (x) == LABEL_REF)
8443 {
8444 if (ASSEMBLER_DIALECT == ASM_ATT)
8445 putc ('$', file);
8446 else
8447 fputs ("OFFSET FLAT:", file);
8448 }
8449 }
8450 if (CONST_INT_P (x))
8451 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8452 else if (flag_pic)
8453 output_pic_addr_const (file, x, code);
8454 else
8455 output_addr_const (file, x);
8456 }
8457 }
8458 \f
8459 /* Print a memory operand whose address is ADDR. */
8460
8461 void
8462 print_operand_address (FILE *file, rtx addr)
8463 {
8464 struct ix86_address parts;
8465 rtx base, index, disp;
8466 int scale;
8467 int ok = ix86_decompose_address (addr, &parts);
8468
8469 gcc_assert (ok);
8470
8471 base = parts.base;
8472 index = parts.index;
8473 disp = parts.disp;
8474 scale = parts.scale;
8475
8476 switch (parts.seg)
8477 {
8478 case SEG_DEFAULT:
8479 break;
8480 case SEG_FS:
8481 case SEG_GS:
8482 if (USER_LABEL_PREFIX[0] == 0)
8483 putc ('%', file);
8484 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8485 break;
8486 default:
8487 gcc_unreachable ();
8488 }
8489
8490 if (!base && !index)
8491 {
8492 /* A displacement-only address requires special attention. */
8493
8494 if (CONST_INT_P (disp))
8495 {
8496 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8497 {
8498 if (USER_LABEL_PREFIX[0] == 0)
8499 putc ('%', file);
8500 fputs ("ds:", file);
8501 }
8502 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8503 }
8504 else if (flag_pic)
8505 output_pic_addr_const (file, disp, 0);
8506 else
8507 output_addr_const (file, disp);
8508
8509 /* Use the one-byte-shorter RIP-relative addressing in 64bit mode. */
8510 if (TARGET_64BIT)
8511 {
8512 if (GET_CODE (disp) == CONST
8513 && GET_CODE (XEXP (disp, 0)) == PLUS
8514 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8515 disp = XEXP (XEXP (disp, 0), 0);
8516 if (GET_CODE (disp) == LABEL_REF
8517 || (GET_CODE (disp) == SYMBOL_REF
8518 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8519 fputs ("(%rip)", file);
8520 }
8521 }
8522 else
8523 {
8524 if (ASSEMBLER_DIALECT == ASM_ATT)
8525 {
8526 if (disp)
8527 {
8528 if (flag_pic)
8529 output_pic_addr_const (file, disp, 0);
8530 else if (GET_CODE (disp) == LABEL_REF)
8531 output_asm_label (disp);
8532 else
8533 output_addr_const (file, disp);
8534 }
8535
8536 putc ('(', file);
8537 if (base)
8538 print_reg (base, 0, file);
8539 if (index)
8540 {
8541 putc (',', file);
8542 print_reg (index, 0, file);
8543 if (scale != 1)
8544 fprintf (file, ",%d", scale);
8545 }
8546 putc (')', file);
8547 }
8548 else
8549 {
8550 rtx offset = NULL_RTX;
8551
8552 if (disp)
8553 {
8554 /* Pull out the offset of a symbol; print any symbol itself. */
8555 if (GET_CODE (disp) == CONST
8556 && GET_CODE (XEXP (disp, 0)) == PLUS
8557 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8558 {
8559 offset = XEXP (XEXP (disp, 0), 1);
8560 disp = gen_rtx_CONST (VOIDmode,
8561 XEXP (XEXP (disp, 0), 0));
8562 }
8563
8564 if (flag_pic)
8565 output_pic_addr_const (file, disp, 0);
8566 else if (GET_CODE (disp) == LABEL_REF)
8567 output_asm_label (disp);
8568 else if (CONST_INT_P (disp))
8569 offset = disp;
8570 else
8571 output_addr_const (file, disp);
8572 }
8573
8574 putc ('[', file);
8575 if (base)
8576 {
8577 print_reg (base, 0, file);
8578 if (offset)
8579 {
8580 if (INTVAL (offset) >= 0)
8581 putc ('+', file);
8582 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8583 }
8584 }
8585 else if (offset)
8586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8587 else
8588 putc ('0', file);
8589
8590 if (index)
8591 {
8592 putc ('+', file);
8593 print_reg (index, 0, file);
8594 if (scale != 1)
8595 fprintf (file, "*%d", scale);
8596 }
8597 putc (']', file);
8598 }
8599 }
8600 }
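
/* For example, the address (plus (reg %ebx) (const_int 4)) is printed as
4(%ebx) in AT&T syntax and as [ebx+4] in Intel syntax; a bare symbolic
displacement in 64-bit mode additionally gets the (%rip) suffix. */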
8601
8602 bool
8603 output_addr_const_extra (FILE *file, rtx x)
8604 {
8605 rtx op;
8606
8607 if (GET_CODE (x) != UNSPEC)
8608 return false;
8609
8610 op = XVECEXP (x, 0, 0);
8611 switch (XINT (x, 1))
8612 {
8613 case UNSPEC_GOTTPOFF:
8614 output_addr_const (file, op);
8615 /* FIXME: This might be @TPOFF in Sun ld. */
8616 fputs ("@GOTTPOFF", file);
8617 break;
8618 case UNSPEC_TPOFF:
8619 output_addr_const (file, op);
8620 fputs ("@TPOFF", file);
8621 break;
8622 case UNSPEC_NTPOFF:
8623 output_addr_const (file, op);
8624 if (TARGET_64BIT)
8625 fputs ("@TPOFF", file);
8626 else
8627 fputs ("@NTPOFF", file);
8628 break;
8629 case UNSPEC_DTPOFF:
8630 output_addr_const (file, op);
8631 fputs ("@DTPOFF", file);
8632 break;
8633 case UNSPEC_GOTNTPOFF:
8634 output_addr_const (file, op);
8635 if (TARGET_64BIT)
8636 fputs ("@GOTTPOFF(%rip)", file);
8637 else
8638 fputs ("@GOTNTPOFF", file);
8639 break;
8640 case UNSPEC_INDNTPOFF:
8641 output_addr_const (file, op);
8642 fputs ("@INDNTPOFF", file);
8643 break;
8644
8645 default:
8646 return false;
8647 }
8648
8649 return true;
8650 }
8651 \f
8652 /* Split one or more DImode RTL references into pairs of SImode
8653 references. The RTL can be REG, offsettable MEM, integer constant, or
8654 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8655 split and "num" is its length. lo_half and hi_half are output arrays
8656 that parallel "operands". */
8657
8658 void
8659 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8660 {
8661 while (num--)
8662 {
8663 rtx op = operands[num];
8664
8665 /* simplify_subreg refuses to split volatile memory addresses,
8666 but we still have to handle them. */
8667 if (MEM_P (op))
8668 {
8669 lo_half[num] = adjust_address (op, SImode, 0);
8670 hi_half[num] = adjust_address (op, SImode, 4);
8671 }
8672 else
8673 {
8674 lo_half[num] = simplify_gen_subreg (SImode, op,
8675 GET_MODE (op) == VOIDmode
8676 ? DImode : GET_MODE (op), 0);
8677 hi_half[num] = simplify_gen_subreg (SImode, op,
8678 GET_MODE (op) == VOIDmode
8679 ? DImode : GET_MODE (op), 4);
8680 }
8681 }
8682 }
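
/* For example, a DImode memory operand at address X is split into SImode
references at X and X+4, and a DImode immediate is split into its low and
high 32-bit halves. */
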
8683 /* Split one or more TImode RTL references into pairs of DImode
8684 references. The RTL can be REG, offsettable MEM, integer constant, or
8685 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8686 split and "num" is its length. lo_half and hi_half are output arrays
8687 that parallel "operands". */
8688
8689 void
8690 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8691 {
8692 while (num--)
8693 {
8694 rtx op = operands[num];
8695
8696 /* simplify_subreg refuses to split volatile memory addresses, but we
8697 still have to handle them. */
8698 if (MEM_P (op))
8699 {
8700 lo_half[num] = adjust_address (op, DImode, 0);
8701 hi_half[num] = adjust_address (op, DImode, 8);
8702 }
8703 else
8704 {
8705 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8706 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8707 }
8708 }
8709 }
8710 \f
8711 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8712 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8713 is the expression of the binary operation. The output may either be
8714 emitted here, or returned to the caller, like all output_* functions.
8715
8716 There is no guarantee that the operands are the same mode, as they
8717 might be within FLOAT or FLOAT_EXTEND expressions. */
8718
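/* For example, an SFmode addition with SSE operands returns
"addss\t{%2, %0|%0, %2}", while the plain 387 case for
st(0) = st(0) + st(r2) returns "fadd\t{%y2, %0|%0, %y2}". */
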
8719 #ifndef SYSV386_COMPAT
8720 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8721 wants to fix the assemblers because that causes incompatibility
8722 with gcc. No-one wants to fix gcc because that causes
8723 incompatibility with assemblers... You can use the option of
8724 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8725 #define SYSV386_COMPAT 1
8726 #endif
8727
8728 const char *
8729 output_387_binary_op (rtx insn, rtx *operands)
8730 {
8731 static char buf[30];
8732 const char *p;
8733 const char *ssep;
8734 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8735
8736 #ifdef ENABLE_CHECKING
8737 /* Even if we do not want to check the inputs, this documents the input
8738 constraints, which helps in understanding the following code. */
8739 if (STACK_REG_P (operands[0])
8740 && ((REG_P (operands[1])
8741 && REGNO (operands[0]) == REGNO (operands[1])
8742 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8743 || (REG_P (operands[2])
8744 && REGNO (operands[0]) == REGNO (operands[2])
8745 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8746 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8747 ; /* ok */
8748 else
8749 gcc_assert (is_sse);
8750 #endif
8751
8752 switch (GET_CODE (operands[3]))
8753 {
8754 case PLUS:
8755 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8756 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8757 p = "fiadd";
8758 else
8759 p = "fadd";
8760 ssep = "add";
8761 break;
8762
8763 case MINUS:
8764 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8765 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8766 p = "fisub";
8767 else
8768 p = "fsub";
8769 ssep = "sub";
8770 break;
8771
8772 case MULT:
8773 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8774 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8775 p = "fimul";
8776 else
8777 p = "fmul";
8778 ssep = "mul";
8779 break;
8780
8781 case DIV:
8782 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8783 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8784 p = "fidiv";
8785 else
8786 p = "fdiv";
8787 ssep = "div";
8788 break;
8789
8790 default:
8791 gcc_unreachable ();
8792 }
8793
8794 if (is_sse)
8795 {
8796 strcpy (buf, ssep);
8797 if (GET_MODE (operands[0]) == SFmode)
8798 strcat (buf, "ss\t{%2, %0|%0, %2}");
8799 else
8800 strcat (buf, "sd\t{%2, %0|%0, %2}");
8801 return buf;
8802 }
8803 strcpy (buf, p);
8804
8805 switch (GET_CODE (operands[3]))
8806 {
8807 case MULT:
8808 case PLUS:
8809 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8810 {
8811 rtx temp = operands[2];
8812 operands[2] = operands[1];
8813 operands[1] = temp;
8814 }
8815
8816 /* We now know operands[0] == operands[1]. */
8817
8818 if (MEM_P (operands[2]))
8819 {
8820 p = "%z2\t%2";
8821 break;
8822 }
8823
8824 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8825 {
8826 if (STACK_TOP_P (operands[0]))
8827 /* How is it that we are storing to a dead operand[2]?
8828 Well, presumably operands[1] is dead too. We can't
8829 store the result to st(0) as st(0) gets popped on this
8830 instruction. Instead store to operands[2] (which I
8831 think has to be st(1)). st(1) will be popped later.
8832 gcc <= 2.8.1 didn't have this check and generated
8833 assembly code that the Unixware assembler rejected. */
8834 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8835 else
8836 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8837 break;
8838 }
8839
8840 if (STACK_TOP_P (operands[0]))
8841 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8842 else
8843 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8844 break;
8845
8846 case MINUS:
8847 case DIV:
8848 if (MEM_P (operands[1]))
8849 {
8850 p = "r%z1\t%1";
8851 break;
8852 }
8853
8854 if (MEM_P (operands[2]))
8855 {
8856 p = "%z2\t%2";
8857 break;
8858 }
8859
8860 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8861 {
8862 #if SYSV386_COMPAT
8863 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8864 derived assemblers, confusingly reverse the direction of
8865 the operation for fsub{r} and fdiv{r} when the
8866 destination register is not st(0). The Intel assembler
8867 doesn't have this brain damage. Read !SYSV386_COMPAT to
8868 figure out what the hardware really does. */
8869 if (STACK_TOP_P (operands[0]))
8870 p = "{p\t%0, %2|rp\t%2, %0}";
8871 else
8872 p = "{rp\t%2, %0|p\t%0, %2}";
8873 #else
8874 if (STACK_TOP_P (operands[0]))
8875 /* As above for fmul/fadd, we can't store to st(0). */
8876 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8877 else
8878 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8879 #endif
8880 break;
8881 }
8882
8883 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8884 {
8885 #if SYSV386_COMPAT
8886 if (STACK_TOP_P (operands[0]))
8887 p = "{rp\t%0, %1|p\t%1, %0}";
8888 else
8889 p = "{p\t%1, %0|rp\t%0, %1}";
8890 #else
8891 if (STACK_TOP_P (operands[0]))
8892 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8893 else
8894 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8895 #endif
8896 break;
8897 }
8898
8899 if (STACK_TOP_P (operands[0]))
8900 {
8901 if (STACK_TOP_P (operands[1]))
8902 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8903 else
8904 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8905 break;
8906 }
8907 else if (STACK_TOP_P (operands[1]))
8908 {
8909 #if SYSV386_COMPAT
8910 p = "{\t%1, %0|r\t%0, %1}";
8911 #else
8912 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8913 #endif
8914 }
8915 else
8916 {
8917 #if SYSV386_COMPAT
8918 p = "{r\t%2, %0|\t%0, %2}";
8919 #else
8920 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8921 #endif
8922 }
8923 break;
8924
8925 default:
8926 gcc_unreachable ();
8927 }
8928
8929 strcat (buf, p);
8930 return buf;
8931 }
8932
8933 /* Return needed mode for entity in optimize_mode_switching pass. */
8934
8935 int
8936 ix86_mode_needed (int entity, rtx insn)
8937 {
8938 enum attr_i387_cw mode;
8939
8940 /* The mode UNINITIALIZED is used to store the control word after a
8941 function call or ASM pattern. The mode ANY specifies that the function
8942 has no requirements on the control word and makes no changes in the
8943 bits we are interested in. */
8944
8945 if (CALL_P (insn)
8946 || (NONJUMP_INSN_P (insn)
8947 && (asm_noperands (PATTERN (insn)) >= 0
8948 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8949 return I387_CW_UNINITIALIZED;
8950
8951 if (recog_memoized (insn) < 0)
8952 return I387_CW_ANY;
8953
8954 mode = get_attr_i387_cw (insn);
8955
8956 switch (entity)
8957 {
8958 case I387_TRUNC:
8959 if (mode == I387_CW_TRUNC)
8960 return mode;
8961 break;
8962
8963 case I387_FLOOR:
8964 if (mode == I387_CW_FLOOR)
8965 return mode;
8966 break;
8967
8968 case I387_CEIL:
8969 if (mode == I387_CW_CEIL)
8970 return mode;
8971 break;
8972
8973 case I387_MASK_PM:
8974 if (mode == I387_CW_MASK_PM)
8975 return mode;
8976 break;
8977
8978 default:
8979 gcc_unreachable ();
8980 }
8981
8982 return I387_CW_ANY;
8983 }
8984
8985 /* Output code to initialize the control word copies used by trunc?f?i and
8986 rounding patterns. MODE selects the required rounding/masking variant;
8987 the current control word is saved and a modified copy is stored in the
corresponding stack slot. */
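/* For reference, the i387 control word bits manipulated below are the
   rounding control field (bits 11:10 -- 00 = to nearest, 01 = down toward
   -inf, 10 = up toward +inf, 11 = toward zero/truncate, hence the 0x0400,
   0x0800 and 0x0c00 constants) and the precision exception mask (bit 5,
   i.e. 0x0020).  */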
8988
8989 void
8990 emit_i387_cw_initialization (int mode)
8991 {
8992 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8993 rtx new_mode;
8994
8995 int slot;
8996
8997 rtx reg = gen_reg_rtx (HImode);
8998
8999 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9000 emit_move_insn (reg, copy_rtx (stored_mode));
9001
9002 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9003 {
9004 switch (mode)
9005 {
9006 case I387_CW_TRUNC:
9007 /* round toward zero (truncate) */
9008 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9009 slot = SLOT_CW_TRUNC;
9010 break;
9011
9012 case I387_CW_FLOOR:
9013 /* round down toward -oo */
9014 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9015 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9016 slot = SLOT_CW_FLOOR;
9017 break;
9018
9019 case I387_CW_CEIL:
9020 /* round up toward +oo */
9021 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9022 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9023 slot = SLOT_CW_CEIL;
9024 break;
9025
9026 case I387_CW_MASK_PM:
9027 /* mask precision exception for nearbyint() */
9028 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9029 slot = SLOT_CW_MASK_PM;
9030 break;
9031
9032 default:
9033 gcc_unreachable ();
9034 }
9035 }
9036 else
9037 {
9038 switch (mode)
9039 {
9040 case I387_CW_TRUNC:
9041 /* round toward zero (truncate) */
9042 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9043 slot = SLOT_CW_TRUNC;
9044 break;
9045
9046 case I387_CW_FLOOR:
9047 /* round down toward -oo */
9048 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9049 slot = SLOT_CW_FLOOR;
9050 break;
9051
9052 case I387_CW_CEIL:
9053 /* round up toward +oo */
9054 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9055 slot = SLOT_CW_CEIL;
9056 break;
9057
9058 case I387_CW_MASK_PM:
9059 /* mask precision exception for nearbyint() */
9060 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9061 slot = SLOT_CW_MASK_PM;
9062 break;
9063
9064 default:
9065 gcc_unreachable ();
9066 }
9067 }
9068
9069 gcc_assert (slot < MAX_386_STACK_LOCALS);
9070
9071 new_mode = assign_386_stack_local (HImode, slot);
9072 emit_move_insn (new_mode, reg);
9073 }
9074
9075 /* Output code for INSN to convert a float to a signed int. OPERANDS
9076 are the insn operands. The output may be [HSD]Imode and the input
9077 operand may be [SDX]Fmode. */
9078
9079 const char *
9080 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9081 {
9082 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9083 int dimode_p = GET_MODE (operands[0]) == DImode;
9084 int round_mode = get_attr_i387_cw (insn);
9085
9086 /* Jump through a hoop or two for DImode, since the hardware has no
9087 non-popping instruction. We used to do this a different way, but
9088 that was somewhat fragile and broke with post-reload splitters. */
9089 if ((dimode_p || fisttp) && !stack_top_dies)
9090 output_asm_insn ("fld\t%y1", operands);
9091
9092 gcc_assert (STACK_TOP_P (operands[1]));
9093 gcc_assert (MEM_P (operands[0]));
9094
9095 if (fisttp)
9096 output_asm_insn ("fisttp%z0\t%0", operands);
9097 else
9098 {
9099 if (round_mode != I387_CW_ANY)
9100 output_asm_insn ("fldcw\t%3", operands);
9101 if (stack_top_dies || dimode_p)
9102 output_asm_insn ("fistp%z0\t%0", operands);
9103 else
9104 output_asm_insn ("fist%z0\t%0", operands);
9105 if (round_mode != I387_CW_ANY)
9106 output_asm_insn ("fldcw\t%2", operands);
9107 }
9108
9109 return "";
9110 }
9111
9112 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9113 have the values zero or one, indicates the ffreep insn's operand
9114 from the OPERANDS array. */
9115
9116 static const char *
9117 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9118 {
9119 if (TARGET_USE_FFREEP)
9120 #if HAVE_AS_IX86_FFREEP
9121 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9122 #else
9123 {
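/* The assembler has no mnemonic for this form, so emit the raw opcode:
   ffreep %st(i) encodes as DF C0+i, which as a little-endian 16-bit
   word is 0xc0df + (i << 8).  The '_' placeholder below is patched
   with the stack register number.  */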
9124 static char retval[] = ".word\t0xc_df";
9125 int regno = REGNO (operands[opno]);
9126
9127 gcc_assert (FP_REGNO_P (regno));
9128
9129 retval[9] = '0' + (regno - FIRST_STACK_REG);
9130 return retval;
9131 }
9132 #endif
9133
9134 return opno ? "fstp\t%y1" : "fstp\t%y0";
9135 }
9136
9137
9138 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9139 should be used. UNORDERED_P is true when fucom should be used. */
9140
9141 const char *
9142 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9143 {
9144 int stack_top_dies;
9145 rtx cmp_op0, cmp_op1;
9146 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9147
9148 if (eflags_p)
9149 {
9150 cmp_op0 = operands[0];
9151 cmp_op1 = operands[1];
9152 }
9153 else
9154 {
9155 cmp_op0 = operands[1];
9156 cmp_op1 = operands[2];
9157 }
9158
9159 if (is_sse)
9160 {
9161 if (GET_MODE (operands[0]) == SFmode)
9162 if (unordered_p)
9163 return "ucomiss\t{%1, %0|%0, %1}";
9164 else
9165 return "comiss\t{%1, %0|%0, %1}";
9166 else
9167 if (unordered_p)
9168 return "ucomisd\t{%1, %0|%0, %1}";
9169 else
9170 return "comisd\t{%1, %0|%0, %1}";
9171 }
9172
9173 gcc_assert (STACK_TOP_P (cmp_op0));
9174
9175 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9176
9177 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9178 {
9179 if (stack_top_dies)
9180 {
9181 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9182 return output_387_ffreep (operands, 1);
9183 }
9184 else
9185 return "ftst\n\tfnstsw\t%0";
9186 }
9187
9188 if (STACK_REG_P (cmp_op1)
9189 && stack_top_dies
9190 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9191 && REGNO (cmp_op1) != FIRST_STACK_REG)
9192 {
9193 /* If the top of the 387 stack dies, and the other operand
9194 is also a stack register that dies, then this must be a
9195 `fcompp' float compare. */
9196
9197 if (eflags_p)
9198 {
9199 /* There is no double popping fcomi variant. Fortunately,
9200 eflags is immune from the fstp's cc clobbering. */
9201 if (unordered_p)
9202 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9203 else
9204 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9205 return output_387_ffreep (operands, 0);
9206 }
9207 else
9208 {
9209 if (unordered_p)
9210 return "fucompp\n\tfnstsw\t%0";
9211 else
9212 return "fcompp\n\tfnstsw\t%0";
9213 }
9214 }
9215 else
9216 {
9217 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
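/* For example, an unordered fcomi compare where the top of the stack dies
   has eflags_p = 1, MODE_INT = 0, unordered_p = 1 and stack_top_dies = 1,
   giving mask 8 + 2 + 1 = 11 and selecting "fucomip" below.  */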
9218
9219 static const char * const alt[16] =
9220 {
9221 "fcom%z2\t%y2\n\tfnstsw\t%0",
9222 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9223 "fucom%z2\t%y2\n\tfnstsw\t%0",
9224 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9225
9226 "ficom%z2\t%y2\n\tfnstsw\t%0",
9227 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9228 NULL,
9229 NULL,
9230
9231 "fcomi\t{%y1, %0|%0, %y1}",
9232 "fcomip\t{%y1, %0|%0, %y1}",
9233 "fucomi\t{%y1, %0|%0, %y1}",
9234 "fucomip\t{%y1, %0|%0, %y1}",
9235
9236 NULL,
9237 NULL,
9238 NULL,
9239 NULL
9240 };
9241
9242 int mask;
9243 const char *ret;
9244
9245 mask = eflags_p << 3;
9246 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9247 mask |= unordered_p << 1;
9248 mask |= stack_top_dies;
9249
9250 gcc_assert (mask < 16);
9251 ret = alt[mask];
9252 gcc_assert (ret);
9253
9254 return ret;
9255 }
9256 }
9257
9258 void
9259 ix86_output_addr_vec_elt (FILE *file, int value)
9260 {
9261 const char *directive = ASM_LONG;
9262
9263 #ifdef ASM_QUAD
9264 if (TARGET_64BIT)
9265 directive = ASM_QUAD;
9266 #else
9267 gcc_assert (!TARGET_64BIT);
9268 #endif
9269
9270 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9271 }
9272
9273 void
9274 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9275 {
9276 if (TARGET_64BIT)
9277 fprintf (file, "%s%s%d-%s%d\n",
9278 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9279 else if (HAVE_AS_GOTOFF_IN_DATA)
9280 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9281 #if TARGET_MACHO
9282 else if (TARGET_MACHO)
9283 {
9284 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9285 machopic_output_function_base_name (file);
9286 fprintf(file, "\n");
9287 }
9288 #endif
9289 else
9290 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9291 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9292 }
9293 \f
9294 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9295 for the target. */
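/* "xor reg, reg" is smaller than "mov $0, reg" but clobbers the flags,
   hence the explicit CC clobber added below; TARGET_USE_MOV0 selects the
   plain move on chips where it is preferable, unless optimizing for size.  */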
9296
9297 void
9298 ix86_expand_clear (rtx dest)
9299 {
9300 rtx tmp;
9301
9302 /* We play register width games, which are only valid after reload. */
9303 gcc_assert (reload_completed);
9304
9305 /* Avoid HImode and its attendant prefix byte. */
9306 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9307 dest = gen_rtx_REG (SImode, REGNO (dest));
9308
9309 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9310
9311 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9312 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9313 {
9314 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9315 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9316 }
9317
9318 emit_insn (tmp);
9319 }
9320
9321 /* X is an unchanging MEM. If it is a constant pool reference, return
9322 the constant pool rtx, else NULL. */
9323
9324 rtx
9325 maybe_get_pool_constant (rtx x)
9326 {
9327 x = ix86_delegitimize_address (XEXP (x, 0));
9328
9329 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9330 return get_pool_constant (x);
9331
9332 return NULL_RTX;
9333 }
9334
9335 void
9336 ix86_expand_move (enum machine_mode mode, rtx operands[])
9337 {
9338 int strict = (reload_in_progress || reload_completed);
9339 rtx op0, op1;
9340 enum tls_model model;
9341
9342 op0 = operands[0];
9343 op1 = operands[1];
9344
9345 if (GET_CODE (op1) == SYMBOL_REF)
9346 {
9347 model = SYMBOL_REF_TLS_MODEL (op1);
9348 if (model)
9349 {
9350 op1 = legitimize_tls_address (op1, model, true);
9351 op1 = force_operand (op1, op0);
9352 if (op1 == op0)
9353 return;
9354 }
9355 }
9356 else if (GET_CODE (op1) == CONST
9357 && GET_CODE (XEXP (op1, 0)) == PLUS
9358 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9359 {
9360 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9361 if (model)
9362 {
9363 rtx addend = XEXP (XEXP (op1, 0), 1);
9364 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9365 op1 = force_operand (op1, NULL);
9366 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9367 op0, 1, OPTAB_DIRECT);
9368 if (op1 == op0)
9369 return;
9370 }
9371 }
9372
9373 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9374 {
9375 if (TARGET_MACHO && !TARGET_64BIT)
9376 {
9377 #if TARGET_MACHO
9378 if (MACHOPIC_PURE)
9379 {
9380 rtx temp = ((reload_in_progress
9381 || ((op0 && REG_P (op0))
9382 && mode == Pmode))
9383 ? op0 : gen_reg_rtx (Pmode));
9384 op1 = machopic_indirect_data_reference (op1, temp);
9385 op1 = machopic_legitimize_pic_address (op1, mode,
9386 temp == op1 ? 0 : temp);
9387 }
9388 else if (MACHOPIC_INDIRECT)
9389 op1 = machopic_indirect_data_reference (op1, 0);
9390 if (op0 == op1)
9391 return;
9392 #endif
9393 }
9394 else
9395 {
9396 if (MEM_P (op0))
9397 op1 = force_reg (Pmode, op1);
9398 else
9399 op1 = legitimize_address (op1, op1, Pmode);
9400 }
9401 }
9402 else
9403 {
9404 if (MEM_P (op0)
9405 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9406 || !push_operand (op0, mode))
9407 && MEM_P (op1))
9408 op1 = force_reg (mode, op1);
9409
9410 if (push_operand (op0, mode)
9411 && ! general_no_elim_operand (op1, mode))
9412 op1 = copy_to_mode_reg (mode, op1);
9413
9414 /* Force large constants in 64-bit compilation into a register
9415 to get them CSEed. */
9416 if (TARGET_64BIT && mode == DImode
9417 && immediate_operand (op1, mode)
9418 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9419 && !register_operand (op0, mode)
9420 && optimize && !reload_completed && !reload_in_progress)
9421 op1 = copy_to_mode_reg (mode, op1);
9422
9423 if (FLOAT_MODE_P (mode))
9424 {
9425 /* If we are loading a floating point constant to a register,
9426 force the value to memory now, since we'll get better code
9427 out of the back end. */
9428
9429 if (strict)
9430 ;
9431 else if (GET_CODE (op1) == CONST_DOUBLE)
9432 {
9433 op1 = validize_mem (force_const_mem (mode, op1));
9434 if (!register_operand (op0, mode))
9435 {
9436 rtx temp = gen_reg_rtx (mode);
9437 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9438 emit_move_insn (op0, temp);
9439 return;
9440 }
9441 }
9442 }
9443 }
9444
9445 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9446 }
9447
9448 void
9449 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9450 {
9451 rtx op0 = operands[0], op1 = operands[1];
9452
9453 /* Force constants other than zero into memory. We do not know how
9454 the instructions used to build constants modify the upper 64 bits
9455 of the register; once we have that information we may be able
9456 to handle some of them more efficiently. */
9457 if ((reload_in_progress | reload_completed) == 0
9458 && register_operand (op0, mode)
9459 && CONSTANT_P (op1)
9460 && standard_sse_constant_p (op1) <= 0)
9461 op1 = validize_mem (force_const_mem (mode, op1));
9462
9463 /* Make operand1 a register if it isn't already. */
9464 if (!no_new_pseudos
9465 && !register_operand (op0, mode)
9466 && !register_operand (op1, mode))
9467 {
9468 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9469 return;
9470 }
9471
9472 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9473 }
9474
9475 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9476 straight to ix86_expand_vector_move. */
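/* Roughly: movups is used when optimizing for size; movdqu for SSE2 integer
   vectors; movupd/movups when the target handles unaligned moves well
   (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL); otherwise the access is split into
   two 64-bit halves with the load/store {l,h}p[sd] patterns, which tends to
   be faster on chips where a full unaligned move is slow.  */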
9477
9478 void
9479 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9480 {
9481 rtx op0, op1, m;
9482
9483 op0 = operands[0];
9484 op1 = operands[1];
9485
9486 if (MEM_P (op1))
9487 {
9488 /* If we're optimizing for size, movups is the smallest. */
9489 if (optimize_size)
9490 {
9491 op0 = gen_lowpart (V4SFmode, op0);
9492 op1 = gen_lowpart (V4SFmode, op1);
9493 emit_insn (gen_sse_movups (op0, op1));
9494 return;
9495 }
9496
9497 /* ??? If we have typed data, then it would appear that using
9498 movdqu is the only way to get unaligned data loaded with
9499 integer type. */
9500 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9501 {
9502 op0 = gen_lowpart (V16QImode, op0);
9503 op1 = gen_lowpart (V16QImode, op1);
9504 emit_insn (gen_sse2_movdqu (op0, op1));
9505 return;
9506 }
9507
9508 if (TARGET_SSE2 && mode == V2DFmode)
9509 {
9510 rtx zero;
9511
9512 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9513 {
9514 op0 = gen_lowpart (V2DFmode, op0);
9515 op1 = gen_lowpart (V2DFmode, op1);
9516 emit_insn (gen_sse2_movupd (op0, op1));
9517 return;
9518 }
9519
9520 /* When SSE registers are split into halves, we can avoid
9521 writing to the top half twice. */
9522 if (TARGET_SSE_SPLIT_REGS)
9523 {
9524 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9525 zero = op0;
9526 }
9527 else
9528 {
9529 /* ??? Not sure about the best option for the Intel chips.
9530 The following would seem to satisfy; the register is
9531 entirely cleared, breaking the dependency chain. We
9532 then store to the upper half, with a dependency depth
9533 of one. A rumor has it that Intel recommends two movsd
9534 followed by an unpacklpd, but this is unconfirmed. And
9535 given that the dependency depth of the unpacklpd would
9536 still be one, I'm not sure why this would be better. */
9537 zero = CONST0_RTX (V2DFmode);
9538 }
9539
9540 m = adjust_address (op1, DFmode, 0);
9541 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9542 m = adjust_address (op1, DFmode, 8);
9543 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9544 }
9545 else
9546 {
9547 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9548 {
9549 op0 = gen_lowpart (V4SFmode, op0);
9550 op1 = gen_lowpart (V4SFmode, op1);
9551 emit_insn (gen_sse_movups (op0, op1));
9552 return;
9553 }
9554
9555 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9556 emit_move_insn (op0, CONST0_RTX (mode));
9557 else
9558 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9559
9560 if (mode != V4SFmode)
9561 op0 = gen_lowpart (V4SFmode, op0);
9562 m = adjust_address (op1, V2SFmode, 0);
9563 emit_insn (gen_sse_loadlps (op0, op0, m));
9564 m = adjust_address (op1, V2SFmode, 8);
9565 emit_insn (gen_sse_loadhps (op0, op0, m));
9566 }
9567 }
9568 else if (MEM_P (op0))
9569 {
9570 /* If we're optimizing for size, movups is the smallest. */
9571 if (optimize_size)
9572 {
9573 op0 = gen_lowpart (V4SFmode, op0);
9574 op1 = gen_lowpart (V4SFmode, op1);
9575 emit_insn (gen_sse_movups (op0, op1));
9576 return;
9577 }
9578
9579 /* ??? Similar to above, only less clear because of quote
9580 typeless stores unquote. */
9581 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9582 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9583 {
9584 op0 = gen_lowpart (V16QImode, op0);
9585 op1 = gen_lowpart (V16QImode, op1);
9586 emit_insn (gen_sse2_movdqu (op0, op1));
9587 return;
9588 }
9589
9590 if (TARGET_SSE2 && mode == V2DFmode)
9591 {
9592 m = adjust_address (op0, DFmode, 0);
9593 emit_insn (gen_sse2_storelpd (m, op1));
9594 m = adjust_address (op0, DFmode, 8);
9595 emit_insn (gen_sse2_storehpd (m, op1));
9596 }
9597 else
9598 {
9599 if (mode != V4SFmode)
9600 op1 = gen_lowpart (V4SFmode, op1);
9601 m = adjust_address (op0, V2SFmode, 0);
9602 emit_insn (gen_sse_storelps (m, op1));
9603 m = adjust_address (op0, V2SFmode, 8);
9604 emit_insn (gen_sse_storehps (m, op1));
9605 }
9606 }
9607 else
9608 gcc_unreachable ();
9609 }
9610
9611 /* Expand a push in MODE. This is some mode for which we do not support
9612 proper push instructions, at least from the registers that we expect
9613 the value to live in. */
9614
9615 void
9616 ix86_expand_push (enum machine_mode mode, rtx x)
9617 {
9618 rtx tmp;
9619
9620 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9621 GEN_INT (-GET_MODE_SIZE (mode)),
9622 stack_pointer_rtx, 1, OPTAB_DIRECT);
9623 if (tmp != stack_pointer_rtx)
9624 emit_move_insn (stack_pointer_rtx, tmp);
9625
9626 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9627 emit_move_insn (tmp, x);
9628 }
9629
9630 /* Helper function of ix86_fixup_binary_operands to canonicalize
9631 operand order. Returns true if the operands should be swapped. */
9632
9633 static bool
9634 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9635 rtx operands[])
9636 {
9637 rtx dst = operands[0];
9638 rtx src1 = operands[1];
9639 rtx src2 = operands[2];
9640
9641 /* If the operation is not commutative, we can't do anything. */
9642 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9643 return false;
9644
9645 /* Highest priority is that src1 should match dst. */
9646 if (rtx_equal_p (dst, src1))
9647 return false;
9648 if (rtx_equal_p (dst, src2))
9649 return true;
9650
9651 /* Next highest priority is that immediate constants come second. */
9652 if (immediate_operand (src2, mode))
9653 return false;
9654 if (immediate_operand (src1, mode))
9655 return true;
9656
9657 /* Lowest priority is that memory references should come second. */
9658 if (MEM_P (src2))
9659 return false;
9660 if (MEM_P (src1))
9661 return true;
9662
9663 return false;
9664 }
9665
9666
9667 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9668 destination to use for the operation. If different from the true
9669 destination in operands[0], a copy operation will be required. */
9670
9671 rtx
9672 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9673 rtx operands[])
9674 {
9675 rtx dst = operands[0];
9676 rtx src1 = operands[1];
9677 rtx src2 = operands[2];
9678
9679 /* Canonicalize operand order. */
9680 if (ix86_swap_binary_operands_p (code, mode, operands))
9681 {
9682 rtx temp = src1;
9683 src1 = src2;
9684 src2 = temp;
9685 }
9686
9687 /* Both source operands cannot be in memory. */
9688 if (MEM_P (src1) && MEM_P (src2))
9689 {
9690 /* Optimization: Only read from memory once. */
9691 if (rtx_equal_p (src1, src2))
9692 {
9693 src2 = force_reg (mode, src2);
9694 src1 = src2;
9695 }
9696 else
9697 src2 = force_reg (mode, src2);
9698 }
9699
9700 /* If the destination is memory, and we do not have matching source
9701 operands, do things in registers. */
9702 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9703 dst = gen_reg_rtx (mode);
9704
9705 /* Source 1 cannot be a constant. */
9706 if (CONSTANT_P (src1))
9707 src1 = force_reg (mode, src1);
9708
9709 /* Source 1 cannot be a non-matching memory. */
9710 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9711 src1 = force_reg (mode, src1);
9712
9713 operands[1] = src1;
9714 operands[2] = src2;
9715 return dst;
9716 }
9717
9718 /* Similarly, but assume that the destination has already been
9719 set up properly. */
9720
9721 void
9722 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9723 enum machine_mode mode, rtx operands[])
9724 {
9725 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9726 gcc_assert (dst == operands[0]);
9727 }
9728
9729 /* Attempt to expand a binary operator. Make the expansion closer to the
9730 actual machine than just general_operand, which would allow 3 separate
9731 memory references (one output, two input) in a single insn. */
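/* Illustrative sketch (not part of GCC itself): a typical expander in
   i386.md simply forwards to this helper, e.g.

     ix86_expand_binary_operator (PLUS, SImode, operands); DONE;

   for an SImode addition.  */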
9732
9733 void
9734 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9735 rtx operands[])
9736 {
9737 rtx src1, src2, dst, op, clob;
9738
9739 dst = ix86_fixup_binary_operands (code, mode, operands);
9740 src1 = operands[1];
9741 src2 = operands[2];
9742
9743 /* Emit the instruction. */
9744
9745 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9746 if (reload_in_progress)
9747 {
9748 /* Reload doesn't know about the flags register, and doesn't know that
9749 it doesn't want to clobber it. We can only do this with PLUS. */
9750 gcc_assert (code == PLUS);
9751 emit_insn (op);
9752 }
9753 else
9754 {
9755 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9756 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9757 }
9758
9759 /* Fix up the destination if needed. */
9760 if (dst != operands[0])
9761 emit_move_insn (operands[0], dst);
9762 }
9763
9764 /* Return TRUE or FALSE depending on whether the binary operator meets the
9765 appropriate constraints. */
9766
9767 int
9768 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9769 rtx operands[3])
9770 {
9771 rtx dst = operands[0];
9772 rtx src1 = operands[1];
9773 rtx src2 = operands[2];
9774
9775 /* Both source operands cannot be in memory. */
9776 if (MEM_P (src1) && MEM_P (src2))
9777 return 0;
9778
9779 /* Canonicalize operand order for commutative operators. */
9780 if (ix86_swap_binary_operands_p (code, mode, operands))
9781 {
9782 rtx temp = src1;
9783 src1 = src2;
9784 src2 = temp;
9785 }
9786
9787 /* If the destination is memory, we must have a matching source operand. */
9788 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9789 return 0;
9790
9791 /* Source 1 cannot be a constant. */
9792 if (CONSTANT_P (src1))
9793 return 0;
9794
9795 /* Source 1 cannot be a non-matching memory. */
9796 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9797 return 0;
9798
9799 return 1;
9800 }
9801
9802 /* Attempt to expand a unary operator. Make the expansion closer to the
9803 actual machine than just general_operand, which would allow 2 separate
9804 memory references (one output, one input) in a single insn. */
9805
9806 void
9807 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9808 rtx operands[])
9809 {
9810 int matching_memory;
9811 rtx src, dst, op, clob;
9812
9813 dst = operands[0];
9814 src = operands[1];
9815
9816 /* If the destination is memory, and we do not have matching source
9817 operands, do things in registers. */
9818 matching_memory = 0;
9819 if (MEM_P (dst))
9820 {
9821 if (rtx_equal_p (dst, src))
9822 matching_memory = 1;
9823 else
9824 dst = gen_reg_rtx (mode);
9825 }
9826
9827 /* When source operand is memory, destination must match. */
9828 if (MEM_P (src) && !matching_memory)
9829 src = force_reg (mode, src);
9830
9831 /* Emit the instruction. */
9832
9833 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9834 if (reload_in_progress || code == NOT)
9835 {
9836 /* Reload doesn't know about the flags register, and doesn't know that
9837 it doesn't want to clobber it. */
9838 gcc_assert (code == NOT);
9839 emit_insn (op);
9840 }
9841 else
9842 {
9843 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9844 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9845 }
9846
9847 /* Fix up the destination if needed. */
9848 if (dst != operands[0])
9849 emit_move_insn (operands[0], dst);
9850 }
9851
9852 /* Return TRUE or FALSE depending on whether the unary operator meets the
9853 appropriate constraints. */
9854
9855 int
9856 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9857 enum machine_mode mode ATTRIBUTE_UNUSED,
9858 rtx operands[2] ATTRIBUTE_UNUSED)
9859 {
9860 /* If one of operands is memory, source and destination must match. */
9861 if ((MEM_P (operands[0])
9862 || MEM_P (operands[1]))
9863 && ! rtx_equal_p (operands[0], operands[1]))
9864 return FALSE;
9865 return TRUE;
9866 }
9867
9868 /* Convert an SF or DFmode value in an SSE register into an unsigned SImode.
9869 When -fpmath=387, this is done with an x87 st(0) FP->signed-int-64
9870 conversion, ignoring the upper 32 bits of the result. On x86_64,
9871 there is an equivalent SSE %xmm->signed-int-64 conversion.
9872
9873 On x86_32, we don't have the instruction, nor the 64-bit destination
9874 register it requires. Do the conversion inline in the SSE registers.
9875 Requires SSE2. For x86_32, -mfpmath=sse, !optimize_size only. */
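/* Worked example: for a DFmode input of 3000000000.0 (which does not fit in
   a signed int), the LE compare below makes "large" all-ones, so 2**31 is
   subtracted before the signed cvtt conversion (3000000000 - 2147483648 =
   852516352), and the sign mask built from "large" is xored back in
   afterwards, restoring 852516352 + 2**31 = 3000000000 as the unsigned
   result.  */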
9876
9877 void
9878 ix86_expand_convert_uns_si_sse (rtx target, rtx input)
9879 {
9880 REAL_VALUE_TYPE TWO31r;
9881 enum machine_mode mode, vecmode;
9882 rtx two31, value, large, sign, result_vec, zero_or_two31, x;
9883
9884 mode = GET_MODE (input);
9885 vecmode = mode == SFmode ? V4SFmode : V2DFmode;
9886
9887 real_ldexp (&TWO31r, &dconst1, 31);
9888 two31 = const_double_from_real_value (TWO31r, mode);
9889 two31 = ix86_build_const_vector (mode, true, two31);
9890 two31 = force_reg (vecmode, two31);
9891
9892 value = gen_reg_rtx (vecmode);
9893 ix86_expand_vector_init_one_nonzero (false, vecmode, value, input, 0);
9894
9895 large = gen_reg_rtx (vecmode);
9896 x = gen_rtx_fmt_ee (LE, vecmode, two31, value);
9897 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9898
9899 zero_or_two31 = gen_reg_rtx (vecmode);
9900 x = gen_rtx_AND (vecmode, large, two31);
9901 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9902
9903 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9904 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9905
9906 result_vec = gen_reg_rtx (V4SImode);
9907 if (mode == SFmode)
9908 x = gen_sse2_cvttps2dq (result_vec, value);
9909 else
9910 x = gen_sse2_cvttpd2dq (result_vec, value);
9911 emit_insn (x);
9912
9913 sign = gen_reg_rtx (V4SImode);
9914 emit_insn (gen_ashlv4si3 (sign, gen_lowpart (V4SImode, large),
9915 GEN_INT (31)));
9916
9917 emit_insn (gen_xorv4si3 (result_vec, result_vec, sign));
9918
9919 ix86_expand_vector_extract (false, target, result_vec, 0);
9920 }
9921
9922 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9923 Expects the 64-bit DImode to be supplied in a pair of integral
9924 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9925 -mfpmath=sse, !optimize_size only. */
9926
9927 void
9928 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9929 {
9930 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9931 rtx int_xmm, fp_xmm;
9932 rtx biases, exponents;
9933 rtx x;
9934
9935 int_xmm = gen_reg_rtx (V4SImode);
9936 if (TARGET_INTER_UNIT_MOVES)
9937 emit_insn (gen_movdi_to_sse (int_xmm, input));
9938 else if (TARGET_SSE_SPLIT_REGS)
9939 {
9940 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9941 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9942 }
9943 else
9944 {
9945 x = gen_reg_rtx (V2DImode);
9946 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9947 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9948 }
9949
9950 x = gen_rtx_CONST_VECTOR (V4SImode,
9951 gen_rtvec (4, GEN_INT (0x43300000UL),
9952 GEN_INT (0x45300000UL),
9953 const0_rtx, const0_rtx));
9954 exponents = validize_mem (force_const_mem (V4SImode, x));
9955
9956 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9957 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9958
9959 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9960 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9961 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9962 (0x1.0p84 + double(fp_value_hi_xmm)).
9963 Note these exponents differ by 32. */
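/* In other words, for the input x = hi * 2**32 + lo (hi and lo being the
   unsigned 32-bit halves), the two doubles described above are exactly
   2**52 + lo and 2**84 + hi * 2**32; subtracting the 2**52 and 2**84 biases
   below leaves lo and hi * 2**32, whose (correctly rounded) sum gives the
   DFmode result.  */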
9964
9965 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9966
9967 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9968 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9969 real_ldexp (&bias_lo_rvt, &dconst1, 52);
9970 real_ldexp (&bias_hi_rvt, &dconst1, 84);
9971 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
9972 x = const_double_from_real_value (bias_hi_rvt, DFmode);
9973 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
9974 biases = validize_mem (force_const_mem (V2DFmode, biases));
9975 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
9976
9977 /* Add the upper and lower DFmode values together. */
9978 if (TARGET_SSE3)
9979 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
9980 else
9981 {
9982 x = copy_to_mode_reg (V2DFmode, fp_xmm);
9983 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
9984 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
9985 }
9986
9987 ix86_expand_vector_extract (false, target, fp_xmm, 0);
9988 }
9989
9990 /* Convert an unsigned SImode value into a DFmode. Only currently used
9991 for SSE, but applicable anywhere. */
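/* The trick: adding -2147483648 (i.e. toggling the sign bit) maps the
   unsigned value u to the signed value u - 2**31, which floatsidf can
   convert; adding 2**31.0 back afterwards restores u.  E.g. 3000000000
   becomes 852516352, converts exactly, and 852516352.0 + 2147483648.0
   = 3000000000.0.  */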
9992
9993 void
9994 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
9995 {
9996 REAL_VALUE_TYPE TWO31r;
9997 rtx x, fp;
9998
9999 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10000 NULL, 1, OPTAB_DIRECT);
10001
10002 fp = gen_reg_rtx (DFmode);
10003 emit_insn (gen_floatsidf2 (fp, x));
10004
10005 real_ldexp (&TWO31r, &dconst1, 31);
10006 x = const_double_from_real_value (TWO31r, DFmode);
10007
10008 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10009 if (x != target)
10010 emit_move_insn (target, x);
10011 }
10012
10013 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10014 32-bit mode; otherwise we have a direct convert instruction. */
10015
10016 void
10017 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10018 {
10019 REAL_VALUE_TYPE TWO32r;
10020 rtx fp_lo, fp_hi, x;
10021
10022 fp_lo = gen_reg_rtx (DFmode);
10023 fp_hi = gen_reg_rtx (DFmode);
10024
10025 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10026
10027 real_ldexp (&TWO32r, &dconst1, 32);
10028 x = const_double_from_real_value (TWO32r, DFmode);
10029 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10030
10031 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10032
10033 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10034 0, OPTAB_DIRECT);
10035 if (x != target)
10036 emit_move_insn (target, x);
10037 }
10038
10039 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10040 For x86_32, -mfpmath=sse, !optimize_size only. */
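/* The input is split into 16-bit halves, each of which converts to SFmode
   exactly; the result is then reassembled as hi * 2**16 + lo, with at most
   the final addition rounding.  */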
10041 void
10042 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10043 {
10044 REAL_VALUE_TYPE ONE16r;
10045 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10046
10047 real_ldexp (&ONE16r, &dconst1, 16);
10048 x = const_double_from_real_value (ONE16r, SFmode);
10049 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10050 NULL, 0, OPTAB_DIRECT);
10051 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10052 NULL, 0, OPTAB_DIRECT);
10053 fp_hi = gen_reg_rtx (SFmode);
10054 fp_lo = gen_reg_rtx (SFmode);
10055 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10056 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10057 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10058 0, OPTAB_DIRECT);
10059 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10060 0, OPTAB_DIRECT);
10061 if (!rtx_equal_p (target, fp_hi))
10062 emit_move_insn (target, fp_hi);
10063 }
10064
10065 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10066 then replicate the value for all elements of the vector
10067 register. */
10068
10069 static rtx
10070 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10071 {
10072 rtvec v;
10073 switch (mode)
10074 {
10075 case SFmode:
10076 if (vect)
10077 v = gen_rtvec (4, value, value, value, value);
10078 else
10079 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10080 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10081 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10082
10083 case DFmode:
10084 if (vect)
10085 v = gen_rtvec (2, value, value);
10086 else
10087 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10088 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10089
10090 default:
10091 gcc_unreachable ();
10092 }
10093 }
10094
10095 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10096 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10097 true, then replicate the mask for all elements of the vector register.
10098 If INVERT is true, then create a mask excluding the sign bit. */
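/* For example, for DFmode with VECT set this produces the vector constant
   { 0x8000000000000000, 0x8000000000000000 }; without VECT only the low
   element carries the sign bit, and with INVERT the bits are complemented
   so the mask selects everything except the sign bit.  */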
10099
10100 rtx
10101 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10102 {
10103 enum machine_mode vec_mode;
10104 HOST_WIDE_INT hi, lo;
10105 int shift = 63;
10106 rtx v;
10107 rtx mask;
10108
10109 /* Find the sign bit, sign extended to 2*HWI. */
10110 if (mode == SFmode)
10111 lo = 0x80000000, hi = lo < 0;
10112 else if (HOST_BITS_PER_WIDE_INT >= 64)
10113 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10114 else
10115 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10116
10117 if (invert)
10118 lo = ~lo, hi = ~hi;
10119
10120 /* Force this value into the low part of a fp vector constant. */
10121 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10122 mask = gen_lowpart (mode, mask);
10123
10124 v = ix86_build_const_vector (mode, vect, mask);
10125 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10126 return force_reg (vec_mode, v);
10127 }
10128
10129 /* Generate code for floating point ABS or NEG. */
10130
10131 void
10132 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10133 rtx operands[])
10134 {
10135 rtx mask, set, use, clob, dst, src;
10136 bool matching_memory;
10137 bool use_sse = false;
10138 bool vector_mode = VECTOR_MODE_P (mode);
10139 enum machine_mode elt_mode = mode;
10140
10141 if (vector_mode)
10142 {
10143 elt_mode = GET_MODE_INNER (mode);
10144 use_sse = true;
10145 }
10146 else if (TARGET_SSE_MATH)
10147 use_sse = SSE_FLOAT_MODE_P (mode);
10148
10149 /* NEG and ABS performed with SSE use bitwise mask operations.
10150 Create the appropriate mask now. */
10151 if (use_sse)
10152 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10153 else
10154 mask = NULL_RTX;
10155
10156 dst = operands[0];
10157 src = operands[1];
10158
10159 /* If the destination is memory, and we don't have matching source
10160 operands or we're using the x87, do things in registers. */
10161 matching_memory = false;
10162 if (MEM_P (dst))
10163 {
10164 if (use_sse && rtx_equal_p (dst, src))
10165 matching_memory = true;
10166 else
10167 dst = gen_reg_rtx (mode);
10168 }
10169 if (MEM_P (src) && !matching_memory)
10170 src = force_reg (mode, src);
10171
10172 if (vector_mode)
10173 {
10174 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10175 set = gen_rtx_SET (VOIDmode, dst, set);
10176 emit_insn (set);
10177 }
10178 else
10179 {
10180 set = gen_rtx_fmt_e (code, mode, src);
10181 set = gen_rtx_SET (VOIDmode, dst, set);
10182 if (mask)
10183 {
10184 use = gen_rtx_USE (VOIDmode, mask);
10185 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10186 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10187 gen_rtvec (3, set, use, clob)));
10188 }
10189 else
10190 emit_insn (set);
10191 }
10192
10193 if (dst != operands[0])
10194 emit_move_insn (operands[0], dst);
10195 }
10196
10197 /* Expand a copysign operation. Special case operand 0 being a constant. */
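/* copysign (x, y) keeps the magnitude of x and the sign of y, so at the bit
   level it is (x & ~signmask) | (y & signmask); the expander below sets up
   the sign-bit masks and defers the actual AND/IOR to the split routines
   that follow.  */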
10198
10199 void
10200 ix86_expand_copysign (rtx operands[])
10201 {
10202 enum machine_mode mode, vmode;
10203 rtx dest, op0, op1, mask, nmask;
10204
10205 dest = operands[0];
10206 op0 = operands[1];
10207 op1 = operands[2];
10208
10209 mode = GET_MODE (dest);
10210 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10211
10212 if (GET_CODE (op0) == CONST_DOUBLE)
10213 {
10214 rtvec v;
10215
10216 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10217 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10218
10219 if (op0 == CONST0_RTX (mode))
10220 op0 = CONST0_RTX (vmode);
10221 else
10222 {
10223 if (mode == SFmode)
10224 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10225 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10226 else
10227 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10228 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10229 }
10230
10231 mask = ix86_build_signbit_mask (mode, 0, 0);
10232
10233 if (mode == SFmode)
10234 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10235 else
10236 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10237 }
10238 else
10239 {
10240 nmask = ix86_build_signbit_mask (mode, 0, 1);
10241 mask = ix86_build_signbit_mask (mode, 0, 0);
10242
10243 if (mode == SFmode)
10244 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10245 else
10246 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10247 }
10248 }
10249
10250 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10251 be a constant, and so has already been expanded into a vector constant. */
10252
10253 void
10254 ix86_split_copysign_const (rtx operands[])
10255 {
10256 enum machine_mode mode, vmode;
10257 rtx dest, op0, op1, mask, x;
10258
10259 dest = operands[0];
10260 op0 = operands[1];
10261 op1 = operands[2];
10262 mask = operands[3];
10263
10264 mode = GET_MODE (dest);
10265 vmode = GET_MODE (mask);
10266
10267 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10268 x = gen_rtx_AND (vmode, dest, mask);
10269 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10270
10271 if (op0 != CONST0_RTX (vmode))
10272 {
10273 x = gen_rtx_IOR (vmode, dest, op0);
10274 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10275 }
10276 }
10277
10278 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10279 so we have to do two masks. */
10280
10281 void
10282 ix86_split_copysign_var (rtx operands[])
10283 {
10284 enum machine_mode mode, vmode;
10285 rtx dest, scratch, op0, op1, mask, nmask, x;
10286
10287 dest = operands[0];
10288 scratch = operands[1];
10289 op0 = operands[2];
10290 op1 = operands[3];
10291 nmask = operands[4];
10292 mask = operands[5];
10293
10294 mode = GET_MODE (dest);
10295 vmode = GET_MODE (mask);
10296
10297 if (rtx_equal_p (op0, op1))
10298 {
10299 /* Shouldn't happen often (it's useless, obviously), but when it does
10300 we'd generate incorrect code if we continue below. */
10301 emit_move_insn (dest, op0);
10302 return;
10303 }
10304
10305 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10306 {
10307 gcc_assert (REGNO (op1) == REGNO (scratch));
10308
10309 x = gen_rtx_AND (vmode, scratch, mask);
10310 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10311
10312 dest = mask;
10313 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10314 x = gen_rtx_NOT (vmode, dest);
10315 x = gen_rtx_AND (vmode, x, op0);
10316 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10317 }
10318 else
10319 {
10320 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10321 {
10322 x = gen_rtx_AND (vmode, scratch, mask);
10323 }
10324 else /* alternative 2,4 */
10325 {
10326 gcc_assert (REGNO (mask) == REGNO (scratch));
10327 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10328 x = gen_rtx_AND (vmode, scratch, op1);
10329 }
10330 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10331
10332 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10333 {
10334 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10335 x = gen_rtx_AND (vmode, dest, nmask);
10336 }
10337 else /* alternative 3,4 */
10338 {
10339 gcc_assert (REGNO (nmask) == REGNO (dest));
10340 dest = nmask;
10341 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10342 x = gen_rtx_AND (vmode, dest, op0);
10343 }
10344 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10345 }
10346
10347 x = gen_rtx_IOR (vmode, dest, scratch);
10348 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10349 }
10350
10351 /* Return TRUE or FALSE depending on whether the first SET in INSN
10352 has source and destination with matching CC modes, and that the
10353 CC mode is at least as constrained as REQ_MODE. */
10354
10355 int
10356 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10357 {
10358 rtx set;
10359 enum machine_mode set_mode;
10360
10361 set = PATTERN (insn);
10362 if (GET_CODE (set) == PARALLEL)
10363 set = XVECEXP (set, 0, 0);
10364 gcc_assert (GET_CODE (set) == SET);
10365 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10366
10367 set_mode = GET_MODE (SET_DEST (set));
10368 switch (set_mode)
10369 {
10370 case CCNOmode:
10371 if (req_mode != CCNOmode
10372 && (req_mode != CCmode
10373 || XEXP (SET_SRC (set), 1) != const0_rtx))
10374 return 0;
10375 break;
10376 case CCmode:
10377 if (req_mode == CCGCmode)
10378 return 0;
10379 /* FALLTHRU */
10380 case CCGCmode:
10381 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10382 return 0;
10383 /* FALLTHRU */
10384 case CCGOCmode:
10385 if (req_mode == CCZmode)
10386 return 0;
10387 /* FALLTHRU */
10388 case CCZmode:
10389 break;
10390
10391 default:
10392 gcc_unreachable ();
10393 }
10394
10395 return (GET_MODE (SET_SRC (set)) == set_mode);
10396 }
10397
10398 /* Generate insn patterns to do an integer compare of OPERANDS. */
10399
10400 static rtx
10401 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10402 {
10403 enum machine_mode cmpmode;
10404 rtx tmp, flags;
10405
10406 cmpmode = SELECT_CC_MODE (code, op0, op1);
10407 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10408
10409 /* This is very simple, but making the interface the same as in the
10410 FP case makes the rest of the code easier. */
10411 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10412 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10413
10414 /* Return the test that should be put into the flags user, i.e.
10415 the bcc, scc, or cmov instruction. */
10416 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10417 }
10418
10419 /* Figure out whether to use ordered or unordered fp comparisons.
10420 Return the appropriate mode to use. */
10421
10422 enum machine_mode
10423 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10424 {
10425 /* ??? In order to make all comparisons reversible, we do all comparisons
10426 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10427 all forms of trapping and nontrapping comparisons, we can make inequality
10428 comparisons trapping again, since it results in better code when using
10429 FCOM based compares. */
10430 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10431 }
10432
10433 enum machine_mode
10434 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10435 {
10436 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10437 return ix86_fp_compare_mode (code);
10438 switch (code)
10439 {
10440 /* Only zero flag is needed. */
10441 case EQ: /* ZF=0 */
10442 case NE: /* ZF!=0 */
10443 return CCZmode;
10444 /* Codes needing carry flag. */
10445 case GEU: /* CF=0 */
10446 case GTU: /* CF=0 & ZF=0 */
10447 case LTU: /* CF=1 */
10448 case LEU: /* CF=1 | ZF=1 */
10449 return CCmode;
10450 /* Codes possibly doable only with sign flag when
10451 comparing against zero. */
10452 case GE: /* SF=OF or SF=0 */
10453 case LT: /* SF<>OF or SF=1 */
10454 if (op1 == const0_rtx)
10455 return CCGOCmode;
10456 else
10457 /* For other cases Carry flag is not required. */
10458 return CCGCmode;
10459 /* Codes doable only with the sign flag when comparing
10460 against zero, but we lack a jump instruction for it,
10461 so we need to use relational tests against overflow,
10462 which thus needs to be zero. */
10463 case GT: /* ZF=0 & SF=OF */
10464 case LE: /* ZF=1 | SF<>OF */
10465 if (op1 == const0_rtx)
10466 return CCNOmode;
10467 else
10468 return CCGCmode;
10469 /* The strcmp pattern does (use flags), and combine may ask us for
10470 a proper mode. */
10471 case USE:
10472 return CCmode;
10473 default:
10474 gcc_unreachable ();
10475 }
10476 }
10477
10478 /* Return the fixed registers used for condition codes. */
10479
10480 static bool
10481 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10482 {
10483 *p1 = FLAGS_REG;
10484 *p2 = FPSR_REG;
10485 return true;
10486 }
10487
10488 /* If two condition code modes are compatible, return a condition code
10489 mode which is compatible with both. Otherwise, return
10490 VOIDmode. */
10491
10492 static enum machine_mode
10493 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10494 {
10495 if (m1 == m2)
10496 return m1;
10497
10498 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10499 return VOIDmode;
10500
10501 if ((m1 == CCGCmode && m2 == CCGOCmode)
10502 || (m1 == CCGOCmode && m2 == CCGCmode))
10503 return CCGCmode;
10504
10505 switch (m1)
10506 {
10507 default:
10508 gcc_unreachable ();
10509
10510 case CCmode:
10511 case CCGCmode:
10512 case CCGOCmode:
10513 case CCNOmode:
10514 case CCZmode:
10515 switch (m2)
10516 {
10517 default:
10518 return VOIDmode;
10519
10520 case CCmode:
10521 case CCGCmode:
10522 case CCGOCmode:
10523 case CCNOmode:
10524 case CCZmode:
10525 return CCmode;
10526 }
10527
10528 case CCFPmode:
10529 case CCFPUmode:
10530 /* These are only compatible with themselves, which we already
10531 checked above. */
10532 return VOIDmode;
10533 }
10534 }
10535
10536 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10537
10538 int
10539 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10540 {
10541 enum rtx_code swapped_code = swap_condition (code);
10542 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10543 || (ix86_fp_comparison_cost (swapped_code)
10544 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10545 }
10546
10547 /* Swap, force into registers, or otherwise massage the two operands
10548 to a fp comparison. The operands are updated in place; the new
10549 comparison code is returned. */
10550
10551 static enum rtx_code
10552 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10553 {
10554 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10555 rtx op0 = *pop0, op1 = *pop1;
10556 enum machine_mode op_mode = GET_MODE (op0);
10557 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10558
10559 /* All of the unordered compare instructions only work on registers.
10560 The same is true of the fcomi compare instructions. The XFmode
10561 compare instructions require registers except when comparing
10562 against zero or when converting operand 1 from fixed point to
10563 floating point. */
10564
10565 if (!is_sse
10566 && (fpcmp_mode == CCFPUmode
10567 || (op_mode == XFmode
10568 && ! (standard_80387_constant_p (op0) == 1
10569 || standard_80387_constant_p (op1) == 1)
10570 && GET_CODE (op1) != FLOAT)
10571 || ix86_use_fcomi_compare (code)))
10572 {
10573 op0 = force_reg (op_mode, op0);
10574 op1 = force_reg (op_mode, op1);
10575 }
10576 else
10577 {
10578 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10579 things around if they appear profitable, otherwise force op0
10580 into a register. */
10581
10582 if (standard_80387_constant_p (op0) == 0
10583 || (MEM_P (op0)
10584 && ! (standard_80387_constant_p (op1) == 0
10585 || MEM_P (op1))))
10586 {
10587 rtx tmp;
10588 tmp = op0, op0 = op1, op1 = tmp;
10589 code = swap_condition (code);
10590 }
10591
10592 if (!REG_P (op0))
10593 op0 = force_reg (op_mode, op0);
10594
10595 if (CONSTANT_P (op1))
10596 {
10597 int tmp = standard_80387_constant_p (op1);
10598 if (tmp == 0)
10599 op1 = validize_mem (force_const_mem (op_mode, op1));
10600 else if (tmp == 1)
10601 {
10602 if (TARGET_CMOVE)
10603 op1 = force_reg (op_mode, op1);
10604 }
10605 else
10606 op1 = force_reg (op_mode, op1);
10607 }
10608 }
10609
10610 /* Try to rearrange the comparison to make it cheaper. */
10611 if (ix86_fp_comparison_cost (code)
10612 > ix86_fp_comparison_cost (swap_condition (code))
10613 && (REG_P (op1) || !no_new_pseudos))
10614 {
10615 rtx tmp;
10616 tmp = op0, op0 = op1, op1 = tmp;
10617 code = swap_condition (code);
10618 if (!REG_P (op0))
10619 op0 = force_reg (op_mode, op0);
10620 }
10621
10622 *pop0 = op0;
10623 *pop1 = op1;
10624 return code;
10625 }
10626
10627 /* Convert comparison codes we use to represent FP comparison to integer
10628 code that will result in proper branch. Return UNKNOWN if no such code
10629 is available. */
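/* The mapping below works because fcomi/fucomi (and sahf after fnstsw)
   leave the FP comparison result in CF and ZF, exactly where an unsigned
   integer compare would put it, so the unsigned branch conditions apply.  */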
10630
10631 enum rtx_code
10632 ix86_fp_compare_code_to_integer (enum rtx_code code)
10633 {
10634 switch (code)
10635 {
10636 case GT:
10637 return GTU;
10638 case GE:
10639 return GEU;
10640 case ORDERED:
10641 case UNORDERED:
10642 return code;
10643 break;
10644 case UNEQ:
10645 return EQ;
10646 break;
10647 case UNLT:
10648 return LTU;
10649 break;
10650 case UNLE:
10651 return LEU;
10652 break;
10653 case LTGT:
10654 return NE;
10655 break;
10656 default:
10657 return UNKNOWN;
10658 }
10659 }
10660
10661 /* Split comparison code CODE into comparisons we can do using branch
10662 instructions. BYPASS_CODE is the comparison code for a branch that
10663 branches around FIRST_CODE and SECOND_CODE. If one of the branches
10664 is not required, the corresponding value is set to UNKNOWN.
10665 We never require more than two branches. */
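/* For example, an ordered LT on IEEE targets becomes first_code = UNLT with
   bypass_code = UNORDERED: branch away first if the operands are unordered
   (PF set), then test CF for the less-than result.  */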
10666
10667 void
10668 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10669 enum rtx_code *first_code,
10670 enum rtx_code *second_code)
10671 {
10672 *first_code = code;
10673 *bypass_code = UNKNOWN;
10674 *second_code = UNKNOWN;
10675
10676 /* The fcomi comparison sets flags as follows:
10677
10678 cmp ZF PF CF
10679 > 0 0 0
10680 < 0 0 1
10681 = 1 0 0
10682 un 1 1 1 */
10683
10684 switch (code)
10685 {
10686 case GT: /* GTU - CF=0 & ZF=0 */
10687 case GE: /* GEU - CF=0 */
10688 case ORDERED: /* PF=0 */
10689 case UNORDERED: /* PF=1 */
10690 case UNEQ: /* EQ - ZF=1 */
10691 case UNLT: /* LTU - CF=1 */
10692 case UNLE: /* LEU - CF=1 | ZF=1 */
10693 case LTGT: /* EQ - ZF=0 */
10694 break;
10695 case LT: /* LTU - CF=1 - fails on unordered */
10696 *first_code = UNLT;
10697 *bypass_code = UNORDERED;
10698 break;
10699 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10700 *first_code = UNLE;
10701 *bypass_code = UNORDERED;
10702 break;
10703 case EQ: /* EQ - ZF=1 - fails on unordered */
10704 *first_code = UNEQ;
10705 *bypass_code = UNORDERED;
10706 break;
10707 case NE: /* NE - ZF=0 - fails on unordered */
10708 *first_code = LTGT;
10709 *second_code = UNORDERED;
10710 break;
10711 case UNGE: /* GEU - CF=0 - fails on unordered */
10712 *first_code = GE;
10713 *second_code = UNORDERED;
10714 break;
10715 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10716 *first_code = GT;
10717 *second_code = UNORDERED;
10718 break;
10719 default:
10720 gcc_unreachable ();
10721 }
10722 if (!TARGET_IEEE_FP)
10723 {
10724 *second_code = UNKNOWN;
10725 *bypass_code = UNKNOWN;
10726 }
10727 }
10728
10729 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10730 All of the following functions use the number of instructions as the cost metric.
10731 In the future this should be tweaked to compute bytes for optimize_size and
10732 to take into account the performance of various instructions on various CPUs. */
10733 static int
10734 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10735 {
10736 if (!TARGET_IEEE_FP)
10737 return 4;
10738 /* The cost of code output by ix86_expand_fp_compare. */
10739 switch (code)
10740 {
10741 case UNLE:
10742 case UNLT:
10743 case LTGT:
10744 case GT:
10745 case GE:
10746 case UNORDERED:
10747 case ORDERED:
10748 case UNEQ:
10749 return 4;
10750 break;
10751 case LT:
10752 case NE:
10753 case EQ:
10754 case UNGE:
10755 return 5;
10756 break;
10757 case LE:
10758 case UNGT:
10759 return 6;
10760 break;
10761 default:
10762 gcc_unreachable ();
10763 }
10764 }
10765
10766 /* Return cost of comparison done using fcomi operation.
10767 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10768 static int
10769 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10770 {
10771 enum rtx_code bypass_code, first_code, second_code;
10772 /* Return an arbitrarily high cost when the instruction is not supported - this
10773 prevents gcc from using it. */
10774 if (!TARGET_CMOVE)
10775 return 1024;
10776 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10777 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10778 }
10779
10780 /* Return cost of comparison done using sahf operation.
10781 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10782 static int
10783 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10784 {
10785 enum rtx_code bypass_code, first_code, second_code;
10786 /* Return an arbitrarily high cost when the instruction is not preferred - this
10787 keeps gcc from using it. */
10788 if (!TARGET_USE_SAHF && !optimize_size)
10789 return 1024;
10790 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10791 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10792 }
10793
10794 /* Compute cost of the comparison done using any method.
10795 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10796 static int
10797 ix86_fp_comparison_cost (enum rtx_code code)
10798 {
10799 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10800 int min;
10801
10802 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10803 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10804
10805 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10806 if (min > sahf_cost)
10807 min = sahf_cost;
10808 if (min > fcomi_cost)
10809 min = fcomi_cost;
10810 return min;
10811 }
10812
10813 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10814
10815 static rtx
10816 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10817 rtx *second_test, rtx *bypass_test)
10818 {
10819 enum machine_mode fpcmp_mode, intcmp_mode;
10820 rtx tmp, tmp2;
10821 int cost = ix86_fp_comparison_cost (code);
10822 enum rtx_code bypass_code, first_code, second_code;
10823
10824 fpcmp_mode = ix86_fp_compare_mode (code);
10825 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10826
10827 if (second_test)
10828 *second_test = NULL_RTX;
10829 if (bypass_test)
10830 *bypass_test = NULL_RTX;
10831
10832 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10833
10834 /* Do fcomi/sahf based test when profitable. */
10835 if ((bypass_code == UNKNOWN || bypass_test)
10836 && (second_code == UNKNOWN || second_test)
10837 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10838 {
10839 if (TARGET_CMOVE)
10840 {
10841 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10842 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10843 tmp);
10844 emit_insn (tmp);
10845 }
10846 else
10847 {
10848 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10849 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10850 if (!scratch)
10851 scratch = gen_reg_rtx (HImode);
10852 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10853 emit_insn (gen_x86_sahf_1 (scratch));
10854 }
10855
10856 /* The FP codes work out to act like unsigned. */
10857 intcmp_mode = fpcmp_mode;
10858 code = first_code;
10859 if (bypass_code != UNKNOWN)
10860 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10861 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10862 const0_rtx);
10863 if (second_code != UNKNOWN)
10864 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10865 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10866 const0_rtx);
10867 }
10868 else
10869 {
10870 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10871 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10872 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10873 if (!scratch)
10874 scratch = gen_reg_rtx (HImode);
10875 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10876
10877 /* In the unordered case, we have to check C2 for NaN's, which
10878 doesn't happen to work out to anything nice combination-wise.
10879 So do some bit twiddling on the value we've got in AH to come
10880 up with an appropriate set of condition codes. */
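 /* In AH (the high byte of the fnstsw result) C0, C2 and C3 live at
 bits 0x01, 0x04 and 0x40, so (AH & 0x45) is 0x00 for >, 0x01 for <,
 0x40 for == and 0x45 for unordered; the masks below test for those
 patterns. */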
10881
10882 intcmp_mode = CCNOmode;
10883 switch (code)
10884 {
10885 case GT:
10886 case UNGT:
10887 if (code == GT || !TARGET_IEEE_FP)
10888 {
10889 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10890 code = EQ;
10891 }
10892 else
10893 {
10894 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10895 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10896 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10897 intcmp_mode = CCmode;
10898 code = GEU;
10899 }
10900 break;
10901 case LT:
10902 case UNLT:
10903 if (code == LT && TARGET_IEEE_FP)
10904 {
10905 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10906 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10907 intcmp_mode = CCmode;
10908 code = EQ;
10909 }
10910 else
10911 {
10912 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10913 code = NE;
10914 }
10915 break;
10916 case GE:
10917 case UNGE:
10918 if (code == GE || !TARGET_IEEE_FP)
10919 {
10920 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10921 code = EQ;
10922 }
10923 else
10924 {
10925 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10926 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10927 GEN_INT (0x01)));
10928 code = NE;
10929 }
10930 break;
10931 case LE:
10932 case UNLE:
10933 if (code == LE && TARGET_IEEE_FP)
10934 {
10935 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10936 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10937 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10938 intcmp_mode = CCmode;
10939 code = LTU;
10940 }
10941 else
10942 {
10943 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10944 code = NE;
10945 }
10946 break;
10947 case EQ:
10948 case UNEQ:
10949 if (code == EQ && TARGET_IEEE_FP)
10950 {
10951 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10952 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10953 intcmp_mode = CCmode;
10954 code = EQ;
10955 }
10956 else
10957 {
10958 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10959 code = NE;
10960 break;
10961 }
10962 break;
10963 case NE:
10964 case LTGT:
10965 if (code == NE && TARGET_IEEE_FP)
10966 {
10967 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10968 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10969 GEN_INT (0x40)));
10970 code = NE;
10971 }
10972 else
10973 {
10974 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10975 code = EQ;
10976 }
10977 break;
10978
10979 case UNORDERED:
10980 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10981 code = NE;
10982 break;
10983 case ORDERED:
10984 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10985 code = EQ;
10986 break;
10987
10988 default:
10989 gcc_unreachable ();
10990 }
10991 }
10992
10993 /* Return the test that should be put into the flags user, i.e.
10994 the bcc, scc, or cmov instruction. */
10995 return gen_rtx_fmt_ee (code, VOIDmode,
10996 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10997 const0_rtx);
10998 }
10999
11000 rtx
11001 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11002 {
11003 rtx op0, op1, ret;
11004 op0 = ix86_compare_op0;
11005 op1 = ix86_compare_op1;
11006
11007 if (second_test)
11008 *second_test = NULL_RTX;
11009 if (bypass_test)
11010 *bypass_test = NULL_RTX;
11011
11012 if (ix86_compare_emitted)
11013 {
11014 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11015 ix86_compare_emitted = NULL_RTX;
11016 }
11017 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11018 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11019 second_test, bypass_test);
11020 else
11021 ret = ix86_expand_int_compare (code, op0, op1);
11022
11023 return ret;
11024 }
11025
11026 /* Return true if CODE will result in a nontrivial jump sequence. */
11027 bool
11028 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11029 {
11030 enum rtx_code bypass_code, first_code, second_code;
11031 if (!TARGET_CMOVE)
11032 return true;
11033 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11034 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11035 }
11036
11037 void
11038 ix86_expand_branch (enum rtx_code code, rtx label)
11039 {
11040 rtx tmp;
11041
11042 /* If we have emitted a compare insn, go straight to simple.
11043 ix86_expand_compare won't emit anything if ix86_compare_emitted
11044 is non-NULL. */
11045 if (ix86_compare_emitted)
11046 goto simple;
11047
11048 switch (GET_MODE (ix86_compare_op0))
11049 {
11050 case QImode:
11051 case HImode:
11052 case SImode:
11053 simple:
11054 tmp = ix86_expand_compare (code, NULL, NULL);
11055 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11056 gen_rtx_LABEL_REF (VOIDmode, label),
11057 pc_rtx);
11058 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11059 return;
11060
11061 case SFmode:
11062 case DFmode:
11063 case XFmode:
11064 {
11065 rtvec vec;
11066 int use_fcomi;
11067 enum rtx_code bypass_code, first_code, second_code;
11068
11069 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11070 &ix86_compare_op1);
11071
11072 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11073
11074 /* Check whether we will use the natural sequence with one jump. If
11075 so, we can expand the jump early. Otherwise delay expansion by
11076 creating a compound insn so as not to confuse the optimizers. */
11077 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11078 && TARGET_CMOVE)
11079 {
11080 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11081 gen_rtx_LABEL_REF (VOIDmode, label),
11082 pc_rtx, NULL_RTX, NULL_RTX);
11083 }
11084 else
11085 {
11086 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11087 ix86_compare_op0, ix86_compare_op1);
11088 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11089 gen_rtx_LABEL_REF (VOIDmode, label),
11090 pc_rtx);
11091 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11092
11093 use_fcomi = ix86_use_fcomi_compare (code);
11094 vec = rtvec_alloc (3 + !use_fcomi);
11095 RTVEC_ELT (vec, 0) = tmp;
11096 RTVEC_ELT (vec, 1)
11097 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11098 RTVEC_ELT (vec, 2)
11099 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11100 if (! use_fcomi)
11101 RTVEC_ELT (vec, 3)
11102 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11103
11104 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11105 }
11106 return;
11107 }
11108
11109 case DImode:
11110 if (TARGET_64BIT)
11111 goto simple;
11112 case TImode:
11113 /* Expand a double-word branch into multiple compare+branch. */
11114 {
11115 rtx lo[2], hi[2], label2;
11116 enum rtx_code code1, code2, code3;
11117 enum machine_mode submode;
11118
11119 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11120 {
11121 tmp = ix86_compare_op0;
11122 ix86_compare_op0 = ix86_compare_op1;
11123 ix86_compare_op1 = tmp;
11124 code = swap_condition (code);
11125 }
11126 if (GET_MODE (ix86_compare_op0) == DImode)
11127 {
11128 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11129 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11130 submode = SImode;
11131 }
11132 else
11133 {
11134 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11135 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11136 submode = DImode;
11137 }
11138
11139 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11140 avoid two branches. This costs one extra insn, so disable when
11141 optimizing for size. */
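 /* For a DImode equality this yields, roughly:
 *
 *	xorl	hi1, hi0
 *	xorl	lo1, lo0
 *	orl	hi0, lo0
 *	j(n)e	label
 */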
11142
11143 if ((code == EQ || code == NE)
11144 && (!optimize_size
11145 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11146 {
11147 rtx xor0, xor1;
11148
11149 xor1 = hi[0];
11150 if (hi[1] != const0_rtx)
11151 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11152 NULL_RTX, 0, OPTAB_WIDEN);
11153
11154 xor0 = lo[0];
11155 if (lo[1] != const0_rtx)
11156 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11157 NULL_RTX, 0, OPTAB_WIDEN);
11158
11159 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11160 NULL_RTX, 0, OPTAB_WIDEN);
11161
11162 ix86_compare_op0 = tmp;
11163 ix86_compare_op1 = const0_rtx;
11164 ix86_expand_branch (code, label);
11165 return;
11166 }
11167
11168 /* Otherwise, if we are doing a less-than or greater-or-equal-than
11169 comparison, op1 is a constant and the low word is zero, then we can
11170 just examine the high word. */
11171
11172 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11173 switch (code)
11174 {
11175 case LT: case LTU: case GE: case GEU:
11176 ix86_compare_op0 = hi[0];
11177 ix86_compare_op1 = hi[1];
11178 ix86_expand_branch (code, label);
11179 return;
11180 default:
11181 break;
11182 }
11183
11184 /* Otherwise, we need two or three jumps. */
11185
11186 label2 = gen_label_rtx ();
11187
11188 code1 = code;
11189 code2 = swap_condition (code);
11190 code3 = unsigned_condition (code);
11191
11192 switch (code)
11193 {
11194 case LT: case GT: case LTU: case GTU:
11195 break;
11196
11197 case LE: code1 = LT; code2 = GT; break;
11198 case GE: code1 = GT; code2 = LT; break;
11199 case LEU: code1 = LTU; code2 = GTU; break;
11200 case GEU: code1 = GTU; code2 = LTU; break;
11201
11202 case EQ: code1 = UNKNOWN; code2 = NE; break;
11203 case NE: code2 = UNKNOWN; break;
11204
11205 default:
11206 gcc_unreachable ();
11207 }
11208
11209 /*
11210 * a < b =>
11211 * if (hi(a) < hi(b)) goto true;
11212 * if (hi(a) > hi(b)) goto false;
11213 * if (lo(a) < lo(b)) goto true;
11214 * false:
11215 */
11216
11217 ix86_compare_op0 = hi[0];
11218 ix86_compare_op1 = hi[1];
11219
11220 if (code1 != UNKNOWN)
11221 ix86_expand_branch (code1, label);
11222 if (code2 != UNKNOWN)
11223 ix86_expand_branch (code2, label2);
11224
11225 ix86_compare_op0 = lo[0];
11226 ix86_compare_op1 = lo[1];
11227 ix86_expand_branch (code3, label);
11228
11229 if (code2 != UNKNOWN)
11230 emit_label (label2);
11231 return;
11232 }
11233
11234 default:
11235 gcc_unreachable ();
11236 }
11237 }
11238
11239 /* Split branch based on floating point condition. */
11240 void
11241 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11242 rtx target1, rtx target2, rtx tmp, rtx pushed)
11243 {
11244 rtx second, bypass;
11245 rtx label = NULL_RTX;
11246 rtx condition;
11247 int bypass_probability = -1, second_probability = -1, probability = -1;
11248 rtx i;
11249
11250 if (target2 != pc_rtx)
11251 {
11252 rtx tmp = target2;
11253 code = reverse_condition_maybe_unordered (code);
11254 target2 = target1;
11255 target1 = tmp;
11256 }
11257
11258 condition = ix86_expand_fp_compare (code, op1, op2,
11259 tmp, &second, &bypass);
11260
11261 /* Remove pushed operand from stack. */
11262 if (pushed)
11263 ix86_free_from_memory (GET_MODE (pushed));
11264
11265 if (split_branch_probability >= 0)
11266 {
11267 /* Distribute the probabilities across the jumps.
11268 Assume that BYPASS and SECOND are always tests
11269 for UNORDERED. */
11270 probability = split_branch_probability;
11271
11272 /* A value of 1 is low enough that there is no need to update the
11273 probability. Later we may run some experiments and see
11274 whether unordered values are more frequent in practice. */
11275 if (bypass)
11276 bypass_probability = 1;
11277 if (second)
11278 second_probability = 1;
11279 }
11280 if (bypass != NULL_RTX)
11281 {
11282 label = gen_label_rtx ();
11283 i = emit_jump_insn (gen_rtx_SET
11284 (VOIDmode, pc_rtx,
11285 gen_rtx_IF_THEN_ELSE (VOIDmode,
11286 bypass,
11287 gen_rtx_LABEL_REF (VOIDmode,
11288 label),
11289 pc_rtx)));
11290 if (bypass_probability >= 0)
11291 REG_NOTES (i)
11292 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11293 GEN_INT (bypass_probability),
11294 REG_NOTES (i));
11295 }
11296 i = emit_jump_insn (gen_rtx_SET
11297 (VOIDmode, pc_rtx,
11298 gen_rtx_IF_THEN_ELSE (VOIDmode,
11299 condition, target1, target2)));
11300 if (probability >= 0)
11301 REG_NOTES (i)
11302 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11303 GEN_INT (probability),
11304 REG_NOTES (i));
11305 if (second != NULL_RTX)
11306 {
11307 i = emit_jump_insn (gen_rtx_SET
11308 (VOIDmode, pc_rtx,
11309 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11310 target2)));
11311 if (second_probability >= 0)
11312 REG_NOTES (i)
11313 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11314 GEN_INT (second_probability),
11315 REG_NOTES (i));
11316 }
11317 if (label != NULL_RTX)
11318 emit_label (label);
11319 }
11320
11321 int
11322 ix86_expand_setcc (enum rtx_code code, rtx dest)
11323 {
11324 rtx ret, tmp, tmpreg, equiv;
11325 rtx second_test, bypass_test;
11326
11327 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11328 return 0; /* FAIL */
11329
11330 gcc_assert (GET_MODE (dest) == QImode);
11331
11332 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11333 PUT_MODE (ret, QImode);
11334
11335 tmp = dest;
11336 tmpreg = dest;
11337
11338 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11339 if (bypass_test || second_test)
11340 {
11341 rtx test = second_test;
11342 int bypass = 0;
11343 rtx tmp2 = gen_reg_rtx (QImode);
11344 if (bypass_test)
11345 {
11346 gcc_assert (!second_test);
11347 test = bypass_test;
11348 bypass = 1;
11349 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11350 }
11351 PUT_MODE (test, QImode);
11352 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11353
11354 if (bypass)
11355 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11356 else
11357 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11358 }
11359
11360 /* Attach a REG_EQUAL note describing the comparison result. */
11361 if (ix86_compare_op0 && ix86_compare_op1)
11362 {
11363 equiv = simplify_gen_relational (code, QImode,
11364 GET_MODE (ix86_compare_op0),
11365 ix86_compare_op0, ix86_compare_op1);
11366 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11367 }
11368
11369 return 1; /* DONE */
11370 }
11371
11372 /* Expand a comparison setting or clearing the carry flag. Return true
11373 when successful, and set *POP to the comparison operation. */
11374 static bool
11375 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11376 {
11377 enum machine_mode mode =
11378 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11379
11380 /* Do not handle double-word compares, which go through a special path.
11381 FP compares are handled below, but only when they reduce to a carry flag test. */
11382 if (mode == (TARGET_64BIT ? TImode : DImode))
11383 return false;
11384 if (FLOAT_MODE_P (mode))
11385 {
11386 rtx second_test = NULL, bypass_test = NULL;
11387 rtx compare_op, compare_seq;
11388
11389 /* Shortcut: the following common codes never translate into carry flag compares. */
11390 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11391 || code == ORDERED || code == UNORDERED)
11392 return false;
11393
11394 /* These comparisons require the zero flag; swap the operands so they won't. */
11395 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11396 && !TARGET_IEEE_FP)
11397 {
11398 rtx tmp = op0;
11399 op0 = op1;
11400 op1 = tmp;
11401 code = swap_condition (code);
11402 }
11403
11404 /* Try to expand the comparison and verify that we end up with a carry flag
11405 based comparison. This fails to be true only when we decide to expand the
11406 comparison using arithmetic, which is not a common scenario. */
11407 start_sequence ();
11408 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11409 &second_test, &bypass_test);
11410 compare_seq = get_insns ();
11411 end_sequence ();
11412
11413 if (second_test || bypass_test)
11414 return false;
11415 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11416 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11417 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11418 else
11419 code = GET_CODE (compare_op);
11420 if (code != LTU && code != GEU)
11421 return false;
11422 emit_insn (compare_seq);
11423 *pop = compare_op;
11424 return true;
11425 }
11426 if (!INTEGRAL_MODE_P (mode))
11427 return false;
11428 switch (code)
11429 {
11430 case LTU:
11431 case GEU:
11432 break;
11433
11434 /* Convert a==0 into (unsigned)a<1. */
11435 case EQ:
11436 case NE:
11437 if (op1 != const0_rtx)
11438 return false;
11439 op1 = const1_rtx;
11440 code = (code == EQ ? LTU : GEU);
11441 break;
11442
11443 /* Convert a>b into b<a or a>=b-1. */
11444 case GTU:
11445 case LEU:
11446 if (CONST_INT_P (op1))
11447 {
11448 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11449 /* Bail out on overflow. We could still swap the operands, but that
11450 would force loading the constant into a register. */
11451 if (op1 == const0_rtx
11452 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11453 return false;
11454 code = (code == GTU ? GEU : LTU);
11455 }
11456 else
11457 {
11458 rtx tmp = op1;
11459 op1 = op0;
11460 op0 = tmp;
11461 code = (code == GTU ? LTU : GEU);
11462 }
11463 break;
11464
11465 /* Convert a>=0 into (unsigned)a<0x80000000. */
11466 case LT:
11467 case GE:
11468 if (mode == DImode || op1 != const0_rtx)
11469 return false;
11470 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11471 code = (code == LT ? GEU : LTU);
11472 break;
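 /* Likewise convert a<=-1 into (unsigned)a>=0x80000000 and a>-1 into
 (unsigned)a<0x80000000. */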
11473 case LE:
11474 case GT:
11475 if (mode == DImode || op1 != constm1_rtx)
11476 return false;
11477 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11478 code = (code == LE ? GEU : LTU);
11479 break;
11480
11481 default:
11482 return false;
11483 }
11484 /* Swapping operands may cause a constant to appear as the first operand. */
11485 if (!nonimmediate_operand (op0, VOIDmode))
11486 {
11487 if (no_new_pseudos)
11488 return false;
11489 op0 = force_reg (mode, op0);
11490 }
11491 ix86_compare_op0 = op0;
11492 ix86_compare_op1 = op1;
11493 *pop = ix86_expand_compare (code, NULL, NULL);
11494 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11495 return true;
11496 }
11497
11498 int
11499 ix86_expand_int_movcc (rtx operands[])
11500 {
11501 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11502 rtx compare_seq, compare_op;
11503 rtx second_test, bypass_test;
11504 enum machine_mode mode = GET_MODE (operands[0]);
11505 bool sign_bit_compare_p = false;
11506
11507 start_sequence ();
11508 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11509 compare_seq = get_insns ();
11510 end_sequence ();
11511
11512 compare_code = GET_CODE (compare_op);
11513
11514 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11515 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11516 sign_bit_compare_p = true;
11517
11518 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11519 HImode insns, we'd be swallowed in word prefix ops. */
11520
11521 if ((mode != HImode || TARGET_FAST_PREFIX)
11522 && (mode != (TARGET_64BIT ? TImode : DImode))
11523 && CONST_INT_P (operands[2])
11524 && CONST_INT_P (operands[3]))
11525 {
11526 rtx out = operands[0];
11527 HOST_WIDE_INT ct = INTVAL (operands[2]);
11528 HOST_WIDE_INT cf = INTVAL (operands[3]);
11529 HOST_WIDE_INT diff;
11530
11531 diff = ct - cf;
11532 /* Sign bit compares are better done using shifts than by using
11533 sbb. */
11534 if (sign_bit_compare_p
11535 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11536 ix86_compare_op1, &compare_op))
11537 {
11538 /* Detect overlap between destination and compare sources. */
11539 rtx tmp = out;
11540
11541 if (!sign_bit_compare_p)
11542 {
11543 bool fpcmp = false;
11544
11545 compare_code = GET_CODE (compare_op);
11546
11547 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11548 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11549 {
11550 fpcmp = true;
11551 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11552 }
11553
11554 /* To simplify the rest of the code, restrict to the GEU case. */
11555 if (compare_code == LTU)
11556 {
11557 HOST_WIDE_INT tmp = ct;
11558 ct = cf;
11559 cf = tmp;
11560 compare_code = reverse_condition (compare_code);
11561 code = reverse_condition (code);
11562 }
11563 else
11564 {
11565 if (fpcmp)
11566 PUT_CODE (compare_op,
11567 reverse_condition_maybe_unordered
11568 (GET_CODE (compare_op)));
11569 else
11570 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11571 }
11572 diff = ct - cf;
11573
11574 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11575 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11576 tmp = gen_reg_rtx (mode);
11577
11578 if (mode == DImode)
11579 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11580 else
11581 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11582 }
11583 else
11584 {
11585 if (code == GT || code == GE)
11586 code = reverse_condition (code);
11587 else
11588 {
11589 HOST_WIDE_INT tmp = ct;
11590 ct = cf;
11591 cf = tmp;
11592 diff = ct - cf;
11593 }
11594 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11595 ix86_compare_op1, VOIDmode, 0, -1);
11596 }
11597
11598 if (diff == 1)
11599 {
11600 /*
11601 * cmpl op0,op1
11602 * sbbl dest,dest
11603 * [addl dest, ct]
11604 *
11605 * Size 5 - 8.
11606 */
11607 if (ct)
11608 tmp = expand_simple_binop (mode, PLUS,
11609 tmp, GEN_INT (ct),
11610 copy_rtx (tmp), 1, OPTAB_DIRECT);
11611 }
11612 else if (cf == -1)
11613 {
11614 /*
11615 * cmpl op0,op1
11616 * sbbl dest,dest
11617 * orl $ct, dest
11618 *
11619 * Size 8.
11620 */
11621 tmp = expand_simple_binop (mode, IOR,
11622 tmp, GEN_INT (ct),
11623 copy_rtx (tmp), 1, OPTAB_DIRECT);
11624 }
11625 else if (diff == -1 && ct)
11626 {
11627 /*
11628 * cmpl op0,op1
11629 * sbbl dest,dest
11630 * notl dest
11631 * [addl dest, cf]
11632 *
11633 * Size 8 - 11.
11634 */
11635 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11636 if (cf)
11637 tmp = expand_simple_binop (mode, PLUS,
11638 copy_rtx (tmp), GEN_INT (cf),
11639 copy_rtx (tmp), 1, OPTAB_DIRECT);
11640 }
11641 else
11642 {
11643 /*
11644 * cmpl op0,op1
11645 * sbbl dest,dest
11646 * [notl dest]
11647 * andl cf - ct, dest
11648 * [addl dest, ct]
11649 *
11650 * Size 8 - 11.
11651 */
11652
11653 if (cf == 0)
11654 {
11655 cf = ct;
11656 ct = 0;
11657 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11658 }
11659
11660 tmp = expand_simple_binop (mode, AND,
11661 copy_rtx (tmp),
11662 gen_int_mode (cf - ct, mode),
11663 copy_rtx (tmp), 1, OPTAB_DIRECT);
11664 if (ct)
11665 tmp = expand_simple_binop (mode, PLUS,
11666 copy_rtx (tmp), GEN_INT (ct),
11667 copy_rtx (tmp), 1, OPTAB_DIRECT);
11668 }
11669
11670 if (!rtx_equal_p (tmp, out))
11671 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11672
11673 return 1; /* DONE */
11674 }
11675
11676 if (diff < 0)
11677 {
11678 HOST_WIDE_INT tmp;
11679 tmp = ct, ct = cf, cf = tmp;
11680 diff = -diff;
11681 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11682 {
11683 /* We may be reversing an unordered compare to a normal compare, which
11684 is not valid in general (we may convert a non-trapping condition
11685 to a trapping one); however, on i386 we currently emit all
11686 comparisons unordered. */
11687 compare_code = reverse_condition_maybe_unordered (compare_code);
11688 code = reverse_condition_maybe_unordered (code);
11689 }
11690 else
11691 {
11692 compare_code = reverse_condition (compare_code);
11693 code = reverse_condition (code);
11694 }
11695 }
11696
11697 compare_code = UNKNOWN;
11698 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11699 && CONST_INT_P (ix86_compare_op1))
11700 {
11701 if (ix86_compare_op1 == const0_rtx
11702 && (code == LT || code == GE))
11703 compare_code = code;
11704 else if (ix86_compare_op1 == constm1_rtx)
11705 {
11706 if (code == LE)
11707 compare_code = LT;
11708 else if (code == GT)
11709 compare_code = GE;
11710 }
11711 }
11712
11713 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11714 if (compare_code != UNKNOWN
11715 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11716 && (cf == -1 || ct == -1))
11717 {
11718 /* If the lea code below could be used, only optimize
11719 if it results in a 2-insn sequence. */
11720
11721 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11722 || diff == 3 || diff == 5 || diff == 9)
11723 || (compare_code == LT && ct == -1)
11724 || (compare_code == GE && cf == -1))
11725 {
11726 /*
11727 * notl op1 (if necessary)
11728 * sarl $31, op1
11729 * orl cf, op1
11730 */
11731 if (ct != -1)
11732 {
11733 cf = ct;
11734 ct = -1;
11735 code = reverse_condition (code);
11736 }
11737
11738 out = emit_store_flag (out, code, ix86_compare_op0,
11739 ix86_compare_op1, VOIDmode, 0, -1);
11740
11741 out = expand_simple_binop (mode, IOR,
11742 out, GEN_INT (cf),
11743 out, 1, OPTAB_DIRECT);
11744 if (out != operands[0])
11745 emit_move_insn (operands[0], out);
11746
11747 return 1; /* DONE */
11748 }
11749 }
11750
11751
11752 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11753 || diff == 3 || diff == 5 || diff == 9)
11754 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11755 && (mode != DImode
11756 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11757 {
11758 /*
11759 * xorl dest,dest
11760 * cmpl op1,op2
11761 * setcc dest
11762 * lea cf(dest*(ct-cf)),dest
11763 *
11764 * Size 14.
11765 *
11766 * This also catches the degenerate setcc-only case.
11767 */
11768
11769 rtx tmp;
11770 int nops;
11771
11772 out = emit_store_flag (out, code, ix86_compare_op0,
11773 ix86_compare_op1, VOIDmode, 0, 1);
11774
11775 nops = 0;
11776 /* On x86_64 the lea instruction operates on Pmode, so we need
11777 the arithmetic done in the proper mode to match. */
11778 if (diff == 1)
11779 tmp = copy_rtx (out);
11780 else
11781 {
11782 rtx out1;
11783 out1 = copy_rtx (out);
11784 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11785 nops++;
11786 if (diff & 1)
11787 {
11788 tmp = gen_rtx_PLUS (mode, tmp, out1);
11789 nops++;
11790 }
11791 }
11792 if (cf != 0)
11793 {
11794 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11795 nops++;
11796 }
11797 if (!rtx_equal_p (tmp, out))
11798 {
11799 if (nops == 1)
11800 out = force_operand (tmp, copy_rtx (out));
11801 else
11802 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11803 }
11804 if (!rtx_equal_p (out, operands[0]))
11805 emit_move_insn (operands[0], copy_rtx (out));
11806
11807 return 1; /* DONE */
11808 }
11809
11810 /*
11811 * General case: Jumpful:
11812 * xorl dest,dest cmpl op1, op2
11813 * cmpl op1, op2 movl ct, dest
11814 * setcc dest jcc 1f
11815 * decl dest movl cf, dest
11816 * andl (cf-ct),dest 1:
11817 * addl ct,dest
11818 *
11819 * Size 20. Size 14.
11820 *
11821 * This is reasonably steep, but branch mispredict costs are
11822 * high on modern cpus, so consider failing only if optimizing
11823 * for space.
11824 */
11825
11826 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11827 && BRANCH_COST >= 2)
11828 {
11829 if (cf == 0)
11830 {
11831 cf = ct;
11832 ct = 0;
11833 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11834 /* We may be reversing an unordered compare to a normal compare,
11835 which is not valid in general (we may convert a non-trapping
11836 condition to a trapping one); however, on i386 we currently
11837 emit all comparisons unordered. */
11838 code = reverse_condition_maybe_unordered (code);
11839 else
11840 {
11841 code = reverse_condition (code);
11842 if (compare_code != UNKNOWN)
11843 compare_code = reverse_condition (compare_code);
11844 }
11845 }
11846
11847 if (compare_code != UNKNOWN)
11848 {
11849 /* notl op1 (if needed)
11850 sarl $31, op1
11851 andl (cf-ct), op1
11852 addl ct, op1
11853
11854 For x < 0 (resp. x <= -1) there will be no notl,
11855 so if possible swap the constants to get rid of the
11856 complement.
11857 True/false will be -1/0 while code below (store flag
11858 followed by decrement) is 0/-1, so the constants need
11859 to be exchanged once more. */
11860
11861 if (compare_code == GE || !cf)
11862 {
11863 code = reverse_condition (code);
11864 compare_code = LT;
11865 }
11866 else
11867 {
11868 HOST_WIDE_INT tmp = cf;
11869 cf = ct;
11870 ct = tmp;
11871 }
11872
11873 out = emit_store_flag (out, code, ix86_compare_op0,
11874 ix86_compare_op1, VOIDmode, 0, -1);
11875 }
11876 else
11877 {
11878 out = emit_store_flag (out, code, ix86_compare_op0,
11879 ix86_compare_op1, VOIDmode, 0, 1);
11880
11881 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11882 copy_rtx (out), 1, OPTAB_DIRECT);
11883 }
11884
11885 out = expand_simple_binop (mode, AND, copy_rtx (out),
11886 gen_int_mode (cf - ct, mode),
11887 copy_rtx (out), 1, OPTAB_DIRECT);
11888 if (ct)
11889 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11890 copy_rtx (out), 1, OPTAB_DIRECT);
11891 if (!rtx_equal_p (out, operands[0]))
11892 emit_move_insn (operands[0], copy_rtx (out));
11893
11894 return 1; /* DONE */
11895 }
11896 }
11897
11898 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11899 {
11900 /* Try a few more things with specific constants and a variable. */
11901
11902 optab op;
11903 rtx var, orig_out, out, tmp;
11904
11905 if (BRANCH_COST <= 2)
11906 return 0; /* FAIL */
11907
11908 /* If one of the two operands is an interesting constant, load a
11909 constant with the above and mask it in with a logical operation. */
11910
11911 if (CONST_INT_P (operands[2]))
11912 {
11913 var = operands[3];
11914 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11915 operands[3] = constm1_rtx, op = and_optab;
11916 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11917 operands[3] = const0_rtx, op = ior_optab;
11918 else
11919 return 0; /* FAIL */
11920 }
11921 else if (CONST_INT_P (operands[3]))
11922 {
11923 var = operands[2];
11924 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11925 operands[2] = constm1_rtx, op = and_optab;
11926 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11927 operands[2] = const0_rtx, op = ior_optab;
11928 else
11929 return 0; /* FAIL */
11930 }
11931 else
11932 return 0; /* FAIL */
11933
11934 orig_out = operands[0];
11935 tmp = gen_reg_rtx (mode);
11936 operands[0] = tmp;
11937
11938 /* Recurse to get the constant loaded. */
11939 if (ix86_expand_int_movcc (operands) == 0)
11940 return 0; /* FAIL */
11941
11942 /* Mask in the interesting variable. */
11943 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11944 OPTAB_WIDEN);
11945 if (!rtx_equal_p (out, orig_out))
11946 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11947
11948 return 1; /* DONE */
11949 }
11950
11951 /*
11952 * For comparison with above,
11953 *
11954 * movl cf,dest
11955 * movl ct,tmp
11956 * cmpl op1,op2
11957 * cmovcc tmp,dest
11958 *
11959 * Size 15.
11960 */
11961
11962 if (! nonimmediate_operand (operands[2], mode))
11963 operands[2] = force_reg (mode, operands[2]);
11964 if (! nonimmediate_operand (operands[3], mode))
11965 operands[3] = force_reg (mode, operands[3]);
11966
11967 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11968 {
11969 rtx tmp = gen_reg_rtx (mode);
11970 emit_move_insn (tmp, operands[3]);
11971 operands[3] = tmp;
11972 }
11973 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11974 {
11975 rtx tmp = gen_reg_rtx (mode);
11976 emit_move_insn (tmp, operands[2]);
11977 operands[2] = tmp;
11978 }
11979
11980 if (! register_operand (operands[2], VOIDmode)
11981 && (mode == QImode
11982 || ! register_operand (operands[3], VOIDmode)))
11983 operands[2] = force_reg (mode, operands[2]);
11984
11985 if (mode == QImode
11986 && ! register_operand (operands[3], VOIDmode))
11987 operands[3] = force_reg (mode, operands[3]);
11988
11989 emit_insn (compare_seq);
11990 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11991 gen_rtx_IF_THEN_ELSE (mode,
11992 compare_op, operands[2],
11993 operands[3])));
11994 if (bypass_test)
11995 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11996 gen_rtx_IF_THEN_ELSE (mode,
11997 bypass_test,
11998 copy_rtx (operands[3]),
11999 copy_rtx (operands[0]))));
12000 if (second_test)
12001 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12002 gen_rtx_IF_THEN_ELSE (mode,
12003 second_test,
12004 copy_rtx (operands[2]),
12005 copy_rtx (operands[0]))));
12006
12007 return 1; /* DONE */
12008 }
12009
12010 /* Swap, force into registers, or otherwise massage the two operands
12011 to an sse comparison with a mask result. Thus we differ a bit from
12012 ix86_prepare_fp_compare_args which expects to produce a flags result.
12013
12014 The DEST operand exists to help determine whether to commute commutative
12015 operators. The POP0/POP1 operands are updated in place. The new
12016 comparison code is returned, or UNKNOWN if not implementable. */
12017
12018 static enum rtx_code
12019 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12020 rtx *pop0, rtx *pop1)
12021 {
12022 rtx tmp;
12023
12024 switch (code)
12025 {
12026 case LTGT:
12027 case UNEQ:
12028 /* We have no LTGT as an operator. We could implement it with
12029 NE & ORDERED, but this requires an extra temporary. It's
12030 not clear that it's worth it. */
12031 return UNKNOWN;
12032
12033 case LT:
12034 case LE:
12035 case UNGT:
12036 case UNGE:
12037 /* These are supported directly. */
12038 break;
12039
12040 case EQ:
12041 case NE:
12042 case UNORDERED:
12043 case ORDERED:
12044 /* For commutative operators, try to canonicalize the destination
12045 operand to be first in the comparison - this helps reload to
12046 avoid extra moves. */
12047 if (!dest || !rtx_equal_p (dest, *pop1))
12048 break;
12049 /* FALLTHRU */
12050
12051 case GE:
12052 case GT:
12053 case UNLE:
12054 case UNLT:
12055 /* These are not supported directly. Swap the comparison operands
12056 to transform into something that is supported. */
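 /* E.g. a GT turns into an LT with the operands swapped, which maps
 directly onto cmpltps/cmpltss. */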
12057 tmp = *pop0;
12058 *pop0 = *pop1;
12059 *pop1 = tmp;
12060 code = swap_condition (code);
12061 break;
12062
12063 default:
12064 gcc_unreachable ();
12065 }
12066
12067 return code;
12068 }
12069
12070 /* Detect conditional moves that exactly match min/max operational
12071 semantics. Note that this is IEEE safe, as long as we don't
12072 interchange the operands.
12073
12074 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12075 and TRUE if the operation is successful and instructions are emitted. */
12076
12077 static bool
12078 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12079 rtx cmp_op1, rtx if_true, rtx if_false)
12080 {
12081 enum machine_mode mode;
12082 bool is_min;
12083 rtx tmp;
12084
12085 if (code == LT)
12086 ;
12087 else if (code == UNGE)
12088 {
12089 tmp = if_true;
12090 if_true = if_false;
12091 if_false = tmp;
12092 }
12093 else
12094 return false;
12095
12096 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12097 is_min = true;
12098 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12099 is_min = false;
12100 else
12101 return false;
12102
12103 mode = GET_MODE (dest);
12104
12105 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12106 but MODE may be a vector mode and thus not appropriate. */
12107 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12108 {
12109 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12110 rtvec v;
12111
12112 if_true = force_reg (mode, if_true);
12113 v = gen_rtvec (2, if_true, if_false);
12114 tmp = gen_rtx_UNSPEC (mode, v, u);
12115 }
12116 else
12117 {
12118 code = is_min ? SMIN : SMAX;
12119 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12120 }
12121
12122 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12123 return true;
12124 }
12125
12126 /* Expand an sse vector comparison. Return the register with the result. */
12127
12128 static rtx
12129 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12130 rtx op_true, rtx op_false)
12131 {
12132 enum machine_mode mode = GET_MODE (dest);
12133 rtx x;
12134
12135 cmp_op0 = force_reg (mode, cmp_op0);
12136 if (!nonimmediate_operand (cmp_op1, mode))
12137 cmp_op1 = force_reg (mode, cmp_op1);
12138
12139 if (optimize
12140 || reg_overlap_mentioned_p (dest, op_true)
12141 || reg_overlap_mentioned_p (dest, op_false))
12142 dest = gen_reg_rtx (mode);
12143
12144 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12145 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12146
12147 return dest;
12148 }
12149
12150 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12151 operations. This is used for both scalar and vector conditional moves. */
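 /* In the general case this computes dest = (cmp & op_true) | (~cmp & op_false);
 when one arm is known to be zero, only the other AND is emitted. */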
12152
12153 static void
12154 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12155 {
12156 enum machine_mode mode = GET_MODE (dest);
12157 rtx t2, t3, x;
12158
12159 if (op_false == CONST0_RTX (mode))
12160 {
12161 op_true = force_reg (mode, op_true);
12162 x = gen_rtx_AND (mode, cmp, op_true);
12163 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12164 }
12165 else if (op_true == CONST0_RTX (mode))
12166 {
12167 op_false = force_reg (mode, op_false);
12168 x = gen_rtx_NOT (mode, cmp);
12169 x = gen_rtx_AND (mode, x, op_false);
12170 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12171 }
12172 else
12173 {
12174 op_true = force_reg (mode, op_true);
12175 op_false = force_reg (mode, op_false);
12176
12177 t2 = gen_reg_rtx (mode);
12178 if (optimize)
12179 t3 = gen_reg_rtx (mode);
12180 else
12181 t3 = dest;
12182
12183 x = gen_rtx_AND (mode, op_true, cmp);
12184 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12185
12186 x = gen_rtx_NOT (mode, cmp);
12187 x = gen_rtx_AND (mode, x, op_false);
12188 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12189
12190 x = gen_rtx_IOR (mode, t3, t2);
12191 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12192 }
12193 }
12194
12195 /* Expand a floating-point conditional move. Return true if successful. */
12196
12197 int
12198 ix86_expand_fp_movcc (rtx operands[])
12199 {
12200 enum machine_mode mode = GET_MODE (operands[0]);
12201 enum rtx_code code = GET_CODE (operands[1]);
12202 rtx tmp, compare_op, second_test, bypass_test;
12203
12204 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12205 {
12206 enum machine_mode cmode;
12207
12208 /* Since we have no cmove for sse registers, don't force bad register
12209 allocation just to gain access to it. Deny movcc when the
12210 comparison mode doesn't match the move mode. */
12211 cmode = GET_MODE (ix86_compare_op0);
12212 if (cmode == VOIDmode)
12213 cmode = GET_MODE (ix86_compare_op1);
12214 if (cmode != mode)
12215 return 0;
12216
12217 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12218 &ix86_compare_op0,
12219 &ix86_compare_op1);
12220 if (code == UNKNOWN)
12221 return 0;
12222
12223 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12224 ix86_compare_op1, operands[2],
12225 operands[3]))
12226 return 1;
12227
12228 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12229 ix86_compare_op1, operands[2], operands[3]);
12230 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12231 return 1;
12232 }
12233
12237 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12238
12239 /* The floating point conditional move instructions don't directly
12240 support signed integer comparisons. */
12241
12242 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12243 {
12244 gcc_assert (!second_test && !bypass_test);
12245 tmp = gen_reg_rtx (QImode);
12246 ix86_expand_setcc (code, tmp);
12247 code = NE;
12248 ix86_compare_op0 = tmp;
12249 ix86_compare_op1 = const0_rtx;
12250 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12251 }
12252 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12253 {
12254 tmp = gen_reg_rtx (mode);
12255 emit_move_insn (tmp, operands[3]);
12256 operands[3] = tmp;
12257 }
12258 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12259 {
12260 tmp = gen_reg_rtx (mode);
12261 emit_move_insn (tmp, operands[2]);
12262 operands[2] = tmp;
12263 }
12264
12265 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12266 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12267 operands[2], operands[3])));
12268 if (bypass_test)
12269 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12270 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12271 operands[3], operands[0])));
12272 if (second_test)
12273 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12274 gen_rtx_IF_THEN_ELSE (mode, second_test,
12275 operands[2], operands[0])));
12276
12277 return 1;
12278 }
12279
12280 /* Expand a floating-point vector conditional move; a vcond operation
12281 rather than a movcc operation. */
12282
12283 bool
12284 ix86_expand_fp_vcond (rtx operands[])
12285 {
12286 enum rtx_code code = GET_CODE (operands[3]);
12287 rtx cmp;
12288
12289 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12290 &operands[4], &operands[5]);
12291 if (code == UNKNOWN)
12292 return false;
12293
12294 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12295 operands[5], operands[1], operands[2]))
12296 return true;
12297
12298 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12299 operands[1], operands[2]);
12300 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12301 return true;
12302 }
12303
12304 /* Expand a signed integral vector conditional move. */
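 /* Operand 0 is the destination, operands 1 and 2 are the "then" and "else"
 values, and operand 3 is the comparison operator applied to operands 4 and 5. */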
12305
12306 bool
12307 ix86_expand_int_vcond (rtx operands[])
12308 {
12309 enum machine_mode mode = GET_MODE (operands[0]);
12310 enum rtx_code code = GET_CODE (operands[3]);
12311 bool negate = false;
12312 rtx x, cop0, cop1;
12313
12314 cop0 = operands[4];
12315 cop1 = operands[5];
12316
12317 /* Canonicalize the comparison to EQ, GT, GTU. */
12318 switch (code)
12319 {
12320 case EQ:
12321 case GT:
12322 case GTU:
12323 break;
12324
12325 case NE:
12326 case LE:
12327 case LEU:
12328 code = reverse_condition (code);
12329 negate = true;
12330 break;
12331
12332 case GE:
12333 case GEU:
12334 code = reverse_condition (code);
12335 negate = true;
12336 /* FALLTHRU */
12337
12338 case LT:
12339 case LTU:
12340 code = swap_condition (code);
12341 x = cop0, cop0 = cop1, cop1 = x;
12342 break;
12343
12344 default:
12345 gcc_unreachable ();
12346 }
12347
12348 /* Unsigned parallel compare is not supported by the hardware. Play some
12349 tricks to turn this into a signed comparison. */
12350 if (code == GTU)
12351 {
12352 cop0 = force_reg (mode, cop0);
12353
12354 switch (mode)
12355 {
12356 case V4SImode:
12357 {
12358 rtx t1, t2, mask;
12359
12360 /* There is no unsigned vector compare, but a >u b is equivalent to
12361 (a - 0x80000000) >s (b - 0x80000000), i.e. flipping the sign bit of
12362 both operands, so bias them and let the signed compare do the work. */
12363 cop1 = force_reg (mode, cop1);
12364 mask = GEN_INT (-0x80000000);
12365 mask = gen_rtx_CONST_VECTOR (mode,
12366 gen_rtvec (4, mask, mask, mask, mask));
12367 mask = force_reg (mode, mask);
12368
12369 t1 = gen_reg_rtx (mode);
12370 emit_insn (gen_subv4si3 (t1, cop0, mask));
12371 t2 = gen_reg_rtx (mode);
12372 emit_insn (gen_subv4si3 (t2, cop1, mask));
12373
12374 cop0 = t1;
12375 cop1 = t2;
12376 code = GT;
12377 }
12378 break;
12379
12380 case V16QImode:
12381 case V8HImode:
12382 /* Perform a parallel unsigned saturating subtraction; the result
12383 is zero exactly when cop0 <=u cop1. */
12384 x = gen_reg_rtx (mode);
12385 emit_insn (gen_rtx_SET (VOIDmode, x,
12386 gen_rtx_US_MINUS (mode, cop0, cop1)));
12387
12388 cop0 = x;
12389 cop1 = CONST0_RTX (mode);
12390 code = EQ;
12391 negate = !negate;
12392 break;
12393
12394 default:
12395 gcc_unreachable ();
12396 }
12397
12398 }
12399
12400 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12401 operands[1+negate], operands[2-negate]);
12402
12403 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12404 operands[2-negate]);
12405 return true;
12406 }
12407
12408 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12409 true if we should do zero extension, else sign extension. HIGH_P is
12410 true if we want the N/2 high elements, else the low elements. */
12411
12412 void
12413 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12414 {
12415 enum machine_mode imode = GET_MODE (operands[1]);
12416 rtx (*unpack)(rtx, rtx, rtx);
12417 rtx se, dest;
12418
12419 switch (imode)
12420 {
12421 case V16QImode:
12422 if (high_p)
12423 unpack = gen_vec_interleave_highv16qi;
12424 else
12425 unpack = gen_vec_interleave_lowv16qi;
12426 break;
12427 case V8HImode:
12428 if (high_p)
12429 unpack = gen_vec_interleave_highv8hi;
12430 else
12431 unpack = gen_vec_interleave_lowv8hi;
12432 break;
12433 case V4SImode:
12434 if (high_p)
12435 unpack = gen_vec_interleave_highv4si;
12436 else
12437 unpack = gen_vec_interleave_lowv4si;
12438 break;
12439 default:
12440 gcc_unreachable ();
12441 }
12442
12443 dest = gen_lowpart (imode, operands[0]);
12444
12445 if (unsigned_p)
12446 se = force_reg (imode, CONST0_RTX (imode));
12447 else
12448 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12449 operands[1], pc_rtx, pc_rtx);
12450
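 /* For the signed case SE is a mask of the sign bits (0 > OP), so
 interleaving OP with SE produces sign-extended wider elements. */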
12451 emit_insn (unpack (dest, operands[1], se));
12452 }
12453
12454 /* Expand conditional increment or decrement using adc/sbb instructions.
12455 The default case using setcc followed by the conditional move can be
12456 done by generic code. */
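 /* For example, x += (a < b) with unsigned operands typically becomes a
 cmp followed by adc $0, x; x -= (a < b) becomes a cmp followed by
 sbb $0, x. */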
12457 int
12458 ix86_expand_int_addcc (rtx operands[])
12459 {
12460 enum rtx_code code = GET_CODE (operands[1]);
12461 rtx compare_op;
12462 rtx val = const0_rtx;
12463 bool fpcmp = false;
12464 enum machine_mode mode = GET_MODE (operands[0]);
12465
12466 if (operands[3] != const1_rtx
12467 && operands[3] != constm1_rtx)
12468 return 0;
12469 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12470 ix86_compare_op1, &compare_op))
12471 return 0;
12472 code = GET_CODE (compare_op);
12473
12474 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12475 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12476 {
12477 fpcmp = true;
12478 code = ix86_fp_compare_code_to_integer (code);
12479 }
12480
12481 if (code != LTU)
12482 {
12483 val = constm1_rtx;
12484 if (fpcmp)
12485 PUT_CODE (compare_op,
12486 reverse_condition_maybe_unordered
12487 (GET_CODE (compare_op)));
12488 else
12489 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12490 }
12491 PUT_MODE (compare_op, mode);
12492
12493 /* Construct either adc or sbb insn. */
12494 if ((code == LTU) == (operands[3] == constm1_rtx))
12495 {
12496 switch (GET_MODE (operands[0]))
12497 {
12498 case QImode:
12499 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12500 break;
12501 case HImode:
12502 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12503 break;
12504 case SImode:
12505 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12506 break;
12507 case DImode:
12508 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12509 break;
12510 default:
12511 gcc_unreachable ();
12512 }
12513 }
12514 else
12515 {
12516 switch (GET_MODE (operands[0]))
12517 {
12518 case QImode:
12519 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12520 break;
12521 case HImode:
12522 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12523 break;
12524 case SImode:
12525 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12526 break;
12527 case DImode:
12528 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12529 break;
12530 default:
12531 gcc_unreachable ();
12532 }
12533 }
12534 return 1; /* DONE */
12535 }
12536
12537
12538 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12539 works for floating point parameters and non-offsettable memories.
12540 For pushes, it returns just stack offsets; the values will be saved
12541 in the right order. At most three parts are generated. */
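 /* For example, an XFmode value is returned as three SImode parts on ia32
 and as a DImode part plus an SImode part on x86_64. */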
12542
12543 static int
12544 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12545 {
12546 int size;
12547
12548 if (!TARGET_64BIT)
12549 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12550 else
12551 size = (GET_MODE_SIZE (mode) + 4) / 8;
12552
12553 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12554 gcc_assert (size >= 2 && size <= 3);
12555
12556 /* Optimize constant pool references to immediates. This is used by fp
12557 moves, which force all constants to memory to allow combining. */
12558 if (MEM_P (operand) && MEM_READONLY_P (operand))
12559 {
12560 rtx tmp = maybe_get_pool_constant (operand);
12561 if (tmp)
12562 operand = tmp;
12563 }
12564
12565 if (MEM_P (operand) && !offsettable_memref_p (operand))
12566 {
12567 /* The only non-offsettable memories we handle are pushes. */
12568 int ok = push_operand (operand, VOIDmode);
12569
12570 gcc_assert (ok);
12571
12572 operand = copy_rtx (operand);
12573 PUT_MODE (operand, Pmode);
12574 parts[0] = parts[1] = parts[2] = operand;
12575 return size;
12576 }
12577
12578 if (GET_CODE (operand) == CONST_VECTOR)
12579 {
12580 enum machine_mode imode = int_mode_for_mode (mode);
12581 /* Caution: if we looked through a constant pool memory above,
12582 the operand may actually have a different mode now. That's
12583 ok, since we want to pun this all the way back to an integer. */
12584 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12585 gcc_assert (operand != NULL);
12586 mode = imode;
12587 }
12588
12589 if (!TARGET_64BIT)
12590 {
12591 if (mode == DImode)
12592 split_di (&operand, 1, &parts[0], &parts[1]);
12593 else
12594 {
12595 if (REG_P (operand))
12596 {
12597 gcc_assert (reload_completed);
12598 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12599 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12600 if (size == 3)
12601 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12602 }
12603 else if (offsettable_memref_p (operand))
12604 {
12605 operand = adjust_address (operand, SImode, 0);
12606 parts[0] = operand;
12607 parts[1] = adjust_address (operand, SImode, 4);
12608 if (size == 3)
12609 parts[2] = adjust_address (operand, SImode, 8);
12610 }
12611 else if (GET_CODE (operand) == CONST_DOUBLE)
12612 {
12613 REAL_VALUE_TYPE r;
12614 long l[4];
12615
12616 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12617 switch (mode)
12618 {
12619 case XFmode:
12620 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12621 parts[2] = gen_int_mode (l[2], SImode);
12622 break;
12623 case DFmode:
12624 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12625 break;
12626 default:
12627 gcc_unreachable ();
12628 }
12629 parts[1] = gen_int_mode (l[1], SImode);
12630 parts[0] = gen_int_mode (l[0], SImode);
12631 }
12632 else
12633 gcc_unreachable ();
12634 }
12635 }
12636 else
12637 {
12638 if (mode == TImode)
12639 split_ti (&operand, 1, &parts[0], &parts[1]);
12640 if (mode == XFmode || mode == TFmode)
12641 {
12642 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12643 if (REG_P (operand))
12644 {
12645 gcc_assert (reload_completed);
12646 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12647 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12648 }
12649 else if (offsettable_memref_p (operand))
12650 {
12651 operand = adjust_address (operand, DImode, 0);
12652 parts[0] = operand;
12653 parts[1] = adjust_address (operand, upper_mode, 8);
12654 }
12655 else if (GET_CODE (operand) == CONST_DOUBLE)
12656 {
12657 REAL_VALUE_TYPE r;
12658 long l[4];
12659
12660 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12661 real_to_target (l, &r, mode);
12662
12663 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12664 if (HOST_BITS_PER_WIDE_INT >= 64)
12665 parts[0]
12666 = gen_int_mode
12667 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12668 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12669 DImode);
12670 else
12671 parts[0] = immed_double_const (l[0], l[1], DImode);
12672
12673 if (upper_mode == SImode)
12674 parts[1] = gen_int_mode (l[2], SImode);
12675 else if (HOST_BITS_PER_WIDE_INT >= 64)
12676 parts[1]
12677 = gen_int_mode
12678 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12679 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12680 DImode);
12681 else
12682 parts[1] = immed_double_const (l[2], l[3], DImode);
12683 }
12684 else
12685 gcc_unreachable ();
12686 }
12687 }
12688
12689 return size;
12690 }
12691
12692 /* Emit insns to perform a move or push of DI, DF, and XF values.
12693 The value is first split into parts; operands 2-4 then contain the
12694 input values in the correct order and operands 5-7 contain the
12695 output values. */
12696
12697 void
12698 ix86_split_long_move (rtx operands[])
12699 {
12700 rtx part[2][3];
12701 int nparts;
12702 int push = 0;
12703 int collisions = 0;
12704 enum machine_mode mode = GET_MODE (operands[0]);
12705
12706 /* The DFmode expanders may ask us to move a double.
12707 For a 64-bit target this is a single move. By hiding the fact
12708 here we simplify the i386.md splitters. */
12709 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12710 {
12711 /* Optimize constant pool references to immediates. This is used by
12712 fp moves, which force all constants to memory to allow combining. */
12713
12714 if (MEM_P (operands[1])
12715 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12716 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12717 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12718 if (push_operand (operands[0], VOIDmode))
12719 {
12720 operands[0] = copy_rtx (operands[0]);
12721 PUT_MODE (operands[0], Pmode);
12722 }
12723 else
12724 operands[0] = gen_lowpart (DImode, operands[0]);
12725 operands[1] = gen_lowpart (DImode, operands[1]);
12726 emit_move_insn (operands[0], operands[1]);
12727 return;
12728 }
12729
12730 /* The only non-offsettable memory we handle is a push. */
12731 if (push_operand (operands[0], VOIDmode))
12732 push = 1;
12733 else
12734 gcc_assert (!MEM_P (operands[0])
12735 || offsettable_memref_p (operands[0]));
12736
12737 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12738 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12739
12740 /* When emitting a push, take care of source operands on the stack. */
12741 if (push && MEM_P (operands[1])
12742 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12743 {
12744 if (nparts == 3)
12745 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12746 XEXP (part[1][2], 0));
12747 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12748 XEXP (part[1][1], 0));
12749 }
12750
12751 /* We need to do the copy in the right order in case an address register
12752 of the source overlaps the destination. */
12753 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12754 {
12755 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12756 collisions++;
12757 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12758 collisions++;
12759 if (nparts == 3
12760 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12761 collisions++;
12762
12763 /* Collision in the middle part can be handled by reordering. */
12764 if (collisions == 1 && nparts == 3
12765 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12766 {
12767 rtx tmp;
12768 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12769 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12770 }
12771
12772 /* If there are more collisions, we can't handle it by reordering.
12773 Do an lea to the last part and use only one colliding move. */
12774 else if (collisions > 1)
12775 {
12776 rtx base;
12777
12778 collisions = 1;
12779
12780 base = part[0][nparts - 1];
12781
12782 /* Handle the case when the last part isn't valid for lea.
12783 Happens in 64-bit mode storing the 12-byte XFmode. */
12784 if (GET_MODE (base) != Pmode)
12785 base = gen_rtx_REG (Pmode, REGNO (base));
12786
12787 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12788 part[1][0] = replace_equiv_address (part[1][0], base);
12789 part[1][1] = replace_equiv_address (part[1][1],
12790 plus_constant (base, UNITS_PER_WORD));
12791 if (nparts == 3)
12792 part[1][2] = replace_equiv_address (part[1][2],
12793 plus_constant (base, 8));
12794 }
12795 }
12796
12797 if (push)
12798 {
12799 if (!TARGET_64BIT)
12800 {
12801 if (nparts == 3)
12802 {
12803 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12804 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12805 emit_move_insn (part[0][2], part[1][2]);
12806 }
12807 }
12808 else
12809 {
12810 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12811 a register, that is OK - we will just use the larger counterpart. We also
12812 retype the memory - this comes from an attempt to avoid the REX prefix on
12813 moving the second half of a TFmode value. */
12814 if (GET_MODE (part[1][1]) == SImode)
12815 {
12816 switch (GET_CODE (part[1][1]))
12817 {
12818 case MEM:
12819 part[1][1] = adjust_address (part[1][1], DImode, 0);
12820 break;
12821
12822 case REG:
12823 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12824 break;
12825
12826 default:
12827 gcc_unreachable ();
12828 }
12829
12830 if (GET_MODE (part[1][0]) == SImode)
12831 part[1][0] = part[1][1];
12832 }
12833 }
12834 emit_move_insn (part[0][1], part[1][1]);
12835 emit_move_insn (part[0][0], part[1][0]);
12836 return;
12837 }
12838
12839 /* Choose the correct order so we do not overwrite the source before it is copied. */
12840 if ((REG_P (part[0][0])
12841 && REG_P (part[1][1])
12842 && (REGNO (part[0][0]) == REGNO (part[1][1])
12843 || (nparts == 3
12844 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12845 || (collisions > 0
12846 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12847 {
12848 if (nparts == 3)
12849 {
12850 operands[2] = part[0][2];
12851 operands[3] = part[0][1];
12852 operands[4] = part[0][0];
12853 operands[5] = part[1][2];
12854 operands[6] = part[1][1];
12855 operands[7] = part[1][0];
12856 }
12857 else
12858 {
12859 operands[2] = part[0][1];
12860 operands[3] = part[0][0];
12861 operands[5] = part[1][1];
12862 operands[6] = part[1][0];
12863 }
12864 }
12865 else
12866 {
12867 if (nparts == 3)
12868 {
12869 operands[2] = part[0][0];
12870 operands[3] = part[0][1];
12871 operands[4] = part[0][2];
12872 operands[5] = part[1][0];
12873 operands[6] = part[1][1];
12874 operands[7] = part[1][2];
12875 }
12876 else
12877 {
12878 operands[2] = part[0][0];
12879 operands[3] = part[0][1];
12880 operands[5] = part[1][0];
12881 operands[6] = part[1][1];
12882 }
12883 }
12884
12885 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12886 if (optimize_size)
12887 {
12888 if (CONST_INT_P (operands[5])
12889 && operands[5] != const0_rtx
12890 && REG_P (operands[2]))
12891 {
12892 if (CONST_INT_P (operands[6])
12893 && INTVAL (operands[6]) == INTVAL (operands[5]))
12894 operands[6] = operands[2];
12895
12896 if (nparts == 3
12897 && CONST_INT_P (operands[7])
12898 && INTVAL (operands[7]) == INTVAL (operands[5]))
12899 operands[7] = operands[2];
12900 }
12901
12902 if (nparts == 3
12903 && CONST_INT_P (operands[6])
12904 && operands[6] != const0_rtx
12905 && REG_P (operands[3])
12906 && CONST_INT_P (operands[7])
12907 && INTVAL (operands[7]) == INTVAL (operands[6]))
12908 operands[7] = operands[3];
12909 }
12910
12911 emit_move_insn (operands[2], operands[5]);
12912 emit_move_insn (operands[3], operands[6]);
12913 if (nparts == 3)
12914 emit_move_insn (operands[4], operands[7]);
12915
12916 return;
12917 }
12918
12919 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12920 left shift by a constant, either using a single shift or
12921 a sequence of add instructions. */
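/* For illustration (a sketch of the resulting code, not exhaustive):
   a shift left by 1 is emitted as a single add of the operand to itself;
   a small constant shift becomes a short run of such adds when the adds
   are cheaper than a shift and we are not optimizing for size; otherwise
   a single shift-by-constant insn is emitted.  */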
12922
12923 static void
12924 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12925 {
12926 if (count == 1)
12927 {
12928 emit_insn ((mode == DImode
12929 ? gen_addsi3
12930 : gen_adddi3) (operand, operand, operand));
12931 }
12932 else if (!optimize_size
12933 && count * ix86_cost->add <= ix86_cost->shift_const)
12934 {
12935 int i;
12936 for (i = 0; i < count; i++)
12937 {
12938 emit_insn ((mode == DImode
12939 ? gen_addsi3
12940 : gen_adddi3) (operand, operand, operand));
12941 }
12942 }
12943 else
12944 emit_insn ((mode == DImode
12945 ? gen_ashlsi3
12946 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12947 }
12948
12949 void
12950 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12951 {
12952 rtx low[2], high[2];
12953 int count;
12954 const int single_width = mode == DImode ? 32 : 64;
12955
12956 if (CONST_INT_P (operands[2]))
12957 {
12958 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12959 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12960
12961 if (count >= single_width)
12962 {
12963 emit_move_insn (high[0], low[1]);
12964 emit_move_insn (low[0], const0_rtx);
12965
12966 if (count > single_width)
12967 ix86_expand_ashl_const (high[0], count - single_width, mode);
12968 }
12969 else
12970 {
12971 if (!rtx_equal_p (operands[0], operands[1]))
12972 emit_move_insn (operands[0], operands[1]);
12973 emit_insn ((mode == DImode
12974 ? gen_x86_shld_1
12975 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12976 ix86_expand_ashl_const (low[0], count, mode);
12977 }
12978 return;
12979 }
12980
12981 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12982
12983 if (operands[1] == const1_rtx)
12984 {
12985 /* Assuming we've chosen QImode-capable registers, then 1 << N
12986 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12987 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12988 {
12989 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12990
12991 ix86_expand_clear (low[0]);
12992 ix86_expand_clear (high[0]);
12993 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12994
12995 d = gen_lowpart (QImode, low[0]);
12996 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12997 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12998 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12999
13000 d = gen_lowpart (QImode, high[0]);
13001 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13002 s = gen_rtx_NE (QImode, flags, const0_rtx);
13003 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13004 }
13005
13006 /* Otherwise, we can get the same results by manually performing
13007 a bit extract operation on bit 5/6, and then performing the two
13008 shifts. The two methods of getting 0/1 into low/high are exactly
13009 the same size. Avoiding the shift in the bit extract case helps
13010 pentium4 a bit; no one else seems to care much either way. */
13011 else
13012 {
13013 rtx x;
13014
13015 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13016 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13017 else
13018 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13019 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13020
13021 emit_insn ((mode == DImode
13022 ? gen_lshrsi3
13023 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13024 emit_insn ((mode == DImode
13025 ? gen_andsi3
13026 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13027 emit_move_insn (low[0], high[0]);
13028 emit_insn ((mode == DImode
13029 ? gen_xorsi3
13030 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13031 }
13032
13033 emit_insn ((mode == DImode
13034 ? gen_ashlsi3
13035 : gen_ashldi3) (low[0], low[0], operands[2]));
13036 emit_insn ((mode == DImode
13037 ? gen_ashlsi3
13038 : gen_ashldi3) (high[0], high[0], operands[2]));
13039 return;
13040 }
13041
13042 if (operands[1] == constm1_rtx)
13043 {
13044 /* For -1 << N, we can avoid the shld instruction, because we
13045 know that we're shifting 0...31/63 ones into a -1. */
13046 emit_move_insn (low[0], constm1_rtx);
13047 if (optimize_size)
13048 emit_move_insn (high[0], low[0]);
13049 else
13050 emit_move_insn (high[0], constm1_rtx);
13051 }
13052 else
13053 {
13054 if (!rtx_equal_p (operands[0], operands[1]))
13055 emit_move_insn (operands[0], operands[1]);
13056
13057 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13058 emit_insn ((mode == DImode
13059 ? gen_x86_shld_1
13060 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13061 }
13062
13063 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13064
13065 if (TARGET_CMOVE && scratch)
13066 {
13067 ix86_expand_clear (scratch);
13068 emit_insn ((mode == DImode
13069 ? gen_x86_shift_adj_1
13070 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13071 }
13072 else
13073 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13074 }
13075
13076 void
13077 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13078 {
13079 rtx low[2], high[2];
13080 int count;
13081 const int single_width = mode == DImode ? 32 : 64;
13082
13083 if (CONST_INT_P (operands[2]))
13084 {
13085 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13086 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13087
13088 if (count == single_width * 2 - 1)
13089 {
13090 emit_move_insn (high[0], high[1]);
13091 emit_insn ((mode == DImode
13092 ? gen_ashrsi3
13093 : gen_ashrdi3) (high[0], high[0],
13094 GEN_INT (single_width - 1)));
13095 emit_move_insn (low[0], high[0]);
13096
13097 }
13098 else if (count >= single_width)
13099 {
13100 emit_move_insn (low[0], high[1]);
13101 emit_move_insn (high[0], low[0]);
13102 emit_insn ((mode == DImode
13103 ? gen_ashrsi3
13104 : gen_ashrdi3) (high[0], high[0],
13105 GEN_INT (single_width - 1)));
13106 if (count > single_width)
13107 emit_insn ((mode == DImode
13108 ? gen_ashrsi3
13109 : gen_ashrdi3) (low[0], low[0],
13110 GEN_INT (count - single_width)));
13111 }
13112 else
13113 {
13114 if (!rtx_equal_p (operands[0], operands[1]))
13115 emit_move_insn (operands[0], operands[1]);
13116 emit_insn ((mode == DImode
13117 ? gen_x86_shrd_1
13118 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13119 emit_insn ((mode == DImode
13120 ? gen_ashrsi3
13121 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13122 }
13123 }
13124 else
13125 {
13126 if (!rtx_equal_p (operands[0], operands[1]))
13127 emit_move_insn (operands[0], operands[1]);
13128
13129 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13130
13131 emit_insn ((mode == DImode
13132 ? gen_x86_shrd_1
13133 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13134 emit_insn ((mode == DImode
13135 ? gen_ashrsi3
13136 : gen_ashrdi3) (high[0], high[0], operands[2]));
13137
13138 if (TARGET_CMOVE && scratch)
13139 {
13140 emit_move_insn (scratch, high[0]);
13141 emit_insn ((mode == DImode
13142 ? gen_ashrsi3
13143 : gen_ashrdi3) (scratch, scratch,
13144 GEN_INT (single_width - 1)));
13145 emit_insn ((mode == DImode
13146 ? gen_x86_shift_adj_1
13147 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13148 scratch));
13149 }
13150 else
13151 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13152 }
13153 }
13154
13155 void
13156 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13157 {
13158 rtx low[2], high[2];
13159 int count;
13160 const int single_width = mode == DImode ? 32 : 64;
13161
13162 if (CONST_INT_P (operands[2]))
13163 {
13164 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13165 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13166
13167 if (count >= single_width)
13168 {
13169 emit_move_insn (low[0], high[1]);
13170 ix86_expand_clear (high[0]);
13171
13172 if (count > single_width)
13173 emit_insn ((mode == DImode
13174 ? gen_lshrsi3
13175 : gen_lshrdi3) (low[0], low[0],
13176 GEN_INT (count - single_width)));
13177 }
13178 else
13179 {
13180 if (!rtx_equal_p (operands[0], operands[1]))
13181 emit_move_insn (operands[0], operands[1]);
13182 emit_insn ((mode == DImode
13183 ? gen_x86_shrd_1
13184 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13185 emit_insn ((mode == DImode
13186 ? gen_lshrsi3
13187 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13188 }
13189 }
13190 else
13191 {
13192 if (!rtx_equal_p (operands[0], operands[1]))
13193 emit_move_insn (operands[0], operands[1]);
13194
13195 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13196
13197 emit_insn ((mode == DImode
13198 ? gen_x86_shrd_1
13199 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13200 emit_insn ((mode == DImode
13201 ? gen_lshrsi3
13202 : gen_lshrdi3) (high[0], high[0], operands[2]));
13203
13204 /* Heh. By reversing the arguments, we can reuse this pattern. */
13205 if (TARGET_CMOVE && scratch)
13206 {
13207 ix86_expand_clear (scratch);
13208 emit_insn ((mode == DImode
13209 ? gen_x86_shift_adj_1
13210 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13211 scratch));
13212 }
13213 else
13214 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13215 }
13216 }
13217
13218 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13219 static void
13220 predict_jump (int prob)
13221 {
13222 rtx insn = get_last_insn ();
13223 gcc_assert (JUMP_P (insn));
13224 REG_NOTES (insn)
13225 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13226 GEN_INT (prob),
13227 REG_NOTES (insn));
13228 }
13229
13230 /* Helper function for the string operations below. Test whether VARIABLE
13231 is aligned to VALUE bytes. If so, jump to the returned label. */
13232 static rtx
13233 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13234 {
13235 rtx label = gen_label_rtx ();
13236 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13237 if (GET_MODE (variable) == DImode)
13238 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13239 else
13240 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13241 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13242 1, label);
13243 if (epilogue)
13244 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13245 else
13246 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13247 return label;
13248 }
13249
13250 /* Decrease COUNTREG by VALUE. */
13251 static void
13252 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13253 {
13254 if (GET_MODE (countreg) == DImode)
13255 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13256 else
13257 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13258 }
13259
13260 /* Zero extend the possibly-SImode EXP to a Pmode register. */
13261 rtx
13262 ix86_zero_extend_to_Pmode (rtx exp)
13263 {
13264 rtx r;
13265 if (GET_MODE (exp) == VOIDmode)
13266 return force_reg (Pmode, exp);
13267 if (GET_MODE (exp) == Pmode)
13268 return copy_to_mode_reg (Pmode, exp);
13269 r = gen_reg_rtx (Pmode);
13270 emit_insn (gen_zero_extendsidi2 (r, exp));
13271 return r;
13272 }
13273
13274 /* Divide COUNTREG by SCALE. */
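/* For illustration: scale_counter (count, 4) folds a constant count to
   GEN_INT (INTVAL (count) / 4), and otherwise emits a logical shift right
   of the count register by 2.  */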
13275 static rtx
13276 scale_counter (rtx countreg, int scale)
13277 {
13278 rtx sc;
13279 rtx piece_size_mask;
13280
13281 if (scale == 1)
13282 return countreg;
13283 if (CONST_INT_P (countreg))
13284 return GEN_INT (INTVAL (countreg) / scale);
13285 gcc_assert (REG_P (countreg));
13286
13287 piece_size_mask = GEN_INT (scale - 1);
13288 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13289 GEN_INT (exact_log2 (scale)),
13290 NULL, 1, OPTAB_DIRECT);
13291 return sc;
13292 }
13293
13294 /* When SRCPTR is non-NULL, output a simple loop to move memory
13295 pointed to by SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times;
13296 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13297 equivalent loop to set memory by VALUE (supposed to be in MODE).
13298
13299 The size is rounded down to a whole number of chunks moved at once.
13300 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
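/* For illustration only - a rough C-level sketch of the emitted loop for
   a move with MODE == SImode and UNROLL == 2 (names are descriptive, not
   actual RTL):

       size = count & ~7;
       iter = 0;
     top:
       *(int *) (dest + iter)     = *(int *) (src + iter);
       *(int *) (dest + iter + 4) = *(int *) (src + iter + 4);
       iter += 8;
       if (iter < size)
         goto top;
       dest += iter;
       src += iter;
     out:
       ;  */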
13301
13302
13303 static void
13304 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13305 rtx destptr, rtx srcptr, rtx value,
13306 rtx count, enum machine_mode mode, int unroll,
13307 int expected_size)
13308 {
13309 rtx out_label, top_label, iter, tmp;
13310 enum machine_mode iter_mode;
13311 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13312 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13313 rtx size;
13314 rtx x_addr;
13315 rtx y_addr;
13316 int i;
13317
13318 iter_mode = GET_MODE (count);
13319 if (iter_mode == VOIDmode)
13320 iter_mode = word_mode;
13321
13322 top_label = gen_label_rtx ();
13323 out_label = gen_label_rtx ();
13324 iter = gen_reg_rtx (iter_mode);
13325
13326 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13327 NULL, 1, OPTAB_DIRECT);
13328 /* Those two should combine. */
13329 if (piece_size == const1_rtx)
13330 {
13331 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13332 true, out_label);
13333 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13334 }
13335 emit_move_insn (iter, const0_rtx);
13336
13337 emit_label (top_label);
13338
13339 tmp = convert_modes (Pmode, iter_mode, iter, true);
13340 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13341 destmem = change_address (destmem, mode, x_addr);
13342
13343 if (srcmem)
13344 {
13345 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13346 srcmem = change_address (srcmem, mode, y_addr);
13347
13348 /* When unrolling for chips that reorder memory reads and writes,
13349 we can save registers by using a single temporary.
13350 Also, using 4 temporaries is overkill in 32-bit mode. */
13351 if (!TARGET_64BIT && 0)
13352 {
13353 for (i = 0; i < unroll; i++)
13354 {
13355 if (i)
13356 {
13357 destmem =
13358 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13359 srcmem =
13360 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13361 }
13362 emit_move_insn (destmem, srcmem);
13363 }
13364 }
13365 else
13366 {
13367 rtx tmpreg[4];
13368 gcc_assert (unroll <= 4);
13369 for (i = 0; i < unroll; i++)
13370 {
13371 tmpreg[i] = gen_reg_rtx (mode);
13372 if (i)
13373 {
13374 srcmem =
13375 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13376 }
13377 emit_move_insn (tmpreg[i], srcmem);
13378 }
13379 for (i = 0; i < unroll; i++)
13380 {
13381 if (i)
13382 {
13383 destmem =
13384 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13385 }
13386 emit_move_insn (destmem, tmpreg[i]);
13387 }
13388 }
13389 }
13390 else
13391 for (i = 0; i < unroll; i++)
13392 {
13393 if (i)
13394 destmem =
13395 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13396 emit_move_insn (destmem, value);
13397 }
13398
13399 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13400 true, OPTAB_LIB_WIDEN);
13401 if (tmp != iter)
13402 emit_move_insn (iter, tmp);
13403
13404 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13405 true, top_label);
13406 if (expected_size != -1)
13407 {
13408 expected_size /= GET_MODE_SIZE (mode) * unroll;
13409 if (expected_size == 0)
13410 predict_jump (0);
13411 else if (expected_size > REG_BR_PROB_BASE)
13412 predict_jump (REG_BR_PROB_BASE - 1);
13413 else
13414 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13415 }
13416 else
13417 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13418 iter = ix86_zero_extend_to_Pmode (iter);
13419 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13420 true, OPTAB_LIB_WIDEN);
13421 if (tmp != destptr)
13422 emit_move_insn (destptr, tmp);
13423 if (srcptr)
13424 {
13425 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13426 true, OPTAB_LIB_WIDEN);
13427 if (tmp != srcptr)
13428 emit_move_insn (srcptr, tmp);
13429 }
13430 emit_label (out_label);
13431 }
13432
13433 /* Output "rep; mov" instruction.
13434 Arguments have same meaning as for previous function */
13435 static void
13436 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13437 rtx destptr, rtx srcptr,
13438 rtx count,
13439 enum machine_mode mode)
13440 {
13441 rtx destexp;
13442 rtx srcexp;
13443 rtx countreg;
13444
13445 /* If the size is known, it is shorter to use rep movs. */
13446 if (mode == QImode && CONST_INT_P (count)
13447 && !(INTVAL (count) & 3))
13448 mode = SImode;
13449
13450 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13451 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13452 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13453 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13454 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13455 if (mode != QImode)
13456 {
13457 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13458 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13459 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13460 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13461 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13462 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13463 }
13464 else
13465 {
13466 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13467 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13468 }
13469 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13470 destexp, srcexp));
13471 }
13472
13473 /* Output "rep; stos" instruction.
13474 Arguments have same meaning as for previous function */
13475 static void
13476 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13477 rtx count,
13478 enum machine_mode mode)
13479 {
13480 rtx destexp;
13481 rtx countreg;
13482
13483 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13484 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13485 value = force_reg (mode, gen_lowpart (mode, value));
13486 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13487 if (mode != QImode)
13488 {
13489 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13490 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13491 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13492 }
13493 else
13494 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13495 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13496 }
13497
13498 static void
13499 emit_strmov (rtx destmem, rtx srcmem,
13500 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13501 {
13502 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13503 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13504 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13505 }
13506
13507 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13508 static void
13509 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13510 rtx destptr, rtx srcptr, rtx count, int max_size)
13511 {
13512 rtx src, dest;
13513 if (CONST_INT_P (count))
13514 {
13515 HOST_WIDE_INT countval = INTVAL (count);
13516 int offset = 0;
13517
13518 if ((countval & 0x10) && max_size > 16)
13519 {
13520 if (TARGET_64BIT)
13521 {
13522 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13523 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13524 }
13525 else
13526 gcc_unreachable ();
13527 offset += 16;
13528 }
13529 if ((countval & 0x08) && max_size > 8)
13530 {
13531 if (TARGET_64BIT)
13532 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13533 else
13534 {
13535 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13536 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 4);
13537 }
13538 offset += 8;
13539 }
13540 if ((countval & 0x04) && max_size > 4)
13541 {
13542 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13543 offset += 4;
13544 }
13545 if ((countval & 0x02) && max_size > 2)
13546 {
13547 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13548 offset += 2;
13549 }
13550 if ((countval & 0x01) && max_size > 1)
13551 {
13552 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13553 offset += 1;
13554 }
13555 return;
13556 }
13557 if (max_size > 8)
13558 {
13559 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13560 count, 1, OPTAB_DIRECT);
13561 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13562 count, QImode, 1, 4);
13563 return;
13564 }
13565
13566 /* When single-instruction stringops are available, we can cheaply increase
13567 dest and src pointers. Otherwise we save code size by maintaining an offset
13568 (zero is readily available from the preceding rep operation) and using x86
13569 addressing modes. */
13570 if (TARGET_SINGLE_STRINGOP)
13571 {
13572 if (max_size > 4)
13573 {
13574 rtx label = ix86_expand_aligntest (count, 4, true);
13575 src = change_address (srcmem, SImode, srcptr);
13576 dest = change_address (destmem, SImode, destptr);
13577 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13578 emit_label (label);
13579 LABEL_NUSES (label) = 1;
13580 }
13581 if (max_size > 2)
13582 {
13583 rtx label = ix86_expand_aligntest (count, 2, true);
13584 src = change_address (srcmem, HImode, srcptr);
13585 dest = change_address (destmem, HImode, destptr);
13586 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13587 emit_label (label);
13588 LABEL_NUSES (label) = 1;
13589 }
13590 if (max_size > 1)
13591 {
13592 rtx label = ix86_expand_aligntest (count, 1, true);
13593 src = change_address (srcmem, QImode, srcptr);
13594 dest = change_address (destmem, QImode, destptr);
13595 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13596 emit_label (label);
13597 LABEL_NUSES (label) = 1;
13598 }
13599 }
13600 else
13601 {
13602 rtx offset = force_reg (Pmode, const0_rtx);
13603 rtx tmp;
13604
13605 if (max_size > 4)
13606 {
13607 rtx label = ix86_expand_aligntest (count, 4, true);
13608 src = change_address (srcmem, SImode, srcptr);
13609 dest = change_address (destmem, SImode, destptr);
13610 emit_move_insn (dest, src);
13611 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13612 true, OPTAB_LIB_WIDEN);
13613 if (tmp != offset)
13614 emit_move_insn (offset, tmp);
13615 emit_label (label);
13616 LABEL_NUSES (label) = 1;
13617 }
13618 if (max_size > 2)
13619 {
13620 rtx label = ix86_expand_aligntest (count, 2, true);
13621 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13622 src = change_address (srcmem, HImode, tmp);
13623 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13624 dest = change_address (destmem, HImode, tmp);
13625 emit_move_insn (dest, src);
13626 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13627 true, OPTAB_LIB_WIDEN);
13628 if (tmp != offset)
13629 emit_move_insn (offset, tmp);
13630 emit_label (label);
13631 LABEL_NUSES (label) = 1;
13632 }
13633 if (max_size > 1)
13634 {
13635 rtx label = ix86_expand_aligntest (count, 1, true);
13636 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13637 src = change_address (srcmem, QImode, tmp);
13638 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13639 dest = change_address (destmem, QImode, tmp);
13640 emit_move_insn (dest, src);
13641 emit_label (label);
13642 LABEL_NUSES (label) = 1;
13643 }
13644 }
13645 }
13646
13647 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13648 static void
13649 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13650 rtx count, int max_size)
13651 {
13652 count =
13653 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13654 count, 1, OPTAB_DIRECT);
13655 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13656 gen_lowpart (QImode, value), count, QImode,
13657 1, max_size / 2);
13658 }
13659
13660 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13661 static void
13662 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13663 {
13664 rtx dest;
13665
13666 if (CONST_INT_P (count))
13667 {
13668 HOST_WIDE_INT countval = INTVAL (count);
13669 int offset = 0;
13670
13671 if ((countval & 0x10) && max_size > 16)
13672 {
13673 if (TARGET_64BIT)
13674 {
13675 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13676 emit_insn (gen_strset (destptr, dest, value));
13677 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13678 emit_insn (gen_strset (destptr, dest, value));
13679 }
13680 else
13681 gcc_unreachable ();
13682 offset += 16;
13683 }
13684 if ((countval & 0x08) && max_size > 8)
13685 {
13686 if (TARGET_64BIT)
13687 {
13688 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13689 emit_insn (gen_strset (destptr, dest, value));
13690 }
13691 else
13692 {
13693 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13694 emit_insn (gen_strset (destptr, dest, value));
13695 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13696 emit_insn (gen_strset (destptr, dest, value));
13697 }
13698 offset += 8;
13699 }
13700 if ((countval & 0x04) && max_size > 4)
13701 {
13702 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13703 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13704 offset += 4;
13705 }
13706 if ((countval & 0x02) && max_size > 2)
13707 {
13708 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13709 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13710 offset += 2;
13711 }
13712 if ((countval & 0x01) && max_size > 1)
13713 {
13714 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13715 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13716 offset += 1;
13717 }
13718 return;
13719 }
13720 if (max_size > 32)
13721 {
13722 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13723 return;
13724 }
13725 if (max_size > 16)
13726 {
13727 rtx label = ix86_expand_aligntest (count, 16, true);
13728 if (TARGET_64BIT)
13729 {
13730 dest = change_address (destmem, DImode, destptr);
13731 emit_insn (gen_strset (destptr, dest, value));
13732 emit_insn (gen_strset (destptr, dest, value));
13733 }
13734 else
13735 {
13736 dest = change_address (destmem, SImode, destptr);
13737 emit_insn (gen_strset (destptr, dest, value));
13738 emit_insn (gen_strset (destptr, dest, value));
13739 emit_insn (gen_strset (destptr, dest, value));
13740 emit_insn (gen_strset (destptr, dest, value));
13741 }
13742 emit_label (label);
13743 LABEL_NUSES (label) = 1;
13744 }
13745 if (max_size > 8)
13746 {
13747 rtx label = ix86_expand_aligntest (count, 8, true);
13748 if (TARGET_64BIT)
13749 {
13750 dest = change_address (destmem, DImode, destptr);
13751 emit_insn (gen_strset (destptr, dest, value));
13752 }
13753 else
13754 {
13755 dest = change_address (destmem, SImode, destptr);
13756 emit_insn (gen_strset (destptr, dest, value));
13757 emit_insn (gen_strset (destptr, dest, value));
13758 }
13759 emit_label (label);
13760 LABEL_NUSES (label) = 1;
13761 }
13762 if (max_size > 4)
13763 {
13764 rtx label = ix86_expand_aligntest (count, 4, true);
13765 dest = change_address (destmem, SImode, destptr);
13766 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13767 emit_label (label);
13768 LABEL_NUSES (label) = 1;
13769 }
13770 if (max_size > 2)
13771 {
13772 rtx label = ix86_expand_aligntest (count, 2, true);
13773 dest = change_address (destmem, HImode, destptr);
13774 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13775 emit_label (label);
13776 LABEL_NUSES (label) = 1;
13777 }
13778 if (max_size > 1)
13779 {
13780 rtx label = ix86_expand_aligntest (count, 1, true);
13781 dest = change_address (destmem, QImode, destptr);
13782 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13783 emit_label (label);
13784 LABEL_NUSES (label) = 1;
13785 }
13786 }
13787
13788 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by ALIGN,
13789 to DESIRED_ALIGNMENT. */
13790 static void
13791 expand_movmem_prologue (rtx destmem, rtx srcmem,
13792 rtx destptr, rtx srcptr, rtx count,
13793 int align, int desired_alignment)
13794 {
13795 if (align <= 1 && desired_alignment > 1)
13796 {
13797 rtx label = ix86_expand_aligntest (destptr, 1, false);
13798 srcmem = change_address (srcmem, QImode, srcptr);
13799 destmem = change_address (destmem, QImode, destptr);
13800 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13801 ix86_adjust_counter (count, 1);
13802 emit_label (label);
13803 LABEL_NUSES (label) = 1;
13804 }
13805 if (align <= 2 && desired_alignment > 2)
13806 {
13807 rtx label = ix86_expand_aligntest (destptr, 2, false);
13808 srcmem = change_address (srcmem, HImode, srcptr);
13809 destmem = change_address (destmem, HImode, destptr);
13810 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13811 ix86_adjust_counter (count, 2);
13812 emit_label (label);
13813 LABEL_NUSES (label) = 1;
13814 }
13815 if (align <= 4 && desired_alignment > 4)
13816 {
13817 rtx label = ix86_expand_aligntest (destptr, 4, false);
13818 srcmem = change_address (srcmem, SImode, srcptr);
13819 destmem = change_address (destmem, SImode, destptr);
13820 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13821 ix86_adjust_counter (count, 4);
13822 emit_label (label);
13823 LABEL_NUSES (label) = 1;
13824 }
13825 gcc_assert (desired_alignment <= 8);
13826 }
13827
13828 /* Store enough bytes at DEST to align DEST, known to be aligned by ALIGN,
13829 to DESIRED_ALIGNMENT. */
13830 static void
13831 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13832 int align, int desired_alignment)
13833 {
13834 if (align <= 1 && desired_alignment > 1)
13835 {
13836 rtx label = ix86_expand_aligntest (destptr, 1, false);
13837 destmem = change_address (destmem, QImode, destptr);
13838 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13839 ix86_adjust_counter (count, 1);
13840 emit_label (label);
13841 LABEL_NUSES (label) = 1;
13842 }
13843 if (align <= 2 && desired_alignment > 2)
13844 {
13845 rtx label = ix86_expand_aligntest (destptr, 2, false);
13846 destmem = change_address (destmem, HImode, destptr);
13847 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13848 ix86_adjust_counter (count, 2);
13849 emit_label (label);
13850 LABEL_NUSES (label) = 1;
13851 }
13852 if (align <= 4 && desired_alignment > 4)
13853 {
13854 rtx label = ix86_expand_aligntest (destptr, 4, false);
13855 destmem = change_address (destmem, SImode, destptr);
13856 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13857 ix86_adjust_counter (count, 4);
13858 emit_label (label);
13859 LABEL_NUSES (label) = 1;
13860 }
13861 gcc_assert (desired_alignment <= 8);
13862 }
13863
13864 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13865 static enum stringop_alg
13866 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13867 int *dynamic_check)
13868 {
13869 const struct stringop_algs * algs;
13870
13871 *dynamic_check = -1;
13872 if (memset)
13873 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13874 else
13875 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13876 if (stringop_alg != no_stringop)
13877 return stringop_alg;
13878 /* rep; movq or rep; movl is the smallest variant. */
13879 else if (optimize_size)
13880 {
13881 if (!count || (count & 3))
13882 return rep_prefix_1_byte;
13883 else
13884 return rep_prefix_4_byte;
13885 }
13886 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
13887 */
13888 else if (expected_size != -1 && expected_size < 4)
13889 return loop_1_byte;
13890 else if (expected_size != -1)
13891 {
13892 unsigned int i;
13893 enum stringop_alg alg = libcall;
13894 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13895 {
13896 gcc_assert (algs->size[i].max);
13897 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13898 {
13899 if (algs->size[i].alg != libcall)
13900 alg = algs->size[i].alg;
13901 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13902 last non-libcall inline algorithm. */
13903 if (TARGET_INLINE_ALL_STRINGOPS)
13904 {
13905 /* When the current size is best copied by a libcall,
13906 but we are still forced to inline, run the heuristic below
13907 that will pick code for medium-sized blocks. */
13908 if (alg != libcall)
13909 return alg;
13910 break;
13911 }
13912 else
13913 return algs->size[i].alg;
13914 }
13915 }
13916 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13917 }
13918 /* When asked to inline the call anyway, try to pick a meaningful choice.
13919 We look for the maximal size of block that is faster to copy by hand and
13920 take blocks of at most that size, guessing that the average size will
13921 be roughly half of the block.
13922
13923 If this turns out to be bad, we might simply specify the preferred
13924 choice in ix86_costs. */
13925 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13926 && algs->unknown_size == libcall)
13927 {
13928 int max = -1;
13929 enum stringop_alg alg;
13930 int i;
13931
13932 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13933 if (algs->size[i].alg != libcall && algs->size[i].alg)
13934 max = algs->size[i].max;
13935 if (max == -1)
13936 max = 4096;
13937 alg = decide_alg (count, max / 2, memset, dynamic_check);
13938 gcc_assert (*dynamic_check == -1);
13939 gcc_assert (alg != libcall);
13940 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13941 *dynamic_check = max;
13942 return alg;
13943 }
13944 return algs->unknown_size;
13945 }
13946
13947 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13948 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13949 static int
13950 decide_alignment (int align,
13951 enum stringop_alg alg,
13952 int expected_size)
13953 {
13954 int desired_align = 0;
13955 switch (alg)
13956 {
13957 case no_stringop:
13958 gcc_unreachable ();
13959 case loop:
13960 case unrolled_loop:
13961 desired_align = GET_MODE_SIZE (Pmode);
13962 break;
13963 case rep_prefix_8_byte:
13964 desired_align = 8;
13965 break;
13966 case rep_prefix_4_byte:
13967 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13968 copying a whole cacheline at once. */
13969 if (TARGET_PENTIUMPRO)
13970 desired_align = 8;
13971 else
13972 desired_align = 4;
13973 break;
13974 case rep_prefix_1_byte:
13975 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13976 copying a whole cacheline at once. */
13977 if (TARGET_PENTIUMPRO)
13978 desired_align = 8;
13979 else
13980 desired_align = 1;
13981 break;
13982 case loop_1_byte:
13983 desired_align = 1;
13984 break;
13985 case libcall:
13986 return 0;
13987 }
13988
13989 if (optimize_size)
13990 desired_align = 1;
13991 if (desired_align < align)
13992 desired_align = align;
13993 if (expected_size != -1 && expected_size < 4)
13994 desired_align = align;
13995 return desired_align;
13996 }
13997
13998 /* Return the smallest power of 2 greater than VAL. */
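/* For illustration: smallest_pow2_greater_than (4) == 8,
   smallest_pow2_greater_than (5) == 8, and
   smallest_pow2_greater_than (0) == 1.  */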
13999 static int
14000 smallest_pow2_greater_than (int val)
14001 {
14002 int ret = 1;
14003 while (ret <= val)
14004 ret <<= 1;
14005 return ret;
14006 }
14007
14008 /* Expand string move (memcpy) operation. Use i386 string operations when
14009 profitable. ix86_expand_setmem contains similar code. The code depends upon
14010 architecture, block size and alignment, but always has the same
14011 overall structure:
14012
14013 1) Prologue guard: Conditional that jumps up to the epilogue for small
14014 blocks that can be handled by the epilogue alone. This is faster, but
14015 also needed for correctness, since the prologue assumes the block is larger
14016 than the desired alignment.
14017
14018 Optional dynamic check for size and libcall for large
14019 blocks is emitted here too, with -minline-stringops-dynamically.
14020
14021 2) Prologue: copy the first few bytes in order to get the destination aligned
14022 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14023 DESIRED_ALIGN, and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14024 We emit either a jump tree of power-of-two sized blocks, or a byte loop.
14025
14026 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14027 with specified algorithm.
14028
14029 4) Epilogue: code copying tail of the block that is too small to be
14030 handled by main body (or up to size guarded by prologue guard). */
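/* For illustration only - a rough sketch of the shape of the expansion for
   a non-constant COUNT with ALG == rep_prefix_4_byte and DESIRED_ALIGN == 4
   (the real code uses jump trees and RTL; this is just pseudo code):

       if (count < 4)
         goto epilogue;                          1) prologue guard
       align the destination byte by byte,
       decrementing count accordingly            2) prologue
       rep movsl with count >> 2 iterations      3) main body
     epilogue:
       copy the remaining count & 3 bytes        4) epilogue             */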
14031
14032 int
14033 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14034 rtx expected_align_exp, rtx expected_size_exp)
14035 {
14036 rtx destreg;
14037 rtx srcreg;
14038 rtx label = NULL;
14039 rtx tmp;
14040 rtx jump_around_label = NULL;
14041 HOST_WIDE_INT align = 1;
14042 unsigned HOST_WIDE_INT count = 0;
14043 HOST_WIDE_INT expected_size = -1;
14044 int size_needed = 0, epilogue_size_needed;
14045 int desired_align = 0;
14046 enum stringop_alg alg;
14047 int dynamic_check;
14048
14049 if (CONST_INT_P (align_exp))
14050 align = INTVAL (align_exp);
14051 /* i386 can do misaligned access at reasonably increased cost. */
14052 if (CONST_INT_P (expected_align_exp)
14053 && INTVAL (expected_align_exp) > align)
14054 align = INTVAL (expected_align_exp);
14055 if (CONST_INT_P (count_exp))
14056 count = expected_size = INTVAL (count_exp);
14057 if (CONST_INT_P (expected_size_exp) && count == 0)
14058 expected_size = INTVAL (expected_size_exp);
14059
14060 /* Step 0: Decide on preferred algorithm, desired alignment and
14061 size of chunks to be copied by main loop. */
14062
14063 alg = decide_alg (count, expected_size, false, &dynamic_check);
14064 desired_align = decide_alignment (align, alg, expected_size);
14065
14066 if (!TARGET_ALIGN_STRINGOPS)
14067 align = desired_align;
14068
14069 if (alg == libcall)
14070 return 0;
14071 gcc_assert (alg != no_stringop);
14072 if (!count)
14073 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14074 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14075 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14076 switch (alg)
14077 {
14078 case libcall:
14079 case no_stringop:
14080 gcc_unreachable ();
14081 case loop:
14082 size_needed = GET_MODE_SIZE (Pmode);
14083 break;
14084 case unrolled_loop:
14085 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14086 break;
14087 case rep_prefix_8_byte:
14088 size_needed = 8;
14089 break;
14090 case rep_prefix_4_byte:
14091 size_needed = 4;
14092 break;
14093 case rep_prefix_1_byte:
14094 case loop_1_byte:
14095 size_needed = 1;
14096 break;
14097 }
14098
14099 epilogue_size_needed = size_needed;
14100
14101 /* Step 1: Prologue guard. */
14102
14103 /* Alignment code needs the count to be in a register. */
14104 if (CONST_INT_P (count_exp) && desired_align > align)
14105 {
14106 enum machine_mode mode = SImode;
14107 if (TARGET_64BIT && (count & ~0xffffffff))
14108 mode = DImode;
14109 count_exp = force_reg (mode, count_exp);
14110 }
14111 gcc_assert (desired_align >= 1 && align >= 1);
14112
14113 /* Ensure that the alignment prologue won't copy past the end of the block. */
14114 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14115 && !count)
14116 {
14117 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14118
14119 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14120 Make sure it is power of 2. */
14121 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14122
14123 label = gen_label_rtx ();
14124 emit_cmp_and_jump_insns (count_exp,
14125 GEN_INT (epilogue_size_needed),
14126 LTU, 0, GET_MODE (count_exp), 1, label);
14127 if (expected_size == -1 || expected_size < epilogue_size_needed)
14128 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14129 else
14130 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14131 }
14132 /* Emit code to decide at runtime whether a library call or inline code should
14133 be used. */
14134 if (dynamic_check != -1)
14135 {
14136 rtx hot_label = gen_label_rtx ();
14137 jump_around_label = gen_label_rtx ();
14138 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14139 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14140 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14141 emit_block_move_via_libcall (dst, src, count_exp, false);
14142 emit_jump (jump_around_label);
14143 emit_label (hot_label);
14144 }
14145
14146 /* Step 2: Alignment prologue. */
14147
14148 if (desired_align > align)
14149 {
14150 /* Except for the first move in the epilogue, we no longer know
14151 the constant offset in aliasing info. It doesn't seem worth
14152 the pain to maintain it for the first move, so throw away
14153 the info early. */
14154 src = change_address (src, BLKmode, srcreg);
14155 dst = change_address (dst, BLKmode, destreg);
14156 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14157 desired_align);
14158 }
14159 if (label && size_needed == 1)
14160 {
14161 emit_label (label);
14162 LABEL_NUSES (label) = 1;
14163 label = NULL;
14164 }
14165
14166 /* Step 3: Main loop. */
14167
14168 switch (alg)
14169 {
14170 case libcall:
14171 case no_stringop:
14172 gcc_unreachable ();
14173 case loop_1_byte:
14174 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14175 count_exp, QImode, 1, expected_size);
14176 break;
14177 case loop:
14178 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14179 count_exp, Pmode, 1, expected_size);
14180 break;
14181 case unrolled_loop:
14182 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14183 registers for 4 temporaries anyway. */
14184 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14185 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14186 expected_size);
14187 break;
14188 case rep_prefix_8_byte:
14189 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14190 DImode);
14191 break;
14192 case rep_prefix_4_byte:
14193 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14194 SImode);
14195 break;
14196 case rep_prefix_1_byte:
14197 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14198 QImode);
14199 break;
14200 }
14201 /* Properly adjust the offsets of src and dest memory for aliasing. */
14202 if (CONST_INT_P (count_exp))
14203 {
14204 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14205 (count / size_needed) * size_needed);
14206 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14207 (count / size_needed) * size_needed);
14208 }
14209 else
14210 {
14211 src = change_address (src, BLKmode, srcreg);
14212 dst = change_address (dst, BLKmode, destreg);
14213 }
14214
14215 /* Step 4: Epilogue to copy the remaining bytes. */
14216
14217 if (label)
14218 {
14219 /* When the main loop is done, COUNT_EXP might still hold the original count,
14220 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14221 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14222 bytes. Compensate if needed. */
14223
14224 if (size_needed < epilogue_size_needed)
14225 {
14226 tmp =
14227 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14228 GEN_INT (size_needed - 1), count_exp, 1,
14229 OPTAB_DIRECT);
14230 if (tmp != count_exp)
14231 emit_move_insn (count_exp, tmp);
14232 }
14233 emit_label (label);
14234 LABEL_NUSES (label) = 1;
14235 }
14236
14237 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14238 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14239 epilogue_size_needed);
14240 if (jump_around_label)
14241 emit_label (jump_around_label);
14242 return 1;
14243 }
14244
14245 /* Helper function for memset expansion. For the QImode value 0xXY produce
14246 0xXYXYXYXY of the width specified by MODE. This is essentially
14247 VAL * 0x01010101, but we can do slightly better than
14248 synth_mult by unwinding the sequence by hand on CPUs with
14249 slow multiply. */
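/* For illustration: promoting the QImode value 0xAB to SImode goes
   0x000000AB -> 0x0000ABAB -> 0xABABABAB via the shift-and-or (or insv)
   steps below; for DImode one more OR with the value shifted left by 32
   is emitted.  */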
14250 static rtx
14251 promote_duplicated_reg (enum machine_mode mode, rtx val)
14252 {
14253 enum machine_mode valmode = GET_MODE (val);
14254 rtx tmp;
14255 int nops = mode == DImode ? 3 : 2;
14256
14257 gcc_assert (mode == SImode || mode == DImode);
14258 if (val == const0_rtx)
14259 return copy_to_mode_reg (mode, const0_rtx);
14260 if (CONST_INT_P (val))
14261 {
14262 HOST_WIDE_INT v = INTVAL (val) & 255;
14263
14264 v |= v << 8;
14265 v |= v << 16;
14266 if (mode == DImode)
14267 v |= (v << 16) << 16;
14268 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14269 }
14270
14271 if (valmode == VOIDmode)
14272 valmode = QImode;
14273 if (valmode != QImode)
14274 val = gen_lowpart (QImode, val);
14275 if (mode == QImode)
14276 return val;
14277 if (!TARGET_PARTIAL_REG_STALL)
14278 nops--;
14279 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14280 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14281 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14282 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14283 {
14284 rtx reg = convert_modes (mode, QImode, val, true);
14285 tmp = promote_duplicated_reg (mode, const1_rtx);
14286 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14287 OPTAB_DIRECT);
14288 }
14289 else
14290 {
14291 rtx reg = convert_modes (mode, QImode, val, true);
14292
14293 if (!TARGET_PARTIAL_REG_STALL)
14294 if (mode == SImode)
14295 emit_insn (gen_movsi_insv_1 (reg, reg));
14296 else
14297 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14298 else
14299 {
14300 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14301 NULL, 1, OPTAB_DIRECT);
14302 reg =
14303 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14304 }
14305 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14306 NULL, 1, OPTAB_DIRECT);
14307 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14308 if (mode == SImode)
14309 return reg;
14310 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14311 NULL, 1, OPTAB_DIRECT);
14312 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14313 return reg;
14314 }
14315 }
14316
14317 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
14318 will be needed by the main loop copying SIZE_NEEDED chunks and by the prologue
14319 getting alignment from ALIGN to DESIRED_ALIGN. */
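/* For illustration: SIZE_NEEDED == 8 on a 64-bit target yields a DImode
   duplicate, SIZE_NEEDED == 4 (or a prologue needing 4-byte alignment) an
   SImode one, SIZE_NEEDED == 2 an HImode one, and otherwise VAL is
   returned unchanged.  */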
14320 static rtx
14321 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14322 {
14323 rtx promoted_val;
14324
14325 if (TARGET_64BIT
14326 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14327 promoted_val = promote_duplicated_reg (DImode, val);
14328 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14329 promoted_val = promote_duplicated_reg (SImode, val);
14330 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14331 promoted_val = promote_duplicated_reg (HImode, val);
14332 else
14333 promoted_val = val;
14334
14335 return promoted_val;
14336 }
14337
14338 /* Expand string set operation (memset). Use i386 string operations when
14339 profitable. See the ix86_expand_movmem comment for an explanation of the
14340 individual steps performed. */
14341 int
14342 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14343 rtx expected_align_exp, rtx expected_size_exp)
14344 {
14345 rtx destreg;
14346 rtx label = NULL;
14347 rtx tmp;
14348 rtx jump_around_label = NULL;
14349 HOST_WIDE_INT align = 1;
14350 unsigned HOST_WIDE_INT count = 0;
14351 HOST_WIDE_INT expected_size = -1;
14352 int size_needed = 0, epilogue_size_needed;
14353 int desired_align = 0;
14354 enum stringop_alg alg;
14355 rtx promoted_val = NULL;
14356 bool force_loopy_epilogue = false;
14357 int dynamic_check;
14358
14359 if (CONST_INT_P (align_exp))
14360 align = INTVAL (align_exp);
14361 /* i386 can do misaligned access at reasonably increased cost. */
14362 if (CONST_INT_P (expected_align_exp)
14363 && INTVAL (expected_align_exp) > align)
14364 align = INTVAL (expected_align_exp);
14365 if (CONST_INT_P (count_exp))
14366 count = expected_size = INTVAL (count_exp);
14367 if (CONST_INT_P (expected_size_exp) && count == 0)
14368 expected_size = INTVAL (expected_size_exp);
14369
14370 /* Step 0: Decide on preferred algorithm, desired alignment and
14371 size of chunks to be copied by main loop. */
14372
14373 alg = decide_alg (count, expected_size, true, &dynamic_check);
14374 desired_align = decide_alignment (align, alg, expected_size);
14375
14376 if (!TARGET_ALIGN_STRINGOPS)
14377 align = desired_align;
14378
14379 if (alg == libcall)
14380 return 0;
14381 gcc_assert (alg != no_stringop);
14382 if (!count)
14383 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14384 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14385 switch (alg)
14386 {
14387 case libcall:
14388 case no_stringop:
14389 gcc_unreachable ();
14390 case loop:
14391 size_needed = GET_MODE_SIZE (Pmode);
14392 break;
14393 case unrolled_loop:
14394 size_needed = GET_MODE_SIZE (Pmode) * 4;
14395 break;
14396 case rep_prefix_8_byte:
14397 size_needed = 8;
14398 break;
14399 case rep_prefix_4_byte:
14400 size_needed = 4;
14401 break;
14402 case rep_prefix_1_byte:
14403 case loop_1_byte:
14404 size_needed = 1;
14405 break;
14406 }
14407 epilogue_size_needed = size_needed;
14408
14409 /* Step 1: Prologue guard. */
14410
14411 /* Alignment code needs count to be in register. */
14412 if (CONST_INT_P (count_exp) && desired_align > align)
14413 {
14414 enum machine_mode mode = SImode;
14415 if (TARGET_64BIT && (count & ~0xffffffff))
14416 mode = DImode;
14417 count_exp = force_reg (mode, count_exp);
14418 }
14419 /* Do the cheap promotion to allow better CSE across the
14420 main loop and epilogue (i.e. one load of the big constant in
14421 front of all the code). */
14422 if (CONST_INT_P (val_exp))
14423 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14424 desired_align, align);
14425 /* Ensure that alignment prologue won't copy past end of block. */
14426 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
14427 && !count)
14428 {
14429 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14430
14431 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14432 Make sure it is a power of 2. */
14433 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14434
14435 /* To improve performance of small blocks, we jump around the VAL
14436 promotion. This means that if the promoted VAL is not constant,
14437 we might not use it in the epilogue and have to use the byte
14438 loop variant. */
14439 if (epilogue_size_needed > 2 && !promoted_val)
14440 force_loopy_epilogue = true;
14441 label = gen_label_rtx ();
14442 emit_cmp_and_jump_insns (count_exp,
14443 GEN_INT (epilogue_size_needed),
14444 LTU, 0, GET_MODE (count_exp), 1, label);
14445 if (expected_size == -1 || expected_size <= epilogue_size_needed)
14446 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14447 else
14448 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14449 }
14450 if (dynamic_check != -1)
14451 {
14452 rtx hot_label = gen_label_rtx ();
14453 jump_around_label = gen_label_rtx ();
14454 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14455 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14456 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14457 set_storage_via_libcall (dst, count_exp, val_exp, false);
14458 emit_jump (jump_around_label);
14459 emit_label (hot_label);
14460 }
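/* Note (added): when decide_alg requested a dynamic check, counts above
   DYNAMIC_CHECK - 1 take the set_storage_via_libcall path emitted above,
   so the inline code that follows only has to handle the small-count
   case.  */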
14461
14462 /* Step 2: Alignment prologue. */
14463
14464 /* Do the expensive promotion once we branched off the small blocks. */
14465 if (!promoted_val)
14466 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14467 desired_align, align);
14468 gcc_assert (desired_align >= 1 && align >= 1);
14469
14470 if (desired_align > align)
14471 {
14472 /* Except for the first move in the epilogue, we no longer know
14473 the constant offset in aliasing info. It does not seem worth
14474 the pain to maintain it for the first move, so throw away
14475 the info early. */
14476 dst = change_address (dst, BLKmode, destreg);
14477 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14478 desired_align);
14479 }
14480 if (label && size_needed == 1)
14481 {
14482 emit_label (label);
14483 LABEL_NUSES (label) = 1;
14484 label = NULL;
14485 }
14486
14487 /* Step 3: Main loop. */
14488
14489 switch (alg)
14490 {
14491 case libcall:
14492 case no_stringop:
14493 gcc_unreachable ();
14494 case loop_1_byte:
14495 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14496 count_exp, QImode, 1, expected_size);
14497 break;
14498 case loop:
14499 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14500 count_exp, Pmode, 1, expected_size);
14501 break;
14502 case unrolled_loop:
14503 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14504 count_exp, Pmode, 4, expected_size);
14505 break;
14506 case rep_prefix_8_byte:
14507 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14508 DImode);
14509 break;
14510 case rep_prefix_4_byte:
14511 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14512 SImode);
14513 break;
14514 case rep_prefix_1_byte:
14515 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14516 QImode);
14517 break;
14518 }
14519 /* Properly adjust the offset of the destination memory for aliasing. */
14520 if (CONST_INT_P (count_exp))
14521 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14522 (count / size_needed) * size_needed);
14523 else
14524 dst = change_address (dst, BLKmode, destreg);
14525
14526 /* Step 4: Epilogue to copy the remaining bytes. */
14527
14528 if (label)
14529 {
14530 /* When the main loop is done, COUNT_EXP might hold the original count,
14531 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14532 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14533 bytes. Compensate if needed. */
14534
14535 if (size_needed < desired_align - align)
14536 {
14537 tmp =
14538 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14539 GEN_INT (size_needed - 1), count_exp, 1,
14540 OPTAB_DIRECT);
14541 size_needed = desired_align - align + 1;
14542 if (tmp != count_exp)
14543 emit_move_insn (count_exp, tmp);
14544 }
14545 emit_label (label);
14546 LABEL_NUSES (label) = 1;
14547 }
14548 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14549 {
14550 if (force_loopy_epilogue)
14551 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14552 size_needed);
14553 else
14554 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14555 size_needed);
14556 }
14557 if (jump_around_label)
14558 emit_label (jump_around_label);
14559 return 1;
14560 }
14561
14562 /* Expand strlen. */
14563 int
14564 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14565 {
14566 rtx addr, scratch1, scratch2, scratch3, scratch4;
14567
14568 /* The generic case of the strlen expander is long. Avoid expanding
14569 it unless TARGET_INLINE_ALL_STRINGOPS. */
14570
14571 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14572 && !TARGET_INLINE_ALL_STRINGOPS
14573 && !optimize_size
14574 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14575 return 0;
14576
14577 addr = force_reg (Pmode, XEXP (src, 0));
14578 scratch1 = gen_reg_rtx (Pmode);
14579
14580 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14581 && !optimize_size)
14582 {
14583 /* Well it seems that some optimizer does not combine a call like
14584 foo(strlen(bar), strlen(bar));
14585 when the move and the subtraction are done here. It does calculate
14586 the length just once when these instructions are done inside
14587 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14588 often used and I use one fewer register for the lifetime of
14589 output_strlen_unroll() this is better. */
14590
14591 emit_move_insn (out, addr);
14592
14593 ix86_expand_strlensi_unroll_1 (out, src, align);
14594
14595 /* strlensi_unroll_1 returns the address of the zero at the end of
14596 the string, like memchr(), so compute the length by subtracting
14597 the start address. */
14598 if (TARGET_64BIT)
14599 emit_insn (gen_subdi3 (out, out, addr));
14600 else
14601 emit_insn (gen_subsi3 (out, out, addr));
14602 }
14603 else
14604 {
14605 rtx unspec;
14606 scratch2 = gen_reg_rtx (Pmode);
14607 scratch3 = gen_reg_rtx (Pmode);
14608 scratch4 = force_reg (Pmode, constm1_rtx);
14609
14610 emit_move_insn (scratch3, addr);
14611 eoschar = force_reg (QImode, eoschar);
14612
14613 src = replace_equiv_address_nv (src, scratch3);
14614
14615 /* If .md starts supporting :P, this can be done in .md. */
14616 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14617 scratch4), UNSPEC_SCAS);
14618 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
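/* Added note: roughly speaking, SCRATCH1 ends up holding the count
   register left by "repnz scasb".  Starting from -1 it is decremented
   once per byte scanned (including the terminator), so it finishes as
   -(length + 2); the length is therefore recovered below as the one's
   complement of SCRATCH1 minus one.  */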
14619 if (TARGET_64BIT)
14620 {
14621 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14622 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14623 }
14624 else
14625 {
14626 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14627 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14628 }
14629 }
14630 return 1;
14631 }
14632
14633 /* Expand the appropriate insns for doing strlen if not just doing
14634 repnz; scasb
14635
14636 out = result, initialized with the start address
14637 align_rtx = alignment of the address.
14638 scratch = scratch register, initialized with the start address when
14639 not aligned, otherwise undefined
14640
14641 This is just the body. It needs the initializations mentioned above and
14642 some address computing at the end. These things are done in i386.md. */
14643
14644 static void
14645 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14646 {
14647 int align;
14648 rtx tmp;
14649 rtx align_2_label = NULL_RTX;
14650 rtx align_3_label = NULL_RTX;
14651 rtx align_4_label = gen_label_rtx ();
14652 rtx end_0_label = gen_label_rtx ();
14653 rtx mem;
14654 rtx tmpreg = gen_reg_rtx (SImode);
14655 rtx scratch = gen_reg_rtx (SImode);
14656 rtx cmp;
14657
14658 align = 0;
14659 if (CONST_INT_P (align_rtx))
14660 align = INTVAL (align_rtx);
14661
14662 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14663
14664 /* Is there a known alignment and is it less than 4? */
14665 if (align < 4)
14666 {
14667 rtx scratch1 = gen_reg_rtx (Pmode);
14668 emit_move_insn (scratch1, out);
14669 /* Is there a known alignment and is it not 2? */
14670 if (align != 2)
14671 {
14672 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14673 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14674
14675 /* Leave just the 3 lower bits. */
14676 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14677 NULL_RTX, 0, OPTAB_WIDEN);
14678
14679 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14680 Pmode, 1, align_4_label);
14681 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14682 Pmode, 1, align_2_label);
14683 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14684 Pmode, 1, align_3_label);
14685 }
14686 else
14687 {
14688 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14689 check whether it is aligned to a 4-byte boundary. */
14690
14691 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14692 NULL_RTX, 0, OPTAB_WIDEN);
14693
14694 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14695 Pmode, 1, align_4_label);
14696 }
14697
14698 mem = change_address (src, QImode, out);
14699
14700 /* Now compare the bytes. */
14701
14702 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14703 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14704 QImode, 1, end_0_label);
14705
14706 /* Increment the address. */
14707 if (TARGET_64BIT)
14708 emit_insn (gen_adddi3 (out, out, const1_rtx));
14709 else
14710 emit_insn (gen_addsi3 (out, out, const1_rtx));
14711
14712 /* Not needed with an alignment of 2 */
14713 if (align != 2)
14714 {
14715 emit_label (align_2_label);
14716
14717 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14718 end_0_label);
14719
14720 if (TARGET_64BIT)
14721 emit_insn (gen_adddi3 (out, out, const1_rtx));
14722 else
14723 emit_insn (gen_addsi3 (out, out, const1_rtx));
14724
14725 emit_label (align_3_label);
14726 }
14727
14728 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14729 end_0_label);
14730
14731 if (TARGET_64BIT)
14732 emit_insn (gen_adddi3 (out, out, const1_rtx));
14733 else
14734 emit_insn (gen_addsi3 (out, out, const1_rtx));
14735 }
14736
14737 /* Generate a loop to check 4 bytes at a time. It is not a good idea
14738 to align this loop; it only makes the program larger and does not
14739 help to speed it up. */
14740 emit_label (align_4_label);
14741
14742 mem = change_address (src, SImode, out);
14743 emit_move_insn (scratch, mem);
14744 if (TARGET_64BIT)
14745 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14746 else
14747 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14748
14749 /* This formula yields a nonzero result iff one of the bytes is zero.
14750 This saves three branches inside the loop and many cycles. */
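/* In other words (illustrative note): the four insns below compute
   (x - 0x01010101) & ~x & 0x80808080, which is nonzero exactly when
   some byte of x is zero.  E.g. for x = 0x41420043 we get
   x - 0x01010101 = 0x4040ff42 and ~x = 0xbebdffbc, and the two ANDs
   leave 0x00008000, flagging the zero byte.  */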
14751
14752 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14753 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14754 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14755 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14756 gen_int_mode (0x80808080, SImode)));
14757 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14758 align_4_label);
14759
14760 if (TARGET_CMOVE)
14761 {
14762 rtx reg = gen_reg_rtx (SImode);
14763 rtx reg2 = gen_reg_rtx (Pmode);
14764 emit_move_insn (reg, tmpreg);
14765 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14766
14767 /* If zero is not in the first two bytes, move two bytes forward. */
14768 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14769 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14770 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14771 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14772 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14773 reg,
14774 tmpreg)));
14775 /* Emit lea manually to avoid clobbering of flags. */
14776 emit_insn (gen_rtx_SET (SImode, reg2,
14777 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14778
14779 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14780 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14781 emit_insn (gen_rtx_SET (VOIDmode, out,
14782 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14783 reg2,
14784 out)));
14785
14786 }
14787 else
14788 {
14789 rtx end_2_label = gen_label_rtx ();
14790 /* Is zero in the first two bytes? */
14791
14792 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14793 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14794 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14795 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14796 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14797 pc_rtx);
14798 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14799 JUMP_LABEL (tmp) = end_2_label;
14800
14801 /* Not in the first two. Move two bytes forward. */
14802 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14803 if (TARGET_64BIT)
14804 emit_insn (gen_adddi3 (out, out, const2_rtx));
14805 else
14806 emit_insn (gen_addsi3 (out, out, const2_rtx));
14807
14808 emit_label (end_2_label);
14809
14810 }
14811
14812 /* Avoid branch in fixing the byte. */
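/* Added explanation: at this point bit 7 of the low byte of TMPREG is
   set exactly when the first byte of the remaining pair is the zero
   byte.  Doubling that byte with addqi3_cc moves the bit into the
   carry flag, and the subtract-with-borrow below then subtracts either
   3 or 4 from OUT, landing it on the terminating zero without a
   branch.  */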
14813 tmpreg = gen_lowpart (QImode, tmpreg);
14814 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14815 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14816 if (TARGET_64BIT)
14817 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14818 else
14819 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14820
14821 emit_label (end_0_label);
14822 }
14823
14824 void
14825 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14826 rtx callarg2 ATTRIBUTE_UNUSED,
14827 rtx pop, int sibcall)
14828 {
14829 rtx use = NULL, call;
14830
14831 if (pop == const0_rtx)
14832 pop = NULL;
14833 gcc_assert (!TARGET_64BIT || !pop);
14834
14835 if (TARGET_MACHO && !TARGET_64BIT)
14836 {
14837 #if TARGET_MACHO
14838 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14839 fnaddr = machopic_indirect_call_target (fnaddr);
14840 #endif
14841 }
14842 else
14843 {
14844 /* Static functions and indirect calls don't need the pic register. */
14845 if (! TARGET_64BIT && flag_pic
14846 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14847 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14848 use_reg (&use, pic_offset_table_rtx);
14849 }
14850
14851 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14852 {
14853 rtx al = gen_rtx_REG (QImode, 0);
14854 emit_move_insn (al, callarg2);
14855 use_reg (&use, al);
14856 }
14857
14858 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14859 {
14860 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14861 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14862 }
14863 if (sibcall && TARGET_64BIT
14864 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14865 {
14866 rtx addr;
14867 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14868 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14869 emit_move_insn (fnaddr, addr);
14870 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14871 }
14872
14873 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14874 if (retval)
14875 call = gen_rtx_SET (VOIDmode, retval, call);
14876 if (pop)
14877 {
14878 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14879 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14880 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14881 }
14882
14883 call = emit_call_insn (call);
14884 if (use)
14885 CALL_INSN_FUNCTION_USAGE (call) = use;
14886 }
14887
14888 \f
14889 /* Clear stack slot assignments remembered from previous functions.
14890 This is called from INIT_EXPANDERS once before RTL is emitted for each
14891 function. */
14892
14893 static struct machine_function *
14894 ix86_init_machine_status (void)
14895 {
14896 struct machine_function *f;
14897
14898 f = ggc_alloc_cleared (sizeof (struct machine_function));
14899 f->use_fast_prologue_epilogue_nregs = -1;
14900 f->tls_descriptor_call_expanded_p = 0;
14901
14902 return f;
14903 }
14904
14905 /* Return a MEM corresponding to a stack slot with mode MODE.
14906 Allocate a new slot if necessary.
14907
14908 The RTL for a function can have several slots available: N is
14909 which slot to use. */
14910
14911 rtx
14912 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14913 {
14914 struct stack_local_entry *s;
14915
14916 gcc_assert (n < MAX_386_STACK_LOCALS);
14917
14918 for (s = ix86_stack_locals; s; s = s->next)
14919 if (s->mode == mode && s->n == n)
14920 return copy_rtx (s->rtl);
14921
14922 s = (struct stack_local_entry *)
14923 ggc_alloc (sizeof (struct stack_local_entry));
14924 s->n = n;
14925 s->mode = mode;
14926 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14927
14928 s->next = ix86_stack_locals;
14929 ix86_stack_locals = s;
14930 return s->rtl;
14931 }
14932
14933 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14934
14935 static GTY(()) rtx ix86_tls_symbol;
14936 rtx
14937 ix86_tls_get_addr (void)
14938 {
14939
14940 if (!ix86_tls_symbol)
14941 {
14942 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14943 (TARGET_ANY_GNU_TLS
14944 && !TARGET_64BIT)
14945 ? "___tls_get_addr"
14946 : "__tls_get_addr");
14947 }
14948
14949 return ix86_tls_symbol;
14950 }
14951
14952 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14953
14954 static GTY(()) rtx ix86_tls_module_base_symbol;
14955 rtx
14956 ix86_tls_module_base (void)
14957 {
14958
14959 if (!ix86_tls_module_base_symbol)
14960 {
14961 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14962 "_TLS_MODULE_BASE_");
14963 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14964 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14965 }
14966
14967 return ix86_tls_module_base_symbol;
14968 }
14969 \f
14970 /* Calculate the length of the memory address in the instruction
14971 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14972
14973 int
14974 memory_address_length (rtx addr)
14975 {
14976 struct ix86_address parts;
14977 rtx base, index, disp;
14978 int len;
14979 int ok;
14980
14981 if (GET_CODE (addr) == PRE_DEC
14982 || GET_CODE (addr) == POST_INC
14983 || GET_CODE (addr) == PRE_MODIFY
14984 || GET_CODE (addr) == POST_MODIFY)
14985 return 0;
14986
14987 ok = ix86_decompose_address (addr, &parts);
14988 gcc_assert (ok);
14989
14990 if (parts.base && GET_CODE (parts.base) == SUBREG)
14991 parts.base = SUBREG_REG (parts.base);
14992 if (parts.index && GET_CODE (parts.index) == SUBREG)
14993 parts.index = SUBREG_REG (parts.index);
14994
14995 base = parts.base;
14996 index = parts.index;
14997 disp = parts.disp;
14998 len = 0;
14999
15000 /* Rule of thumb:
15001 - esp as the base always wants an index,
15002 - ebp as the base always wants a displacement. */
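/* For instance (illustrative, not part of the original comment): a plain
   (%eax) operand needs no extra bytes, (%esp), 8(%ebp) and (%eax,%ebx,4)
   each need one extra byte (SIB or disp8), and an absolute symbol
   address needs four bytes of displacement.  */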
15003
15004 /* Register Indirect. */
15005 if (base && !index && !disp)
15006 {
15007 /* esp (for its index) and ebp (for its displacement) need
15008 the two-byte modrm form. */
15009 if (addr == stack_pointer_rtx
15010 || addr == arg_pointer_rtx
15011 || addr == frame_pointer_rtx
15012 || addr == hard_frame_pointer_rtx)
15013 len = 1;
15014 }
15015
15016 /* Direct Addressing. */
15017 else if (disp && !base && !index)
15018 len = 4;
15019
15020 else
15021 {
15022 /* Find the length of the displacement constant. */
15023 if (disp)
15024 {
15025 if (base && satisfies_constraint_K (disp))
15026 len = 1;
15027 else
15028 len = 4;
15029 }
15030 /* ebp always wants a displacement. */
15031 else if (base == hard_frame_pointer_rtx)
15032 len = 1;
15033
15034 /* An index requires the two-byte modrm form.... */
15035 if (index
15036 /* ...like esp, which always wants an index. */
15037 || base == stack_pointer_rtx
15038 || base == arg_pointer_rtx
15039 || base == frame_pointer_rtx)
15040 len += 1;
15041 }
15042
15043 return len;
15044 }
15045
15046 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15047 is set, expect that the insn has an 8-bit immediate alternative. */
15048 int
15049 ix86_attr_length_immediate_default (rtx insn, int shortform)
15050 {
15051 int len = 0;
15052 int i;
15053 extract_insn_cached (insn);
15054 for (i = recog_data.n_operands - 1; i >= 0; --i)
15055 if (CONSTANT_P (recog_data.operand[i]))
15056 {
15057 gcc_assert (!len);
15058 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15059 len = 1;
15060 else
15061 {
15062 switch (get_attr_mode (insn))
15063 {
15064 case MODE_QI:
15065 len += 1;
15066 break;
15067 case MODE_HI:
15068 len += 2;
15069 break;
15070 case MODE_SI:
15071 len += 4;
15072 break;
15073 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15074 case MODE_DI:
15075 len += 4;
15076 break;
15077 default:
15078 fatal_insn ("unknown insn mode", insn);
15079 }
15080 }
15081 }
15082 return len;
15083 }
15084 /* Compute default value for "length_address" attribute. */
15085 int
15086 ix86_attr_length_address_default (rtx insn)
15087 {
15088 int i;
15089
15090 if (get_attr_type (insn) == TYPE_LEA)
15091 {
15092 rtx set = PATTERN (insn);
15093
15094 if (GET_CODE (set) == PARALLEL)
15095 set = XVECEXP (set, 0, 0);
15096
15097 gcc_assert (GET_CODE (set) == SET);
15098
15099 return memory_address_length (SET_SRC (set));
15100 }
15101
15102 extract_insn_cached (insn);
15103 for (i = recog_data.n_operands - 1; i >= 0; --i)
15104 if (MEM_P (recog_data.operand[i]))
15105 {
15106 return memory_address_length (XEXP (recog_data.operand[i], 0));
15108 }
15109 return 0;
15110 }
15111 \f
15112 /* Return the maximum number of instructions a cpu can issue. */
15113
15114 static int
15115 ix86_issue_rate (void)
15116 {
15117 switch (ix86_tune)
15118 {
15119 case PROCESSOR_PENTIUM:
15120 case PROCESSOR_K6:
15121 return 2;
15122
15123 case PROCESSOR_PENTIUMPRO:
15124 case PROCESSOR_PENTIUM4:
15125 case PROCESSOR_ATHLON:
15126 case PROCESSOR_K8:
15127 case PROCESSOR_AMDFAM10:
15128 case PROCESSOR_NOCONA:
15129 case PROCESSOR_GENERIC32:
15130 case PROCESSOR_GENERIC64:
15131 return 3;
15132
15133 case PROCESSOR_CORE2:
15134 return 4;
15135
15136 default:
15137 return 1;
15138 }
15139 }
15140
15141 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15142 by DEP_INSN and nothing else set by DEP_INSN. */
15143
15144 static int
15145 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15146 {
15147 rtx set, set2;
15148
15149 /* Simplify the test for uninteresting insns. */
15150 if (insn_type != TYPE_SETCC
15151 && insn_type != TYPE_ICMOV
15152 && insn_type != TYPE_FCMOV
15153 && insn_type != TYPE_IBR)
15154 return 0;
15155
15156 if ((set = single_set (dep_insn)) != 0)
15157 {
15158 set = SET_DEST (set);
15159 set2 = NULL_RTX;
15160 }
15161 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15162 && XVECLEN (PATTERN (dep_insn), 0) == 2
15163 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15164 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15165 {
15166 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15167 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15168 }
15169 else
15170 return 0;
15171
15172 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15173 return 0;
15174
15175 /* This test is true if the dependent insn reads the flags but
15176 not any other potentially set register. */
15177 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15178 return 0;
15179
15180 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15181 return 0;
15182
15183 return 1;
15184 }
15185
15186 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15187 address with operands set by DEP_INSN. */
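/* Example (added note): on the original Pentium the sequence
	addl $4, %eax
	movl (%eax), %edx
   suffers an address generation interlock because the load's address is
   produced by the immediately preceding instruction; ix86_adjust_cost
   uses this predicate to charge an extra cycle in that case.  */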
15188
15189 static int
15190 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15191 {
15192 rtx addr;
15193
15194 if (insn_type == TYPE_LEA
15195 && TARGET_PENTIUM)
15196 {
15197 addr = PATTERN (insn);
15198
15199 if (GET_CODE (addr) == PARALLEL)
15200 addr = XVECEXP (addr, 0, 0);
15201
15202 gcc_assert (GET_CODE (addr) == SET);
15203
15204 addr = SET_SRC (addr);
15205 }
15206 else
15207 {
15208 int i;
15209 extract_insn_cached (insn);
15210 for (i = recog_data.n_operands - 1; i >= 0; --i)
15211 if (MEM_P (recog_data.operand[i]))
15212 {
15213 addr = XEXP (recog_data.operand[i], 0);
15214 goto found;
15215 }
15216 return 0;
15217 found:;
15218 }
15219
15220 return modified_in_p (addr, dep_insn);
15221 }
15222
15223 static int
15224 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15225 {
15226 enum attr_type insn_type, dep_insn_type;
15227 enum attr_memory memory;
15228 rtx set, set2;
15229 int dep_insn_code_number;
15230
15231 /* Anti and output dependencies have zero cost on all CPUs. */
15232 if (REG_NOTE_KIND (link) != 0)
15233 return 0;
15234
15235 dep_insn_code_number = recog_memoized (dep_insn);
15236
15237 /* If we can't recognize the insns, we can't really do anything. */
15238 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15239 return cost;
15240
15241 insn_type = get_attr_type (insn);
15242 dep_insn_type = get_attr_type (dep_insn);
15243
15244 switch (ix86_tune)
15245 {
15246 case PROCESSOR_PENTIUM:
15247 /* Address Generation Interlock adds a cycle of latency. */
15248 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15249 cost += 1;
15250
15251 /* ??? Compares pair with jump/setcc. */
15252 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15253 cost = 0;
15254
15255 /* Floating point stores require value to be ready one cycle earlier. */
15256 if (insn_type == TYPE_FMOV
15257 && get_attr_memory (insn) == MEMORY_STORE
15258 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15259 cost += 1;
15260 break;
15261
15262 case PROCESSOR_PENTIUMPRO:
15263 memory = get_attr_memory (insn);
15264
15265 /* INT->FP conversion is expensive. */
15266 if (get_attr_fp_int_src (dep_insn))
15267 cost += 5;
15268
15269 /* There is one cycle extra latency between an FP op and a store. */
15270 if (insn_type == TYPE_FMOV
15271 && (set = single_set (dep_insn)) != NULL_RTX
15272 && (set2 = single_set (insn)) != NULL_RTX
15273 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15274 && MEM_P (SET_DEST (set2)))
15275 cost += 1;
15276
15277 /* Show the ability of the reorder buffer to hide the latency of a load
15278 by executing it in parallel with the previous instruction when the
15279 previous instruction is not needed to compute the address. */
15280 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15281 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15282 {
15283 /* Claim that moves take one cycle, as the core can issue one load
15284 at a time and the next load can start a cycle later. */
15285 if (dep_insn_type == TYPE_IMOV
15286 || dep_insn_type == TYPE_FMOV)
15287 cost = 1;
15288 else if (cost > 1)
15289 cost--;
15290 }
15291 break;
15292
15293 case PROCESSOR_K6:
15294 memory = get_attr_memory (insn);
15295
15296 /* The esp dependency is resolved before the instruction is really
15297 finished. */
15298 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15299 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15300 return 1;
15301
15302 /* INT->FP conversion is expensive. */
15303 if (get_attr_fp_int_src (dep_insn))
15304 cost += 5;
15305
15306 /* Show the ability of the reorder buffer to hide the latency of a load
15307 by executing it in parallel with the previous instruction when the
15308 previous instruction is not needed to compute the address. */
15309 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15310 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15311 {
15312 /* Claim that moves take one cycle, as the core can issue one load
15313 at a time and the next load can start a cycle later. */
15314 if (dep_insn_type == TYPE_IMOV
15315 || dep_insn_type == TYPE_FMOV)
15316 cost = 1;
15317 else if (cost > 2)
15318 cost -= 2;
15319 else
15320 cost = 1;
15321 }
15322 break;
15323
15324 case PROCESSOR_ATHLON:
15325 case PROCESSOR_K8:
15326 case PROCESSOR_AMDFAM10:
15327 case PROCESSOR_GENERIC32:
15328 case PROCESSOR_GENERIC64:
15329 memory = get_attr_memory (insn);
15330
15331 /* Show the ability of the reorder buffer to hide the latency of a load
15332 by executing it in parallel with the previous instruction when the
15333 previous instruction is not needed to compute the address. */
15334 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15335 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15336 {
15337 enum attr_unit unit = get_attr_unit (insn);
15338 int loadcost = 3;
15339
15340 /* Because of the difference between the length of integer and
15341 floating unit pipeline preparation stages, the memory operands
15342 for floating point are cheaper.
15343
15344 ??? For Athlon the difference is most probably 2. */
15345 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15346 loadcost = 3;
15347 else
15348 loadcost = TARGET_ATHLON ? 2 : 0;
15349
15350 if (cost >= loadcost)
15351 cost -= loadcost;
15352 else
15353 cost = 0;
15354 }
15355
15356 default:
15357 break;
15358 }
15359
15360 return cost;
15361 }
15362
15363 /* How many alternative schedules to try. This should be as wide as the
15364 scheduling freedom in the DFA, but no wider. Making this value too
15365 large results in extra work for the scheduler. */
15366
15367 static int
15368 ia32_multipass_dfa_lookahead (void)
15369 {
15370 if (ix86_tune == PROCESSOR_PENTIUM)
15371 return 2;
15372
15373 if (ix86_tune == PROCESSOR_PENTIUMPRO
15374 || ix86_tune == PROCESSOR_K6)
15375 return 1;
15376
15377 else
15378 return 0;
15379 }
15380
15381 \f
15382 /* Compute the alignment given to a constant that is being placed in memory.
15383 EXP is the constant and ALIGN is the alignment that the object would
15384 ordinarily have.
15385 The value of this function is used instead of that alignment to align
15386 the object. */
15387
15388 int
15389 ix86_constant_alignment (tree exp, int align)
15390 {
15391 if (TREE_CODE (exp) == REAL_CST)
15392 {
15393 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15394 return 64;
15395 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15396 return 128;
15397 }
15398 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15399 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15400 return BITS_PER_WORD;
15401
15402 return align;
15403 }
15404
15405 /* Compute the alignment for a static variable.
15406 TYPE is the data type, and ALIGN is the alignment that
15407 the object would ordinarily have. The value of this function is used
15408 instead of that alignment to align the object. */
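/* For example (illustrative): when not optimizing for size, a static
   "double x[64]" (well above the 32-byte threshold encoded below as 256
   bits) receives 256-bit alignment, while a single "double" only gets
   the 64-bit alignment from the mode-based rules further down.  */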
15409
15410 int
15411 ix86_data_alignment (tree type, int align)
15412 {
15413 int max_align = optimize_size ? BITS_PER_WORD : 256;
15414
15415 if (AGGREGATE_TYPE_P (type)
15416 && TYPE_SIZE (type)
15417 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15418 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15419 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15420 && align < max_align)
15421 align = max_align;
15422
15423 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15424 to a 16-byte boundary. */
15425 if (TARGET_64BIT)
15426 {
15427 if (AGGREGATE_TYPE_P (type)
15428 && TYPE_SIZE (type)
15429 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15430 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15431 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15432 return 128;
15433 }
15434
15435 if (TREE_CODE (type) == ARRAY_TYPE)
15436 {
15437 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15438 return 64;
15439 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15440 return 128;
15441 }
15442 else if (TREE_CODE (type) == COMPLEX_TYPE)
15443 {
15444
15445 if (TYPE_MODE (type) == DCmode && align < 64)
15446 return 64;
15447 if (TYPE_MODE (type) == XCmode && align < 128)
15448 return 128;
15449 }
15450 else if ((TREE_CODE (type) == RECORD_TYPE
15451 || TREE_CODE (type) == UNION_TYPE
15452 || TREE_CODE (type) == QUAL_UNION_TYPE)
15453 && TYPE_FIELDS (type))
15454 {
15455 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15456 return 64;
15457 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15458 return 128;
15459 }
15460 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15461 || TREE_CODE (type) == INTEGER_TYPE)
15462 {
15463 if (TYPE_MODE (type) == DFmode && align < 64)
15464 return 64;
15465 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15466 return 128;
15467 }
15468
15469 return align;
15470 }
15471
15472 /* Compute the alignment for a local variable.
15473 TYPE is the data type, and ALIGN is the alignment that
15474 the object would ordinarily have. The value of this macro is used
15475 instead of that alignment to align the object. */
15476
15477 int
15478 ix86_local_alignment (tree type, int align)
15479 {
15480 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15481 to a 16-byte boundary. */
15482 if (TARGET_64BIT)
15483 {
15484 if (AGGREGATE_TYPE_P (type)
15485 && TYPE_SIZE (type)
15486 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15487 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15488 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15489 return 128;
15490 }
15491 if (TREE_CODE (type) == ARRAY_TYPE)
15492 {
15493 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15494 return 64;
15495 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15496 return 128;
15497 }
15498 else if (TREE_CODE (type) == COMPLEX_TYPE)
15499 {
15500 if (TYPE_MODE (type) == DCmode && align < 64)
15501 return 64;
15502 if (TYPE_MODE (type) == XCmode && align < 128)
15503 return 128;
15504 }
15505 else if ((TREE_CODE (type) == RECORD_TYPE
15506 || TREE_CODE (type) == UNION_TYPE
15507 || TREE_CODE (type) == QUAL_UNION_TYPE)
15508 && TYPE_FIELDS (type))
15509 {
15510 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15511 return 64;
15512 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15513 return 128;
15514 }
15515 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15516 || TREE_CODE (type) == INTEGER_TYPE)
15517 {
15518
15519 if (TYPE_MODE (type) == DFmode && align < 64)
15520 return 64;
15521 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15522 return 128;
15523 }
15524 return align;
15525 }
15526 \f
15527 /* Emit RTL insns to initialize the variable parts of a trampoline.
15528 FNADDR is an RTX for the address of the function's pure code.
15529 CXT is an RTX for the static chain value for the function. */
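/* Byte layout of the emitted trampoline (summary added for clarity,
   derived from the constants below):
     32-bit:  b9 <cxt:4> e9 <rel32 to FNADDR>           movl $CXT, %ecx; jmp FNADDR
     64-bit:  49 bb <fnaddr:8>  (or 41 bb <fnaddr:4>)   mov $FNADDR, %r11
	      49 ba <cxt:8>                              movabs $CXT, %r10
	      49 ff e3                                   jmp *%r11  */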
15530 void
15531 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15532 {
15533 if (!TARGET_64BIT)
15534 {
15535 /* Compute offset from the end of the jmp to the target function. */
15536 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15537 plus_constant (tramp, 10),
15538 NULL_RTX, 1, OPTAB_DIRECT);
15539 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15540 gen_int_mode (0xb9, QImode));
15541 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15542 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15543 gen_int_mode (0xe9, QImode));
15544 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15545 }
15546 else
15547 {
15548 int offset = 0;
15549 /* Try to load address using shorter movl instead of movabs.
15550 We may want to support movq for kernel mode, but the kernel does not use
15551 trampolines at the moment. */
15552 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15553 {
15554 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15555 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15556 gen_int_mode (0xbb41, HImode));
15557 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15558 gen_lowpart (SImode, fnaddr));
15559 offset += 6;
15560 }
15561 else
15562 {
15563 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15564 gen_int_mode (0xbb49, HImode));
15565 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15566 fnaddr);
15567 offset += 10;
15568 }
15569 /* Load static chain using movabs to r10. */
15570 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15571 gen_int_mode (0xba49, HImode));
15572 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15573 cxt);
15574 offset += 10;
15575 /* Jump to r11. */
15576 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15577 gen_int_mode (0xff49, HImode));
15578 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15579 gen_int_mode (0xe3, QImode));
15580 offset += 3;
15581 gcc_assert (offset <= TRAMPOLINE_SIZE);
15582 }
15583
15584 #ifdef ENABLE_EXECUTE_STACK
15585 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15586 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15587 #endif
15588 }
15589 \f
15590 /* Codes for all the SSE/MMX builtins. */
15591 enum ix86_builtins
15592 {
15593 IX86_BUILTIN_ADDPS,
15594 IX86_BUILTIN_ADDSS,
15595 IX86_BUILTIN_DIVPS,
15596 IX86_BUILTIN_DIVSS,
15597 IX86_BUILTIN_MULPS,
15598 IX86_BUILTIN_MULSS,
15599 IX86_BUILTIN_SUBPS,
15600 IX86_BUILTIN_SUBSS,
15601
15602 IX86_BUILTIN_CMPEQPS,
15603 IX86_BUILTIN_CMPLTPS,
15604 IX86_BUILTIN_CMPLEPS,
15605 IX86_BUILTIN_CMPGTPS,
15606 IX86_BUILTIN_CMPGEPS,
15607 IX86_BUILTIN_CMPNEQPS,
15608 IX86_BUILTIN_CMPNLTPS,
15609 IX86_BUILTIN_CMPNLEPS,
15610 IX86_BUILTIN_CMPNGTPS,
15611 IX86_BUILTIN_CMPNGEPS,
15612 IX86_BUILTIN_CMPORDPS,
15613 IX86_BUILTIN_CMPUNORDPS,
15614 IX86_BUILTIN_CMPEQSS,
15615 IX86_BUILTIN_CMPLTSS,
15616 IX86_BUILTIN_CMPLESS,
15617 IX86_BUILTIN_CMPNEQSS,
15618 IX86_BUILTIN_CMPNLTSS,
15619 IX86_BUILTIN_CMPNLESS,
15620 IX86_BUILTIN_CMPNGTSS,
15621 IX86_BUILTIN_CMPNGESS,
15622 IX86_BUILTIN_CMPORDSS,
15623 IX86_BUILTIN_CMPUNORDSS,
15624
15625 IX86_BUILTIN_COMIEQSS,
15626 IX86_BUILTIN_COMILTSS,
15627 IX86_BUILTIN_COMILESS,
15628 IX86_BUILTIN_COMIGTSS,
15629 IX86_BUILTIN_COMIGESS,
15630 IX86_BUILTIN_COMINEQSS,
15631 IX86_BUILTIN_UCOMIEQSS,
15632 IX86_BUILTIN_UCOMILTSS,
15633 IX86_BUILTIN_UCOMILESS,
15634 IX86_BUILTIN_UCOMIGTSS,
15635 IX86_BUILTIN_UCOMIGESS,
15636 IX86_BUILTIN_UCOMINEQSS,
15637
15638 IX86_BUILTIN_CVTPI2PS,
15639 IX86_BUILTIN_CVTPS2PI,
15640 IX86_BUILTIN_CVTSI2SS,
15641 IX86_BUILTIN_CVTSI642SS,
15642 IX86_BUILTIN_CVTSS2SI,
15643 IX86_BUILTIN_CVTSS2SI64,
15644 IX86_BUILTIN_CVTTPS2PI,
15645 IX86_BUILTIN_CVTTSS2SI,
15646 IX86_BUILTIN_CVTTSS2SI64,
15647
15648 IX86_BUILTIN_MAXPS,
15649 IX86_BUILTIN_MAXSS,
15650 IX86_BUILTIN_MINPS,
15651 IX86_BUILTIN_MINSS,
15652
15653 IX86_BUILTIN_LOADUPS,
15654 IX86_BUILTIN_STOREUPS,
15655 IX86_BUILTIN_MOVSS,
15656
15657 IX86_BUILTIN_MOVHLPS,
15658 IX86_BUILTIN_MOVLHPS,
15659 IX86_BUILTIN_LOADHPS,
15660 IX86_BUILTIN_LOADLPS,
15661 IX86_BUILTIN_STOREHPS,
15662 IX86_BUILTIN_STORELPS,
15663
15664 IX86_BUILTIN_MASKMOVQ,
15665 IX86_BUILTIN_MOVMSKPS,
15666 IX86_BUILTIN_PMOVMSKB,
15667
15668 IX86_BUILTIN_MOVNTPS,
15669 IX86_BUILTIN_MOVNTQ,
15670
15671 IX86_BUILTIN_LOADDQU,
15672 IX86_BUILTIN_STOREDQU,
15673
15674 IX86_BUILTIN_PACKSSWB,
15675 IX86_BUILTIN_PACKSSDW,
15676 IX86_BUILTIN_PACKUSWB,
15677
15678 IX86_BUILTIN_PADDB,
15679 IX86_BUILTIN_PADDW,
15680 IX86_BUILTIN_PADDD,
15681 IX86_BUILTIN_PADDQ,
15682 IX86_BUILTIN_PADDSB,
15683 IX86_BUILTIN_PADDSW,
15684 IX86_BUILTIN_PADDUSB,
15685 IX86_BUILTIN_PADDUSW,
15686 IX86_BUILTIN_PSUBB,
15687 IX86_BUILTIN_PSUBW,
15688 IX86_BUILTIN_PSUBD,
15689 IX86_BUILTIN_PSUBQ,
15690 IX86_BUILTIN_PSUBSB,
15691 IX86_BUILTIN_PSUBSW,
15692 IX86_BUILTIN_PSUBUSB,
15693 IX86_BUILTIN_PSUBUSW,
15694
15695 IX86_BUILTIN_PAND,
15696 IX86_BUILTIN_PANDN,
15697 IX86_BUILTIN_POR,
15698 IX86_BUILTIN_PXOR,
15699
15700 IX86_BUILTIN_PAVGB,
15701 IX86_BUILTIN_PAVGW,
15702
15703 IX86_BUILTIN_PCMPEQB,
15704 IX86_BUILTIN_PCMPEQW,
15705 IX86_BUILTIN_PCMPEQD,
15706 IX86_BUILTIN_PCMPGTB,
15707 IX86_BUILTIN_PCMPGTW,
15708 IX86_BUILTIN_PCMPGTD,
15709
15710 IX86_BUILTIN_PMADDWD,
15711
15712 IX86_BUILTIN_PMAXSW,
15713 IX86_BUILTIN_PMAXUB,
15714 IX86_BUILTIN_PMINSW,
15715 IX86_BUILTIN_PMINUB,
15716
15717 IX86_BUILTIN_PMULHUW,
15718 IX86_BUILTIN_PMULHW,
15719 IX86_BUILTIN_PMULLW,
15720
15721 IX86_BUILTIN_PSADBW,
15722 IX86_BUILTIN_PSHUFW,
15723
15724 IX86_BUILTIN_PSLLW,
15725 IX86_BUILTIN_PSLLD,
15726 IX86_BUILTIN_PSLLQ,
15727 IX86_BUILTIN_PSRAW,
15728 IX86_BUILTIN_PSRAD,
15729 IX86_BUILTIN_PSRLW,
15730 IX86_BUILTIN_PSRLD,
15731 IX86_BUILTIN_PSRLQ,
15732 IX86_BUILTIN_PSLLWI,
15733 IX86_BUILTIN_PSLLDI,
15734 IX86_BUILTIN_PSLLQI,
15735 IX86_BUILTIN_PSRAWI,
15736 IX86_BUILTIN_PSRADI,
15737 IX86_BUILTIN_PSRLWI,
15738 IX86_BUILTIN_PSRLDI,
15739 IX86_BUILTIN_PSRLQI,
15740
15741 IX86_BUILTIN_PUNPCKHBW,
15742 IX86_BUILTIN_PUNPCKHWD,
15743 IX86_BUILTIN_PUNPCKHDQ,
15744 IX86_BUILTIN_PUNPCKLBW,
15745 IX86_BUILTIN_PUNPCKLWD,
15746 IX86_BUILTIN_PUNPCKLDQ,
15747
15748 IX86_BUILTIN_SHUFPS,
15749
15750 IX86_BUILTIN_RCPPS,
15751 IX86_BUILTIN_RCPSS,
15752 IX86_BUILTIN_RSQRTPS,
15753 IX86_BUILTIN_RSQRTSS,
15754 IX86_BUILTIN_SQRTPS,
15755 IX86_BUILTIN_SQRTSS,
15756
15757 IX86_BUILTIN_UNPCKHPS,
15758 IX86_BUILTIN_UNPCKLPS,
15759
15760 IX86_BUILTIN_ANDPS,
15761 IX86_BUILTIN_ANDNPS,
15762 IX86_BUILTIN_ORPS,
15763 IX86_BUILTIN_XORPS,
15764
15765 IX86_BUILTIN_EMMS,
15766 IX86_BUILTIN_LDMXCSR,
15767 IX86_BUILTIN_STMXCSR,
15768 IX86_BUILTIN_SFENCE,
15769
15770 /* 3DNow! Original */
15771 IX86_BUILTIN_FEMMS,
15772 IX86_BUILTIN_PAVGUSB,
15773 IX86_BUILTIN_PF2ID,
15774 IX86_BUILTIN_PFACC,
15775 IX86_BUILTIN_PFADD,
15776 IX86_BUILTIN_PFCMPEQ,
15777 IX86_BUILTIN_PFCMPGE,
15778 IX86_BUILTIN_PFCMPGT,
15779 IX86_BUILTIN_PFMAX,
15780 IX86_BUILTIN_PFMIN,
15781 IX86_BUILTIN_PFMUL,
15782 IX86_BUILTIN_PFRCP,
15783 IX86_BUILTIN_PFRCPIT1,
15784 IX86_BUILTIN_PFRCPIT2,
15785 IX86_BUILTIN_PFRSQIT1,
15786 IX86_BUILTIN_PFRSQRT,
15787 IX86_BUILTIN_PFSUB,
15788 IX86_BUILTIN_PFSUBR,
15789 IX86_BUILTIN_PI2FD,
15790 IX86_BUILTIN_PMULHRW,
15791
15792 /* 3DNow! Athlon Extensions */
15793 IX86_BUILTIN_PF2IW,
15794 IX86_BUILTIN_PFNACC,
15795 IX86_BUILTIN_PFPNACC,
15796 IX86_BUILTIN_PI2FW,
15797 IX86_BUILTIN_PSWAPDSI,
15798 IX86_BUILTIN_PSWAPDSF,
15799
15800 /* SSE2 */
15801 IX86_BUILTIN_ADDPD,
15802 IX86_BUILTIN_ADDSD,
15803 IX86_BUILTIN_DIVPD,
15804 IX86_BUILTIN_DIVSD,
15805 IX86_BUILTIN_MULPD,
15806 IX86_BUILTIN_MULSD,
15807 IX86_BUILTIN_SUBPD,
15808 IX86_BUILTIN_SUBSD,
15809
15810 IX86_BUILTIN_CMPEQPD,
15811 IX86_BUILTIN_CMPLTPD,
15812 IX86_BUILTIN_CMPLEPD,
15813 IX86_BUILTIN_CMPGTPD,
15814 IX86_BUILTIN_CMPGEPD,
15815 IX86_BUILTIN_CMPNEQPD,
15816 IX86_BUILTIN_CMPNLTPD,
15817 IX86_BUILTIN_CMPNLEPD,
15818 IX86_BUILTIN_CMPNGTPD,
15819 IX86_BUILTIN_CMPNGEPD,
15820 IX86_BUILTIN_CMPORDPD,
15821 IX86_BUILTIN_CMPUNORDPD,
15822 IX86_BUILTIN_CMPNEPD,
15823 IX86_BUILTIN_CMPEQSD,
15824 IX86_BUILTIN_CMPLTSD,
15825 IX86_BUILTIN_CMPLESD,
15826 IX86_BUILTIN_CMPNEQSD,
15827 IX86_BUILTIN_CMPNLTSD,
15828 IX86_BUILTIN_CMPNLESD,
15829 IX86_BUILTIN_CMPORDSD,
15830 IX86_BUILTIN_CMPUNORDSD,
15831 IX86_BUILTIN_CMPNESD,
15832
15833 IX86_BUILTIN_COMIEQSD,
15834 IX86_BUILTIN_COMILTSD,
15835 IX86_BUILTIN_COMILESD,
15836 IX86_BUILTIN_COMIGTSD,
15837 IX86_BUILTIN_COMIGESD,
15838 IX86_BUILTIN_COMINEQSD,
15839 IX86_BUILTIN_UCOMIEQSD,
15840 IX86_BUILTIN_UCOMILTSD,
15841 IX86_BUILTIN_UCOMILESD,
15842 IX86_BUILTIN_UCOMIGTSD,
15843 IX86_BUILTIN_UCOMIGESD,
15844 IX86_BUILTIN_UCOMINEQSD,
15845
15846 IX86_BUILTIN_MAXPD,
15847 IX86_BUILTIN_MAXSD,
15848 IX86_BUILTIN_MINPD,
15849 IX86_BUILTIN_MINSD,
15850
15851 IX86_BUILTIN_ANDPD,
15852 IX86_BUILTIN_ANDNPD,
15853 IX86_BUILTIN_ORPD,
15854 IX86_BUILTIN_XORPD,
15855
15856 IX86_BUILTIN_SQRTPD,
15857 IX86_BUILTIN_SQRTSD,
15858
15859 IX86_BUILTIN_UNPCKHPD,
15860 IX86_BUILTIN_UNPCKLPD,
15861
15862 IX86_BUILTIN_SHUFPD,
15863
15864 IX86_BUILTIN_LOADUPD,
15865 IX86_BUILTIN_STOREUPD,
15866 IX86_BUILTIN_MOVSD,
15867
15868 IX86_BUILTIN_LOADHPD,
15869 IX86_BUILTIN_LOADLPD,
15870
15871 IX86_BUILTIN_CVTDQ2PD,
15872 IX86_BUILTIN_CVTDQ2PS,
15873
15874 IX86_BUILTIN_CVTPD2DQ,
15875 IX86_BUILTIN_CVTPD2PI,
15876 IX86_BUILTIN_CVTPD2PS,
15877 IX86_BUILTIN_CVTTPD2DQ,
15878 IX86_BUILTIN_CVTTPD2PI,
15879
15880 IX86_BUILTIN_CVTPI2PD,
15881 IX86_BUILTIN_CVTSI2SD,
15882 IX86_BUILTIN_CVTSI642SD,
15883
15884 IX86_BUILTIN_CVTSD2SI,
15885 IX86_BUILTIN_CVTSD2SI64,
15886 IX86_BUILTIN_CVTSD2SS,
15887 IX86_BUILTIN_CVTSS2SD,
15888 IX86_BUILTIN_CVTTSD2SI,
15889 IX86_BUILTIN_CVTTSD2SI64,
15890
15891 IX86_BUILTIN_CVTPS2DQ,
15892 IX86_BUILTIN_CVTPS2PD,
15893 IX86_BUILTIN_CVTTPS2DQ,
15894
15895 IX86_BUILTIN_MOVNTI,
15896 IX86_BUILTIN_MOVNTPD,
15897 IX86_BUILTIN_MOVNTDQ,
15898
15899 /* SSE2 MMX */
15900 IX86_BUILTIN_MASKMOVDQU,
15901 IX86_BUILTIN_MOVMSKPD,
15902 IX86_BUILTIN_PMOVMSKB128,
15903
15904 IX86_BUILTIN_PACKSSWB128,
15905 IX86_BUILTIN_PACKSSDW128,
15906 IX86_BUILTIN_PACKUSWB128,
15907
15908 IX86_BUILTIN_PADDB128,
15909 IX86_BUILTIN_PADDW128,
15910 IX86_BUILTIN_PADDD128,
15911 IX86_BUILTIN_PADDQ128,
15912 IX86_BUILTIN_PADDSB128,
15913 IX86_BUILTIN_PADDSW128,
15914 IX86_BUILTIN_PADDUSB128,
15915 IX86_BUILTIN_PADDUSW128,
15916 IX86_BUILTIN_PSUBB128,
15917 IX86_BUILTIN_PSUBW128,
15918 IX86_BUILTIN_PSUBD128,
15919 IX86_BUILTIN_PSUBQ128,
15920 IX86_BUILTIN_PSUBSB128,
15921 IX86_BUILTIN_PSUBSW128,
15922 IX86_BUILTIN_PSUBUSB128,
15923 IX86_BUILTIN_PSUBUSW128,
15924
15925 IX86_BUILTIN_PAND128,
15926 IX86_BUILTIN_PANDN128,
15927 IX86_BUILTIN_POR128,
15928 IX86_BUILTIN_PXOR128,
15929
15930 IX86_BUILTIN_PAVGB128,
15931 IX86_BUILTIN_PAVGW128,
15932
15933 IX86_BUILTIN_PCMPEQB128,
15934 IX86_BUILTIN_PCMPEQW128,
15935 IX86_BUILTIN_PCMPEQD128,
15936 IX86_BUILTIN_PCMPGTB128,
15937 IX86_BUILTIN_PCMPGTW128,
15938 IX86_BUILTIN_PCMPGTD128,
15939
15940 IX86_BUILTIN_PMADDWD128,
15941
15942 IX86_BUILTIN_PMAXSW128,
15943 IX86_BUILTIN_PMAXUB128,
15944 IX86_BUILTIN_PMINSW128,
15945 IX86_BUILTIN_PMINUB128,
15946
15947 IX86_BUILTIN_PMULUDQ,
15948 IX86_BUILTIN_PMULUDQ128,
15949 IX86_BUILTIN_PMULHUW128,
15950 IX86_BUILTIN_PMULHW128,
15951 IX86_BUILTIN_PMULLW128,
15952
15953 IX86_BUILTIN_PSADBW128,
15954 IX86_BUILTIN_PSHUFHW,
15955 IX86_BUILTIN_PSHUFLW,
15956 IX86_BUILTIN_PSHUFD,
15957
15958 IX86_BUILTIN_PSLLW128,
15959 IX86_BUILTIN_PSLLD128,
15960 IX86_BUILTIN_PSLLQ128,
15961 IX86_BUILTIN_PSRAW128,
15962 IX86_BUILTIN_PSRAD128,
15963 IX86_BUILTIN_PSRLW128,
15964 IX86_BUILTIN_PSRLD128,
15965 IX86_BUILTIN_PSRLQ128,
15966 IX86_BUILTIN_PSLLDQI128,
15967 IX86_BUILTIN_PSLLWI128,
15968 IX86_BUILTIN_PSLLDI128,
15969 IX86_BUILTIN_PSLLQI128,
15970 IX86_BUILTIN_PSRAWI128,
15971 IX86_BUILTIN_PSRADI128,
15972 IX86_BUILTIN_PSRLDQI128,
15973 IX86_BUILTIN_PSRLWI128,
15974 IX86_BUILTIN_PSRLDI128,
15975 IX86_BUILTIN_PSRLQI128,
15976
15977 IX86_BUILTIN_PUNPCKHBW128,
15978 IX86_BUILTIN_PUNPCKHWD128,
15979 IX86_BUILTIN_PUNPCKHDQ128,
15980 IX86_BUILTIN_PUNPCKHQDQ128,
15981 IX86_BUILTIN_PUNPCKLBW128,
15982 IX86_BUILTIN_PUNPCKLWD128,
15983 IX86_BUILTIN_PUNPCKLDQ128,
15984 IX86_BUILTIN_PUNPCKLQDQ128,
15985
15986 IX86_BUILTIN_CLFLUSH,
15987 IX86_BUILTIN_MFENCE,
15988 IX86_BUILTIN_LFENCE,
15989
15990 /* Prescott New Instructions. */
15991 IX86_BUILTIN_ADDSUBPS,
15992 IX86_BUILTIN_HADDPS,
15993 IX86_BUILTIN_HSUBPS,
15994 IX86_BUILTIN_MOVSHDUP,
15995 IX86_BUILTIN_MOVSLDUP,
15996 IX86_BUILTIN_ADDSUBPD,
15997 IX86_BUILTIN_HADDPD,
15998 IX86_BUILTIN_HSUBPD,
15999 IX86_BUILTIN_LDDQU,
16000
16001 IX86_BUILTIN_MONITOR,
16002 IX86_BUILTIN_MWAIT,
16003
16004 /* SSSE3. */
16005 IX86_BUILTIN_PHADDW,
16006 IX86_BUILTIN_PHADDD,
16007 IX86_BUILTIN_PHADDSW,
16008 IX86_BUILTIN_PHSUBW,
16009 IX86_BUILTIN_PHSUBD,
16010 IX86_BUILTIN_PHSUBSW,
16011 IX86_BUILTIN_PMADDUBSW,
16012 IX86_BUILTIN_PMULHRSW,
16013 IX86_BUILTIN_PSHUFB,
16014 IX86_BUILTIN_PSIGNB,
16015 IX86_BUILTIN_PSIGNW,
16016 IX86_BUILTIN_PSIGND,
16017 IX86_BUILTIN_PALIGNR,
16018 IX86_BUILTIN_PABSB,
16019 IX86_BUILTIN_PABSW,
16020 IX86_BUILTIN_PABSD,
16021
16022 IX86_BUILTIN_PHADDW128,
16023 IX86_BUILTIN_PHADDD128,
16024 IX86_BUILTIN_PHADDSW128,
16025 IX86_BUILTIN_PHSUBW128,
16026 IX86_BUILTIN_PHSUBD128,
16027 IX86_BUILTIN_PHSUBSW128,
16028 IX86_BUILTIN_PMADDUBSW128,
16029 IX86_BUILTIN_PMULHRSW128,
16030 IX86_BUILTIN_PSHUFB128,
16031 IX86_BUILTIN_PSIGNB128,
16032 IX86_BUILTIN_PSIGNW128,
16033 IX86_BUILTIN_PSIGND128,
16034 IX86_BUILTIN_PALIGNR128,
16035 IX86_BUILTIN_PABSB128,
16036 IX86_BUILTIN_PABSW128,
16037 IX86_BUILTIN_PABSD128,
16038
16039 /* AMDFAM10 - SSE4A New Instructions. */
16040 IX86_BUILTIN_MOVNTSD,
16041 IX86_BUILTIN_MOVNTSS,
16042 IX86_BUILTIN_EXTRQI,
16043 IX86_BUILTIN_EXTRQ,
16044 IX86_BUILTIN_INSERTQI,
16045 IX86_BUILTIN_INSERTQ,
16046
16047 IX86_BUILTIN_VEC_INIT_V2SI,
16048 IX86_BUILTIN_VEC_INIT_V4HI,
16049 IX86_BUILTIN_VEC_INIT_V8QI,
16050 IX86_BUILTIN_VEC_EXT_V2DF,
16051 IX86_BUILTIN_VEC_EXT_V2DI,
16052 IX86_BUILTIN_VEC_EXT_V4SF,
16053 IX86_BUILTIN_VEC_EXT_V4SI,
16054 IX86_BUILTIN_VEC_EXT_V8HI,
16055 IX86_BUILTIN_VEC_EXT_V2SI,
16056 IX86_BUILTIN_VEC_EXT_V4HI,
16057 IX86_BUILTIN_VEC_SET_V8HI,
16058 IX86_BUILTIN_VEC_SET_V4HI,
16059
16060 IX86_BUILTIN_MAX
16061 };
16062
16063 /* Table for the ix86 builtin decls. */
16064 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16065
16066 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16067 * only if any bit of MASK is set in target_flags (and, when MASK includes
16068 * MASK_64BIT, only for 64-bit targets). Stores the function decl in the
16069 * ix86_builtins array. Returns the decl, or NULL_TREE if not added. */
16070
16071 static inline tree
16072 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16073 {
16074 tree decl = NULL_TREE;
16075
16076 if (mask & target_flags
16077 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16078 {
16079 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16080 NULL, NULL_TREE);
16081 ix86_builtins[(int) code] = decl;
16082 }
16083
16084 return decl;
16085 }
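/* Usage sketch (illustrative only; the real registrations live in
   ix86_init_mmx_sse_builtins and related code):

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
		  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   where v4sf_ftype_v4sf_v4sf is a function type built with
   build_function_type_list.  The decl is also remembered in
   ix86_builtins[IX86_BUILTIN_ADDPS] so that later code can look it up
   by its IX86_BUILTIN_* code.  */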
16086
16087 /* Like def_builtin, but also marks the function decl "const". */
16088
16089 static inline tree
16090 def_builtin_const (int mask, const char *name, tree type,
16091 enum ix86_builtins code)
16092 {
16093 tree decl = def_builtin (mask, name, type, code);
16094 if (decl)
16095 TREE_READONLY (decl) = 1;
16096 return decl;
16097 }
16098
16099 /* Bits for builtin_description.flag. */
16100
16101 /* Set when we don't support the comparison natively, and should
16102 swap the comparison operands in order to support it. */
16103 #define BUILTIN_DESC_SWAP_OPERANDS 1
16104
16105 struct builtin_description
16106 {
16107 const unsigned int mask;
16108 const enum insn_code icode;
16109 const char *const name;
16110 const enum ix86_builtins code;
16111 const enum rtx_code comparison;
16112 const unsigned int flag;
16113 };
16114
16115 static const struct builtin_description bdesc_comi[] =
16116 {
16117 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16118 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16119 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16120 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16121 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16122 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16123 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16124 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16125 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16126 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16127 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16128 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16129 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16130 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16131 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16132 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16133 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16134 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16135 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16136 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16137 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16138 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16139 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16140 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16141 };
16142
16143 static const struct builtin_description bdesc_2arg[] =
16144 {
16145 /* SSE */
16146 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16147 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16148 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16149 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16150 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16151 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16152 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16153 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16154
16155 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16156 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16157 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16158 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16159 BUILTIN_DESC_SWAP_OPERANDS },
16160 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16161 BUILTIN_DESC_SWAP_OPERANDS },
16162 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16163 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16164 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16165 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16166 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16167 BUILTIN_DESC_SWAP_OPERANDS },
16168 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16169 BUILTIN_DESC_SWAP_OPERANDS },
16170 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16171 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16172 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16173 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16174 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16175 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16176 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16177 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16178 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16179 BUILTIN_DESC_SWAP_OPERANDS },
16180 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16181 BUILTIN_DESC_SWAP_OPERANDS },
16182 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16183
16184 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16185 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16186 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16187 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16188
16189 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16190 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16191 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16192 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16193
16194 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16195 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16196 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16197 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16198 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16199
16200 /* MMX */
16201 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16202 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16203 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16204 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16205 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16206 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16207 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16208 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16209
16210 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16211 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16212 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16213 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16214 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16215 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16216 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16217 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16218
16219 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16220 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16221 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16222
16223 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16224 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16225 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16226 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16227
16228 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16229 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16230
16231 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16232 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16233 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16234 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16235 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16236 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16237
16238 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16239 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16240 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16241 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16242
16243 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16244 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16245 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16246 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16247 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16248 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16249
16250 /* Special: entries with a null name are skipped by the table-driven registration loops in ix86_init_mmx_sse_builtins; their prototypes are defined by hand there because their argument types do not follow the simple same-mode pattern.  */
16251 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16252 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16253 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16254
16255 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16256 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16257 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16258
16259 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16260 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16261 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16262 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16263 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16264 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16265
16266 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16267 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16268 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16269 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16270 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16271 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16272
16273 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16274 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16275 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16276 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16277
16278 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16279 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16280
16281 /* SSE2 */
16282 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16283 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16284 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16285 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16286 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16287 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16288 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16289 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16290
16291 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16292 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16293 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16294 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16295 BUILTIN_DESC_SWAP_OPERANDS },
16296 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16297 BUILTIN_DESC_SWAP_OPERANDS },
16298 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16299 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16300 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16301 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16302 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16303 BUILTIN_DESC_SWAP_OPERANDS },
16304 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16305 BUILTIN_DESC_SWAP_OPERANDS },
16306 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16307 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16308 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16309 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16310 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16311 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16312 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16313 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16314 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16315
16316 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16317 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16318 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16319 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16320
16321 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16322 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16323 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16324 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16325
16326 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16327 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16328 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16329
16330 /* SSE2 MMX */
16331 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16332 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16333 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16334 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16335 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16336 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16337 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16338 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16339
16340 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16341 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16342 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16343 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16344 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16345 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16346 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16347 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16348
16349 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16350 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16351
16352 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16353 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16354 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16355 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16356
16357 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16358 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16359
16360 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16362 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16363 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16364 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16365 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16366
16367 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16368 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16371
16372 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16373 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16374 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16375 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16376 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16377 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16378 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16379 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16380
16381 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16382 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16384
16385 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16386 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16387
16388 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16389 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16390
16391 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16392 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16393 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16394
16395 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16396 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16397 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16398
16399 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16400 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16401
16402 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16403
16404 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16405 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16406 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16407 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16408
16409 /* SSE3 */
16410 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16411 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16412 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16413 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16414 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16415 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16416
16417 /* SSSE3 */
16418 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16419 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16420 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16421 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16422 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16423 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16424 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16425 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16426 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16427 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16428 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16429 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16430 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16431 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16432 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16433 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16434 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16435 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16436 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16437 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16438 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16439 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16440 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16441 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16442 };
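/* An illustrative example of how an entry is used: __builtin_ia32_cmpgtps
   above maps to CODE_FOR_sse_maskcmpv4sf3 with comparison code LT and
   BUILTIN_DESC_SWAP_OPERANDS, so a > b is handled as the LT mask compare
   with its operands swapped (the swap is applied when the builtin is
   expanded); the registration loop in ix86_init_mmx_sse_builtins gives all
   such mask compares the v4si (v4sf, v4sf) prototype.  */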
16443
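/* Builtins taking a single vector argument.  The comparison and flag fields
   are unused for these entries.  */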
16444 static const struct builtin_description bdesc_1arg[] =
16445 {
16446 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16447 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16448
16449 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16450 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16451 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16452
16453 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16454 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16455 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16456 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16457 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16458 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16459
16460 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16461 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16462
16463 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16464
16465 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16466 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16467
16468 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16469 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16470 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16471 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16472 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16473
16474 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16475
16476 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16477 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16478 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16479 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16480
16481 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16482 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16483 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16484
16485 /* SSE3 */
16486 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16487 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16488
16489 /* SSSE3 */
16490 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16491 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16492 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16493 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16494 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16495 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16496 };
16497
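/* Register the ix86 target-specific builtins.  Only the MMX/SSE family
   exists at present, so nothing is set up unless TARGET_MMX is enabled.  */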
16498 static void
16499 ix86_init_builtins (void)
16500 {
16501 if (TARGET_MMX)
16502 ix86_init_mmx_sse_builtins ();
16503 }
16504
16505 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16506    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX builtins
16507    are defined.  */
16508 static void
16509 ix86_init_mmx_sse_builtins (void)
16510 {
16511 const struct builtin_description * d;
16512 size_t i;
16513
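/* Tree nodes for the vector types used in the builtin prototypes below,
   built from the standard C element types of matching width.  */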
16514 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16515 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16516 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16517 tree V2DI_type_node
16518 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16519 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16520 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16521 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16522 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16523 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16524 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16525
16526 tree pchar_type_node = build_pointer_type (char_type_node);
16527 tree pcchar_type_node = build_pointer_type (
16528 build_type_variant (char_type_node, 1, 0));
16529 tree pfloat_type_node = build_pointer_type (float_type_node);
16530 tree pcfloat_type_node = build_pointer_type (
16531 build_type_variant (float_type_node, 1, 0));
16532 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16533 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16534 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16535
16536 /* Comparisons. */
16537 tree int_ftype_v4sf_v4sf
16538 = build_function_type_list (integer_type_node,
16539 V4SF_type_node, V4SF_type_node, NULL_TREE);
16540 tree v4si_ftype_v4sf_v4sf
16541 = build_function_type_list (V4SI_type_node,
16542 V4SF_type_node, V4SF_type_node, NULL_TREE);
16543 /* MMX/SSE/integer conversions. */
16544 tree int_ftype_v4sf
16545 = build_function_type_list (integer_type_node,
16546 V4SF_type_node, NULL_TREE);
16547 tree int64_ftype_v4sf
16548 = build_function_type_list (long_long_integer_type_node,
16549 V4SF_type_node, NULL_TREE);
16550 tree int_ftype_v8qi
16551 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16552 tree v4sf_ftype_v4sf_int
16553 = build_function_type_list (V4SF_type_node,
16554 V4SF_type_node, integer_type_node, NULL_TREE);
16555 tree v4sf_ftype_v4sf_int64
16556 = build_function_type_list (V4SF_type_node,
16557 V4SF_type_node, long_long_integer_type_node,
16558 NULL_TREE);
16559 tree v4sf_ftype_v4sf_v2si
16560 = build_function_type_list (V4SF_type_node,
16561 V4SF_type_node, V2SI_type_node, NULL_TREE);
16562
16563 /* Miscellaneous. */
16564 tree v8qi_ftype_v4hi_v4hi
16565 = build_function_type_list (V8QI_type_node,
16566 V4HI_type_node, V4HI_type_node, NULL_TREE);
16567 tree v4hi_ftype_v2si_v2si
16568 = build_function_type_list (V4HI_type_node,
16569 V2SI_type_node, V2SI_type_node, NULL_TREE);
16570 tree v4sf_ftype_v4sf_v4sf_int
16571 = build_function_type_list (V4SF_type_node,
16572 V4SF_type_node, V4SF_type_node,
16573 integer_type_node, NULL_TREE);
16574 tree v2si_ftype_v4hi_v4hi
16575 = build_function_type_list (V2SI_type_node,
16576 V4HI_type_node, V4HI_type_node, NULL_TREE);
16577 tree v4hi_ftype_v4hi_int
16578 = build_function_type_list (V4HI_type_node,
16579 V4HI_type_node, integer_type_node, NULL_TREE);
16580 tree v4hi_ftype_v4hi_di
16581 = build_function_type_list (V4HI_type_node,
16582 V4HI_type_node, long_long_unsigned_type_node,
16583 NULL_TREE);
16584 tree v2si_ftype_v2si_di
16585 = build_function_type_list (V2SI_type_node,
16586 V2SI_type_node, long_long_unsigned_type_node,
16587 NULL_TREE);
16588 tree void_ftype_void
16589 = build_function_type (void_type_node, void_list_node);
16590 tree void_ftype_unsigned
16591 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16592 tree void_ftype_unsigned_unsigned
16593 = build_function_type_list (void_type_node, unsigned_type_node,
16594 unsigned_type_node, NULL_TREE);
16595 tree void_ftype_pcvoid_unsigned_unsigned
16596 = build_function_type_list (void_type_node, const_ptr_type_node,
16597 unsigned_type_node, unsigned_type_node,
16598 NULL_TREE);
16599 tree unsigned_ftype_void
16600 = build_function_type (unsigned_type_node, void_list_node);
16601 tree v2si_ftype_v4sf
16602 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16603 /* Loads/stores. */
16604 tree void_ftype_v8qi_v8qi_pchar
16605 = build_function_type_list (void_type_node,
16606 V8QI_type_node, V8QI_type_node,
16607 pchar_type_node, NULL_TREE);
16608 tree v4sf_ftype_pcfloat
16609 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16610 /* @@@ the type is bogus */
16611 tree v4sf_ftype_v4sf_pv2si
16612 = build_function_type_list (V4SF_type_node,
16613 V4SF_type_node, pv2si_type_node, NULL_TREE);
16614 tree void_ftype_pv2si_v4sf
16615 = build_function_type_list (void_type_node,
16616 pv2si_type_node, V4SF_type_node, NULL_TREE);
16617 tree void_ftype_pfloat_v4sf
16618 = build_function_type_list (void_type_node,
16619 pfloat_type_node, V4SF_type_node, NULL_TREE);
16620 tree void_ftype_pdi_di
16621 = build_function_type_list (void_type_node,
16622 pdi_type_node, long_long_unsigned_type_node,
16623 NULL_TREE);
16624 tree void_ftype_pv2di_v2di
16625 = build_function_type_list (void_type_node,
16626 pv2di_type_node, V2DI_type_node, NULL_TREE);
16627 /* Normal vector unops. */
16628 tree v4sf_ftype_v4sf
16629 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16630 tree v16qi_ftype_v16qi
16631 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16632 tree v8hi_ftype_v8hi
16633 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16634 tree v4si_ftype_v4si
16635 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16636 tree v8qi_ftype_v8qi
16637 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16638 tree v4hi_ftype_v4hi
16639 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16640
16641 /* Normal vector binops. */
16642 tree v4sf_ftype_v4sf_v4sf
16643 = build_function_type_list (V4SF_type_node,
16644 V4SF_type_node, V4SF_type_node, NULL_TREE);
16645 tree v8qi_ftype_v8qi_v8qi
16646 = build_function_type_list (V8QI_type_node,
16647 V8QI_type_node, V8QI_type_node, NULL_TREE);
16648 tree v4hi_ftype_v4hi_v4hi
16649 = build_function_type_list (V4HI_type_node,
16650 V4HI_type_node, V4HI_type_node, NULL_TREE);
16651 tree v2si_ftype_v2si_v2si
16652 = build_function_type_list (V2SI_type_node,
16653 V2SI_type_node, V2SI_type_node, NULL_TREE);
16654 tree di_ftype_di_di
16655 = build_function_type_list (long_long_unsigned_type_node,
16656 long_long_unsigned_type_node,
16657 long_long_unsigned_type_node, NULL_TREE);
16658
16659 tree di_ftype_di_di_int
16660 = build_function_type_list (long_long_unsigned_type_node,
16661 long_long_unsigned_type_node,
16662 long_long_unsigned_type_node,
16663 integer_type_node, NULL_TREE);
16664
16665 tree v2si_ftype_v2sf
16666 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16667 tree v2sf_ftype_v2si
16668 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16669 tree v2si_ftype_v2si
16670 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16671 tree v2sf_ftype_v2sf
16672 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16673 tree v2sf_ftype_v2sf_v2sf
16674 = build_function_type_list (V2SF_type_node,
16675 V2SF_type_node, V2SF_type_node, NULL_TREE);
16676 tree v2si_ftype_v2sf_v2sf
16677 = build_function_type_list (V2SI_type_node,
16678 V2SF_type_node, V2SF_type_node, NULL_TREE);
16679 tree pint_type_node = build_pointer_type (integer_type_node);
16680 tree pdouble_type_node = build_pointer_type (double_type_node);
16681 tree pcdouble_type_node = build_pointer_type (
16682 build_type_variant (double_type_node, 1, 0));
16683 tree int_ftype_v2df_v2df
16684 = build_function_type_list (integer_type_node,
16685 V2DF_type_node, V2DF_type_node, NULL_TREE);
16686
16687 tree void_ftype_pcvoid
16688 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16689 tree v4sf_ftype_v4si
16690 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16691 tree v4si_ftype_v4sf
16692 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16693 tree v2df_ftype_v4si
16694 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16695 tree v4si_ftype_v2df
16696 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16697 tree v2si_ftype_v2df
16698 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16699 tree v4sf_ftype_v2df
16700 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16701 tree v2df_ftype_v2si
16702 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16703 tree v2df_ftype_v4sf
16704 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16705 tree int_ftype_v2df
16706 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16707 tree int64_ftype_v2df
16708 = build_function_type_list (long_long_integer_type_node,
16709 V2DF_type_node, NULL_TREE);
16710 tree v2df_ftype_v2df_int
16711 = build_function_type_list (V2DF_type_node,
16712 V2DF_type_node, integer_type_node, NULL_TREE);
16713 tree v2df_ftype_v2df_int64
16714 = build_function_type_list (V2DF_type_node,
16715 V2DF_type_node, long_long_integer_type_node,
16716 NULL_TREE);
16717 tree v4sf_ftype_v4sf_v2df
16718 = build_function_type_list (V4SF_type_node,
16719 V4SF_type_node, V2DF_type_node, NULL_TREE);
16720 tree v2df_ftype_v2df_v4sf
16721 = build_function_type_list (V2DF_type_node,
16722 V2DF_type_node, V4SF_type_node, NULL_TREE);
16723 tree v2df_ftype_v2df_v2df_int
16724 = build_function_type_list (V2DF_type_node,
16725 V2DF_type_node, V2DF_type_node,
16726 integer_type_node,
16727 NULL_TREE);
16728 tree v2df_ftype_v2df_pcdouble
16729 = build_function_type_list (V2DF_type_node,
16730 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16731 tree void_ftype_pdouble_v2df
16732 = build_function_type_list (void_type_node,
16733 pdouble_type_node, V2DF_type_node, NULL_TREE);
16734 tree void_ftype_pint_int
16735 = build_function_type_list (void_type_node,
16736 pint_type_node, integer_type_node, NULL_TREE);
16737 tree void_ftype_v16qi_v16qi_pchar
16738 = build_function_type_list (void_type_node,
16739 V16QI_type_node, V16QI_type_node,
16740 pchar_type_node, NULL_TREE);
16741 tree v2df_ftype_pcdouble
16742 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16743 tree v2df_ftype_v2df_v2df
16744 = build_function_type_list (V2DF_type_node,
16745 V2DF_type_node, V2DF_type_node, NULL_TREE);
16746 tree v16qi_ftype_v16qi_v16qi
16747 = build_function_type_list (V16QI_type_node,
16748 V16QI_type_node, V16QI_type_node, NULL_TREE);
16749 tree v8hi_ftype_v8hi_v8hi
16750 = build_function_type_list (V8HI_type_node,
16751 V8HI_type_node, V8HI_type_node, NULL_TREE);
16752 tree v4si_ftype_v4si_v4si
16753 = build_function_type_list (V4SI_type_node,
16754 V4SI_type_node, V4SI_type_node, NULL_TREE);
16755 tree v2di_ftype_v2di_v2di
16756 = build_function_type_list (V2DI_type_node,
16757 V2DI_type_node, V2DI_type_node, NULL_TREE);
16758 tree v2di_ftype_v2df_v2df
16759 = build_function_type_list (V2DI_type_node,
16760 V2DF_type_node, V2DF_type_node, NULL_TREE);
16761 tree v2df_ftype_v2df
16762 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16763 tree v2di_ftype_v2di_int
16764 = build_function_type_list (V2DI_type_node,
16765 V2DI_type_node, integer_type_node, NULL_TREE);
16766 tree v2di_ftype_v2di_v2di_int
16767 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16768 V2DI_type_node, integer_type_node, NULL_TREE);
16769 tree v4si_ftype_v4si_int
16770 = build_function_type_list (V4SI_type_node,
16771 V4SI_type_node, integer_type_node, NULL_TREE);
16772 tree v8hi_ftype_v8hi_int
16773 = build_function_type_list (V8HI_type_node,
16774 V8HI_type_node, integer_type_node, NULL_TREE);
16775 tree v8hi_ftype_v8hi_v2di
16776 = build_function_type_list (V8HI_type_node,
16777 V8HI_type_node, V2DI_type_node, NULL_TREE);
16778 tree v4si_ftype_v4si_v2di
16779 = build_function_type_list (V4SI_type_node,
16780 V4SI_type_node, V2DI_type_node, NULL_TREE);
16781 tree v4si_ftype_v8hi_v8hi
16782 = build_function_type_list (V4SI_type_node,
16783 V8HI_type_node, V8HI_type_node, NULL_TREE);
16784 tree di_ftype_v8qi_v8qi
16785 = build_function_type_list (long_long_unsigned_type_node,
16786 V8QI_type_node, V8QI_type_node, NULL_TREE);
16787 tree di_ftype_v2si_v2si
16788 = build_function_type_list (long_long_unsigned_type_node,
16789 V2SI_type_node, V2SI_type_node, NULL_TREE);
16790 tree v2di_ftype_v16qi_v16qi
16791 = build_function_type_list (V2DI_type_node,
16792 V16QI_type_node, V16QI_type_node, NULL_TREE);
16793 tree v2di_ftype_v4si_v4si
16794 = build_function_type_list (V2DI_type_node,
16795 V4SI_type_node, V4SI_type_node, NULL_TREE);
16796 tree int_ftype_v16qi
16797 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16798 tree v16qi_ftype_pcchar
16799 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16800 tree void_ftype_pchar_v16qi
16801 = build_function_type_list (void_type_node,
16802 pchar_type_node, V16QI_type_node, NULL_TREE);
16803
16804 tree v2di_ftype_v2di_unsigned_unsigned
16805 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16806 unsigned_type_node, unsigned_type_node,
16807 NULL_TREE);
16808 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16809 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16810 unsigned_type_node, unsigned_type_node,
16811 NULL_TREE);
16812 tree v2di_ftype_v2di_v16qi
16813 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16814 NULL_TREE);
16815
16816 tree float80_type;
16817 tree float128_type;
16818 tree ftype;
16819
16820 /* The __float80 type. */
16821 if (TYPE_MODE (long_double_type_node) == XFmode)
16822 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16823 "__float80");
16824 else
16825 {
16826 /* Otherwise, create a distinct 80-bit REAL_TYPE for __float80.  */
16827 float80_type = make_node (REAL_TYPE);
16828 TYPE_PRECISION (float80_type) = 80;
16829 layout_type (float80_type);
16830 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16831 }
16832
16833 if (TARGET_64BIT)
16834 {
16835 float128_type = make_node (REAL_TYPE);
16836 TYPE_PRECISION (float128_type) = 128;
16837 layout_type (float128_type);
16838 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16839 }
16840
16841 /* Add all builtins that are more or less simple operations on two
16842 operands. */
16843 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16844 {
16845 /* Use one of the operands; the target can have a different mode for
16846 mask-generating compares. */
16847 enum machine_mode mode;
16848 tree type;
16849
16850 if (d->name == 0)
16851 continue;
16852 mode = insn_data[d->icode].operand[1].mode;
16853
16854 switch (mode)
16855 {
16856 case V16QImode:
16857 type = v16qi_ftype_v16qi_v16qi;
16858 break;
16859 case V8HImode:
16860 type = v8hi_ftype_v8hi_v8hi;
16861 break;
16862 case V4SImode:
16863 type = v4si_ftype_v4si_v4si;
16864 break;
16865 case V2DImode:
16866 type = v2di_ftype_v2di_v2di;
16867 break;
16868 case V2DFmode:
16869 type = v2df_ftype_v2df_v2df;
16870 break;
16871 case V4SFmode:
16872 type = v4sf_ftype_v4sf_v4sf;
16873 break;
16874 case V8QImode:
16875 type = v8qi_ftype_v8qi_v8qi;
16876 break;
16877 case V4HImode:
16878 type = v4hi_ftype_v4hi_v4hi;
16879 break;
16880 case V2SImode:
16881 type = v2si_ftype_v2si_v2si;
16882 break;
16883 case DImode:
16884 type = di_ftype_di_di;
16885 break;
16886
16887 default:
16888 gcc_unreachable ();
16889 }
16890
16891 /* Override for comparisons: the mask compares return a vector of integer elements (all-ones or all-zeros per lane) rather than a float vector.  */
16892 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16893 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16894 type = v4si_ftype_v4sf_v4sf;
16895
16896 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16897 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16898 type = v2di_ftype_v2df_v2df;
16899
16900 def_builtin (d->mask, d->name, type, d->code);
16901 }
16902
16903 /* Add all builtins that are more or less simple operations on 1 operand. */
16904 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16905 {
16906 enum machine_mode mode;
16907 tree type;
16908
16909 if (d->name == 0)
16910 continue;
16911 mode = insn_data[d->icode].operand[1].mode;
16912
16913 switch (mode)
16914 {
16915 case V16QImode:
16916 type = v16qi_ftype_v16qi;
16917 break;
16918 case V8HImode:
16919 type = v8hi_ftype_v8hi;
16920 break;
16921 case V4SImode:
16922 type = v4si_ftype_v4si;
16923 break;
16924 case V2DFmode:
16925 type = v2df_ftype_v2df;
16926 break;
16927 case V4SFmode:
16928 type = v4sf_ftype_v4sf;
16929 break;
16930 case V8QImode:
16931 type = v8qi_ftype_v8qi;
16932 break;
16933 case V4HImode:
16934 type = v4hi_ftype_v4hi;
16935 break;
16936 case V2SImode:
16937 type = v2si_ftype_v2si;
16938 break;
16939
16940 default:
16941 gcc_unreachable ();
16942 }
16943
16944 def_builtin (d->mask, d->name, type, d->code);
16945 }
16946
16947 /* Add the remaining MMX insns with somewhat more complicated types. */
16948 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16949 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16950 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16951 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16952
16953 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16954 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16955 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16956
16957 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16958 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16959
16960 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16961 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16962
16963 /* comi/ucomi insns. */
16964 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16965 if (d->mask == MASK_SSE2)
16966 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16967 else
16968 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16969
16970 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16971 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16972 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16973
16974 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16975 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16976 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16977 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16978 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16979 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16980 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16981 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16982 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16983 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16984 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16985
16986 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16987
16988 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16989 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16990
16991 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16992 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16993 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16994 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
16995
16996 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
16997 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
16998 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
16999 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17000
17001 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17002
17003 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17004
17005 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17006 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17007 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17008 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17009 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17010 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17011
17012 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17013
17014 /* Original 3DNow! */
17015 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17016 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17017 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17018 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17019 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17020 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17021 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17022 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17023 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17024 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17025 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17026 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17027 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17028 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17029 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17030 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17031 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17032 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17033 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17034 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17035
17036 /* 3DNow! extension as used in the Athlon CPU. */
17037 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17038 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17039 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17040 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17041 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17042 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17043
17044 /* SSE2 */
17045 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17046
17047 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17048 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17049
17050 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17051 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17052
17053 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17054 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17055 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17056 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17057 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17058
17059 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17060 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17061 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17062 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17063
17064 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17065 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17066
17067 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17068
17069 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17070 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17071
17072 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17073 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17074 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17075 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17076 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17077
17078 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17079
17080 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17081 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17082 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17083 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17084
17085 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17086 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17087 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17088
17089 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17090 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17091 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17092 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17093
17094 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17095 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17096 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17097
17098 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17099 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17100
17101 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17102 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17103
17104 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17105 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17106 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17107
17108 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17109 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17110 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17111
17112 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17113 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17114
17115 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17116 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17117 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17118 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17119
17120 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17121 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17122 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17123 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17124
17125 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17126 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17127
17128 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17129
17130 /* Prescott New Instructions. */
17131 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17132 void_ftype_pcvoid_unsigned_unsigned,
17133 IX86_BUILTIN_MONITOR);
17134 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17135 void_ftype_unsigned_unsigned,
17136 IX86_BUILTIN_MWAIT);
17137 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
17138 v4sf_ftype_v4sf,
17139 IX86_BUILTIN_MOVSHDUP);
17140 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
17141 v4sf_ftype_v4sf,
17142 IX86_BUILTIN_MOVSLDUP);
17143 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17144 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17145
17146 /* SSSE3. */
17147 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17148 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17149 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17150 IX86_BUILTIN_PALIGNR);
17151
17152 /* AMDFAM10 SSE4A new built-ins. */
17153 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17154 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17155 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17156 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17157 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17158 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17159 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17160 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17161 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17162 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17163 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17164 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17165
17166 /* Access to the vec_init patterns. */
17167 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17168 integer_type_node, NULL_TREE);
17169 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17170 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17171
17172 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17173 short_integer_type_node,
17174 short_integer_type_node,
17175 short_integer_type_node, NULL_TREE);
17176 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17177 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17178
17179 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17180 char_type_node, char_type_node,
17181 char_type_node, char_type_node,
17182 char_type_node, char_type_node,
17183 char_type_node, NULL_TREE);
17184 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17185 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17186
17187 /* Access to the vec_extract patterns. */
17188 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17189 integer_type_node, NULL_TREE);
17190 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17191 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17192
17193 ftype = build_function_type_list (long_long_integer_type_node,
17194 V2DI_type_node, integer_type_node,
17195 NULL_TREE);
17196 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17197 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17198
17199 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17200 integer_type_node, NULL_TREE);
17201 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17202 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17203
17204 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17205 integer_type_node, NULL_TREE);
17206 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17207 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17208
17209 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17210 integer_type_node, NULL_TREE);
17211 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17212 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17213
17214 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17215 integer_type_node, NULL_TREE);
17216 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17217 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17218
17219 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17220 integer_type_node, NULL_TREE);
17221 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17222 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17223
17224 /* Access to the vec_set patterns. */
17225 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17226 intHI_type_node,
17227 integer_type_node, NULL_TREE);
17228 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17229 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17230
17231 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17232 intHI_type_node,
17233 integer_type_node, NULL_TREE);
17234 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17235 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17236 }
17237
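/* Illustrative sketch (not part of GCC): the builtins registered above are
   normally reached through the mmintrin.h/emmintrin.h wrappers, but they can
   also be called directly.  Assuming a compilation with -msse2, the
   two-operand builtin __builtin_ia32_pmuludq128 defined above can be used as:

       typedef int __v4si __attribute__ ((__vector_size__ (16)));
       typedef long long __v2di __attribute__ ((__vector_size__ (16)));

       __v2di
       umul_even_lanes (__v4si a, __v4si b)
       {
         return __builtin_ia32_pmuludq128 (a, b);
       }

   Such a call is handled by the generic two-operand path (the bdesc_2arg
   table and ix86_expand_binop_builtin below) and ends up as a single
   pmuludq.  */
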
17238 /* Errors in the source file can cause expand_expr to return const0_rtx
17239 where we expect a vector. To avoid crashing, use one of the vector
17240 clear instructions. */
17241 static rtx
17242 safe_vector_operand (rtx x, enum machine_mode mode)
17243 {
17244 if (x == const0_rtx)
17245 x = CONST0_RTX (mode);
17246 return x;
17247 }
17248
17249 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17250
17251 static rtx
17252 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
17253 {
17254 rtx pat, xops[3];
17255 tree arg0 = TREE_VALUE (arglist);
17256 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17257 rtx op0 = expand_normal (arg0);
17258 rtx op1 = expand_normal (arg1);
17259 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17260 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17261 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17262
17263 if (VECTOR_MODE_P (mode0))
17264 op0 = safe_vector_operand (op0, mode0);
17265 if (VECTOR_MODE_P (mode1))
17266 op1 = safe_vector_operand (op1, mode1);
17267
17268 if (optimize || !target
17269 || GET_MODE (target) != tmode
17270 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17271 target = gen_reg_rtx (tmode);
17272
17273 if (GET_MODE (op1) == SImode && mode1 == TImode)
17274 {
17275 rtx x = gen_reg_rtx (V4SImode);
17276 emit_insn (gen_sse2_loadd (x, op1));
17277 op1 = gen_lowpart (TImode, x);
17278 }
17279
17280 /* The insn must want input operands in the same modes as the
17281 result. */
17282 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17283 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17284
17285 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17286 op0 = copy_to_mode_reg (mode0, op0);
17287 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17288 op1 = copy_to_mode_reg (mode1, op1);
17289
17290 /* ??? Using ix86_fixup_binary_operands is problematic when
17291 we've got mismatched modes. Fake it. */
17292
17293 xops[0] = target;
17294 xops[1] = op0;
17295 xops[2] = op1;
17296
17297 if (tmode == mode0 && tmode == mode1)
17298 {
17299 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17300 op0 = xops[1];
17301 op1 = xops[2];
17302 }
17303 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17304 {
17305 op0 = force_reg (mode0, op0);
17306 op1 = force_reg (mode1, op1);
17307 target = gen_reg_rtx (tmode);
17308 }
17309
17310 pat = GEN_FCN (icode) (target, op0, op1);
17311 if (! pat)
17312 return 0;
17313 emit_insn (pat);
17314 return target;
17315 }
17316
17317 /* Subroutine of ix86_expand_builtin to take care of stores. */
17318
17319 static rtx
17320 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
17321 {
17322 rtx pat;
17323 tree arg0 = TREE_VALUE (arglist);
17324 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17325 rtx op0 = expand_normal (arg0);
17326 rtx op1 = expand_normal (arg1);
17327 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17328 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17329
17330 if (VECTOR_MODE_P (mode1))
17331 op1 = safe_vector_operand (op1, mode1);
17332
17333 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17334 op1 = copy_to_mode_reg (mode1, op1);
17335
17336 pat = GEN_FCN (icode) (op0, op1);
17337 if (pat)
17338 emit_insn (pat);
17339 return 0;
17340 }
17341
17342 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17343
17344 static rtx
17345 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
17346 rtx target, int do_load)
17347 {
17348 rtx pat;
17349 tree arg0 = TREE_VALUE (arglist);
17350 rtx op0 = expand_normal (arg0);
17351 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17352 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17353
17354 if (optimize || !target
17355 || GET_MODE (target) != tmode
17356 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17357 target = gen_reg_rtx (tmode);
17358 if (do_load)
17359 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17360 else
17361 {
17362 if (VECTOR_MODE_P (mode0))
17363 op0 = safe_vector_operand (op0, mode0);
17364
17365 if ((optimize && !register_operand (op0, mode0))
17366 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17367 op0 = copy_to_mode_reg (mode0, op0);
17368 }
17369
17370 pat = GEN_FCN (icode) (target, op0);
17371 if (! pat)
17372 return 0;
17373 emit_insn (pat);
17374 return target;
17375 }
17376
17377 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17378 sqrtss, rsqrtss, rcpss. */
17379
17380 static rtx
17381 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
17382 {
17383 rtx pat;
17384 tree arg0 = TREE_VALUE (arglist);
17385 rtx op1, op0 = expand_normal (arg0);
17386 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17387 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17388
17389 if (optimize || !target
17390 || GET_MODE (target) != tmode
17391 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17392 target = gen_reg_rtx (tmode);
17393
17394 if (VECTOR_MODE_P (mode0))
17395 op0 = safe_vector_operand (op0, mode0);
17396
17397 if ((optimize && !register_operand (op0, mode0))
17398 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17399 op0 = copy_to_mode_reg (mode0, op0);
17400
17401 op1 = op0;
17402 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17403 op1 = copy_to_mode_reg (mode0, op1);
17404
17405 pat = GEN_FCN (icode) (target, op0, op1);
17406 if (! pat)
17407 return 0;
17408 emit_insn (pat);
17409 return target;
17410 }
17411
17412 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17413
17414 static rtx
17415 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
17416 rtx target)
17417 {
17418 rtx pat;
17419 tree arg0 = TREE_VALUE (arglist);
17420 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17421 rtx op0 = expand_normal (arg0);
17422 rtx op1 = expand_normal (arg1);
17423 rtx op2;
17424 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17425 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17426 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17427 enum rtx_code comparison = d->comparison;
17428
17429 if (VECTOR_MODE_P (mode0))
17430 op0 = safe_vector_operand (op0, mode0);
17431 if (VECTOR_MODE_P (mode1))
17432 op1 = safe_vector_operand (op1, mode1);
17433
17434 /* Swap operands if we have a comparison that isn't available in
17435 hardware. */
17436 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17437 {
17438 rtx tmp = gen_reg_rtx (mode1);
17439 emit_move_insn (tmp, op1);
17440 op1 = op0;
17441 op0 = tmp;
17442 }
17443
17444 if (optimize || !target
17445 || GET_MODE (target) != tmode
17446 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17447 target = gen_reg_rtx (tmode);
17448
17449 if ((optimize && !register_operand (op0, mode0))
17450 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17451 op0 = copy_to_mode_reg (mode0, op0);
17452 if ((optimize && !register_operand (op1, mode1))
17453 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17454 op1 = copy_to_mode_reg (mode1, op1);
17455
17456 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17457 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17458 if (! pat)
17459 return 0;
17460 emit_insn (pat);
17461 return target;
17462 }
17463
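/* Illustrative sketch (not part of GCC): a comparison such as
   __builtin_ia32_cmpgtps has no native CMPPS predicate for GT; its
   descriptor is marked BUILTIN_DESC_SWAP_OPERANDS, so the code above swaps
   the operands and the comparison is emitted as CMPLTPS instead.
   Assuming -msse:

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       __v4sf
       greater_mask (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_cmpgtps (a, b);
       }

   Each result element is all-ones where a > b and zero elsewhere.  */
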
17464 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17465
17466 static rtx
17467 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
17468 rtx target)
17469 {
17470 rtx pat;
17471 tree arg0 = TREE_VALUE (arglist);
17472 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17473 rtx op0 = expand_normal (arg0);
17474 rtx op1 = expand_normal (arg1);
17475 rtx op2;
17476 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17477 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17478 enum rtx_code comparison = d->comparison;
17479
17480 if (VECTOR_MODE_P (mode0))
17481 op0 = safe_vector_operand (op0, mode0);
17482 if (VECTOR_MODE_P (mode1))
17483 op1 = safe_vector_operand (op1, mode1);
17484
17485 /* Swap operands if we have a comparison that isn't available in
17486 hardware. */
17487 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17488 {
17489 rtx tmp = op1;
17490 op1 = op0;
17491 op0 = tmp;
17492 }
17493
17494 target = gen_reg_rtx (SImode);
17495 emit_move_insn (target, const0_rtx);
17496 target = gen_rtx_SUBREG (QImode, target, 0);
17497
17498 if ((optimize && !register_operand (op0, mode0))
17499 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17500 op0 = copy_to_mode_reg (mode0, op0);
17501 if ((optimize && !register_operand (op1, mode1))
17502 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17503 op1 = copy_to_mode_reg (mode1, op1);
17504
17505 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17506 pat = GEN_FCN (d->icode) (op0, op1);
17507 if (! pat)
17508 return 0;
17509 emit_insn (pat);
17510 emit_insn (gen_rtx_SET (VOIDmode,
17511 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17512 gen_rtx_fmt_ee (comparison, QImode,
17513 SET_DEST (pat),
17514 const0_rtx)));
17515
17516 return SUBREG_REG (target);
17517 }
17518
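/* Illustrative sketch (not part of GCC): the COMI builtins return a plain
   int computed from the flags.  Assuming -msse:

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       int
       low_equal (__v4sf a, __v4sf b)
       {
         return __builtin_ia32_comieq (a, b);
       }

   This expands through the code above into a COMISS of the low elements
   followed by a flag-to-QImode extraction into the SImode result.  */
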
17519 /* Return the integer constant in ARG. Constrain it to be in the range
17520 of the subparts of VEC_TYPE; issue an error if not. */
17521
17522 static int
17523 get_element_number (tree vec_type, tree arg)
17524 {
17525 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17526
17527 if (!host_integerp (arg, 1)
17528 || (elt = tree_low_cst (arg, 1), elt > max))
17529 {
17530 error ("selector must be an integer constant in the range 0..%wi", max);
17531 return 0;
17532 }
17533
17534 return elt;
17535 }
17536
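/* Illustrative sketch (not part of GCC): for the vec_ext/vec_set builtins
   the selector argument must be a compile-time constant within the number
   of vector elements, which get_element_number above enforces.  Assuming
   -msse:

       typedef float __v4sf __attribute__ ((__vector_size__ (16)));

       float ok  (__v4sf x) { return __builtin_ia32_vec_ext_v4sf (x, 2); }
       float bad (__v4sf x) { return __builtin_ia32_vec_ext_v4sf (x, 5); }

   The first call extracts element 2; the second is rejected at expand time
   with "selector must be an integer constant in the range 0..3".  */
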
17537 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17538 ix86_expand_vector_init. We DO have language-level syntax for this, in
17539 the form of (type){ init-list }. Except that since we can't place emms
17540 instructions from inside the compiler, we can't allow the use of MMX
17541 registers unless the user explicitly asks for it. So we do *not* define
17542 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17543 we have builtins invoked by mmintrin.h that give us license to emit
17544 these sorts of instructions. */
17545
17546 static rtx
17547 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
17548 {
17549 enum machine_mode tmode = TYPE_MODE (type);
17550 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17551 int i, n_elt = GET_MODE_NUNITS (tmode);
17552 rtvec v = rtvec_alloc (n_elt);
17553
17554 gcc_assert (VECTOR_MODE_P (tmode));
17555
17556 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
17557 {
17558 rtx x = expand_normal (TREE_VALUE (arglist));
17559 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17560 }
17561
17562 gcc_assert (arglist == NULL);
17563
17564 if (!target || !register_operand (target, tmode))
17565 target = gen_reg_rtx (tmode);
17566
17567 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17568 return target;
17569 }
17570
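/* Illustrative sketch (not part of GCC): with -mmmx, mmintrin.h-style code
   can build an MMX vector without any language-level vector arithmetic by
   calling the wrapper builtin expanded above.  The arguments are taken in
   element order (element 0 first):

       typedef int __v2si __attribute__ ((__vector_size__ (8)));

       __v2si
       make_v2si (int e0, int e1)
       {
         return __builtin_ia32_vec_init_v2si (e0, e1);
       }

   ix86_expand_vector_init then chooses the actual insn sequence.  */
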
17571 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17572 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17573 had a language-level syntax for referencing vector elements. */
17574
17575 static rtx
17576 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
17577 {
17578 enum machine_mode tmode, mode0;
17579 tree arg0, arg1;
17580 int elt;
17581 rtx op0;
17582
17583 arg0 = TREE_VALUE (arglist);
17584 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17585
17586 op0 = expand_normal (arg0);
17587 elt = get_element_number (TREE_TYPE (arg0), arg1);
17588
17589 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17590 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17591 gcc_assert (VECTOR_MODE_P (mode0));
17592
17593 op0 = force_reg (mode0, op0);
17594
17595 if (optimize || !target || !register_operand (target, tmode))
17596 target = gen_reg_rtx (tmode);
17597
17598 ix86_expand_vector_extract (true, target, op0, elt);
17599
17600 return target;
17601 }
17602
17603 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17604 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17605 a language-level syntax for referencing vector elements. */
17606
17607 static rtx
17608 ix86_expand_vec_set_builtin (tree arglist)
17609 {
17610 enum machine_mode tmode, mode1;
17611 tree arg0, arg1, arg2;
17612 int elt;
17613 rtx op0, op1;
17614
17615 arg0 = TREE_VALUE (arglist);
17616 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17617 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17618
17619 tmode = TYPE_MODE (TREE_TYPE (arg0));
17620 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17621 gcc_assert (VECTOR_MODE_P (tmode));
17622
17623 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17624 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17625 elt = get_element_number (TREE_TYPE (arg0), arg2);
17626
17627 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17628 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17629
17630 op0 = force_reg (tmode, op0);
17631 op1 = force_reg (mode1, op1);
17632
17633 ix86_expand_vector_set (true, op0, op1, elt);
17634
17635 return op0;
17636 }
17637
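/* Illustrative sketch (not part of GCC): the vec_set wrapper replaces one
   element of a vector.  Assuming -msse2, for example:

       typedef short __v8hi __attribute__ ((__vector_size__ (16)));

       __v8hi
       set_lane (__v8hi v, short x)
       {
         return __builtin_ia32_vec_set_v8hi (v, x, 3);
       }

   stores X into element 3 (the selector must again be a constant, here in
   the range 0..7) and is expanded by ix86_expand_vector_set above.  */
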
17638 /* Expand an expression EXP that calls a built-in function,
17639 with result going to TARGET if that's convenient
17640 (and in mode MODE if that's convenient).
17641 SUBTARGET may be used as the target for computing one of EXP's operands.
17642 IGNORE is nonzero if the value is to be ignored. */
17643
17644 static rtx
17645 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17646 enum machine_mode mode ATTRIBUTE_UNUSED,
17647 int ignore ATTRIBUTE_UNUSED)
17648 {
17649 const struct builtin_description *d;
17650 size_t i;
17651 enum insn_code icode;
17652 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
17653 tree arglist = TREE_OPERAND (exp, 1);
17654 tree arg0, arg1, arg2, arg3;
17655 rtx op0, op1, op2, op3, pat;
17656 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17657 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17658
17659 switch (fcode)
17660 {
17661 case IX86_BUILTIN_EMMS:
17662 emit_insn (gen_mmx_emms ());
17663 return 0;
17664
17665 case IX86_BUILTIN_SFENCE:
17666 emit_insn (gen_sse_sfence ());
17667 return 0;
17668
17669 case IX86_BUILTIN_MASKMOVQ:
17670 case IX86_BUILTIN_MASKMOVDQU:
17671 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17672 ? CODE_FOR_mmx_maskmovq
17673 : CODE_FOR_sse2_maskmovdqu);
17674 /* Note the arg order is different from the operand order. */
17675 arg1 = TREE_VALUE (arglist);
17676 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
17677 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17678 op0 = expand_normal (arg0);
17679 op1 = expand_normal (arg1);
17680 op2 = expand_normal (arg2);
17681 mode0 = insn_data[icode].operand[0].mode;
17682 mode1 = insn_data[icode].operand[1].mode;
17683 mode2 = insn_data[icode].operand[2].mode;
17684
17685 op0 = force_reg (Pmode, op0);
17686 op0 = gen_rtx_MEM (mode1, op0);
17687
17688 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17689 op0 = copy_to_mode_reg (mode0, op0);
17690 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17691 op1 = copy_to_mode_reg (mode1, op1);
17692 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17693 op2 = copy_to_mode_reg (mode2, op2);
17694 pat = GEN_FCN (icode) (op0, op1, op2);
17695 if (! pat)
17696 return 0;
17697 emit_insn (pat);
17698 return 0;
17699
17700 case IX86_BUILTIN_SQRTSS:
17701 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
17702 case IX86_BUILTIN_RSQRTSS:
17703 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
17704 case IX86_BUILTIN_RCPSS:
17705 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
17706
17707 case IX86_BUILTIN_LOADUPS:
17708 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
17709
17710 case IX86_BUILTIN_STOREUPS:
17711 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
17712
17713 case IX86_BUILTIN_LOADHPS:
17714 case IX86_BUILTIN_LOADLPS:
17715 case IX86_BUILTIN_LOADHPD:
17716 case IX86_BUILTIN_LOADLPD:
17717 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17718 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17719 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17720 : CODE_FOR_sse2_loadlpd);
17721 arg0 = TREE_VALUE (arglist);
17722 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17723 op0 = expand_normal (arg0);
17724 op1 = expand_normal (arg1);
17725 tmode = insn_data[icode].operand[0].mode;
17726 mode0 = insn_data[icode].operand[1].mode;
17727 mode1 = insn_data[icode].operand[2].mode;
17728
17729 op0 = force_reg (mode0, op0);
17730 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17731 if (optimize || target == 0
17732 || GET_MODE (target) != tmode
17733 || !register_operand (target, tmode))
17734 target = gen_reg_rtx (tmode);
17735 pat = GEN_FCN (icode) (target, op0, op1);
17736 if (! pat)
17737 return 0;
17738 emit_insn (pat);
17739 return target;
17740
17741 case IX86_BUILTIN_STOREHPS:
17742 case IX86_BUILTIN_STORELPS:
17743 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17744 : CODE_FOR_sse_storelps);
17745 arg0 = TREE_VALUE (arglist);
17746 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17747 op0 = expand_normal (arg0);
17748 op1 = expand_normal (arg1);
17749 mode0 = insn_data[icode].operand[0].mode;
17750 mode1 = insn_data[icode].operand[1].mode;
17751
17752 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17753 op1 = force_reg (mode1, op1);
17754
17755 pat = GEN_FCN (icode) (op0, op1);
17756 if (! pat)
17757 return 0;
17758 emit_insn (pat);
17759 return const0_rtx;
17760
17761 case IX86_BUILTIN_MOVNTPS:
17762 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
17763 case IX86_BUILTIN_MOVNTQ:
17764 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
17765
17766 case IX86_BUILTIN_LDMXCSR:
17767 op0 = expand_normal (TREE_VALUE (arglist));
17768 target = assign_386_stack_local (SImode, SLOT_TEMP);
17769 emit_move_insn (target, op0);
17770 emit_insn (gen_sse_ldmxcsr (target));
17771 return 0;
17772
17773 case IX86_BUILTIN_STMXCSR:
17774 target = assign_386_stack_local (SImode, SLOT_TEMP);
17775 emit_insn (gen_sse_stmxcsr (target));
17776 return copy_to_mode_reg (SImode, target);
17777
17778 case IX86_BUILTIN_SHUFPS:
17779 case IX86_BUILTIN_SHUFPD:
17780 icode = (fcode == IX86_BUILTIN_SHUFPS
17781 ? CODE_FOR_sse_shufps
17782 : CODE_FOR_sse2_shufpd);
17783 arg0 = TREE_VALUE (arglist);
17784 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17785 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17786 op0 = expand_normal (arg0);
17787 op1 = expand_normal (arg1);
17788 op2 = expand_normal (arg2);
17789 tmode = insn_data[icode].operand[0].mode;
17790 mode0 = insn_data[icode].operand[1].mode;
17791 mode1 = insn_data[icode].operand[2].mode;
17792 mode2 = insn_data[icode].operand[3].mode;
17793
17794 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17795 op0 = copy_to_mode_reg (mode0, op0);
17796 if ((optimize && !register_operand (op1, mode1))
17797 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17798 op1 = copy_to_mode_reg (mode1, op1);
17799 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17800 {
17801 /* @@@ better error message */
17802 error ("mask must be an immediate");
17803 return gen_reg_rtx (tmode);
17804 }
17805 if (optimize || target == 0
17806 || GET_MODE (target) != tmode
17807 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17808 target = gen_reg_rtx (tmode);
17809 pat = GEN_FCN (icode) (target, op0, op1, op2);
17810 if (! pat)
17811 return 0;
17812 emit_insn (pat);
17813 return target;
17814
17815 case IX86_BUILTIN_PSHUFW:
17816 case IX86_BUILTIN_PSHUFD:
17817 case IX86_BUILTIN_PSHUFHW:
17818 case IX86_BUILTIN_PSHUFLW:
17819 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17820 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17821 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17822 : CODE_FOR_mmx_pshufw);
17823 arg0 = TREE_VALUE (arglist);
17824 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17825 op0 = expand_normal (arg0);
17826 op1 = expand_normal (arg1);
17827 tmode = insn_data[icode].operand[0].mode;
17828 mode1 = insn_data[icode].operand[1].mode;
17829 mode2 = insn_data[icode].operand[2].mode;
17830
17831 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17832 op0 = copy_to_mode_reg (mode1, op0);
17833 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17834 {
17835 /* @@@ better error message */
17836 error ("mask must be an immediate");
17837 return const0_rtx;
17838 }
17839 if (target == 0
17840 || GET_MODE (target) != tmode
17841 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17842 target = gen_reg_rtx (tmode);
17843 pat = GEN_FCN (icode) (target, op0, op1);
17844 if (! pat)
17845 return 0;
17846 emit_insn (pat);
17847 return target;
17848
17849 case IX86_BUILTIN_PSLLDQI128:
17850 case IX86_BUILTIN_PSRLDQI128:
17851 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17852 : CODE_FOR_sse2_lshrti3);
17853 arg0 = TREE_VALUE (arglist);
17854 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17855 op0 = expand_normal (arg0);
17856 op1 = expand_normal (arg1);
17857 tmode = insn_data[icode].operand[0].mode;
17858 mode1 = insn_data[icode].operand[1].mode;
17859 mode2 = insn_data[icode].operand[2].mode;
17860
17861 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17862 {
17863 op0 = copy_to_reg (op0);
17864 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17865 }
17866 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17867 {
17868 error ("shift must be an immediate");
17869 return const0_rtx;
17870 }
17871 target = gen_reg_rtx (V2DImode);
17872 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17873 if (! pat)
17874 return 0;
17875 emit_insn (pat);
17876 return target;
17877
17878 case IX86_BUILTIN_FEMMS:
17879 emit_insn (gen_mmx_femms ());
17880 return NULL_RTX;
17881
17882 case IX86_BUILTIN_PAVGUSB:
17883 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
17884
17885 case IX86_BUILTIN_PF2ID:
17886 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
17887
17888 case IX86_BUILTIN_PFACC:
17889 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
17890
17891 case IX86_BUILTIN_PFADD:
17892 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
17893
17894 case IX86_BUILTIN_PFCMPEQ:
17895 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
17896
17897 case IX86_BUILTIN_PFCMPGE:
17898 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
17899
17900 case IX86_BUILTIN_PFCMPGT:
17901 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
17902
17903 case IX86_BUILTIN_PFMAX:
17904 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
17905
17906 case IX86_BUILTIN_PFMIN:
17907 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
17908
17909 case IX86_BUILTIN_PFMUL:
17910 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
17911
17912 case IX86_BUILTIN_PFRCP:
17913 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
17914
17915 case IX86_BUILTIN_PFRCPIT1:
17916 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
17917
17918 case IX86_BUILTIN_PFRCPIT2:
17919 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
17920
17921 case IX86_BUILTIN_PFRSQIT1:
17922 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
17923
17924 case IX86_BUILTIN_PFRSQRT:
17925 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
17926
17927 case IX86_BUILTIN_PFSUB:
17928 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
17929
17930 case IX86_BUILTIN_PFSUBR:
17931 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
17932
17933 case IX86_BUILTIN_PI2FD:
17934 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
17935
17936 case IX86_BUILTIN_PMULHRW:
17937 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
17938
17939 case IX86_BUILTIN_PF2IW:
17940 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
17941
17942 case IX86_BUILTIN_PFNACC:
17943 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
17944
17945 case IX86_BUILTIN_PFPNACC:
17946 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
17947
17948 case IX86_BUILTIN_PI2FW:
17949 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
17950
17951 case IX86_BUILTIN_PSWAPDSI:
17952 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
17953
17954 case IX86_BUILTIN_PSWAPDSF:
17955 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
17956
17957 case IX86_BUILTIN_SQRTSD:
17958 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
17959 case IX86_BUILTIN_LOADUPD:
17960 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
17961 case IX86_BUILTIN_STOREUPD:
17962 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
17963
17964 case IX86_BUILTIN_MFENCE:
17965 emit_insn (gen_sse2_mfence ());
17966 return 0;
17967 case IX86_BUILTIN_LFENCE:
17968 emit_insn (gen_sse2_lfence ());
17969 return 0;
17970
17971 case IX86_BUILTIN_CLFLUSH:
17972 arg0 = TREE_VALUE (arglist);
17973 op0 = expand_normal (arg0);
17974 icode = CODE_FOR_sse2_clflush;
17975 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17976 op0 = copy_to_mode_reg (Pmode, op0);
17977
17978 emit_insn (gen_sse2_clflush (op0));
17979 return 0;
17980
17981 case IX86_BUILTIN_MOVNTPD:
17982 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
17983 case IX86_BUILTIN_MOVNTDQ:
17984 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
17985 case IX86_BUILTIN_MOVNTI:
17986 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
17987
17988 case IX86_BUILTIN_LOADDQU:
17989 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
17990 case IX86_BUILTIN_STOREDQU:
17991 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
17992
17993 case IX86_BUILTIN_MONITOR:
17994 arg0 = TREE_VALUE (arglist);
17995 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17996 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17997 op0 = expand_normal (arg0);
17998 op1 = expand_normal (arg1);
17999 op2 = expand_normal (arg2);
18000 if (!REG_P (op0))
18001 op0 = copy_to_mode_reg (Pmode, op0);
18002 if (!REG_P (op1))
18003 op1 = copy_to_mode_reg (SImode, op1);
18004 if (!REG_P (op2))
18005 op2 = copy_to_mode_reg (SImode, op2);
18006 if (!TARGET_64BIT)
18007 emit_insn (gen_sse3_monitor (op0, op1, op2));
18008 else
18009 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18010 return 0;
18011
18012 case IX86_BUILTIN_MWAIT:
18013 arg0 = TREE_VALUE (arglist);
18014 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
18015 op0 = expand_normal (arg0);
18016 op1 = expand_normal (arg1);
18017 if (!REG_P (op0))
18018 op0 = copy_to_mode_reg (SImode, op0);
18019 if (!REG_P (op1))
18020 op1 = copy_to_mode_reg (SImode, op1);
18021 emit_insn (gen_sse3_mwait (op0, op1));
18022 return 0;
18023
18024 case IX86_BUILTIN_LDDQU:
18025 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
18026 target, 1);
18027
18028 case IX86_BUILTIN_PALIGNR:
18029 case IX86_BUILTIN_PALIGNR128:
18030 if (fcode == IX86_BUILTIN_PALIGNR)
18031 {
18032 icode = CODE_FOR_ssse3_palignrdi;
18033 mode = DImode;
18034 }
18035 else
18036 {
18037 icode = CODE_FOR_ssse3_palignrti;
18038 mode = V2DImode;
18039 }
18040 arg0 = TREE_VALUE (arglist);
18041 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
18042 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
18043 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18044 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18045 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18046 tmode = insn_data[icode].operand[0].mode;
18047 mode1 = insn_data[icode].operand[1].mode;
18048 mode2 = insn_data[icode].operand[2].mode;
18049 mode3 = insn_data[icode].operand[3].mode;
18050
18051 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18052 {
18053 op0 = copy_to_reg (op0);
18054 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18055 }
18056 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18057 {
18058 op1 = copy_to_reg (op1);
18059 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18060 }
18061 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18062 {
18063 error ("shift must be an immediate");
18064 return const0_rtx;
18065 }
18066 target = gen_reg_rtx (mode);
18067 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18068 op0, op1, op2);
18069 if (! pat)
18070 return 0;
18071 emit_insn (pat);
18072 return target;
18073
18074 case IX86_BUILTIN_MOVNTSD:
18075 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, arglist);
18076
18077 case IX86_BUILTIN_MOVNTSS:
18078 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, arglist);
18079
18080 case IX86_BUILTIN_INSERTQ:
18081 case IX86_BUILTIN_EXTRQ:
18082 icode = (fcode == IX86_BUILTIN_EXTRQ
18083 ? CODE_FOR_sse4a_extrq
18084 : CODE_FOR_sse4a_insertq);
18085 arg0 = TREE_VALUE (arglist);
18086 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
18087 op0 = expand_normal (arg0);
18088 op1 = expand_normal (arg1);
18089 tmode = insn_data[icode].operand[0].mode;
18090 mode1 = insn_data[icode].operand[1].mode;
18091 mode2 = insn_data[icode].operand[2].mode;
18092 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18093 op0 = copy_to_mode_reg (mode1, op0);
18094 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18095 op1 = copy_to_mode_reg (mode2, op1);
18096 if (optimize || target == 0
18097 || GET_MODE (target) != tmode
18098 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18099 target = gen_reg_rtx (tmode);
18100 pat = GEN_FCN (icode) (target, op0, op1);
18101 if (! pat)
18102 return NULL_RTX;
18103 emit_insn (pat);
18104 return target;
18105
18106 case IX86_BUILTIN_EXTRQI:
18107 icode = CODE_FOR_sse4a_extrqi;
18108 arg0 = TREE_VALUE (arglist);
18109 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
18110 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
18111 op0 = expand_normal (arg0);
18112 op1 = expand_normal (arg1);
18113 op2 = expand_normal (arg2);
18114 tmode = insn_data[icode].operand[0].mode;
18115 mode1 = insn_data[icode].operand[1].mode;
18116 mode2 = insn_data[icode].operand[2].mode;
18117 mode3 = insn_data[icode].operand[3].mode;
18118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18119 op0 = copy_to_mode_reg (mode1, op0);
18120 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18121 {
18122 error ("index mask must be an immediate");
18123 return gen_reg_rtx (tmode);
18124 }
18125 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18126 {
18127 error ("length mask must be an immediate");
18128 return gen_reg_rtx (tmode);
18129 }
18130 if (optimize || target == 0
18131 || GET_MODE (target) != tmode
18132 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18133 target = gen_reg_rtx (tmode);
18134 pat = GEN_FCN (icode) (target, op0, op1, op2);
18135 if (! pat)
18136 return NULL_RTX;
18137 emit_insn (pat);
18138 return target;
18139
18140 case IX86_BUILTIN_INSERTQI:
18141 icode = CODE_FOR_sse4a_insertqi;
18142 arg0 = TREE_VALUE (arglist);
18143 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
18144 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
18145 arg3 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (TREE_CHAIN (arglist))));
18146 op0 = expand_normal (arg0);
18147 op1 = expand_normal (arg1);
18148 op2 = expand_normal (arg2);
18149 op3 = expand_normal (arg3);
18150 tmode = insn_data[icode].operand[0].mode;
18151 mode1 = insn_data[icode].operand[1].mode;
18152 mode2 = insn_data[icode].operand[2].mode;
18153 mode3 = insn_data[icode].operand[3].mode;
18154 mode4 = insn_data[icode].operand[4].mode;
18155
18156 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18157 op0 = copy_to_mode_reg (mode1, op0);
18158
18159 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18160 op1 = copy_to_mode_reg (mode2, op1);
18161
18162 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18163 {
18164 error ("index mask must be an immediate");
18165 return gen_reg_rtx (tmode);
18166 }
18167 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18168 {
18169 error ("length mask must be an immediate");
18170 return gen_reg_rtx (tmode);
18171 }
18172 if (optimize || target == 0
18173 || GET_MODE (target) != tmode
18174 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18175 target = gen_reg_rtx (tmode);
18176 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18177 if (! pat)
18178 return NULL_RTX;
18179 emit_insn (pat);
18180 return target;
18181
18182 case IX86_BUILTIN_VEC_INIT_V2SI:
18183 case IX86_BUILTIN_VEC_INIT_V4HI:
18184 case IX86_BUILTIN_VEC_INIT_V8QI:
18185 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
18186
18187 case IX86_BUILTIN_VEC_EXT_V2DF:
18188 case IX86_BUILTIN_VEC_EXT_V2DI:
18189 case IX86_BUILTIN_VEC_EXT_V4SF:
18190 case IX86_BUILTIN_VEC_EXT_V4SI:
18191 case IX86_BUILTIN_VEC_EXT_V8HI:
18192 case IX86_BUILTIN_VEC_EXT_V2SI:
18193 case IX86_BUILTIN_VEC_EXT_V4HI:
18194 return ix86_expand_vec_ext_builtin (arglist, target);
18195
18196 case IX86_BUILTIN_VEC_SET_V8HI:
18197 case IX86_BUILTIN_VEC_SET_V4HI:
18198 return ix86_expand_vec_set_builtin (arglist);
18199
18200 default:
18201 break;
18202 }
18203
18204 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18205 if (d->code == fcode)
18206 {
18207 /* Compares are treated specially. */
18208 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18209 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18210 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18211 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18212 return ix86_expand_sse_compare (d, arglist, target);
18213
18214 return ix86_expand_binop_builtin (d->icode, arglist, target);
18215 }
18216
18217 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18218 if (d->code == fcode)
18219 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
18220
18221 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18222 if (d->code == fcode)
18223 return ix86_expand_sse_comi (d, arglist, target);
18224
18225 gcc_unreachable ();
18226 }
18227
18228 /* Returns a function decl for a vectorized version of the builtin function
18229 with builtin function code FN, result vector type TYPE_OUT and input vector
18230 type TYPE_IN, or NULL_TREE if it is not available. */
18231
18232 static tree
18233 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18234 tree type_in)
18235 {
18236 enum machine_mode in_mode, out_mode;
18237 int in_n, out_n;
18238
18239 if (TREE_CODE (type_out) != VECTOR_TYPE
18240 || TREE_CODE (type_in) != VECTOR_TYPE)
18241 return NULL_TREE;
18242
18243 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18244 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18245 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18246 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18247
18248 switch (fn)
18249 {
18250 case BUILT_IN_SQRT:
18251 if (out_mode == DFmode && out_n == 2
18252 && in_mode == DFmode && in_n == 2)
18253 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18254 return NULL_TREE;
18255
18256 case BUILT_IN_SQRTF:
18257 if (out_mode == SFmode && out_n == 4
18258 && in_mode == SFmode && in_n == 4)
18259 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18260 return NULL_TREE;
18261
18262 case BUILT_IN_LRINTF:
18263 if (out_mode == SImode && out_n == 4
18264 && in_mode == SFmode && in_n == 4)
18265 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18266 return NULL_TREE;
18267
18268 default:
18269 ;
18270 }
18271
18272 return NULL_TREE;
18273 }
18274
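/* Illustrative sketch (not part of GCC): given a loop such as

       #include <math.h>

       void
       vsqrt (double *__restrict out, const double *__restrict in, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           out[i] = sqrt (in[i]);
       }

   compiled with options along the lines of -O2 -msse2 -ftree-vectorize
   -ffast-math, the vectorizer asks this hook for BUILT_IN_SQRT with DFmode
   elements and two subparts, receives IX86_BUILTIN_SQRTPD, and replaces the
   scalar calls with SQRTPD.  */
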
18275 /* Returns a decl of a function that implements the conversion CODE of the
18276 input vector of type TYPE, or NULL_TREE if it is not available. */
18277
18278 static tree
18279 ix86_builtin_conversion (enum tree_code code, tree type)
18280 {
18281 if (TREE_CODE (type) != VECTOR_TYPE)
18282 return NULL_TREE;
18283
18284 switch (code)
18285 {
18286 case FLOAT_EXPR:
18287 switch (TYPE_MODE (type))
18288 {
18289 case V4SImode:
18290 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18291 default:
18292 return NULL_TREE;
18293 }
18294
18295 case FIX_TRUNC_EXPR:
18296 switch (TYPE_MODE (type))
18297 {
18298 case V4SFmode:
18299 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18300 default:
18301 return NULL_TREE;
18302 }
18303 default:
18304 return NULL_TREE;
18305
18306 }
18307 }
18308
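/* Illustrative sketch (not part of GCC): this hook lets the vectorizer turn
   whole-loop conversions into single instructions.  For example, compiled
   with -O2 -msse2 -ftree-vectorize:

       void
       int_to_float (float *__restrict out, const int *__restrict in, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           out[i] = (float) in[i];
       }

   the FLOAT_EXPR on V4SImode data is handled by IX86_BUILTIN_CVTDQ2PS
   (cvtdq2ps); the mirror-image (int) cast of a float array maps the
   FIX_TRUNC_EXPR on V4SFmode data to IX86_BUILTIN_CVTTPS2DQ (cvttps2dq).  */
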
18309 /* Store OPERAND to memory after reload is completed. This means
18310 that we can't easily use assign_stack_local. */
18311 rtx
18312 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18313 {
18314 rtx result;
18315
18316 gcc_assert (reload_completed);
18317 if (TARGET_RED_ZONE)
18318 {
18319 result = gen_rtx_MEM (mode,
18320 gen_rtx_PLUS (Pmode,
18321 stack_pointer_rtx,
18322 GEN_INT (-RED_ZONE_SIZE)));
18323 emit_move_insn (result, operand);
18324 }
18325 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18326 {
18327 switch (mode)
18328 {
18329 case HImode:
18330 case SImode:
18331 operand = gen_lowpart (DImode, operand);
18332 /* FALLTHRU */
18333 case DImode:
18334 emit_insn (
18335 gen_rtx_SET (VOIDmode,
18336 gen_rtx_MEM (DImode,
18337 gen_rtx_PRE_DEC (DImode,
18338 stack_pointer_rtx)),
18339 operand));
18340 break;
18341 default:
18342 gcc_unreachable ();
18343 }
18344 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18345 }
18346 else
18347 {
18348 switch (mode)
18349 {
18350 case DImode:
18351 {
18352 rtx operands[2];
18353 split_di (&operand, 1, operands, operands + 1);
18354 emit_insn (
18355 gen_rtx_SET (VOIDmode,
18356 gen_rtx_MEM (SImode,
18357 gen_rtx_PRE_DEC (Pmode,
18358 stack_pointer_rtx)),
18359 operands[1]));
18360 emit_insn (
18361 gen_rtx_SET (VOIDmode,
18362 gen_rtx_MEM (SImode,
18363 gen_rtx_PRE_DEC (Pmode,
18364 stack_pointer_rtx)),
18365 operands[0]));
18366 }
18367 break;
18368 case HImode:
18369 /* Store HImodes as SImodes. */
18370 operand = gen_lowpart (SImode, operand);
18371 /* FALLTHRU */
18372 case SImode:
18373 emit_insn (
18374 gen_rtx_SET (VOIDmode,
18375 gen_rtx_MEM (GET_MODE (operand),
18376 gen_rtx_PRE_DEC (SImode,
18377 stack_pointer_rtx)),
18378 operand));
18379 break;
18380 default:
18381 gcc_unreachable ();
18382 }
18383 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18384 }
18385 return result;
18386 }
18387
18388 /* Free the operand from memory. */
18389 void
18390 ix86_free_from_memory (enum machine_mode mode)
18391 {
18392 if (!TARGET_RED_ZONE)
18393 {
18394 int size;
18395
18396 if (mode == DImode || TARGET_64BIT)
18397 size = 8;
18398 else
18399 size = 4;
18400 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18401 to a pop or add instruction if registers are available. */
18402 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18403 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18404 GEN_INT (size))));
18405 }
18406 }
18407
18408 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18409 QImode must go into class Q_REGS.
18410 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18411 movdf to do mem-to-mem moves through integer regs. */
18412 enum reg_class
18413 ix86_preferred_reload_class (rtx x, enum reg_class class)
18414 {
18415 enum machine_mode mode = GET_MODE (x);
18416
18417 /* We're only allowed to return a subclass of CLASS. Many of the
18418 following checks fail for NO_REGS, so eliminate that early. */
18419 if (class == NO_REGS)
18420 return NO_REGS;
18421
18422 /* All classes can load zeros. */
18423 if (x == CONST0_RTX (mode))
18424 return class;
18425
18426 /* Force constants into memory if we are loading a (nonzero) constant into
18427 an MMX or SSE register. This is because there are no MMX/SSE instructions
18428 to load from a constant. */
18429 if (CONSTANT_P (x)
18430 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18431 return NO_REGS;
18432
18433 /* Prefer SSE regs only, if we can use them for math. */
18434 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18435 return SSE_CLASS_P (class) ? class : NO_REGS;
18436
18437 /* Floating-point constants need more complex checks. */
18438 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18439 {
18440 /* General regs can load everything. */
18441 if (reg_class_subset_p (class, GENERAL_REGS))
18442 return class;
18443
18444 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18445 zero above. We only want to wind up preferring 80387 registers if
18446 we plan on doing computation with them. */
18447 if (TARGET_80387
18448 && standard_80387_constant_p (x))
18449 {
18450 /* Limit class to non-sse. */
18451 if (class == FLOAT_SSE_REGS)
18452 return FLOAT_REGS;
18453 if (class == FP_TOP_SSE_REGS)
18454 return FP_TOP_REG;
18455 if (class == FP_SECOND_SSE_REGS)
18456 return FP_SECOND_REG;
18457 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18458 return class;
18459 }
18460
18461 return NO_REGS;
18462 }
18463
18464 /* Generally when we see PLUS here, it's the function invariant
18465 (plus soft-fp const_int). Which can only be computed into general
18466 regs. */
18467 if (GET_CODE (x) == PLUS)
18468 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18469
18470 /* QImode constants are easy to load, but non-constant QImode data
18471 must go into Q_REGS. */
18472 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18473 {
18474 if (reg_class_subset_p (class, Q_REGS))
18475 return class;
18476 if (reg_class_subset_p (Q_REGS, class))
18477 return Q_REGS;
18478 return NO_REGS;
18479 }
18480
18481 return class;
18482 }
18483
18484 /* Discourage putting floating-point values in SSE registers unless
18485 SSE math is being used, and likewise for the 387 registers. */
18486 enum reg_class
18487 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18488 {
18489 enum machine_mode mode = GET_MODE (x);
18490
18491 /* Restrict the output reload class to the register bank that we are doing
18492 math on. If we would like not to return a subset of CLASS, reject this
18493 alternative: if reload cannot do this, it will still use its choice. */
18494 mode = GET_MODE (x);
18495 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18496 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18497
18498 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18499 {
18500 if (class == FP_TOP_SSE_REGS)
18501 return FP_TOP_REG;
18502 else if (class == FP_SECOND_SSE_REGS)
18503 return FP_SECOND_REG;
18504 else
18505 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18506 }
18507
18508 return class;
18509 }
18510
18511 /* If we are copying between general and FP registers, we need a memory
18512 location. The same is true for SSE and MMX registers.
18513
18514 The macro can't work reliably when one of the CLASSES is a class containing
18515 registers from multiple units (SSE, MMX, integer). We avoid this by never
18516 combining those units in a single alternative in the machine description.
18517 Ensure that this constraint holds to avoid unexpected surprises.
18518
18519 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18520 enforce these sanity checks. */
18521
18522 int
18523 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18524 enum machine_mode mode, int strict)
18525 {
18526 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18527 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18528 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18529 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18530 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18531 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18532 {
18533 gcc_assert (!strict);
18534 return true;
18535 }
18536
18537 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18538 return true;
18539
18540 /* ??? This is a lie. We do have moves between mmx/general, and between
18541 mmx/sse2. But by saying we need secondary memory we discourage the
18542 register allocator from using the mmx registers unless needed. */
18543 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18544 return true;
18545
18546 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18547 {
18548 /* SSE1 doesn't have any direct moves from other classes. */
18549 if (!TARGET_SSE2)
18550 return true;
18551
18552 /* If the target says that inter-unit moves are more expensive
18553 than moving through memory, then don't generate them. */
18554 if (!TARGET_INTER_UNIT_MOVES)
18555 return true;
18556
18557 /* Between SSE and general registers, moves are no larger than word size. */
18558 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18559 return true;
18560 }
18561
18562 return false;
18563 }
18564
18565 /* Return true if the registers in CLASS cannot represent the change from
18566 modes FROM to TO. */
18567
18568 bool
18569 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18570 enum reg_class class)
18571 {
18572 if (from == to)
18573 return false;
18574
18575 /* x87 registers can't do subreg at all, as all values are reformatted
18576 to extended precision. */
18577 if (MAYBE_FLOAT_CLASS_P (class))
18578 return true;
18579
18580 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18581 {
18582 /* Vector registers do not support QI or HImode loads. If we don't
18583 disallow a change to these modes, reload will assume it's ok to
18584 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18585 the vec_dupv4hi pattern. */
18586 if (GET_MODE_SIZE (from) < 4)
18587 return true;
18588
18589 /* Vector registers do not support subreg with nonzero offsets, which
18590 are otherwise valid for integer registers. Since we can't see
18591 whether we have a nonzero offset from here, prohibit all
18592 nonparadoxical subregs changing size. */
18593 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18594 return true;
18595 }
18596
18597 return false;
18598 }
18599
18600 /* Return the cost of moving data from a register in class CLASS1 to
18601 one in class CLASS2.
18602
18603 It is not required that the cost always equal 2 when FROM is the same as TO;
18604 on some machines it is expensive to move between registers if they are not
18605 general registers. */
18606
18607 int
18608 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18609 enum reg_class class2)
18610 {
18611 /* In case we require secondary memory, compute the cost of the store
18612 followed by the load. In order to avoid bad register allocation choices,
18613 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18614
18615 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18616 {
18617 int cost = 1;
18618
18619 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18620 MEMORY_MOVE_COST (mode, class1, 1));
18621 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18622 MEMORY_MOVE_COST (mode, class2, 1));
18623
18624 /* In the case of copying from a general purpose register we may emit
18625 multiple stores followed by a single load, causing a memory size mismatch
18626 stall. Count this as an arbitrarily high cost of 20. */
18627 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18628 cost += 20;
18629
18630 /* In the case of FP/MMX moves, the registers actually overlap, and we
18631 have to switch modes in order to treat them differently. */
18632 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18633 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18634 cost += 20;
18635
18636 return cost;
18637 }
18638
18639 /* Moves between SSE/MMX and integer unit are expensive. */
18640 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18641 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18642 return ix86_cost->mmxsse_to_integer;
18643 if (MAYBE_FLOAT_CLASS_P (class1))
18644 return ix86_cost->fp_move;
18645 if (MAYBE_SSE_CLASS_P (class1))
18646 return ix86_cost->sse_move;
18647 if (MAYBE_MMX_CLASS_P (class1))
18648 return ix86_cost->mmx_move;
18649 return 2;
18650 }
18651
18652 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18653
18654 bool
18655 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18656 {
18657 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
18658 if (CC_REGNO_P (regno))
18659 return GET_MODE_CLASS (mode) == MODE_CC;
18660 if (GET_MODE_CLASS (mode) == MODE_CC
18661 || GET_MODE_CLASS (mode) == MODE_RANDOM
18662 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18663 return 0;
18664 if (FP_REGNO_P (regno))
18665 return VALID_FP_MODE_P (mode);
18666 if (SSE_REGNO_P (regno))
18667 {
18668 /* We implement the move patterns for all vector modes into and
18669 out of SSE registers, even when no operation instructions
18670 are available. */
18671 return (VALID_SSE_REG_MODE (mode)
18672 || VALID_SSE2_REG_MODE (mode)
18673 || VALID_MMX_REG_MODE (mode)
18674 || VALID_MMX_REG_MODE_3DNOW (mode));
18675 }
18676 if (MMX_REGNO_P (regno))
18677 {
18678 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18679 so if the register is available at all, then we can move data of
18680 the given mode into or out of it. */
18681 return (VALID_MMX_REG_MODE (mode)
18682 || VALID_MMX_REG_MODE_3DNOW (mode));
18683 }
18684
18685 if (mode == QImode)
18686 {
18687 /* Take care with QImode values - they can live in non-QI regs,
18688 but then they do cause partial register stalls. */
18689 if (regno < 4 || TARGET_64BIT)
18690 return 1;
18691 if (!TARGET_PARTIAL_REG_STALL)
18692 return 1;
18693 return reload_in_progress || reload_completed;
18694 }
18695 /* We handle both integers and floats in the general purpose registers. */
18696 else if (VALID_INT_MODE_P (mode))
18697 return 1;
18698 else if (VALID_FP_MODE_P (mode))
18699 return 1;
18700 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18701 on to use that value in smaller contexts, this can easily force a
18702 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18703 supporting DImode, allow it. */
18704 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18705 return 1;
18706
18707 return 0;
18708 }
18709
18710 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18711 tieable integer mode. */
18712
18713 static bool
18714 ix86_tieable_integer_mode_p (enum machine_mode mode)
18715 {
18716 switch (mode)
18717 {
18718 case HImode:
18719 case SImode:
18720 return true;
18721
18722 case QImode:
18723 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18724
18725 case DImode:
18726 return TARGET_64BIT;
18727
18728 default:
18729 return false;
18730 }
18731 }
18732
18733 /* Return true if MODE1 is accessible in a register that can hold MODE2
18734 without copying. That is, all register classes that can hold MODE2
18735 can also hold MODE1. */
18736
18737 bool
18738 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18739 {
18740 if (mode1 == mode2)
18741 return true;
18742
18743 if (ix86_tieable_integer_mode_p (mode1)
18744 && ix86_tieable_integer_mode_p (mode2))
18745 return true;
18746
18747 /* MODE2 being XFmode implies fp stack or general regs, which means we
18748 can tie any smaller floating point modes to it. Note that we do not
18749 tie this with TFmode. */
18750 if (mode2 == XFmode)
18751 return mode1 == SFmode || mode1 == DFmode;
18752
18753 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18754 that we can tie it with SFmode. */
18755 if (mode2 == DFmode)
18756 return mode1 == SFmode;
18757
18758 /* If MODE2 is only appropriate for an SSE register, then tie with
18759 any other mode acceptable to SSE registers. */
18760 if (GET_MODE_SIZE (mode2) >= 8
18761 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18762 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18763
18764 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18765 with any other mode acceptable to MMX registers. */
18766 if (GET_MODE_SIZE (mode2) == 8
18767 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18768 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18769
18770 return false;
18771 }
18772
18773 /* Return the cost of moving data of mode M between a
18774 register and memory. A value of 2 is the default; this cost is
18775 relative to those in `REGISTER_MOVE_COST'.
18776
18777 If moving between registers and memory is more expensive than
18778 between two registers, you should define this macro to express the
18779 relative cost.
18780
18781 Also model the increased cost of moving QImode registers in non-Q_REGS
18782 classes.
18783 */
18784 int
18785 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18786 {
18787 if (FLOAT_CLASS_P (class))
18788 {
18789 int index;
18790 switch (mode)
18791 {
18792 case SFmode:
18793 index = 0;
18794 break;
18795 case DFmode:
18796 index = 1;
18797 break;
18798 case XFmode:
18799 index = 2;
18800 break;
18801 default:
18802 return 100;
18803 }
18804 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18805 }
18806 if (SSE_CLASS_P (class))
18807 {
18808 int index;
18809 switch (GET_MODE_SIZE (mode))
18810 {
18811 case 4:
18812 index = 0;
18813 break;
18814 case 8:
18815 index = 1;
18816 break;
18817 case 16:
18818 index = 2;
18819 break;
18820 default:
18821 return 100;
18822 }
18823 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18824 }
18825 if (MMX_CLASS_P (class))
18826 {
18827 int index;
18828 switch (GET_MODE_SIZE (mode))
18829 {
18830 case 4:
18831 index = 0;
18832 break;
18833 case 8:
18834 index = 1;
18835 break;
18836 default:
18837 return 100;
18838 }
18839 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18840 }
18841 switch (GET_MODE_SIZE (mode))
18842 {
18843 case 1:
18844 if (in)
18845 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18846 : ix86_cost->movzbl_load);
18847 else
18848 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18849 : ix86_cost->int_store[0] + 4);
18850 break;
18851 case 2:
18852 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18853 default:
18854 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18855 if (mode == TFmode)
18856 mode = XFmode;
18857 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18858 * (((int) GET_MODE_SIZE (mode)
18859 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18860 }
18861 }
18862
18863 /* Compute a (partial) cost for rtx X. Return true if the complete
18864 cost has been computed, and false if subexpressions should be
18865 scanned. In either case, *TOTAL contains the cost result. */
18866
18867 static bool
18868 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18869 {
18870 enum machine_mode mode = GET_MODE (x);
18871
18872 switch (code)
18873 {
18874 case CONST_INT:
18875 case CONST:
18876 case LABEL_REF:
18877 case SYMBOL_REF:
18878 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18879 *total = 3;
18880 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18881 *total = 2;
18882 else if (flag_pic && SYMBOLIC_CONST (x)
18883 && (!TARGET_64BIT
18884 || (GET_CODE (x) != LABEL_REF
18885 && (GET_CODE (x) != SYMBOL_REF
18886 || !SYMBOL_REF_LOCAL_P (x)))))
18887 *total = 1;
18888 else
18889 *total = 0;
18890 return true;
18891
18892 case CONST_DOUBLE:
18893 if (mode == VOIDmode)
18894 *total = 0;
18895 else
18896 switch (standard_80387_constant_p (x))
18897 {
18898 case 1: /* 0.0 */
18899 *total = 1;
18900 break;
18901 default: /* Other constants */
18902 *total = 2;
18903 break;
18904 case 0:
18905 case -1:
18906 /* Start with (MEM (SYMBOL_REF)), since that's where
18907 it'll probably end up. Add a penalty for size. */
18908 *total = (COSTS_N_INSNS (1)
18909 + (flag_pic != 0 && !TARGET_64BIT)
18910 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18911 break;
18912 }
18913 return true;
18914
18915 case ZERO_EXTEND:
18916 /* The zero extension is often completely free on x86_64, so make
18917 it as cheap as possible. */
18918 if (TARGET_64BIT && mode == DImode
18919 && GET_MODE (XEXP (x, 0)) == SImode)
18920 *total = 1;
18921 else if (TARGET_ZERO_EXTEND_WITH_AND)
18922 *total = ix86_cost->add;
18923 else
18924 *total = ix86_cost->movzx;
18925 return false;
18926
18927 case SIGN_EXTEND:
18928 *total = ix86_cost->movsx;
18929 return false;
18930
18931 case ASHIFT:
18932 if (CONST_INT_P (XEXP (x, 1))
18933 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18934 {
18935 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18936 if (value == 1)
18937 {
18938 *total = ix86_cost->add;
18939 return false;
18940 }
18941 if ((value == 2 || value == 3)
18942 && ix86_cost->lea <= ix86_cost->shift_const)
18943 {
18944 *total = ix86_cost->lea;
18945 return false;
18946 }
18947 }
18948 /* FALLTHRU */
18949
18950 case ROTATE:
18951 case ASHIFTRT:
18952 case LSHIFTRT:
18953 case ROTATERT:
18954 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18955 {
18956 if (CONST_INT_P (XEXP (x, 1)))
18957 {
18958 if (INTVAL (XEXP (x, 1)) > 32)
18959 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18960 else
18961 *total = ix86_cost->shift_const * 2;
18962 }
18963 else
18964 {
18965 if (GET_CODE (XEXP (x, 1)) == AND)
18966 *total = ix86_cost->shift_var * 2;
18967 else
18968 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18969 }
18970 }
18971 else
18972 {
18973 if (CONST_INT_P (XEXP (x, 1)))
18974 *total = ix86_cost->shift_const;
18975 else
18976 *total = ix86_cost->shift_var;
18977 }
18978 return false;
18979
18980 case MULT:
18981 if (FLOAT_MODE_P (mode))
18982 {
18983 *total = ix86_cost->fmul;
18984 return false;
18985 }
18986 else
18987 {
18988 rtx op0 = XEXP (x, 0);
18989 rtx op1 = XEXP (x, 1);
18990 int nbits;
18991 if (CONST_INT_P (XEXP (x, 1)))
18992 {
18993 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18994 for (nbits = 0; value != 0; value &= value - 1)
18995 nbits++;
18996 }
18997 else
18998 /* This is arbitrary. */
18999 nbits = 7;
19000
19001 /* Compute costs correctly for widening multiplication. */
19002 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19003 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19004 == GET_MODE_SIZE (mode))
19005 {
19006 int is_mulwiden = 0;
19007 enum machine_mode inner_mode = GET_MODE (op0);
19008
19009 if (GET_CODE (op0) == GET_CODE (op1))
19010 is_mulwiden = 1, op1 = XEXP (op1, 0);
19011 else if (CONST_INT_P (op1))
19012 {
19013 if (GET_CODE (op0) == SIGN_EXTEND)
19014 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19015 == INTVAL (op1);
19016 else
19017 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19018 }
19019
19020 if (is_mulwiden)
19021 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19022 }
19023
19024 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19025 + nbits * ix86_cost->mult_bit
19026 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19027
19028 return true;
19029 }
19030
19031 case DIV:
19032 case UDIV:
19033 case MOD:
19034 case UMOD:
19035 if (FLOAT_MODE_P (mode))
19036 *total = ix86_cost->fdiv;
19037 else
19038 *total = ix86_cost->divide[MODE_INDEX (mode)];
19039 return false;
19040
19041 case PLUS:
19042 if (FLOAT_MODE_P (mode))
19043 *total = ix86_cost->fadd;
19044 else if (GET_MODE_CLASS (mode) == MODE_INT
19045 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19046 {
19047 if (GET_CODE (XEXP (x, 0)) == PLUS
19048 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19049 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19050 && CONSTANT_P (XEXP (x, 1)))
19051 {
19052 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19053 if (val == 2 || val == 4 || val == 8)
19054 {
19055 *total = ix86_cost->lea;
19056 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19057 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19058 outer_code);
19059 *total += rtx_cost (XEXP (x, 1), outer_code);
19060 return true;
19061 }
19062 }
19063 else if (GET_CODE (XEXP (x, 0)) == MULT
19064 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19065 {
19066 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19067 if (val == 2 || val == 4 || val == 8)
19068 {
19069 *total = ix86_cost->lea;
19070 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19071 *total += rtx_cost (XEXP (x, 1), outer_code);
19072 return true;
19073 }
19074 }
19075 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19076 {
19077 *total = ix86_cost->lea;
19078 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19079 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19080 *total += rtx_cost (XEXP (x, 1), outer_code);
19081 return true;
19082 }
19083 }
19084 /* FALLTHRU */
19085
19086 case MINUS:
19087 if (FLOAT_MODE_P (mode))
19088 {
19089 *total = ix86_cost->fadd;
19090 return false;
19091 }
19092 /* FALLTHRU */
19093
19094 case AND:
19095 case IOR:
19096 case XOR:
19097 if (!TARGET_64BIT && mode == DImode)
19098 {
19099 *total = (ix86_cost->add * 2
19100 + (rtx_cost (XEXP (x, 0), outer_code)
19101 << (GET_MODE (XEXP (x, 0)) != DImode))
19102 + (rtx_cost (XEXP (x, 1), outer_code)
19103 << (GET_MODE (XEXP (x, 1)) != DImode)));
19104 return true;
19105 }
19106 /* FALLTHRU */
19107
19108 case NEG:
19109 if (FLOAT_MODE_P (mode))
19110 {
19111 *total = ix86_cost->fchs;
19112 return false;
19113 }
19114 /* FALLTHRU */
19115
19116 case NOT:
19117 if (!TARGET_64BIT && mode == DImode)
19118 *total = ix86_cost->add * 2;
19119 else
19120 *total = ix86_cost->add;
19121 return false;
19122
19123 case COMPARE:
19124 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19125 && XEXP (XEXP (x, 0), 1) == const1_rtx
19126 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19127 && XEXP (x, 1) == const0_rtx)
19128 {
19129 /* This kind of construct is implemented using test[bwl].
19130 Treat it as if we had an AND. */
19131 *total = (ix86_cost->add
19132 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19133 + rtx_cost (const1_rtx, outer_code));
19134 return true;
19135 }
19136 return false;
19137
19138 case FLOAT_EXTEND:
19139 if (!TARGET_SSE_MATH
19140 || mode == XFmode
19141 || (mode == DFmode && !TARGET_SSE2))
19142 *total = 0;
19143 return false;
19144
19145 case ABS:
19146 if (FLOAT_MODE_P (mode))
19147 *total = ix86_cost->fabs;
19148 return false;
19149
19150 case SQRT:
19151 if (FLOAT_MODE_P (mode))
19152 *total = ix86_cost->fsqrt;
19153 return false;
19154
19155 case UNSPEC:
19156 if (XINT (x, 1) == UNSPEC_TP)
19157 *total = 0;
19158 return false;
19159
19160 default:
19161 return false;
19162 }
19163 }
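/* Illustration only, not part of the backend: the MULT case above charges
   mult_init[MODE_INDEX (mode)] plus one mult_bit per set bit of a constant
   multiplier, counting bits with Kernighan's clear-lowest-bit trick.  A
   minimal standalone sketch of that accounting, with made-up cost numbers:  */
#if 0
#include <stdio.h>

static int
mult_cost_sketch (unsigned long long multiplier, int mult_init, int mult_bit)
{
  int nbits;
  /* Clear the lowest set bit once per iteration, as ix86_rtx_costs does.  */
  for (nbits = 0; multiplier != 0; multiplier &= multiplier - 1)
    nbits++;
  return mult_init + nbits * mult_bit;
}

int
main (void)
{
  /* x * 10 (binary 1010) has two set bits: cost = init + 2 * per-bit.  */
  printf ("%d\n", mult_cost_sketch (10, 4, 1));  /* prints 6 */
  return 0;
}
#endif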
19164
19165 #if TARGET_MACHO
19166
19167 static int current_machopic_label_num;
19168
19169 /* Given a symbol name and its associated stub, write out the
19170 definition of the stub. */
19171
19172 void
19173 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19174 {
19175 unsigned int length;
19176 char *binder_name, *symbol_name, lazy_ptr_name[32];
19177 int label = ++current_machopic_label_num;
19178
19179 /* For 64-bit we shouldn't get here. */
19180 gcc_assert (!TARGET_64BIT);
19181
19182 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19183 symb = (*targetm.strip_name_encoding) (symb);
19184
19185 length = strlen (stub);
19186 binder_name = alloca (length + 32);
19187 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19188
19189 length = strlen (symb);
19190 symbol_name = alloca (length + 32);
19191 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19192
19193 sprintf (lazy_ptr_name, "L%d$lz", label);
19194
19195 if (MACHOPIC_PURE)
19196 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19197 else
19198 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19199
19200 fprintf (file, "%s:\n", stub);
19201 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19202
19203 if (MACHOPIC_PURE)
19204 {
19205 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19206 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19207 fprintf (file, "\tjmp\t*%%edx\n");
19208 }
19209 else
19210 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19211
19212 fprintf (file, "%s:\n", binder_name);
19213
19214 if (MACHOPIC_PURE)
19215 {
19216 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19217 fprintf (file, "\tpushl\t%%eax\n");
19218 }
19219 else
19220 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19221
19222 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19223
19224 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19225 fprintf (file, "%s:\n", lazy_ptr_name);
19226 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19227 fprintf (file, "\t.long %s\n", binder_name);
19228 }
19229
19230 void
19231 darwin_x86_file_end (void)
19232 {
19233 darwin_file_end ();
19234 ix86_file_end ();
19235 }
19236 #endif /* TARGET_MACHO */
19237
19238 /* Order the registers for register allocator. */
19239
19240 void
19241 x86_order_regs_for_local_alloc (void)
19242 {
19243 int pos = 0;
19244 int i;
19245
19246 /* First allocate the local general purpose registers. */
19247 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19248 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19249 reg_alloc_order [pos++] = i;
19250
19251 /* Global general purpose registers. */
19252 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19253 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19254 reg_alloc_order [pos++] = i;
19255
19256 /* x87 registers come first in case we are doing FP math
19257 using them. */
19258 if (!TARGET_SSE_MATH)
19259 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19260 reg_alloc_order [pos++] = i;
19261
19262 /* SSE registers. */
19263 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19264 reg_alloc_order [pos++] = i;
19265 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19266 reg_alloc_order [pos++] = i;
19267
19268 /* x87 registers. */
19269 if (TARGET_SSE_MATH)
19270 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19271 reg_alloc_order [pos++] = i;
19272
19273 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19274 reg_alloc_order [pos++] = i;
19275
19276 /* Initialize the rest of the array, as we do not allocate some registers
19277 at all. */
19278 while (pos < FIRST_PSEUDO_REGISTER)
19279 reg_alloc_order [pos++] = 0;
19280 }
19281
19282 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19283 struct attribute_spec.handler. */
19284 static tree
19285 ix86_handle_struct_attribute (tree *node, tree name,
19286 tree args ATTRIBUTE_UNUSED,
19287 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19288 {
19289 tree *type = NULL;
19290 if (DECL_P (*node))
19291 {
19292 if (TREE_CODE (*node) == TYPE_DECL)
19293 type = &TREE_TYPE (*node);
19294 }
19295 else
19296 type = node;
19297
19298 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19299 || TREE_CODE (*type) == UNION_TYPE)))
19300 {
19301 warning (OPT_Wattributes, "%qs attribute ignored",
19302 IDENTIFIER_POINTER (name));
19303 *no_add_attrs = true;
19304 }
19305
19306 else if ((is_attribute_p ("ms_struct", name)
19307 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19308 || ((is_attribute_p ("gcc_struct", name)
19309 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19310 {
19311 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19312 IDENTIFIER_POINTER (name));
19313 *no_add_attrs = true;
19314 }
19315
19316 return NULL_TREE;
19317 }
19318
19319 static bool
19320 ix86_ms_bitfield_layout_p (tree record_type)
19321 {
19322 return (TARGET_MS_BITFIELD_LAYOUT &&
19323 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19324 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19325 }
19326
19327 /* Returns an expression indicating where the this parameter is
19328 located on entry to the FUNCTION. */
19329
19330 static rtx
19331 x86_this_parameter (tree function)
19332 {
19333 tree type = TREE_TYPE (function);
19334
19335 if (TARGET_64BIT)
19336 {
19337 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19338 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19339 }
19340
19341 if (ix86_function_regparm (type, function) > 0)
19342 {
19343 tree parm;
19344
19345 parm = TYPE_ARG_TYPES (type);
19346 /* Figure out whether or not the function has a variable number of
19347 arguments. */
19348 for (; parm; parm = TREE_CHAIN (parm))
19349 if (TREE_VALUE (parm) == void_type_node)
19350 break;
19351 /* If not, the this parameter is in the first argument. */
19352 if (parm)
19353 {
19354 int regno = 0;
19355 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19356 regno = 2;
19357 return gen_rtx_REG (SImode, regno);
19358 }
19359 }
19360
19361 if (aggregate_value_p (TREE_TYPE (type), type))
19362 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19363 else
19364 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19365 }
19366
19367 /* Determine whether x86_output_mi_thunk can succeed. */
19368
19369 static bool
19370 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19371 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19372 HOST_WIDE_INT vcall_offset, tree function)
19373 {
19374 /* 64-bit can handle anything. */
19375 if (TARGET_64BIT)
19376 return true;
19377
19378 /* For 32-bit, everything's fine if we have one free register. */
19379 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19380 return true;
19381
19382 /* Need a free register for vcall_offset. */
19383 if (vcall_offset)
19384 return false;
19385
19386 /* Need a free register for GOT references. */
19387 if (flag_pic && !(*targetm.binds_local_p) (function))
19388 return false;
19389
19390 /* Otherwise ok. */
19391 return true;
19392 }
19393
19394 /* Output the assembler code for a thunk function. THUNK_DECL is the
19395 declaration for the thunk function itself, FUNCTION is the decl for
19396 the target function. DELTA is an immediate constant offset to be
19397 added to THIS. If VCALL_OFFSET is nonzero, the word at
19398 *(*this + vcall_offset) should be added to THIS. */
19399
19400 static void
19401 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19402 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19403 HOST_WIDE_INT vcall_offset, tree function)
19404 {
19405 rtx xops[3];
19406 rtx this = x86_this_parameter (function);
19407 rtx this_reg, tmp;
19408
19409 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19410 pull it in now and let DELTA benefit. */
19411 if (REG_P (this))
19412 this_reg = this;
19413 else if (vcall_offset)
19414 {
19415 /* Put the this parameter into %eax. */
19416 xops[0] = this;
19417 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19418 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19419 }
19420 else
19421 this_reg = NULL_RTX;
19422
19423 /* Adjust the this parameter by a fixed constant. */
19424 if (delta)
19425 {
19426 xops[0] = GEN_INT (delta);
19427 xops[1] = this_reg ? this_reg : this;
19428 if (TARGET_64BIT)
19429 {
19430 if (!x86_64_general_operand (xops[0], DImode))
19431 {
19432 tmp = gen_rtx_REG (DImode, R10_REG);
19433 xops[1] = tmp;
19434 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19435 xops[0] = tmp;
19436 xops[1] = this;
19437 }
19438 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19439 }
19440 else
19441 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19442 }
19443
19444 /* Adjust the this parameter by a value stored in the vtable. */
19445 if (vcall_offset)
19446 {
19447 if (TARGET_64BIT)
19448 tmp = gen_rtx_REG (DImode, R10_REG);
19449 else
19450 {
19451 int tmp_regno = 2 /* ECX */;
19452 if (lookup_attribute ("fastcall",
19453 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19454 tmp_regno = 0 /* EAX */;
19455 tmp = gen_rtx_REG (SImode, tmp_regno);
19456 }
19457
19458 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19459 xops[1] = tmp;
19460 if (TARGET_64BIT)
19461 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19462 else
19463 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19464
19465 /* Adjust the this parameter. */
19466 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19467 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19468 {
19469 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19470 xops[0] = GEN_INT (vcall_offset);
19471 xops[1] = tmp2;
19472 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19473 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19474 }
19475 xops[1] = this_reg;
19476 if (TARGET_64BIT)
19477 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19478 else
19479 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19480 }
19481
19482 /* If necessary, drop THIS back to its stack slot. */
19483 if (this_reg && this_reg != this)
19484 {
19485 xops[0] = this_reg;
19486 xops[1] = this;
19487 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19488 }
19489
19490 xops[0] = XEXP (DECL_RTL (function), 0);
19491 if (TARGET_64BIT)
19492 {
19493 if (!flag_pic || (*targetm.binds_local_p) (function))
19494 output_asm_insn ("jmp\t%P0", xops);
19495 else
19496 {
19497 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19498 tmp = gen_rtx_CONST (Pmode, tmp);
19499 tmp = gen_rtx_MEM (QImode, tmp);
19500 xops[0] = tmp;
19501 output_asm_insn ("jmp\t%A0", xops);
19502 }
19503 }
19504 else
19505 {
19506 if (!flag_pic || (*targetm.binds_local_p) (function))
19507 output_asm_insn ("jmp\t%P0", xops);
19508 else
19509 #if TARGET_MACHO
19510 if (TARGET_MACHO)
19511 {
19512 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19513 tmp = (gen_rtx_SYMBOL_REF
19514 (Pmode,
19515 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19516 tmp = gen_rtx_MEM (QImode, tmp);
19517 xops[0] = tmp;
19518 output_asm_insn ("jmp\t%0", xops);
19519 }
19520 else
19521 #endif /* TARGET_MACHO */
19522 {
19523 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19524 output_set_got (tmp, NULL_RTX);
19525
19526 xops[1] = tmp;
19527 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19528 output_asm_insn ("jmp\t{*}%1", xops);
19529 }
19530 }
19531 }
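/* Illustration only, not emitted anywhere: in C terms, the thunk produced
   above adjusts the incoming object pointer before tail-calling FUNCTION.
   A minimal sketch, assuming the this pointer is handed in as a plain
   pointer and that vtable slots are pointer-sized longs:  */
#if 0
static void *
thunk_adjust_sketch (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;          /* constant DELTA adjustment */
  if (vcall_offset)
    {
      /* *(*this + vcall_offset) is added to THIS, exactly as the comment
         before x86_output_mi_thunk describes.  */
      char *vtable = *(char **) p;
      p += *(long *) (vtable + vcall_offset);
    }
  return p;                                     /* then jump to FUNCTION */
}
#endif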
19532
19533 static void
19534 x86_file_start (void)
19535 {
19536 default_file_start ();
19537 #if TARGET_MACHO
19538 darwin_file_start ();
19539 #endif
19540 if (X86_FILE_START_VERSION_DIRECTIVE)
19541 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19542 if (X86_FILE_START_FLTUSED)
19543 fputs ("\t.global\t__fltused\n", asm_out_file);
19544 if (ix86_asm_dialect == ASM_INTEL)
19545 fputs ("\t.intel_syntax\n", asm_out_file);
19546 }
19547
19548 int
19549 x86_field_alignment (tree field, int computed)
19550 {
19551 enum machine_mode mode;
19552 tree type = TREE_TYPE (field);
19553
19554 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19555 return computed;
19556 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19557 ? get_inner_array_type (type) : type);
19558 if (mode == DFmode || mode == DCmode
19559 || GET_MODE_CLASS (mode) == MODE_INT
19560 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19561 return MIN (32, computed);
19562 return computed;
19563 }
19564
19565 /* Output assembler code to FILE to increment profiler label # LABELNO
19566 for profiling a function entry. */
19567 void
19568 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19569 {
19570 if (TARGET_64BIT)
19571 if (flag_pic)
19572 {
19573 #ifndef NO_PROFILE_COUNTERS
19574 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19575 #endif
19576 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19577 }
19578 else
19579 {
19580 #ifndef NO_PROFILE_COUNTERS
19581 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19582 #endif
19583 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19584 }
19585 else if (flag_pic)
19586 {
19587 #ifndef NO_PROFILE_COUNTERS
19588 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19589 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19590 #endif
19591 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19592 }
19593 else
19594 {
19595 #ifndef NO_PROFILE_COUNTERS
19596 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19597 PROFILE_COUNT_REGISTER);
19598 #endif
19599 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19600 }
19601 }
19602
19603 /* We don't have exact information about the insn sizes, but we may assume
19604 quite safely that we are informed about all 1 byte insns and memory
19605 address sizes. This is enough to eliminate unnecessary padding in
19606 99% of cases. */
19607
19608 static int
19609 min_insn_size (rtx insn)
19610 {
19611 int l = 0;
19612
19613 if (!INSN_P (insn) || !active_insn_p (insn))
19614 return 0;
19615
19616 /* Discard alignments we've emitted, and jump instructions. */
19617 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19618 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19619 return 0;
19620 if (JUMP_P (insn)
19621 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19622 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19623 return 0;
19624
19625 /* Important case - calls are always 5 bytes.
19626 It is common to have many calls in a row. */
19627 if (CALL_P (insn)
19628 && symbolic_reference_mentioned_p (PATTERN (insn))
19629 && !SIBLING_CALL_P (insn))
19630 return 5;
19631 if (get_attr_length (insn) <= 1)
19632 return 1;
19633
19634 /* For normal instructions we may rely on the sizes of addresses
19635 and the presence of a symbol to require 4 bytes of encoding.
19636 This is not the case for jumps, where references are PC relative. */
19637 if (!JUMP_P (insn))
19638 {
19639 l = get_attr_length_address (insn);
19640 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19641 l = 4;
19642 }
19643 if (l)
19644 return 1+l;
19645 else
19646 return 2;
19647 }
19648
19649 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19650 window. */
19651
19652 static void
19653 ix86_avoid_jump_misspredicts (void)
19654 {
19655 rtx insn, start = get_insns ();
19656 int nbytes = 0, njumps = 0;
19657 int isjump = 0;
19658
19659 /* Look for all minimal intervals of instructions containing 4 jumps.
19660 The intervals are bounded by START and INSN. NBYTES is the total
19661 size of instructions in the interval including INSN and not including
19662 START. When the NBYTES is smaller than 16 bytes, it is possible
19663 that the end of START and INSN ends up in the same 16byte page.
19664
19665 The smallest offset in the page INSN can start is the case where START
19666 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19667 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19668 */
19669 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19670 {
19671
19672 nbytes += min_insn_size (insn);
19673 if (dump_file)
19674 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19675 INSN_UID (insn), min_insn_size (insn));
19676 if ((JUMP_P (insn)
19677 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19678 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19679 || CALL_P (insn))
19680 njumps++;
19681 else
19682 continue;
19683
19684 while (njumps > 3)
19685 {
19686 start = NEXT_INSN (start);
19687 if ((JUMP_P (start)
19688 && GET_CODE (PATTERN (start)) != ADDR_VEC
19689 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19690 || CALL_P (start))
19691 njumps--, isjump = 1;
19692 else
19693 isjump = 0;
19694 nbytes -= min_insn_size (start);
19695 }
19696 gcc_assert (njumps >= 0);
19697 if (dump_file)
19698 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19699 INSN_UID (start), INSN_UID (insn), nbytes);
19700
19701 if (njumps == 3 && isjump && nbytes < 16)
19702 {
19703 int padsize = 15 - nbytes + min_insn_size (insn);
19704
19705 if (dump_file)
19706 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19707 INSN_UID (insn), padsize);
19708 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19709 }
19710 }
19711 }
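/* Illustration only, not part of the backend: the pass above keeps a sliding
   window [START, INSN] and pads whenever a fourth jump or call could land in
   the same 16-byte block.  A minimal standalone sketch of that accounting
   over an array of hypothetical (size, is_jump) pairs:  */
#if 0
#include <stdio.h>

struct fake_insn { int size; int is_jump; };

static void
window_sketch (const struct fake_insn *insns, int n)
{
  int start = 0, nbytes = 0, njumps = 0, isjump = 0;
  for (int i = 0; i < n; i++)
    {
      nbytes += insns[i].size;
      if (!insns[i].is_jump)
        continue;
      njumps++;
      /* Shrink the window from the left until it holds at most 3 jumps,
         remembering whether the last dropped insn was itself a jump.  */
      while (njumps > 3)
        {
          isjump = insns[start].is_jump;
          njumps -= isjump;
          nbytes -= insns[start].size;
          start++;
        }
      if (njumps == 3 && isjump && nbytes < 16)
        printf ("pad before insn %d by %d bytes\n",
                i, 15 - nbytes + insns[i].size);
    }
}
#endif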
19712
19713 /* AMD Athlon works faster
19714 when RET is not the destination of a conditional jump or directly preceded
19715 by another jump instruction. We avoid the penalty by inserting a NOP just
19716 before such RET instructions. */
19717 static void
19718 ix86_pad_returns (void)
19719 {
19720 edge e;
19721 edge_iterator ei;
19722
19723 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19724 {
19725 basic_block bb = e->src;
19726 rtx ret = BB_END (bb);
19727 rtx prev;
19728 bool replace = false;
19729
19730 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19731 || !maybe_hot_bb_p (bb))
19732 continue;
19733 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19734 if (active_insn_p (prev) || LABEL_P (prev))
19735 break;
19736 if (prev && LABEL_P (prev))
19737 {
19738 edge e;
19739 edge_iterator ei;
19740
19741 FOR_EACH_EDGE (e, ei, bb->preds)
19742 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19743 && !(e->flags & EDGE_FALLTHRU))
19744 replace = true;
19745 }
19746 if (!replace)
19747 {
19748 prev = prev_active_insn (ret);
19749 if (prev
19750 && ((JUMP_P (prev) && any_condjump_p (prev))
19751 || CALL_P (prev)))
19752 replace = true;
19753 /* Empty functions get a branch mispredict even when the jump destination
19754 is not visible to us. */
19755 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19756 replace = true;
19757 }
19758 if (replace)
19759 {
19760 emit_insn_before (gen_return_internal_long (), ret);
19761 delete_insn (ret);
19762 }
19763 }
19764 }
19765
19766 /* Implement machine specific optimizations. We implement padding of returns
19767 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19768 static void
19769 ix86_reorg (void)
19770 {
19771 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19772 ix86_pad_returns ();
19773 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19774 ix86_avoid_jump_misspredicts ();
19775 }
19776
19777 /* Return nonzero when a QImode register that must be represented via a REX
19778 prefix is used. */
19779 bool
19780 x86_extended_QIreg_mentioned_p (rtx insn)
19781 {
19782 int i;
19783 extract_insn_cached (insn);
19784 for (i = 0; i < recog_data.n_operands; i++)
19785 if (REG_P (recog_data.operand[i])
19786 && REGNO (recog_data.operand[i]) >= 4)
19787 return true;
19788 return false;
19789 }
19790
19791 /* Return nonzero when P points to a register encoded via a REX prefix.
19792 Called via for_each_rtx. */
19793 static int
19794 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19795 {
19796 unsigned int regno;
19797 if (!REG_P (*p))
19798 return 0;
19799 regno = REGNO (*p);
19800 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19801 }
19802
19803 /* Return true when INSN mentions a register that must be encoded using a REX
19804 prefix. */
19805 bool
19806 x86_extended_reg_mentioned_p (rtx insn)
19807 {
19808 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19809 }
19810
19811 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19812 optabs would emit if we didn't have TFmode patterns. */
19813
19814 void
19815 x86_emit_floatuns (rtx operands[2])
19816 {
19817 rtx neglab, donelab, i0, i1, f0, in, out;
19818 enum machine_mode mode, inmode;
19819
19820 inmode = GET_MODE (operands[1]);
19821 gcc_assert (inmode == SImode || inmode == DImode);
19822
19823 out = operands[0];
19824 in = force_reg (inmode, operands[1]);
19825 mode = GET_MODE (out);
19826 neglab = gen_label_rtx ();
19827 donelab = gen_label_rtx ();
19828 f0 = gen_reg_rtx (mode);
19829
19830 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19831
19832 expand_float (out, in, 0);
19833
19834 emit_jump_insn (gen_jump (donelab));
19835 emit_barrier ();
19836
19837 emit_label (neglab);
19838
19839 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19840 1, OPTAB_DIRECT);
19841 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19842 1, OPTAB_DIRECT);
19843 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19844
19845 expand_float (f0, i0, 0);
19846
19847 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19848
19849 emit_label (donelab);
19850 }
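/* Illustration only, not part of the backend: the expansion above is the
   standard unsigned-to-float trick.  When the sign bit is clear, a plain
   signed conversion is used; otherwise the value is halved (folding the low
   bit back in with OR so the final rounding stays correct), converted, and
   doubled.  A minimal scalar sketch of the same idea, assuming only a signed
   conversion is available:  */
#if 0
static double
floatuns_sketch (unsigned long long u)
{
  if ((long long) u >= 0)
    return (double) (long long) u;                 /* fast path */
  unsigned long long halved = (u >> 1) | (u & 1);  /* keep sticky low bit */
  double d = (double) (long long) halved;
  return d + d;                                    /* undo the halving */
}
#endif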
19851 \f
19852 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19853 with all elements equal to VAR. Return true if successful. */
19854
19855 static bool
19856 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19857 rtx target, rtx val)
19858 {
19859 enum machine_mode smode, wsmode, wvmode;
19860 rtx x;
19861
19862 switch (mode)
19863 {
19864 case V2SImode:
19865 case V2SFmode:
19866 if (!mmx_ok)
19867 return false;
19868 /* FALLTHRU */
19869
19870 case V2DFmode:
19871 case V2DImode:
19872 case V4SFmode:
19873 case V4SImode:
19874 val = force_reg (GET_MODE_INNER (mode), val);
19875 x = gen_rtx_VEC_DUPLICATE (mode, val);
19876 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19877 return true;
19878
19879 case V4HImode:
19880 if (!mmx_ok)
19881 return false;
19882 if (TARGET_SSE || TARGET_3DNOW_A)
19883 {
19884 val = gen_lowpart (SImode, val);
19885 x = gen_rtx_TRUNCATE (HImode, val);
19886 x = gen_rtx_VEC_DUPLICATE (mode, x);
19887 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19888 return true;
19889 }
19890 else
19891 {
19892 smode = HImode;
19893 wsmode = SImode;
19894 wvmode = V2SImode;
19895 goto widen;
19896 }
19897
19898 case V8QImode:
19899 if (!mmx_ok)
19900 return false;
19901 smode = QImode;
19902 wsmode = HImode;
19903 wvmode = V4HImode;
19904 goto widen;
19905 case V8HImode:
19906 if (TARGET_SSE2)
19907 {
19908 rtx tmp1, tmp2;
19909 /* Extend HImode to SImode using a paradoxical SUBREG. */
19910 tmp1 = gen_reg_rtx (SImode);
19911 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19912 /* Insert the SImode value as low element of V4SImode vector. */
19913 tmp2 = gen_reg_rtx (V4SImode);
19914 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19915 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19916 CONST0_RTX (V4SImode),
19917 const1_rtx);
19918 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19919 /* Cast the V4SImode vector back to a V8HImode vector. */
19920 tmp1 = gen_reg_rtx (V8HImode);
19921 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19922 /* Duplicate the low short through the whole low SImode word. */
19923 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19924 /* Cast the V8HImode vector back to a V4SImode vector. */
19925 tmp2 = gen_reg_rtx (V4SImode);
19926 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19927 /* Replicate the low element of the V4SImode vector. */
19928 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19929 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19930 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19931 return true;
19932 }
19933 smode = HImode;
19934 wsmode = SImode;
19935 wvmode = V4SImode;
19936 goto widen;
19937 case V16QImode:
19938 if (TARGET_SSE2)
19939 {
19940 rtx tmp1, tmp2;
19941 /* Extend QImode to SImode using a paradoxical SUBREG. */
19942 tmp1 = gen_reg_rtx (SImode);
19943 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19944 /* Insert the SImode value as low element of V4SImode vector. */
19945 tmp2 = gen_reg_rtx (V4SImode);
19946 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19947 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19948 CONST0_RTX (V4SImode),
19949 const1_rtx);
19950 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19951 /* Cast the V4SImode vector back to a V16QImode vector. */
19952 tmp1 = gen_reg_rtx (V16QImode);
19953 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19954 /* Duplicate the low byte through the whole low SImode word. */
19955 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19956 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19957 /* Cast the V16QImode vector back to a V4SImode vector. */
19958 tmp2 = gen_reg_rtx (V4SImode);
19959 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19960 /* Replicate the low element of the V4SImode vector. */
19961 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19962 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19963 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19964 return true;
19965 }
19966 smode = QImode;
19967 wsmode = HImode;
19968 wvmode = V8HImode;
19969 goto widen;
19970 widen:
19971 /* Replicate the value once into the next wider mode and recurse. */
19972 val = convert_modes (wsmode, smode, val, true);
19973 x = expand_simple_binop (wsmode, ASHIFT, val,
19974 GEN_INT (GET_MODE_BITSIZE (smode)),
19975 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19976 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19977
19978 x = gen_reg_rtx (wvmode);
19979 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19980 gcc_unreachable ();
19981 emit_move_insn (target, gen_lowpart (mode, x));
19982 return true;
19983
19984 default:
19985 return false;
19986 }
19987 }
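/* Illustration only, not part of the backend: the "widen" path above builds
   a splat by repeatedly doubling the element width with a shift-and-OR and
   then recursing on the wider vector mode.  The same idea on plain integers,
   as a minimal sketch:  */
#if 0
#include <stdint.h>

static uint32_t
splat_byte_sketch (uint8_t b)
{
  uint16_t h = (uint16_t) b | ((uint16_t) b << 8);   /* QI -> HI */
  uint32_t s = (uint32_t) h | ((uint32_t) h << 16);  /* HI -> SI */
  return s;            /* four copies of B, ready to be broadcast further */
}
#endif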
19988
19989 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19990 whose ONE_VAR element is VAR, and other elements are zero. Return true
19991 if successful. */
19992
19993 static bool
19994 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
19995 rtx target, rtx var, int one_var)
19996 {
19997 enum machine_mode vsimode;
19998 rtx new_target;
19999 rtx x, tmp;
20000
20001 switch (mode)
20002 {
20003 case V2SFmode:
20004 case V2SImode:
20005 if (!mmx_ok)
20006 return false;
20007 /* FALLTHRU */
20008
20009 case V2DFmode:
20010 case V2DImode:
20011 if (one_var != 0)
20012 return false;
20013 var = force_reg (GET_MODE_INNER (mode), var);
20014 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20015 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20016 return true;
20017
20018 case V4SFmode:
20019 case V4SImode:
20020 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20021 new_target = gen_reg_rtx (mode);
20022 else
20023 new_target = target;
20024 var = force_reg (GET_MODE_INNER (mode), var);
20025 x = gen_rtx_VEC_DUPLICATE (mode, var);
20026 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20027 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20028 if (one_var != 0)
20029 {
20030 /* We need to shuffle the value to the correct position, so
20031 create a new pseudo to store the intermediate result. */
20032
20033 /* With SSE2, we can use the integer shuffle insns. */
20034 if (mode != V4SFmode && TARGET_SSE2)
20035 {
20036 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20037 GEN_INT (1),
20038 GEN_INT (one_var == 1 ? 0 : 1),
20039 GEN_INT (one_var == 2 ? 0 : 1),
20040 GEN_INT (one_var == 3 ? 0 : 1)));
20041 if (target != new_target)
20042 emit_move_insn (target, new_target);
20043 return true;
20044 }
20045
20046 /* Otherwise convert the intermediate result to V4SFmode and
20047 use the SSE1 shuffle instructions. */
20048 if (mode != V4SFmode)
20049 {
20050 tmp = gen_reg_rtx (V4SFmode);
20051 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20052 }
20053 else
20054 tmp = new_target;
20055
20056 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20057 GEN_INT (1),
20058 GEN_INT (one_var == 1 ? 0 : 1),
20059 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20060 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20061
20062 if (mode != V4SFmode)
20063 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20064 else if (tmp != target)
20065 emit_move_insn (target, tmp);
20066 }
20067 else if (target != new_target)
20068 emit_move_insn (target, new_target);
20069 return true;
20070
20071 case V8HImode:
20072 case V16QImode:
20073 vsimode = V4SImode;
20074 goto widen;
20075 case V4HImode:
20076 case V8QImode:
20077 if (!mmx_ok)
20078 return false;
20079 vsimode = V2SImode;
20080 goto widen;
20081 widen:
20082 if (one_var != 0)
20083 return false;
20084
20085 /* Zero extend the variable element to SImode and recurse. */
20086 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20087
20088 x = gen_reg_rtx (vsimode);
20089 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20090 var, one_var))
20091 gcc_unreachable ();
20092
20093 emit_move_insn (target, gen_lowpart (mode, x));
20094 return true;
20095
20096 default:
20097 return false;
20098 }
20099 }
20100
20101 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20102 consisting of the values in VALS. It is known that all elements
20103 except ONE_VAR are constants. Return true if successful. */
20104
20105 static bool
20106 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20107 rtx target, rtx vals, int one_var)
20108 {
20109 rtx var = XVECEXP (vals, 0, one_var);
20110 enum machine_mode wmode;
20111 rtx const_vec, x;
20112
20113 const_vec = copy_rtx (vals);
20114 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20115 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20116
20117 switch (mode)
20118 {
20119 case V2DFmode:
20120 case V2DImode:
20121 case V2SFmode:
20122 case V2SImode:
20123 /* For the two element vectors, it's just as easy to use
20124 the general case. */
20125 return false;
20126
20127 case V4SFmode:
20128 case V4SImode:
20129 case V8HImode:
20130 case V4HImode:
20131 break;
20132
20133 case V16QImode:
20134 wmode = V8HImode;
20135 goto widen;
20136 case V8QImode:
20137 wmode = V4HImode;
20138 goto widen;
20139 widen:
20140 /* There's no way to set one QImode entry easily. Combine
20141 the variable value with its adjacent constant value, and
20142 promote to an HImode set. */
20143 x = XVECEXP (vals, 0, one_var ^ 1);
20144 if (one_var & 1)
20145 {
20146 var = convert_modes (HImode, QImode, var, true);
20147 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20148 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20149 x = GEN_INT (INTVAL (x) & 0xff);
20150 }
20151 else
20152 {
20153 var = convert_modes (HImode, QImode, var, true);
20154 x = gen_int_mode (INTVAL (x) << 8, HImode);
20155 }
20156 if (x != const0_rtx)
20157 var = expand_simple_binop (HImode, IOR, var, x, var,
20158 1, OPTAB_LIB_WIDEN);
20159
20160 x = gen_reg_rtx (wmode);
20161 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20162 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20163
20164 emit_move_insn (target, gen_lowpart (mode, x));
20165 return true;
20166
20167 default:
20168 return false;
20169 }
20170
20171 emit_move_insn (target, const_vec);
20172 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20173 return true;
20174 }
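/* Illustration only, not part of the backend: for V16QI/V8QI the code above
   cannot set a single byte directly, so it fuses the variable byte with its
   constant neighbour into one 16-bit element and falls back to an HImode
   vector set.  A minimal scalar sketch of that pairing, assuming the
   little-endian element layout used above:  */
#if 0
#include <stdint.h>

static uint16_t
pair_bytes_sketch (uint8_t var, uint8_t neighbour, int var_is_high)
{
  if (var_is_high)                       /* variable element has odd index */
    return (uint16_t) ((var << 8) | neighbour);
  else                                   /* variable element has even index */
    return (uint16_t) ((neighbour << 8) | var);
}
#endif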
20175
20176 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20177 all values variable, and none identical. */
20178
20179 static void
20180 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20181 rtx target, rtx vals)
20182 {
20183 enum machine_mode half_mode = GET_MODE_INNER (mode);
20184 rtx op0 = NULL, op1 = NULL;
20185 bool use_vec_concat = false;
20186
20187 switch (mode)
20188 {
20189 case V2SFmode:
20190 case V2SImode:
20191 if (!mmx_ok && !TARGET_SSE)
20192 break;
20193 /* FALLTHRU */
20194
20195 case V2DFmode:
20196 case V2DImode:
20197 /* For the two element vectors, we always implement VEC_CONCAT. */
20198 op0 = XVECEXP (vals, 0, 0);
20199 op1 = XVECEXP (vals, 0, 1);
20200 use_vec_concat = true;
20201 break;
20202
20203 case V4SFmode:
20204 half_mode = V2SFmode;
20205 goto half;
20206 case V4SImode:
20207 half_mode = V2SImode;
20208 goto half;
20209 half:
20210 {
20211 rtvec v;
20212
20213 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20214 Recurse to load the two halves. */
20215
20216 op0 = gen_reg_rtx (half_mode);
20217 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20218 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20219
20220 op1 = gen_reg_rtx (half_mode);
20221 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20222 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20223
20224 use_vec_concat = true;
20225 }
20226 break;
20227
20228 case V8HImode:
20229 case V16QImode:
20230 case V4HImode:
20231 case V8QImode:
20232 break;
20233
20234 default:
20235 gcc_unreachable ();
20236 }
20237
20238 if (use_vec_concat)
20239 {
20240 if (!register_operand (op0, half_mode))
20241 op0 = force_reg (half_mode, op0);
20242 if (!register_operand (op1, half_mode))
20243 op1 = force_reg (half_mode, op1);
20244
20245 emit_insn (gen_rtx_SET (VOIDmode, target,
20246 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20247 }
20248 else
20249 {
20250 int i, j, n_elts, n_words, n_elt_per_word;
20251 enum machine_mode inner_mode;
20252 rtx words[4], shift;
20253
20254 inner_mode = GET_MODE_INNER (mode);
20255 n_elts = GET_MODE_NUNITS (mode);
20256 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20257 n_elt_per_word = n_elts / n_words;
20258 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20259
20260 for (i = 0; i < n_words; ++i)
20261 {
20262 rtx word = NULL_RTX;
20263
20264 for (j = 0; j < n_elt_per_word; ++j)
20265 {
20266 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20267 elt = convert_modes (word_mode, inner_mode, elt, true);
20268
20269 if (j == 0)
20270 word = elt;
20271 else
20272 {
20273 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20274 word, 1, OPTAB_LIB_WIDEN);
20275 word = expand_simple_binop (word_mode, IOR, word, elt,
20276 word, 1, OPTAB_LIB_WIDEN);
20277 }
20278 }
20279
20280 words[i] = word;
20281 }
20282
20283 if (n_words == 1)
20284 emit_move_insn (target, gen_lowpart (mode, words[0]));
20285 else if (n_words == 2)
20286 {
20287 rtx tmp = gen_reg_rtx (mode);
20288 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20289 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20290 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20291 emit_move_insn (target, tmp);
20292 }
20293 else if (n_words == 4)
20294 {
20295 rtx tmp = gen_reg_rtx (V4SImode);
20296 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20297 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20298 emit_move_insn (target, gen_lowpart (mode, tmp));
20299 }
20300 else
20301 gcc_unreachable ();
20302 }
20303 }
20304
20305 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20306 instructions unless MMX_OK is true. */
20307
20308 void
20309 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20310 {
20311 enum machine_mode mode = GET_MODE (target);
20312 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20313 int n_elts = GET_MODE_NUNITS (mode);
20314 int n_var = 0, one_var = -1;
20315 bool all_same = true, all_const_zero = true;
20316 int i;
20317 rtx x;
20318
20319 for (i = 0; i < n_elts; ++i)
20320 {
20321 x = XVECEXP (vals, 0, i);
20322 if (!CONSTANT_P (x))
20323 n_var++, one_var = i;
20324 else if (x != CONST0_RTX (inner_mode))
20325 all_const_zero = false;
20326 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20327 all_same = false;
20328 }
20329
20330 /* Constants are best loaded from the constant pool. */
20331 if (n_var == 0)
20332 {
20333 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20334 return;
20335 }
20336
20337 /* If all values are identical, broadcast the value. */
20338 if (all_same
20339 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20340 XVECEXP (vals, 0, 0)))
20341 return;
20342
20343 /* Values where only one field is non-constant are best loaded from
20344 the pool and overwritten via move later. */
20345 if (n_var == 1)
20346 {
20347 if (all_const_zero
20348 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20349 XVECEXP (vals, 0, one_var),
20350 one_var))
20351 return;
20352
20353 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20354 return;
20355 }
20356
20357 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20358 }
20359
20360 void
20361 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20362 {
20363 enum machine_mode mode = GET_MODE (target);
20364 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20365 bool use_vec_merge = false;
20366 rtx tmp;
20367
20368 switch (mode)
20369 {
20370 case V2SFmode:
20371 case V2SImode:
20372 if (mmx_ok)
20373 {
20374 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20375 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20376 if (elt == 0)
20377 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20378 else
20379 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20380 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20381 return;
20382 }
20383 break;
20384
20385 case V2DFmode:
20386 case V2DImode:
20387 {
20388 rtx op0, op1;
20389
20390 /* For the two element vectors, we implement a VEC_CONCAT with
20391 the extraction of the other element. */
20392
20393 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20394 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20395
20396 if (elt == 0)
20397 op0 = val, op1 = tmp;
20398 else
20399 op0 = tmp, op1 = val;
20400
20401 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20402 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20403 }
20404 return;
20405
20406 case V4SFmode:
20407 switch (elt)
20408 {
20409 case 0:
20410 use_vec_merge = true;
20411 break;
20412
20413 case 1:
20414 /* tmp = target = A B C D */
20415 tmp = copy_to_reg (target);
20416 /* target = A A B B */
20417 emit_insn (gen_sse_unpcklps (target, target, target));
20418 /* target = X A B B */
20419 ix86_expand_vector_set (false, target, val, 0);
20420 /* target = A X C D */
20421 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20422 GEN_INT (1), GEN_INT (0),
20423 GEN_INT (2+4), GEN_INT (3+4)));
20424 return;
20425
20426 case 2:
20427 /* tmp = target = A B C D */
20428 tmp = copy_to_reg (target);
20429 /* tmp = X B C D */
20430 ix86_expand_vector_set (false, tmp, val, 0);
20431 /* target = A B X D */
20432 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20433 GEN_INT (0), GEN_INT (1),
20434 GEN_INT (0+4), GEN_INT (3+4)));
20435 return;
20436
20437 case 3:
20438 /* tmp = target = A B C D */
20439 tmp = copy_to_reg (target);
20440 /* tmp = X B C D */
20441 ix86_expand_vector_set (false, tmp, val, 0);
20442 /* target = A B C X */
20443 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20444 GEN_INT (0), GEN_INT (1),
20445 GEN_INT (2+4), GEN_INT (0+4)));
20446 return;
20447
20448 default:
20449 gcc_unreachable ();
20450 }
20451 break;
20452
20453 case V4SImode:
20454 /* Element 0 handled by vec_merge below. */
20455 if (elt == 0)
20456 {
20457 use_vec_merge = true;
20458 break;
20459 }
20460
20461 if (TARGET_SSE2)
20462 {
20463 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20464 store into element 0, then shuffle them back. */
20465
20466 rtx order[4];
20467
20468 order[0] = GEN_INT (elt);
20469 order[1] = const1_rtx;
20470 order[2] = const2_rtx;
20471 order[3] = GEN_INT (3);
20472 order[elt] = const0_rtx;
20473
20474 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20475 order[1], order[2], order[3]));
20476
20477 ix86_expand_vector_set (false, target, val, 0);
20478
20479 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20480 order[1], order[2], order[3]));
20481 }
20482 else
20483 {
20484 /* For SSE1, we have to reuse the V4SF code. */
20485 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20486 gen_lowpart (SFmode, val), elt);
20487 }
20488 return;
20489
20490 case V8HImode:
20491 use_vec_merge = TARGET_SSE2;
20492 break;
20493 case V4HImode:
20494 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20495 break;
20496
20497 case V16QImode:
20498 case V8QImode:
20499 default:
20500 break;
20501 }
20502
20503 if (use_vec_merge)
20504 {
20505 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20506 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20507 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20508 }
20509 else
20510 {
20511 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20512
20513 emit_move_insn (mem, target);
20514
20515 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20516 emit_move_insn (tmp, val);
20517
20518 emit_move_insn (target, mem);
20519 }
20520 }
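/* Illustration only, not part of the backend: when no suitable shuffle or
   merge pattern exists, the fallback above spills the vector to a stack
   slot, stores the new element at its byte offset, and reloads the whole
   vector.  The same idea in plain C, as a minimal sketch with a hypothetical
   4-element vector type:  */
#if 0
#include <string.h>

typedef struct { float e[4]; } v4sf_sketch;

static v4sf_sketch
set_element_sketch (v4sf_sketch v, float val, int elt)
{
  float mem[4];
  memcpy (mem, v.e, sizeof mem);   /* spill the vector to memory */
  mem[elt] = val;                  /* overwrite the one element */
  memcpy (v.e, mem, sizeof mem);   /* reload the whole vector */
  return v;
}
#endif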
20521
20522 void
20523 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20524 {
20525 enum machine_mode mode = GET_MODE (vec);
20526 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20527 bool use_vec_extr = false;
20528 rtx tmp;
20529
20530 switch (mode)
20531 {
20532 case V2SImode:
20533 case V2SFmode:
20534 if (!mmx_ok)
20535 break;
20536 /* FALLTHRU */
20537
20538 case V2DFmode:
20539 case V2DImode:
20540 use_vec_extr = true;
20541 break;
20542
20543 case V4SFmode:
20544 switch (elt)
20545 {
20546 case 0:
20547 tmp = vec;
20548 break;
20549
20550 case 1:
20551 case 3:
20552 tmp = gen_reg_rtx (mode);
20553 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20554 GEN_INT (elt), GEN_INT (elt),
20555 GEN_INT (elt+4), GEN_INT (elt+4)));
20556 break;
20557
20558 case 2:
20559 tmp = gen_reg_rtx (mode);
20560 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20561 break;
20562
20563 default:
20564 gcc_unreachable ();
20565 }
20566 vec = tmp;
20567 use_vec_extr = true;
20568 elt = 0;
20569 break;
20570
20571 case V4SImode:
20572 if (TARGET_SSE2)
20573 {
20574 switch (elt)
20575 {
20576 case 0:
20577 tmp = vec;
20578 break;
20579
20580 case 1:
20581 case 3:
20582 tmp = gen_reg_rtx (mode);
20583 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20584 GEN_INT (elt), GEN_INT (elt),
20585 GEN_INT (elt), GEN_INT (elt)));
20586 break;
20587
20588 case 2:
20589 tmp = gen_reg_rtx (mode);
20590 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20591 break;
20592
20593 default:
20594 gcc_unreachable ();
20595 }
20596 vec = tmp;
20597 use_vec_extr = true;
20598 elt = 0;
20599 }
20600 else
20601 {
20602 /* For SSE1, we have to reuse the V4SF code. */
20603 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20604 gen_lowpart (V4SFmode, vec), elt);
20605 return;
20606 }
20607 break;
20608
20609 case V8HImode:
20610 use_vec_extr = TARGET_SSE2;
20611 break;
20612 case V4HImode:
20613 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20614 break;
20615
20616 case V16QImode:
20617 case V8QImode:
20618 /* ??? Could extract the appropriate HImode element and shift. */
20619 default:
20620 break;
20621 }
20622
20623 if (use_vec_extr)
20624 {
20625 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20626 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20627
20628 /* Let the rtl optimizers know about the zero extension performed. */
20629 if (inner_mode == HImode)
20630 {
20631 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20632 target = gen_lowpart (SImode, target);
20633 }
20634
20635 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20636 }
20637 else
20638 {
20639 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20640
20641 emit_move_insn (mem, vec);
20642
20643 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20644 emit_move_insn (target, tmp);
20645 }
20646 }
20647
20648 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20649 pattern to reduce; DEST is the destination; IN is the input vector. */
20650
20651 void
20652 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20653 {
20654 rtx tmp1, tmp2, tmp3;
20655
20656 tmp1 = gen_reg_rtx (V4SFmode);
20657 tmp2 = gen_reg_rtx (V4SFmode);
20658 tmp3 = gen_reg_rtx (V4SFmode);
20659
20660 emit_insn (gen_sse_movhlps (tmp1, in, in));
20661 emit_insn (fn (tmp2, tmp1, in));
20662
20663 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20664 GEN_INT (1), GEN_INT (1),
20665 GEN_INT (1+4), GEN_INT (1+4)));
20666 emit_insn (fn (dest, tmp2, tmp3));
20667 }
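/* Illustration only, not part of the backend: the reduction above combines
   the high and low halves (movhlps + FN), then the two surviving lanes
   (shufps + FN), so two vector applications of FN perform three scalar
   combines.  The scalar equivalent, as a minimal sketch:  */
#if 0
static float
reduce4_sketch (float (*fn) (float, float), const float v[4])
{
  float a = fn (v[0], v[2]);   /* combine the low half with the high half */
  float b = fn (v[1], v[3]);
  return fn (a, b);            /* combine the two partial results */
}
#endif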
20668 \f
20669 /* Target hook for scalar_mode_supported_p. */
20670 static bool
20671 ix86_scalar_mode_supported_p (enum machine_mode mode)
20672 {
20673 if (DECIMAL_FLOAT_MODE_P (mode))
20674 return true;
20675 else
20676 return default_scalar_mode_supported_p (mode);
20677 }
20678
20679 /* Implements target hook vector_mode_supported_p. */
20680 static bool
20681 ix86_vector_mode_supported_p (enum machine_mode mode)
20682 {
20683 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20684 return true;
20685 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20686 return true;
20687 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20688 return true;
20689 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20690 return true;
20691 return false;
20692 }
20693
20694 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20695
20696 We do this in the new i386 backend to maintain source compatibility
20697 with the old cc0-based compiler. */
20698
20699 static tree
20700 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20701 tree inputs ATTRIBUTE_UNUSED,
20702 tree clobbers)
20703 {
20704 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20705 clobbers);
20706 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20707 clobbers);
20708 return clobbers;
20709 }
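
/* For illustration: with this hook, an asm such as

     asm ("addl %2, %0" : "=r" (x) : "0" (x), "g" (y));

   is compiled as if it also listed "flags" and "fpsr" in its clobbers,
   so the optimizers do not assume EFLAGS or the x87 status word survive
   across the asm.  */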
20710
20711 /* Return true if this goes in large data/bss. */
20712
20713 static bool
20714 ix86_in_large_data_p (tree exp)
20715 {
20716 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20717 return false;
20718
20719 /* Functions are never large data. */
20720 if (TREE_CODE (exp) == FUNCTION_DECL)
20721 return false;
20722
20723 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20724 {
20725 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20726 if (strcmp (section, ".ldata") == 0
20727 || strcmp (section, ".lbss") == 0)
20728 return true;
20729 return false;
20730 }
20731 else
20732 {
20733 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20734
20735 /* If this is an incomplete type with size 0, then we can't put it
20736 in data because it might be too big when completed. */
20737 if (!size || size > ix86_section_threshold)
20738 return true;
20739 }
20740
20741 return false;
20742 }
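
/* Mark symbols that live in the medium-model large data sections with
   SYMBOL_FLAG_FAR_ADDR; presumably this makes later code use full 64-bit
   (movabs-style) addressing when referencing them.  */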
20743 static void
20744 ix86_encode_section_info (tree decl, rtx rtl, int first)
20745 {
20746 default_encode_section_info (decl, rtl, first);
20747
20748 if (TREE_CODE (decl) == VAR_DECL
20749 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20750 && ix86_in_large_data_p (decl))
20751 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20752 }
20753
20754 /* Worker function for REVERSE_CONDITION. */
20755
20756 enum rtx_code
20757 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20758 {
20759 return (mode != CCFPmode && mode != CCFPUmode
20760 ? reverse_condition (code)
20761 : reverse_condition_maybe_unordered (code));
20762 }
20763
20764 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20765 to OPERANDS[0]. */
20766
20767 const char *
20768 output_387_reg_move (rtx insn, rtx *operands)
20769 {
20770 if (REG_P (operands[1])
20771 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20772 {
20773 if (REGNO (operands[0]) == FIRST_STACK_REG)
20774 return output_387_ffreep (operands, 0);
20775 return "fstp\t%y0";
20776 }
20777 if (STACK_TOP_P (operands[0]))
20778 return "fld%z1\t%y1";
20779 return "fst\t%y0";
20780 }
20781
20782 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20783 FP status register is set. */
20784
20785 void
20786 ix86_emit_fp_unordered_jump (rtx label)
20787 {
20788 rtx reg = gen_reg_rtx (HImode);
20789 rtx temp;
20790
20791 emit_insn (gen_x86_fnstsw_1 (reg));
20792
20793 if (TARGET_USE_SAHF)
20794 {
20795 emit_insn (gen_x86_sahf_1 (reg));
20796
20797 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20798 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20799 }
20800 else
20801 {
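      /* The C2 condition flag is bit 10 of the FP status word, i.e.
	 bit 2 (0x04) of the high byte that this test examines.  */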
20802 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20803
20804 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20805 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20806 }
20807
20808 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20809 gen_rtx_LABEL_REF (VOIDmode, label),
20810 pc_rtx);
20811 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20812 emit_jump_insn (temp);
20813 }
20814
20815 /* Output code to perform a log1p XFmode calculation. */
20816
20817 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
20818 {
20819 rtx label1 = gen_label_rtx ();
20820 rtx label2 = gen_label_rtx ();
20821
20822 rtx tmp = gen_reg_rtx (XFmode);
20823 rtx tmp2 = gen_reg_rtx (XFmode);
20824
20825 emit_insn (gen_absxf2 (tmp, op1));
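
  /* 0.29289... is approximately 1 - sqrt(2)/2, presumably the documented
     x87 range limit of fyl2xp1: for |op1| below it fyl2xp1 can be used
     directly, otherwise we fall back to fyl2x on 1.0 + op1 below.  */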
20826 emit_insn (gen_cmpxf (tmp,
20827 CONST_DOUBLE_FROM_REAL_VALUE (
20828 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20829 XFmode)));
20830 emit_jump_insn (gen_bge (label1));
20831
20832 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20833 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20834 emit_jump (label2);
20835
20836 emit_label (label1);
20837 emit_move_insn (tmp, CONST1_RTX (XFmode));
20838 emit_insn (gen_addxf3 (tmp, op1, tmp));
20839 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20840 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20841
20842 emit_label (label2);
20843 }
20844
20845 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20846
20847 static void
20848 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20849 tree decl)
20850 {
20851 /* With Binutils 2.15, the "@unwind" marker must be specified on
20852 every occurrence of the ".eh_frame" section, not just the first
20853 one. */
20854 if (TARGET_64BIT
20855 && strcmp (name, ".eh_frame") == 0)
20856 {
20857 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20858 flags & SECTION_WRITE ? "aw" : "a");
20859 return;
20860 }
20861 default_elf_asm_named_section (name, flags, decl);
20862 }
20863
20864 /* Return the mangling of TYPE if it is an extended fundamental type. */
20865
20866 static const char *
20867 ix86_mangle_fundamental_type (tree type)
20868 {
20869 switch (TYPE_MODE (type))
20870 {
20871 case TFmode:
20872 /* __float128 is "g". */
20873 return "g";
20874 case XFmode:
20875 /* "long double" or __float80 is "e". */
20876 return "e";
20877 default:
20878 return NULL;
20879 }
20880 }
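
/* For illustration (Itanium C++ ABI mangling): with the encodings above,
   void f (__float128) mangles to _Z1fg and void f (long double) mangles
   to _Z1fe.  */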
20881
20882 /* For 32-bit code we can avoid PIC register setup by using the
20883 __stack_chk_fail_local hidden function instead of calling
20884 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20885 register, so it is better to call __stack_chk_fail directly. */
20886
20887 static tree
20888 ix86_stack_protect_fail (void)
20889 {
20890 return TARGET_64BIT
20891 ? default_external_stack_protect_fail ()
20892 : default_hidden_stack_protect_fail ();
20893 }
20894
20895 /* Select a format to encode pointers in exception handling data. CODE
20896 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20897 true if the symbol may be affected by dynamic relocations.
20898
20899 ??? All x86 object file formats are capable of representing this.
20900 After all, the relocation needed is the same as for the call insn.
20901 Whether or not a particular assembler allows us to enter such, I
20902 guess we'll have to see. */
20903 int
20904 asm_preferred_eh_data_format (int code, int global)
20905 {
20906 if (flag_pic)
20907 {
20908 int type = DW_EH_PE_sdata8;
20909 if (!TARGET_64BIT
20910 || ix86_cmodel == CM_SMALL_PIC
20911 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20912 type = DW_EH_PE_sdata4;
20913 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20914 }
20915 if (ix86_cmodel == CM_SMALL
20916 || (ix86_cmodel == CM_MEDIUM && code))
20917 return DW_EH_PE_udata4;
20918 return DW_EH_PE_absptr;
20919 }
20920 \f
20921 /* Copy the sign of SIGN onto the nonnegative value ABS_VALUE, storing
20922 the result in RESULT. If MASK is non-null, it is an inverted sign mask
20923 (all bits set except the sign bit), as produced by ix86_expand_sse_fabs. */
20924 static void
20925 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20926 {
20927 enum machine_mode mode = GET_MODE (sign);
20928 rtx sgn = gen_reg_rtx (mode);
20929 if (mask == NULL_RTX)
20930 {
20931 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20932 if (!VECTOR_MODE_P (mode))
20933 {
20934 /* We need to generate a scalar mode mask in this case. */
20935 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20936 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20937 mask = gen_reg_rtx (mode);
20938 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20939 }
20940 }
20941 else
20942 mask = gen_rtx_NOT (mode, mask);
20943 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20944 gen_rtx_AND (mode, mask, sign)));
20945 emit_insn (gen_rtx_SET (VOIDmode, result,
20946 gen_rtx_IOR (mode, abs_value, sgn)));
20947 }
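
/* Illustrative SFmode example: with ABS_VALUE = 2.0f (0x40000000),
   SIGN = -3.0f (0xc0400000) and a sign-bit mask of 0x80000000, the AND
   yields 0x80000000 and the IOR produces 0xc0000000, i.e. -2.0f.  */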
20948
20949 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20950 mask for masking out the sign-bit is stored in *SMASK, if that is
20951 non-null. */
20952 static rtx
20953 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20954 {
20955 enum machine_mode mode = GET_MODE (op0);
20956 rtx xa, mask;
20957
20958 xa = gen_reg_rtx (mode);
20959 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20960 if (!VECTOR_MODE_P (mode))
20961 {
20962 /* We need to generate a scalar mode mask in this case. */
20963 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20964 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20965 mask = gen_reg_rtx (mode);
20966 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20967 }
20968 emit_insn (gen_rtx_SET (VOIDmode, xa,
20969 gen_rtx_AND (mode, op0, mask)));
20970
20971 if (smask)
20972 *smask = mask;
20973
20974 return xa;
20975 }
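
/* Illustrative SFmode example: the inverted sign-bit mask is 0x7fffffff,
   so -3.0f (0xc0400000) & 0x7fffffff = 0x40400000 = 3.0f.  */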
20976
20977 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20978 swapping the operands if SWAP_OPERANDS is true. The expanded
20979 code is a forward jump to a newly created label in case the
20980 comparison is true. The generated label rtx is returned. */
20981 static rtx
20982 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20983 bool swap_operands)
20984 {
20985 rtx label, tmp;
20986
20987 if (swap_operands)
20988 {
20989 tmp = op0;
20990 op0 = op1;
20991 op1 = tmp;
20992 }
20993
20994 label = gen_label_rtx ();
20995 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
20996 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20997 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
20998 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
20999 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21000 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21001 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21002 JUMP_LABEL (tmp) = label;
21003
21004 return label;
21005 }
21006
21007 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
21008 using comparison code CODE. Operands are swapped for the comparison if
21009 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
21010 static rtx
21011 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21012 bool swap_operands)
21013 {
21014 enum machine_mode mode = GET_MODE (op0);
21015 rtx mask = gen_reg_rtx (mode);
21016
21017 if (swap_operands)
21018 {
21019 rtx tmp = op0;
21020 op0 = op1;
21021 op1 = tmp;
21022 }
21023
21024 if (mode == DFmode)
21025 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21026 gen_rtx_fmt_ee (code, mode, op0, op1)));
21027 else
21028 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21029 gen_rtx_fmt_ee (code, mode, op0, op1)));
21030
21031 return mask;
21032 }
21033
21034 /* Generate and return an rtx of mode MODE holding 2**n, where n is the
21035 number of mantissa bits of MODE: 2**52 for DFmode, 2**23 for SFmode. */
21036 static rtx
21037 ix86_gen_TWO52 (enum machine_mode mode)
21038 {
21039 REAL_VALUE_TYPE TWO52r;
21040 rtx TWO52;
21041
21042 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21043 TWO52 = const_double_from_real_value (TWO52r, mode);
21044 TWO52 = force_reg (mode, TWO52);
21045
21046 return TWO52;
21047 }
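
/* Illustrative sketch of the 2**52 trick used below (DFmode, default
   round-to-nearest):

     x          = 2.7
     x + 2**52  = 4503599627370498.7 -> rounded to 4503599627370499.0
       - 2**52  = 3.0

   Because a double's 53-bit significand cannot hold fractional bits once
   2**52 has been added, the add/subtract pair rounds any |x| < 2**52 to
   an integer in the current rounding mode.  */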
21048
21049 /* Expand SSE sequence for computing lround from OP1 storing
21050 into OP0. */
21051 void
21052 ix86_expand_lround (rtx op0, rtx op1)
21053 {
21054 /* C code for the stuff we're doing below:
21055 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21056 return (long)tmp;
21057 */
21058 enum machine_mode mode = GET_MODE (op1);
21059 const struct real_format *fmt;
21060 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21061 rtx adj;
21062
21063 /* load nextafter (0.5, 0.0) */
21064 fmt = REAL_MODE_FORMAT (mode);
21065 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21066 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21067
21068 /* adj = copysign (nextafter (0.5, 0.0), op1) */
21069 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21070 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21071
21072 /* adj = op1 + adj */
21073 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21074
21075 /* op0 = (imode)adj */
21076 expand_fix (op0, adj, 0);
21077 }
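
/* Why nextafter (0.5, 0.0) rather than 0.5: for the largest double below
   0.5 (0.49999999999999994), adding exactly 0.5 would round up to 1.0 and
   the truncation would yield lround = 1 instead of the correct 0; adding
   the slightly smaller constant keeps the sum below 1.0.  */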
21078
21079 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
21080 storing into OPERAND0. */
21081 void
21082 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21083 {
21084 /* C code for the stuff we're doing below (for do_floor):
21085 xi = (long)op1;
21086 xi -= (double)xi > op1 ? 1 : 0;
21087 return xi;
21088 */
21089 enum machine_mode fmode = GET_MODE (op1);
21090 enum machine_mode imode = GET_MODE (op0);
21091 rtx ireg, freg, label, tmp;
21092
21093 /* reg = (long)op1 */
21094 ireg = gen_reg_rtx (imode);
21095 expand_fix (ireg, op1, 0);
21096
21097 /* freg = (double)reg */
21098 freg = gen_reg_rtx (fmode);
21099 expand_float (freg, ireg, 0);
21100
21101 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21102 label = ix86_expand_sse_compare_and_jump (UNLE,
21103 freg, op1, !do_floor);
21104 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21105 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21106 emit_move_insn (ireg, tmp);
21107
21108 emit_label (label);
21109 LABEL_NUSES (label) = 1;
21110
21111 emit_move_insn (op0, ireg);
21112 }
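
/* Illustrative trace for lfloor (-2.3): the truncation gives ireg = -2 and
   freg = -2.0; since -2.0 <= -2.3 is false the jump is not taken, so ireg
   is decremented to -3, which is floor (-2.3).  */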
21113
21114 /* Expand rint (round to integral, honoring the current rounding mode)
21115 of OPERAND1, storing the result in OPERAND0. */
21116 void
21117 ix86_expand_rint (rtx operand0, rtx operand1)
21118 {
21119 /* C code for the stuff we're doing below:
21120 xa = fabs (operand1);
21121 if (!isless (xa, 2**52))
21122 return operand1;
21123 xa = xa + 2**52 - 2**52;
21124 return copysign (xa, operand1);
21125 */
21126 enum machine_mode mode = GET_MODE (operand0);
21127 rtx res, xa, label, TWO52, mask;
21128
21129 res = gen_reg_rtx (mode);
21130 emit_move_insn (res, operand1);
21131
21132 /* xa = abs (operand1) */
21133 xa = ix86_expand_sse_fabs (res, &mask);
21134
21135 /* if (!isless (xa, TWO52)) goto label; */
21136 TWO52 = ix86_gen_TWO52 (mode);
21137 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21138
21139 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21140 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21141
21142 ix86_sse_copysign_to_positive (res, xa, res, mask);
21143
21144 emit_label (label);
21145 LABEL_NUSES (label) = 1;
21146
21147 emit_move_insn (operand0, res);
21148 }
21149
21150 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21151 into OPERAND0. */
21152 void
21153 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21154 {
21155 /* C code for the stuff we expand below.
21156 double xa = fabs (x), x2;
21157 if (!isless (xa, TWO52))
21158 return x;
21159 xa = xa + TWO52 - TWO52;
21160 x2 = copysign (xa, x);
21161 Compensate. Floor:
21162 if (x2 > x)
21163 x2 -= 1;
21164 Compensate. Ceil:
21165 if (x2 < x)
21166 x2 -= -1;
21167 return x2;
21168 */
21169 enum machine_mode mode = GET_MODE (operand0);
21170 rtx xa, TWO52, tmp, label, one, res, mask;
21171
21172 TWO52 = ix86_gen_TWO52 (mode);
21173
21174 /* Temporary for holding the result, initialized to the input
21175 operand to ease control flow. */
21176 res = gen_reg_rtx (mode);
21177 emit_move_insn (res, operand1);
21178
21179 /* xa = abs (operand1) */
21180 xa = ix86_expand_sse_fabs (res, &mask);
21181
21182 /* if (!isless (xa, TWO52)) goto label; */
21183 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21184
21185 /* xa = xa + TWO52 - TWO52; */
21186 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21187 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21188
21189 /* xa = copysign (xa, operand1) */
21190 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21191
21192 /* generate 1.0 or -1.0 */
21193 one = force_reg (mode,
21194 const_double_from_real_value (do_floor
21195 ? dconst1 : dconstm1, mode));
21196
21197 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21198 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21199 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21200 gen_rtx_AND (mode, one, tmp)));
21201 /* We always need to subtract here to preserve signed zero. */
21202 tmp = expand_simple_binop (mode, MINUS,
21203 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21204 emit_move_insn (res, tmp);
21205
21206 emit_label (label);
21207 LABEL_NUSES (label) = 1;
21208
21209 emit_move_insn (operand0, res);
21210 }
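
/* Illustrative trace for floor (2.7): xa = 2.7 becomes 3.0 after the
   2**52 add/subtract (round to nearest); 3.0 > 2.7, so 1.0 is subtracted,
   giving 2.0.  */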
21211
21212 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21213 into OPERAND0. */
21214 void
21215 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21216 {
21217 /* C code for the stuff we expand below.
21218 double xa = fabs (x), x2;
21219 if (!isless (xa, TWO52))
21220 return x;
21221 x2 = (double)(long)x;
21222 Compensate. Floor:
21223 if (x2 > x)
21224 x2 -= 1;
21225 Compensate. Ceil:
21226 if (x2 < x)
21227 x2 += 1;
21228 if (HONOR_SIGNED_ZEROS (mode))
21229 return copysign (x2, x);
21230 return x2;
21231 */
21232 enum machine_mode mode = GET_MODE (operand0);
21233 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21234
21235 TWO52 = ix86_gen_TWO52 (mode);
21236
21237 /* Temporary for holding the result, initialized to the input
21238 operand to ease control flow. */
21239 res = gen_reg_rtx (mode);
21240 emit_move_insn (res, operand1);
21241
21242 /* xa = abs (operand1) */
21243 xa = ix86_expand_sse_fabs (res, &mask);
21244
21245 /* if (!isless (xa, TWO52)) goto label; */
21246 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21247
21248 /* xa = (double)(long)x */
21249 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21250 expand_fix (xi, res, 0);
21251 expand_float (xa, xi, 0);
21252
21253 /* generate 1.0 */
21254 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21255
21256 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21257 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21258 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21259 gen_rtx_AND (mode, one, tmp)));
21260 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21261 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21262 emit_move_insn (res, tmp);
21263
21264 if (HONOR_SIGNED_ZEROS (mode))
21265 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21266
21267 emit_label (label);
21268 LABEL_NUSES (label) = 1;
21269
21270 emit_move_insn (operand0, res);
21271 }
21272
21273 /* Expand SSE sequence for computing round from OPERAND1, storing the
21274 result into OPERAND0. This sequence does not rely on the DImode
21275 truncation (cvttsd2siq) that is only available on 64-bit targets. */
21276 void
21277 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21278 {
21279 /* C code for the stuff we expand below.
21280 double xa = fabs (x), xa2, x2;
21281 if (!isless (xa, TWO52))
21282 return x;
21283 Using the absolute value and copying back sign makes
21284 -0.0 -> -0.0 correct.
21285 xa2 = xa + TWO52 - TWO52;
21286 Compensate.
21287 dxa = xa2 - xa;
21288 if (dxa <= -0.5)
21289 xa2 += 1;
21290 else if (dxa > 0.5)
21291 xa2 -= 1;
21292 x2 = copysign (xa2, x);
21293 return x2;
21294 */
21295 enum machine_mode mode = GET_MODE (operand0);
21296 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21297
21298 TWO52 = ix86_gen_TWO52 (mode);
21299
21300 /* Temporary for holding the result, initialized to the input
21301 operand to ease control flow. */
21302 res = gen_reg_rtx (mode);
21303 emit_move_insn (res, operand1);
21304
21305 /* xa = abs (operand1) */
21306 xa = ix86_expand_sse_fabs (res, &mask);
21307
21308 /* if (!isless (xa, TWO52)) goto label; */
21309 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21310
21311 /* xa2 = xa + TWO52 - TWO52; */
21312 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21313 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21314
21315 /* dxa = xa2 - xa; */
21316 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21317
21318 /* generate 0.5, 1.0 and -0.5 */
21319 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21320 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21321 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21322 0, OPTAB_DIRECT);
21323
21324 /* Compensate. */
21325 tmp = gen_reg_rtx (mode);
21326 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21327 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21328 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21329 gen_rtx_AND (mode, one, tmp)));
21330 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21331 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21332 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21333 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21334 gen_rtx_AND (mode, one, tmp)));
21335 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21336
21337 /* res = copysign (xa2, operand1) */
21338 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21339
21340 emit_label (label);
21341 LABEL_NUSES (label) = 1;
21342
21343 emit_move_insn (operand0, res);
21344 }
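
/* Illustrative trace for round (2.5): xa2 = 2.5 + 2**52 - 2**52 = 2.0
   (ties to even), so dxa = -0.5; the dxa <= -0.5 compensation then adds
   1.0, giving 3.0, i.e. halfway cases round away from zero as round ()
   requires.  */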
21345
21346 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21347 into OPERAND0. */
21348 void
21349 ix86_expand_trunc (rtx operand0, rtx operand1)
21350 {
21351 /* C code for SSE variant we expand below.
21352 double xa = fabs (x), x2;
21353 if (!isless (xa, TWO52))
21354 return x;
21355 x2 = (double)(long)x;
21356 if (HONOR_SIGNED_ZEROS (mode))
21357 return copysign (x2, x);
21358 return x2;
21359 */
21360 enum machine_mode mode = GET_MODE (operand0);
21361 rtx xa, xi, TWO52, label, res, mask;
21362
21363 TWO52 = ix86_gen_TWO52 (mode);
21364
21365 /* Temporary for holding the result, initialized to the input
21366 operand to ease control flow. */
21367 res = gen_reg_rtx (mode);
21368 emit_move_insn (res, operand1);
21369
21370 /* xa = abs (operand1) */
21371 xa = ix86_expand_sse_fabs (res, &mask);
21372
21373 /* if (!isless (xa, TWO52)) goto label; */
21374 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21375
21376 /* x = (double)(long)x */
21377 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21378 expand_fix (xi, res, 0);
21379 expand_float (res, xi, 0);
21380
21381 if (HONOR_SIGNED_ZEROS (mode))
21382 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21383
21384 emit_label (label);
21385 LABEL_NUSES (label) = 1;
21386
21387 emit_move_insn (operand0, res);
21388 }
21389
21390 /* Expand SSE sequence for computing trunc from OPERAND1, storing into
21391 OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation. */
21392 void
21393 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21394 {
21395 enum machine_mode mode = GET_MODE (operand0);
21396 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21397
21398 /* C code for SSE variant we expand below.
21399 double xa = fabs (x), xa2, x2;
21400 if (!isless (xa, TWO52))
21401 return x;
21402 xa2 = xa + TWO52 - TWO52;
21403 Compensate:
21404 if (xa2 > xa)
21405 xa2 -= 1.0;
21406 x2 = copysign (xa2, x);
21407 return x2;
21408 */
21409
21410 TWO52 = ix86_gen_TWO52 (mode);
21411
21412 /* Temporary for holding the result, initialized to the input
21413 operand to ease control flow. */
21414 res = gen_reg_rtx (mode);
21415 emit_move_insn (res, operand1);
21416
21417 /* xa = abs (operand1) */
21418 xa = ix86_expand_sse_fabs (res, &smask);
21419
21420 /* if (!isless (xa, TWO52)) goto label; */
21421 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21422
21423 /* res = xa + TWO52 - TWO52; */
21424 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21425 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21426 emit_move_insn (res, tmp);
21427
21428 /* generate 1.0 */
21429 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21430
21431 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21432 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21433 emit_insn (gen_rtx_SET (VOIDmode, mask,
21434 gen_rtx_AND (mode, mask, one)));
21435 tmp = expand_simple_binop (mode, MINUS,
21436 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21437 emit_move_insn (res, tmp);
21438
21439 /* res = copysign (res, operand1) */
21440 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21441
21442 emit_label (label);
21443 LABEL_NUSES (label) = 1;
21444
21445 emit_move_insn (operand0, res);
21446 }
21447
21448 /* Expand SSE sequence for computing round from OPERAND1 storing
21449 into OPERAND0. */
21450 void
21451 ix86_expand_round (rtx operand0, rtx operand1)
21452 {
21453 /* C code for the stuff we're doing below:
21454 double xa = fabs (x);
21455 if (!isless (xa, TWO52))
21456 return x;
21457 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21458 return copysign (xa, x);
21459 */
21460 enum machine_mode mode = GET_MODE (operand0);
21461 rtx res, TWO52, xa, label, xi, half, mask;
21462 const struct real_format *fmt;
21463 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21464
21465 /* Temporary for holding the result, initialized to the input
21466 operand to ease control flow. */
21467 res = gen_reg_rtx (mode);
21468 emit_move_insn (res, operand1);
21469
21470 TWO52 = ix86_gen_TWO52 (mode);
21471 xa = ix86_expand_sse_fabs (res, &mask);
21472 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21473
21474 /* load nextafter (0.5, 0.0) */
21475 fmt = REAL_MODE_FORMAT (mode);
21476 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21477 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21478
21479 /* xa = xa + nextafter (0.5, 0.0) */
21480 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21481 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21482
21483 /* xa = (double)(int64_t)xa */
21484 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21485 expand_fix (xi, xa, 0);
21486 expand_float (xa, xi, 0);
21487
21488 /* res = copysign (xa, operand1) */
21489 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21490
21491 emit_label (label);
21492 LABEL_NUSES (label) = 1;
21493
21494 emit_move_insn (operand0, res);
21495 }
21496
21497 #include "gt-i386.h"