i386.c (nocona_cost, [...]): Update preferred memcpy/memset codegen.
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
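/* As an illustration, MODE_INDEX (SImode) == 2, so the third entry of each of
   the multiply and divide cost arrays below gives the SImode cost; index 4
   ("other") covers any remaining mode.  */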
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
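/* Under that assumption, COSTS_N_BYTES (2) == COSTS_N_INSNS (1): a two-byte
   add is charged as one "insn" on the common scale, and larger instructions
   are charged in proportion to their encoded size.  */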
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
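/* The memcpy/memset descriptors that close each cost table below are read
   roughly as follows (see struct stringop_algs in i386.h): the leading field
   names the algorithm used when the block size is not a compile-time constant,
   and each {max, alg} pair requests ALG for constant sizes up to MAX bytes,
   with max == -1 meaning "no upper bound".  Each table holds two descriptors
   for memcpy and two for memset; the first of each pair is consulted for 32bit
   code and the second for 64bit code, hence DUMMY_STRINGOP_ALGS wherever one
   of the two modes is irrelevant for the CPU in question.  */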
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it probably is not a good idea to leave
587 the number of prefetches entirely unlimited, as their execution also takes some
588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
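/* With the descriptor format noted above, the first line below reads: for
   32bit memcpy use a libcall when the size is unknown, an inline loop up to
   6 bytes, an unrolled loop up to 14 bytes, and rep movsl (rep_prefix_4_byte)
   for anything larger.  */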
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 static const
608 struct processor_costs pentium4_cost = {
609 COSTS_N_INSNS (1), /* cost of an add instruction */
610 COSTS_N_INSNS (3), /* cost of a lea instruction */
611 COSTS_N_INSNS (4), /* variable shift costs */
612 COSTS_N_INSNS (4), /* constant shift costs */
613 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
614 COSTS_N_INSNS (15), /* HI */
615 COSTS_N_INSNS (15), /* SI */
616 COSTS_N_INSNS (15), /* DI */
617 COSTS_N_INSNS (15)}, /* other */
618 0, /* cost of multiply per each bit set */
619 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
620 COSTS_N_INSNS (56), /* HI */
621 COSTS_N_INSNS (56), /* SI */
622 COSTS_N_INSNS (56), /* DI */
623 COSTS_N_INSNS (56)}, /* other */
624 COSTS_N_INSNS (1), /* cost of movsx */
625 COSTS_N_INSNS (1), /* cost of movzx */
626 16, /* "large" insn */
627 6, /* MOVE_RATIO */
628 2, /* cost for loading QImode using movzbl */
629 {4, 5, 4}, /* cost of loading integer registers
630 in QImode, HImode and SImode.
631 Relative to reg-reg move (2). */
632 {2, 3, 2}, /* cost of storing integer registers */
633 2, /* cost of reg,reg fld/fst */
634 {2, 2, 6}, /* cost of loading fp registers
635 in SFmode, DFmode and XFmode */
636 {4, 4, 6}, /* cost of storing fp registers
637 in SFmode, DFmode and XFmode */
638 2, /* cost of moving MMX register */
639 {2, 2}, /* cost of loading MMX registers
640 in SImode and DImode */
641 {2, 2}, /* cost of storing MMX registers
642 in SImode and DImode */
643 12, /* cost of moving SSE register */
644 {12, 12, 12}, /* cost of loading SSE registers
645 in SImode, DImode and TImode */
646 {2, 2, 8}, /* cost of storing SSE registers
647 in SImode, DImode and TImode */
648 10, /* MMX or SSE register to integer */
649 64, /* size of prefetch block */
650 6, /* number of parallel prefetches */
651 2, /* Branch cost */
652 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
653 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
654 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
655 COSTS_N_INSNS (2), /* cost of FABS instruction. */
656 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
657 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
658 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
659 DUMMY_STRINGOP_ALGS},
660 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
661 {-1, libcall}}},
662 DUMMY_STRINGOP_ALGS},
663 };
664
665 static const
666 struct processor_costs nocona_cost = {
667 COSTS_N_INSNS (1), /* cost of an add instruction */
668 COSTS_N_INSNS (1), /* cost of a lea instruction */
669 COSTS_N_INSNS (1), /* variable shift costs */
670 COSTS_N_INSNS (1), /* constant shift costs */
671 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
672 COSTS_N_INSNS (10), /* HI */
673 COSTS_N_INSNS (10), /* SI */
674 COSTS_N_INSNS (10), /* DI */
675 COSTS_N_INSNS (10)}, /* other */
676 0, /* cost of multiply per each bit set */
677 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
678 COSTS_N_INSNS (66), /* HI */
679 COSTS_N_INSNS (66), /* SI */
680 COSTS_N_INSNS (66), /* DI */
681 COSTS_N_INSNS (66)}, /* other */
682 COSTS_N_INSNS (1), /* cost of movsx */
683 COSTS_N_INSNS (1), /* cost of movzx */
684 16, /* "large" insn */
685 17, /* MOVE_RATIO */
686 4, /* cost for loading QImode using movzbl */
687 {4, 4, 4}, /* cost of loading integer registers
688 in QImode, HImode and SImode.
689 Relative to reg-reg move (2). */
690 {4, 4, 4}, /* cost of storing integer registers */
691 3, /* cost of reg,reg fld/fst */
692 {12, 12, 12}, /* cost of loading fp registers
693 in SFmode, DFmode and XFmode */
694 {4, 4, 4}, /* cost of storing fp registers
695 in SFmode, DFmode and XFmode */
696 6, /* cost of moving MMX register */
697 {12, 12}, /* cost of loading MMX registers
698 in SImode and DImode */
699 {12, 12}, /* cost of storing MMX registers
700 in SImode and DImode */
701 6, /* cost of moving SSE register */
702 {12, 12, 12}, /* cost of loading SSE registers
703 in SImode, DImode and TImode */
704 {12, 12, 12}, /* cost of storing SSE registers
705 in SImode, DImode and TImode */
706 8, /* MMX or SSE register to integer */
707 128, /* size of prefetch block */
708 8, /* number of parallel prefetches */
709 1, /* Branch cost */
710 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
711 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
712 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
713 COSTS_N_INSNS (3), /* cost of FABS instruction. */
714 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
715 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
716 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
717 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
718 {100000, unrolled_loop}, {-1, libcall}}}},
719 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
720 {-1, libcall}}},
721 {libcall, {{24, loop}, {64, unrolled_loop},
722 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
723 };
724
725 static const
726 struct processor_costs core2_cost = {
727 COSTS_N_INSNS (1), /* cost of an add instruction */
728 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
729 COSTS_N_INSNS (1), /* variable shift costs */
730 COSTS_N_INSNS (1), /* constant shift costs */
731 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
732 COSTS_N_INSNS (3), /* HI */
733 COSTS_N_INSNS (3), /* SI */
734 COSTS_N_INSNS (3), /* DI */
735 COSTS_N_INSNS (3)}, /* other */
736 0, /* cost of multiply per each bit set */
737 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
738 COSTS_N_INSNS (22), /* HI */
739 COSTS_N_INSNS (22), /* SI */
740 COSTS_N_INSNS (22), /* DI */
741 COSTS_N_INSNS (22)}, /* other */
742 COSTS_N_INSNS (1), /* cost of movsx */
743 COSTS_N_INSNS (1), /* cost of movzx */
744 8, /* "large" insn */
745 16, /* MOVE_RATIO */
746 2, /* cost for loading QImode using movzbl */
747 {6, 6, 6}, /* cost of loading integer registers
748 in QImode, HImode and SImode.
749 Relative to reg-reg move (2). */
750 {4, 4, 4}, /* cost of storing integer registers */
751 2, /* cost of reg,reg fld/fst */
752 {6, 6, 6}, /* cost of loading fp registers
753 in SFmode, DFmode and XFmode */
754 {4, 4, 4}, /* cost of storing fp registers */
755 2, /* cost of moving MMX register */
756 {6, 6}, /* cost of loading MMX registers
757 in SImode and DImode */
758 {4, 4}, /* cost of storing MMX registers
759 in SImode and DImode */
760 2, /* cost of moving SSE register */
761 {6, 6, 6}, /* cost of loading SSE registers
762 in SImode, DImode and TImode */
763 {4, 4, 4}, /* cost of storing SSE registers
764 in SImode, DImode and TImode */
765 2, /* MMX or SSE register to integer */
766 128, /* size of prefetch block */
767 8, /* number of parallel prefetches */
768 3, /* Branch cost */
769 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
770 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
771 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
772 COSTS_N_INSNS (1), /* cost of FABS instruction. */
773 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
774 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
775 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
776 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
777 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
778 {{libcall, {{8, loop}, {15, unrolled_loop},
779 {2048, rep_prefix_4_byte}, {-1, libcall}}},
780 {libcall, {{24, loop}, {32, unrolled_loop},
781 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
782 };
783
784 /* Generic64 should produce code tuned for Nocona and K8. */
785 static const
786 struct processor_costs generic64_cost = {
787 COSTS_N_INSNS (1), /* cost of an add instruction */
788 /* On all chips taken into consideration, lea takes 2 cycles or more. With
789 this cost, however, our current implementation of synth_mult results in
790 the use of unnecessary temporary registers, causing regressions on several
791 SPECfp benchmarks. */
792 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
793 COSTS_N_INSNS (1), /* variable shift costs */
794 COSTS_N_INSNS (1), /* constant shift costs */
795 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
796 COSTS_N_INSNS (4), /* HI */
797 COSTS_N_INSNS (3), /* SI */
798 COSTS_N_INSNS (4), /* DI */
799 COSTS_N_INSNS (2)}, /* other */
800 0, /* cost of multiply per each bit set */
801 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
802 COSTS_N_INSNS (26), /* HI */
803 COSTS_N_INSNS (42), /* SI */
804 COSTS_N_INSNS (74), /* DI */
805 COSTS_N_INSNS (74)}, /* other */
806 COSTS_N_INSNS (1), /* cost of movsx */
807 COSTS_N_INSNS (1), /* cost of movzx */
808 8, /* "large" insn */
809 17, /* MOVE_RATIO */
810 4, /* cost for loading QImode using movzbl */
811 {4, 4, 4}, /* cost of loading integer registers
812 in QImode, HImode and SImode.
813 Relative to reg-reg move (2). */
814 {4, 4, 4}, /* cost of storing integer registers */
815 4, /* cost of reg,reg fld/fst */
816 {12, 12, 12}, /* cost of loading fp registers
817 in SFmode, DFmode and XFmode */
818 {6, 6, 8}, /* cost of storing fp registers
819 in SFmode, DFmode and XFmode */
820 2, /* cost of moving MMX register */
821 {8, 8}, /* cost of loading MMX registers
822 in SImode and DImode */
823 {8, 8}, /* cost of storing MMX registers
824 in SImode and DImode */
825 2, /* cost of moving SSE register */
826 {8, 8, 8}, /* cost of loading SSE registers
827 in SImode, DImode and TImode */
828 {8, 8, 8}, /* cost of storing SSE registers
829 in SImode, DImode and TImode */
830 5, /* MMX or SSE register to integer */
831 64, /* size of prefetch block */
832 6, /* number of parallel prefetches */
833 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
834 is increased to the perhaps more appropriate value of 5. */
835 3, /* Branch cost */
836 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
837 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
838 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
839 COSTS_N_INSNS (8), /* cost of FABS instruction. */
840 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
841 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
842 {DUMMY_STRINGOP_ALGS,
843 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
844 {DUMMY_STRINGOP_ALGS,
845 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
846 };
847
848 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
849 static const
850 struct processor_costs generic32_cost = {
851 COSTS_N_INSNS (1), /* cost of an add instruction */
852 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
853 COSTS_N_INSNS (1), /* variable shift costs */
854 COSTS_N_INSNS (1), /* constant shift costs */
855 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
856 COSTS_N_INSNS (4), /* HI */
857 COSTS_N_INSNS (3), /* SI */
858 COSTS_N_INSNS (4), /* DI */
859 COSTS_N_INSNS (2)}, /* other */
860 0, /* cost of multiply per each bit set */
861 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
862 COSTS_N_INSNS (26), /* HI */
863 COSTS_N_INSNS (42), /* SI */
864 COSTS_N_INSNS (74), /* DI */
865 COSTS_N_INSNS (74)}, /* other */
866 COSTS_N_INSNS (1), /* cost of movsx */
867 COSTS_N_INSNS (1), /* cost of movzx */
868 8, /* "large" insn */
869 17, /* MOVE_RATIO */
870 4, /* cost for loading QImode using movzbl */
871 {4, 4, 4}, /* cost of loading integer registers
872 in QImode, HImode and SImode.
873 Relative to reg-reg move (2). */
874 {4, 4, 4}, /* cost of storing integer registers */
875 4, /* cost of reg,reg fld/fst */
876 {12, 12, 12}, /* cost of loading fp registers
877 in SFmode, DFmode and XFmode */
878 {6, 6, 8}, /* cost of storing fp registers
879 in SFmode, DFmode and XFmode */
880 2, /* cost of moving MMX register */
881 {8, 8}, /* cost of loading MMX registers
882 in SImode and DImode */
883 {8, 8}, /* cost of storing MMX registers
884 in SImode and DImode */
885 2, /* cost of moving SSE register */
886 {8, 8, 8}, /* cost of loading SSE registers
887 in SImode, DImode and TImode */
888 {8, 8, 8}, /* cost of storing SSE registers
889 in SImode, DImode and TImode */
890 5, /* MMX or SSE register to integer */
891 64, /* size of prefetch block */
892 6, /* number of parallel prefetches */
893 3, /* Branch cost */
894 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
895 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
896 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
897 COSTS_N_INSNS (8), /* cost of FABS instruction. */
898 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
899 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
900 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
901 DUMMY_STRINGOP_ALGS},
902 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
903 DUMMY_STRINGOP_ALGS},
904 };
905
906 const struct processor_costs *ix86_cost = &pentium_cost;
907
908 /* Processor feature/optimization bitmasks. */
909 #define m_386 (1<<PROCESSOR_I386)
910 #define m_486 (1<<PROCESSOR_I486)
911 #define m_PENT (1<<PROCESSOR_PENTIUM)
912 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
913 #define m_GEODE (1<<PROCESSOR_GEODE)
914 #define m_K6_GEODE (m_K6 | m_GEODE)
915 #define m_K6 (1<<PROCESSOR_K6)
916 #define m_ATHLON (1<<PROCESSOR_ATHLON)
917 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
918 #define m_K8 (1<<PROCESSOR_K8)
919 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
920 #define m_NOCONA (1<<PROCESSOR_NOCONA)
921 #define m_CORE2 (1<<PROCESSOR_CORE2)
922 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
923 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
924 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
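/* Each x86_* tuning mask below is tested against the bit of the active -mtune
   CPU, roughly as "x86_use_leave & (1 << ix86_tune)" via the TARGET_* macros
   in i386.h, so a feature is enabled exactly for the CPUs whose m_* bits are
   ORed into it.  */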
925
926 /* Generic instruction choice should be a common subset of the supported CPUs
927 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
928
929 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
930 Generic64 seems like a good code size tradeoff. We can't enable it for 32bit
931 generic because it does not work well with PPro based chips. */
932 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_CORE2 | m_GENERIC64;
933 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
934 const int x86_zero_extend_with_and = m_486 | m_PENT;
935 const int x86_movx = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
936 const int x86_double_with_add = ~m_386;
937 const int x86_use_bit_test = m_386;
938 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8 | m_K6 | m_CORE2 | m_GENERIC;
939 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA;
940 const int x86_3dnow_a = m_ATHLON_K8;
941 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
942 /* Branch hints were put in P4 based on simulation results. But
943 after P4 was made, no performance benefit was observed with
944 branch hints. They also increase the code size. As a result,
945 icc never generates branch hints. */
946 const int x86_branch_hints = 0;
947 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32; /*m_GENERIC | m_ATHLON_K8 ? */
948 /* We probably ought to watch for partial register stalls on the Generic32
949 compilation setting as well. However, in the current implementation the
950 partial register stalls are not eliminated very well - they can
951 be introduced via subregs synthesized by combine and can happen
952 in caller/callee saving sequences.
953 Because this option pays back little on PPro based chips and conflicts
954 with the partial reg. dependencies used by Athlon/P4 based chips, it is better
955 to leave it off for generic32 for now. */
956 const int x86_partial_reg_stall = m_PPRO;
957 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
958 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
959 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8 | m_PENT | m_CORE2 | m_GENERIC);
960 const int x86_use_mov0 = m_K6;
961 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
962 const int x86_read_modify_write = ~m_PENT;
963 const int x86_read_modify = ~(m_PENT | m_PPRO);
964 const int x86_split_long_moves = m_PPRO;
965 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8 | m_CORE2 | m_GENERIC; /* m_PENT4 ? */
966 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
967 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
968 const int x86_qimode_math = ~(0);
969 const int x86_promote_qi_regs = 0;
970 /* On PPro this flag is meant to avoid partial register stalls. Just like
971 x86_partial_reg_stall, this option might be considered for Generic32
972 if our scheme for avoiding partial stalls were more effective. */
973 const int x86_himode_math = ~(m_PPRO);
974 const int x86_promote_hi_regs = m_PPRO;
975 const int x86_sub_esp_4 = m_ATHLON_K8 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
976 const int x86_sub_esp_8 = m_ATHLON_K8 | m_PPRO | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
977 const int x86_add_esp_4 = m_ATHLON_K8 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
978 const int x86_add_esp_8 = m_ATHLON_K8 | m_PPRO | m_K6_GEODE | m_386 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
979 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
980 const int x86_partial_reg_dependency = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
981 const int x86_memory_mismatch_stall = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
982 const int x86_accumulate_outgoing_args = m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
983 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
984 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
985 const int x86_shift1 = ~m_486;
986 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
987 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
988 that treat 128bit SSE registers as single units versus K8 based chips that
989 divide SSE registers into two 64bit halves.
990 x86_sse_partial_reg_dependency promotes all store destinations to 128bit
991 to allow register renaming on 128bit SSE units, but usually results in one
992 extra microop on 64bit SSE units. Experimental results show that disabling
993 this option on P4 brings over a 20% SPECfp regression, while enabling it on
994 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
995 of moves. */
996 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
997 /* Set for machines where the type and dependencies are resolved on SSE
998 register parts instead of whole registers, so we may maintain just the
999 lower part of scalar values in the proper format, leaving the upper part
1000 undefined. */
1001 const int x86_sse_split_regs = m_ATHLON_K8;
1002 const int x86_sse_typeless_stores = m_ATHLON_K8;
1003 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1004 const int x86_use_ffreep = m_ATHLON_K8;
1005 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1006
1007 /* ??? Allowing interunit moves makes it all too easy for the compiler to put
1008 integer data in xmm registers, which results in pretty abysmal code. */
1009 const int x86_inter_unit_moves = 0 /* ~(m_ATHLON_K8) */;
1010
1011 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1012 /* Some CPU cores are not able to predict more than 4 branch instructions in
1013 the 16 byte window. */
1014 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1015 const int x86_schedule = m_PPRO | m_ATHLON_K8 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC;
1016 const int x86_use_bt = m_ATHLON_K8;
1017 /* Compare and exchange was added for 80486. */
1018 const int x86_cmpxchg = ~m_386;
1019 /* Compare and exchange 8 bytes was added for pentium. */
1020 const int x86_cmpxchg8b = ~(m_386 | m_486);
1021 /* Compare and exchange 16 bytes was added for nocona. */
1022 const int x86_cmpxchg16b = m_NOCONA;
1023 /* Exchange and add was added for 80486. */
1024 const int x86_xadd = ~m_386;
1025 /* Byteswap was added for 80486. */
1026 const int x86_bswap = ~m_386;
1027 const int x86_pad_returns = m_ATHLON_K8 | m_CORE2 | m_GENERIC;
1028
1029 static enum stringop_alg stringop_alg = no_stringop;
1030
1031 /* In case the average insn count for a single function invocation is
1032 lower than this constant, emit fast (but longer) prologue and
1033 epilogue code. */
1034 #define FAST_PROLOGUE_INSN_COUNT 20
1035
1036 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1037 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1038 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1039 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1040
1041 /* Array of the smallest class containing reg number REGNO, indexed by
1042 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1043
1044 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1045 {
1046 /* ax, dx, cx, bx */
1047 AREG, DREG, CREG, BREG,
1048 /* si, di, bp, sp */
1049 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1050 /* FP registers */
1051 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1052 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1053 /* arg pointer */
1054 NON_Q_REGS,
1055 /* flags, fpsr, fpcr, dirflag, frame */
1056 NO_REGS, NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1057 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1058 SSE_REGS, SSE_REGS,
1059 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1060 MMX_REGS, MMX_REGS,
1061 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1062 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1063 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1064 SSE_REGS, SSE_REGS,
1065 };
1066
1067 /* The "default" register map used in 32bit mode. */
1068
1069 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1070 {
1071 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1072 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1073 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1074 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1075 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1076 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1077 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1078 };
1079
1080 static int const x86_64_int_parameter_registers[6] =
1081 {
1082 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1083 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1084 };
1085
1086 static int const x86_64_int_return_registers[4] =
1087 {
1088 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1089 };
1090
1091 /* The "default" register map used in 64bit mode. */
1092 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1093 {
1094 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1095 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1096 -1, -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1097 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1098 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1099 8,9,10,11,12,13,14,15, /* extended integer registers */
1100 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1101 };
1102
1103 /* Define the register numbers to be used in Dwarf debugging information.
1104 The SVR4 reference port C compiler uses the following register numbers
1105 in its Dwarf output code:
1106 0 for %eax (gcc regno = 0)
1107 1 for %ecx (gcc regno = 2)
1108 2 for %edx (gcc regno = 1)
1109 3 for %ebx (gcc regno = 3)
1110 4 for %esp (gcc regno = 7)
1111 5 for %ebp (gcc regno = 6)
1112 6 for %esi (gcc regno = 4)
1113 7 for %edi (gcc regno = 5)
1114 The following three DWARF register numbers are never generated by
1115 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1116 believes these numbers have these meanings.
1117 8 for %eip (no gcc equivalent)
1118 9 for %eflags (gcc regno = 17)
1119 10 for %trapno (no gcc equivalent)
1120 It is not at all clear how we should number the FP stack registers
1121 for the x86 architecture. If the version of SDB on x86/svr4 were
1122 a bit less brain dead with respect to floating-point then we would
1123 have a precedent to follow with respect to DWARF register numbers
1124 for x86 FP registers, but the SDB on x86/svr4 is so completely
1125 broken with respect to FP registers that it is hardly worth thinking
1126 of it as something to strive for compatibility with.
1127 The version of x86/svr4 SDB I have at the moment does (partially)
1128 seem to believe that DWARF register number 11 is associated with
1129 the x86 register %st(0), but that's about all. Higher DWARF
1130 register numbers don't seem to be associated with anything in
1131 particular, and even for DWARF regno 11, SDB only seems to under-
1132 stand that it should say that a variable lives in %st(0) (when
1133 asked via an `=' command) if we said it was in DWARF regno 11,
1134 but SDB still prints garbage when asked for the value of the
1135 variable in question (via a `/' command).
1136 (Also note that the labels SDB prints for various FP stack regs
1137 when doing an `x' command are all wrong.)
1138 Note that these problems generally don't affect the native SVR4
1139 C compiler because it doesn't allow the use of -O with -g and
1140 because when it is *not* optimizing, it allocates a memory
1141 location for each floating-point variable, and the memory
1142 location is what gets described in the DWARF AT_location
1143 attribute for the variable in question.
1144 Regardless of the severe mental illness of the x86/svr4 SDB, we
1145 do something sensible here and we use the following DWARF
1146 register numbers. Note that these are all stack-top-relative
1147 numbers.
1148 11 for %st(0) (gcc regno = 8)
1149 12 for %st(1) (gcc regno = 9)
1150 13 for %st(2) (gcc regno = 10)
1151 14 for %st(3) (gcc regno = 11)
1152 15 for %st(4) (gcc regno = 12)
1153 16 for %st(5) (gcc regno = 13)
1154 17 for %st(6) (gcc regno = 14)
1155 18 for %st(7) (gcc regno = 15)
1156 */
1157 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1158 {
1159 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1160 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1161 -1, 9, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, dir, frame */
1162 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1163 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1164 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1165 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1166 };
1167
1168 /* Test and compare insns in i386.md store the information needed to
1169 generate branch and scc insns here. */
1170
1171 rtx ix86_compare_op0 = NULL_RTX;
1172 rtx ix86_compare_op1 = NULL_RTX;
1173 rtx ix86_compare_emitted = NULL_RTX;
1174
1175 /* Size of the register save area. */
1176 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
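/* Assuming the usual 64bit values REGPARM_MAX == 6, UNITS_PER_WORD == 8 and
   SSE_REGPARM_MAX == 8, this works out to 6*8 + 8*16 == 176 bytes, the size
   of the register save area prescribed by the psABI for varargs functions.  */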
1177
1178 /* Define the structure for the machine field in struct function. */
1179
1180 struct stack_local_entry GTY(())
1181 {
1182 unsigned short mode;
1183 unsigned short n;
1184 rtx rtl;
1185 struct stack_local_entry *next;
1186 };
1187
1188 /* Structure describing stack frame layout.
1189 Stack grows downward:
1190
1191 [arguments]
1192 <- ARG_POINTER
1193 saved pc
1194
1195 saved frame pointer if frame_pointer_needed
1196 <- HARD_FRAME_POINTER
1197 [saved regs]
1198
1199 [padding1] \
1200 )
1201 [va_arg registers] (
1202 > to_allocate <- FRAME_POINTER
1203 [frame] (
1204 )
1205 [padding2] /
1206 */
1207 struct ix86_frame
1208 {
1209 int nregs;
1210 int padding1;
1211 int va_arg_size;
1212 HOST_WIDE_INT frame;
1213 int padding2;
1214 int outgoing_arguments_size;
1215 int red_zone_size;
1216
1217 HOST_WIDE_INT to_allocate;
1218 /* The offsets relative to ARG_POINTER. */
1219 HOST_WIDE_INT frame_pointer_offset;
1220 HOST_WIDE_INT hard_frame_pointer_offset;
1221 HOST_WIDE_INT stack_pointer_offset;
1222
1223 /* When save_regs_using_mov is set, emit prologue using
1224 move instead of push instructions. */
1225 bool save_regs_using_mov;
1226 };
1227
1228 /* Code model option. */
1229 enum cmodel ix86_cmodel;
1230 /* Asm dialect. */
1231 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1232 /* TLS dialects. */
1233 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1234
1235 /* Which unit we are generating floating point math for. */
1236 enum fpmath_unit ix86_fpmath;
1237
1238 /* Which cpu are we scheduling for. */
1239 enum processor_type ix86_tune;
1240 /* Which instruction set architecture to use. */
1241 enum processor_type ix86_arch;
1242
1243 /* True if the sse prefetch instruction is not a NOOP. */
1244 int x86_prefetch_sse;
1245
1246 /* ix86_regparm_string as a number */
1247 static int ix86_regparm;
1248
1249 /* -mstackrealign option */
1250 extern int ix86_force_align_arg_pointer;
1251 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1252
1253 /* Preferred alignment for stack boundary in bits. */
1254 unsigned int ix86_preferred_stack_boundary;
1255
1256 /* Values 1-5: see jump.c */
1257 int ix86_branch_cost;
1258
1259 /* Variables which are this size or smaller are put in the data/bss
1260 or ldata/lbss sections. */
1261
1262 int ix86_section_threshold = 65536;
1263
1264 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1265 char internal_label_prefix[16];
1266 int internal_label_prefix_len;
1267 \f
1268 static bool ix86_handle_option (size_t, const char *, int);
1269 static void output_pic_addr_const (FILE *, rtx, int);
1270 static void put_condition_code (enum rtx_code, enum machine_mode,
1271 int, int, FILE *);
1272 static const char *get_some_local_dynamic_name (void);
1273 static int get_some_local_dynamic_name_1 (rtx *, void *);
1274 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1275 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1276 rtx *);
1277 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1278 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1279 enum machine_mode);
1280 static rtx get_thread_pointer (int);
1281 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1282 static void get_pc_thunk_name (char [32], unsigned int);
1283 static rtx gen_push (rtx);
1284 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1285 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1286 static struct machine_function * ix86_init_machine_status (void);
1287 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1288 static int ix86_nsaved_regs (void);
1289 static void ix86_emit_save_regs (void);
1290 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1291 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1292 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1293 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1294 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1295 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1296 static int ix86_issue_rate (void);
1297 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1298 static int ia32_multipass_dfa_lookahead (void);
1299 static void ix86_init_mmx_sse_builtins (void);
1300 static rtx x86_this_parameter (tree);
1301 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1302 HOST_WIDE_INT, tree);
1303 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1304 static void x86_file_start (void);
1305 static void ix86_reorg (void);
1306 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1307 static tree ix86_build_builtin_va_list (void);
1308 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1309 tree, int *, int);
1310 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1311 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1312 static bool ix86_vector_mode_supported_p (enum machine_mode);
1313
1314 static int ix86_address_cost (rtx);
1315 static bool ix86_cannot_force_const_mem (rtx);
1316 static rtx ix86_delegitimize_address (rtx);
1317
1318 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1319
1320 struct builtin_description;
1321 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1322 tree, rtx);
1323 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1324 tree, rtx);
1325 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1326 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1327 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1328 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1329 static rtx safe_vector_operand (rtx, enum machine_mode);
1330 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1331 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1332 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1333 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1334 static int ix86_fp_comparison_cost (enum rtx_code code);
1335 static unsigned int ix86_select_alt_pic_regnum (void);
1336 static int ix86_save_reg (unsigned int, int);
1337 static void ix86_compute_frame_layout (struct ix86_frame *);
1338 static int ix86_comp_type_attributes (tree, tree);
1339 static int ix86_function_regparm (tree, tree);
1340 const struct attribute_spec ix86_attribute_table[];
1341 static bool ix86_function_ok_for_sibcall (tree, tree);
1342 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1343 static int ix86_value_regno (enum machine_mode, tree, tree);
1344 static bool contains_128bit_aligned_vector_p (tree);
1345 static rtx ix86_struct_value_rtx (tree, int);
1346 static bool ix86_ms_bitfield_layout_p (tree);
1347 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1348 static int extended_reg_mentioned_1 (rtx *, void *);
1349 static bool ix86_rtx_costs (rtx, int, int, int *);
1350 static int min_insn_size (rtx);
1351 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1352 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1353 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1354 tree, bool);
1355 static void ix86_init_builtins (void);
1356 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1357 static tree ix86_builtin_vectorized_function (enum built_in_function, tree);
1358 static const char *ix86_mangle_fundamental_type (tree);
1359 static tree ix86_stack_protect_fail (void);
1360 static rtx ix86_internal_arg_pointer (void);
1361 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1362
1363 /* This function is only used on Solaris. */
1364 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1365 ATTRIBUTE_UNUSED;
1366
1367 /* Register class used for passing a given 64-bit part of an argument.
1368 These represent classes as documented by the psABI, with the exception
1369 of the SSESF and SSEDF classes, which are basically the SSE class; GCC
1370 just uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1371
1372 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1373 whenever possible (the upper half contains only padding).
1374 */
1375 enum x86_64_reg_class
1376 {
1377 X86_64_NO_CLASS,
1378 X86_64_INTEGER_CLASS,
1379 X86_64_INTEGERSI_CLASS,
1380 X86_64_SSE_CLASS,
1381 X86_64_SSESF_CLASS,
1382 X86_64_SSEDF_CLASS,
1383 X86_64_SSEUP_CLASS,
1384 X86_64_X87_CLASS,
1385 X86_64_X87UP_CLASS,
1386 X86_64_COMPLEX_X87_CLASS,
1387 X86_64_MEMORY_CLASS
1388 };
1389 static const char * const x86_64_reg_class_name[] = {
1390 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1391 "sseup", "x87", "x87up", "cplx87", "no"
1392 };
1393
1394 #define MAX_CLASSES 4
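
/* A minimal, self-contained sketch (not part of GCC) of the idea behind the
   classification above: every eightbyte of an argument gets one of the
   classes, and partial classifications of the same eightbyte are merged,
   with INTEGER winning over SSE.  The names and rules below are simplified
   assumptions for illustration only; the block is guarded by #if 0 so it is
   never compiled.  */
#if 0
#include <stdio.h>

enum demo_class { DEMO_NO, DEMO_INTEGER, DEMO_SSE, DEMO_MEMORY };

/* Merge two partial classifications of one eightbyte (simplified).  */
static enum demo_class
demo_merge (enum demo_class a, enum demo_class b)
{
  if (a == b || b == DEMO_NO)
    return a;
  if (a == DEMO_NO)
    return b;
  if (a == DEMO_MEMORY || b == DEMO_MEMORY)
    return DEMO_MEMORY;
  if (a == DEMO_INTEGER || b == DEMO_INTEGER)
    return DEMO_INTEGER;
  return DEMO_SSE;
}

int
main (void)
{
  /* struct { double d; int i; }: the first eightbyte holds the double
     (SSE class), the second holds the int plus padding (INTEGER class).  */
  enum demo_class first = DEMO_SSE;
  enum demo_class second = demo_merge (DEMO_INTEGER, DEMO_NO);

  printf ("eightbyte 0 = %d, eightbyte 1 = %d\n", first, second);
  return 0;
}
#endif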
1395
1396 /* Table of constants used by fldpi, fldln2, etc.... */
1397 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1398 static bool ext_80387_constants_init = 0;
1399 static void init_ext_80387_constants (void);
1400 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1401 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1402 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1403 static section *x86_64_elf_select_section (tree decl, int reloc,
1404 unsigned HOST_WIDE_INT align)
1405 ATTRIBUTE_UNUSED;
1406 \f
1407 /* Initialize the GCC target structure. */
1408 #undef TARGET_ATTRIBUTE_TABLE
1409 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1410 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1411 # undef TARGET_MERGE_DECL_ATTRIBUTES
1412 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1413 #endif
1414
1415 #undef TARGET_COMP_TYPE_ATTRIBUTES
1416 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1417
1418 #undef TARGET_INIT_BUILTINS
1419 #define TARGET_INIT_BUILTINS ix86_init_builtins
1420 #undef TARGET_EXPAND_BUILTIN
1421 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1422 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1423 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1424
1425 #undef TARGET_ASM_FUNCTION_EPILOGUE
1426 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1427
1428 #undef TARGET_ENCODE_SECTION_INFO
1429 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1430 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1431 #else
1432 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1433 #endif
1434
1435 #undef TARGET_ASM_OPEN_PAREN
1436 #define TARGET_ASM_OPEN_PAREN ""
1437 #undef TARGET_ASM_CLOSE_PAREN
1438 #define TARGET_ASM_CLOSE_PAREN ""
1439
1440 #undef TARGET_ASM_ALIGNED_HI_OP
1441 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1442 #undef TARGET_ASM_ALIGNED_SI_OP
1443 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1444 #ifdef ASM_QUAD
1445 #undef TARGET_ASM_ALIGNED_DI_OP
1446 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1447 #endif
1448
1449 #undef TARGET_ASM_UNALIGNED_HI_OP
1450 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1451 #undef TARGET_ASM_UNALIGNED_SI_OP
1452 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1453 #undef TARGET_ASM_UNALIGNED_DI_OP
1454 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1455
1456 #undef TARGET_SCHED_ADJUST_COST
1457 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1458 #undef TARGET_SCHED_ISSUE_RATE
1459 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1460 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1461 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1462 ia32_multipass_dfa_lookahead
1463
1464 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1465 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1466
1467 #ifdef HAVE_AS_TLS
1468 #undef TARGET_HAVE_TLS
1469 #define TARGET_HAVE_TLS true
1470 #endif
1471 #undef TARGET_CANNOT_FORCE_CONST_MEM
1472 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1473 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1474 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1475
1476 #undef TARGET_DELEGITIMIZE_ADDRESS
1477 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1478
1479 #undef TARGET_MS_BITFIELD_LAYOUT_P
1480 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1481
1482 #if TARGET_MACHO
1483 #undef TARGET_BINDS_LOCAL_P
1484 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1485 #endif
1486
1487 #undef TARGET_ASM_OUTPUT_MI_THUNK
1488 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1489 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1490 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1491
1492 #undef TARGET_ASM_FILE_START
1493 #define TARGET_ASM_FILE_START x86_file_start
1494
1495 #undef TARGET_DEFAULT_TARGET_FLAGS
1496 #define TARGET_DEFAULT_TARGET_FLAGS \
1497 (TARGET_DEFAULT \
1498 | TARGET_64BIT_DEFAULT \
1499 | TARGET_SUBTARGET_DEFAULT \
1500 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1501
1502 #undef TARGET_HANDLE_OPTION
1503 #define TARGET_HANDLE_OPTION ix86_handle_option
1504
1505 #undef TARGET_RTX_COSTS
1506 #define TARGET_RTX_COSTS ix86_rtx_costs
1507 #undef TARGET_ADDRESS_COST
1508 #define TARGET_ADDRESS_COST ix86_address_cost
1509
1510 #undef TARGET_FIXED_CONDITION_CODE_REGS
1511 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1512 #undef TARGET_CC_MODES_COMPATIBLE
1513 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1514
1515 #undef TARGET_MACHINE_DEPENDENT_REORG
1516 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1517
1518 #undef TARGET_BUILD_BUILTIN_VA_LIST
1519 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1520
1521 #undef TARGET_MD_ASM_CLOBBERS
1522 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1523
1524 #undef TARGET_PROMOTE_PROTOTYPES
1525 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1526 #undef TARGET_STRUCT_VALUE_RTX
1527 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1528 #undef TARGET_SETUP_INCOMING_VARARGS
1529 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1530 #undef TARGET_MUST_PASS_IN_STACK
1531 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1532 #undef TARGET_PASS_BY_REFERENCE
1533 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1534 #undef TARGET_INTERNAL_ARG_POINTER
1535 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1536 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1537 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1538
1539 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1540 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1541
1542 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1543 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1544
1545 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1546 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1547
1548 #ifdef HAVE_AS_TLS
1549 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1550 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1551 #endif
1552
1553 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1554 #undef TARGET_INSERT_ATTRIBUTES
1555 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1556 #endif
1557
1558 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1559 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1560
1561 #undef TARGET_STACK_PROTECT_FAIL
1562 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1563
1564 #undef TARGET_FUNCTION_VALUE
1565 #define TARGET_FUNCTION_VALUE ix86_function_value
1566
1567 struct gcc_target targetm = TARGET_INITIALIZER;
1568
1569 \f
1570 /* The svr4 ABI for the i386 says that records and unions are returned
1571 in memory. */
1572 #ifndef DEFAULT_PCC_STRUCT_RETURN
1573 #define DEFAULT_PCC_STRUCT_RETURN 1
1574 #endif
1575
1576 /* Implement TARGET_HANDLE_OPTION. */
1577
1578 static bool
1579 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1580 {
1581 switch (code)
1582 {
1583 case OPT_m3dnow:
1584 if (!value)
1585 {
1586 target_flags &= ~MASK_3DNOW_A;
1587 target_flags_explicit |= MASK_3DNOW_A;
1588 }
1589 return true;
1590
1591 case OPT_mmmx:
1592 if (!value)
1593 {
1594 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1595 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1596 }
1597 return true;
1598
1599 case OPT_msse:
1600 if (!value)
1601 {
1602 target_flags &= ~(MASK_SSE2 | MASK_SSE3);
1603 target_flags_explicit |= MASK_SSE2 | MASK_SSE3;
1604 }
1605 return true;
1606
1607 case OPT_msse2:
1608 if (!value)
1609 {
1610 target_flags &= ~MASK_SSE3;
1611 target_flags_explicit |= MASK_SSE3;
1612 }
1613 return true;
1614
1615 default:
1616 return true;
1617 }
1618 }
1619
1620 /* Sometimes certain combinations of command options do not make
1621 sense on a particular target machine. You can define a macro
1622 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1623 defined, is executed once just after all the command options have
1624 been parsed.
1625
1626 Don't use this macro to turn on various extra optimizations for
1627 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1628
1629 void
1630 override_options (void)
1631 {
1632 int i;
1633 int ix86_tune_defaulted = 0;
1634
1635 /* Comes from final.c -- no real reason to change it. */
1636 #define MAX_CODE_ALIGN 16
1637
1638 static struct ptt
1639 {
1640 const struct processor_costs *cost; /* Processor costs */
1641 const int target_enable; /* Target flags to enable. */
1642 const int target_disable; /* Target flags to disable. */
1643 const int align_loop; /* Default alignments. */
1644 const int align_loop_max_skip;
1645 const int align_jump;
1646 const int align_jump_max_skip;
1647 const int align_func;
1648 }
1649 const processor_target_table[PROCESSOR_max] =
1650 {
1651 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1652 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1653 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1654 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1655 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1656 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1657 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1658 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1659 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1660 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1661 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1662 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1663 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16}
1664 };
1665
1666 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1667 static struct pta
1668 {
1669 const char *const name; /* processor name or nickname. */
1670 const enum processor_type processor;
1671 const enum pta_flags
1672 {
1673 PTA_SSE = 1,
1674 PTA_SSE2 = 2,
1675 PTA_SSE3 = 4,
1676 PTA_MMX = 8,
1677 PTA_PREFETCH_SSE = 16,
1678 PTA_3DNOW = 32,
1679 PTA_3DNOW_A = 64,
1680 PTA_64BIT = 128,
1681 PTA_SSSE3 = 256
1682 } flags;
1683 }
1684 const processor_alias_table[] =
1685 {
1686 {"i386", PROCESSOR_I386, 0},
1687 {"i486", PROCESSOR_I486, 0},
1688 {"i586", PROCESSOR_PENTIUM, 0},
1689 {"pentium", PROCESSOR_PENTIUM, 0},
1690 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1691 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1692 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1693 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1694 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1695 {"i686", PROCESSOR_PENTIUMPRO, 0},
1696 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1697 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1698 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1699 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1700 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1701 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1702 | PTA_MMX | PTA_PREFETCH_SSE},
1703 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1704 | PTA_MMX | PTA_PREFETCH_SSE},
1705 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1706 | PTA_MMX | PTA_PREFETCH_SSE},
1707 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1708 | PTA_MMX | PTA_PREFETCH_SSE},
1709 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3
1710 | PTA_64BIT | PTA_MMX
1711 | PTA_PREFETCH_SSE},
1712 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1713 | PTA_3DNOW_A},
1714 {"k6", PROCESSOR_K6, PTA_MMX},
1715 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1716 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1717 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1718 | PTA_3DNOW_A},
1719 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1720 | PTA_3DNOW | PTA_3DNOW_A},
1721 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1722 | PTA_3DNOW_A | PTA_SSE},
1723 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1724 | PTA_3DNOW_A | PTA_SSE},
1725 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1726 | PTA_3DNOW_A | PTA_SSE},
1727 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1728 | PTA_SSE | PTA_SSE2 },
1729 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1730 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1731 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1732 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1733 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1734 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1735 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1736 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1737 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1738 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1739 };
1740
1741 int const pta_size = ARRAY_SIZE (processor_alias_table);
1742
1743 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1744 SUBTARGET_OVERRIDE_OPTIONS;
1745 #endif
1746
1747 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1748 SUBSUBTARGET_OVERRIDE_OPTIONS;
1749 #endif
1750
1751 /* On Darwin (Mach-O), -fPIC is the default for x86_64. */
1752 if (TARGET_MACHO && TARGET_64BIT)
1753 flag_pic = 2;
1754
1755 /* Set the default values for switches whose default depends on TARGET_64BIT
1756 in case they weren't overwritten by command line options. */
1757 if (TARGET_64BIT)
1758 {
1759 /* Mach-O doesn't support omitting the frame pointer for now. */
1760 if (flag_omit_frame_pointer == 2)
1761 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1762 if (flag_asynchronous_unwind_tables == 2)
1763 flag_asynchronous_unwind_tables = 1;
1764 if (flag_pcc_struct_return == 2)
1765 flag_pcc_struct_return = 0;
1766 }
1767 else
1768 {
1769 if (flag_omit_frame_pointer == 2)
1770 flag_omit_frame_pointer = 0;
1771 if (flag_asynchronous_unwind_tables == 2)
1772 flag_asynchronous_unwind_tables = 0;
1773 if (flag_pcc_struct_return == 2)
1774 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1775 }
1776
1777 /* Need to check -mtune=generic first. */
1778 if (ix86_tune_string)
1779 {
1780 if (!strcmp (ix86_tune_string, "generic")
1781 || !strcmp (ix86_tune_string, "i686")
1782 /* As special support for cross compilers we read -mtune=native
1783 as -mtune=generic. With native compilers we won't see
1784 -mtune=native here, as the driver has already rewritten it. */
1785 || !strcmp (ix86_tune_string, "native"))
1786 {
1787 if (TARGET_64BIT)
1788 ix86_tune_string = "generic64";
1789 else
1790 ix86_tune_string = "generic32";
1791 }
1792 else if (!strncmp (ix86_tune_string, "generic", 7))
1793 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1794 }
1795 else
1796 {
1797 if (ix86_arch_string)
1798 ix86_tune_string = ix86_arch_string;
1799 if (!ix86_tune_string)
1800 {
1801 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1802 ix86_tune_defaulted = 1;
1803 }
1804
1805 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1806 need to use a sensible tune option. */
1807 if (!strcmp (ix86_tune_string, "generic")
1808 || !strcmp (ix86_tune_string, "x86-64")
1809 || !strcmp (ix86_tune_string, "i686"))
1810 {
1811 if (TARGET_64BIT)
1812 ix86_tune_string = "generic64";
1813 else
1814 ix86_tune_string = "generic32";
1815 }
1816 }
1817 if (ix86_stringop_string)
1818 {
1819 if (!strcmp (ix86_stringop_string, "rep_byte"))
1820 stringop_alg = rep_prefix_1_byte;
1821 else if (!strcmp (ix86_stringop_string, "libcall"))
1822 stringop_alg = libcall;
1823 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1824 stringop_alg = rep_prefix_4_byte;
1825 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1826 stringop_alg = rep_prefix_8_byte;
1827 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1828 stringop_alg = loop_1_byte;
1829 else if (!strcmp (ix86_stringop_string, "loop"))
1830 stringop_alg = loop;
1831 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1832 stringop_alg = unrolled_loop;
1833 else
1834 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1835 }
1836 if (!strcmp (ix86_tune_string, "x86-64"))
1837 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1838 "-mtune=generic instead as appropriate.");
1839
1840 if (!ix86_arch_string)
1841 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1842 if (!strcmp (ix86_arch_string, "generic"))
1843 error ("generic CPU can be used only for -mtune= switch");
1844 if (!strncmp (ix86_arch_string, "generic", 7))
1845 error ("bad value (%s) for -march= switch", ix86_arch_string);
1846
1847 if (ix86_cmodel_string != 0)
1848 {
1849 if (!strcmp (ix86_cmodel_string, "small"))
1850 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1851 else if (!strcmp (ix86_cmodel_string, "medium"))
1852 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1853 else if (flag_pic)
1854 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
1855 else if (!strcmp (ix86_cmodel_string, "32"))
1856 ix86_cmodel = CM_32;
1857 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1858 ix86_cmodel = CM_KERNEL;
1859 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
1860 ix86_cmodel = CM_LARGE;
1861 else
1862 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1863 }
1864 else
1865 {
1866 ix86_cmodel = CM_32;
1867 if (TARGET_64BIT)
1868 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1869 }
1870 if (ix86_asm_string != 0)
1871 {
1872 if (! TARGET_MACHO
1873 && !strcmp (ix86_asm_string, "intel"))
1874 ix86_asm_dialect = ASM_INTEL;
1875 else if (!strcmp (ix86_asm_string, "att"))
1876 ix86_asm_dialect = ASM_ATT;
1877 else
1878 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1879 }
1880 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1881 error ("code model %qs not supported in the %s bit mode",
1882 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1883 if (ix86_cmodel == CM_LARGE)
1884 sorry ("code model %<large%> not supported yet");
1885 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1886 sorry ("%i-bit mode not compiled in",
1887 (target_flags & MASK_64BIT) ? 64 : 32);
1888
1889 for (i = 0; i < pta_size; i++)
1890 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1891 {
1892 ix86_arch = processor_alias_table[i].processor;
1893 /* Default cpu tuning to the architecture. */
1894 ix86_tune = ix86_arch;
1895 if (processor_alias_table[i].flags & PTA_MMX
1896 && !(target_flags_explicit & MASK_MMX))
1897 target_flags |= MASK_MMX;
1898 if (processor_alias_table[i].flags & PTA_3DNOW
1899 && !(target_flags_explicit & MASK_3DNOW))
1900 target_flags |= MASK_3DNOW;
1901 if (processor_alias_table[i].flags & PTA_3DNOW_A
1902 && !(target_flags_explicit & MASK_3DNOW_A))
1903 target_flags |= MASK_3DNOW_A;
1904 if (processor_alias_table[i].flags & PTA_SSE
1905 && !(target_flags_explicit & MASK_SSE))
1906 target_flags |= MASK_SSE;
1907 if (processor_alias_table[i].flags & PTA_SSE2
1908 && !(target_flags_explicit & MASK_SSE2))
1909 target_flags |= MASK_SSE2;
1910 if (processor_alias_table[i].flags & PTA_SSE3
1911 && !(target_flags_explicit & MASK_SSE3))
1912 target_flags |= MASK_SSE3;
1913 if (processor_alias_table[i].flags & PTA_SSSE3
1914 && !(target_flags_explicit & MASK_SSSE3))
1915 target_flags |= MASK_SSSE3;
1916 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1917 x86_prefetch_sse = true;
1918 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1919 error ("CPU you selected does not support x86-64 "
1920 "instruction set");
1921 break;
1922 }
1923
1924 if (i == pta_size)
1925 error ("bad value (%s) for -march= switch", ix86_arch_string);
1926
1927 for (i = 0; i < pta_size; i++)
1928 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1929 {
1930 ix86_tune = processor_alias_table[i].processor;
1931 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1932 {
1933 if (ix86_tune_defaulted)
1934 {
1935 ix86_tune_string = "x86-64";
1936 for (i = 0; i < pta_size; i++)
1937 if (! strcmp (ix86_tune_string,
1938 processor_alias_table[i].name))
1939 break;
1940 ix86_tune = processor_alias_table[i].processor;
1941 }
1942 else
1943 error ("CPU you selected does not support x86-64 "
1944 "instruction set");
1945 }
1946 /* Intel CPUs have always interpreted SSE prefetch instructions as
1947 NOPs; so, we can enable SSE prefetch instructions even when
1948 -mtune (rather than -march) points us to a processor that has them.
1949 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1950 higher processors. */
1951 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1952 x86_prefetch_sse = true;
1953 break;
1954 }
1955 if (i == pta_size)
1956 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1957
1958 if (optimize_size)
1959 ix86_cost = &size_cost;
1960 else
1961 ix86_cost = processor_target_table[ix86_tune].cost;
1962 target_flags |= processor_target_table[ix86_tune].target_enable;
1963 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1964
1965 /* Arrange to set up i386_stack_locals for all functions. */
1966 init_machine_status = ix86_init_machine_status;
1967
1968 /* Validate -mregparm= value. */
1969 if (ix86_regparm_string)
1970 {
1971 i = atoi (ix86_regparm_string);
1972 if (i < 0 || i > REGPARM_MAX)
1973 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1974 else
1975 ix86_regparm = i;
1976 }
1977 else
1978 if (TARGET_64BIT)
1979 ix86_regparm = REGPARM_MAX;
1980
1981 /* If the user has provided any of the -malign-* options,
1982 warn and use that value only if -falign-* is not set.
1983 Remove this code in GCC 3.2 or later. */
1984 if (ix86_align_loops_string)
1985 {
1986 warning (0, "-malign-loops is obsolete, use -falign-loops");
1987 if (align_loops == 0)
1988 {
1989 i = atoi (ix86_align_loops_string);
1990 if (i < 0 || i > MAX_CODE_ALIGN)
1991 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
1992 else
1993 align_loops = 1 << i;
1994 }
1995 }
1996
1997 if (ix86_align_jumps_string)
1998 {
1999 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2000 if (align_jumps == 0)
2001 {
2002 i = atoi (ix86_align_jumps_string);
2003 if (i < 0 || i > MAX_CODE_ALIGN)
2004 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2005 else
2006 align_jumps = 1 << i;
2007 }
2008 }
2009
2010 if (ix86_align_funcs_string)
2011 {
2012 warning (0, "-malign-functions is obsolete, use -falign-functions");
2013 if (align_functions == 0)
2014 {
2015 i = atoi (ix86_align_funcs_string);
2016 if (i < 0 || i > MAX_CODE_ALIGN)
2017 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2018 else
2019 align_functions = 1 << i;
2020 }
2021 }
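
/* Illustrative sketch (not GCC code, never compiled): the obsolete
   -malign-* options above take a power-of-two exponent, so -malign-loops=N
   becomes an alignment of 1 << N bytes, whereas the -falign-* options take
   the byte count directly.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int n;

  for (n = 0; n <= 4; n++)
    printf ("-malign-loops=%d  ->  align_loops = %d bytes\n", n, 1 << n);
  return 0;
}
#endif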
2022
2023 /* Default align_* from the processor table. */
2024 if (align_loops == 0)
2025 {
2026 align_loops = processor_target_table[ix86_tune].align_loop;
2027 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2028 }
2029 if (align_jumps == 0)
2030 {
2031 align_jumps = processor_target_table[ix86_tune].align_jump;
2032 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2033 }
2034 if (align_functions == 0)
2035 {
2036 align_functions = processor_target_table[ix86_tune].align_func;
2037 }
2038
2039 /* Validate -mbranch-cost= value, or provide default. */
2040 ix86_branch_cost = ix86_cost->branch_cost;
2041 if (ix86_branch_cost_string)
2042 {
2043 i = atoi (ix86_branch_cost_string);
2044 if (i < 0 || i > 5)
2045 error ("-mbranch-cost=%d is not between 0 and 5", i);
2046 else
2047 ix86_branch_cost = i;
2048 }
2049 if (ix86_section_threshold_string)
2050 {
2051 i = atoi (ix86_section_threshold_string);
2052 if (i < 0)
2053 error ("-mlarge-data-threshold=%d is negative", i);
2054 else
2055 ix86_section_threshold = i;
2056 }
2057
2058 if (ix86_tls_dialect_string)
2059 {
2060 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2061 ix86_tls_dialect = TLS_DIALECT_GNU;
2062 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2063 ix86_tls_dialect = TLS_DIALECT_GNU2;
2064 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2065 ix86_tls_dialect = TLS_DIALECT_SUN;
2066 else
2067 error ("bad value (%s) for -mtls-dialect= switch",
2068 ix86_tls_dialect_string);
2069 }
2070
2071 /* Keep nonleaf frame pointers. */
2072 if (flag_omit_frame_pointer)
2073 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2074 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2075 flag_omit_frame_pointer = 1;
2076
2077 /* If we're doing fast math, we don't care about comparison order
2078 wrt NaNs. This lets us use a shorter comparison sequence. */
2079 if (flag_finite_math_only)
2080 target_flags &= ~MASK_IEEE_FP;
2081
2082 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2083 since the insns won't need emulation. */
2084 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2085 target_flags &= ~MASK_NO_FANCY_MATH_387;
2086
2087 /* Likewise, if the target doesn't have a 387, or we've specified
2088 software floating point, don't use 387 inline intrinsics. */
2089 if (!TARGET_80387)
2090 target_flags |= MASK_NO_FANCY_MATH_387;
2091
2092 /* Turn on SSE3 builtins for -mssse3. */
2093 if (TARGET_SSSE3)
2094 target_flags |= MASK_SSE3;
2095
2096 /* Turn on SSE2 builtins for -msse3. */
2097 if (TARGET_SSE3)
2098 target_flags |= MASK_SSE2;
2099
2100 /* Turn on SSE builtins for -msse2. */
2101 if (TARGET_SSE2)
2102 target_flags |= MASK_SSE;
2103
2104 /* Turn on MMX builtins for -msse. */
2105 if (TARGET_SSE)
2106 {
2107 target_flags |= MASK_MMX & ~target_flags_explicit;
2108 x86_prefetch_sse = true;
2109 }
2110
2111 /* Turn on MMX builtins for 3Dnow. */
2112 if (TARGET_3DNOW)
2113 target_flags |= MASK_MMX;
2114
2115 if (TARGET_64BIT)
2116 {
2117 if (TARGET_ALIGN_DOUBLE)
2118 error ("-malign-double makes no sense in the 64bit mode");
2119 if (TARGET_RTD)
2120 error ("-mrtd calling convention not supported in the 64bit mode");
2121
2122 /* Enable by default the SSE and MMX builtins. Do allow the user to
2123 explicitly disable any of these. In particular, disabling SSE and
2124 MMX for kernel code is extremely useful. */
2125 target_flags
2126 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2127 & ~target_flags_explicit);
2128 }
2129 else
2130 {
2131 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2132 when the programmer takes care to keep the stack from being destroyed. */
2133 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2134 target_flags |= MASK_NO_RED_ZONE;
2135 }
2136
2137 /* Validate -mpreferred-stack-boundary= value, or provide default.
2138 The default of 128 bits is for Pentium III's SSE __m128. We can't
2139 lower it just because of optimize_size; otherwise we could not mix
2140 object files compiled with -Os and -On. */
2141 ix86_preferred_stack_boundary = 128;
2142 if (ix86_preferred_stack_boundary_string)
2143 {
2144 i = atoi (ix86_preferred_stack_boundary_string);
2145 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2146 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2147 TARGET_64BIT ? 4 : 2);
2148 else
2149 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2150 }
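
/* Illustrative sketch (not GCC code, never compiled): the option value is a
   power-of-two exponent in bytes, converted to bits above, so
   -mpreferred-stack-boundary=4 yields (1 << 4) * 8 = 128 bits, the default
   needed for SSE __m128 values.  */
#if 0
#include <stdio.h>

#define DEMO_BITS_PER_UNIT 8

int
main (void)
{
  int i = 4;  /* as if -mpreferred-stack-boundary=4 had been given */

  printf ("preferred stack boundary = %d bits\n",
          (1 << i) * DEMO_BITS_PER_UNIT);
  return 0;
}
#endif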
2151
2152 /* Accept -mx87regparm only if 80387 support is enabled. */
2153 if (TARGET_X87REGPARM
2154 && ! TARGET_80387)
2155 error ("-mx87regparm used without 80387 enabled");
2156
2157 /* Accept -msseregparm only if at least SSE support is enabled. */
2158 if (TARGET_SSEREGPARM
2159 && ! TARGET_SSE)
2160 error ("-msseregparm used without SSE enabled");
2161
2162 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2163
2164 if (ix86_fpmath_string != 0)
2165 {
2166 if (! strcmp (ix86_fpmath_string, "387"))
2167 ix86_fpmath = FPMATH_387;
2168 else if (! strcmp (ix86_fpmath_string, "sse"))
2169 {
2170 if (!TARGET_SSE)
2171 {
2172 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2173 ix86_fpmath = FPMATH_387;
2174 }
2175 else
2176 ix86_fpmath = FPMATH_SSE;
2177 }
2178 else if (! strcmp (ix86_fpmath_string, "387,sse")
2179 || ! strcmp (ix86_fpmath_string, "sse,387"))
2180 {
2181 if (!TARGET_SSE)
2182 {
2183 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2184 ix86_fpmath = FPMATH_387;
2185 }
2186 else if (!TARGET_80387)
2187 {
2188 warning (0, "387 instruction set disabled, using SSE arithmetics");
2189 ix86_fpmath = FPMATH_SSE;
2190 }
2191 else
2192 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2193 }
2194 else
2195 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2196 }
2197
2198 /* If the i387 is disabled, then do not return values in it. */
2199 if (!TARGET_80387)
2200 target_flags &= ~MASK_FLOAT_RETURNS;
2201
2202 if ((x86_accumulate_outgoing_args & TUNEMASK)
2203 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2204 && !optimize_size)
2205 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2206
2207 /* ??? Unwind info is not correct around the CFG unless either a frame
2208 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2209 unwind info generation to be aware of the CFG and propagating states
2210 around edges. */
2211 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2212 || flag_exceptions || flag_non_call_exceptions)
2213 && flag_omit_frame_pointer
2214 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2215 {
2216 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2217 warning (0, "unwind tables currently require either a frame pointer "
2218 "or -maccumulate-outgoing-args for correctness");
2219 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2220 }
2221
2222 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2223 {
2224 char *p;
2225 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2226 p = strchr (internal_label_prefix, 'X');
2227 internal_label_prefix_len = p - internal_label_prefix;
2228 *p = '\0';
2229 }
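
/* Illustrative sketch (not GCC code, never compiled) of the trick above:
   generate a label whose name contains the literal "LX", then cut the
   string at the 'X' to recover whatever prefix the target prepends.  The
   "." local label prefix below is only an assumption about a typical ELF
   target.  */
#if 0
#include <stdio.h>
#include <string.h>

int
main (void)
{
  char label[16];
  char *p;
  int prefix_len;

  /* Stand-in for ASM_GENERATE_INTERNAL_LABEL (label, "LX", 0), assuming the
     target's local label prefix is ".".  */
  snprintf (label, sizeof label, "%s%s%u", ".", "LX", 0u);

  p = strchr (label, 'X');
  prefix_len = (int) (p - label);
  *p = '\0';

  printf ("prefix = \"%s\", length = %d\n", label, prefix_len);
  return 0;
}
#endif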
2230
2231 /* When the scheduling description is not available, disable the scheduler
2232 pass so it won't slow down compilation and make x87 code slower. */
2233 if (!TARGET_SCHEDULE)
2234 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2235
2236 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2237 set_param_value ("simultaneous-prefetches",
2238 ix86_cost->simultaneous_prefetches);
2239 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2240 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2241 }
2242 \f
2243 /* Switch to the appropriate section for output of DECL.
2244 DECL is either a `VAR_DECL' node or a constant of some sort.
2245 RELOC indicates whether forming the initial value of DECL requires
2246 link-time relocations. */
2247
2248 static section *
2249 x86_64_elf_select_section (tree decl, int reloc,
2250 unsigned HOST_WIDE_INT align)
2251 {
2252 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2253 && ix86_in_large_data_p (decl))
2254 {
2255 const char *sname = NULL;
2256 unsigned int flags = SECTION_WRITE;
2257 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2258 {
2259 case SECCAT_DATA:
2260 sname = ".ldata";
2261 break;
2262 case SECCAT_DATA_REL:
2263 sname = ".ldata.rel";
2264 break;
2265 case SECCAT_DATA_REL_LOCAL:
2266 sname = ".ldata.rel.local";
2267 break;
2268 case SECCAT_DATA_REL_RO:
2269 sname = ".ldata.rel.ro";
2270 break;
2271 case SECCAT_DATA_REL_RO_LOCAL:
2272 sname = ".ldata.rel.ro.local";
2273 break;
2274 case SECCAT_BSS:
2275 sname = ".lbss";
2276 flags |= SECTION_BSS;
2277 break;
2278 case SECCAT_RODATA:
2279 case SECCAT_RODATA_MERGE_STR:
2280 case SECCAT_RODATA_MERGE_STR_INIT:
2281 case SECCAT_RODATA_MERGE_CONST:
2282 sname = ".lrodata";
2283 flags = 0;
2284 break;
2285 case SECCAT_SRODATA:
2286 case SECCAT_SDATA:
2287 case SECCAT_SBSS:
2288 gcc_unreachable ();
2289 case SECCAT_TEXT:
2290 case SECCAT_TDATA:
2291 case SECCAT_TBSS:
2292 /* We don't split these for the medium model. Place them into
2293 default sections and hope for the best. */
2294 break;
2295 }
2296 if (sname)
2297 {
2298 /* We might get called with string constants, but get_named_section
2299 doesn't like them as they are not DECLs. Also, we need to set
2300 flags in that case. */
2301 if (!DECL_P (decl))
2302 return get_section (sname, flags, NULL);
2303 return get_named_section (decl, sname, reloc);
2304 }
2305 }
2306 return default_elf_select_section (decl, reloc, align);
2307 }
2308
2309 /* Build up a unique section name, expressed as a
2310 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2311 RELOC indicates whether the initial value of DECL requires
2312 link-time relocations. */
2313
2314 static void
2315 x86_64_elf_unique_section (tree decl, int reloc)
2316 {
2317 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2318 && ix86_in_large_data_p (decl))
2319 {
2320 const char *prefix = NULL;
2321 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2322 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2323
2324 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2325 {
2326 case SECCAT_DATA:
2327 case SECCAT_DATA_REL:
2328 case SECCAT_DATA_REL_LOCAL:
2329 case SECCAT_DATA_REL_RO:
2330 case SECCAT_DATA_REL_RO_LOCAL:
2331 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2332 break;
2333 case SECCAT_BSS:
2334 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2335 break;
2336 case SECCAT_RODATA:
2337 case SECCAT_RODATA_MERGE_STR:
2338 case SECCAT_RODATA_MERGE_STR_INIT:
2339 case SECCAT_RODATA_MERGE_CONST:
2340 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2341 break;
2342 case SECCAT_SRODATA:
2343 case SECCAT_SDATA:
2344 case SECCAT_SBSS:
2345 gcc_unreachable ();
2346 case SECCAT_TEXT:
2347 case SECCAT_TDATA:
2348 case SECCAT_TBSS:
2349 /* We don't split these for the medium model. Place them into
2350 default sections and hope for the best. */
2351 break;
2352 }
2353 if (prefix)
2354 {
2355 const char *name;
2356 size_t nlen, plen;
2357 char *string;
2358 plen = strlen (prefix);
2359
2360 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2361 name = targetm.strip_name_encoding (name);
2362 nlen = strlen (name);
2363
2364 string = alloca (nlen + plen + 1);
2365 memcpy (string, prefix, plen);
2366 memcpy (string + plen, name, nlen + 1);
2367
2368 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2369 return;
2370 }
2371 }
2372 default_unique_section (decl, reloc);
2373 }
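
/* Illustrative sketch (not GCC code, never compiled): how the prefix chosen
   above combines with the decl's assembler name.  malloc replaces alloca so
   the example is a complete program; the symbol name is made up.  */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *
build_section_name (const char *prefix, const char *name)
{
  size_t plen = strlen (prefix), nlen = strlen (name);
  char *string = malloc (plen + nlen + 1);

  memcpy (string, prefix, plen);
  memcpy (string + plen, name, nlen + 1);   /* copies the trailing NUL too */
  return string;
}

int
main (void)
{
  char *s = build_section_name (".ldata.", "my_big_array");

  printf ("%s\n", s);   /* prints ".ldata.my_big_array" */
  free (s);
  return 0;
}
#endif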
2374
2375 #ifdef COMMON_ASM_OP
2376 /* This says how to output assembler code to declare an
2377 uninitialized external linkage data object.
2378
2379 For medium model x86-64 we need to use the .largecomm directive for
2380 large objects. */
2381 void
2382 x86_elf_aligned_common (FILE *file,
2383 const char *name, unsigned HOST_WIDE_INT size,
2384 int align)
2385 {
2386 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2387 && size > (unsigned int)ix86_section_threshold)
2388 fprintf (file, ".largecomm\t");
2389 else
2390 fprintf (file, "%s", COMMON_ASM_OP);
2391 assemble_name (file, name);
2392 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2393 size, align / BITS_PER_UNIT);
2394 }
2395 #endif
2396 /* Utility function for targets to use in implementing
2397 ASM_OUTPUT_ALIGNED_BSS. */
2398
2399 void
2400 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2401 const char *name, unsigned HOST_WIDE_INT size,
2402 int align)
2403 {
2404 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2405 && size > (unsigned int)ix86_section_threshold)
2406 switch_to_section (get_named_section (decl, ".lbss", 0));
2407 else
2408 switch_to_section (bss_section);
2409 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2410 #ifdef ASM_DECLARE_OBJECT_NAME
2411 last_assemble_variable_decl = decl;
2412 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2413 #else
2414 /* The standard thing is just to output a label for the object. */
2415 ASM_OUTPUT_LABEL (file, name);
2416 #endif /* ASM_DECLARE_OBJECT_NAME */
2417 ASM_OUTPUT_SKIP (file, size ? size : 1);
2418 }
2419 \f
2420 void
2421 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2422 {
2423 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2424 make the problem with not enough registers even worse. */
2425 #ifdef INSN_SCHEDULING
2426 if (level > 1)
2427 flag_schedule_insns = 0;
2428 #endif
2429
2430 if (TARGET_MACHO)
2431 /* The Darwin libraries never set errno, so we might as well
2432 avoid calling them when that's the only reason we would. */
2433 flag_errno_math = 0;
2434
2435 /* The default values of these switches depend on TARGET_64BIT,
2436 which is not known at this moment. Mark these values with 2 and
2437 let the user override them. In case there is no command line option
2438 specifying them, we will set the defaults in override_options. */
2439 if (optimize >= 1)
2440 flag_omit_frame_pointer = 2;
2441 flag_pcc_struct_return = 2;
2442 flag_asynchronous_unwind_tables = 2;
2443 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2444 SUBTARGET_OPTIMIZATION_OPTIONS;
2445 #endif
2446 }
2447 \f
2448 /* Table of valid machine attributes. */
2449 const struct attribute_spec ix86_attribute_table[] =
2450 {
2451 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2452 /* Stdcall attribute says callee is responsible for popping arguments
2453 if they are not variable. */
2454 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2455 /* Fastcall attribute says callee is responsible for popping arguments
2456 if they are not variable. */
2457 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2458 /* Cdecl attribute says the callee is a normal C declaration */
2459 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2460 /* Regparm attribute specifies how many integer arguments are to be
2461 passed in registers. */
2462 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2463 /* X87regparm attribute says we are passing floating point arguments
2464 in 80387 registers. */
2465 { "x87regparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2466 /* Sseregparm attribute says we are using x86_64 calling conventions
2467 for FP arguments. */
2468 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2469 /* force_align_arg_pointer says this function realigns the stack at entry. */
2470 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2471 false, true, true, ix86_handle_cconv_attribute },
2472 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2473 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2474 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2475 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2476 #endif
2477 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2478 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2479 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2480 SUBTARGET_ATTRIBUTE_TABLE,
2481 #endif
2482 { NULL, 0, 0, false, false, false, NULL }
2483 };
2484
2485 /* Decide whether we can make a sibling call to a function. DECL is the
2486 declaration of the function being targeted by the call and EXP is the
2487 CALL_EXPR representing the call. */
2488
2489 static bool
2490 ix86_function_ok_for_sibcall (tree decl, tree exp)
2491 {
2492 tree func;
2493 rtx a, b;
2494
2495 /* If we are generating position-independent code, we cannot sibcall
2496 optimize any indirect call, or a direct call to a global function,
2497 as the PLT requires %ebx be live. */
2498 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2499 return false;
2500
2501 if (decl)
2502 func = decl;
2503 else
2504 {
2505 func = TREE_TYPE (TREE_OPERAND (exp, 0));
2506 if (POINTER_TYPE_P (func))
2507 func = TREE_TYPE (func);
2508 }
2509
2510 /* Check that the return value locations are the same. For instance,
2511 if we are returning floats on the 80387 register stack, we cannot
2512 make a sibcall from a function that doesn't return a float to a
2513 function that does or, conversely, from a function that does return
2514 a float to a function that doesn't; the necessary stack adjustment
2515 would not be executed. This is also the place we notice
2516 differences in the return value ABI. Note that it is ok for one
2517 of the functions to have void return type as long as the return
2518 value of the other is passed in a register. */
2519 a = ix86_function_value (TREE_TYPE (exp), func, false);
2520 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2521 cfun->decl, false);
2522 if (STACK_REG_P (a) || STACK_REG_P (b))
2523 {
2524 if (!rtx_equal_p (a, b))
2525 return false;
2526 }
2527 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2528 ;
2529 else if (!rtx_equal_p (a, b))
2530 return false;
2531
2532 /* If this call is indirect, we'll need to be able to use a call-clobbered
2533 register for the address of the target function. Make sure that all
2534 such registers are not used for passing parameters. */
2535 if (!decl && !TARGET_64BIT)
2536 {
2537 tree type;
2538
2539 /* We're looking at the CALL_EXPR, we need the type of the function. */
2540 type = TREE_OPERAND (exp, 0); /* pointer expression */
2541 type = TREE_TYPE (type); /* pointer type */
2542 type = TREE_TYPE (type); /* function type */
2543
2544 if (ix86_function_regparm (type, NULL) >= 3)
2545 {
2546 /* ??? Need to count the actual number of registers to be used,
2547 not the possible number of registers. Fix later. */
2548 return false;
2549 }
2550 }
2551
2552 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2553 /* Dllimport'd functions are also called indirectly. */
2554 if (decl && DECL_DLLIMPORT_P (decl)
2555 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2556 return false;
2557 #endif
2558
2559 /* If we force-aligned the stack, then sibcalling would unalign the
2560 stack, which may break the called function. */
2561 if (cfun->machine->force_align_arg_pointer)
2562 return false;
2563
2564 /* Otherwise okay. That also includes certain types of indirect calls. */
2565 return true;
2566 }
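
/* Illustrative only (never compiled): user-level code showing the kind of
   call the return-value check above rejects on 32-bit x86.  callee returns
   a float on the 80387 register stack while caller returns void, so the FP
   stack adjustment after the call would be skipped if this became a
   sibcall.  */
#if 0
extern float callee (float);

void
caller (float x)
{
  callee (x);   /* in tail position, but not turned into a sibcall */
}
#endif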
2567
2568 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "x87regparm"
2569 and "sseregparm" calling convention attributes;
2570 arguments as in struct attribute_spec.handler. */
2571
2572 static tree
2573 ix86_handle_cconv_attribute (tree *node, tree name,
2574 tree args,
2575 int flags ATTRIBUTE_UNUSED,
2576 bool *no_add_attrs)
2577 {
2578 if (TREE_CODE (*node) != FUNCTION_TYPE
2579 && TREE_CODE (*node) != METHOD_TYPE
2580 && TREE_CODE (*node) != FIELD_DECL
2581 && TREE_CODE (*node) != TYPE_DECL)
2582 {
2583 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2584 IDENTIFIER_POINTER (name));
2585 *no_add_attrs = true;
2586 return NULL_TREE;
2587 }
2588
2589 /* Can combine regparm with all attributes but fastcall. */
2590 if (is_attribute_p ("regparm", name))
2591 {
2592 tree cst;
2593
2594 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2595 {
2596 error ("fastcall and regparm attributes are not compatible");
2597 }
2598
2599 cst = TREE_VALUE (args);
2600 if (TREE_CODE (cst) != INTEGER_CST)
2601 {
2602 warning (OPT_Wattributes,
2603 "%qs attribute requires an integer constant argument",
2604 IDENTIFIER_POINTER (name));
2605 *no_add_attrs = true;
2606 }
2607 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2608 {
2609 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2610 IDENTIFIER_POINTER (name), REGPARM_MAX);
2611 *no_add_attrs = true;
2612 }
2613
2614 if (!TARGET_64BIT
2615 && lookup_attribute (ix86_force_align_arg_pointer_string,
2616 TYPE_ATTRIBUTES (*node))
2617 && compare_tree_int (cst, REGPARM_MAX-1))
2618 {
2619 error ("%s functions limited to %d register parameters",
2620 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2621 }
2622
2623 return NULL_TREE;
2624 }
2625
2626 if (TARGET_64BIT)
2627 {
2628 warning (OPT_Wattributes, "%qs attribute ignored",
2629 IDENTIFIER_POINTER (name));
2630 *no_add_attrs = true;
2631 return NULL_TREE;
2632 }
2633
2634 /* Can combine fastcall with stdcall (redundant), x87regparm
2635 and sseregparm. */
2636 if (is_attribute_p ("fastcall", name))
2637 {
2638 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2639 {
2640 error ("fastcall and cdecl attributes are not compatible");
2641 }
2642 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2643 {
2644 error ("fastcall and stdcall attributes are not compatible");
2645 }
2646 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2647 {
2648 error ("fastcall and regparm attributes are not compatible");
2649 }
2650 }
2651
2652 /* Can combine stdcall with fastcall (redundant), regparm,
2653 x87regparm and sseregparm. */
2654 else if (is_attribute_p ("stdcall", name))
2655 {
2656 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2657 {
2658 error ("stdcall and cdecl attributes are not compatible");
2659 }
2660 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2661 {
2662 error ("stdcall and fastcall attributes are not compatible");
2663 }
2664 }
2665
2666 /* Can combine cdecl with regparm, x87regparm and sseregparm. */
2667 else if (is_attribute_p ("cdecl", name))
2668 {
2669 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2670 {
2671 error ("stdcall and cdecl attributes are not compatible");
2672 }
2673 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2674 {
2675 error ("fastcall and cdecl attributes are not compatible");
2676 }
2677 }
2678
2679 /* Can combine x87regparm or sseregparm with all attributes. */
2680
2681 return NULL_TREE;
2682 }
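
/* Illustrative only (never compiled): a declaration the handler above
   rejects, since fastcall already fixes the register usage that regparm
   would change ("fastcall and regparm attributes are not compatible").  */
#if 0
extern int f (int, int) __attribute__ ((fastcall, regparm (3)));
#endif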
2683
2684 /* Return 0 if the attributes for two types are incompatible, 1 if they
2685 are compatible, and 2 if they are nearly compatible (which causes a
2686 warning to be generated). */
2687
2688 static int
2689 ix86_comp_type_attributes (tree type1, tree type2)
2690 {
2691 /* Check for mismatch of non-default calling convention. */
2692 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2693
2694 if (TREE_CODE (type1) != FUNCTION_TYPE)
2695 return 1;
2696
2697 /* Check for mismatched fastcall/regparm types. */
2698 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2699 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2700 || (ix86_function_regparm (type1, NULL)
2701 != ix86_function_regparm (type2, NULL)))
2702 return 0;
2703
2704 /* Check for mismatched x87regparm types. */
2705 if (!lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type1))
2706 != !lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type2)))
2707 return 0;
2708
2709 /* Check for mismatched sseregparm types. */
2710 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2711 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2712 return 0;
2713
2714 /* Check for mismatched return types (cdecl vs stdcall). */
2715 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2716 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2717 return 0;
2718
2719 return 1;
2720 }
2721 \f
2722 /* Return the regparm value for a function with the indicated TYPE and DECL.
2723 DECL may be NULL when calling function indirectly
2724 or considering a libcall. */
2725
2726 static int
2727 ix86_function_regparm (tree type, tree decl)
2728 {
2729 tree attr;
2730 int regparm = ix86_regparm;
2731 bool user_convention = false;
2732
2733 if (!TARGET_64BIT)
2734 {
2735 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2736 if (attr)
2737 {
2738 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2739 user_convention = true;
2740 }
2741
2742 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2743 {
2744 regparm = 2;
2745 user_convention = true;
2746 }
2747
2748 /* Use register calling convention for local functions when possible. */
2749 if (!TARGET_64BIT && !user_convention && decl
2750 && flag_unit_at_a_time && !profile_flag)
2751 {
2752 struct cgraph_local_info *i = cgraph_local_info (decl);
2753 if (i && i->local)
2754 {
2755 int local_regparm, globals = 0, regno;
2756
2757 /* Make sure no regparm register is taken by a global register
2758 variable. */
2759 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2760 if (global_regs[local_regparm])
2761 break;
2762 /* We can't use regparm(3) for nested functions as these use the
2763 static chain pointer in the third argument. */
2764 if (local_regparm == 3
2765 && decl_function_context (decl)
2766 && !DECL_NO_STATIC_CHAIN (decl))
2767 local_regparm = 2;
2768 /* If the function realigns its stack pointer, the
2769 prologue will clobber %ecx. If we've already
2770 generated code for the callee, the callee
2771 DECL_STRUCT_FUNCTION is gone, so we fall back to
2772 scanning the attributes for the self-realigning
2773 property. */
2774 if ((DECL_STRUCT_FUNCTION (decl)
2775 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2776 || (!DECL_STRUCT_FUNCTION (decl)
2777 && lookup_attribute (ix86_force_align_arg_pointer_string,
2778 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2779 local_regparm = 2;
2780 /* Each global register variable increases register pressure, so the
2781 more global register variables there are, the less profitable the
2782 regparm optimization is, unless requested explicitly by the user. */
2783 for (regno = 0; regno < 6; regno++)
2784 if (global_regs[regno])
2785 globals++;
2786 local_regparm
2787 = globals < local_regparm ? local_regparm - globals : 0;
2788
2789 if (local_regparm > regparm)
2790 regparm = local_regparm;
2791 }
2792 }
2793 }
2794 return regparm;
2795 }
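
/* Illustrative only (never compiled): how user code asks for the
   conventions computed above.  With regparm(2) the first two integer
   arguments travel in %eax and %edx; fastcall uses %ecx and %edx and makes
   the callee pop any stack arguments.  */
#if 0
extern int sum_regparm (int a, int b) __attribute__ ((regparm (2)));
extern int sum_fastcall (int a, int b) __attribute__ ((fastcall));

int
use_both (int x, int y)
{
  return sum_regparm (x, y) + sum_fastcall (x, y);
}
#endif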
2796
2797 /* Return 1 if we can pass up to X87_REGPARM_MAX floating point
2798 arguments in x87 registers for a function with the indicated
2799 TYPE and DECL. DECL may be NULL when calling function indirectly
2800 or considering a libcall. For local functions, return 2.
2801 Otherwise return 0. */
2802
2803 static int
2804 ix86_function_x87regparm (tree type, tree decl)
2805 {
2806 /* Use x87 registers to pass floating point arguments if requested
2807 by the x87regparm attribute. */
2808 if (TARGET_X87REGPARM
2809 || (type
2810 && lookup_attribute ("x87regparm", TYPE_ATTRIBUTES (type))))
2811 {
2812 if (!TARGET_80387)
2813 {
2814 if (decl)
2815 error ("Calling %qD with attribute x87regparm without "
2816 "80387 enabled", decl);
2817 else
2818 error ("Calling %qT with attribute x87regparm without "
2819 "80387 enabled", type);
2820 return 0;
2821 }
2822
2823 return 1;
2824 }
2825
2826 /* For local functions, pass up to X87_REGPARM_MAX floating point
2827 arguments in x87 registers. */
2828 if (!TARGET_64BIT && decl
2829 && flag_unit_at_a_time && !profile_flag)
2830 {
2831 struct cgraph_local_info *i = cgraph_local_info (decl);
2832 if (i && i->local)
2833 return 2;
2834 }
2835
2836 return 0;
2837 }
2838
2839 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2840 DFmode (2) arguments in SSE registers for a function with the
2841 indicated TYPE and DECL. DECL may be NULL when calling function
2842 indirectly or considering a libcall. Otherwise return 0. */
2843
2844 static int
2845 ix86_function_sseregparm (tree type, tree decl)
2846 {
2847 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2848 by the sseregparm attribute. */
2849 if (TARGET_SSEREGPARM
2850 || (type
2851 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2852 {
2853 if (!TARGET_SSE)
2854 {
2855 if (decl)
2856 error ("Calling %qD with attribute sseregparm without "
2857 "SSE/SSE2 enabled", decl);
2858 else
2859 error ("Calling %qT with attribute sseregparm without "
2860 "SSE/SSE2 enabled", type);
2861 return 0;
2862 }
2863
2864 return 2;
2865 }
2866
2867 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2868 (and DFmode for SSE2) arguments in SSE registers,
2869 even for 32-bit targets. */
2870 if (!TARGET_64BIT && decl
2871 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2872 {
2873 struct cgraph_local_info *i = cgraph_local_info (decl);
2874 if (i && i->local)
2875 return TARGET_SSE2 ? 2 : 1;
2876 }
2877
2878 return 0;
2879 }
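
/* Illustrative only (never compiled): the 32-bit-only attributes handled by
   the two functions above.  x87regparm asks for floating point arguments in
   80387 registers and sseregparm asks for SFmode/DFmode arguments in SSE
   registers; both are rejected above when the corresponding instruction set
   is disabled.  */
#if 0
extern double scale_x87 (double x, double f) __attribute__ ((x87regparm));
extern double scale_sse (double x, double f) __attribute__ ((sseregparm));
#endif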
2880
2881 /* Return true if EAX is live at the start of the function. Used by
2882 ix86_expand_prologue to determine if we need special help before
2883 calling allocate_stack_worker. */
2884
2885 static bool
2886 ix86_eax_live_at_start_p (void)
2887 {
2888 /* Cheat. Don't bother working forward from ix86_function_regparm
2889 to the function type to whether an actual argument is located in
2890 eax. Instead just look at cfg info, which is still close enough
2891 to correct at this point. This gives false positives for broken
2892 functions that might use uninitialized data that happens to be
2893 allocated in eax, but who cares? */
2894 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2895 }
2896
2897 /* Value is the number of bytes of arguments automatically
2898 popped when returning from a subroutine call.
2899 FUNDECL is the declaration node of the function (as a tree),
2900 FUNTYPE is the data type of the function (as a tree),
2901 or for a library call it is an identifier node for the subroutine name.
2902 SIZE is the number of bytes of arguments passed on the stack.
2903
2904 On the 80386, the RTD insn may be used to pop them if the number
2905 of args is fixed, but if the number is variable then the caller
2906 must pop them all. RTD can't be used for library calls now
2907 because the library is compiled with the Unix compiler.
2908 Use of RTD is a selectable option, since it is incompatible with
2909 standard Unix calling sequences. If the option is not selected,
2910 the caller must always pop the args.
2911
2912 The attribute stdcall is equivalent to RTD on a per module basis. */
2913
2914 int
2915 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2916 {
2917 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2918
2919 /* Cdecl functions override -mrtd, and never pop the stack. */
2920 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
2921
2922 /* Stdcall and fastcall functions will pop the stack if not
2923 variable args. */
2924 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2925 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2926 rtd = 1;
2927
2928 if (rtd
2929 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
2930 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
2931 == void_type_node)))
2932 return size;
2933 }
2934
2935 /* Lose any fake structure return argument if it is passed on the stack. */
2936 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2937 && !TARGET_64BIT
2938 && !KEEP_AGGREGATE_RETURN_POINTER)
2939 {
2940 int nregs = ix86_function_regparm (funtype, fundecl);
2941
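/* If no arguments are passed in registers, the hidden aggregate-return
   pointer was pushed on the stack and the callee pops it.  */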
2942 if (!nregs)
2943 return GET_MODE_SIZE (Pmode);
2944 }
2945
2946 return 0;
2947 }
2948 \f
2949 /* Argument support functions. */
2950
2951 /* Return true when register may be used to pass function parameters. */
2952 bool
2953 ix86_function_arg_regno_p (int regno)
2954 {
2955 int i;
2956 if (!TARGET_64BIT)
2957 return (regno < REGPARM_MAX
2958 || (TARGET_80387 && FP_REGNO_P (regno)
2959 && (regno < FIRST_FLOAT_REG + X87_REGPARM_MAX))
2960 || (TARGET_MMX && MMX_REGNO_P (regno)
2961 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2962 || (TARGET_SSE && SSE_REGNO_P (regno)
2963 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2964
2965 if (TARGET_SSE && SSE_REGNO_P (regno)
2966 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2967 return true;
2968 /* RAX is used as hidden argument to va_arg functions. */
2969 if (!regno)
2970 return true;
2971 for (i = 0; i < REGPARM_MAX; i++)
2972 if (regno == x86_64_int_parameter_registers[i])
2973 return true;
2974 return false;
2975 }
2976
2977 /* Return true if we do not know how to pass TYPE solely in registers. */
2978
2979 static bool
2980 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2981 {
2982 if (must_pass_in_stack_var_size_or_pad (mode, type))
2983 return true;
2984
2985 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2986 The layout_type routine is crafty and tries to trick us into passing
2987 currently unsupported vector types on the stack by using TImode. */
2988 return (!TARGET_64BIT && mode == TImode
2989 && type && TREE_CODE (type) != VECTOR_TYPE);
2990 }
2991
2992 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2993 for a call to a function whose data type is FNTYPE.
2994 For a library call, FNTYPE is 0. */
2995
2996 void
2997 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2998 tree fntype, /* tree ptr for function decl */
2999 rtx libname, /* SYMBOL_REF of library name or 0 */
3000 tree fndecl)
3001 {
3002 static CUMULATIVE_ARGS zero_cum;
3003 tree param, next_param;
3004
3005 if (TARGET_DEBUG_ARG)
3006 {
3007 fprintf (stderr, "\ninit_cumulative_args (");
3008 if (fntype)
3009 fprintf (stderr, "fntype code = %s, ret code = %s",
3010 tree_code_name[(int) TREE_CODE (fntype)],
3011 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3012 else
3013 fprintf (stderr, "no fntype");
3014
3015 if (libname)
3016 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3017 }
3018
3019 *cum = zero_cum;
3020
3021 /* Set up the number of registers to use for passing arguments. */
3022 cum->nregs = ix86_regparm;
3023 if (TARGET_80387)
3024 cum->x87_nregs = X87_REGPARM_MAX;
3025 if (TARGET_SSE)
3026 cum->sse_nregs = SSE_REGPARM_MAX;
3027 if (TARGET_MMX)
3028 cum->mmx_nregs = MMX_REGPARM_MAX;
3029 cum->warn_sse = true;
3030 cum->warn_mmx = true;
3031 cum->maybe_vaarg = false;
3032
3033 /* Use ecx and edx registers if function has fastcall attribute,
3034 else look for regparm information. */
3035 if (fntype && !TARGET_64BIT)
3036 {
3037 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3038 {
3039 cum->nregs = 2;
3040 cum->fastcall = 1;
3041 }
3042 else
3043 cum->nregs = ix86_function_regparm (fntype, fndecl);
3044 }
3045
3046 /* Set up the number of 80387 registers used for passing
3047 floating point arguments. Warn for mismatching ABI. */
3048 cum->float_in_x87 = ix86_function_x87regparm (fntype, fndecl);
3049
3050 /* Set up the number of SSE registers used for passing SFmode
3051 and DFmode arguments. Warn for mismatching ABI. */
3052 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3053
3054 /* Determine if this function has variable arguments. This is
3055 indicated by the last argument being 'void_type_node' if there
3056 are no variable arguments. If there are variable arguments, then
3057 we won't pass anything in registers in 32-bit mode. */
3058
3059 if (cum->nregs || cum->mmx_nregs
3060 || cum->x87_nregs || cum->sse_nregs)
3061 {
3062 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3063 param != 0; param = next_param)
3064 {
3065 next_param = TREE_CHAIN (param);
3066 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3067 {
3068 if (!TARGET_64BIT)
3069 {
3070 cum->nregs = 0;
3071 cum->x87_nregs = 0;
3072 cum->sse_nregs = 0;
3073 cum->mmx_nregs = 0;
3074 cum->warn_sse = 0;
3075 cum->warn_mmx = 0;
3076 cum->fastcall = 0;
3077 cum->float_in_x87 = 0;
3078 cum->float_in_sse = 0;
3079 }
3080 cum->maybe_vaarg = true;
3081 }
3082 }
3083 }
3084 if ((!fntype && !libname)
3085 || (fntype && !TYPE_ARG_TYPES (fntype)))
3086 cum->maybe_vaarg = true;
3087
3088 if (TARGET_DEBUG_ARG)
3089 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3090
3091 return;
3092 }
3093
3094 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3095 But in the case of vector types, it is some vector mode.
3096
3097 When we have only some of our vector isa extensions enabled, then there
3098 are some modes for which vector_mode_supported_p is false. For these
3099 modes, the generic vector support in gcc will choose some non-vector mode
3100 in order to implement the type. By computing the natural mode, we'll
3101 select the proper ABI location for the operand and not depend on whatever
3102 the middle-end decides to do with these vector types. */
3103
3104 static enum machine_mode
3105 type_natural_mode (tree type)
3106 {
3107 enum machine_mode mode = TYPE_MODE (type);
3108
3109 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3110 {
3111 HOST_WIDE_INT size = int_size_in_bytes (type);
3112 if ((size == 8 || size == 16)
3113 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3114 && TYPE_VECTOR_SUBPARTS (type) > 1)
3115 {
3116 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3117
3118 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3119 mode = MIN_MODE_VECTOR_FLOAT;
3120 else
3121 mode = MIN_MODE_VECTOR_INT;
3122
3123 /* Get the mode which has this inner mode and number of units. */
3124 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3125 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3126 && GET_MODE_INNER (mode) == innermode)
3127 return mode;
3128
3129 gcc_unreachable ();
3130 }
3131 }
3132
3133 return mode;
3134 }
3135
3136 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3137 this may not agree with the mode that the type system has chosen for the
3138 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3139 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3140
3141 static rtx
3142 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3143 unsigned int regno)
3144 {
3145 rtx tmp;
3146
3147 if (orig_mode != BLKmode)
3148 tmp = gen_rtx_REG (orig_mode, regno);
3149 else
3150 {
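/* Describe the BLKmode value as a one-element PARALLEL: the register
   in its natural MODE, stored at offset 0.  */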
3151 tmp = gen_rtx_REG (mode, regno);
3152 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3153 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3154 }
3155
3156 return tmp;
3157 }
3158
3159 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
3160 The goal of this code is to classify each eightbyte (8-byte chunk) of the
3161 incoming argument by register class and assign registers accordingly. */
3162
3163 /* Return the union class of CLASS1 and CLASS2.
3164 See the x86-64 PS ABI for details. */
3165
3166 static enum x86_64_reg_class
3167 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3168 {
3169 /* Rule #1: If both classes are equal, this is the resulting class. */
3170 if (class1 == class2)
3171 return class1;
3172
3173 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3174 the other class. */
3175 if (class1 == X86_64_NO_CLASS)
3176 return class2;
3177 if (class2 == X86_64_NO_CLASS)
3178 return class1;
3179
3180 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3181 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3182 return X86_64_MEMORY_CLASS;
3183
3184 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3185 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3186 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3187 return X86_64_INTEGERSI_CLASS;
3188 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3189 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3190 return X86_64_INTEGER_CLASS;
3191
3192 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3193 MEMORY is used. */
3194 if (class1 == X86_64_X87_CLASS
3195 || class1 == X86_64_X87UP_CLASS
3196 || class1 == X86_64_COMPLEX_X87_CLASS
3197 || class2 == X86_64_X87_CLASS
3198 || class2 == X86_64_X87UP_CLASS
3199 || class2 == X86_64_COMPLEX_X87_CLASS)
3200 return X86_64_MEMORY_CLASS;
3201
3202 /* Rule #6: Otherwise class SSE is used. */
3203 return X86_64_SSE_CLASS;
3204 }
3205
3206 /* Classify the argument of type TYPE and mode MODE.
3207 CLASSES will be filled by the register class used to pass each word
3208 of the operand. The number of words is returned. In case the parameter
3209 should be passed in memory, 0 is returned. As a special case for zero
3210 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3211
3212 BIT_OFFSET is used internally for handling records; it specifies the
3213 offset in bits, taken modulo 256 to avoid overflow cases.
3214
3215 See the x86-64 PS ABI for details.
3216 */
3217
3218 static int
3219 classify_argument (enum machine_mode mode, tree type,
3220 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3221 {
3222 HOST_WIDE_INT bytes =
3223 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3224 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3225
3226 /* Variable sized entities are always passed/returned in memory. */
3227 if (bytes < 0)
3228 return 0;
3229
3230 if (mode != VOIDmode
3231 && targetm.calls.must_pass_in_stack (mode, type))
3232 return 0;
3233
3234 if (type && AGGREGATE_TYPE_P (type))
3235 {
3236 int i;
3237 tree field;
3238 enum x86_64_reg_class subclasses[MAX_CLASSES];
3239
3240 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3241 if (bytes > 16)
3242 return 0;
3243
3244 for (i = 0; i < words; i++)
3245 classes[i] = X86_64_NO_CLASS;
3246
3247 /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3248 signal the memory class, so handle them as a special case. */
3249 if (!words)
3250 {
3251 classes[0] = X86_64_NO_CLASS;
3252 return 1;
3253 }
3254
3255 /* Classify each field of record and merge classes. */
3256 switch (TREE_CODE (type))
3257 {
3258 case RECORD_TYPE:
3259 /* And now merge the fields of structure. */
3260 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3261 {
3262 if (TREE_CODE (field) == FIELD_DECL)
3263 {
3264 int num;
3265
3266 if (TREE_TYPE (field) == error_mark_node)
3267 continue;
3268
3269 /* Bitfields are always classified as integer. Handle them
3270 early, since later code would consider them to be
3271 misaligned integers. */
3272 if (DECL_BIT_FIELD (field))
3273 {
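/* Mark every eightbyte (64-bit chunk) overlapped by the bit-field as
   INTEGER class; dividing a bit position by 8 twice yields the
   eightbyte index.  */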
3274 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3275 i < ((int_bit_position (field) + (bit_offset % 64))
3276 + tree_low_cst (DECL_SIZE (field), 0)
3277 + 63) / 8 / 8; i++)
3278 classes[i] =
3279 merge_classes (X86_64_INTEGER_CLASS,
3280 classes[i]);
3281 }
3282 else
3283 {
3284 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3285 TREE_TYPE (field), subclasses,
3286 (int_bit_position (field)
3287 + bit_offset) % 256);
3288 if (!num)
3289 return 0;
3290 for (i = 0; i < num; i++)
3291 {
3292 int pos =
3293 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3294 classes[i + pos] =
3295 merge_classes (subclasses[i], classes[i + pos]);
3296 }
3297 }
3298 }
3299 }
3300 break;
3301
3302 case ARRAY_TYPE:
3303 /* Arrays are handled as small records. */
3304 {
3305 int num;
3306 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3307 TREE_TYPE (type), subclasses, bit_offset);
3308 if (!num)
3309 return 0;
3310
3311 /* The partial classes are now full classes. */
3312 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3313 subclasses[0] = X86_64_SSE_CLASS;
3314 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3315 subclasses[0] = X86_64_INTEGER_CLASS;
3316
3317 for (i = 0; i < words; i++)
3318 classes[i] = subclasses[i % num];
3319
3320 break;
3321 }
3322 case UNION_TYPE:
3323 case QUAL_UNION_TYPE:
3324 /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3326 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3327 {
3328 if (TREE_CODE (field) == FIELD_DECL)
3329 {
3330 int num;
3331
3332 if (TREE_TYPE (field) == error_mark_node)
3333 continue;
3334
3335 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3336 TREE_TYPE (field), subclasses,
3337 bit_offset);
3338 if (!num)
3339 return 0;
3340 for (i = 0; i < num; i++)
3341 classes[i] = merge_classes (subclasses[i], classes[i]);
3342 }
3343 }
3344 break;
3345
3346 default:
3347 gcc_unreachable ();
3348 }
3349
3350 /* Final merger cleanup. */
3351 for (i = 0; i < words; i++)
3352 {
3353 /* If one class is MEMORY, everything should be passed in
3354 memory. */
3355 if (classes[i] == X86_64_MEMORY_CLASS)
3356 return 0;
3357
3358 /* The X86_64_SSEUP_CLASS should be always preceded by
3359 X86_64_SSE_CLASS. */
3360 if (classes[i] == X86_64_SSEUP_CLASS
3361 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3362 classes[i] = X86_64_SSE_CLASS;
3363
3364 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3365 if (classes[i] == X86_64_X87UP_CLASS
3366 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3367 classes[i] = X86_64_SSE_CLASS;
3368 }
3369 return words;
3370 }
3371
3372 /* Compute the alignment needed.  We align all types to their natural
3373 boundaries, with the exception of XFmode, which is aligned to 128 bits. */
3374 if (mode != VOIDmode && mode != BLKmode)
3375 {
3376 int mode_alignment = GET_MODE_BITSIZE (mode);
3377
3378 if (mode == XFmode)
3379 mode_alignment = 128;
3380 else if (mode == XCmode)
3381 mode_alignment = 256;
3382 if (COMPLEX_MODE_P (mode))
3383 mode_alignment /= 2;
3384 /* Misaligned fields are always returned in memory. */
3385 if (bit_offset % mode_alignment)
3386 return 0;
3387 }
3388
3389 /* For V1xx modes, just use the base mode.  */
3390 if (VECTOR_MODE_P (mode)
3391 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3392 mode = GET_MODE_INNER (mode);
3393
3394 /* Classification of atomic types. */
3395 switch (mode)
3396 {
3397 case SDmode:
3398 case DDmode:
3399 classes[0] = X86_64_SSE_CLASS;
3400 return 1;
3401 case TDmode:
3402 classes[0] = X86_64_SSE_CLASS;
3403 classes[1] = X86_64_SSEUP_CLASS;
3404 return 2;
3405 case DImode:
3406 case SImode:
3407 case HImode:
3408 case QImode:
3409 case CSImode:
3410 case CHImode:
3411 case CQImode:
3412 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3413 classes[0] = X86_64_INTEGERSI_CLASS;
3414 else
3415 classes[0] = X86_64_INTEGER_CLASS;
3416 return 1;
3417 case CDImode:
3418 case TImode:
3419 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3420 return 2;
3421 case CTImode:
3422 return 0;
3423 case SFmode:
3424 if (!(bit_offset % 64))
3425 classes[0] = X86_64_SSESF_CLASS;
3426 else
3427 classes[0] = X86_64_SSE_CLASS;
3428 return 1;
3429 case DFmode:
3430 classes[0] = X86_64_SSEDF_CLASS;
3431 return 1;
3432 case XFmode:
3433 classes[0] = X86_64_X87_CLASS;
3434 classes[1] = X86_64_X87UP_CLASS;
3435 return 2;
3436 case TFmode:
3437 classes[0] = X86_64_SSE_CLASS;
3438 classes[1] = X86_64_SSEUP_CLASS;
3439 return 2;
3440 case SCmode:
3441 classes[0] = X86_64_SSE_CLASS;
3442 return 1;
3443 case DCmode:
3444 classes[0] = X86_64_SSEDF_CLASS;
3445 classes[1] = X86_64_SSEDF_CLASS;
3446 return 2;
3447 case XCmode:
3448 classes[0] = X86_64_COMPLEX_X87_CLASS;
3449 return 1;
3450 case TCmode:
3451 /* This mode is larger than 16 bytes.  */
3452 return 0;
3453 case V4SFmode:
3454 case V4SImode:
3455 case V16QImode:
3456 case V8HImode:
3457 case V2DFmode:
3458 case V2DImode:
3459 classes[0] = X86_64_SSE_CLASS;
3460 classes[1] = X86_64_SSEUP_CLASS;
3461 return 2;
3462 case V2SFmode:
3463 case V2SImode:
3464 case V4HImode:
3465 case V8QImode:
3466 classes[0] = X86_64_SSE_CLASS;
3467 return 1;
3468 case BLKmode:
3469 case VOIDmode:
3470 return 0;
3471 default:
3472 gcc_assert (VECTOR_MODE_P (mode));
3473
3474 if (bytes > 16)
3475 return 0;
3476
3477 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3478
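/* Remaining small integer vectors are classified like integers: one
   eightbyte, or two if the vector is larger than 8 bytes.  */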
3479 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3480 classes[0] = X86_64_INTEGERSI_CLASS;
3481 else
3482 classes[0] = X86_64_INTEGER_CLASS;
3483 classes[1] = X86_64_INTEGER_CLASS;
3484 return 1 + (bytes > 8);
3485 }
3486 }
3487
3488 /* Examine the argument and set the number of registers required in each
3489 class.  Return 0 iff the parameter should be passed in memory. */
3490 static int
3491 examine_argument (enum machine_mode mode, tree type, int in_return,
3492 int *int_nregs, int *sse_nregs)
3493 {
3494 enum x86_64_reg_class class[MAX_CLASSES];
3495 int n = classify_argument (mode, type, class, 0);
3496
3497 *int_nregs = 0;
3498 *sse_nregs = 0;
3499 if (!n)
3500 return 0;
3501 for (n--; n >= 0; n--)
3502 switch (class[n])
3503 {
3504 case X86_64_INTEGER_CLASS:
3505 case X86_64_INTEGERSI_CLASS:
3506 (*int_nregs)++;
3507 break;
3508 case X86_64_SSE_CLASS:
3509 case X86_64_SSESF_CLASS:
3510 case X86_64_SSEDF_CLASS:
3511 (*sse_nregs)++;
3512 break;
3513 case X86_64_NO_CLASS:
3514 case X86_64_SSEUP_CLASS:
3515 break;
3516 case X86_64_X87_CLASS:
3517 case X86_64_X87UP_CLASS:
3518 if (!in_return)
3519 return 0;
3520 break;
3521 case X86_64_COMPLEX_X87_CLASS:
3522 return in_return ? 2 : 0;
3523 case X86_64_MEMORY_CLASS:
3524 gcc_unreachable ();
3525 }
3526 return 1;
3527 }
3528
3529 /* Construct container for the argument used by GCC interface. See
3530 FUNCTION_ARG for the detailed description. */
3531
3532 static rtx
3533 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3534 tree type, int in_return, int nintregs, int nsseregs,
3535 const int *intreg, int sse_regno)
3536 {
3537 /* The following variables hold the static issued_error state. */
3538 static bool issued_sse_arg_error;
3539 static bool issued_sse_ret_error;
3540 static bool issued_x87_ret_error;
3541
3542 enum machine_mode tmpmode;
3543 int bytes =
3544 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3545 enum x86_64_reg_class class[MAX_CLASSES];
3546 int n;
3547 int i;
3548 int nexps = 0;
3549 int needed_sseregs, needed_intregs;
3550 rtx exp[MAX_CLASSES];
3551 rtx ret;
3552
3553 n = classify_argument (mode, type, class, 0);
3554 if (TARGET_DEBUG_ARG)
3555 {
3556 if (!n)
3557 fprintf (stderr, "Memory class\n");
3558 else
3559 {
3560 fprintf (stderr, "Classes:");
3561 for (i = 0; i < n; i++)
3562 {
3563 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3564 }
3565 fprintf (stderr, "\n");
3566 }
3567 }
3568 if (!n)
3569 return NULL;
3570 if (!examine_argument (mode, type, in_return, &needed_intregs,
3571 &needed_sseregs))
3572 return NULL;
3573 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3574 return NULL;
3575
3576 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3577 some less clueful developer tries to use floating-point anyway. */
3578 if (needed_sseregs && !TARGET_SSE)
3579 {
3580 if (in_return)
3581 {
3582 if (!issued_sse_ret_error)
3583 {
3584 error ("SSE register return with SSE disabled");
3585 issued_sse_ret_error = true;
3586 }
3587 }
3588 else if (!issued_sse_arg_error)
3589 {
3590 error ("SSE register argument with SSE disabled");
3591 issued_sse_arg_error = true;
3592 }
3593 return NULL;
3594 }
3595
3596 /* Likewise, error if the ABI requires us to return values in the
3597 x87 registers and the user specified -mno-80387. */
3598 if (!TARGET_80387 && in_return)
3599 for (i = 0; i < n; i++)
3600 if (class[i] == X86_64_X87_CLASS
3601 || class[i] == X86_64_X87UP_CLASS
3602 || class[i] == X86_64_COMPLEX_X87_CLASS)
3603 {
3604 if (!issued_x87_ret_error)
3605 {
3606 error ("x87 register return with x87 disabled");
3607 issued_x87_ret_error = true;
3608 }
3609 return NULL;
3610 }
3611
3612 /* First construct the simple cases.  Avoid SCmode, since we want to use a
3613 single register to pass this type. */
3614 if (n == 1 && mode != SCmode)
3615 switch (class[0])
3616 {
3617 case X86_64_INTEGER_CLASS:
3618 case X86_64_INTEGERSI_CLASS:
3619 return gen_rtx_REG (mode, intreg[0]);
3620 case X86_64_SSE_CLASS:
3621 case X86_64_SSESF_CLASS:
3622 case X86_64_SSEDF_CLASS:
3623 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3624 case X86_64_X87_CLASS:
3625 case X86_64_COMPLEX_X87_CLASS:
3626 return gen_rtx_REG (mode, FIRST_STACK_REG);
3627 case X86_64_NO_CLASS:
3628 /* Zero sized array, struct or class. */
3629 return NULL;
3630 default:
3631 gcc_unreachable ();
3632 }
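/* A few two-eightbyte cases still fit in a single hard register: a
   full XMM value (SSE followed by SSEUP), an XFmode value in %st(0),
   or a CDImode/TImode/TFmode value in a consecutive pair of integer
   registers.  */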
3633 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3634 && mode != BLKmode)
3635 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3636 if (n == 2
3637 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3638 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3639 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3640 && class[1] == X86_64_INTEGER_CLASS
3641 && (mode == CDImode || mode == TImode || mode == TFmode)
3642 && intreg[0] + 1 == intreg[1])
3643 return gen_rtx_REG (mode, intreg[0]);
3644
3645 /* Otherwise figure out the entries of the PARALLEL. */
3646 for (i = 0; i < n; i++)
3647 {
3648 switch (class[i])
3649 {
3650 case X86_64_NO_CLASS:
3651 break;
3652 case X86_64_INTEGER_CLASS:
3653 case X86_64_INTEGERSI_CLASS:
3654 /* Merge TImodes on aligned occasions here too. */
3655 if (i * 8 + 8 > bytes)
3656 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3657 else if (class[i] == X86_64_INTEGERSI_CLASS)
3658 tmpmode = SImode;
3659 else
3660 tmpmode = DImode;
3661 /* If there is no integer mode for the remaining size, use DImode. */
3662 if (tmpmode == BLKmode)
3663 tmpmode = DImode;
3664 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3665 gen_rtx_REG (tmpmode, *intreg),
3666 GEN_INT (i*8));
3667 intreg++;
3668 break;
3669 case X86_64_SSESF_CLASS:
3670 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3671 gen_rtx_REG (SFmode,
3672 SSE_REGNO (sse_regno)),
3673 GEN_INT (i*8));
3674 sse_regno++;
3675 break;
3676 case X86_64_SSEDF_CLASS:
3677 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3678 gen_rtx_REG (DFmode,
3679 SSE_REGNO (sse_regno)),
3680 GEN_INT (i*8));
3681 sse_regno++;
3682 break;
3683 case X86_64_SSE_CLASS:
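/* An SSE eightbyte followed by SSEUP occupies a whole XMM register;
   use TImode and skip the SSEUP eightbyte below.  */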
3684 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3685 tmpmode = TImode;
3686 else
3687 tmpmode = DImode;
3688 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3689 gen_rtx_REG (tmpmode,
3690 SSE_REGNO (sse_regno)),
3691 GEN_INT (i*8));
3692 if (tmpmode == TImode)
3693 i++;
3694 sse_regno++;
3695 break;
3696 default:
3697 gcc_unreachable ();
3698 }
3699 }
3700
3701 /* Empty aligned struct, union or class. */
3702 if (nexps == 0)
3703 return NULL;
3704
3705 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3706 for (i = 0; i < nexps; i++)
3707 XVECEXP (ret, 0, i) = exp [i];
3708 return ret;
3709 }
3710
3711 /* Update the data in CUM to advance over an argument
3712 of mode MODE and data type TYPE.
3713 (TYPE is null for libcalls where that information may not be available.) */
3714
3715 void
3716 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3717 tree type, int named)
3718 {
3719 int bytes =
3720 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3721 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3722
3723 if (type)
3724 mode = type_natural_mode (type);
3725
3726 if (TARGET_DEBUG_ARG)
3727 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3728 "mode=%s, named=%d)\n\n",
3729 words, cum->words, cum->nregs, cum->sse_nregs,
3730 GET_MODE_NAME (mode), named);
3731
3732 if (TARGET_64BIT)
3733 {
3734 int int_nregs, sse_nregs;
3735 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3736 cum->words += words;
3737 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3738 {
3739 cum->nregs -= int_nregs;
3740 cum->sse_nregs -= sse_nregs;
3741 cum->regno += int_nregs;
3742 cum->sse_regno += sse_nregs;
3743 }
3744 else
3745 cum->words += words;
3746 }
3747 else
3748 {
3749 switch (mode)
3750 {
3751 default:
3752 break;
3753
3754 case BLKmode:
3755 if (bytes < 0)
3756 break;
3757 /* FALLTHRU */
3758
3759 case DImode:
3760 case SImode:
3761 case HImode:
3762 case QImode:
3763 cum->words += words;
3764 cum->nregs -= words;
3765 cum->regno += words;
3766
3767 if (cum->nregs <= 0)
3768 {
3769 cum->nregs = 0;
3770 cum->regno = 0;
3771 }
3772 break;
3773
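/* SFmode falls through to DFmode and then into the x87 handling
   below; when float_in_sse requests SSE passing, jump over the x87
   cases to the SSE cases instead.  */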
3774 case SFmode:
3775 if (cum->float_in_sse > 0)
3776 goto skip_80387;
3777
3778 case DFmode:
3779 if (cum->float_in_sse > 1)
3780 goto skip_80387;
3781
3782 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3783 rounding takes place when values are passed in x87
3784 registers, pass DFmode and SFmode types to local functions
3785 only when flag_unsafe_math_optimizations is set. */
3786 if (!cum->float_in_x87
3787 || (cum->float_in_x87 == 2
3788 && !flag_unsafe_math_optimizations))
3789 break;
3790
3791 case XFmode:
3792 if (!cum->float_in_x87)
3793 break;
3794
3795 if (!type || !AGGREGATE_TYPE_P (type))
3796 {
3797 cum->x87_nregs -= 1;
3798 cum->x87_regno += 1;
3799 if (cum->x87_nregs <= 0)
3800 {
3801 cum->x87_nregs = 0;
3802 cum->x87_regno = 0;
3803 }
3804 }
3805 break;
3806
3807 skip_80387:
3808
3809 case TImode:
3810 case V16QImode:
3811 case V8HImode:
3812 case V4SImode:
3813 case V2DImode:
3814 case V4SFmode:
3815 case V2DFmode:
3816 if (!type || !AGGREGATE_TYPE_P (type))
3817 {
3818 cum->sse_nregs -= 1;
3819 cum->sse_regno += 1;
3820 if (cum->sse_nregs <= 0)
3821 {
3822 cum->sse_nregs = 0;
3823 cum->sse_regno = 0;
3824 }
3825 }
3826 break;
3827
3828 case V8QImode:
3829 case V4HImode:
3830 case V2SImode:
3831 case V2SFmode:
3832 if (!type || !AGGREGATE_TYPE_P (type))
3833 {
3834 cum->mmx_nregs -= 1;
3835 cum->mmx_regno += 1;
3836 if (cum->mmx_nregs <= 0)
3837 {
3838 cum->mmx_nregs = 0;
3839 cum->mmx_regno = 0;
3840 }
3841 }
3842 break;
3843 }
3844 }
3845 }
3846
3847 /* Define where to put the arguments to a function.
3848 Value is zero to push the argument on the stack,
3849 or a hard register in which to store the argument.
3850
3851 MODE is the argument's machine mode.
3852 TYPE is the data type of the argument (as a tree).
3853 This is null for libcalls where that information may
3854 not be available.
3855 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3856 the preceding args and about the function being called.
3857 NAMED is nonzero if this argument is a named parameter
3858 (otherwise it is an extra parameter matching an ellipsis). */
3859
3860 rtx
3861 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3862 tree type, int named)
3863 {
3864 enum machine_mode mode = orig_mode;
3865 rtx ret = NULL_RTX;
3866 int bytes =
3867 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3868 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3869 static bool warnedsse, warnedmmx;
3870
3871 /* To simplify the code below, represent vector types with a vector mode
3872 even if MMX/SSE are not active. */
3873 if (type && TREE_CODE (type) == VECTOR_TYPE)
3874 mode = type_natural_mode (type);
3875
3876 /* Handle a hidden AL argument containing number of registers for varargs
3877 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
3878 any AL settings. */
3879 if (mode == VOIDmode)
3880 {
3881 if (TARGET_64BIT)
3882 return GEN_INT (cum->maybe_vaarg
3883 ? (cum->sse_nregs < 0
3884 ? SSE_REGPARM_MAX
3885 : cum->sse_regno)
3886 : -1);
3887 else
3888 return constm1_rtx;
3889 }
3890 if (TARGET_64BIT)
3891 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
3892 cum->sse_nregs,
3893 &x86_64_int_parameter_registers [cum->regno],
3894 cum->sse_regno);
3895 else
3896 switch (mode)
3897 {
3898 default:
3899 break;
3900
3901 case BLKmode:
3902 if (bytes < 0)
3903 break;
3904 /* FALLTHRU */
3905 case DImode:
3906 case SImode:
3907 case HImode:
3908 case QImode:
3909 if (words <= cum->nregs)
3910 {
3911 int regno = cum->regno;
3912
3913 /* Fastcall allocates the first two DWORD (SImode) or
3914 smaller arguments to ECX and EDX. */
3915 if (cum->fastcall)
3916 {
3917 if (mode == BLKmode || mode == DImode)
3918 break;
3919
3920 /* ECX, not EAX, is the first allocated register.  */
3921 if (regno == 0)
3922 regno = 2;
3923 }
3924 ret = gen_rtx_REG (mode, regno);
3925 }
3926 break;
3927
3928 case SFmode:
3929 if (cum->float_in_sse > 0)
3930 goto skip_80387;
3931
3932 case DFmode:
3933 if (cum->float_in_sse > 1)
3934 goto skip_80387;
3935
3936 /* Because no inherent XFmode->DFmode and XFmode->SFmode
3937 rounding takes place when values are passed in x87
3938 registers, pass DFmode and SFmode types to local functions
3939 only when flag_unsafe_math_optimizations is set. */
3940 if (!cum->float_in_x87
3941 || (cum->float_in_x87 == 2
3942 && !flag_unsafe_math_optimizations))
3943 break;
3944
3945 case XFmode:
3946 if (!cum->float_in_x87)
3947 break;
3948
3949 if (!type || !AGGREGATE_TYPE_P (type))
3950 if (cum->x87_nregs)
3951 ret = gen_rtx_REG (mode, cum->x87_regno + FIRST_FLOAT_REG);
3952 break;
3953
3954 skip_80387:
3955
3956 case TImode:
3957 case V16QImode:
3958 case V8HImode:
3959 case V4SImode:
3960 case V2DImode:
3961 case V4SFmode:
3962 case V2DFmode:
3963 if (!type || !AGGREGATE_TYPE_P (type))
3964 {
3965 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3966 {
3967 warnedsse = true;
3968 warning (0, "SSE vector argument without SSE enabled "
3969 "changes the ABI");
3970 }
3971 if (cum->sse_nregs)
3972 ret = gen_reg_or_parallel (mode, orig_mode,
3973 cum->sse_regno + FIRST_SSE_REG);
3974 }
3975 break;
3976 case V8QImode:
3977 case V4HImode:
3978 case V2SImode:
3979 case V2SFmode:
3980 if (!type || !AGGREGATE_TYPE_P (type))
3981 {
3982 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3983 {
3984 warnedmmx = true;
3985 warning (0, "MMX vector argument without MMX enabled "
3986 "changes the ABI");
3987 }
3988 if (cum->mmx_nregs)
3989 ret = gen_reg_or_parallel (mode, orig_mode,
3990 cum->mmx_regno + FIRST_MMX_REG);
3991 }
3992 break;
3993 }
3994
3995 if (TARGET_DEBUG_ARG)
3996 {
3997 fprintf (stderr,
3998 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
3999 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4000
4001 if (ret)
4002 print_simple_rtl (stderr, ret);
4003 else
4004 fprintf (stderr, ", stack");
4005
4006 fprintf (stderr, " )\n");
4007 }
4008
4009 return ret;
4010 }
4011
4012 /* A C expression that indicates when an argument must be passed by
4013 reference. If nonzero for an argument, a copy of that argument is
4014 made in memory and a pointer to the argument is passed instead of
4015 the argument itself. The pointer is passed in whatever way is
4016 appropriate for passing a pointer to that type. */
4017
4018 static bool
4019 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4020 enum machine_mode mode ATTRIBUTE_UNUSED,
4021 tree type, bool named ATTRIBUTE_UNUSED)
4022 {
4023 if (!TARGET_64BIT)
4024 return 0;
4025
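/* Variable-sized types (int_size_in_bytes returns -1) are passed by
   reference on x86-64.  */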
4026 if (type && int_size_in_bytes (type) == -1)
4027 {
4028 if (TARGET_DEBUG_ARG)
4029 fprintf (stderr, "function_arg_pass_by_reference\n");
4030 return 1;
4031 }
4032
4033 return 0;
4034 }
4035
4036 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4037 ABI. Only called if TARGET_SSE. */
4038 static bool
4039 contains_128bit_aligned_vector_p (tree type)
4040 {
4041 enum machine_mode mode = TYPE_MODE (type);
4042 if (SSE_REG_MODE_P (mode)
4043 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4044 return true;
4045 if (TYPE_ALIGN (type) < 128)
4046 return false;
4047
4048 if (AGGREGATE_TYPE_P (type))
4049 {
4050 /* Walk the aggregates recursively. */
4051 switch (TREE_CODE (type))
4052 {
4053 case RECORD_TYPE:
4054 case UNION_TYPE:
4055 case QUAL_UNION_TYPE:
4056 {
4057 tree field;
4058
4059 /* Walk all the structure fields. */
4060 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4061 {
4062 if (TREE_CODE (field) == FIELD_DECL
4063 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4064 return true;
4065 }
4066 break;
4067 }
4068
4069 case ARRAY_TYPE:
4070 /* Just for use if some language passes arrays by value. */
4071 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4072 return true;
4073 break;
4074
4075 default:
4076 gcc_unreachable ();
4077 }
4078 }
4079 return false;
4080 }
4081
4082 /* Gives the alignment boundary, in bits, of an argument with the
4083 specified mode and type. */
4084
4085 int
4086 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4087 {
4088 int align;
4089 if (type)
4090 align = TYPE_ALIGN (type);
4091 else
4092 align = GET_MODE_ALIGNMENT (mode);
4093 if (align < PARM_BOUNDARY)
4094 align = PARM_BOUNDARY;
4095 if (!TARGET_64BIT)
4096 {
4097 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4098 make an exception for SSE modes since these require 128bit
4099 alignment.
4100
4101 The handling here differs from field_alignment. ICC aligns MMX
4102 arguments to 4 byte boundaries, while structure fields are aligned
4103 to 8 byte boundaries. */
4104 if (!TARGET_SSE)
4105 align = PARM_BOUNDARY;
4106 else if (!type)
4107 {
4108 if (!SSE_REG_MODE_P (mode))
4109 align = PARM_BOUNDARY;
4110 }
4111 else
4112 {
4113 if (!contains_128bit_aligned_vector_p (type))
4114 align = PARM_BOUNDARY;
4115 }
4116 }
4117 if (align > 128)
4118 align = 128;
4119 return align;
4120 }
4121
4122 /* Return true if N is a possible register number of function value. */
4123 bool
4124 ix86_function_value_regno_p (int regno)
4125 {
4126 if (regno == 0
4127 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4128 || (regno == FIRST_SSE_REG && TARGET_SSE))
4129 return true;
4130
4131 if (!TARGET_64BIT
4132 && (regno == FIRST_MMX_REG && TARGET_MMX))
4133 return true;
4134
4135 return false;
4136 }
4137
4138 /* Define how to find the value returned by a function.
4139 VALTYPE is the data type of the value (as a tree).
4140 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4141 otherwise, FUNC is 0. */
4142 rtx
4143 ix86_function_value (tree valtype, tree fntype_or_decl,
4144 bool outgoing ATTRIBUTE_UNUSED)
4145 {
4146 enum machine_mode natmode = type_natural_mode (valtype);
4147
4148 if (TARGET_64BIT)
4149 {
4150 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4151 1, REGPARM_MAX, SSE_REGPARM_MAX,
4152 x86_64_int_return_registers, 0);
4153 /* For zero sized structures, construct_container returns NULL, but we
4154 need to keep the rest of the compiler happy by returning a meaningful value. */
4155 if (!ret)
4156 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4157 return ret;
4158 }
4159 else
4160 {
4161 tree fn = NULL_TREE, fntype;
4162 if (fntype_or_decl
4163 && DECL_P (fntype_or_decl))
4164 fn = fntype_or_decl;
4165 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4166 return gen_rtx_REG (TYPE_MODE (valtype),
4167 ix86_value_regno (natmode, fn, fntype));
4168 }
4169 }
4170
4171 /* Return true iff type is returned in memory. */
4172 int
4173 ix86_return_in_memory (tree type)
4174 {
4175 int needed_intregs, needed_sseregs, size;
4176 enum machine_mode mode = type_natural_mode (type);
4177
4178 if (TARGET_64BIT)
4179 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4180
4181 if (mode == BLKmode)
4182 return 1;
4183
4184 size = int_size_in_bytes (type);
4185
4186 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4187 return 0;
4188
4189 if (VECTOR_MODE_P (mode) || mode == TImode)
4190 {
4191 /* User-created vectors small enough to fit in EAX. */
4192 if (size < 8)
4193 return 0;
4194
4195 /* MMX/3dNow values are returned in MM0,
4196 except when it doesn't exist. */
4197 if (size == 8)
4198 return (TARGET_MMX ? 0 : 1);
4199
4200 /* SSE values are returned in XMM0, except when it doesn't exist. */
4201 if (size == 16)
4202 return (TARGET_SSE ? 0 : 1);
4203 }
4204
4205 if (mode == XFmode)
4206 return 0;
4207
4208 if (mode == TDmode)
4209 return 1;
4210
4211 if (size > 12)
4212 return 1;
4213 return 0;
4214 }
4215
4216 /* When returning SSE vector types, we have a choice of either
4217 (1) being ABI incompatible with a -march switch, or
4218 (2) generating an error.
4219 Given no good solution, I think the safest thing is one warning.
4220 The user won't be able to use -Werror, but....
4221
4222 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4223 called in response to actually generating a caller or callee that
4224 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4225 via aggregate_value_p for general type probing from tree-ssa. */
4226
4227 static rtx
4228 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4229 {
4230 static bool warnedsse, warnedmmx;
4231
4232 if (type)
4233 {
4234 /* Look at the return type of the function, not the function type. */
4235 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4236
4237 if (!TARGET_SSE && !warnedsse)
4238 {
4239 if (mode == TImode
4240 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4241 {
4242 warnedsse = true;
4243 warning (0, "SSE vector return without SSE enabled "
4244 "changes the ABI");
4245 }
4246 }
4247
4248 if (!TARGET_MMX && !warnedmmx)
4249 {
4250 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4251 {
4252 warnedmmx = true;
4253 warning (0, "MMX vector return without MMX enabled "
4254 "changes the ABI");
4255 }
4256 }
4257 }
4258
4259 return NULL;
4260 }
4261
4262 /* Define how to find the value returned by a library function
4263 assuming the value has mode MODE. */
4264 rtx
4265 ix86_libcall_value (enum machine_mode mode)
4266 {
4267 if (TARGET_64BIT)
4268 {
4269 switch (mode)
4270 {
4271 case SFmode:
4272 case SCmode:
4273 case DFmode:
4274 case DCmode:
4275 case TFmode:
4276 case SDmode:
4277 case DDmode:
4278 case TDmode:
4279 return gen_rtx_REG (mode, FIRST_SSE_REG);
4280 case XFmode:
4281 case XCmode:
4282 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4283 case TCmode:
4284 return NULL;
4285 default:
4286 return gen_rtx_REG (mode, 0);
4287 }
4288 }
4289 else
4290 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4291 }
4292
4293 /* Given a mode, return the register to use for a return value. */
4294
4295 static int
4296 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4297 {
4298 gcc_assert (!TARGET_64BIT);
4299
4300 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4301 we normally prevent this case when mmx is not available. However
4302 some ABIs may require the result to be returned like DImode. */
4303 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4304 return TARGET_MMX ? FIRST_MMX_REG : 0;
4305
4306 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4307 we prevent this case when sse is not available. However some ABIs
4308 may require the result to be returned like integer TImode. */
4309 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4310 return TARGET_SSE ? FIRST_SSE_REG : 0;
4311
4312 /* Decimal floating point values can go in %eax, unlike other float modes. */
4313 if (DECIMAL_FLOAT_MODE_P (mode))
4314 return 0;
4315
4316 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4317 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4318 return 0;
4319
4320 /* Floating point return values in %st(0), except for local functions when
4321 SSE math is enabled or for functions with sseregparm attribute. */
4322 if ((func || fntype)
4323 && (mode == SFmode || mode == DFmode))
4324 {
4325 int sse_level = ix86_function_sseregparm (fntype, func);
4326 if ((sse_level >= 1 && mode == SFmode)
4327 || (sse_level == 2 && mode == DFmode))
4328 return FIRST_SSE_REG;
4329 }
4330
4331 return FIRST_FLOAT_REG;
4332 }
4333 \f
4334 /* Create the va_list data type. */
4335
4336 static tree
4337 ix86_build_builtin_va_list (void)
4338 {
4339 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4340
4341 /* For i386 we use plain pointer to argument area. */
4342 if (!TARGET_64BIT)
4343 return build_pointer_type (char_type_node);
4344
4345 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4346 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4347
4348 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4349 unsigned_type_node);
4350 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4351 unsigned_type_node);
4352 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4353 ptr_type_node);
4354 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4355 ptr_type_node);
4356
4357 va_list_gpr_counter_field = f_gpr;
4358 va_list_fpr_counter_field = f_fpr;
4359
4360 DECL_FIELD_CONTEXT (f_gpr) = record;
4361 DECL_FIELD_CONTEXT (f_fpr) = record;
4362 DECL_FIELD_CONTEXT (f_ovf) = record;
4363 DECL_FIELD_CONTEXT (f_sav) = record;
4364
4365 TREE_CHAIN (record) = type_decl;
4366 TYPE_NAME (record) = type_decl;
4367 TYPE_FIELDS (record) = f_gpr;
4368 TREE_CHAIN (f_gpr) = f_fpr;
4369 TREE_CHAIN (f_fpr) = f_ovf;
4370 TREE_CHAIN (f_ovf) = f_sav;
4371
4372 layout_type (record);
4373
4374 /* The correct type is an array type of one element. */
4375 return build_array_type (record, build_index_type (size_zero_node));
4376 }
4377
4378 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4379
4380 static void
4381 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4382 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4383 int no_rtl)
4384 {
4385 CUMULATIVE_ARGS next_cum;
4386 rtx save_area = NULL_RTX, mem;
4387 rtx label;
4388 rtx label_ref;
4389 rtx tmp_reg;
4390 rtx nsse_reg;
4391 int set;
4392 tree fntype;
4393 int stdarg_p;
4394 int i;
4395
4396 if (!TARGET_64BIT)
4397 return;
4398
4399 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4400 return;
4401
4402 /* Indicate to allocate space on the stack for varargs save area. */
4403 ix86_save_varrargs_registers = 1;
4404
4405 cfun->stack_alignment_needed = 128;
4406
4407 fntype = TREE_TYPE (current_function_decl);
4408 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4409 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4410 != void_type_node));
4411
4412 /* For varargs, we do not want to skip the dummy va_dcl argument.
4413 For stdargs, we do want to skip the last named argument. */
4414 next_cum = *cum;
4415 if (stdarg_p)
4416 function_arg_advance (&next_cum, mode, type, 1);
4417
4418 if (!no_rtl)
4419 save_area = frame_pointer_rtx;
4420
4421 set = get_varargs_alias_set ();
4422
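/* Save the unnamed integer argument registers into their slots in
   the register save area, one word per register.  */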
4423 for (i = next_cum.regno;
4424 i < ix86_regparm
4425 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4426 i++)
4427 {
4428 mem = gen_rtx_MEM (Pmode,
4429 plus_constant (save_area, i * UNITS_PER_WORD));
4430 MEM_NOTRAP_P (mem) = 1;
4431 set_mem_alias_set (mem, set);
4432 emit_move_insn (mem, gen_rtx_REG (Pmode,
4433 x86_64_int_parameter_registers[i]));
4434 }
4435
4436 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4437 {
4438 /* Now emit code to save SSE registers.  The AX parameter contains the
4439 number of SSE parameter registers used to call this function.  We use
4440 the sse_prologue_save insn template, which produces a computed jump
4441 across the SSE saves.  We need some preparation work to get this working. */
4442
4443 label = gen_label_rtx ();
4444 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4445
4446 /* Compute the address to jump to:
4447 label - eax*4 + nnamed_sse_arguments*4 */
4448 tmp_reg = gen_reg_rtx (Pmode);
4449 nsse_reg = gen_reg_rtx (Pmode);
4450 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4451 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4452 gen_rtx_MULT (Pmode, nsse_reg,
4453 GEN_INT (4))));
4454 if (next_cum.sse_regno)
4455 emit_move_insn
4456 (nsse_reg,
4457 gen_rtx_CONST (DImode,
4458 gen_rtx_PLUS (DImode,
4459 label_ref,
4460 GEN_INT (next_cum.sse_regno * 4))));
4461 else
4462 emit_move_insn (nsse_reg, label_ref);
4463 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4464
4465 /* Compute the address of the memory block we save into.  We always use a
4466 pointer pointing 127 bytes past the first byte to store - this is needed
4467 to keep the instruction size limited to 4 bytes. */
4468 tmp_reg = gen_reg_rtx (Pmode);
4469 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4470 plus_constant (save_area,
4471 8 * REGPARM_MAX + 127)));
4472 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4473 MEM_NOTRAP_P (mem) = 1;
4474 set_mem_alias_set (mem, set);
4475 set_mem_align (mem, BITS_PER_WORD);
4476
4477 /* And finally do the dirty job! */
4478 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4479 GEN_INT (next_cum.sse_regno), label));
4480 }
4481
4482 }
4483
4484 /* Implement va_start. */
4485
4486 void
4487 ix86_va_start (tree valist, rtx nextarg)
4488 {
4489 HOST_WIDE_INT words, n_gpr, n_fpr;
4490 tree f_gpr, f_fpr, f_ovf, f_sav;
4491 tree gpr, fpr, ovf, sav, t;
4492 tree type;
4493
4494 /* Only 64bit target needs something special. */
4495 if (!TARGET_64BIT)
4496 {
4497 std_expand_builtin_va_start (valist, nextarg);
4498 return;
4499 }
4500
4501 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4502 f_fpr = TREE_CHAIN (f_gpr);
4503 f_ovf = TREE_CHAIN (f_fpr);
4504 f_sav = TREE_CHAIN (f_ovf);
4505
4506 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4507 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4508 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4509 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4510 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4511
4512 /* Count number of gp and fp argument registers used. */
4513 words = current_function_args_info.words;
4514 n_gpr = current_function_args_info.regno;
4515 n_fpr = current_function_args_info.sse_regno;
4516
4517 if (TARGET_DEBUG_ARG)
4518 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4519 (int) words, (int) n_gpr, (int) n_fpr);
4520
4521 if (cfun->va_list_gpr_size)
4522 {
4523 type = TREE_TYPE (gpr);
4524 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4525 build_int_cst (type, n_gpr * 8));
4526 TREE_SIDE_EFFECTS (t) = 1;
4527 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4528 }
4529
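/* fp_offset starts past the REGPARM_MAX integer slots (8 bytes each);
   each SSE register occupies a 16-byte slot in the save area.  */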
4530 if (cfun->va_list_fpr_size)
4531 {
4532 type = TREE_TYPE (fpr);
4533 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4534 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4535 TREE_SIDE_EFFECTS (t) = 1;
4536 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4537 }
4538
4539 /* Find the overflow area. */
4540 type = TREE_TYPE (ovf);
4541 t = make_tree (type, virtual_incoming_args_rtx);
4542 if (words != 0)
4543 t = build2 (PLUS_EXPR, type, t,
4544 build_int_cst (type, words * UNITS_PER_WORD));
4545 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4546 TREE_SIDE_EFFECTS (t) = 1;
4547 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4548
4549 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4550 {
4551 /* Find the register save area.
4552 The prologue of the function saves it right above the stack frame. */
4553 type = TREE_TYPE (sav);
4554 t = make_tree (type, frame_pointer_rtx);
4555 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4556 TREE_SIDE_EFFECTS (t) = 1;
4557 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4558 }
4559 }
4560
4561 /* Implement va_arg. */
4562
4563 tree
4564 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4565 {
4566 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4567 tree f_gpr, f_fpr, f_ovf, f_sav;
4568 tree gpr, fpr, ovf, sav, t;
4569 int size, rsize;
4570 tree lab_false, lab_over = NULL_TREE;
4571 tree addr, t2;
4572 rtx container;
4573 int indirect_p = 0;
4574 tree ptrtype;
4575 enum machine_mode nat_mode;
4576
4577 /* Only 64bit target needs something special. */
4578 if (!TARGET_64BIT)
4579 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4580
4581 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4582 f_fpr = TREE_CHAIN (f_gpr);
4583 f_ovf = TREE_CHAIN (f_fpr);
4584 f_sav = TREE_CHAIN (f_ovf);
4585
4586 valist = build_va_arg_indirect_ref (valist);
4587 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4588 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4589 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4590 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4591
4592 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4593 if (indirect_p)
4594 type = build_pointer_type (type);
4595 size = int_size_in_bytes (type);
4596 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4597
4598 nat_mode = type_natural_mode (type);
4599 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4600 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4601
4602 /* Pull the value out of the saved registers. */
4603
4604 addr = create_tmp_var (ptr_type_node, "addr");
4605 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4606
4607 if (container)
4608 {
4609 int needed_intregs, needed_sseregs;
4610 bool need_temp;
4611 tree int_addr, sse_addr;
4612
4613 lab_false = create_artificial_label ();
4614 lab_over = create_artificial_label ();
4615
4616 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4617
4618 need_temp = (!REG_P (container)
4619 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4620 || TYPE_ALIGN (type) > 128));
4621
4622 /* In case we are passing a structure, verify that it is a consecutive block
4623 in the register save area.  If not, we need to do moves. */
4624 if (!need_temp && !REG_P (container))
4625 {
4626 /* Verify that all registers are strictly consecutive.  */
4627 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4628 {
4629 int i;
4630
4631 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4632 {
4633 rtx slot = XVECEXP (container, 0, i);
4634 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4635 || INTVAL (XEXP (slot, 1)) != i * 16)
4636 need_temp = 1;
4637 }
4638 }
4639 else
4640 {
4641 int i;
4642
4643 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4644 {
4645 rtx slot = XVECEXP (container, 0, i);
4646 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4647 || INTVAL (XEXP (slot, 1)) != i * 8)
4648 need_temp = 1;
4649 }
4650 }
4651 }
4652 if (!need_temp)
4653 {
4654 int_addr = addr;
4655 sse_addr = addr;
4656 }
4657 else
4658 {
4659 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4660 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4661 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4662 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4663 }
4664
4665 /* First ensure that we fit completely in registers. */
4666 if (needed_intregs)
4667 {
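/* If gp_offset has already reached (REGPARM_MAX - needed_intregs + 1) * 8,
   fewer than needed_intregs integer slots remain; branch to the
   overflow (stack) path.  */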
4668 t = build_int_cst (TREE_TYPE (gpr),
4669 (REGPARM_MAX - needed_intregs + 1) * 8);
4670 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4671 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4672 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4673 gimplify_and_add (t, pre_p);
4674 }
4675 if (needed_sseregs)
4676 {
4677 t = build_int_cst (TREE_TYPE (fpr),
4678 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4679 + REGPARM_MAX * 8);
4680 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4681 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4682 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4683 gimplify_and_add (t, pre_p);
4684 }
4685
4686 /* Compute index to start of area used for integer regs. */
4687 if (needed_intregs)
4688 {
4689 /* int_addr = gpr + sav; */
4690 t = fold_convert (ptr_type_node, gpr);
4691 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4692 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4693 gimplify_and_add (t, pre_p);
4694 }
4695 if (needed_sseregs)
4696 {
4697 /* sse_addr = fpr + sav; */
4698 t = fold_convert (ptr_type_node, fpr);
4699 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4700 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4701 gimplify_and_add (t, pre_p);
4702 }
4703 if (need_temp)
4704 {
4705 int i;
4706 tree temp = create_tmp_var (type, "va_arg_tmp");
4707
4708 /* addr = &temp; */
4709 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4710 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4711 gimplify_and_add (t, pre_p);
4712
4713 for (i = 0; i < XVECLEN (container, 0); i++)
4714 {
4715 rtx slot = XVECEXP (container, 0, i);
4716 rtx reg = XEXP (slot, 0);
4717 enum machine_mode mode = GET_MODE (reg);
4718 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4719 tree addr_type = build_pointer_type (piece_type);
4720 tree src_addr, src;
4721 int src_offset;
4722 tree dest_addr, dest;
4723
4724 if (SSE_REGNO_P (REGNO (reg)))
4725 {
4726 src_addr = sse_addr;
4727 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4728 }
4729 else
4730 {
4731 src_addr = int_addr;
4732 src_offset = REGNO (reg) * 8;
4733 }
4734 src_addr = fold_convert (addr_type, src_addr);
4735 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4736 size_int (src_offset)));
4737 src = build_va_arg_indirect_ref (src_addr);
4738
4739 dest_addr = fold_convert (addr_type, addr);
4740 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4741 size_int (INTVAL (XEXP (slot, 1)))));
4742 dest = build_va_arg_indirect_ref (dest_addr);
4743
4744 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4745 gimplify_and_add (t, pre_p);
4746 }
4747 }
4748
4749 if (needed_intregs)
4750 {
4751 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4752 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4753 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4754 gimplify_and_add (t, pre_p);
4755 }
4756 if (needed_sseregs)
4757 {
4758 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4759 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4760 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4761 gimplify_and_add (t, pre_p);
4762 }
4763
4764 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4765 gimplify_and_add (t, pre_p);
4766
4767 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4768 append_to_statement_list (t, pre_p);
4769 }
4770
4771 /* ... otherwise out of the overflow area. */
4772
4773 /* Care for on-stack alignment if needed. */
4774 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4775 || integer_zerop (TYPE_SIZE (type)))
4776 t = ovf;
4777 else
4778 {
4779 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4780 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4781 build_int_cst (TREE_TYPE (ovf), align - 1));
4782 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4783 build_int_cst (TREE_TYPE (t), -align));
4784 }
4785 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4786
4787 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4788 gimplify_and_add (t2, pre_p);
4789
4790 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4791 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4792 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4793 gimplify_and_add (t, pre_p);
4794
4795 if (container)
4796 {
4797 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4798 append_to_statement_list (t, pre_p);
4799 }
4800
4801 ptrtype = build_pointer_type (type);
4802 addr = fold_convert (ptrtype, addr);
4803
4804 if (indirect_p)
4805 addr = build_va_arg_indirect_ref (addr);
4806 return build_va_arg_indirect_ref (addr);
4807 }
4808 \f
4809 /* Return nonzero if OPNUM's MEM should be matched
4810 in movabs* patterns. */
4811
4812 int
4813 ix86_check_movabs (rtx insn, int opnum)
4814 {
4815 rtx set, mem;
4816
4817 set = PATTERN (insn);
4818 if (GET_CODE (set) == PARALLEL)
4819 set = XVECEXP (set, 0, 0);
4820 gcc_assert (GET_CODE (set) == SET);
4821 mem = XEXP (set, opnum);
4822 while (GET_CODE (mem) == SUBREG)
4823 mem = SUBREG_REG (mem);
4824 gcc_assert (GET_CODE (mem) == MEM);
4825 return (volatile_ok || !MEM_VOLATILE_P (mem));
4826 }
4827 \f
4828 /* Initialize the table of extra 80387 mathematical constants. */
4829
4830 static void
4831 init_ext_80387_constants (void)
4832 {
4833 static const char * cst[5] =
4834 {
4835 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4836 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4837 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4838 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4839 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4840 };
4841 int i;
4842
4843 for (i = 0; i < 5; i++)
4844 {
4845 real_from_string (&ext_80387_constants_table[i], cst[i]);
4846 /* Ensure each constant is rounded to XFmode precision. */
4847 real_convert (&ext_80387_constants_table[i],
4848 XFmode, &ext_80387_constants_table[i]);
4849 }
4850
4851 ext_80387_constants_init = 1;
4852 }
4853
4854 /* Return nonzero if the constant is something that can be loaded with
4855 a special 80387 instruction. */
4856
4857 int
4858 standard_80387_constant_p (rtx x)
4859 {
4860 REAL_VALUE_TYPE r;
4861
4862 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4863 return -1;
4864
4865 if (x == CONST0_RTX (GET_MODE (x)))
4866 return 1;
4867 if (x == CONST1_RTX (GET_MODE (x)))
4868 return 2;
4869
4870 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4871
4872 /* For XFmode constants, try to find a special 80387 instruction when
4873 optimizing for size or on those CPUs that benefit from them. */
4874 if (GET_MODE (x) == XFmode
4875 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4876 {
4877 int i;
4878
4879 if (! ext_80387_constants_init)
4880 init_ext_80387_constants ();
4881
4882 for (i = 0; i < 5; i++)
4883 if (real_identical (&r, &ext_80387_constants_table[i]))
4884 return i + 3;
4885 }
4886
4887 /* A load of the constant -0.0 or -1.0 will be split into an
4888 fldz;fchs or fld1;fchs sequence. */
4889 if (real_isnegzero (&r))
4890 return 8;
4891 if (real_identical (&r, &dconstm1))
4892 return 9;
4893
4894 return 0;
4895 }
4896
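/* For example (illustrative only): an XFmode CONST_DOUBLE holding pi, with
   optimize_size or a suitable CPU tuning in effect, makes
   standard_80387_constant_p return 7, which standard_80387_constant_opcode
   below maps to "fldpi"; +0.0 and +1.0 return 1 and 2 ("fldz"/"fld1"),
   while -0.0 and -1.0 return 8 and 9 and are later split into fldz;fchs
   and fld1;fchs sequences.  */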
4897 /* Return the opcode of the special instruction to be used to load
4898 the constant X. */
4899
4900 const char *
4901 standard_80387_constant_opcode (rtx x)
4902 {
4903 switch (standard_80387_constant_p (x))
4904 {
4905 case 1:
4906 return "fldz";
4907 case 2:
4908 return "fld1";
4909 case 3:
4910 return "fldlg2";
4911 case 4:
4912 return "fldln2";
4913 case 5:
4914 return "fldl2e";
4915 case 6:
4916 return "fldl2t";
4917 case 7:
4918 return "fldpi";
4919 case 8:
4920 case 9:
4921 return "#";
4922 default:
4923 gcc_unreachable ();
4924 }
4925 }
4926
4927 /* Return the CONST_DOUBLE representing the 80387 constant that is
4928 loaded by the specified special instruction. The argument IDX
4929 matches the return value from standard_80387_constant_p. */
4930
4931 rtx
4932 standard_80387_constant_rtx (int idx)
4933 {
4934 int i;
4935
4936 if (! ext_80387_constants_init)
4937 init_ext_80387_constants ();
4938
4939 switch (idx)
4940 {
4941 case 3:
4942 case 4:
4943 case 5:
4944 case 6:
4945 case 7:
4946 i = idx - 3;
4947 break;
4948
4949 default:
4950 gcc_unreachable ();
4951 }
4952
4953 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4954 XFmode);
4955 }
4956
4957 /* Return 1 if MODE is a valid vector mode for SSE. */
4958 static int
4959 standard_sse_mode_p (enum machine_mode mode)
4960 {
4961 switch (mode)
4962 {
4963 case V16QImode:
4964 case V8HImode:
4965 case V4SImode:
4966 case V2DImode:
4967 case V4SFmode:
4968 case V2DFmode:
4969 return 1;
4970
4971 default:
4972 return 0;
4973 }
4974 }
4975
4976 /* Return nonzero if X is an FP or vector constant that can be loaded
4977 into an SSE register without using memory. */
4978 int
4979 standard_sse_constant_p (rtx x)
4980 {
4981 enum machine_mode mode = GET_MODE (x);
4982
4983 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4984 return 1;
4985 if (vector_all_ones_operand (x, mode)
4986 && standard_sse_mode_p (mode))
4987 return TARGET_SSE2 ? 2 : -1;
4988
4989 return 0;
4990 }
4991
4992 /* Return the opcode of the special instruction to be used to load
4993 the constant X. */
4994
4995 const char *
4996 standard_sse_constant_opcode (rtx insn, rtx x)
4997 {
4998 switch (standard_sse_constant_p (x))
4999 {
5000 case 1:
5001 if (get_attr_mode (insn) == MODE_V4SF)
5002 return "xorps\t%0, %0";
5003 else if (get_attr_mode (insn) == MODE_V2DF)
5004 return "xorpd\t%0, %0";
5005 else
5006 return "pxor\t%0, %0";
5007 case 2:
5008 return "pcmpeqd\t%0, %0";
5009 }
5010 gcc_unreachable ();
5011 }
5012
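/* For instance (illustrative only): loading CONST0_RTX (V4SFmode) through an
   insn whose mode attribute is MODE_V4SF emits "xorps %xmm2, %xmm2" for an
   %xmm2 destination, while an all-ones vector constant on an SSE2 target
   emits "pcmpeqd %xmm2, %xmm2"; neither form touches memory.  */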
5013 /* Returns 1 if OP contains a symbol reference. */
5014
5015 int
5016 symbolic_reference_mentioned_p (rtx op)
5017 {
5018 const char *fmt;
5019 int i;
5020
5021 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5022 return 1;
5023
5024 fmt = GET_RTX_FORMAT (GET_CODE (op));
5025 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5026 {
5027 if (fmt[i] == 'E')
5028 {
5029 int j;
5030
5031 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5032 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5033 return 1;
5034 }
5035
5036 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5037 return 1;
5038 }
5039
5040 return 0;
5041 }
5042
5043 /* Return 1 if it is appropriate to emit `ret' instructions in the
5044 body of a function. Do this only if the epilogue is simple, needing a
5045 couple of insns. Prior to reloading, we can't tell how many registers
5046 must be saved, so return 0 then. Return 0 if there is no frame
5047 marker to de-allocate. */
5048
5049 int
5050 ix86_can_use_return_insn_p (void)
5051 {
5052 struct ix86_frame frame;
5053
5054 if (! reload_completed || frame_pointer_needed)
5055 return 0;
5056
5057 /* Don't allow more than 32768 bytes of arguments to be popped, since
5058 that's all we handle here with one instruction. */
5059 if (current_function_pops_args
5060 && current_function_args_size >= 32768)
5061 return 0;
5062
5063 ix86_compute_frame_layout (&frame);
5064 return frame.to_allocate == 0 && frame.nregs == 0;
5065 }
5066 \f
5067 /* Value should be nonzero if functions must have frame pointers.
5068 Zero means the frame pointer need not be set up (and parms may
5069 be accessed via the stack pointer) in functions that seem suitable. */
5070
5071 int
5072 ix86_frame_pointer_required (void)
5073 {
5074 /* If we accessed previous frames, then the generated code expects
5075 to be able to access the saved ebp value in our frame. */
5076 if (cfun->machine->accesses_prev_frame)
5077 return 1;
5078
5079 /* Several x86 OSes need a frame pointer for other reasons,
5080 usually pertaining to setjmp. */
5081 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5082 return 1;
5083
5084 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5085 the frame pointer by default. Turn it back on now if we've not
5086 got a leaf function. */
5087 if (TARGET_OMIT_LEAF_FRAME_POINTER
5088 && (!current_function_is_leaf
5089 || ix86_current_function_calls_tls_descriptor))
5090 return 1;
5091
5092 if (current_function_profile)
5093 return 1;
5094
5095 return 0;
5096 }
5097
5098 /* Record that the current function accesses previous call frames. */
5099
5100 void
5101 ix86_setup_frame_addresses (void)
5102 {
5103 cfun->machine->accesses_prev_frame = 1;
5104 }
5105 \f
5106 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5107 # define USE_HIDDEN_LINKONCE 1
5108 #else
5109 # define USE_HIDDEN_LINKONCE 0
5110 #endif
5111
5112 static int pic_labels_used;
5113
5114 /* Fills in the label name that should be used for a pc thunk for
5115 the given register. */
5116
5117 static void
5118 get_pc_thunk_name (char name[32], unsigned int regno)
5119 {
5120 gcc_assert (!TARGET_64BIT);
5121
5122 if (USE_HIDDEN_LINKONCE)
5123 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5124 else
5125 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5126 }
5127
5128
5129 /* This function emits the pc thunks used by -fpic code: each thunk loads
5130 its register with the return address of the caller and then returns. */
5131
5132 void
5133 ix86_file_end (void)
5134 {
5135 rtx xops[2];
5136 int regno;
5137
5138 for (regno = 0; regno < 8; ++regno)
5139 {
5140 char name[32];
5141
5142 if (! ((pic_labels_used >> regno) & 1))
5143 continue;
5144
5145 get_pc_thunk_name (name, regno);
5146
5147 #if TARGET_MACHO
5148 if (TARGET_MACHO)
5149 {
5150 switch_to_section (darwin_sections[text_coal_section]);
5151 fputs ("\t.weak_definition\t", asm_out_file);
5152 assemble_name (asm_out_file, name);
5153 fputs ("\n\t.private_extern\t", asm_out_file);
5154 assemble_name (asm_out_file, name);
5155 fputs ("\n", asm_out_file);
5156 ASM_OUTPUT_LABEL (asm_out_file, name);
5157 }
5158 else
5159 #endif
5160 if (USE_HIDDEN_LINKONCE)
5161 {
5162 tree decl;
5163
5164 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5165 error_mark_node);
5166 TREE_PUBLIC (decl) = 1;
5167 TREE_STATIC (decl) = 1;
5168 DECL_ONE_ONLY (decl) = 1;
5169
5170 (*targetm.asm_out.unique_section) (decl, 0);
5171 switch_to_section (get_named_section (decl, NULL, 0));
5172
5173 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5174 fputs ("\t.hidden\t", asm_out_file);
5175 assemble_name (asm_out_file, name);
5176 fputc ('\n', asm_out_file);
5177 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5178 }
5179 else
5180 {
5181 switch_to_section (text_section);
5182 ASM_OUTPUT_LABEL (asm_out_file, name);
5183 }
5184
5185 xops[0] = gen_rtx_REG (SImode, regno);
5186 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5187 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5188 output_asm_insn ("ret", xops);
5189 }
5190
5191 if (NEED_INDICATE_EXEC_STACK)
5192 file_end_indicate_exec_stack ();
5193 }
5194
5195 /* Emit code for the SET_GOT patterns. */
5196
5197 const char *
5198 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5199 {
5200 rtx xops[3];
5201
5202 xops[0] = dest;
5203 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5204
5205 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5206 {
5207 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5208
5209 if (!flag_pic)
5210 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5211 else
5212 output_asm_insn ("call\t%a2", xops);
5213
5214 #if TARGET_MACHO
5215 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5216 is what will be referenced by the Mach-O PIC subsystem. */
5217 if (!label)
5218 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5219 #endif
5220
5221 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5222 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5223
5224 if (flag_pic)
5225 output_asm_insn ("pop{l}\t%0", xops);
5226 }
5227 else
5228 {
5229 char name[32];
5230 get_pc_thunk_name (name, REGNO (dest));
5231 pic_labels_used |= 1 << REGNO (dest);
5232
5233 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5234 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5235 output_asm_insn ("call\t%X2", xops);
5236 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5237 is what will be referenced by the Mach-O PIC subsystem. */
5238 #if TARGET_MACHO
5239 if (!label)
5240 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5241 else
5242 targetm.asm_out.internal_label (asm_out_file, "L",
5243 CODE_LABEL_NUMBER (label));
5244 #endif
5245 }
5246
5247 if (TARGET_MACHO)
5248 return "";
5249
5250 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5251 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5252 else
5253 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5254
5255 return "";
5256 }
5257
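/* As a rough example of the deep branch prediction case above, on an ELF
   target setting up the GOT pointer in %ebx typically expands to

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   where the thunk emitted by ix86_file_end above loads the return address
   from the stack into %ebx and returns.  */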
5258 /* Generate a "push" pattern for input ARG. */
5259
5260 static rtx
5261 gen_push (rtx arg)
5262 {
5263 return gen_rtx_SET (VOIDmode,
5264 gen_rtx_MEM (Pmode,
5265 gen_rtx_PRE_DEC (Pmode,
5266 stack_pointer_rtx)),
5267 arg);
5268 }
5269
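/* The RTL built above corresponds to a plain push; for a 32-bit target and
   ARG being %eax it is roughly

     (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   which the move patterns output as "pushl %eax".  (Illustrative sketch only;
   on 64-bit targets Pmode is DImode and the push is 8 bytes wide.)  */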
5270 /* Return the number of an unused call-clobbered register if one is
5271 available for the entire function, INVALID_REGNUM otherwise. */
5272
5273 static unsigned int
5274 ix86_select_alt_pic_regnum (void)
5275 {
5276 if (current_function_is_leaf && !current_function_profile
5277 && !ix86_current_function_calls_tls_descriptor)
5278 {
5279 int i;
5280 for (i = 2; i >= 0; --i)
5281 if (!regs_ever_live[i])
5282 return i;
5283 }
5284
5285 return INVALID_REGNUM;
5286 }
5287
5288 /* Return 1 if we need to save REGNO. */
5289 static int
5290 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5291 {
5292 if (pic_offset_table_rtx
5293 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5294 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5295 || current_function_profile
5296 || current_function_calls_eh_return
5297 || current_function_uses_const_pool))
5298 {
5299 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5300 return 0;
5301 return 1;
5302 }
5303
5304 if (current_function_calls_eh_return && maybe_eh_return)
5305 {
5306 unsigned i;
5307 for (i = 0; ; i++)
5308 {
5309 unsigned test = EH_RETURN_DATA_REGNO (i);
5310 if (test == INVALID_REGNUM)
5311 break;
5312 if (test == regno)
5313 return 1;
5314 }
5315 }
5316
5317 if (cfun->machine->force_align_arg_pointer
5318 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5319 return 1;
5320
5321 return (regs_ever_live[regno]
5322 && !call_used_regs[regno]
5323 && !fixed_regs[regno]
5324 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5325 }
5326
5327 /* Return number of registers to be saved on the stack. */
5328
5329 static int
5330 ix86_nsaved_regs (void)
5331 {
5332 int nregs = 0;
5333 int regno;
5334
5335 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5336 if (ix86_save_reg (regno, true))
5337 nregs++;
5338 return nregs;
5339 }
5340
5341 /* Return the offset between two registers, one to be eliminated, and the other
5342 its replacement, at the start of a routine. */
5343
5344 HOST_WIDE_INT
5345 ix86_initial_elimination_offset (int from, int to)
5346 {
5347 struct ix86_frame frame;
5348 ix86_compute_frame_layout (&frame);
5349
5350 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5351 return frame.hard_frame_pointer_offset;
5352 else if (from == FRAME_POINTER_REGNUM
5353 && to == HARD_FRAME_POINTER_REGNUM)
5354 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5355 else
5356 {
5357 gcc_assert (to == STACK_POINTER_REGNUM);
5358
5359 if (from == ARG_POINTER_REGNUM)
5360 return frame.stack_pointer_offset;
5361
5362 gcc_assert (from == FRAME_POINTER_REGNUM);
5363 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5364 }
5365 }
5366
5367 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
5368
5369 static void
5370 ix86_compute_frame_layout (struct ix86_frame *frame)
5371 {
5372 HOST_WIDE_INT total_size;
5373 unsigned int stack_alignment_needed;
5374 HOST_WIDE_INT offset;
5375 unsigned int preferred_alignment;
5376 HOST_WIDE_INT size = get_frame_size ();
5377
5378 frame->nregs = ix86_nsaved_regs ();
5379 total_size = size;
5380
5381 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5382 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5383
5384 /* During reload iterations the number of registers saved can change.
5385 Recompute the value as needed. Do not recompute when the number of registers
5386 didn't change, as reload makes multiple calls to this function and does not
5387 expect the decision to change within a single iteration. */
5388 if (!optimize_size
5389 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5390 {
5391 int count = frame->nregs;
5392
5393 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5394 /* The fast prologue uses moves instead of pushes to save registers. This
5395 is significantly longer, but it also executes faster, as modern hardware
5396 can execute the moves in parallel but can't do that for push/pop.
5397
5398 Be careful about choosing which prologue to emit: when a function takes
5399 many instructions to execute, we may as well use the slow version; the
5400 same holds when the function is known to be outside a hot spot (this is
5401 known with profile feedback only). Weight the size of the function by the
5402 number of registers to save, as it is cheap to use one or two push
5403 instructions but very slow to use many of them. */
5404 if (count)
5405 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5406 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5407 || (flag_branch_probabilities
5408 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5409 cfun->machine->use_fast_prologue_epilogue = false;
5410 else
5411 cfun->machine->use_fast_prologue_epilogue
5412 = !expensive_function_p (count);
5413 }
5414 if (TARGET_PROLOGUE_USING_MOVE
5415 && cfun->machine->use_fast_prologue_epilogue)
5416 frame->save_regs_using_mov = true;
5417 else
5418 frame->save_regs_using_mov = false;
5419
5420
5421 /* Skip return address and saved base pointer. */
5422 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5423
5424 frame->hard_frame_pointer_offset = offset;
5425
5426 /* Do some sanity checking of stack_alignment_needed and
5427 preferred_alignment, since the i386 port is the only one using those
5428 features, and they may break easily. */
5429
5430 gcc_assert (!size || stack_alignment_needed);
5431 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5432 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5433 gcc_assert (stack_alignment_needed
5434 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5435
5436 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5437 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5438
5439 /* Register save area */
5440 offset += frame->nregs * UNITS_PER_WORD;
5441
5442 /* Va-arg area */
5443 if (ix86_save_varrargs_registers)
5444 {
5445 offset += X86_64_VARARGS_SIZE;
5446 frame->va_arg_size = X86_64_VARARGS_SIZE;
5447 }
5448 else
5449 frame->va_arg_size = 0;
5450
5451 /* Align start of frame for local function. */
5452 frame->padding1 = ((offset + stack_alignment_needed - 1)
5453 & -stack_alignment_needed) - offset;
5454
5455 offset += frame->padding1;
5456
5457 /* Frame pointer points here. */
5458 frame->frame_pointer_offset = offset;
5459
5460 offset += size;
5461
5462 /* Add the outgoing arguments area. It can be skipped if we eliminated
5463 all the function calls as dead code.
5464 Skipping is however impossible when the function calls alloca: the alloca
5465 expander assumes that the last current_function_outgoing_args_size bytes
5466 of the stack frame are unused. */
5467 if (ACCUMULATE_OUTGOING_ARGS
5468 && (!current_function_is_leaf || current_function_calls_alloca
5469 || ix86_current_function_calls_tls_descriptor))
5470 {
5471 offset += current_function_outgoing_args_size;
5472 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5473 }
5474 else
5475 frame->outgoing_arguments_size = 0;
5476
5477 /* Align stack boundary. Only needed if we're calling another function
5478 or using alloca. */
5479 if (!current_function_is_leaf || current_function_calls_alloca
5480 || ix86_current_function_calls_tls_descriptor)
5481 frame->padding2 = ((offset + preferred_alignment - 1)
5482 & -preferred_alignment) - offset;
5483 else
5484 frame->padding2 = 0;
5485
5486 offset += frame->padding2;
5487
5488 /* We've reached the end of the stack frame. */
5489 frame->stack_pointer_offset = offset;
5490
5491 /* Size the prologue needs to allocate. */
5492 frame->to_allocate =
5493 (size + frame->padding1 + frame->padding2
5494 + frame->outgoing_arguments_size + frame->va_arg_size);
5495
5496 if ((!frame->to_allocate && frame->nregs <= 1)
5497 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5498 frame->save_regs_using_mov = false;
5499
5500 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5501 && current_function_is_leaf
5502 && !ix86_current_function_calls_tls_descriptor)
5503 {
5504 frame->red_zone_size = frame->to_allocate;
5505 if (frame->save_regs_using_mov)
5506 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5507 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5508 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5509 }
5510 else
5511 frame->red_zone_size = 0;
5512 frame->to_allocate -= frame->red_zone_size;
5513 frame->stack_pointer_offset -= frame->red_zone_size;
5514 #if 0
5515 fprintf (stderr, "nregs: %i\n", frame->nregs);
5516 fprintf (stderr, "size: %i\n", size);
5517 fprintf (stderr, "alignment1: %i\n", stack_alignment_needed);
5518 fprintf (stderr, "padding1: %i\n", frame->padding1);
5519 fprintf (stderr, "va_arg: %i\n", frame->va_arg_size);
5520 fprintf (stderr, "padding2: %i\n", frame->padding2);
5521 fprintf (stderr, "to_allocate: %i\n", frame->to_allocate);
5522 fprintf (stderr, "red_zone_size: %i\n", frame->red_zone_size);
5523 fprintf (stderr, "frame_pointer_offset: %i\n", frame->frame_pointer_offset);
5524 fprintf (stderr, "hard_frame_pointer_offset: %i\n",
5525 frame->hard_frame_pointer_offset);
5526 fprintf (stderr, "stack_pointer_offset: %i\n", frame->stack_pointer_offset);
5527 #endif
5528 }
5529
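/* A rough sketch of the frame laid out above, from higher to lower addresses
   (illustrative only; any of these areas may be empty):

	return address
	saved %ebp (if frame_pointer_needed)
	register save area (nregs words)
	va-arg register save area
	padding1 (to stack_alignment_needed)
	local variables (get_frame_size ())
	outgoing arguments area
	padding2 (to preferred_alignment)

   frame_pointer_offset and stack_pointer_offset record the distance from the
   top of the frame to the local variables and to the end of the frame, while
   to_allocate is the part beyond the register save area that the prologue
   must allocate explicitly, less whatever fits in the red zone on 64-bit
   leaf functions.  */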
5530 /* Emit code to save registers in the prologue. */
5531
5532 static void
5533 ix86_emit_save_regs (void)
5534 {
5535 unsigned int regno;
5536 rtx insn;
5537
5538 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5539 if (ix86_save_reg (regno, true))
5540 {
5541 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5542 RTX_FRAME_RELATED_P (insn) = 1;
5543 }
5544 }
5545
5546 /* Emit code to save registers using MOV insns. The first register
5547 is saved at POINTER + OFFSET. */
5548 static void
5549 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5550 {
5551 unsigned int regno;
5552 rtx insn;
5553
5554 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5555 if (ix86_save_reg (regno, true))
5556 {
5557 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5558 Pmode, offset),
5559 gen_rtx_REG (Pmode, regno));
5560 RTX_FRAME_RELATED_P (insn) = 1;
5561 offset += UNITS_PER_WORD;
5562 }
5563 }
5564
5565 /* Expand a prologue or epilogue stack adjustment.
5566 The pattern exists to put a dependency on all ebp-based memory accesses.
5567 STYLE should be negative if instructions should be marked as frame related,
5568 zero if the %r11 register is live and cannot be freely used, and positive
5569 otherwise. */
5570
5571 static void
5572 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5573 {
5574 rtx insn;
5575
5576 if (! TARGET_64BIT)
5577 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5578 else if (x86_64_immediate_operand (offset, DImode))
5579 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5580 else
5581 {
5582 rtx r11;
5583 /* r11 is used by indirect sibcall return as well; it is set before the
5584 epilogue and used after the epilogue. At the moment, indirect sibcalls
5585 shouldn't be used together with huge frame sizes in one
5586 function because of the frame_size check in sibcall.c. */
5587 gcc_assert (style);
5588 r11 = gen_rtx_REG (DImode, R11_REG);
5589 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5590 if (style < 0)
5591 RTX_FRAME_RELATED_P (insn) = 1;
5592 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5593 offset));
5594 }
5595 if (style < 0)
5596 RTX_FRAME_RELATED_P (insn) = 1;
5597 }
5598
5599 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5600
5601 static rtx
5602 ix86_internal_arg_pointer (void)
5603 {
5604 bool has_force_align_arg_pointer =
5605 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5606 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5607 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5608 && DECL_NAME (current_function_decl)
5609 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5610 && DECL_FILE_SCOPE_P (current_function_decl))
5611 || ix86_force_align_arg_pointer
5612 || has_force_align_arg_pointer)
5613 {
5614 /* Nested functions can't realign the stack due to a register
5615 conflict. */
5616 if (DECL_CONTEXT (current_function_decl)
5617 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5618 {
5619 if (ix86_force_align_arg_pointer)
5620 warning (0, "-mstackrealign ignored for nested functions");
5621 if (has_force_align_arg_pointer)
5622 error ("%s not supported for nested functions",
5623 ix86_force_align_arg_pointer_string);
5624 return virtual_incoming_args_rtx;
5625 }
5626 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5627 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5628 }
5629 else
5630 return virtual_incoming_args_rtx;
5631 }
5632
5633 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5634 This is called from dwarf2out.c to emit call frame instructions
5635 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5636 static void
5637 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5638 {
5639 rtx unspec = SET_SRC (pattern);
5640 gcc_assert (GET_CODE (unspec) == UNSPEC);
5641
5642 switch (index)
5643 {
5644 case UNSPEC_REG_SAVE:
5645 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5646 SET_DEST (pattern));
5647 break;
5648 case UNSPEC_DEF_CFA:
5649 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5650 INTVAL (XVECEXP (unspec, 0, 0)));
5651 break;
5652 default:
5653 gcc_unreachable ();
5654 }
5655 }
5656
5657 /* Expand the prologue into a bunch of separate insns. */
5658
5659 void
5660 ix86_expand_prologue (void)
5661 {
5662 rtx insn;
5663 bool pic_reg_used;
5664 struct ix86_frame frame;
5665 HOST_WIDE_INT allocate;
5666
5667 ix86_compute_frame_layout (&frame);
5668
5669 if (cfun->machine->force_align_arg_pointer)
5670 {
5671 rtx x, y;
5672
5673 /* Grab the argument pointer. */
5674 x = plus_constant (stack_pointer_rtx, 4);
5675 y = cfun->machine->force_align_arg_pointer;
5676 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5677 RTX_FRAME_RELATED_P (insn) = 1;
5678
5679 /* The unwind info consists of two parts: install the fafp as the cfa,
5680 and record the fafp as the "save register" of the stack pointer.
5681 The latter is there in order that the unwinder can see where it
5682 should restore the stack pointer across the "and" insn. */
5683 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5684 x = gen_rtx_SET (VOIDmode, y, x);
5685 RTX_FRAME_RELATED_P (x) = 1;
5686 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5687 UNSPEC_REG_SAVE);
5688 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5689 RTX_FRAME_RELATED_P (y) = 1;
5690 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5691 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5692 REG_NOTES (insn) = x;
5693
5694 /* Align the stack. */
5695 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5696 GEN_INT (-16)));
5697
5698 /* And here we cheat like madmen with the unwind info. We force the
5699 cfa register back to sp+4, which is exactly what it was at the
5700 start of the function. Re-pushing the return address results in
5701 the return address being at the same spot relative to the cfa, and
5702 thus is correct wrt the unwind info. */
5703 x = cfun->machine->force_align_arg_pointer;
5704 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5705 insn = emit_insn (gen_push (x));
5706 RTX_FRAME_RELATED_P (insn) = 1;
5707
5708 x = GEN_INT (4);
5709 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5710 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5711 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5712 REG_NOTES (insn) = x;
5713 }
5714
5715 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5716 slower on all targets. Also sdb doesn't like it. */
5717
5718 if (frame_pointer_needed)
5719 {
5720 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5721 RTX_FRAME_RELATED_P (insn) = 1;
5722
5723 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5724 RTX_FRAME_RELATED_P (insn) = 1;
5725 }
5726
5727 allocate = frame.to_allocate;
5728
5729 if (!frame.save_regs_using_mov)
5730 ix86_emit_save_regs ();
5731 else
5732 allocate += frame.nregs * UNITS_PER_WORD;
5733
5734 /* When using the red zone we may start saving registers before allocating
5735 the stack frame, saving one cycle of the prologue. */
5736 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5737 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5738 : stack_pointer_rtx,
5739 -frame.nregs * UNITS_PER_WORD);
5740
5741 if (allocate == 0)
5742 ;
5743 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5744 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5745 GEN_INT (-allocate), -1);
5746 else
5747 {
5748 /* Only valid for Win32. */
5749 rtx eax = gen_rtx_REG (SImode, 0);
5750 bool eax_live = ix86_eax_live_at_start_p ();
5751 rtx t;
5752
5753 gcc_assert (!TARGET_64BIT);
5754
5755 if (eax_live)
5756 {
5757 emit_insn (gen_push (eax));
5758 allocate -= 4;
5759 }
5760
5761 emit_move_insn (eax, GEN_INT (allocate));
5762
5763 insn = emit_insn (gen_allocate_stack_worker (eax));
5764 RTX_FRAME_RELATED_P (insn) = 1;
5765 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5766 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5767 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5768 t, REG_NOTES (insn));
5769
5770 if (eax_live)
5771 {
5772 if (frame_pointer_needed)
5773 t = plus_constant (hard_frame_pointer_rtx,
5774 allocate
5775 - frame.to_allocate
5776 - frame.nregs * UNITS_PER_WORD);
5777 else
5778 t = plus_constant (stack_pointer_rtx, allocate);
5779 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5780 }
5781 }
5782
5783 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5784 {
5785 if (!frame_pointer_needed || !frame.to_allocate)
5786 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5787 else
5788 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5789 -frame.nregs * UNITS_PER_WORD);
5790 }
5791
5792 pic_reg_used = false;
5793 if (pic_offset_table_rtx
5794 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5795 || current_function_profile))
5796 {
5797 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5798
5799 if (alt_pic_reg_used != INVALID_REGNUM)
5800 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5801
5802 pic_reg_used = true;
5803 }
5804
5805 if (pic_reg_used)
5806 {
5807 if (TARGET_64BIT)
5808 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5809 else
5810 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5811
5812 /* Even with accurate pre-reload life analysis, we can wind up
5813 deleting all references to the pic register after reload.
5814 Consider the case where cross-jumping unifies two sides of a branch
5815 controlled by a comparison against the only read from a global.
5816 In that case, allow the set_got to be deleted, though we're
5817 too late to do anything about the ebx save in the prologue. */
5818 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5819 }
5820
5821 /* Prevent function calls from being scheduled before the call to mcount.
5822 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
5823 if (current_function_profile)
5824 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5825 }
5826
5827 /* Emit code to restore saved registers using MOV insns. First register
5828 is restored from POINTER + OFFSET. */
5829 static void
5830 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5831 int maybe_eh_return)
5832 {
5833 int regno;
5834 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5835
5836 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5837 if (ix86_save_reg (regno, maybe_eh_return))
5838 {
5839 /* Ensure that adjust_address won't be forced to produce a pointer
5840 out of the range allowed by the x86-64 instruction set. */
5841 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5842 {
5843 rtx r11;
5844
5845 r11 = gen_rtx_REG (DImode, R11_REG);
5846 emit_move_insn (r11, GEN_INT (offset));
5847 emit_insn (gen_adddi3 (r11, r11, pointer));
5848 base_address = gen_rtx_MEM (Pmode, r11);
5849 offset = 0;
5850 }
5851 emit_move_insn (gen_rtx_REG (Pmode, regno),
5852 adjust_address (base_address, Pmode, offset));
5853 offset += UNITS_PER_WORD;
5854 }
5855 }
5856
5857 /* Restore function stack, frame, and registers. */
5858
5859 void
5860 ix86_expand_epilogue (int style)
5861 {
5862 int regno;
5863 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5864 struct ix86_frame frame;
5865 HOST_WIDE_INT offset;
5866
5867 ix86_compute_frame_layout (&frame);
5868
5869 /* Calculate start of saved registers relative to ebp. Special care
5870 must be taken for the normal return case of a function using
5871 eh_return: the eax and edx registers are marked as saved, but not
5872 restored along this path. */
5873 offset = frame.nregs;
5874 if (current_function_calls_eh_return && style != 2)
5875 offset -= 2;
5876 offset *= -UNITS_PER_WORD;
5877
5878 /* If we're only restoring one register and sp is not valid, then
5879 use a move instruction to restore the register, since it's
5880 less work than reloading sp and popping the register.
5881
5882 The default code results in a stack adjustment using an add/lea instruction,
5883 while this code results in a LEAVE instruction (or discrete equivalent),
5884 so it is profitable in some other cases as well, especially when there
5885 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5886 and there is exactly one register to pop. This heuristic may need some
5887 tuning in the future. */
5888 if ((!sp_valid && frame.nregs <= 1)
5889 || (TARGET_EPILOGUE_USING_MOVE
5890 && cfun->machine->use_fast_prologue_epilogue
5891 && (frame.nregs > 1 || frame.to_allocate))
5892 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5893 || (frame_pointer_needed && TARGET_USE_LEAVE
5894 && cfun->machine->use_fast_prologue_epilogue
5895 && frame.nregs == 1)
5896 || current_function_calls_eh_return)
5897 {
5898 /* Restore registers. We can use ebp or esp to address the memory
5899 locations. If both are available, default to ebp, since offsets
5900 are known to be small. The only exception is when esp points directly
5901 to the end of the block of saved registers, where we may simplify the
5902 addressing mode. */
5903
5904 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5905 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5906 frame.to_allocate, style == 2);
5907 else
5908 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5909 offset, style == 2);
5910
5911 /* eh_return epilogues need %ecx added to the stack pointer. */
5912 if (style == 2)
5913 {
5914 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5915
5916 if (frame_pointer_needed)
5917 {
5918 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5919 tmp = plus_constant (tmp, UNITS_PER_WORD);
5920 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5921
5922 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5923 emit_move_insn (hard_frame_pointer_rtx, tmp);
5924
5925 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5926 const0_rtx, style);
5927 }
5928 else
5929 {
5930 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5931 tmp = plus_constant (tmp, (frame.to_allocate
5932 + frame.nregs * UNITS_PER_WORD));
5933 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5934 }
5935 }
5936 else if (!frame_pointer_needed)
5937 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5938 GEN_INT (frame.to_allocate
5939 + frame.nregs * UNITS_PER_WORD),
5940 style);
5941 /* If not an i386, mov & pop is faster than "leave". */
5942 else if (TARGET_USE_LEAVE || optimize_size
5943 || !cfun->machine->use_fast_prologue_epilogue)
5944 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5945 else
5946 {
5947 pro_epilogue_adjust_stack (stack_pointer_rtx,
5948 hard_frame_pointer_rtx,
5949 const0_rtx, style);
5950 if (TARGET_64BIT)
5951 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5952 else
5953 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5954 }
5955 }
5956 else
5957 {
5958 /* First step is to deallocate the stack frame so that we can
5959 pop the registers. */
5960 if (!sp_valid)
5961 {
5962 gcc_assert (frame_pointer_needed);
5963 pro_epilogue_adjust_stack (stack_pointer_rtx,
5964 hard_frame_pointer_rtx,
5965 GEN_INT (offset), style);
5966 }
5967 else if (frame.to_allocate)
5968 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5969 GEN_INT (frame.to_allocate), style);
5970
5971 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5972 if (ix86_save_reg (regno, false))
5973 {
5974 if (TARGET_64BIT)
5975 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5976 else
5977 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5978 }
5979 if (frame_pointer_needed)
5980 {
5981 /* Leave results in shorter dependency chains on CPUs that are
5982 able to grok it fast. */
5983 if (TARGET_USE_LEAVE)
5984 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5985 else if (TARGET_64BIT)
5986 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5987 else
5988 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5989 }
5990 }
5991
5992 if (cfun->machine->force_align_arg_pointer)
5993 {
5994 emit_insn (gen_addsi3 (stack_pointer_rtx,
5995 cfun->machine->force_align_arg_pointer,
5996 GEN_INT (-4)));
5997 }
5998
5999 /* Sibcall epilogues don't want a return instruction. */
6000 if (style == 0)
6001 return;
6002
6003 if (current_function_pops_args && current_function_args_size)
6004 {
6005 rtx popc = GEN_INT (current_function_pops_args);
6006
6007 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6008 return address, do an explicit add, and jump indirectly to the
6009 caller. */
6010
6011 if (current_function_pops_args >= 65536)
6012 {
6013 rtx ecx = gen_rtx_REG (SImode, 2);
6014
6015 /* There is no "pascal" calling convention in the 64bit ABI. */
6016 gcc_assert (!TARGET_64BIT);
6017
6018 emit_insn (gen_popsi1 (ecx));
6019 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6020 emit_jump_insn (gen_return_indirect_internal (ecx));
6021 }
6022 else
6023 emit_jump_insn (gen_return_pop_internal (popc));
6024 }
6025 else
6026 emit_jump_insn (gen_return_internal ());
6027 }
6028
6029 /* Undo the function's potential modifications to global state. */
6030
6031 static void
6032 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6033 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6034 {
6035 if (pic_offset_table_rtx)
6036 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6037 #if TARGET_MACHO
6038 /* Mach-O doesn't support labels at the end of objects, so if
6039 it looks like we might want one, insert a NOP. */
6040 {
6041 rtx insn = get_last_insn ();
6042 while (insn
6043 && NOTE_P (insn)
6044 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6045 insn = PREV_INSN (insn);
6046 if (insn
6047 && (LABEL_P (insn)
6048 || (NOTE_P (insn)
6049 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6050 fputs ("\tnop\n", file);
6051 }
6052 #endif
6053
6054 }
6055 \f
6056 /* Extract the parts of an RTL expression that is a valid memory address
6057 for an instruction. Return 0 if the structure of the address is
6058 grossly off. Return -1 if the address contains ASHIFT, so it is not
6059 strictly valid, but is still useful for computing the length of an lea instruction. */
6060
6061 int
6062 ix86_decompose_address (rtx addr, struct ix86_address *out)
6063 {
6064 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6065 rtx base_reg, index_reg;
6066 HOST_WIDE_INT scale = 1;
6067 rtx scale_rtx = NULL_RTX;
6068 int retval = 1;
6069 enum ix86_address_seg seg = SEG_DEFAULT;
6070
6071 if (GET_CODE (addr) == REG || GET_CODE (addr) == SUBREG)
6072 base = addr;
6073 else if (GET_CODE (addr) == PLUS)
6074 {
6075 rtx addends[4], op;
6076 int n = 0, i;
6077
6078 op = addr;
6079 do
6080 {
6081 if (n >= 4)
6082 return 0;
6083 addends[n++] = XEXP (op, 1);
6084 op = XEXP (op, 0);
6085 }
6086 while (GET_CODE (op) == PLUS);
6087 if (n >= 4)
6088 return 0;
6089 addends[n] = op;
6090
6091 for (i = n; i >= 0; --i)
6092 {
6093 op = addends[i];
6094 switch (GET_CODE (op))
6095 {
6096 case MULT:
6097 if (index)
6098 return 0;
6099 index = XEXP (op, 0);
6100 scale_rtx = XEXP (op, 1);
6101 break;
6102
6103 case UNSPEC:
6104 if (XINT (op, 1) == UNSPEC_TP
6105 && TARGET_TLS_DIRECT_SEG_REFS
6106 && seg == SEG_DEFAULT)
6107 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6108 else
6109 return 0;
6110 break;
6111
6112 case REG:
6113 case SUBREG:
6114 if (!base)
6115 base = op;
6116 else if (!index)
6117 index = op;
6118 else
6119 return 0;
6120 break;
6121
6122 case CONST:
6123 case CONST_INT:
6124 case SYMBOL_REF:
6125 case LABEL_REF:
6126 if (disp)
6127 return 0;
6128 disp = op;
6129 break;
6130
6131 default:
6132 return 0;
6133 }
6134 }
6135 }
6136 else if (GET_CODE (addr) == MULT)
6137 {
6138 index = XEXP (addr, 0); /* index*scale */
6139 scale_rtx = XEXP (addr, 1);
6140 }
6141 else if (GET_CODE (addr) == ASHIFT)
6142 {
6143 rtx tmp;
6144
6145 /* We're called for lea too, which implements ashift on occasion. */
6146 index = XEXP (addr, 0);
6147 tmp = XEXP (addr, 1);
6148 if (GET_CODE (tmp) != CONST_INT)
6149 return 0;
6150 scale = INTVAL (tmp);
6151 if ((unsigned HOST_WIDE_INT) scale > 3)
6152 return 0;
6153 scale = 1 << scale;
6154 retval = -1;
6155 }
6156 else
6157 disp = addr; /* displacement */
6158
6159 /* Extract the integral value of scale. */
6160 if (scale_rtx)
6161 {
6162 if (GET_CODE (scale_rtx) != CONST_INT)
6163 return 0;
6164 scale = INTVAL (scale_rtx);
6165 }
6166
6167 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6168 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6169
6170 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6171 if (base_reg && index_reg && scale == 1
6172 && (index_reg == arg_pointer_rtx
6173 || index_reg == frame_pointer_rtx
6174 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6175 {
6176 rtx tmp;
6177 tmp = base, base = index, index = tmp;
6178 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6179 }
6180
6181 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6182 if ((base_reg == hard_frame_pointer_rtx
6183 || base_reg == frame_pointer_rtx
6184 || base_reg == arg_pointer_rtx) && !disp)
6185 disp = const0_rtx;
6186
6187 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6188 Avoid this by transforming it to [%esi+0]. */
6189 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6190 && base_reg && !index_reg && !disp
6191 && REG_P (base_reg)
6192 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6193 disp = const0_rtx;
6194
6195 /* Special case: encode reg+reg instead of reg*2. */
6196 if (!base && index && scale && scale == 2)
6197 base = index, base_reg = index_reg, scale = 1;
6198
6199 /* Special case: scaling cannot be encoded without base or displacement. */
6200 if (!base && !disp && index && scale != 1)
6201 disp = const0_rtx;
6202
6203 out->base = base;
6204 out->index = index;
6205 out->disp = disp;
6206 out->scale = scale;
6207 out->seg = seg;
6208
6209 return retval;
6210 }
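/* As an illustrative example: the SImode address

     (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx)) (const_int 12))

   decomposes to base = %ebx, index = %eax, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the operand 12(%ebx,%eax,4), while a bare
   (reg %ebx) decomposes to just a base.  */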
6211 \f
6212 /* Return the cost of the memory address X.
6213 For i386, it is better to use a complex address than let gcc copy
6214 the address into a reg and make a new pseudo. But not if the address
6215 requires two regs - that would mean more pseudos with longer
6216 lifetimes. */
6217 static int
6218 ix86_address_cost (rtx x)
6219 {
6220 struct ix86_address parts;
6221 int cost = 1;
6222 int ok = ix86_decompose_address (x, &parts);
6223
6224 gcc_assert (ok);
6225
6226 if (parts.base && GET_CODE (parts.base) == SUBREG)
6227 parts.base = SUBREG_REG (parts.base);
6228 if (parts.index && GET_CODE (parts.index) == SUBREG)
6229 parts.index = SUBREG_REG (parts.index);
6230
6231 /* More complex memory references are better. */
6232 if (parts.disp && parts.disp != const0_rtx)
6233 cost--;
6234 if (parts.seg != SEG_DEFAULT)
6235 cost--;
6236
6237 /* Attempt to minimize number of registers in the address. */
6238 if ((parts.base
6239 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6240 || (parts.index
6241 && (!REG_P (parts.index)
6242 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6243 cost++;
6244
6245 if (parts.base
6246 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6247 && parts.index
6248 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6249 && parts.base != parts.index)
6250 cost++;
6251
6252 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6253 since its predecode logic can't detect the length of such instructions
6254 and they degenerate to vector decoding. Increase the cost of such
6255 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6256 to split such addresses or even refuse such addresses at all.
6257
6258 The following addressing modes are affected:
6259 [base+scale*index]
6260 [scale*index+disp]
6261 [base+index]
6262
6263 The first and last cases may be avoidable by explicitly coding a zero
6264 displacement in the memory address, but I don't have an AMD-K6 machine
6265 handy to check this theory. */
6266
6267 if (TARGET_K6
6268 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6269 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6270 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6271 cost += 10;
6272
6273 return cost;
6274 }
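/* For example (illustrative only, assuming non-K6 tuning): the address
   4(%ebx) costs 0 here, since the nonzero displacement earns the -1 bonus
   and the base is already a hard register, while the same address built on
   a pseudo that has not yet been assigned a hard register costs 1, so
   addresses whose registers are already allocated look slightly cheaper.  */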
6275 \f
6276 /* If X is a machine specific address (i.e. a symbol or label being
6277 referenced as a displacement from the GOT implemented using an
6278 UNSPEC), then return the base term. Otherwise return X. */
6279
6280 rtx
6281 ix86_find_base_term (rtx x)
6282 {
6283 rtx term;
6284
6285 if (TARGET_64BIT)
6286 {
6287 if (GET_CODE (x) != CONST)
6288 return x;
6289 term = XEXP (x, 0);
6290 if (GET_CODE (term) == PLUS
6291 && (GET_CODE (XEXP (term, 1)) == CONST_INT
6292 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6293 term = XEXP (term, 0);
6294 if (GET_CODE (term) != UNSPEC
6295 || XINT (term, 1) != UNSPEC_GOTPCREL)
6296 return x;
6297
6298 term = XVECEXP (term, 0, 0);
6299
6300 if (GET_CODE (term) != SYMBOL_REF
6301 && GET_CODE (term) != LABEL_REF)
6302 return x;
6303
6304 return term;
6305 }
6306
6307 term = ix86_delegitimize_address (x);
6308
6309 if (GET_CODE (term) != SYMBOL_REF
6310 && GET_CODE (term) != LABEL_REF)
6311 return x;
6312
6313 return term;
6314 }
6315
6316 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6317 this is used to form addresses to local data when -fPIC is in
6318 use. */
6319
6320 static bool
6321 darwin_local_data_pic (rtx disp)
6322 {
6323 if (GET_CODE (disp) == MINUS)
6324 {
6325 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6326 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6327 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6328 {
6329 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6330 if (! strcmp (sym_name, "<pic base>"))
6331 return true;
6332 }
6333 }
6334
6335 return false;
6336 }
6337 \f
6338 /* Determine if a given RTX is a valid constant. We already know this
6339 satisfies CONSTANT_P. */
6340
6341 bool
6342 legitimate_constant_p (rtx x)
6343 {
6344 switch (GET_CODE (x))
6345 {
6346 case CONST:
6347 x = XEXP (x, 0);
6348
6349 if (GET_CODE (x) == PLUS)
6350 {
6351 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6352 return false;
6353 x = XEXP (x, 0);
6354 }
6355
6356 if (TARGET_MACHO && darwin_local_data_pic (x))
6357 return true;
6358
6359 /* Only some unspecs are valid as "constants". */
6360 if (GET_CODE (x) == UNSPEC)
6361 switch (XINT (x, 1))
6362 {
6363 case UNSPEC_GOTOFF:
6364 return TARGET_64BIT;
6365 case UNSPEC_TPOFF:
6366 case UNSPEC_NTPOFF:
6367 x = XVECEXP (x, 0, 0);
6368 return (GET_CODE (x) == SYMBOL_REF
6369 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6370 case UNSPEC_DTPOFF:
6371 x = XVECEXP (x, 0, 0);
6372 return (GET_CODE (x) == SYMBOL_REF
6373 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6374 default:
6375 return false;
6376 }
6377
6378 /* We must have drilled down to a symbol. */
6379 if (GET_CODE (x) == LABEL_REF)
6380 return true;
6381 if (GET_CODE (x) != SYMBOL_REF)
6382 return false;
6383 /* FALLTHRU */
6384
6385 case SYMBOL_REF:
6386 /* TLS symbols are never valid. */
6387 if (SYMBOL_REF_TLS_MODEL (x))
6388 return false;
6389 break;
6390
6391 case CONST_DOUBLE:
6392 if (GET_MODE (x) == TImode
6393 && x != CONST0_RTX (TImode)
6394 && !TARGET_64BIT)
6395 return false;
6396 break;
6397
6398 case CONST_VECTOR:
6399 if (x == CONST0_RTX (GET_MODE (x)))
6400 return true;
6401 return false;
6402
6403 default:
6404 break;
6405 }
6406
6407 /* Otherwise we handle everything else in the move patterns. */
6408 return true;
6409 }
6410
6411 /* Determine if it's legal to put X into the constant pool. This
6412 is not possible for the address of thread-local symbols, which
6413 is checked above. */
6414
6415 static bool
6416 ix86_cannot_force_const_mem (rtx x)
6417 {
6418 /* We can always put integral constants and vectors in memory. */
6419 switch (GET_CODE (x))
6420 {
6421 case CONST_INT:
6422 case CONST_DOUBLE:
6423 case CONST_VECTOR:
6424 return false;
6425
6426 default:
6427 break;
6428 }
6429 return !legitimate_constant_p (x);
6430 }
6431
6432 /* Determine if a given RTX is a valid constant address. */
6433
6434 bool
6435 constant_address_p (rtx x)
6436 {
6437 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6438 }
6439
6440 /* Nonzero if the constant value X is a legitimate general operand
6441 when generating PIC code. It is given that flag_pic is on and
6442 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6443
6444 bool
6445 legitimate_pic_operand_p (rtx x)
6446 {
6447 rtx inner;
6448
6449 switch (GET_CODE (x))
6450 {
6451 case CONST:
6452 inner = XEXP (x, 0);
6453 if (GET_CODE (inner) == PLUS
6454 && GET_CODE (XEXP (inner, 1)) == CONST_INT)
6455 inner = XEXP (inner, 0);
6456
6457 /* Only some unspecs are valid as "constants". */
6458 if (GET_CODE (inner) == UNSPEC)
6459 switch (XINT (inner, 1))
6460 {
6461 case UNSPEC_GOTOFF:
6462 return TARGET_64BIT;
6463 case UNSPEC_TPOFF:
6464 x = XVECEXP (inner, 0, 0);
6465 return (GET_CODE (x) == SYMBOL_REF
6466 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6467 default:
6468 return false;
6469 }
6470 /* FALLTHRU */
6471
6472 case SYMBOL_REF:
6473 case LABEL_REF:
6474 return legitimate_pic_address_disp_p (x);
6475
6476 default:
6477 return true;
6478 }
6479 }
6480
6481 /* Determine if a given CONST RTX is a valid memory displacement
6482 in PIC mode. */
6483
6484 int
6485 legitimate_pic_address_disp_p (rtx disp)
6486 {
6487 bool saw_plus;
6488
6489 /* In 64bit mode we can allow direct addresses of symbols and labels
6490 when they are not dynamic symbols. */
6491 if (TARGET_64BIT)
6492 {
6493 rtx op0 = disp, op1;
6494
6495 switch (GET_CODE (disp))
6496 {
6497 case LABEL_REF:
6498 return true;
6499
6500 case CONST:
6501 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6502 break;
6503 op0 = XEXP (XEXP (disp, 0), 0);
6504 op1 = XEXP (XEXP (disp, 0), 1);
6505 if (GET_CODE (op1) != CONST_INT
6506 || INTVAL (op1) >= 16*1024*1024
6507 || INTVAL (op1) < -16*1024*1024)
6508 break;
6509 if (GET_CODE (op0) == LABEL_REF)
6510 return true;
6511 if (GET_CODE (op0) != SYMBOL_REF)
6512 break;
6513 /* FALLTHRU */
6514
6515 case SYMBOL_REF:
6516 /* TLS references should always be enclosed in UNSPEC. */
6517 if (SYMBOL_REF_TLS_MODEL (op0))
6518 return false;
6519 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6520 return true;
6521 break;
6522
6523 default:
6524 break;
6525 }
6526 }
6527 if (GET_CODE (disp) != CONST)
6528 return 0;
6529 disp = XEXP (disp, 0);
6530
6531 if (TARGET_64BIT)
6532 {
6533 /* It is not safe to allow PLUS expressions; that would limit the allowed
6534 distance of GOT tables. We should not need these anyway. */
6535 if (GET_CODE (disp) != UNSPEC
6536 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6537 && XINT (disp, 1) != UNSPEC_GOTOFF))
6538 return 0;
6539
6540 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6541 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6542 return 0;
6543 return 1;
6544 }
6545
6546 saw_plus = false;
6547 if (GET_CODE (disp) == PLUS)
6548 {
6549 if (GET_CODE (XEXP (disp, 1)) != CONST_INT)
6550 return 0;
6551 disp = XEXP (disp, 0);
6552 saw_plus = true;
6553 }
6554
6555 if (TARGET_MACHO && darwin_local_data_pic (disp))
6556 return 1;
6557
6558 if (GET_CODE (disp) != UNSPEC)
6559 return 0;
6560
6561 switch (XINT (disp, 1))
6562 {
6563 case UNSPEC_GOT:
6564 if (saw_plus)
6565 return false;
6566 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6567 case UNSPEC_GOTOFF:
6568 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6569 While the ABI also specifies a 32bit relocation, we don't produce it in
6570 the small PIC model at all. */
6571 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6572 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6573 && !TARGET_64BIT)
6574 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6575 return false;
6576 case UNSPEC_GOTTPOFF:
6577 case UNSPEC_GOTNTPOFF:
6578 case UNSPEC_INDNTPOFF:
6579 if (saw_plus)
6580 return false;
6581 disp = XVECEXP (disp, 0, 0);
6582 return (GET_CODE (disp) == SYMBOL_REF
6583 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6584 case UNSPEC_NTPOFF:
6585 disp = XVECEXP (disp, 0, 0);
6586 return (GET_CODE (disp) == SYMBOL_REF
6587 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6588 case UNSPEC_DTPOFF:
6589 disp = XVECEXP (disp, 0, 0);
6590 return (GET_CODE (disp) == SYMBOL_REF
6591 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6592 }
6593
6594 return 0;
6595 }
6596
6597 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6598 memory address for an instruction. The MODE argument is the machine mode
6599 for the MEM expression that wants to use this address.
6600
6601 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6602 convert common non-canonical forms to canonical form so that they will
6603 be recognized. */
6604
6605 int
6606 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6607 {
6608 struct ix86_address parts;
6609 rtx base, index, disp;
6610 HOST_WIDE_INT scale;
6611 const char *reason = NULL;
6612 rtx reason_rtx = NULL_RTX;
6613
6614 if (TARGET_DEBUG_ADDR)
6615 {
6616 fprintf (stderr,
6617 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6618 GET_MODE_NAME (mode), strict);
6619 debug_rtx (addr);
6620 }
6621
6622 if (ix86_decompose_address (addr, &parts) <= 0)
6623 {
6624 reason = "decomposition failed";
6625 goto report_error;
6626 }
6627
6628 base = parts.base;
6629 index = parts.index;
6630 disp = parts.disp;
6631 scale = parts.scale;
6632
6633 /* Validate base register.
6634
6635 Don't allow SUBREG's that span more than a word here. It can lead to spill
6636 failures when the base is one word out of a two word structure, which is
6637 represented internally as a DImode int. */
6638
6639 if (base)
6640 {
6641 rtx reg;
6642 reason_rtx = base;
6643
6644 if (REG_P (base))
6645 reg = base;
6646 else if (GET_CODE (base) == SUBREG
6647 && REG_P (SUBREG_REG (base))
6648 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6649 <= UNITS_PER_WORD)
6650 reg = SUBREG_REG (base);
6651 else
6652 {
6653 reason = "base is not a register";
6654 goto report_error;
6655 }
6656
6657 if (GET_MODE (base) != Pmode)
6658 {
6659 reason = "base is not in Pmode";
6660 goto report_error;
6661 }
6662
6663 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6664 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6665 {
6666 reason = "base is not valid";
6667 goto report_error;
6668 }
6669 }
6670
6671 /* Validate index register.
6672
6673 Don't allow SUBREG's that span more than a word here -- same as above. */
6674
6675 if (index)
6676 {
6677 rtx reg;
6678 reason_rtx = index;
6679
6680 if (REG_P (index))
6681 reg = index;
6682 else if (GET_CODE (index) == SUBREG
6683 && REG_P (SUBREG_REG (index))
6684 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6685 <= UNITS_PER_WORD)
6686 reg = SUBREG_REG (index);
6687 else
6688 {
6689 reason = "index is not a register";
6690 goto report_error;
6691 }
6692
6693 if (GET_MODE (index) != Pmode)
6694 {
6695 reason = "index is not in Pmode";
6696 goto report_error;
6697 }
6698
6699 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6700 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6701 {
6702 reason = "index is not valid";
6703 goto report_error;
6704 }
6705 }
6706
6707 /* Validate scale factor. */
6708 if (scale != 1)
6709 {
6710 reason_rtx = GEN_INT (scale);
6711 if (!index)
6712 {
6713 reason = "scale without index";
6714 goto report_error;
6715 }
6716
6717 if (scale != 2 && scale != 4 && scale != 8)
6718 {
6719 reason = "scale is not a valid multiplier";
6720 goto report_error;
6721 }
6722 }
6723
6724 /* Validate displacement. */
6725 if (disp)
6726 {
6727 reason_rtx = disp;
6728
6729 if (GET_CODE (disp) == CONST
6730 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6731 switch (XINT (XEXP (disp, 0), 1))
6732 {
6733 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6734 used. While the ABI also specifies 32bit relocations, we don't produce
6735 them at all and use IP-relative addressing instead. */
6736 case UNSPEC_GOT:
6737 case UNSPEC_GOTOFF:
6738 gcc_assert (flag_pic);
6739 if (!TARGET_64BIT)
6740 goto is_legitimate_pic;
6741 reason = "64bit address unspec";
6742 goto report_error;
6743
6744 case UNSPEC_GOTPCREL:
6745 gcc_assert (flag_pic);
6746 goto is_legitimate_pic;
6747
6748 case UNSPEC_GOTTPOFF:
6749 case UNSPEC_GOTNTPOFF:
6750 case UNSPEC_INDNTPOFF:
6751 case UNSPEC_NTPOFF:
6752 case UNSPEC_DTPOFF:
6753 break;
6754
6755 default:
6756 reason = "invalid address unspec";
6757 goto report_error;
6758 }
6759
6760 else if (SYMBOLIC_CONST (disp)
6761 && (flag_pic
6762 || (TARGET_MACHO
6763 #if TARGET_MACHO
6764 && MACHOPIC_INDIRECT
6765 && !machopic_operand_p (disp)
6766 #endif
6767 )))
6768 {
6769
6770 is_legitimate_pic:
6771 if (TARGET_64BIT && (index || base))
6772 {
6773 /* foo@dtpoff(%rX) is ok. */
6774 if (GET_CODE (disp) != CONST
6775 || GET_CODE (XEXP (disp, 0)) != PLUS
6776 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6777 || GET_CODE (XEXP (XEXP (disp, 0), 1)) != CONST_INT
6778 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6779 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6780 {
6781 reason = "non-constant pic memory reference";
6782 goto report_error;
6783 }
6784 }
6785 else if (! legitimate_pic_address_disp_p (disp))
6786 {
6787 reason = "displacement is an invalid pic construct";
6788 goto report_error;
6789 }
6790
6791 /* This code used to verify that a symbolic pic displacement
6792 includes the pic_offset_table_rtx register.
6793
6794 While this is a good idea, unfortunately these constructs may
6795 be created by "adds using lea" optimization for incorrect
6796 code like:
6797
6798 int a;
6799 int foo(int i)
6800 {
6801 return *(&a+i);
6802 }
6803
6804 This code is nonsensical, but results in addressing the
6805 GOT table with pic_offset_table_rtx as the base. We can't
6806 just refuse it easily, since it gets matched by the
6807 "addsi3" pattern, which later gets split to an lea when the
6808 output register differs from the input. While this could be
6809 handled by a separate addsi pattern for this case that never
6810 results in an lea, disabling this test seems to be the easier
6811 and correct fix for the crash. */
6812 }
6813 else if (GET_CODE (disp) != LABEL_REF
6814 && GET_CODE (disp) != CONST_INT
6815 && (GET_CODE (disp) != CONST
6816 || !legitimate_constant_p (disp))
6817 && (GET_CODE (disp) != SYMBOL_REF
6818 || !legitimate_constant_p (disp)))
6819 {
6820 reason = "displacement is not constant";
6821 goto report_error;
6822 }
6823 else if (TARGET_64BIT
6824 && !x86_64_immediate_operand (disp, VOIDmode))
6825 {
6826 reason = "displacement is out of range";
6827 goto report_error;
6828 }
6829 }
6830
6831 /* Everything looks valid. */
6832 if (TARGET_DEBUG_ADDR)
6833 fprintf (stderr, "Success.\n");
6834 return TRUE;
6835
6836 report_error:
6837 if (TARGET_DEBUG_ADDR)
6838 {
6839 fprintf (stderr, "Error: %s\n", reason);
6840 debug_rtx (reason_rtx);
6841 }
6842 return FALSE;
6843 }
6844 \f
6845 /* Return a unique alias set for the GOT. */
6846
6847 static HOST_WIDE_INT
6848 ix86_GOT_alias_set (void)
6849 {
6850 static HOST_WIDE_INT set = -1;
6851 if (set == -1)
6852 set = new_alias_set ();
6853 return set;
6854 }
6855
6856 /* Return a legitimate reference for ORIG (an address) using the
6857 register REG. If REG is 0, a new pseudo is generated.
6858
6859 There are two types of references that must be handled:
6860
6861 1. Global data references must load the address from the GOT, via
6862 the PIC reg. An insn is emitted to do this load, and the reg is
6863 returned.
6864
6865 2. Static data references, constant pool addresses, and code labels
6866 compute the address as an offset from the GOT, whose base is in
6867 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6868 differentiate them from global data objects. The returned
6869 address is the PIC reg + an unspec constant.
6870
6871 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6872 reg also appears in the address. */
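/* Illustrative sketch (an editorial addition, not from the original source):
   for 32-bit PIC the two cases above roughly produce
     global:  (mem (plus pic_reg (const (unspec [sym] UNSPEC_GOT))))
     local:   (plus pic_reg (const (unspec [sym] UNSPEC_GOTOFF)))
   i.e. sym@GOT is loaded through the GOT, while sym@GOTOFF is a direct
   offset from the PIC register. */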
6873
6874 static rtx
6875 legitimize_pic_address (rtx orig, rtx reg)
6876 {
6877 rtx addr = orig;
6878 rtx new = orig;
6879 rtx base;
6880
6881 #if TARGET_MACHO
6882 if (TARGET_MACHO && !TARGET_64BIT)
6883 {
6884 if (reg == 0)
6885 reg = gen_reg_rtx (Pmode);
6886 /* Use the generic Mach-O PIC machinery. */
6887 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6888 }
6889 #endif
6890
6891 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6892 new = addr;
6893 else if (TARGET_64BIT
6894 && ix86_cmodel != CM_SMALL_PIC
6895 && local_symbolic_operand (addr, Pmode))
6896 {
6897 rtx tmpreg;
6898 /* This symbol may be referenced via a displacement from the PIC
6899 base address (@GOTOFF). */
6900
6901 if (reload_in_progress)
6902 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6903 if (GET_CODE (addr) == CONST)
6904 addr = XEXP (addr, 0);
6905 if (GET_CODE (addr) == PLUS)
6906 {
6907 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6908 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6909 }
6910 else
6911 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6912 new = gen_rtx_CONST (Pmode, new);
6913 if (!reg)
6914 tmpreg = gen_reg_rtx (Pmode);
6915 else
6916 tmpreg = reg;
6917 emit_move_insn (tmpreg, new);
6918
6919 if (reg != 0)
6920 {
6921 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6922 tmpreg, 1, OPTAB_DIRECT);
6923 new = reg;
6924 }
6925 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6926 }
6927 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
6928 {
6929 /* This symbol may be referenced via a displacement from the PIC
6930 base address (@GOTOFF). */
6931
6932 if (reload_in_progress)
6933 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6934 if (GET_CODE (addr) == CONST)
6935 addr = XEXP (addr, 0);
6936 if (GET_CODE (addr) == PLUS)
6937 {
6938 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6939 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6940 }
6941 else
6942 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6943 new = gen_rtx_CONST (Pmode, new);
6944 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6945
6946 if (reg != 0)
6947 {
6948 emit_move_insn (reg, new);
6949 new = reg;
6950 }
6951 }
6952 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6953 {
6954 if (TARGET_64BIT)
6955 {
6956 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6957 new = gen_rtx_CONST (Pmode, new);
6958 new = gen_const_mem (Pmode, new);
6959 set_mem_alias_set (new, ix86_GOT_alias_set ());
6960
6961 if (reg == 0)
6962 reg = gen_reg_rtx (Pmode);
6963 /* Use gen_movsi directly, otherwise the address is loaded
6964 into a register for CSE. We don't want to CSE these addresses;
6965 instead we CSE addresses from the GOT table, so skip this. */
6966 emit_insn (gen_movsi (reg, new));
6967 new = reg;
6968 }
6969 else
6970 {
6971 /* This symbol must be referenced via a load from the
6972 Global Offset Table (@GOT). */
6973
6974 if (reload_in_progress)
6975 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6976 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6977 new = gen_rtx_CONST (Pmode, new);
6978 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6979 new = gen_const_mem (Pmode, new);
6980 set_mem_alias_set (new, ix86_GOT_alias_set ());
6981
6982 if (reg == 0)
6983 reg = gen_reg_rtx (Pmode);
6984 emit_move_insn (reg, new);
6985 new = reg;
6986 }
6987 }
6988 else
6989 {
6990 if (GET_CODE (addr) == CONST_INT
6991 && !x86_64_immediate_operand (addr, VOIDmode))
6992 {
6993 if (reg)
6994 {
6995 emit_move_insn (reg, addr);
6996 new = reg;
6997 }
6998 else
6999 new = force_reg (Pmode, addr);
7000 }
7001 else if (GET_CODE (addr) == CONST)
7002 {
7003 addr = XEXP (addr, 0);
7004
7005 /* We must match stuff we generate before. Assume the only
7006 unspecs that can get here are ours. Not that we could do
7007 anything with them anyway.... */
7008 if (GET_CODE (addr) == UNSPEC
7009 || (GET_CODE (addr) == PLUS
7010 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7011 return orig;
7012 gcc_assert (GET_CODE (addr) == PLUS);
7013 }
7014 if (GET_CODE (addr) == PLUS)
7015 {
7016 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7017
7018 /* Check first to see if this is a constant offset from a @GOTOFF
7019 symbol reference. */
7020 if (local_symbolic_operand (op0, Pmode)
7021 && GET_CODE (op1) == CONST_INT)
7022 {
7023 if (!TARGET_64BIT)
7024 {
7025 if (reload_in_progress)
7026 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7027 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7028 UNSPEC_GOTOFF);
7029 new = gen_rtx_PLUS (Pmode, new, op1);
7030 new = gen_rtx_CONST (Pmode, new);
7031 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7032
7033 if (reg != 0)
7034 {
7035 emit_move_insn (reg, new);
7036 new = reg;
7037 }
7038 }
7039 else
7040 {
7041 if (INTVAL (op1) < -16*1024*1024
7042 || INTVAL (op1) >= 16*1024*1024)
7043 {
7044 if (!x86_64_immediate_operand (op1, Pmode))
7045 op1 = force_reg (Pmode, op1);
7046 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7047 }
7048 }
7049 }
7050 else
7051 {
7052 base = legitimize_pic_address (XEXP (addr, 0), reg);
7053 new = legitimize_pic_address (XEXP (addr, 1),
7054 base == reg ? NULL_RTX : reg);
7055
7056 if (GET_CODE (new) == CONST_INT)
7057 new = plus_constant (base, INTVAL (new));
7058 else
7059 {
7060 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7061 {
7062 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7063 new = XEXP (new, 1);
7064 }
7065 new = gen_rtx_PLUS (Pmode, base, new);
7066 }
7067 }
7068 }
7069 }
7070 return new;
7071 }
7072 \f
7073 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7074
7075 static rtx
7076 get_thread_pointer (int to_reg)
7077 {
7078 rtx tp, reg, insn;
7079
7080 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7081 if (!to_reg)
7082 return tp;
7083
7084 reg = gen_reg_rtx (Pmode);
7085 insn = gen_rtx_SET (VOIDmode, reg, tp);
7086 insn = emit_insn (insn);
7087
7088 return reg;
7089 }
7090
7091 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7092 false if we expect this to be used for a memory address and true if
7093 we expect to load the address into a register. */
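/* Illustrative note (an editorial addition, not from the original source):
   e.g. for the local-exec model with direct segment references the result
   is an address of the form  tp + x@ntpoff,  which prints as %fs:x@tpoff
   in 64-bit code and %gs:x@ntpoff in 32-bit code. */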
7094
7095 static rtx
7096 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7097 {
7098 rtx dest, base, off, pic, tp;
7099 int type;
7100
7101 switch (model)
7102 {
7103 case TLS_MODEL_GLOBAL_DYNAMIC:
7104 dest = gen_reg_rtx (Pmode);
7105 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7106
7107 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7108 {
7109 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7110
7111 start_sequence ();
7112 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7113 insns = get_insns ();
7114 end_sequence ();
7115
7116 emit_libcall_block (insns, dest, rax, x);
7117 }
7118 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7119 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7120 else
7121 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7122
7123 if (TARGET_GNU2_TLS)
7124 {
7125 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7126
7127 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7128 }
7129 break;
7130
7131 case TLS_MODEL_LOCAL_DYNAMIC:
7132 base = gen_reg_rtx (Pmode);
7133 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7134
7135 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7136 {
7137 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7138
7139 start_sequence ();
7140 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7141 insns = get_insns ();
7142 end_sequence ();
7143
7144 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7145 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7146 emit_libcall_block (insns, base, rax, note);
7147 }
7148 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7149 emit_insn (gen_tls_local_dynamic_base_64 (base));
7150 else
7151 emit_insn (gen_tls_local_dynamic_base_32 (base));
7152
7153 if (TARGET_GNU2_TLS)
7154 {
7155 rtx x = ix86_tls_module_base ();
7156
7157 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7158 gen_rtx_MINUS (Pmode, x, tp));
7159 }
7160
7161 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7162 off = gen_rtx_CONST (Pmode, off);
7163
7164 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7165
7166 if (TARGET_GNU2_TLS)
7167 {
7168 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7169
7170 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7171 }
7172
7173 break;
7174
7175 case TLS_MODEL_INITIAL_EXEC:
7176 if (TARGET_64BIT)
7177 {
7178 pic = NULL;
7179 type = UNSPEC_GOTNTPOFF;
7180 }
7181 else if (flag_pic)
7182 {
7183 if (reload_in_progress)
7184 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7185 pic = pic_offset_table_rtx;
7186 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7187 }
7188 else if (!TARGET_ANY_GNU_TLS)
7189 {
7190 pic = gen_reg_rtx (Pmode);
7191 emit_insn (gen_set_got (pic));
7192 type = UNSPEC_GOTTPOFF;
7193 }
7194 else
7195 {
7196 pic = NULL;
7197 type = UNSPEC_INDNTPOFF;
7198 }
7199
7200 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7201 off = gen_rtx_CONST (Pmode, off);
7202 if (pic)
7203 off = gen_rtx_PLUS (Pmode, pic, off);
7204 off = gen_const_mem (Pmode, off);
7205 set_mem_alias_set (off, ix86_GOT_alias_set ());
7206
7207 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7208 {
7209 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7210 off = force_reg (Pmode, off);
7211 return gen_rtx_PLUS (Pmode, base, off);
7212 }
7213 else
7214 {
7215 base = get_thread_pointer (true);
7216 dest = gen_reg_rtx (Pmode);
7217 emit_insn (gen_subsi3 (dest, base, off));
7218 }
7219 break;
7220
7221 case TLS_MODEL_LOCAL_EXEC:
7222 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7223 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7224 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7225 off = gen_rtx_CONST (Pmode, off);
7226
7227 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7228 {
7229 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7230 return gen_rtx_PLUS (Pmode, base, off);
7231 }
7232 else
7233 {
7234 base = get_thread_pointer (true);
7235 dest = gen_reg_rtx (Pmode);
7236 emit_insn (gen_subsi3 (dest, base, off));
7237 }
7238 break;
7239
7240 default:
7241 gcc_unreachable ();
7242 }
7243
7244 return dest;
7245 }
7246
7247 /* Try machine-dependent ways of modifying an illegitimate address
7248 to be legitimate. If we find one, return the new, valid address.
7249 This macro is used in only one place: `memory_address' in explow.c.
7250
7251 OLDX is the address as it was before break_out_memory_refs was called.
7252 In some cases it is useful to look at this to decide what needs to be done.
7253
7254 MODE and WIN are passed so that this macro can use
7255 GO_IF_LEGITIMATE_ADDRESS.
7256
7257 It is always safe for this macro to do nothing. It exists to recognize
7258 opportunities to optimize the output.
7259
7260 For the 80386, we handle X+REG by loading X into a register R and
7261 using R+REG. R will go in a general reg and indexing will be used.
7262 However, if REG is a broken-out memory address or multiplication,
7263 nothing needs to be done because REG can certainly go in a general reg.
7264
7265 When -fpic is used, special handling is needed for symbolic references.
7266 See comments by legitimize_pic_address in i386.c for details. */
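/* Illustrative example (an editorial addition, not from the original source):
   for non-PIC code an address such as  symbol + reg  keeps its shape; the
   code below merely forces the symbolic half into a register, so
   (plus (symbol_ref "x") (reg)) becomes (plus (reg R) (reg)) after a move
   of the symbol's address into the new pseudo R. */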
7267
7268 rtx
7269 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7270 {
7271 int changed = 0;
7272 unsigned log;
7273
7274 if (TARGET_DEBUG_ADDR)
7275 {
7276 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7277 GET_MODE_NAME (mode));
7278 debug_rtx (x);
7279 }
7280
7281 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7282 if (log)
7283 return legitimize_tls_address (x, log, false);
7284 if (GET_CODE (x) == CONST
7285 && GET_CODE (XEXP (x, 0)) == PLUS
7286 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7287 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7288 {
7289 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7290 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7291 }
7292
7293 if (flag_pic && SYMBOLIC_CONST (x))
7294 return legitimize_pic_address (x, 0);
7295
7296 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7297 if (GET_CODE (x) == ASHIFT
7298 && GET_CODE (XEXP (x, 1)) == CONST_INT
7299 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7300 {
7301 changed = 1;
7302 log = INTVAL (XEXP (x, 1));
7303 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7304 GEN_INT (1 << log));
7305 }
7306
7307 if (GET_CODE (x) == PLUS)
7308 {
7309 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7310
7311 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7312 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7313 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7314 {
7315 changed = 1;
7316 log = INTVAL (XEXP (XEXP (x, 0), 1));
7317 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7318 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7319 GEN_INT (1 << log));
7320 }
7321
7322 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7323 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
7324 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7325 {
7326 changed = 1;
7327 log = INTVAL (XEXP (XEXP (x, 1), 1));
7328 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7329 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7330 GEN_INT (1 << log));
7331 }
7332
7333 /* Put multiply first if it isn't already. */
7334 if (GET_CODE (XEXP (x, 1)) == MULT)
7335 {
7336 rtx tmp = XEXP (x, 0);
7337 XEXP (x, 0) = XEXP (x, 1);
7338 XEXP (x, 1) = tmp;
7339 changed = 1;
7340 }
7341
7342 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7343 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7344 created by virtual register instantiation, register elimination, and
7345 similar optimizations. */
7346 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7347 {
7348 changed = 1;
7349 x = gen_rtx_PLUS (Pmode,
7350 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7351 XEXP (XEXP (x, 1), 0)),
7352 XEXP (XEXP (x, 1), 1));
7353 }
7354
7355 /* Canonicalize
7356 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7357 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7358 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7359 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7360 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7361 && CONSTANT_P (XEXP (x, 1)))
7362 {
7363 rtx constant;
7364 rtx other = NULL_RTX;
7365
7366 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
7367 {
7368 constant = XEXP (x, 1);
7369 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7370 }
7371 else if (GET_CODE (XEXP (XEXP (XEXP (x, 0), 1), 1)) == CONST_INT)
7372 {
7373 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7374 other = XEXP (x, 1);
7375 }
7376 else
7377 constant = 0;
7378
7379 if (constant)
7380 {
7381 changed = 1;
7382 x = gen_rtx_PLUS (Pmode,
7383 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7384 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7385 plus_constant (other, INTVAL (constant)));
7386 }
7387 }
7388
7389 if (changed && legitimate_address_p (mode, x, FALSE))
7390 return x;
7391
7392 if (GET_CODE (XEXP (x, 0)) == MULT)
7393 {
7394 changed = 1;
7395 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7396 }
7397
7398 if (GET_CODE (XEXP (x, 1)) == MULT)
7399 {
7400 changed = 1;
7401 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7402 }
7403
7404 if (changed
7405 && GET_CODE (XEXP (x, 1)) == REG
7406 && GET_CODE (XEXP (x, 0)) == REG)
7407 return x;
7408
7409 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7410 {
7411 changed = 1;
7412 x = legitimize_pic_address (x, 0);
7413 }
7414
7415 if (changed && legitimate_address_p (mode, x, FALSE))
7416 return x;
7417
7418 if (GET_CODE (XEXP (x, 0)) == REG)
7419 {
7420 rtx temp = gen_reg_rtx (Pmode);
7421 rtx val = force_operand (XEXP (x, 1), temp);
7422 if (val != temp)
7423 emit_move_insn (temp, val);
7424
7425 XEXP (x, 1) = temp;
7426 return x;
7427 }
7428
7429 else if (GET_CODE (XEXP (x, 1)) == REG)
7430 {
7431 rtx temp = gen_reg_rtx (Pmode);
7432 rtx val = force_operand (XEXP (x, 0), temp);
7433 if (val != temp)
7434 emit_move_insn (temp, val);
7435
7436 XEXP (x, 0) = temp;
7437 return x;
7438 }
7439 }
7440
7441 return x;
7442 }
7443 \f
7444 /* Print an integer constant expression in assembler syntax. Addition
7445 and subtraction are the only arithmetic that may appear in these
7446 expressions. FILE is the stdio stream to write to, X is the rtx, and
7447 CODE is the operand print code from the output string. */
7448
7449 static void
7450 output_pic_addr_const (FILE *file, rtx x, int code)
7451 {
7452 char buf[256];
7453
7454 switch (GET_CODE (x))
7455 {
7456 case PC:
7457 gcc_assert (flag_pic);
7458 putc ('.', file);
7459 break;
7460
7461 case SYMBOL_REF:
7462 output_addr_const (file, x);
7463 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7464 fputs ("@PLT", file);
7465 break;
7466
7467 case LABEL_REF:
7468 x = XEXP (x, 0);
7469 /* FALLTHRU */
7470 case CODE_LABEL:
7471 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7472 assemble_name (asm_out_file, buf);
7473 break;
7474
7475 case CONST_INT:
7476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7477 break;
7478
7479 case CONST:
7480 /* This used to output parentheses around the expression,
7481 but that does not work on the 386 (either ATT or BSD assembler). */
7482 output_pic_addr_const (file, XEXP (x, 0), code);
7483 break;
7484
7485 case CONST_DOUBLE:
7486 if (GET_MODE (x) == VOIDmode)
7487 {
7488 /* We can use %d if the number is <32 bits and positive. */
7489 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7490 fprintf (file, "0x%lx%08lx",
7491 (unsigned long) CONST_DOUBLE_HIGH (x),
7492 (unsigned long) CONST_DOUBLE_LOW (x));
7493 else
7494 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7495 }
7496 else
7497 /* We can't handle floating point constants;
7498 PRINT_OPERAND must handle them. */
7499 output_operand_lossage ("floating constant misused");
7500 break;
7501
7502 case PLUS:
7503 /* Some assemblers need integer constants to appear first. */
7504 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
7505 {
7506 output_pic_addr_const (file, XEXP (x, 0), code);
7507 putc ('+', file);
7508 output_pic_addr_const (file, XEXP (x, 1), code);
7509 }
7510 else
7511 {
7512 gcc_assert (GET_CODE (XEXP (x, 1)) == CONST_INT);
7513 output_pic_addr_const (file, XEXP (x, 1), code);
7514 putc ('+', file);
7515 output_pic_addr_const (file, XEXP (x, 0), code);
7516 }
7517 break;
7518
7519 case MINUS:
7520 if (!TARGET_MACHO)
7521 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7522 output_pic_addr_const (file, XEXP (x, 0), code);
7523 putc ('-', file);
7524 output_pic_addr_const (file, XEXP (x, 1), code);
7525 if (!TARGET_MACHO)
7526 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7527 break;
7528
7529 case UNSPEC:
7530 gcc_assert (XVECLEN (x, 0) == 1);
7531 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7532 switch (XINT (x, 1))
7533 {
7534 case UNSPEC_GOT:
7535 fputs ("@GOT", file);
7536 break;
7537 case UNSPEC_GOTOFF:
7538 fputs ("@GOTOFF", file);
7539 break;
7540 case UNSPEC_GOTPCREL:
7541 fputs ("@GOTPCREL(%rip)", file);
7542 break;
7543 case UNSPEC_GOTTPOFF:
7544 /* FIXME: This might be @TPOFF in Sun ld too. */
7545 fputs ("@GOTTPOFF", file);
7546 break;
7547 case UNSPEC_TPOFF:
7548 fputs ("@TPOFF", file);
7549 break;
7550 case UNSPEC_NTPOFF:
7551 if (TARGET_64BIT)
7552 fputs ("@TPOFF", file);
7553 else
7554 fputs ("@NTPOFF", file);
7555 break;
7556 case UNSPEC_DTPOFF:
7557 fputs ("@DTPOFF", file);
7558 break;
7559 case UNSPEC_GOTNTPOFF:
7560 if (TARGET_64BIT)
7561 fputs ("@GOTTPOFF(%rip)", file);
7562 else
7563 fputs ("@GOTNTPOFF", file);
7564 break;
7565 case UNSPEC_INDNTPOFF:
7566 fputs ("@INDNTPOFF", file);
7567 break;
7568 default:
7569 output_operand_lossage ("invalid UNSPEC as operand");
7570 break;
7571 }
7572 break;
7573
7574 default:
7575 output_operand_lossage ("invalid expression as operand");
7576 }
7577 }
7578
7579 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7580 We need to emit DTP-relative relocations. */
7581
7582 static void
7583 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7584 {
7585 fputs (ASM_LONG, file);
7586 output_addr_const (file, x);
7587 fputs ("@DTPOFF", file);
7588 switch (size)
7589 {
7590 case 4:
7591 break;
7592 case 8:
7593 fputs (", 0", file);
7594 break;
7595 default:
7596 gcc_unreachable ();
7597 }
7598 }
7599
7600 /* In the name of slightly smaller debug output, and to cater to
7601 general assembler lossage, recognize PIC+GOTOFF and turn it back
7602 into a direct symbol reference.
7603
7604 On Darwin, this is necessary to avoid a crash, because Darwin
7605 has a different PIC label for each routine but the DWARF debugging
7606 information is not associated with any particular routine, so it's
7607 necessary to remove references to the PIC label from RTL stored by
7608 the DWARF output code. */
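/* Illustrative example (an editorial addition, not from the original source):
   (plus (reg ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into (symbol_ref "x"), with any register or constant
   addend re-attached around the recovered symbol. */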
7609
7610 static rtx
7611 ix86_delegitimize_address (rtx orig_x)
7612 {
7613 rtx x = orig_x;
7614 /* reg_addend is NULL or a multiple of some register. */
7615 rtx reg_addend = NULL_RTX;
7616 /* const_addend is NULL or a const_int. */
7617 rtx const_addend = NULL_RTX;
7618 /* This is the result, or NULL. */
7619 rtx result = NULL_RTX;
7620
7621 if (GET_CODE (x) == MEM)
7622 x = XEXP (x, 0);
7623
7624 if (TARGET_64BIT)
7625 {
7626 if (GET_CODE (x) != CONST
7627 || GET_CODE (XEXP (x, 0)) != UNSPEC
7628 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7629 || GET_CODE (orig_x) != MEM)
7630 return orig_x;
7631 return XVECEXP (XEXP (x, 0), 0, 0);
7632 }
7633
7634 if (GET_CODE (x) != PLUS
7635 || GET_CODE (XEXP (x, 1)) != CONST)
7636 return orig_x;
7637
7638 if (GET_CODE (XEXP (x, 0)) == REG
7639 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7640 /* %ebx + GOT/GOTOFF */
7641 ;
7642 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7643 {
7644 /* %ebx + %reg * scale + GOT/GOTOFF */
7645 reg_addend = XEXP (x, 0);
7646 if (GET_CODE (XEXP (reg_addend, 0)) == REG
7647 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7648 reg_addend = XEXP (reg_addend, 1);
7649 else if (GET_CODE (XEXP (reg_addend, 1)) == REG
7650 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7651 reg_addend = XEXP (reg_addend, 0);
7652 else
7653 return orig_x;
7654 if (GET_CODE (reg_addend) != REG
7655 && GET_CODE (reg_addend) != MULT
7656 && GET_CODE (reg_addend) != ASHIFT)
7657 return orig_x;
7658 }
7659 else
7660 return orig_x;
7661
7662 x = XEXP (XEXP (x, 1), 0);
7663 if (GET_CODE (x) == PLUS
7664 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7665 {
7666 const_addend = XEXP (x, 1);
7667 x = XEXP (x, 0);
7668 }
7669
7670 if (GET_CODE (x) == UNSPEC
7671 && ((XINT (x, 1) == UNSPEC_GOT && GET_CODE (orig_x) == MEM)
7672 || (XINT (x, 1) == UNSPEC_GOTOFF && GET_CODE (orig_x) != MEM)))
7673 result = XVECEXP (x, 0, 0);
7674
7675 if (TARGET_MACHO && darwin_local_data_pic (x)
7676 && GET_CODE (orig_x) != MEM)
7677 result = XEXP (x, 0);
7678
7679 if (! result)
7680 return orig_x;
7681
7682 if (const_addend)
7683 result = gen_rtx_PLUS (Pmode, result, const_addend);
7684 if (reg_addend)
7685 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7686 return result;
7687 }
7688 \f
7689 static void
7690 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7691 int fp, FILE *file)
7692 {
7693 const char *suffix;
7694
7695 if (mode == CCFPmode || mode == CCFPUmode)
7696 {
7697 enum rtx_code second_code, bypass_code;
7698 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7699 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7700 code = ix86_fp_compare_code_to_integer (code);
7701 mode = CCmode;
7702 }
7703 if (reverse)
7704 code = reverse_condition (code);
7705
7706 switch (code)
7707 {
7708 case EQ:
7709 suffix = "e";
7710 break;
7711 case NE:
7712 suffix = "ne";
7713 break;
7714 case GT:
7715 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7716 suffix = "g";
7717 break;
7718 case GTU:
7719 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7720 Those same assemblers have the same but opposite lossage on cmov. */
7721 gcc_assert (mode == CCmode);
7722 suffix = fp ? "nbe" : "a";
7723 break;
7724 case LT:
7725 switch (mode)
7726 {
7727 case CCNOmode:
7728 case CCGOCmode:
7729 suffix = "s";
7730 break;
7731
7732 case CCmode:
7733 case CCGCmode:
7734 suffix = "l";
7735 break;
7736
7737 default:
7738 gcc_unreachable ();
7739 }
7740 break;
7741 case LTU:
7742 gcc_assert (mode == CCmode);
7743 suffix = "b";
7744 break;
7745 case GE:
7746 switch (mode)
7747 {
7748 case CCNOmode:
7749 case CCGOCmode:
7750 suffix = "ns";
7751 break;
7752
7753 case CCmode:
7754 case CCGCmode:
7755 suffix = "ge";
7756 break;
7757
7758 default:
7759 gcc_unreachable ();
7760 }
7761 break;
7762 case GEU:
7763 /* ??? As above. */
7764 gcc_assert (mode == CCmode);
7765 suffix = fp ? "nb" : "ae";
7766 break;
7767 case LE:
7768 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7769 suffix = "le";
7770 break;
7771 case LEU:
7772 gcc_assert (mode == CCmode);
7773 suffix = "be";
7774 break;
7775 case UNORDERED:
7776 suffix = fp ? "u" : "p";
7777 break;
7778 case ORDERED:
7779 suffix = fp ? "nu" : "np";
7780 break;
7781 default:
7782 gcc_unreachable ();
7783 }
7784 fputs (suffix, file);
7785 }
7786
7787 /* Print the name of register X to FILE based on its machine mode and number.
7788 If CODE is 'w', pretend the mode is HImode.
7789 If CODE is 'b', pretend the mode is QImode.
7790 If CODE is 'k', pretend the mode is SImode.
7791 If CODE is 'q', pretend the mode is DImode.
7792 If CODE is 'h', pretend the reg is the 'high' byte register.
7793 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
7794
7795 void
7796 print_reg (rtx x, int code, FILE *file)
7797 {
7798 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7799 && REGNO (x) != FRAME_POINTER_REGNUM
7800 && REGNO (x) != FLAGS_REG
7801 && REGNO (x) != FPSR_REG
7802 && REGNO (x) != FPCR_REG);
7803
7804 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7805 putc ('%', file);
7806
7807 if (code == 'w' || MMX_REG_P (x))
7808 code = 2;
7809 else if (code == 'b')
7810 code = 1;
7811 else if (code == 'k')
7812 code = 4;
7813 else if (code == 'q')
7814 code = 8;
7815 else if (code == 'y')
7816 code = 3;
7817 else if (code == 'h')
7818 code = 0;
7819 else
7820 code = GET_MODE_SIZE (GET_MODE (x));
7821
7822 /* Irritatingly, AMD extended registers use a different naming convention
7823 from the normal registers. */
7824 if (REX_INT_REG_P (x))
7825 {
7826 gcc_assert (TARGET_64BIT);
7827 switch (code)
7828 {
7829 case 0:
7830 error ("extended registers have no high halves");
7831 break;
7832 case 1:
7833 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7834 break;
7835 case 2:
7836 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7837 break;
7838 case 4:
7839 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7840 break;
7841 case 8:
7842 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7843 break;
7844 default:
7845 error ("unsupported operand size for extended register");
7846 break;
7847 }
7848 return;
7849 }
7850 switch (code)
7851 {
7852 case 3:
7853 if (STACK_TOP_P (x))
7854 {
7855 fputs ("st(0)", file);
7856 break;
7857 }
7858 /* FALLTHRU */
7859 case 8:
7860 case 4:
7861 case 12:
7862 if (! ANY_FP_REG_P (x))
7863 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7864 /* FALLTHRU */
7865 case 16:
7866 case 2:
7867 normal:
7868 fputs (hi_reg_name[REGNO (x)], file);
7869 break;
7870 case 1:
7871 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7872 goto normal;
7873 fputs (qi_reg_name[REGNO (x)], file);
7874 break;
7875 case 0:
7876 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7877 goto normal;
7878 fputs (qi_high_reg_name[REGNO (x)], file);
7879 break;
7880 default:
7881 gcc_unreachable ();
7882 }
7883 }
7884
7885 /* Locate some local-dynamic symbol still in use by this function
7886 so that we can print its name in some tls_local_dynamic_base
7887 pattern. */
7888
7889 static const char *
7890 get_some_local_dynamic_name (void)
7891 {
7892 rtx insn;
7893
7894 if (cfun->machine->some_ld_name)
7895 return cfun->machine->some_ld_name;
7896
7897 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7898 if (INSN_P (insn)
7899 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7900 return cfun->machine->some_ld_name;
7901
7902 gcc_unreachable ();
7903 }
7904
7905 static int
7906 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7907 {
7908 rtx x = *px;
7909
7910 if (GET_CODE (x) == SYMBOL_REF
7911 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7912 {
7913 cfun->machine->some_ld_name = XSTR (x, 0);
7914 return 1;
7915 }
7916
7917 return 0;
7918 }
7919
7920 /* Meaning of CODE:
7921 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7922 C -- print opcode suffix for set/cmov insn.
7923 c -- like C, but print reversed condition
7924 F,f -- likewise, but for floating-point.
7925 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7926 otherwise nothing
7927 R -- print the prefix for register names.
7928 z -- print the opcode suffix for the size of the current operand.
7929 * -- print a star (in certain assembler syntax)
7930 A -- print an absolute memory reference.
7931 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7932 s -- print a shift double count, followed by the assembler's argument
7933 delimiter.
7934 b -- print the QImode name of the register for the indicated operand.
7935 %b0 would print %al if operands[0] is reg 0.
7936 w -- likewise, print the HImode name of the register.
7937 k -- likewise, print the SImode name of the register.
7938 q -- likewise, print the DImode name of the register.
7939 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7940 y -- print "st(0)" instead of "st" as a register.
7941 D -- print condition for SSE cmp instruction.
7942 P -- if PIC, print an @PLT suffix.
7943 X -- don't print any sort of PIC '@' suffix for a symbol.
7944 & -- print some in-use local-dynamic symbol name.
7945 H -- print a memory address offset by 8; used for sse high-parts
7946 */
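/* Illustrative note (an editorial addition; the template below is a made-up
   example, not a quote from i386.md): in an output template like
   "add%z0\t{%1, %0|%0, %1}" the %z0 part expands to the size suffix for
   operand 0 and the {att|intel} braces select the operand order used by
   the current assembler dialect. */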
7947
7948 void
7949 print_operand (FILE *file, rtx x, int code)
7950 {
7951 if (code)
7952 {
7953 switch (code)
7954 {
7955 case '*':
7956 if (ASSEMBLER_DIALECT == ASM_ATT)
7957 putc ('*', file);
7958 return;
7959
7960 case '&':
7961 assemble_name (file, get_some_local_dynamic_name ());
7962 return;
7963
7964 case 'A':
7965 switch (ASSEMBLER_DIALECT)
7966 {
7967 case ASM_ATT:
7968 putc ('*', file);
7969 break;
7970
7971 case ASM_INTEL:
7972 /* Intel syntax. For absolute addresses, registers should not
7973 be surrounded by brackets. */
7974 if (GET_CODE (x) != REG)
7975 {
7976 putc ('[', file);
7977 PRINT_OPERAND (file, x, 0);
7978 putc (']', file);
7979 return;
7980 }
7981 break;
7982
7983 default:
7984 gcc_unreachable ();
7985 }
7986
7987 PRINT_OPERAND (file, x, 0);
7988 return;
7989
7990
7991 case 'L':
7992 if (ASSEMBLER_DIALECT == ASM_ATT)
7993 putc ('l', file);
7994 return;
7995
7996 case 'W':
7997 if (ASSEMBLER_DIALECT == ASM_ATT)
7998 putc ('w', file);
7999 return;
8000
8001 case 'B':
8002 if (ASSEMBLER_DIALECT == ASM_ATT)
8003 putc ('b', file);
8004 return;
8005
8006 case 'Q':
8007 if (ASSEMBLER_DIALECT == ASM_ATT)
8008 putc ('l', file);
8009 return;
8010
8011 case 'S':
8012 if (ASSEMBLER_DIALECT == ASM_ATT)
8013 putc ('s', file);
8014 return;
8015
8016 case 'T':
8017 if (ASSEMBLER_DIALECT == ASM_ATT)
8018 putc ('t', file);
8019 return;
8020
8021 case 'z':
8022 /* 387 opcodes don't get size suffixes if the operands are
8023 registers. */
8024 if (STACK_REG_P (x))
8025 return;
8026
8027 /* Likewise if using Intel opcodes. */
8028 if (ASSEMBLER_DIALECT == ASM_INTEL)
8029 return;
8030
8031 /* This is the size of op from size of operand. */
8032 switch (GET_MODE_SIZE (GET_MODE (x)))
8033 {
8034 case 2:
8035 #ifdef HAVE_GAS_FILDS_FISTS
8036 putc ('s', file);
8037 #endif
8038 return;
8039
8040 case 4:
8041 if (GET_MODE (x) == SFmode)
8042 {
8043 putc ('s', file);
8044 return;
8045 }
8046 else
8047 putc ('l', file);
8048 return;
8049
8050 case 12:
8051 case 16:
8052 putc ('t', file);
8053 return;
8054
8055 case 8:
8056 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8057 {
8058 #ifdef GAS_MNEMONICS
8059 putc ('q', file);
8060 #else
8061 putc ('l', file);
8062 putc ('l', file);
8063 #endif
8064 }
8065 else
8066 putc ('l', file);
8067 return;
8068
8069 default:
8070 gcc_unreachable ();
8071 }
8072
8073 case 'b':
8074 case 'w':
8075 case 'k':
8076 case 'q':
8077 case 'h':
8078 case 'y':
8079 case 'X':
8080 case 'P':
8081 break;
8082
8083 case 's':
8084 if (GET_CODE (x) == CONST_INT || ! SHIFT_DOUBLE_OMITS_COUNT)
8085 {
8086 PRINT_OPERAND (file, x, 0);
8087 putc (',', file);
8088 }
8089 return;
8090
8091 case 'D':
8092 /* A little bit of brain damage here. The SSE compare instructions
8093 use completely different names for the comparisons than the
8094 fp conditional moves do. */
8095 switch (GET_CODE (x))
8096 {
8097 case EQ:
8098 case UNEQ:
8099 fputs ("eq", file);
8100 break;
8101 case LT:
8102 case UNLT:
8103 fputs ("lt", file);
8104 break;
8105 case LE:
8106 case UNLE:
8107 fputs ("le", file);
8108 break;
8109 case UNORDERED:
8110 fputs ("unord", file);
8111 break;
8112 case NE:
8113 case LTGT:
8114 fputs ("neq", file);
8115 break;
8116 case UNGE:
8117 case GE:
8118 fputs ("nlt", file);
8119 break;
8120 case UNGT:
8121 case GT:
8122 fputs ("nle", file);
8123 break;
8124 case ORDERED:
8125 fputs ("ord", file);
8126 break;
8127 default:
8128 gcc_unreachable ();
8129 }
8130 return;
8131 case 'O':
8132 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8133 if (ASSEMBLER_DIALECT == ASM_ATT)
8134 {
8135 switch (GET_MODE (x))
8136 {
8137 case HImode: putc ('w', file); break;
8138 case SImode:
8139 case SFmode: putc ('l', file); break;
8140 case DImode:
8141 case DFmode: putc ('q', file); break;
8142 default: gcc_unreachable ();
8143 }
8144 putc ('.', file);
8145 }
8146 #endif
8147 return;
8148 case 'C':
8149 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8150 return;
8151 case 'F':
8152 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8153 if (ASSEMBLER_DIALECT == ASM_ATT)
8154 putc ('.', file);
8155 #endif
8156 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8157 return;
8158
8159 /* Like above, but reverse condition */
8160 case 'c':
8161 /* Check to see if argument to %c is really a constant
8162 and not a condition code which needs to be reversed. */
8163 if (!COMPARISON_P (x))
8164 {
8165 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8166 return;
8167 }
8168 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8169 return;
8170 case 'f':
8171 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8172 if (ASSEMBLER_DIALECT == ASM_ATT)
8173 putc ('.', file);
8174 #endif
8175 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8176 return;
8177
8178 case 'H':
8179 /* It doesn't actually matter what mode we use here, as we're
8180 only going to use this for printing. */
8181 x = adjust_address_nv (x, DImode, 8);
8182 break;
8183
8184 case '+':
8185 {
8186 rtx x;
8187
8188 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8189 return;
8190
8191 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8192 if (x)
8193 {
8194 int pred_val = INTVAL (XEXP (x, 0));
8195
8196 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8197 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8198 {
8199 int taken = pred_val > REG_BR_PROB_BASE / 2;
8200 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8201
8202 /* Emit hints only in the case default branch prediction
8203 heuristics would fail. */
8204 if (taken != cputaken)
8205 {
8206 /* We use 3e (DS) prefix for taken branches and
8207 2e (CS) prefix for not taken branches. */
8208 if (taken)
8209 fputs ("ds ; ", file);
8210 else
8211 fputs ("cs ; ", file);
8212 }
8213 }
8214 }
8215 return;
8216 }
8217 default:
8218 output_operand_lossage ("invalid operand code '%c'", code);
8219 }
8220 }
8221
8222 if (GET_CODE (x) == REG)
8223 print_reg (x, code, file);
8224
8225 else if (GET_CODE (x) == MEM)
8226 {
8227 /* No `byte ptr' prefix for call instructions. */
8228 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8229 {
8230 const char * size;
8231 switch (GET_MODE_SIZE (GET_MODE (x)))
8232 {
8233 case 1: size = "BYTE"; break;
8234 case 2: size = "WORD"; break;
8235 case 4: size = "DWORD"; break;
8236 case 8: size = "QWORD"; break;
8237 case 12: size = "XWORD"; break;
8238 case 16: size = "XMMWORD"; break;
8239 default:
8240 gcc_unreachable ();
8241 }
8242
8243 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8244 if (code == 'b')
8245 size = "BYTE";
8246 else if (code == 'w')
8247 size = "WORD";
8248 else if (code == 'k')
8249 size = "DWORD";
8250
8251 fputs (size, file);
8252 fputs (" PTR ", file);
8253 }
8254
8255 x = XEXP (x, 0);
8256 /* Avoid (%rip) for call operands. */
8257 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8258 && GET_CODE (x) != CONST_INT)
8259 output_addr_const (file, x);
8260 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8261 output_operand_lossage ("invalid constraints for operand");
8262 else
8263 output_address (x);
8264 }
8265
8266 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8267 {
8268 REAL_VALUE_TYPE r;
8269 long l;
8270
8271 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8272 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8273
8274 if (ASSEMBLER_DIALECT == ASM_ATT)
8275 putc ('$', file);
8276 fprintf (file, "0x%08lx", l);
8277 }
8278
8279 /* These float cases don't actually occur as immediate operands. */
8280 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8281 {
8282 char dstr[30];
8283
8284 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8285 fprintf (file, "%s", dstr);
8286 }
8287
8288 else if (GET_CODE (x) == CONST_DOUBLE
8289 && GET_MODE (x) == XFmode)
8290 {
8291 char dstr[30];
8292
8293 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8294 fprintf (file, "%s", dstr);
8295 }
8296
8297 else
8298 {
8299 /* We have patterns that allow zero sets of memory, for instance.
8300 In 64-bit mode, we should probably support all 8-byte vectors,
8301 since we can in fact encode that into an immediate. */
8302 if (GET_CODE (x) == CONST_VECTOR)
8303 {
8304 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8305 x = const0_rtx;
8306 }
8307
8308 if (code != 'P')
8309 {
8310 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
8311 {
8312 if (ASSEMBLER_DIALECT == ASM_ATT)
8313 putc ('$', file);
8314 }
8315 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8316 || GET_CODE (x) == LABEL_REF)
8317 {
8318 if (ASSEMBLER_DIALECT == ASM_ATT)
8319 putc ('$', file);
8320 else
8321 fputs ("OFFSET FLAT:", file);
8322 }
8323 }
8324 if (GET_CODE (x) == CONST_INT)
8325 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8326 else if (flag_pic)
8327 output_pic_addr_const (file, x, code);
8328 else
8329 output_addr_const (file, x);
8330 }
8331 }
8332 \f
8333 /* Print a memory operand whose address is ADDR. */
8334
8335 void
8336 print_operand_address (FILE *file, rtx addr)
8337 {
8338 struct ix86_address parts;
8339 rtx base, index, disp;
8340 int scale;
8341 int ok = ix86_decompose_address (addr, &parts);
8342
8343 gcc_assert (ok);
8344
8345 base = parts.base;
8346 index = parts.index;
8347 disp = parts.disp;
8348 scale = parts.scale;
8349
8350 switch (parts.seg)
8351 {
8352 case SEG_DEFAULT:
8353 break;
8354 case SEG_FS:
8355 case SEG_GS:
8356 if (USER_LABEL_PREFIX[0] == 0)
8357 putc ('%', file);
8358 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8359 break;
8360 default:
8361 gcc_unreachable ();
8362 }
8363
8364 if (!base && !index)
8365 {
8366 /* A displacement-only address requires special attention. */
8367
8368 if (GET_CODE (disp) == CONST_INT)
8369 {
8370 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8371 {
8372 if (USER_LABEL_PREFIX[0] == 0)
8373 putc ('%', file);
8374 fputs ("ds:", file);
8375 }
8376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8377 }
8378 else if (flag_pic)
8379 output_pic_addr_const (file, disp, 0);
8380 else
8381 output_addr_const (file, disp);
8382
8383 /* Use the one byte shorter RIP-relative addressing for 64bit mode. */
8384 if (TARGET_64BIT)
8385 {
8386 if (GET_CODE (disp) == CONST
8387 && GET_CODE (XEXP (disp, 0)) == PLUS
8388 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8389 disp = XEXP (XEXP (disp, 0), 0);
8390 if (GET_CODE (disp) == LABEL_REF
8391 || (GET_CODE (disp) == SYMBOL_REF
8392 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8393 fputs ("(%rip)", file);
8394 }
8395 }
8396 else
8397 {
8398 if (ASSEMBLER_DIALECT == ASM_ATT)
8399 {
8400 if (disp)
8401 {
8402 if (flag_pic)
8403 output_pic_addr_const (file, disp, 0);
8404 else if (GET_CODE (disp) == LABEL_REF)
8405 output_asm_label (disp);
8406 else
8407 output_addr_const (file, disp);
8408 }
8409
8410 putc ('(', file);
8411 if (base)
8412 print_reg (base, 0, file);
8413 if (index)
8414 {
8415 putc (',', file);
8416 print_reg (index, 0, file);
8417 if (scale != 1)
8418 fprintf (file, ",%d", scale);
8419 }
8420 putc (')', file);
8421 }
8422 else
8423 {
8424 rtx offset = NULL_RTX;
8425
8426 if (disp)
8427 {
8428 /* Pull out the offset of a symbol; print any symbol itself. */
8429 if (GET_CODE (disp) == CONST
8430 && GET_CODE (XEXP (disp, 0)) == PLUS
8431 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
8432 {
8433 offset = XEXP (XEXP (disp, 0), 1);
8434 disp = gen_rtx_CONST (VOIDmode,
8435 XEXP (XEXP (disp, 0), 0));
8436 }
8437
8438 if (flag_pic)
8439 output_pic_addr_const (file, disp, 0);
8440 else if (GET_CODE (disp) == LABEL_REF)
8441 output_asm_label (disp);
8442 else if (GET_CODE (disp) == CONST_INT)
8443 offset = disp;
8444 else
8445 output_addr_const (file, disp);
8446 }
8447
8448 putc ('[', file);
8449 if (base)
8450 {
8451 print_reg (base, 0, file);
8452 if (offset)
8453 {
8454 if (INTVAL (offset) >= 0)
8455 putc ('+', file);
8456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8457 }
8458 }
8459 else if (offset)
8460 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8461 else
8462 putc ('0', file);
8463
8464 if (index)
8465 {
8466 putc ('+', file);
8467 print_reg (index, 0, file);
8468 if (scale != 1)
8469 fprintf (file, "*%d", scale);
8470 }
8471 putc (']', file);
8472 }
8473 }
8474 }
8475
8476 bool
8477 output_addr_const_extra (FILE *file, rtx x)
8478 {
8479 rtx op;
8480
8481 if (GET_CODE (x) != UNSPEC)
8482 return false;
8483
8484 op = XVECEXP (x, 0, 0);
8485 switch (XINT (x, 1))
8486 {
8487 case UNSPEC_GOTTPOFF:
8488 output_addr_const (file, op);
8489 /* FIXME: This might be @TPOFF in Sun ld. */
8490 fputs ("@GOTTPOFF", file);
8491 break;
8492 case UNSPEC_TPOFF:
8493 output_addr_const (file, op);
8494 fputs ("@TPOFF", file);
8495 break;
8496 case UNSPEC_NTPOFF:
8497 output_addr_const (file, op);
8498 if (TARGET_64BIT)
8499 fputs ("@TPOFF", file);
8500 else
8501 fputs ("@NTPOFF", file);
8502 break;
8503 case UNSPEC_DTPOFF:
8504 output_addr_const (file, op);
8505 fputs ("@DTPOFF", file);
8506 break;
8507 case UNSPEC_GOTNTPOFF:
8508 output_addr_const (file, op);
8509 if (TARGET_64BIT)
8510 fputs ("@GOTTPOFF(%rip)", file);
8511 else
8512 fputs ("@GOTNTPOFF", file);
8513 break;
8514 case UNSPEC_INDNTPOFF:
8515 output_addr_const (file, op);
8516 fputs ("@INDNTPOFF", file);
8517 break;
8518
8519 default:
8520 return false;
8521 }
8522
8523 return true;
8524 }
8525 \f
8526 /* Split one or more DImode RTL references into pairs of SImode
8527 references. The RTL can be REG, offsettable MEM, integer constant, or
8528 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8529 split and "num" is its length. lo_half and hi_half are output arrays
8530 that parallel "operands". */
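/* Illustrative example (an editorial addition, not from the original source):
   on little-endian x86 a DImode register operand splits into SImode subregs
   at byte offsets 0 (low half) and 4 (high half), which is what
   simplify_gen_subreg produces below. */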
8531
8532 void
8533 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8534 {
8535 while (num--)
8536 {
8537 rtx op = operands[num];
8538
8539 /* simplify_subreg refuses to split volatile memory addresses,
8540 but we still have to handle them. */
8541 if (GET_CODE (op) == MEM)
8542 {
8543 lo_half[num] = adjust_address (op, SImode, 0);
8544 hi_half[num] = adjust_address (op, SImode, 4);
8545 }
8546 else
8547 {
8548 lo_half[num] = simplify_gen_subreg (SImode, op,
8549 GET_MODE (op) == VOIDmode
8550 ? DImode : GET_MODE (op), 0);
8551 hi_half[num] = simplify_gen_subreg (SImode, op,
8552 GET_MODE (op) == VOIDmode
8553 ? DImode : GET_MODE (op), 4);
8554 }
8555 }
8556 }
8557 /* Split one or more TImode RTL references into pairs of DImode
8558 references. The RTL can be REG, offsettable MEM, integer constant, or
8559 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8560 split and "num" is its length. lo_half and hi_half are output arrays
8561 that parallel "operands". */
8562
8563 void
8564 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8565 {
8566 while (num--)
8567 {
8568 rtx op = operands[num];
8569
8570 /* simplify_subreg refuses to split volatile memory addresses, but we
8571 still have to handle them. */
8572 if (GET_CODE (op) == MEM)
8573 {
8574 lo_half[num] = adjust_address (op, DImode, 0);
8575 hi_half[num] = adjust_address (op, DImode, 8);
8576 }
8577 else
8578 {
8579 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8580 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8581 }
8582 }
8583 }
8584 \f
8585 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8586 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8587 is the expression of the binary operation. The output may either be
8588 emitted here, or returned to the caller, like all output_* functions.
8589
8590 There is no guarantee that the operands are the same mode, as they
8591 might be within FLOAT or FLOAT_EXTEND expressions. */
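/* Illustrative example (an editorial addition, not from the original source):
   for an SSE SFmode add this returns "addss\t{%2, %0|%0, %2}", while the 387
   paths below build strings such as "fadd%z2\t%2" or "faddp\t{%2, %0|%0, %2}"
   depending on whether an operand is in memory and which register dies. */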
8592
8593 #ifndef SYSV386_COMPAT
8594 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8595 wants to fix the assemblers because that causes incompatibility
8596 with gcc. No-one wants to fix gcc because that causes
8597 incompatibility with assemblers... You can use the option of
8598 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8599 #define SYSV386_COMPAT 1
8600 #endif
8601
8602 const char *
8603 output_387_binary_op (rtx insn, rtx *operands)
8604 {
8605 static char buf[30];
8606 const char *p;
8607 const char *ssep;
8608 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8609
8610 #ifdef ENABLE_CHECKING
8611 /* Even if we do not want to check the inputs, this documents the input
8612 constraints, which helps in understanding the following code. */
8613 if (STACK_REG_P (operands[0])
8614 && ((REG_P (operands[1])
8615 && REGNO (operands[0]) == REGNO (operands[1])
8616 && (STACK_REG_P (operands[2]) || GET_CODE (operands[2]) == MEM))
8617 || (REG_P (operands[2])
8618 && REGNO (operands[0]) == REGNO (operands[2])
8619 && (STACK_REG_P (operands[1]) || GET_CODE (operands[1]) == MEM)))
8620 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8621 ; /* ok */
8622 else
8623 gcc_assert (is_sse);
8624 #endif
8625
8626 switch (GET_CODE (operands[3]))
8627 {
8628 case PLUS:
8629 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8630 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8631 p = "fiadd";
8632 else
8633 p = "fadd";
8634 ssep = "add";
8635 break;
8636
8637 case MINUS:
8638 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8639 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8640 p = "fisub";
8641 else
8642 p = "fsub";
8643 ssep = "sub";
8644 break;
8645
8646 case MULT:
8647 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8648 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8649 p = "fimul";
8650 else
8651 p = "fmul";
8652 ssep = "mul";
8653 break;
8654
8655 case DIV:
8656 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8657 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8658 p = "fidiv";
8659 else
8660 p = "fdiv";
8661 ssep = "div";
8662 break;
8663
8664 default:
8665 gcc_unreachable ();
8666 }
8667
8668 if (is_sse)
8669 {
8670 strcpy (buf, ssep);
8671 if (GET_MODE (operands[0]) == SFmode)
8672 strcat (buf, "ss\t{%2, %0|%0, %2}");
8673 else
8674 strcat (buf, "sd\t{%2, %0|%0, %2}");
8675 return buf;
8676 }
8677 strcpy (buf, p);
8678
8679 switch (GET_CODE (operands[3]))
8680 {
8681 case MULT:
8682 case PLUS:
8683 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8684 {
8685 rtx temp = operands[2];
8686 operands[2] = operands[1];
8687 operands[1] = temp;
8688 }
8689
8690 /* We know operands[0] == operands[1]. */
8691
8692 if (GET_CODE (operands[2]) == MEM)
8693 {
8694 p = "%z2\t%2";
8695 break;
8696 }
8697
8698 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8699 {
8700 if (STACK_TOP_P (operands[0]))
8701 /* How is it that we are storing to a dead operand[2]?
8702 Well, presumably operands[1] is dead too. We can't
8703 store the result to st(0) as st(0) gets popped on this
8704 instruction. Instead store to operands[2] (which I
8705 think has to be st(1)). st(1) will be popped later.
8706 gcc <= 2.8.1 didn't have this check and generated
8707 assembly code that the Unixware assembler rejected. */
8708 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8709 else
8710 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8711 break;
8712 }
8713
8714 if (STACK_TOP_P (operands[0]))
8715 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8716 else
8717 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8718 break;
8719
8720 case MINUS:
8721 case DIV:
8722 if (GET_CODE (operands[1]) == MEM)
8723 {
8724 p = "r%z1\t%1";
8725 break;
8726 }
8727
8728 if (GET_CODE (operands[2]) == MEM)
8729 {
8730 p = "%z2\t%2";
8731 break;
8732 }
8733
8734 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8735 {
8736 #if SYSV386_COMPAT
8737 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8738 derived assemblers, confusingly reverse the direction of
8739 the operation for fsub{r} and fdiv{r} when the
8740 destination register is not st(0). The Intel assembler
8741 doesn't have this brain damage. Read !SYSV386_COMPAT to
8742 figure out what the hardware really does. */
8743 if (STACK_TOP_P (operands[0]))
8744 p = "{p\t%0, %2|rp\t%2, %0}";
8745 else
8746 p = "{rp\t%2, %0|p\t%0, %2}";
8747 #else
8748 if (STACK_TOP_P (operands[0]))
8749 /* As above for fmul/fadd, we can't store to st(0). */
8750 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8751 else
8752 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8753 #endif
8754 break;
8755 }
8756
8757 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8758 {
8759 #if SYSV386_COMPAT
8760 if (STACK_TOP_P (operands[0]))
8761 p = "{rp\t%0, %1|p\t%1, %0}";
8762 else
8763 p = "{p\t%1, %0|rp\t%0, %1}";
8764 #else
8765 if (STACK_TOP_P (operands[0]))
8766 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8767 else
8768 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8769 #endif
8770 break;
8771 }
8772
8773 if (STACK_TOP_P (operands[0]))
8774 {
8775 if (STACK_TOP_P (operands[1]))
8776 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8777 else
8778 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8779 break;
8780 }
8781 else if (STACK_TOP_P (operands[1]))
8782 {
8783 #if SYSV386_COMPAT
8784 p = "{\t%1, %0|r\t%0, %1}";
8785 #else
8786 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8787 #endif
8788 }
8789 else
8790 {
8791 #if SYSV386_COMPAT
8792 p = "{r\t%2, %0|\t%0, %2}";
8793 #else
8794 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8795 #endif
8796 }
8797 break;
8798
8799 default:
8800 gcc_unreachable ();
8801 }
8802
8803 strcat (buf, p);
8804 return buf;
8805 }
8806
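/* As an illustrative sketch: with is_sse and SFmode operands the routine
   returns "addss\t{%2, %0|%0, %2}", while the x87 path composes forms such
   as "fadd\t{%y2, %0|%0, %y2}" (no pop) or "faddp\t{%2, %0|%0, %2}"
   (popping), depending on which operand is st(0) and whether it dies.  */
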
8807 /* Return needed mode for entity in optimize_mode_switching pass. */
8808
8809 int
8810 ix86_mode_needed (int entity, rtx insn)
8811 {
8812 enum attr_i387_cw mode;
8813
8814 /* The mode UNINITIALIZED is used to store the control word after a
8815 function call or ASM pattern. The mode ANY specifies that the function
8816 has no requirements on the control word and makes no changes to the
8817 bits we are interested in. */
8818
8819 if (CALL_P (insn)
8820 || (NONJUMP_INSN_P (insn)
8821 && (asm_noperands (PATTERN (insn)) >= 0
8822 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8823 return I387_CW_UNINITIALIZED;
8824
8825 if (recog_memoized (insn) < 0)
8826 return I387_CW_ANY;
8827
8828 mode = get_attr_i387_cw (insn);
8829
8830 switch (entity)
8831 {
8832 case I387_TRUNC:
8833 if (mode == I387_CW_TRUNC)
8834 return mode;
8835 break;
8836
8837 case I387_FLOOR:
8838 if (mode == I387_CW_FLOOR)
8839 return mode;
8840 break;
8841
8842 case I387_CEIL:
8843 if (mode == I387_CW_CEIL)
8844 return mode;
8845 break;
8846
8847 case I387_MASK_PM:
8848 if (mode == I387_CW_MASK_PM)
8849 return mode;
8850 break;
8851
8852 default:
8853 gcc_unreachable ();
8854 }
8855
8856 return I387_CW_ANY;
8857 }
8858
8859 /* Output code to initialize the control word copies used by the trunc?f?i
8860 and rounding patterns. MODE selects which variant of the control word
8861 (truncation, floor, ceil, or masked-PM) to prepare in its stack slot. */
8862
8863 void
8864 emit_i387_cw_initialization (int mode)
8865 {
8866 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8867 rtx new_mode;
8868
8869 int slot;
8870
8871 rtx reg = gen_reg_rtx (HImode);
8872
8873 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8874 emit_move_insn (reg, copy_rtx (stored_mode));
8875
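  /* In the x87 control word the rounding control field is bits 11:10
     (mask 0x0c00): 00 = round to nearest, 01 = round down, 10 = round up,
     11 = truncate toward zero; bit 5 (0x0020) masks the precision
     exception used by nearbyint().  The first branch below adjusts those
     bits on the full HImode copy with and/ior; the second apparently
     rewrites just the high byte via gen_movsi_insv_1 (the mask-PM case,
     whose bit lives in the low byte, still uses ior there).  */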
8876 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8877 {
8878 switch (mode)
8879 {
8880 case I387_CW_TRUNC:
8881 /* round toward zero (truncate) */
8882 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8883 slot = SLOT_CW_TRUNC;
8884 break;
8885
8886 case I387_CW_FLOOR:
8887 /* round down toward -oo */
8888 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8889 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8890 slot = SLOT_CW_FLOOR;
8891 break;
8892
8893 case I387_CW_CEIL:
8894 /* round up toward +oo */
8895 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8896 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8897 slot = SLOT_CW_CEIL;
8898 break;
8899
8900 case I387_CW_MASK_PM:
8901 /* mask precision exception for nearbyint() */
8902 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8903 slot = SLOT_CW_MASK_PM;
8904 break;
8905
8906 default:
8907 gcc_unreachable ();
8908 }
8909 }
8910 else
8911 {
8912 switch (mode)
8913 {
8914 case I387_CW_TRUNC:
8915 /* round toward zero (truncate) */
8916 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8917 slot = SLOT_CW_TRUNC;
8918 break;
8919
8920 case I387_CW_FLOOR:
8921 /* round down toward -oo */
8922 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8923 slot = SLOT_CW_FLOOR;
8924 break;
8925
8926 case I387_CW_CEIL:
8927 /* round up toward +oo */
8928 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8929 slot = SLOT_CW_CEIL;
8930 break;
8931
8932 case I387_CW_MASK_PM:
8933 /* mask precision exception for nearbyint() */
8934 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8935 slot = SLOT_CW_MASK_PM;
8936 break;
8937
8938 default:
8939 gcc_unreachable ();
8940 }
8941 }
8942
8943 gcc_assert (slot < MAX_386_STACK_LOCALS);
8944
8945 new_mode = assign_386_stack_local (HImode, slot);
8946 emit_move_insn (new_mode, reg);
8947 }
8948
8949 /* Output code for INSN to convert a float to a signed int. OPERANDS
8950 are the insn operands. The output may be [HSD]Imode and the input
8951 operand may be [SDX]Fmode. */
8952
8953 const char *
8954 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8955 {
8956 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8957 int dimode_p = GET_MODE (operands[0]) == DImode;
8958 int round_mode = get_attr_i387_cw (insn);
8959
8960 /* Jump through a hoop or two for DImode, since the hardware has no
8961 non-popping instruction. We used to do this a different way, but
8962 that was somewhat fragile and broke with post-reload splitters. */
8963 if ((dimode_p || fisttp) && !stack_top_dies)
8964 output_asm_insn ("fld\t%y1", operands);
8965
8966 gcc_assert (STACK_TOP_P (operands[1]));
8967 gcc_assert (GET_CODE (operands[0]) == MEM);
8968
8969 if (fisttp)
8970 output_asm_insn ("fisttp%z0\t%0", operands);
8971 else
8972 {
8973 if (round_mode != I387_CW_ANY)
8974 output_asm_insn ("fldcw\t%3", operands);
8975 if (stack_top_dies || dimode_p)
8976 output_asm_insn ("fistp%z0\t%0", operands);
8977 else
8978 output_asm_insn ("fist%z0\t%0", operands);
8979 if (round_mode != I387_CW_ANY)
8980 output_asm_insn ("fldcw\t%2", operands);
8981 }
8982
8983 return "";
8984 }
8985
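/* For instance, a DImode truncation where the value must survive (the
   stack top does not die) is emitted roughly as:

        fld     %st(0)          ; duplicate, since fistp will pop
        fldcw   %3              ; presumably the adjusted control word slot
        fistpll %0
        fldcw   %2              ; presumably the saved control word slot

   matching the templates above and emit_i387_cw_initialization.  */
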
8986 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8987 have the values zero or one, indicates the ffreep insn's operand
8988 from the OPERANDS array. */
8989
8990 static const char *
8991 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8992 {
8993 if (TARGET_USE_FFREEP)
8994 #if HAVE_AS_IX86_FFREEP
8995 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8996 #else
8997 {
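      /* ffreep %st(N) encodes as the two bytes 0xdf 0xc0+N; emitted as a
	 little-endian .word this is 0xcNdf, so patching the character at
	 index 9 of the template below with the digit for N produces the
	 right encoding when the assembler lacks an ffreep mnemonic.  */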
8998 static char retval[] = ".word\t0xc_df";
8999 int regno = REGNO (operands[opno]);
9000
9001 gcc_assert (FP_REGNO_P (regno));
9002
9003 retval[9] = '0' + (regno - FIRST_STACK_REG);
9004 return retval;
9005 }
9006 #endif
9007
9008 return opno ? "fstp\t%y1" : "fstp\t%y0";
9009 }
9010
9011
9012 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9013 should be used. UNORDERED_P is true when fucom should be used. */
9014
9015 const char *
9016 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9017 {
9018 int stack_top_dies;
9019 rtx cmp_op0, cmp_op1;
9020 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9021
9022 if (eflags_p)
9023 {
9024 cmp_op0 = operands[0];
9025 cmp_op1 = operands[1];
9026 }
9027 else
9028 {
9029 cmp_op0 = operands[1];
9030 cmp_op1 = operands[2];
9031 }
9032
9033 if (is_sse)
9034 {
9035 if (GET_MODE (operands[0]) == SFmode)
9036 if (unordered_p)
9037 return "ucomiss\t{%1, %0|%0, %1}";
9038 else
9039 return "comiss\t{%1, %0|%0, %1}";
9040 else
9041 if (unordered_p)
9042 return "ucomisd\t{%1, %0|%0, %1}";
9043 else
9044 return "comisd\t{%1, %0|%0, %1}";
9045 }
9046
9047 gcc_assert (STACK_TOP_P (cmp_op0));
9048
9049 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9050
9051 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9052 {
9053 if (stack_top_dies)
9054 {
9055 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9056 return output_387_ffreep (operands, 1);
9057 }
9058 else
9059 return "ftst\n\tfnstsw\t%0";
9060 }
9061
9062 if (STACK_REG_P (cmp_op1)
9063 && stack_top_dies
9064 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9065 && REGNO (cmp_op1) != FIRST_STACK_REG)
9066 {
9067 /* If the top of the 387 stack dies, and the other operand is also
9068 a stack register that dies, then this must be a `fcompp' float
9069 compare. */
9070
9071 if (eflags_p)
9072 {
9073 /* There is no double popping fcomi variant. Fortunately,
9074 eflags is immune from the fstp's cc clobbering. */
9075 if (unordered_p)
9076 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9077 else
9078 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9079 return output_387_ffreep (operands, 0);
9080 }
9081 else
9082 {
9083 if (unordered_p)
9084 return "fucompp\n\tfnstsw\t%0";
9085 else
9086 return "fcompp\n\tfnstsw\t%0";
9087 }
9088 }
9089 else
9090 {
9091 /* Encoded here as (eflags_p << 3) | (intmode << 2) | (unordered_p << 1) | stack_top_dies. */
9092
9093 static const char * const alt[16] =
9094 {
9095 "fcom%z2\t%y2\n\tfnstsw\t%0",
9096 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9097 "fucom%z2\t%y2\n\tfnstsw\t%0",
9098 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9099
9100 "ficom%z2\t%y2\n\tfnstsw\t%0",
9101 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9102 NULL,
9103 NULL,
9104
9105 "fcomi\t{%y1, %0|%0, %y1}",
9106 "fcomip\t{%y1, %0|%0, %y1}",
9107 "fucomi\t{%y1, %0|%0, %y1}",
9108 "fucomip\t{%y1, %0|%0, %y1}",
9109
9110 NULL,
9111 NULL,
9112 NULL,
9113 NULL
9114 };
9115
9116 int mask;
9117 const char *ret;
9118
9119 mask = eflags_p << 3;
9120 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9121 mask |= unordered_p << 1;
9122 mask |= stack_top_dies;
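      /* E.g. an unordered eflags compare whose stack top dies gives
	 mask = (1 << 3) | (0 << 2) | (1 << 1) | 1 = 11, selecting
	 "fucomip\t{%y1, %0|%0, %y1}" from the table above.  */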
9123
9124 gcc_assert (mask < 16);
9125 ret = alt[mask];
9126 gcc_assert (ret);
9127
9128 return ret;
9129 }
9130 }
9131
9132 void
9133 ix86_output_addr_vec_elt (FILE *file, int value)
9134 {
9135 const char *directive = ASM_LONG;
9136
9137 #ifdef ASM_QUAD
9138 if (TARGET_64BIT)
9139 directive = ASM_QUAD;
9140 #else
9141 gcc_assert (!TARGET_64BIT);
9142 #endif
9143
9144 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9145 }
9146
9147 void
9148 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9149 {
9150 if (TARGET_64BIT)
9151 fprintf (file, "%s%s%d-%s%d\n",
9152 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9153 else if (HAVE_AS_GOTOFF_IN_DATA)
9154 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9155 #if TARGET_MACHO
9156 else if (TARGET_MACHO)
9157 {
9158 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9159 machopic_output_function_base_name (file);
9160 fprintf(file, "\n");
9161 }
9162 #endif
9163 else
9164 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9165 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9166 }
9167 \f
9168 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9169 for the target. */
9170
9171 void
9172 ix86_expand_clear (rtx dest)
9173 {
9174 rtx tmp;
9175
9176 /* We play register width games, which are only valid after reload. */
9177 gcc_assert (reload_completed);
9178
9179 /* Avoid HImode and its attendant prefix byte. */
9180 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9181 dest = gen_rtx_REG (SImode, REGNO (dest));
9182
9183 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9184
9185 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9186 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9187 {
9188 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9189 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9190 }
9191
9192 emit_insn (tmp);
9193 }
9194
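/* When !TARGET_USE_MOV0 or when optimizing for size, the xor form
   ("xorl %eax, %eax", 2 bytes) is chosen over "movl $0, %eax" (5 bytes);
   the PARALLEL above records the flags clobber the xor implies so later
   passes know the condition codes are destroyed.  */
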
9195 /* X is an unchanging MEM. If it is a constant pool reference, return
9196 the constant pool rtx, else NULL. */
9197
9198 rtx
9199 maybe_get_pool_constant (rtx x)
9200 {
9201 x = ix86_delegitimize_address (XEXP (x, 0));
9202
9203 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9204 return get_pool_constant (x);
9205
9206 return NULL_RTX;
9207 }
9208
9209 void
9210 ix86_expand_move (enum machine_mode mode, rtx operands[])
9211 {
9212 int strict = (reload_in_progress || reload_completed);
9213 rtx op0, op1;
9214 enum tls_model model;
9215
9216 op0 = operands[0];
9217 op1 = operands[1];
9218
9219 if (GET_CODE (op1) == SYMBOL_REF)
9220 {
9221 model = SYMBOL_REF_TLS_MODEL (op1);
9222 if (model)
9223 {
9224 op1 = legitimize_tls_address (op1, model, true);
9225 op1 = force_operand (op1, op0);
9226 if (op1 == op0)
9227 return;
9228 }
9229 }
9230 else if (GET_CODE (op1) == CONST
9231 && GET_CODE (XEXP (op1, 0)) == PLUS
9232 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9233 {
9234 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9235 if (model)
9236 {
9237 rtx addend = XEXP (XEXP (op1, 0), 1);
9238 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9239 op1 = force_operand (op1, NULL);
9240 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9241 op0, 1, OPTAB_DIRECT);
9242 if (op1 == op0)
9243 return;
9244 }
9245 }
9246
9247 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9248 {
9249 if (TARGET_MACHO && !TARGET_64BIT)
9250 {
9251 #if TARGET_MACHO
9252 if (MACHOPIC_PURE)
9253 {
9254 rtx temp = ((reload_in_progress
9255 || ((op0 && GET_CODE (op0) == REG)
9256 && mode == Pmode))
9257 ? op0 : gen_reg_rtx (Pmode));
9258 op1 = machopic_indirect_data_reference (op1, temp);
9259 op1 = machopic_legitimize_pic_address (op1, mode,
9260 temp == op1 ? 0 : temp);
9261 }
9262 else if (MACHOPIC_INDIRECT)
9263 op1 = machopic_indirect_data_reference (op1, 0);
9264 if (op0 == op1)
9265 return;
9266 #endif
9267 }
9268 else
9269 {
9270 if (GET_CODE (op0) == MEM)
9271 op1 = force_reg (Pmode, op1);
9272 else
9273 op1 = legitimize_address (op1, op1, Pmode);
9274 }
9275 }
9276 else
9277 {
9278 if (GET_CODE (op0) == MEM
9279 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9280 || !push_operand (op0, mode))
9281 && GET_CODE (op1) == MEM)
9282 op1 = force_reg (mode, op1);
9283
9284 if (push_operand (op0, mode)
9285 && ! general_no_elim_operand (op1, mode))
9286 op1 = copy_to_mode_reg (mode, op1);
9287
9288 /* Force large constants in 64-bit compilation into registers
9289 to get them CSEd. */
9290 if (TARGET_64BIT && mode == DImode
9291 && immediate_operand (op1, mode)
9292 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9293 && !register_operand (op0, mode)
9294 && optimize && !reload_completed && !reload_in_progress)
9295 op1 = copy_to_mode_reg (mode, op1);
9296
9297 if (FLOAT_MODE_P (mode))
9298 {
9299 /* If we are loading a floating point constant to a register,
9300 force the value to memory now, since we'll get better code
9301 out of the back end. */
9302
9303 if (strict)
9304 ;
9305 else if (GET_CODE (op1) == CONST_DOUBLE)
9306 {
9307 op1 = validize_mem (force_const_mem (mode, op1));
9308 if (!register_operand (op0, mode))
9309 {
9310 rtx temp = gen_reg_rtx (mode);
9311 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9312 emit_move_insn (op0, temp);
9313 return;
9314 }
9315 }
9316 }
9317 }
9318
9319 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9320 }
9321
9322 void
9323 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9324 {
9325 rtx op0 = operands[0], op1 = operands[1];
9326
9327 /* Force constants other than zero into memory. We do not know how
9328 the instructions used to build constants modify the upper 64 bits
9329 of the register; once we have that information we may be able
9330 to handle some of them more efficiently. */
9331 if ((reload_in_progress | reload_completed) == 0
9332 && register_operand (op0, mode)
9333 && CONSTANT_P (op1)
9334 && standard_sse_constant_p (op1) <= 0)
9335 op1 = validize_mem (force_const_mem (mode, op1));
9336
9337 /* If neither operand is already a register, force operand 1 into one and emit the move. */
9338 if (!no_new_pseudos
9339 && !register_operand (op0, mode)
9340 && !register_operand (op1, mode))
9341 {
9342 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9343 return;
9344 }
9345
9346 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9347 }
9348
9349 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9350 straight to ix86_expand_vector_move. */
9351
9352 void
9353 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9354 {
9355 rtx op0, op1, m;
9356
9357 op0 = operands[0];
9358 op1 = operands[1];
9359
9360 if (MEM_P (op1))
9361 {
9362 /* If we're optimizing for size, movups is the smallest. */
9363 if (optimize_size)
9364 {
9365 op0 = gen_lowpart (V4SFmode, op0);
9366 op1 = gen_lowpart (V4SFmode, op1);
9367 emit_insn (gen_sse_movups (op0, op1));
9368 return;
9369 }
9370
9371 /* ??? If we have typed data, then it would appear that using
9372 movdqu is the only way to get unaligned data loaded with
9373 integer type. */
9374 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9375 {
9376 op0 = gen_lowpart (V16QImode, op0);
9377 op1 = gen_lowpart (V16QImode, op1);
9378 emit_insn (gen_sse2_movdqu (op0, op1));
9379 return;
9380 }
9381
9382 if (TARGET_SSE2 && mode == V2DFmode)
9383 {
9384 rtx zero;
9385
9386 /* When SSE registers are split into halves, we can avoid
9387 writing to the top half twice. */
9388 if (TARGET_SSE_SPLIT_REGS)
9389 {
9390 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9391 zero = op0;
9392 }
9393 else
9394 {
9395 /* ??? Not sure about the best option for the Intel chips.
9396 The following would seem to satisfy; the register is
9397 entirely cleared, breaking the dependency chain. We
9398 then store to the upper half, with a dependency depth
9399 of one. A rumor has it that Intel recommends two movsd
9400 followed by an unpacklpd, but this is unconfirmed. And
9401 given that the dependency depth of the unpacklpd would
9402 still be one, I'm not sure why this would be better. */
9403 zero = CONST0_RTX (V2DFmode);
9404 }
9405
9406 m = adjust_address (op1, DFmode, 0);
9407 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9408 m = adjust_address (op1, DFmode, 8);
9409 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9410 }
9411 else
9412 {
9413 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9414 emit_move_insn (op0, CONST0_RTX (mode));
9415 else
9416 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9417
9418 if (mode != V4SFmode)
9419 op0 = gen_lowpart (V4SFmode, op0);
9420 m = adjust_address (op1, V2SFmode, 0);
9421 emit_insn (gen_sse_loadlps (op0, op0, m));
9422 m = adjust_address (op1, V2SFmode, 8);
9423 emit_insn (gen_sse_loadhps (op0, op0, m));
9424 }
9425 }
9426 else if (MEM_P (op0))
9427 {
9428 /* If we're optimizing for size, movups is the smallest. */
9429 if (optimize_size)
9430 {
9431 op0 = gen_lowpart (V4SFmode, op0);
9432 op1 = gen_lowpart (V4SFmode, op1);
9433 emit_insn (gen_sse_movups (op0, op1));
9434 return;
9435 }
9436
9437 /* ??? Similar to above, only less clear because of quote
9438 typeless stores unquote. */
9439 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9440 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9441 {
9442 op0 = gen_lowpart (V16QImode, op0);
9443 op1 = gen_lowpart (V16QImode, op1);
9444 emit_insn (gen_sse2_movdqu (op0, op1));
9445 return;
9446 }
9447
9448 if (TARGET_SSE2 && mode == V2DFmode)
9449 {
9450 m = adjust_address (op0, DFmode, 0);
9451 emit_insn (gen_sse2_storelpd (m, op1));
9452 m = adjust_address (op0, DFmode, 8);
9453 emit_insn (gen_sse2_storehpd (m, op1));
9454 }
9455 else
9456 {
9457 if (mode != V4SFmode)
9458 op1 = gen_lowpart (V4SFmode, op1);
9459 m = adjust_address (op0, V2SFmode, 0);
9460 emit_insn (gen_sse_storelps (m, op1));
9461 m = adjust_address (op0, V2SFmode, 8);
9462 emit_insn (gen_sse_storehps (m, op1));
9463 }
9464 }
9465 else
9466 gcc_unreachable ();
9467 }
9468
9469 /* Expand a push in MODE. This is some mode for which we do not support
9470 proper push instructions, at least from the registers that we expect
9471 the value to live in. */
9472
9473 void
9474 ix86_expand_push (enum machine_mode mode, rtx x)
9475 {
9476 rtx tmp;
9477
9478 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9479 GEN_INT (-GET_MODE_SIZE (mode)),
9480 stack_pointer_rtx, 1, OPTAB_DIRECT);
9481 if (tmp != stack_pointer_rtx)
9482 emit_move_insn (stack_pointer_rtx, tmp);
9483
9484 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9485 emit_move_insn (tmp, x);
9486 }
9487
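/* A minimal sketch of what this expands to, e.g. for a 16-byte mode:

        sub       $16, %esp
        mov<mode> value, (%esp)

   i.e. the stack pointer is adjusted explicitly and the value is then
   stored through it, since no push instruction exists for the mode.  */
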
9488 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9489 destination to use for the operation. If different from the true
9490 destination in operands[0], a copy operation will be required. */
9491
9492 rtx
9493 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9494 rtx operands[])
9495 {
9496 int matching_memory;
9497 rtx src1, src2, dst;
9498
9499 dst = operands[0];
9500 src1 = operands[1];
9501 src2 = operands[2];
9502
9503 /* Recognize <var1> = <value> <op> <var1> for commutative operators */
9504 if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9505 && (rtx_equal_p (dst, src2)
9506 || immediate_operand (src1, mode)))
9507 {
9508 rtx temp = src1;
9509 src1 = src2;
9510 src2 = temp;
9511 }
9512
9513 /* If the destination is memory, and we do not have matching source
9514 operands, do things in registers. */
9515 matching_memory = 0;
9516 if (GET_CODE (dst) == MEM)
9517 {
9518 if (rtx_equal_p (dst, src1))
9519 matching_memory = 1;
9520 else if (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9521 && rtx_equal_p (dst, src2))
9522 matching_memory = 2;
9523 else
9524 dst = gen_reg_rtx (mode);
9525 }
9526
9527 /* Both source operands cannot be in memory. */
9528 if (GET_CODE (src1) == MEM && GET_CODE (src2) == MEM)
9529 {
9530 if (matching_memory != 2)
9531 src2 = force_reg (mode, src2);
9532 else
9533 src1 = force_reg (mode, src1);
9534 }
9535
9536 /* If the operation is not commutative, source 1 cannot be a constant
9537 or non-matching memory. */
9538 if ((CONSTANT_P (src1)
9539 || (!matching_memory && GET_CODE (src1) == MEM))
9540 && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9541 src1 = force_reg (mode, src1);
9542
9543 src1 = operands[1] = src1;
9544 src2 = operands[2] = src2;
9545 return dst;
9546 }
9547
9548 /* Similarly, but assume that the destination has already been
9549 set up properly. */
9550
9551 void
9552 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9553 enum machine_mode mode, rtx operands[])
9554 {
9555 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9556 gcc_assert (dst == operands[0]);
9557 }
9558
9559 /* Attempt to expand a binary operator. Make the expansion closer to the
9560 actual machine than just general_operand, which would allow 3 separate
9561 memory references (one output, two input) in a single insn. */
9562
9563 void
9564 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9565 rtx operands[])
9566 {
9567 rtx src1, src2, dst, op, clob;
9568
9569 dst = ix86_fixup_binary_operands (code, mode, operands);
9570 src1 = operands[1];
9571 src2 = operands[2];
9572
9573 /* Emit the instruction. */
9574
9575 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9576 if (reload_in_progress)
9577 {
9578 /* Reload doesn't know about the flags register, and doesn't know that
9579 it doesn't want to clobber it. We can only do this with PLUS. */
9580 gcc_assert (code == PLUS);
9581 emit_insn (op);
9582 }
9583 else
9584 {
9585 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9586 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9587 }
9588
9589 /* Fix up the destination if needed. */
9590 if (dst != operands[0])
9591 emit_move_insn (operands[0], dst);
9592 }
9593
9594 /* Return TRUE or FALSE depending on whether the binary operator meets the
9595 appropriate constraints. */
9596
9597 int
9598 ix86_binary_operator_ok (enum rtx_code code,
9599 enum machine_mode mode ATTRIBUTE_UNUSED,
9600 rtx operands[3])
9601 {
9602 /* Both source operands cannot be in memory. */
9603 if (GET_CODE (operands[1]) == MEM && GET_CODE (operands[2]) == MEM)
9604 return 0;
9605 /* If the operation is not commutative, source 1 cannot be a constant. */
9606 if (CONSTANT_P (operands[1]) && GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9607 return 0;
9608 /* If the destination is memory, we must have a matching source operand. */
9609 if (GET_CODE (operands[0]) == MEM
9610 && ! (rtx_equal_p (operands[0], operands[1])
9611 || (GET_RTX_CLASS (code) == RTX_COMM_ARITH
9612 && rtx_equal_p (operands[0], operands[2]))))
9613 return 0;
9614 /* If the operation is not commutative and source 1 is memory, we must
9615 have a matching destination. */
9616 if (GET_CODE (operands[1]) == MEM
9617 && GET_RTX_CLASS (code) != RTX_COMM_ARITH
9618 && ! rtx_equal_p (operands[0], operands[1]))
9619 return 0;
9620 return 1;
9621 }
9622
9623 /* Attempt to expand a unary operator. Make the expansion closer to the
9624 actual machine than just general_operand, which would allow 2 separate
9625 memory references (one output, one input) in a single insn. */
9626
9627 void
9628 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9629 rtx operands[])
9630 {
9631 int matching_memory;
9632 rtx src, dst, op, clob;
9633
9634 dst = operands[0];
9635 src = operands[1];
9636
9637 /* If the destination is memory, and we do not have matching source
9638 operands, do things in registers. */
9639 matching_memory = 0;
9640 if (MEM_P (dst))
9641 {
9642 if (rtx_equal_p (dst, src))
9643 matching_memory = 1;
9644 else
9645 dst = gen_reg_rtx (mode);
9646 }
9647
9648 /* When source operand is memory, destination must match. */
9649 if (MEM_P (src) && !matching_memory)
9650 src = force_reg (mode, src);
9651
9652 /* Emit the instruction. */
9653
9654 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9655 if (reload_in_progress || code == NOT)
9656 {
9657 /* Reload doesn't know about the flags register, and doesn't know that it
9658 doesn't want to clobber it; NOT does not modify the flags, so it needs no clobber. */
9659 gcc_assert (code == NOT);
9660 emit_insn (op);
9661 }
9662 else
9663 {
9664 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9665 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9666 }
9667
9668 /* Fix up the destination if needed. */
9669 if (dst != operands[0])
9670 emit_move_insn (operands[0], dst);
9671 }
9672
9673 /* Return TRUE or FALSE depending on whether the unary operator meets the
9674 appropriate constraints. */
9675
9676 int
9677 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9678 enum machine_mode mode ATTRIBUTE_UNUSED,
9679 rtx operands[2] ATTRIBUTE_UNUSED)
9680 {
9681 /* If one of the operands is memory, source and destination must match. */
9682 if ((GET_CODE (operands[0]) == MEM
9683 || GET_CODE (operands[1]) == MEM)
9684 && ! rtx_equal_p (operands[0], operands[1]))
9685 return FALSE;
9686 return TRUE;
9687 }
9688
9689 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
9690 Create a mask for the sign bit in MODE for an SSE register. If VECT is
9691 true, then replicate the mask for all elements of the vector register.
9692 If INVERT is true, then create a mask excluding the sign bit. */
9693
9694 rtx
9695 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
9696 {
9697 enum machine_mode vec_mode;
9698 HOST_WIDE_INT hi, lo;
9699 int shift = 63;
9700 rtvec v;
9701 rtx mask;
9702
9703 /* Find the sign bit, sign extended to 2*HWI. */
9704 if (mode == SFmode)
9705 lo = 0x80000000, hi = lo < 0;
9706 else if (HOST_BITS_PER_WIDE_INT >= 64)
9707 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
9708 else
9709 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
9710
9711 if (invert)
9712 lo = ~lo, hi = ~hi;
9713
9714 /* Force this value into the low part of a fp vector constant. */
9715 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
9716 mask = gen_lowpart (mode, mask);
9717
9718 if (mode == SFmode)
9719 {
9720 if (vect)
9721 v = gen_rtvec (4, mask, mask, mask, mask);
9722 else
9723 v = gen_rtvec (4, mask, CONST0_RTX (SFmode),
9724 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9725 vec_mode = V4SFmode;
9726 }
9727 else
9728 {
9729 if (vect)
9730 v = gen_rtvec (2, mask, mask);
9731 else
9732 v = gen_rtvec (2, mask, CONST0_RTX (DFmode));
9733 vec_mode = V2DFmode;
9734 }
9735
9736 return force_reg (vec_mode, gen_rtx_CONST_VECTOR (vec_mode, v));
9737 }
9738
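/* The scalar mask values are 0x80000000 for SFmode and 0x8000000000000000
   for DFmode; with INVERT they become 0x7fffffff and 0x7fffffffffffffff,
   i.e. everything but the sign bit.  VECT decides whether the mask is
   broadcast to all vector elements or placed only in the low element.  */
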
9739 /* Generate code for floating point ABS or NEG. */
9740
9741 void
9742 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
9743 rtx operands[])
9744 {
9745 rtx mask, set, use, clob, dst, src;
9746 bool matching_memory;
9747 bool use_sse = false;
9748 bool vector_mode = VECTOR_MODE_P (mode);
9749 enum machine_mode elt_mode = mode;
9750
9751 if (vector_mode)
9752 {
9753 elt_mode = GET_MODE_INNER (mode);
9754 use_sse = true;
9755 }
9756 else if (TARGET_SSE_MATH)
9757 use_sse = SSE_FLOAT_MODE_P (mode);
9758
9759 /* NEG and ABS performed with SSE use bitwise mask operations.
9760 Create the appropriate mask now. */
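  /* Concretely, NEG is done by XORing with the sign-bit mask and ABS by
     ANDing with its complement, which is why the mask is built inverted
     for ABS (see the XOR/AND selection below).  */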
9761 if (use_sse)
9762 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
9763 else
9764 mask = NULL_RTX;
9765
9766 dst = operands[0];
9767 src = operands[1];
9768
9769 /* If the destination is memory, and we don't have matching source
9770 operands or we're using the x87, do things in registers. */
9771 matching_memory = false;
9772 if (MEM_P (dst))
9773 {
9774 if (use_sse && rtx_equal_p (dst, src))
9775 matching_memory = true;
9776 else
9777 dst = gen_reg_rtx (mode);
9778 }
9779 if (MEM_P (src) && !matching_memory)
9780 src = force_reg (mode, src);
9781
9782 if (vector_mode)
9783 {
9784 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
9785 set = gen_rtx_SET (VOIDmode, dst, set);
9786 emit_insn (set);
9787 }
9788 else
9789 {
9790 set = gen_rtx_fmt_e (code, mode, src);
9791 set = gen_rtx_SET (VOIDmode, dst, set);
9792 if (mask)
9793 {
9794 use = gen_rtx_USE (VOIDmode, mask);
9795 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9796 emit_insn (gen_rtx_PARALLEL (VOIDmode,
9797 gen_rtvec (3, set, use, clob)));
9798 }
9799 else
9800 emit_insn (set);
9801 }
9802
9803 if (dst != operands[0])
9804 emit_move_insn (operands[0], dst);
9805 }
9806
9807 /* Expand a copysign operation. Special case operand 0 being a constant. */
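/* The underlying identity is copysign(a, b) = (a & ~SIGNMASK) | (b & SIGNMASK).
   When a is a constant it is first replaced by |a|, so only the single
   sign-bit mask is needed; the variable case below needs both the mask
   and its complement.  */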
9808
9809 void
9810 ix86_expand_copysign (rtx operands[])
9811 {
9812 enum machine_mode mode, vmode;
9813 rtx dest, op0, op1, mask, nmask;
9814
9815 dest = operands[0];
9816 op0 = operands[1];
9817 op1 = operands[2];
9818
9819 mode = GET_MODE (dest);
9820 vmode = mode == SFmode ? V4SFmode : V2DFmode;
9821
9822 if (GET_CODE (op0) == CONST_DOUBLE)
9823 {
9824 rtvec v;
9825
9826 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
9827 op0 = simplify_unary_operation (ABS, mode, op0, mode);
9828
9829 if (op0 == CONST0_RTX (mode))
9830 op0 = CONST0_RTX (vmode);
9831 else
9832 {
9833 if (mode == SFmode)
9834 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
9835 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
9836 else
9837 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
9838 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
9839 }
9840
9841 mask = ix86_build_signbit_mask (mode, 0, 0);
9842
9843 if (mode == SFmode)
9844 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
9845 else
9846 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
9847 }
9848 else
9849 {
9850 nmask = ix86_build_signbit_mask (mode, 0, 1);
9851 mask = ix86_build_signbit_mask (mode, 0, 0);
9852
9853 if (mode == SFmode)
9854 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
9855 else
9856 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
9857 }
9858 }
9859
9860 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
9861 be a constant, and so has already been expanded into a vector constant. */
9862
9863 void
9864 ix86_split_copysign_const (rtx operands[])
9865 {
9866 enum machine_mode mode, vmode;
9867 rtx dest, op0, op1, mask, x;
9868
9869 dest = operands[0];
9870 op0 = operands[1];
9871 op1 = operands[2];
9872 mask = operands[3];
9873
9874 mode = GET_MODE (dest);
9875 vmode = GET_MODE (mask);
9876
9877 dest = simplify_gen_subreg (vmode, dest, mode, 0);
9878 x = gen_rtx_AND (vmode, dest, mask);
9879 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9880
9881 if (op0 != CONST0_RTX (vmode))
9882 {
9883 x = gen_rtx_IOR (vmode, dest, op0);
9884 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9885 }
9886 }
9887
9888 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
9889 so we have to do two masks. */
9890
9891 void
9892 ix86_split_copysign_var (rtx operands[])
9893 {
9894 enum machine_mode mode, vmode;
9895 rtx dest, scratch, op0, op1, mask, nmask, x;
9896
9897 dest = operands[0];
9898 scratch = operands[1];
9899 op0 = operands[2];
9900 op1 = operands[3];
9901 nmask = operands[4];
9902 mask = operands[5];
9903
9904 mode = GET_MODE (dest);
9905 vmode = GET_MODE (mask);
9906
9907 if (rtx_equal_p (op0, op1))
9908 {
9909 /* Shouldn't happen often (it's useless, obviously), but when it does
9910 we'd generate incorrect code if we continue below. */
9911 emit_move_insn (dest, op0);
9912 return;
9913 }
9914
9915 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
9916 {
9917 gcc_assert (REGNO (op1) == REGNO (scratch));
9918
9919 x = gen_rtx_AND (vmode, scratch, mask);
9920 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9921
9922 dest = mask;
9923 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9924 x = gen_rtx_NOT (vmode, dest);
9925 x = gen_rtx_AND (vmode, x, op0);
9926 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9927 }
9928 else
9929 {
9930 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
9931 {
9932 x = gen_rtx_AND (vmode, scratch, mask);
9933 }
9934 else /* alternative 2,4 */
9935 {
9936 gcc_assert (REGNO (mask) == REGNO (scratch));
9937 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
9938 x = gen_rtx_AND (vmode, scratch, op1);
9939 }
9940 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
9941
9942 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
9943 {
9944 dest = simplify_gen_subreg (vmode, op0, mode, 0);
9945 x = gen_rtx_AND (vmode, dest, nmask);
9946 }
9947 else /* alternative 3,4 */
9948 {
9949 gcc_assert (REGNO (nmask) == REGNO (dest));
9950 dest = nmask;
9951 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
9952 x = gen_rtx_AND (vmode, dest, op0);
9953 }
9954 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9955 }
9956
9957 x = gen_rtx_IOR (vmode, dest, scratch);
9958 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
9959 }
9960
9961 /* Return TRUE or FALSE depending on whether the first SET in INSN
9962 has source and destination with matching CC modes, and whether the
9963 CC mode is at least as constrained as REQ_MODE. */
9964
9965 int
9966 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
9967 {
9968 rtx set;
9969 enum machine_mode set_mode;
9970
9971 set = PATTERN (insn);
9972 if (GET_CODE (set) == PARALLEL)
9973 set = XVECEXP (set, 0, 0);
9974 gcc_assert (GET_CODE (set) == SET);
9975 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
9976
9977 set_mode = GET_MODE (SET_DEST (set));
9978 switch (set_mode)
9979 {
9980 case CCNOmode:
9981 if (req_mode != CCNOmode
9982 && (req_mode != CCmode
9983 || XEXP (SET_SRC (set), 1) != const0_rtx))
9984 return 0;
9985 break;
9986 case CCmode:
9987 if (req_mode == CCGCmode)
9988 return 0;
9989 /* FALLTHRU */
9990 case CCGCmode:
9991 if (req_mode == CCGOCmode || req_mode == CCNOmode)
9992 return 0;
9993 /* FALLTHRU */
9994 case CCGOCmode:
9995 if (req_mode == CCZmode)
9996 return 0;
9997 /* FALLTHRU */
9998 case CCZmode:
9999 break;
10000
10001 default:
10002 gcc_unreachable ();
10003 }
10004
10005 return (GET_MODE (SET_SRC (set)) == set_mode);
10006 }
10007
10008 /* Generate insn patterns to do an integer compare of OPERANDS. */
10009
10010 static rtx
10011 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10012 {
10013 enum machine_mode cmpmode;
10014 rtx tmp, flags;
10015
10016 cmpmode = SELECT_CC_MODE (code, op0, op1);
10017 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10018
10019 /* This is very simple, but making the interface the same as in the
10020 FP case makes the rest of the code easier. */
10021 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10022 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10023
10024 /* Return the test that should be put into the flags user, i.e.
10025 the bcc, scc, or cmov instruction. */
10026 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10027 }
10028
10029 /* Figure out whether to use ordered or unordered fp comparisons.
10030 Return the appropriate mode to use. */
10031
10032 enum machine_mode
10033 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10034 {
10035 /* ??? In order to make all comparisons reversible, we do all comparisons
10036 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10037 all forms of trapping and nontrapping comparisons, we can make inequality
10038 comparisons trapping again, since it results in better code when using
10039 FCOM based compares. */
10040 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10041 }
10042
10043 enum machine_mode
10044 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10045 {
10046 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10047 return ix86_fp_compare_mode (code);
10048 switch (code)
10049 {
10050 /* Only zero flag is needed. */
10051 case EQ: /* ZF=0 */
10052 case NE: /* ZF!=0 */
10053 return CCZmode;
10054 /* Codes needing carry flag. */
10055 case GEU: /* CF=0 */
10056 case GTU: /* CF=0 & ZF=0 */
10057 case LTU: /* CF=1 */
10058 case LEU: /* CF=1 | ZF=1 */
10059 return CCmode;
10060 /* Codes possibly doable only with sign flag when
10061 comparing against zero. */
10062 case GE: /* SF=OF or SF=0 */
10063 case LT: /* SF<>OF or SF=1 */
10064 if (op1 == const0_rtx)
10065 return CCGOCmode;
10066 else
10067 /* For other cases Carry flag is not required. */
10068 return CCGCmode;
10069 /* Codes doable only with the sign flag when comparing
10070 against zero, but for which we lack a jump instruction,
10071 so we need to use relational tests against overflow,
10072 which thus needs to be zero. */
10073 case GT: /* ZF=0 & SF=OF */
10074 case LE: /* ZF=1 | SF<>OF */
10075 if (op1 == const0_rtx)
10076 return CCNOmode;
10077 else
10078 return CCGCmode;
10079 /* The strcmp pattern does (use flags), and combine may ask us for a
10080 proper mode. */
10081 case USE:
10082 return CCmode;
10083 default:
10084 gcc_unreachable ();
10085 }
10086 }
10087
10088 /* Return the fixed registers used for condition codes. */
10089
10090 static bool
10091 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10092 {
10093 *p1 = FLAGS_REG;
10094 *p2 = FPSR_REG;
10095 return true;
10096 }
10097
10098 /* If two condition code modes are compatible, return a condition code
10099 mode which is compatible with both. Otherwise, return
10100 VOIDmode. */
10101
10102 static enum machine_mode
10103 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10104 {
10105 if (m1 == m2)
10106 return m1;
10107
10108 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10109 return VOIDmode;
10110
10111 if ((m1 == CCGCmode && m2 == CCGOCmode)
10112 || (m1 == CCGOCmode && m2 == CCGCmode))
10113 return CCGCmode;
10114
10115 switch (m1)
10116 {
10117 default:
10118 gcc_unreachable ();
10119
10120 case CCmode:
10121 case CCGCmode:
10122 case CCGOCmode:
10123 case CCNOmode:
10124 case CCZmode:
10125 switch (m2)
10126 {
10127 default:
10128 return VOIDmode;
10129
10130 case CCmode:
10131 case CCGCmode:
10132 case CCGOCmode:
10133 case CCNOmode:
10134 case CCZmode:
10135 return CCmode;
10136 }
10137
10138 case CCFPmode:
10139 case CCFPUmode:
10140 /* These are only compatible with themselves, which we already
10141 checked above. */
10142 return VOIDmode;
10143 }
10144 }
10145
10146 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10147
10148 int
10149 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10150 {
10151 enum rtx_code swapped_code = swap_condition (code);
10152 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10153 || (ix86_fp_comparison_cost (swapped_code)
10154 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10155 }
10156
10157 /* Swap, force into registers, or otherwise massage the two operands
10158 to a fp comparison. The operands are updated in place; the new
10159 comparison code is returned. */
10160
10161 static enum rtx_code
10162 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10163 {
10164 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10165 rtx op0 = *pop0, op1 = *pop1;
10166 enum machine_mode op_mode = GET_MODE (op0);
10167 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10168
10169 /* All of the unordered compare instructions only work on registers.
10170 The same is true of the fcomi compare instructions. The XFmode
10171 compare instructions require registers except when comparing
10172 against zero or when converting operand 1 from fixed point to
10173 floating point. */
10174
10175 if (!is_sse
10176 && (fpcmp_mode == CCFPUmode
10177 || (op_mode == XFmode
10178 && ! (standard_80387_constant_p (op0) == 1
10179 || standard_80387_constant_p (op1) == 1)
10180 && GET_CODE (op1) != FLOAT)
10181 || ix86_use_fcomi_compare (code)))
10182 {
10183 op0 = force_reg (op_mode, op0);
10184 op1 = force_reg (op_mode, op1);
10185 }
10186 else
10187 {
10188 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10189 things around if they appear profitable, otherwise force op0
10190 into a register. */
10191
10192 if (standard_80387_constant_p (op0) == 0
10193 || (GET_CODE (op0) == MEM
10194 && ! (standard_80387_constant_p (op1) == 0
10195 || GET_CODE (op1) == MEM)))
10196 {
10197 rtx tmp;
10198 tmp = op0, op0 = op1, op1 = tmp;
10199 code = swap_condition (code);
10200 }
10201
10202 if (GET_CODE (op0) != REG)
10203 op0 = force_reg (op_mode, op0);
10204
10205 if (CONSTANT_P (op1))
10206 {
10207 int tmp = standard_80387_constant_p (op1);
10208 if (tmp == 0)
10209 op1 = validize_mem (force_const_mem (op_mode, op1));
10210 else if (tmp == 1)
10211 {
10212 if (TARGET_CMOVE)
10213 op1 = force_reg (op_mode, op1);
10214 }
10215 else
10216 op1 = force_reg (op_mode, op1);
10217 }
10218 }
10219
10220 /* Try to rearrange the comparison to make it cheaper. */
10221 if (ix86_fp_comparison_cost (code)
10222 > ix86_fp_comparison_cost (swap_condition (code))
10223 && (GET_CODE (op1) == REG || !no_new_pseudos))
10224 {
10225 rtx tmp;
10226 tmp = op0, op0 = op1, op1 = tmp;
10227 code = swap_condition (code);
10228 if (GET_CODE (op0) != REG)
10229 op0 = force_reg (op_mode, op0);
10230 }
10231
10232 *pop0 = op0;
10233 *pop1 = op1;
10234 return code;
10235 }
10236
10237 /* Convert the comparison codes we use to represent an FP comparison into the
10238 integer code that will result in a proper branch. Return UNKNOWN if no such
10239 code is available. */
10240
10241 enum rtx_code
10242 ix86_fp_compare_code_to_integer (enum rtx_code code)
10243 {
10244 switch (code)
10245 {
10246 case GT:
10247 return GTU;
10248 case GE:
10249 return GEU;
10250 case ORDERED:
10251 case UNORDERED:
10252 return code;
10253 break;
10254 case UNEQ:
10255 return EQ;
10256 break;
10257 case UNLT:
10258 return LTU;
10259 break;
10260 case UNLE:
10261 return LEU;
10262 break;
10263 case LTGT:
10264 return NE;
10265 break;
10266 default:
10267 return UNKNOWN;
10268 }
10269 }
10270
10271 /* Split comparison code CODE into comparisons we can do using branch
10272 instructions. BYPASS_CODE is the comparison code for a branch that will
10273 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10274 is not required, its code is set to UNKNOWN.
10275 We never require more than two branches. */
10276
10277 void
10278 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10279 enum rtx_code *first_code,
10280 enum rtx_code *second_code)
10281 {
10282 *first_code = code;
10283 *bypass_code = UNKNOWN;
10284 *second_code = UNKNOWN;
10285
10286 /* The fcomi comparison sets flags as follows:
10287
10288 cmp ZF PF CF
10289 > 0 0 0
10290 < 0 0 1
10291 = 1 0 0
10292 un 1 1 1 */
10293
10294 switch (code)
10295 {
10296 case GT: /* GTU - CF=0 & ZF=0 */
10297 case GE: /* GEU - CF=0 */
10298 case ORDERED: /* PF=0 */
10299 case UNORDERED: /* PF=1 */
10300 case UNEQ: /* EQ - ZF=1 */
10301 case UNLT: /* LTU - CF=1 */
10302 case UNLE: /* LEU - CF=1 | ZF=1 */
10303 case LTGT: /* EQ - ZF=0 */
10304 break;
10305 case LT: /* LTU - CF=1 - fails on unordered */
10306 *first_code = UNLT;
10307 *bypass_code = UNORDERED;
10308 break;
10309 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10310 *first_code = UNLE;
10311 *bypass_code = UNORDERED;
10312 break;
10313 case EQ: /* EQ - ZF=1 - fails on unordered */
10314 *first_code = UNEQ;
10315 *bypass_code = UNORDERED;
10316 break;
10317 case NE: /* NE - ZF=0 - fails on unordered */
10318 *first_code = LTGT;
10319 *second_code = UNORDERED;
10320 break;
10321 case UNGE: /* GEU - CF=0 - fails on unordered */
10322 *first_code = GE;
10323 *second_code = UNORDERED;
10324 break;
10325 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10326 *first_code = GT;
10327 *second_code = UNORDERED;
10328 break;
10329 default:
10330 gcc_unreachable ();
10331 }
10332 if (!TARGET_IEEE_FP)
10333 {
10334 *second_code = UNKNOWN;
10335 *bypass_code = UNKNOWN;
10336 }
10337 }
10338
10339 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10340 All following functions use the number of instructions as their cost metric.
10341 In the future this should be tweaked to compute bytes for optimize_size and
10342 take into account the performance of various instructions on various CPUs. */
10343 static int
10344 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10345 {
10346 if (!TARGET_IEEE_FP)
10347 return 4;
10348 /* The cost of code output by ix86_expand_fp_compare. */
10349 switch (code)
10350 {
10351 case UNLE:
10352 case UNLT:
10353 case LTGT:
10354 case GT:
10355 case GE:
10356 case UNORDERED:
10357 case ORDERED:
10358 case UNEQ:
10359 return 4;
10360 break;
10361 case LT:
10362 case NE:
10363 case EQ:
10364 case UNGE:
10365 return 5;
10366 break;
10367 case LE:
10368 case UNGT:
10369 return 6;
10370 break;
10371 default:
10372 gcc_unreachable ();
10373 }
10374 }
10375
10376 /* Return cost of comparison done using fcomi operation.
10377 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10378 static int
10379 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10380 {
10381 enum rtx_code bypass_code, first_code, second_code;
10382 /* Return an arbitrarily high cost when the instruction is not supported -
10383 this prevents gcc from using it. */
10384 if (!TARGET_CMOVE)
10385 return 1024;
10386 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10387 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10388 }
10389
10390 /* Return cost of comparison done using sahf operation.
10391 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10392 static int
10393 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10394 {
10395 enum rtx_code bypass_code, first_code, second_code;
10396 /* Return an arbitrarily high cost when the instruction is not preferred -
10397 this prevents gcc from using it. */
10398 if (!TARGET_USE_SAHF && !optimize_size)
10399 return 1024;
10400 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10401 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10402 }
10403
10404 /* Compute cost of the comparison done using any method.
10405 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10406 static int
10407 ix86_fp_comparison_cost (enum rtx_code code)
10408 {
10409 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10410 int min;
10411
10412 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10413 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10414
10415 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10416 if (min > sahf_cost)
10417 min = sahf_cost;
10418 if (min > fcomi_cost)
10419 min = fcomi_cost;
10420 return min;
10421 }
10422
10423 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10424
10425 static rtx
10426 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10427 rtx *second_test, rtx *bypass_test)
10428 {
10429 enum machine_mode fpcmp_mode, intcmp_mode;
10430 rtx tmp, tmp2;
10431 int cost = ix86_fp_comparison_cost (code);
10432 enum rtx_code bypass_code, first_code, second_code;
10433
10434 fpcmp_mode = ix86_fp_compare_mode (code);
10435 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10436
10437 if (second_test)
10438 *second_test = NULL_RTX;
10439 if (bypass_test)
10440 *bypass_test = NULL_RTX;
10441
10442 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10443
10444 /* Do fcomi/sahf based test when profitable. */
10445 if ((bypass_code == UNKNOWN || bypass_test)
10446 && (second_code == UNKNOWN || second_test)
10447 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10448 {
10449 if (TARGET_CMOVE)
10450 {
10451 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10452 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10453 tmp);
10454 emit_insn (tmp);
10455 }
10456 else
10457 {
10458 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10459 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10460 if (!scratch)
10461 scratch = gen_reg_rtx (HImode);
10462 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10463 emit_insn (gen_x86_sahf_1 (scratch));
10464 }
10465
10466 /* The FP codes work out to act like unsigned. */
10467 intcmp_mode = fpcmp_mode;
10468 code = first_code;
10469 if (bypass_code != UNKNOWN)
10470 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10471 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10472 const0_rtx);
10473 if (second_code != UNKNOWN)
10474 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10475 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10476 const0_rtx);
10477 }
10478 else
10479 {
10480 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10481 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10482 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10483 if (!scratch)
10484 scratch = gen_reg_rtx (HImode);
10485 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10486
10487 /* In the unordered case, we have to check C2 for NaN's, which
10488 doesn't happen to work out to anything nice combination-wise.
10489 So do some bit twiddling on the value we've got in AH to come
10490 up with an appropriate set of condition codes. */
10491
10492 intcmp_mode = CCNOmode;
10493 switch (code)
10494 {
10495 case GT:
10496 case UNGT:
10497 if (code == GT || !TARGET_IEEE_FP)
10498 {
10499 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10500 code = EQ;
10501 }
10502 else
10503 {
10504 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10505 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10506 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10507 intcmp_mode = CCmode;
10508 code = GEU;
10509 }
10510 break;
10511 case LT:
10512 case UNLT:
10513 if (code == LT && TARGET_IEEE_FP)
10514 {
10515 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10516 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10517 intcmp_mode = CCmode;
10518 code = EQ;
10519 }
10520 else
10521 {
10522 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10523 code = NE;
10524 }
10525 break;
10526 case GE:
10527 case UNGE:
10528 if (code == GE || !TARGET_IEEE_FP)
10529 {
10530 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10531 code = EQ;
10532 }
10533 else
10534 {
10535 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10536 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10537 GEN_INT (0x01)));
10538 code = NE;
10539 }
10540 break;
10541 case LE:
10542 case UNLE:
10543 if (code == LE && TARGET_IEEE_FP)
10544 {
10545 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10546 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10547 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10548 intcmp_mode = CCmode;
10549 code = LTU;
10550 }
10551 else
10552 {
10553 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10554 code = NE;
10555 }
10556 break;
10557 case EQ:
10558 case UNEQ:
10559 if (code == EQ && TARGET_IEEE_FP)
10560 {
10561 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10562 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10563 intcmp_mode = CCmode;
10564 code = EQ;
10565 }
10566 else
10567 {
10568 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10569 code = NE;
10570 break;
10571 }
10572 break;
10573 case NE:
10574 case LTGT:
10575 if (code == NE && TARGET_IEEE_FP)
10576 {
10577 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10578 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10579 GEN_INT (0x40)));
10580 code = NE;
10581 }
10582 else
10583 {
10584 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10585 code = EQ;
10586 }
10587 break;
10588
10589 case UNORDERED:
10590 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10591 code = NE;
10592 break;
10593 case ORDERED:
10594 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10595 code = EQ;
10596 break;
10597
10598 default:
10599 gcc_unreachable ();
10600 }
10601 }
10602
10603 /* Return the test that should be put into the flags user, i.e.
10604 the bcc, scc, or cmov instruction. */
10605 return gen_rtx_fmt_ee (code, VOIDmode,
10606 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10607 const0_rtx);
10608 }
10609
10610 rtx
10611 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10612 {
10613 rtx op0, op1, ret;
10614 op0 = ix86_compare_op0;
10615 op1 = ix86_compare_op1;
10616
10617 if (second_test)
10618 *second_test = NULL_RTX;
10619 if (bypass_test)
10620 *bypass_test = NULL_RTX;
10621
10622 if (ix86_compare_emitted)
10623 {
10624 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10625 ix86_compare_emitted = NULL_RTX;
10626 }
10627 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10628 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10629 second_test, bypass_test);
10630 else
10631 ret = ix86_expand_int_compare (code, op0, op1);
10632
10633 return ret;
10634 }
10635
10636 /* Return true if the CODE will result in nontrivial jump sequence. */
10637 bool
10638 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10639 {
10640 enum rtx_code bypass_code, first_code, second_code;
10641 if (!TARGET_CMOVE)
10642 return true;
10643 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10644 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10645 }
10646
10647 void
10648 ix86_expand_branch (enum rtx_code code, rtx label)
10649 {
10650 rtx tmp;
10651
10652 /* If we have emitted a compare insn, go straight to simple.
10653 ix86_expand_compare won't emit anything if ix86_compare_emitted
10654 is non-NULL. */
10655 if (ix86_compare_emitted)
10656 goto simple;
10657
10658 switch (GET_MODE (ix86_compare_op0))
10659 {
10660 case QImode:
10661 case HImode:
10662 case SImode:
10663 simple:
10664 tmp = ix86_expand_compare (code, NULL, NULL);
10665 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10666 gen_rtx_LABEL_REF (VOIDmode, label),
10667 pc_rtx);
10668 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
10669 return;
10670
10671 case SFmode:
10672 case DFmode:
10673 case XFmode:
10674 {
10675 rtvec vec;
10676 int use_fcomi;
10677 enum rtx_code bypass_code, first_code, second_code;
10678
10679 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
10680 &ix86_compare_op1);
10681
10682 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10683
10684 /* Check whether we will use the natural sequence with one jump. If
10685 so, we can expand the jump early. Otherwise delay expansion by
10686 creating a compound insn so as not to confuse the optimizers. */
10687 if (bypass_code == UNKNOWN && second_code == UNKNOWN
10688 && TARGET_CMOVE)
10689 {
10690 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
10691 gen_rtx_LABEL_REF (VOIDmode, label),
10692 pc_rtx, NULL_RTX, NULL_RTX);
10693 }
10694 else
10695 {
10696 tmp = gen_rtx_fmt_ee (code, VOIDmode,
10697 ix86_compare_op0, ix86_compare_op1);
10698 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
10699 gen_rtx_LABEL_REF (VOIDmode, label),
10700 pc_rtx);
10701 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
10702
10703 use_fcomi = ix86_use_fcomi_compare (code);
10704 vec = rtvec_alloc (3 + !use_fcomi);
10705 RTVEC_ELT (vec, 0) = tmp;
10706 RTVEC_ELT (vec, 1)
10707 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
10708 RTVEC_ELT (vec, 2)
10709 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
10710 if (! use_fcomi)
10711 RTVEC_ELT (vec, 3)
10712 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
10713
10714 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
10715 }
10716 return;
10717 }
10718
10719 case DImode:
10720 if (TARGET_64BIT)
10721 goto simple;
10722 case TImode:
10723 /* Expand DImode branch into multiple compare+branch. */
10724 {
10725 rtx lo[2], hi[2], label2;
10726 enum rtx_code code1, code2, code3;
10727 enum machine_mode submode;
10728
10729 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
10730 {
10731 tmp = ix86_compare_op0;
10732 ix86_compare_op0 = ix86_compare_op1;
10733 ix86_compare_op1 = tmp;
10734 code = swap_condition (code);
10735 }
10736 if (GET_MODE (ix86_compare_op0) == DImode)
10737 {
10738 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
10739 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
10740 submode = SImode;
10741 }
10742 else
10743 {
10744 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
10745 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
10746 submode = DImode;
10747 }
10748
10749 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
10750 avoid two branches. This costs one extra insn, so disable when
10751 optimizing for size. */
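
/* Editorial note (not part of the original sources): the trick works because
   (hi0^hi1)|(lo0^lo1) is zero exactly when both halves are equal, so a single
   EQ/NE test of the combined value replaces the two word compares. */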
10752
10753 if ((code == EQ || code == NE)
10754 && (!optimize_size
10755 || hi[1] == const0_rtx || lo[1] == const0_rtx))
10756 {
10757 rtx xor0, xor1;
10758
10759 xor1 = hi[0];
10760 if (hi[1] != const0_rtx)
10761 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
10762 NULL_RTX, 0, OPTAB_WIDEN);
10763
10764 xor0 = lo[0];
10765 if (lo[1] != const0_rtx)
10766 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
10767 NULL_RTX, 0, OPTAB_WIDEN);
10768
10769 tmp = expand_binop (submode, ior_optab, xor1, xor0,
10770 NULL_RTX, 0, OPTAB_WIDEN);
10771
10772 ix86_compare_op0 = tmp;
10773 ix86_compare_op1 = const0_rtx;
10774 ix86_expand_branch (code, label);
10775 return;
10776 }
10777
10778 /* Otherwise, if we are doing a less-than or greater-or-equal-than
10779 comparison, op1 is a constant, and its low word is zero, then we can
10780 just examine the high word. */
10781
10782 if (GET_CODE (hi[1]) == CONST_INT && lo[1] == const0_rtx)
10783 switch (code)
10784 {
10785 case LT: case LTU: case GE: case GEU:
10786 ix86_compare_op0 = hi[0];
10787 ix86_compare_op1 = hi[1];
10788 ix86_expand_branch (code, label);
10789 return;
10790 default:
10791 break;
10792 }
10793
10794 /* Otherwise, we need two or three jumps. */
10795
10796 label2 = gen_label_rtx ();
10797
10798 code1 = code;
10799 code2 = swap_condition (code);
10800 code3 = unsigned_condition (code);
10801
10802 switch (code)
10803 {
10804 case LT: case GT: case LTU: case GTU:
10805 break;
10806
10807 case LE: code1 = LT; code2 = GT; break;
10808 case GE: code1 = GT; code2 = LT; break;
10809 case LEU: code1 = LTU; code2 = GTU; break;
10810 case GEU: code1 = GTU; code2 = LTU; break;
10811
10812 case EQ: code1 = UNKNOWN; code2 = NE; break;
10813 case NE: code2 = UNKNOWN; break;
10814
10815 default:
10816 gcc_unreachable ();
10817 }
10818
10819 /*
10820 * a < b =>
10821 * if (hi(a) < hi(b)) goto true;
10822 * if (hi(a) > hi(b)) goto false;
10823 * if (lo(a) < lo(b)) goto true;
10824 * false:
10825 */
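
/* Editorial illustration (not part of the original sources): for a signed
   DImode "a <= b" on a 32-bit target, the switch above picks code1 = LT,
   code2 = GT, and code3 stays LEU, so the emitted sequence is:
      if (hi(a) <  hi(b)) goto label;     -- true
      if (hi(a) >  hi(b)) goto label2;    -- false
      if (lo(a) <=u lo(b)) goto label;    -- true
    label2:                               -- false falls through  */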
10826
10827 ix86_compare_op0 = hi[0];
10828 ix86_compare_op1 = hi[1];
10829
10830 if (code1 != UNKNOWN)
10831 ix86_expand_branch (code1, label);
10832 if (code2 != UNKNOWN)
10833 ix86_expand_branch (code2, label2);
10834
10835 ix86_compare_op0 = lo[0];
10836 ix86_compare_op1 = lo[1];
10837 ix86_expand_branch (code3, label);
10838
10839 if (code2 != UNKNOWN)
10840 emit_label (label2);
10841 return;
10842 }
10843
10844 default:
10845 gcc_unreachable ();
10846 }
10847 }
10848
10849 /* Split branch based on floating point condition. */
10850 void
10851 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
10852 rtx target1, rtx target2, rtx tmp, rtx pushed)
10853 {
10854 rtx second, bypass;
10855 rtx label = NULL_RTX;
10856 rtx condition;
10857 int bypass_probability = -1, second_probability = -1, probability = -1;
10858 rtx i;
10859
10860 if (target2 != pc_rtx)
10861 {
10862 rtx tmp = target2;
10863 code = reverse_condition_maybe_unordered (code);
10864 target2 = target1;
10865 target1 = tmp;
10866 }
10867
10868 condition = ix86_expand_fp_compare (code, op1, op2,
10869 tmp, &second, &bypass);
10870
10871 /* Remove pushed operand from stack. */
10872 if (pushed)
10873 ix86_free_from_memory (GET_MODE (pushed));
10874
10875 if (split_branch_probability >= 0)
10876 {
10877 /* Distribute the probabilities across the jumps.
10878 Assume that BYPASS and SECOND always test
10879 for UNORDERED. */
10880 probability = split_branch_probability;
10881
10882 /* A value of 1 is low enough that the probability does not need
10883 to be updated. Later we may run some experiments and see
10884 whether unordered values are more frequent in practice. */
10885 if (bypass)
10886 bypass_probability = 1;
10887 if (second)
10888 second_probability = 1;
10889 }
10890 if (bypass != NULL_RTX)
10891 {
10892 label = gen_label_rtx ();
10893 i = emit_jump_insn (gen_rtx_SET
10894 (VOIDmode, pc_rtx,
10895 gen_rtx_IF_THEN_ELSE (VOIDmode,
10896 bypass,
10897 gen_rtx_LABEL_REF (VOIDmode,
10898 label),
10899 pc_rtx)));
10900 if (bypass_probability >= 0)
10901 REG_NOTES (i)
10902 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10903 GEN_INT (bypass_probability),
10904 REG_NOTES (i));
10905 }
10906 i = emit_jump_insn (gen_rtx_SET
10907 (VOIDmode, pc_rtx,
10908 gen_rtx_IF_THEN_ELSE (VOIDmode,
10909 condition, target1, target2)));
10910 if (probability >= 0)
10911 REG_NOTES (i)
10912 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10913 GEN_INT (probability),
10914 REG_NOTES (i));
10915 if (second != NULL_RTX)
10916 {
10917 i = emit_jump_insn (gen_rtx_SET
10918 (VOIDmode, pc_rtx,
10919 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
10920 target2)));
10921 if (second_probability >= 0)
10922 REG_NOTES (i)
10923 = gen_rtx_EXPR_LIST (REG_BR_PROB,
10924 GEN_INT (second_probability),
10925 REG_NOTES (i));
10926 }
10927 if (label != NULL_RTX)
10928 emit_label (label);
10929 }
10930
10931 int
10932 ix86_expand_setcc (enum rtx_code code, rtx dest)
10933 {
10934 rtx ret, tmp, tmpreg, equiv;
10935 rtx second_test, bypass_test;
10936
10937 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
10938 return 0; /* FAIL */
10939
10940 gcc_assert (GET_MODE (dest) == QImode);
10941
10942 ret = ix86_expand_compare (code, &second_test, &bypass_test);
10943 PUT_MODE (ret, QImode);
10944
10945 tmp = dest;
10946 tmpreg = dest;
10947
10948 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
10949 if (bypass_test || second_test)
10950 {
10951 rtx test = second_test;
10952 int bypass = 0;
10953 rtx tmp2 = gen_reg_rtx (QImode);
10954 if (bypass_test)
10955 {
10956 gcc_assert (!second_test);
10957 test = bypass_test;
10958 bypass = 1;
10959 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
10960 }
10961 PUT_MODE (test, QImode);
10962 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
10963
10964 if (bypass)
10965 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
10966 else
10967 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
10968 }
10969
10970 /* Attach a REG_EQUAL note describing the comparison result. */
10971 if (ix86_compare_op0 && ix86_compare_op1)
10972 {
10973 equiv = simplify_gen_relational (code, QImode,
10974 GET_MODE (ix86_compare_op0),
10975 ix86_compare_op0, ix86_compare_op1);
10976 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
10977 }
10978
10979 return 1; /* DONE */
10980 }
10981
10982 /* Expand comparison setting or clearing carry flag. Return true when
10983 successful and set pop for the operation. */
10984 static bool
10985 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
10986 {
10987 enum machine_mode mode =
10988 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
10989
10990 /* Do not handle DImode compares that go through the special path. Also, we
10991 can't deal with FP compares yet; it would be possible to add that. */
10992 if (mode == (TARGET_64BIT ? TImode : DImode))
10993 return false;
10994 if (FLOAT_MODE_P (mode))
10995 {
10996 rtx second_test = NULL, bypass_test = NULL;
10997 rtx compare_op, compare_seq;
10998
10999 /* Shortcut: the following common codes never translate into carry-flag compares. */
11000 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11001 || code == ORDERED || code == UNORDERED)
11002 return false;
11003
11004 /* These comparisons require zero flag; swap operands so they won't. */
11005 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11006 && !TARGET_IEEE_FP)
11007 {
11008 rtx tmp = op0;
11009 op0 = op1;
11010 op1 = tmp;
11011 code = swap_condition (code);
11012 }
11013
11014 /* Try to expand the comparison and verify that we end up with a carry-flag
11015 based comparison. This fails to be true only when we decide to expand the
11016 comparison using arithmetic, which is not a common scenario. */
11017 start_sequence ();
11018 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11019 &second_test, &bypass_test);
11020 compare_seq = get_insns ();
11021 end_sequence ();
11022
11023 if (second_test || bypass_test)
11024 return false;
11025 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11026 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11027 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11028 else
11029 code = GET_CODE (compare_op);
11030 if (code != LTU && code != GEU)
11031 return false;
11032 emit_insn (compare_seq);
11033 *pop = compare_op;
11034 return true;
11035 }
11036 if (!INTEGRAL_MODE_P (mode))
11037 return false;
11038 switch (code)
11039 {
11040 case LTU:
11041 case GEU:
11042 break;
11043
11044 /* Convert a==0 into (unsigned)a<1. */
11045 case EQ:
11046 case NE:
11047 if (op1 != const0_rtx)
11048 return false;
11049 op1 = const1_rtx;
11050 code = (code == EQ ? LTU : GEU);
11051 break;
11052
11053 /* Convert a>b into b<a or a>=b+1. */
11054 case GTU:
11055 case LEU:
11056 if (GET_CODE (op1) == CONST_INT)
11057 {
11058 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11059 /* Bail out on overflow. We could still swap the operands, but that
11060 would force loading the constant into a register. */
11061 if (op1 == const0_rtx
11062 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11063 return false;
11064 code = (code == GTU ? GEU : LTU);
11065 }
11066 else
11067 {
11068 rtx tmp = op1;
11069 op1 = op0;
11070 op0 = tmp;
11071 code = (code == GTU ? LTU : GEU);
11072 }
11073 break;
11074
11075 /* Convert a>=0 into (unsigned)a<0x80000000. */
11076 case LT:
11077 case GE:
11078 if (mode == DImode || op1 != const0_rtx)
11079 return false;
11080 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11081 code = (code == LT ? GEU : LTU);
11082 break;
11083 case LE:
11084 case GT:
11085 if (mode == DImode || op1 != constm1_rtx)
11086 return false;
11087 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11088 code = (code == LE ? GEU : LTU);
11089 break;
11090
11091 default:
11092 return false;
11093 }
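
/* Editorial worked example (not part of the original sources): for a signed
   SImode "a >= 0", the LT/GE case above rewrites the test as
   (unsigned) a < 0x80000000, i.e. GE becomes LTU, so a plain compare leaves
   the answer in the carry flag.  Likewise "a == 0" becomes
   (unsigned) a < 1 via the EQ/NE case. */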
11094 /* Swapping operands may cause a constant to appear as the first operand. */
11095 if (!nonimmediate_operand (op0, VOIDmode))
11096 {
11097 if (no_new_pseudos)
11098 return false;
11099 op0 = force_reg (mode, op0);
11100 }
11101 ix86_compare_op0 = op0;
11102 ix86_compare_op1 = op1;
11103 *pop = ix86_expand_compare (code, NULL, NULL);
11104 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11105 return true;
11106 }
11107
11108 int
11109 ix86_expand_int_movcc (rtx operands[])
11110 {
11111 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11112 rtx compare_seq, compare_op;
11113 rtx second_test, bypass_test;
11114 enum machine_mode mode = GET_MODE (operands[0]);
11115 bool sign_bit_compare_p = false;
11116
11117 start_sequence ();
11118 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11119 compare_seq = get_insns ();
11120 end_sequence ();
11121
11122 compare_code = GET_CODE (compare_op);
11123
11124 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11125 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11126 sign_bit_compare_p = true;
11127
11128 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11129 HImode insns, we'd be swallowed in word prefix ops. */
11130
11131 if ((mode != HImode || TARGET_FAST_PREFIX)
11132 && (mode != (TARGET_64BIT ? TImode : DImode))
11133 && GET_CODE (operands[2]) == CONST_INT
11134 && GET_CODE (operands[3]) == CONST_INT)
11135 {
11136 rtx out = operands[0];
11137 HOST_WIDE_INT ct = INTVAL (operands[2]);
11138 HOST_WIDE_INT cf = INTVAL (operands[3]);
11139 HOST_WIDE_INT diff;
11140
11141 diff = ct - cf;
11142 /* Sign bit compares are better done using shifts than by using
11143 sbb. */
11144 if (sign_bit_compare_p
11145 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11146 ix86_compare_op1, &compare_op))
11147 {
11148 /* Detect overlap between destination and compare sources. */
11149 rtx tmp = out;
11150
11151 if (!sign_bit_compare_p)
11152 {
11153 bool fpcmp = false;
11154
11155 compare_code = GET_CODE (compare_op);
11156
11157 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11158 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11159 {
11160 fpcmp = true;
11161 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11162 }
11163
11164 /* To simplify the rest of the code, restrict to the GEU case. */
11165 if (compare_code == LTU)
11166 {
11167 HOST_WIDE_INT tmp = ct;
11168 ct = cf;
11169 cf = tmp;
11170 compare_code = reverse_condition (compare_code);
11171 code = reverse_condition (code);
11172 }
11173 else
11174 {
11175 if (fpcmp)
11176 PUT_CODE (compare_op,
11177 reverse_condition_maybe_unordered
11178 (GET_CODE (compare_op)));
11179 else
11180 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11181 }
11182 diff = ct - cf;
11183
11184 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11185 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11186 tmp = gen_reg_rtx (mode);
11187
11188 if (mode == DImode)
11189 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11190 else
11191 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11192 }
11193 else
11194 {
11195 if (code == GT || code == GE)
11196 code = reverse_condition (code);
11197 else
11198 {
11199 HOST_WIDE_INT tmp = ct;
11200 ct = cf;
11201 cf = tmp;
11202 diff = ct - cf;
11203 }
11204 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11205 ix86_compare_op1, VOIDmode, 0, -1);
11206 }
11207
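/* Editorial note (illustrative, not from the original sources): at this point
   TMP holds a -1/0 mask: -1 when the condition holds, 0 otherwise.  The
   carry-flag path produces it with "sbb reg,reg" (reg = reg - reg - CF = -CF),
   the sign-bit path with an arithmetic shift right by 31/63.  The cases below
   then fold ct/cf into that mask with add/or/not/and. */
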
11208 if (diff == 1)
11209 {
11210 /*
11211 * cmpl op0,op1
11212 * sbbl dest,dest
11213 * [addl dest, ct]
11214 *
11215 * Size 5 - 8.
11216 */
11217 if (ct)
11218 tmp = expand_simple_binop (mode, PLUS,
11219 tmp, GEN_INT (ct),
11220 copy_rtx (tmp), 1, OPTAB_DIRECT);
11221 }
11222 else if (cf == -1)
11223 {
11224 /*
11225 * cmpl op0,op1
11226 * sbbl dest,dest
11227 * orl $ct, dest
11228 *
11229 * Size 8.
11230 */
11231 tmp = expand_simple_binop (mode, IOR,
11232 tmp, GEN_INT (ct),
11233 copy_rtx (tmp), 1, OPTAB_DIRECT);
11234 }
11235 else if (diff == -1 && ct)
11236 {
11237 /*
11238 * cmpl op0,op1
11239 * sbbl dest,dest
11240 * notl dest
11241 * [addl dest, cf]
11242 *
11243 * Size 8 - 11.
11244 */
11245 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11246 if (cf)
11247 tmp = expand_simple_binop (mode, PLUS,
11248 copy_rtx (tmp), GEN_INT (cf),
11249 copy_rtx (tmp), 1, OPTAB_DIRECT);
11250 }
11251 else
11252 {
11253 /*
11254 * cmpl op0,op1
11255 * sbbl dest,dest
11256 * [notl dest]
11257 * andl cf - ct, dest
11258 * [addl dest, ct]
11259 *
11260 * Size 8 - 11.
11261 */
11262
11263 if (cf == 0)
11264 {
11265 cf = ct;
11266 ct = 0;
11267 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11268 }
11269
11270 tmp = expand_simple_binop (mode, AND,
11271 copy_rtx (tmp),
11272 gen_int_mode (cf - ct, mode),
11273 copy_rtx (tmp), 1, OPTAB_DIRECT);
11274 if (ct)
11275 tmp = expand_simple_binop (mode, PLUS,
11276 copy_rtx (tmp), GEN_INT (ct),
11277 copy_rtx (tmp), 1, OPTAB_DIRECT);
11278 }
11279
11280 if (!rtx_equal_p (tmp, out))
11281 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11282
11283 return 1; /* DONE */
11284 }
11285
11286 if (diff < 0)
11287 {
11288 HOST_WIDE_INT tmp;
11289 tmp = ct, ct = cf, cf = tmp;
11290 diff = -diff;
11291 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11292 {
11293 /* We may be reversing an unordered compare to a normal compare; that
11294 is not valid in general (we may convert a non-trapping condition
11295 into a trapping one), but on i386 we currently emit all
11296 comparisons unordered. */
11297 compare_code = reverse_condition_maybe_unordered (compare_code);
11298 code = reverse_condition_maybe_unordered (code);
11299 }
11300 else
11301 {
11302 compare_code = reverse_condition (compare_code);
11303 code = reverse_condition (code);
11304 }
11305 }
11306
11307 compare_code = UNKNOWN;
11308 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11309 && GET_CODE (ix86_compare_op1) == CONST_INT)
11310 {
11311 if (ix86_compare_op1 == const0_rtx
11312 && (code == LT || code == GE))
11313 compare_code = code;
11314 else if (ix86_compare_op1 == constm1_rtx)
11315 {
11316 if (code == LE)
11317 compare_code = LT;
11318 else if (code == GT)
11319 compare_code = GE;
11320 }
11321 }
11322
11323 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11324 if (compare_code != UNKNOWN
11325 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11326 && (cf == -1 || ct == -1))
11327 {
11328 /* If lea code below could be used, only optimize
11329 if it results in a 2 insn sequence. */
11330
11331 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11332 || diff == 3 || diff == 5 || diff == 9)
11333 || (compare_code == LT && ct == -1)
11334 || (compare_code == GE && cf == -1))
11335 {
11336 /*
11337 * notl op1 (if necessary)
11338 * sarl $31, op1
11339 * orl cf, op1
11340 */
11341 if (ct != -1)
11342 {
11343 cf = ct;
11344 ct = -1;
11345 code = reverse_condition (code);
11346 }
11347
11348 out = emit_store_flag (out, code, ix86_compare_op0,
11349 ix86_compare_op1, VOIDmode, 0, -1);
11350
11351 out = expand_simple_binop (mode, IOR,
11352 out, GEN_INT (cf),
11353 out, 1, OPTAB_DIRECT);
11354 if (out != operands[0])
11355 emit_move_insn (operands[0], out);
11356
11357 return 1; /* DONE */
11358 }
11359 }
11360
11361
11362 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11363 || diff == 3 || diff == 5 || diff == 9)
11364 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11365 && (mode != DImode
11366 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11367 {
11368 /*
11369 * xorl dest,dest
11370 * cmpl op1,op2
11371 * setcc dest
11372 * lea cf(dest*(ct-cf)),dest
11373 *
11374 * Size 14.
11375 *
11376 * This also catches the degenerate setcc-only case.
11377 */
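
/* Editorial worked example (not part of the original sources): for
   dest = (a == b) ? 7 : 3 we have ct = 7, cf = 3, diff = 4, so setcc leaves
   0/1 in dest and a single
      lea 3(,%dest,4), %dest
   maps that to 3 or 7. */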
11378
11379 rtx tmp;
11380 int nops;
11381
11382 out = emit_store_flag (out, code, ix86_compare_op0,
11383 ix86_compare_op1, VOIDmode, 0, 1);
11384
11385 nops = 0;
11386 /* On x86_64 the lea instruction operates on Pmode, so we need
11387 to get the arithmetic done in the proper mode to match. */
11388 if (diff == 1)
11389 tmp = copy_rtx (out);
11390 else
11391 {
11392 rtx out1;
11393 out1 = copy_rtx (out);
11394 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11395 nops++;
11396 if (diff & 1)
11397 {
11398 tmp = gen_rtx_PLUS (mode, tmp, out1);
11399 nops++;
11400 }
11401 }
11402 if (cf != 0)
11403 {
11404 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11405 nops++;
11406 }
11407 if (!rtx_equal_p (tmp, out))
11408 {
11409 if (nops == 1)
11410 out = force_operand (tmp, copy_rtx (out));
11411 else
11412 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11413 }
11414 if (!rtx_equal_p (out, operands[0]))
11415 emit_move_insn (operands[0], copy_rtx (out));
11416
11417 return 1; /* DONE */
11418 }
11419
11420 /*
11421 * General case: Jumpful:
11422 * xorl dest,dest cmpl op1, op2
11423 * cmpl op1, op2 movl ct, dest
11424 * setcc dest jcc 1f
11425 * decl dest movl cf, dest
11426 * andl (cf-ct),dest 1:
11427 * addl ct,dest
11428 *
11429 * Size 20. Size 14.
11430 *
11431 * This is reasonably steep, but branch mispredict costs are
11432 * high on modern cpus, so consider failing only if optimizing
11433 * for space.
11434 */
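
/* Editorial walk-through (not part of the original sources), with ct = 10
   and cf = 25:
      setcc             -> dest = 1 or 0
      decl              -> dest = 0 or -1
      andl (cf-ct) = 15 -> dest = 0 or 15
      addl ct = 10      -> dest = 10 or 25
   so the true/false arms are produced without a branch. */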
11435
11436 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11437 && BRANCH_COST >= 2)
11438 {
11439 if (cf == 0)
11440 {
11441 cf = ct;
11442 ct = 0;
11443 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11444 /* We may be reversing an unordered compare to a normal compare,
11445 which is not valid in general (we may convert a non-trapping
11446 condition into a trapping one), but on i386 we currently
11447 emit all comparisons unordered. */
11448 code = reverse_condition_maybe_unordered (code);
11449 else
11450 {
11451 code = reverse_condition (code);
11452 if (compare_code != UNKNOWN)
11453 compare_code = reverse_condition (compare_code);
11454 }
11455 }
11456
11457 if (compare_code != UNKNOWN)
11458 {
11459 /* notl op1 (if needed)
11460 sarl $31, op1
11461 andl (cf-ct), op1
11462 addl ct, op1
11463
11464 For x < 0 (resp. x <= -1) there will be no notl,
11465 so if possible swap the constants to get rid of the
11466 complement.
11467 True/false will be -1/0 while code below (store flag
11468 followed by decrement) is 0/-1, so the constants need
11469 to be exchanged once more. */
11470
11471 if (compare_code == GE || !cf)
11472 {
11473 code = reverse_condition (code);
11474 compare_code = LT;
11475 }
11476 else
11477 {
11478 HOST_WIDE_INT tmp = cf;
11479 cf = ct;
11480 ct = tmp;
11481 }
11482
11483 out = emit_store_flag (out, code, ix86_compare_op0,
11484 ix86_compare_op1, VOIDmode, 0, -1);
11485 }
11486 else
11487 {
11488 out = emit_store_flag (out, code, ix86_compare_op0,
11489 ix86_compare_op1, VOIDmode, 0, 1);
11490
11491 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11492 copy_rtx (out), 1, OPTAB_DIRECT);
11493 }
11494
11495 out = expand_simple_binop (mode, AND, copy_rtx (out),
11496 gen_int_mode (cf - ct, mode),
11497 copy_rtx (out), 1, OPTAB_DIRECT);
11498 if (ct)
11499 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11500 copy_rtx (out), 1, OPTAB_DIRECT);
11501 if (!rtx_equal_p (out, operands[0]))
11502 emit_move_insn (operands[0], copy_rtx (out));
11503
11504 return 1; /* DONE */
11505 }
11506 }
11507
11508 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11509 {
11510 /* Try a few things more with specific constants and a variable. */
11511
11512 optab op;
11513 rtx var, orig_out, out, tmp;
11514
11515 if (BRANCH_COST <= 2)
11516 return 0; /* FAIL */
11517
11518 /* If one of the two operands is an interesting constant, load a
11519 constant with the above and mask it in with a logical operation. */
11520
11521 if (GET_CODE (operands[2]) == CONST_INT)
11522 {
11523 var = operands[3];
11524 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11525 operands[3] = constm1_rtx, op = and_optab;
11526 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11527 operands[3] = const0_rtx, op = ior_optab;
11528 else
11529 return 0; /* FAIL */
11530 }
11531 else if (GET_CODE (operands[3]) == CONST_INT)
11532 {
11533 var = operands[2];
11534 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11535 operands[2] = constm1_rtx, op = and_optab;
11536 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx)
11537 operands[2] = const0_rtx, op = ior_optab;
11538 else
11539 return 0; /* FAIL */
11540 }
11541 else
11542 return 0; /* FAIL */
11543
11544 orig_out = operands[0];
11545 tmp = gen_reg_rtx (mode);
11546 operands[0] = tmp;
11547
11548 /* Recurse to get the constant loaded. */
11549 if (ix86_expand_int_movcc (operands) == 0)
11550 return 0; /* FAIL */
11551
11552 /* Mask in the interesting variable. */
11553 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11554 OPTAB_WIDEN);
11555 if (!rtx_equal_p (out, orig_out))
11556 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11557
11558 return 1; /* DONE */
11559 }
11560
11561 /*
11562 * For comparison with above,
11563 *
11564 * movl cf,dest
11565 * movl ct,tmp
11566 * cmpl op1,op2
11567 * cmovcc tmp,dest
11568 *
11569 * Size 15.
11570 */
11571
11572 if (! nonimmediate_operand (operands[2], mode))
11573 operands[2] = force_reg (mode, operands[2]);
11574 if (! nonimmediate_operand (operands[3], mode))
11575 operands[3] = force_reg (mode, operands[3]);
11576
11577 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11578 {
11579 rtx tmp = gen_reg_rtx (mode);
11580 emit_move_insn (tmp, operands[3]);
11581 operands[3] = tmp;
11582 }
11583 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11584 {
11585 rtx tmp = gen_reg_rtx (mode);
11586 emit_move_insn (tmp, operands[2]);
11587 operands[2] = tmp;
11588 }
11589
11590 if (! register_operand (operands[2], VOIDmode)
11591 && (mode == QImode
11592 || ! register_operand (operands[3], VOIDmode)))
11593 operands[2] = force_reg (mode, operands[2]);
11594
11595 if (mode == QImode
11596 && ! register_operand (operands[3], VOIDmode))
11597 operands[3] = force_reg (mode, operands[3]);
11598
11599 emit_insn (compare_seq);
11600 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11601 gen_rtx_IF_THEN_ELSE (mode,
11602 compare_op, operands[2],
11603 operands[3])));
11604 if (bypass_test)
11605 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11606 gen_rtx_IF_THEN_ELSE (mode,
11607 bypass_test,
11608 copy_rtx (operands[3]),
11609 copy_rtx (operands[0]))));
11610 if (second_test)
11611 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11612 gen_rtx_IF_THEN_ELSE (mode,
11613 second_test,
11614 copy_rtx (operands[2]),
11615 copy_rtx (operands[0]))));
11616
11617 return 1; /* DONE */
11618 }
11619
11620 /* Swap, force into registers, or otherwise massage the two operands
11621 to an sse comparison with a mask result. Thus we differ a bit from
11622 ix86_prepare_fp_compare_args which expects to produce a flags result.
11623
11624 The DEST operand exists to help determine whether to commute commutative
11625 operators. The POP0/POP1 operands are updated in place. The new
11626 comparison code is returned, or UNKNOWN if not implementable. */
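
/* Editorial note (not part of the original sources): the SSE compare
   instructions only encode the EQ/LT/LE/UNORD/NEQ/NLT/NLE/ORD predicates,
   which is why GE/GT/UNLE/UNLT are handled below by swapping the operands,
   e.g. (a > b) is rewritten as (b < a). */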
11627
11628 static enum rtx_code
11629 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11630 rtx *pop0, rtx *pop1)
11631 {
11632 rtx tmp;
11633
11634 switch (code)
11635 {
11636 case LTGT:
11637 case UNEQ:
11638 /* We have no LTGT as an operator. We could implement it with
11639 NE & ORDERED, but this requires an extra temporary. It's
11640 not clear that it's worth it. */
11641 return UNKNOWN;
11642
11643 case LT:
11644 case LE:
11645 case UNGT:
11646 case UNGE:
11647 /* These are supported directly. */
11648 break;
11649
11650 case EQ:
11651 case NE:
11652 case UNORDERED:
11653 case ORDERED:
11654 /* For commutative operators, try to canonicalize the destination
11655 operand to be first in the comparison - this helps reload to
11656 avoid extra moves. */
11657 if (!dest || !rtx_equal_p (dest, *pop1))
11658 break;
11659 /* FALLTHRU */
11660
11661 case GE:
11662 case GT:
11663 case UNLE:
11664 case UNLT:
11665 /* These are not supported directly. Swap the comparison operands
11666 to transform into something that is supported. */
11667 tmp = *pop0;
11668 *pop0 = *pop1;
11669 *pop1 = tmp;
11670 code = swap_condition (code);
11671 break;
11672
11673 default:
11674 gcc_unreachable ();
11675 }
11676
11677 return code;
11678 }
11679
11680 /* Detect conditional moves that exactly match min/max operational
11681 semantics. Note that this is IEEE safe, as long as we don't
11682 interchange the operands.
11683
11684 Returns FALSE if this conditional move doesn't match a MIN/MAX,
11685 and TRUE if the operation is successful and instructions are emitted. */
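
/* Editorial examples (not part of the original sources): with this
   convention (a < b) ? a : b is recognized as a minimum and
   (a < b) ? b : a as a maximum; when -ffinite-math-only and
   -funsafe-math-optimizations are not both in effect, the IEEE-safe
   UNSPEC_IEEE_MIN/MAX patterns are used instead of plain SMIN/SMAX. */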
11686
11687 static bool
11688 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
11689 rtx cmp_op1, rtx if_true, rtx if_false)
11690 {
11691 enum machine_mode mode;
11692 bool is_min;
11693 rtx tmp;
11694
11695 if (code == LT)
11696 ;
11697 else if (code == UNGE)
11698 {
11699 tmp = if_true;
11700 if_true = if_false;
11701 if_false = tmp;
11702 }
11703 else
11704 return false;
11705
11706 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
11707 is_min = true;
11708 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
11709 is_min = false;
11710 else
11711 return false;
11712
11713 mode = GET_MODE (dest);
11714
11715 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
11716 but MODE may be a vector mode and thus not appropriate. */
11717 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
11718 {
11719 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
11720 rtvec v;
11721
11722 if_true = force_reg (mode, if_true);
11723 v = gen_rtvec (2, if_true, if_false);
11724 tmp = gen_rtx_UNSPEC (mode, v, u);
11725 }
11726 else
11727 {
11728 code = is_min ? SMIN : SMAX;
11729 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
11730 }
11731
11732 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
11733 return true;
11734 }
11735
11736 /* Expand an sse vector comparison. Return the register with the result. */
11737
11738 static rtx
11739 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
11740 rtx op_true, rtx op_false)
11741 {
11742 enum machine_mode mode = GET_MODE (dest);
11743 rtx x;
11744
11745 cmp_op0 = force_reg (mode, cmp_op0);
11746 if (!nonimmediate_operand (cmp_op1, mode))
11747 cmp_op1 = force_reg (mode, cmp_op1);
11748
11749 if (optimize
11750 || reg_overlap_mentioned_p (dest, op_true)
11751 || reg_overlap_mentioned_p (dest, op_false))
11752 dest = gen_reg_rtx (mode);
11753
11754 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
11755 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11756
11757 return dest;
11758 }
11759
11760 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
11761 operations. This is used for both scalar and vector conditional moves. */
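
/* Editorial note (not part of the original sources): CMP is a per-element
   mask of all-ones or all-zeros produced by ix86_expand_sse_cmp, so the
   general case below computes (cmp & op_true) | (~cmp & op_false), while the
   two special cases skip one AND when either arm is zero. */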
11762
11763 static void
11764 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
11765 {
11766 enum machine_mode mode = GET_MODE (dest);
11767 rtx t2, t3, x;
11768
11769 if (op_false == CONST0_RTX (mode))
11770 {
11771 op_true = force_reg (mode, op_true);
11772 x = gen_rtx_AND (mode, cmp, op_true);
11773 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11774 }
11775 else if (op_true == CONST0_RTX (mode))
11776 {
11777 op_false = force_reg (mode, op_false);
11778 x = gen_rtx_NOT (mode, cmp);
11779 x = gen_rtx_AND (mode, x, op_false);
11780 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11781 }
11782 else
11783 {
11784 op_true = force_reg (mode, op_true);
11785 op_false = force_reg (mode, op_false);
11786
11787 t2 = gen_reg_rtx (mode);
11788 if (optimize)
11789 t3 = gen_reg_rtx (mode);
11790 else
11791 t3 = dest;
11792
11793 x = gen_rtx_AND (mode, op_true, cmp);
11794 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
11795
11796 x = gen_rtx_NOT (mode, cmp);
11797 x = gen_rtx_AND (mode, x, op_false);
11798 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
11799
11800 x = gen_rtx_IOR (mode, t3, t2);
11801 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
11802 }
11803 }
11804
11805 /* Expand a floating-point conditional move. Return true if successful. */
11806
11807 int
11808 ix86_expand_fp_movcc (rtx operands[])
11809 {
11810 enum machine_mode mode = GET_MODE (operands[0]);
11811 enum rtx_code code = GET_CODE (operands[1]);
11812 rtx tmp, compare_op, second_test, bypass_test;
11813
11814 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
11815 {
11816 enum machine_mode cmode;
11817
11818 /* Since we have no cmove for SSE registers, don't force bad register
11819 allocation just to gain access to one. Deny the movcc when the
11820 comparison mode doesn't match the move mode. */
11821 cmode = GET_MODE (ix86_compare_op0);
11822 if (cmode == VOIDmode)
11823 cmode = GET_MODE (ix86_compare_op1);
11824 if (cmode != mode)
11825 return 0;
11826
11827 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11828 &ix86_compare_op0,
11829 &ix86_compare_op1);
11830 if (code == UNKNOWN)
11831 return 0;
11832
11833 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
11834 ix86_compare_op1, operands[2],
11835 operands[3]))
11836 return 1;
11837
11838 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
11839 ix86_compare_op1, operands[2], operands[3]);
11840 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
11841 return 1;
11842 }
11843
11844 /* The floating point conditional move instructions don't directly
11845 support conditions resulting from a signed integer comparison. */
11846
11847 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11848
11849 /* If the comparison is not one that fcmov can handle directly,
11850 compute its result with setcc and test that against zero instead. */
11851
11852 if (!fcmov_comparison_operator (compare_op, VOIDmode))
11853 {
11854 gcc_assert (!second_test && !bypass_test);
11855 tmp = gen_reg_rtx (QImode);
11856 ix86_expand_setcc (code, tmp);
11857 code = NE;
11858 ix86_compare_op0 = tmp;
11859 ix86_compare_op1 = const0_rtx;
11860 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11861 }
11862 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11863 {
11864 tmp = gen_reg_rtx (mode);
11865 emit_move_insn (tmp, operands[3]);
11866 operands[3] = tmp;
11867 }
11868 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11869 {
11870 tmp = gen_reg_rtx (mode);
11871 emit_move_insn (tmp, operands[2]);
11872 operands[2] = tmp;
11873 }
11874
11875 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11876 gen_rtx_IF_THEN_ELSE (mode, compare_op,
11877 operands[2], operands[3])));
11878 if (bypass_test)
11879 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11880 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
11881 operands[3], operands[0])));
11882 if (second_test)
11883 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11884 gen_rtx_IF_THEN_ELSE (mode, second_test,
11885 operands[2], operands[0])));
11886
11887 return 1;
11888 }
11889
11890 /* Expand a floating-point vector conditional move; a vcond operation
11891 rather than a movcc operation. */
11892
11893 bool
11894 ix86_expand_fp_vcond (rtx operands[])
11895 {
11896 enum rtx_code code = GET_CODE (operands[3]);
11897 rtx cmp;
11898
11899 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
11900 &operands[4], &operands[5]);
11901 if (code == UNKNOWN)
11902 return false;
11903
11904 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
11905 operands[5], operands[1], operands[2]))
11906 return true;
11907
11908 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
11909 operands[1], operands[2]);
11910 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
11911 return true;
11912 }
11913
11914 /* Expand a signed integral vector conditional move. */
11915
11916 bool
11917 ix86_expand_int_vcond (rtx operands[])
11918 {
11919 enum machine_mode mode = GET_MODE (operands[0]);
11920 enum rtx_code code = GET_CODE (operands[3]);
11921 bool negate = false;
11922 rtx x, cop0, cop1;
11923
11924 cop0 = operands[4];
11925 cop1 = operands[5];
11926
11927 /* Canonicalize the comparison to EQ, GT, GTU. */
11928 switch (code)
11929 {
11930 case EQ:
11931 case GT:
11932 case GTU:
11933 break;
11934
11935 case NE:
11936 case LE:
11937 case LEU:
11938 code = reverse_condition (code);
11939 negate = true;
11940 break;
11941
11942 case GE:
11943 case GEU:
11944 code = reverse_condition (code);
11945 negate = true;
11946 /* FALLTHRU */
11947
11948 case LT:
11949 case LTU:
11950 code = swap_condition (code);
11951 x = cop0, cop0 = cop1, cop1 = x;
11952 break;
11953
11954 default:
11955 gcc_unreachable ();
11956 }
11957
11958 /* Unsigned parallel compare is not supported by the hardware. Play some
11959 tricks to turn this into a signed comparison against 0. */
11960 if (code == GTU)
11961 {
11962 cop0 = force_reg (mode, cop0);
11963
11964 switch (mode)
11965 {
11966 case V4SImode:
11967 {
11968 rtx t1, t2, mask;
11969
11970 /* Perform a parallel modulo subtraction. */
11971 t1 = gen_reg_rtx (mode);
11972 emit_insn (gen_subv4si3 (t1, cop0, cop1));
11973
11974 /* Extract the original sign bit of op0. */
11975 mask = GEN_INT (-0x80000000);
11976 mask = gen_rtx_CONST_VECTOR (mode,
11977 gen_rtvec (4, mask, mask, mask, mask));
11978 mask = force_reg (mode, mask);
11979 t2 = gen_reg_rtx (mode);
11980 emit_insn (gen_andv4si3 (t2, cop0, mask));
11981
11982 /* XOR it back into the result of the subtraction. This results
11983 in the sign bit set iff we saw unsigned underflow. */
11984 x = gen_reg_rtx (mode);
11985 emit_insn (gen_xorv4si3 (x, t1, t2));
11986
11987 code = GT;
11988 }
11989 break;
11990
11991 case V16QImode:
11992 case V8HImode:
11993 /* Perform a parallel unsigned saturating subtraction. */
11994 x = gen_reg_rtx (mode);
11995 emit_insn (gen_rtx_SET (VOIDmode, x,
11996 gen_rtx_US_MINUS (mode, cop0, cop1)));
11997
11998 code = EQ;
11999 negate = !negate;
12000 break;
12001
12002 default:
12003 gcc_unreachable ();
12004 }
12005
12006 cop0 = x;
12007 cop1 = CONST0_RTX (mode);
12008 }
12009
12010 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12011 operands[1+negate], operands[2-negate]);
12012
12013 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12014 operands[2-negate]);
12015 return true;
12016 }
12017
12018 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12019 true if we should do zero extension, else sign extension. HIGH_P is
12020 true if we want the N/2 high elements, else the low elements. */
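
/* Editorial note (not part of the original sources): for the signed case the
   extension vector SE is built as the compare 0 > OP[1], giving an all-ones
   element wherever the source element is negative; interleaving each element
   with its sign mask then yields the sign-extended wider elements. */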
12021
12022 void
12023 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12024 {
12025 enum machine_mode imode = GET_MODE (operands[1]);
12026 rtx (*unpack)(rtx, rtx, rtx);
12027 rtx se, dest;
12028
12029 switch (imode)
12030 {
12031 case V16QImode:
12032 if (high_p)
12033 unpack = gen_vec_interleave_highv16qi;
12034 else
12035 unpack = gen_vec_interleave_lowv16qi;
12036 break;
12037 case V8HImode:
12038 if (high_p)
12039 unpack = gen_vec_interleave_highv8hi;
12040 else
12041 unpack = gen_vec_interleave_lowv8hi;
12042 break;
12043 case V4SImode:
12044 if (high_p)
12045 unpack = gen_vec_interleave_highv4si;
12046 else
12047 unpack = gen_vec_interleave_lowv4si;
12048 break;
12049 default:
12050 gcc_unreachable ();
12051 }
12052
12053 dest = gen_lowpart (imode, operands[0]);
12054
12055 if (unsigned_p)
12056 se = force_reg (imode, CONST0_RTX (imode));
12057 else
12058 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12059 operands[1], pc_rtx, pc_rtx);
12060
12061 emit_insn (unpack (dest, operands[1], se));
12062 }
12063
12064 /* Expand conditional increment or decrement using adc/sbb instructions.
12065 The default case using setcc followed by the conditional move can be
12066 done by generic code. */
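/* Editorial example (not part of the original sources): for unsigned
   operands, x = x + (a < b) can be expanded here as
      cmp b, a        -- sets the carry flag iff a < b
      adc $0, x       -- adds the carry into x
   and the decrement forms use sbb in the same way. */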
12067 int
12068 ix86_expand_int_addcc (rtx operands[])
12069 {
12070 enum rtx_code code = GET_CODE (operands[1]);
12071 rtx compare_op;
12072 rtx val = const0_rtx;
12073 bool fpcmp = false;
12074 enum machine_mode mode = GET_MODE (operands[0]);
12075
12076 if (operands[3] != const1_rtx
12077 && operands[3] != constm1_rtx)
12078 return 0;
12079 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12080 ix86_compare_op1, &compare_op))
12081 return 0;
12082 code = GET_CODE (compare_op);
12083
12084 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12085 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12086 {
12087 fpcmp = true;
12088 code = ix86_fp_compare_code_to_integer (code);
12089 }
12090
12091 if (code != LTU)
12092 {
12093 val = constm1_rtx;
12094 if (fpcmp)
12095 PUT_CODE (compare_op,
12096 reverse_condition_maybe_unordered
12097 (GET_CODE (compare_op)));
12098 else
12099 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12100 }
12101 PUT_MODE (compare_op, mode);
12102
12103 /* Construct either adc or sbb insn. */
12104 if ((code == LTU) == (operands[3] == constm1_rtx))
12105 {
12106 switch (GET_MODE (operands[0]))
12107 {
12108 case QImode:
12109 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12110 break;
12111 case HImode:
12112 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12113 break;
12114 case SImode:
12115 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12116 break;
12117 case DImode:
12118 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12119 break;
12120 default:
12121 gcc_unreachable ();
12122 }
12123 }
12124 else
12125 {
12126 switch (GET_MODE (operands[0]))
12127 {
12128 case QImode:
12129 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12130 break;
12131 case HImode:
12132 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12133 break;
12134 case SImode:
12135 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12136 break;
12137 case DImode:
12138 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12139 break;
12140 default:
12141 gcc_unreachable ();
12142 }
12143 }
12144 return 1; /* DONE */
12145 }
12146
12147
12148 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12149 works for floating point parameters and non-offsettable memories.
12150 For pushes, it returns just stack offsets; the values will be saved
12151 in the right order. At most three parts are generated. */
12152
12153 static int
12154 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12155 {
12156 int size;
12157
12158 if (!TARGET_64BIT)
12159 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12160 else
12161 size = (GET_MODE_SIZE (mode) + 4) / 8;
12162
12163 gcc_assert (GET_CODE (operand) != REG || !MMX_REGNO_P (REGNO (operand)));
12164 gcc_assert (size >= 2 && size <= 3);
12165
12166 /* Optimize constant pool references to immediates. This is used by fp
12167 moves that force all constants to memory to allow combining. */
12168 if (GET_CODE (operand) == MEM && MEM_READONLY_P (operand))
12169 {
12170 rtx tmp = maybe_get_pool_constant (operand);
12171 if (tmp)
12172 operand = tmp;
12173 }
12174
12175 if (GET_CODE (operand) == MEM && !offsettable_memref_p (operand))
12176 {
12177 /* The only non-offsettable memories we handle are pushes. */
12178 int ok = push_operand (operand, VOIDmode);
12179
12180 gcc_assert (ok);
12181
12182 operand = copy_rtx (operand);
12183 PUT_MODE (operand, Pmode);
12184 parts[0] = parts[1] = parts[2] = operand;
12185 return size;
12186 }
12187
12188 if (GET_CODE (operand) == CONST_VECTOR)
12189 {
12190 enum machine_mode imode = int_mode_for_mode (mode);
12191 /* Caution: if we looked through a constant pool memory above,
12192 the operand may actually have a different mode now. That's
12193 ok, since we want to pun this all the way back to an integer. */
12194 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12195 gcc_assert (operand != NULL);
12196 mode = imode;
12197 }
12198
12199 if (!TARGET_64BIT)
12200 {
12201 if (mode == DImode)
12202 split_di (&operand, 1, &parts[0], &parts[1]);
12203 else
12204 {
12205 if (REG_P (operand))
12206 {
12207 gcc_assert (reload_completed);
12208 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12209 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12210 if (size == 3)
12211 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12212 }
12213 else if (offsettable_memref_p (operand))
12214 {
12215 operand = adjust_address (operand, SImode, 0);
12216 parts[0] = operand;
12217 parts[1] = adjust_address (operand, SImode, 4);
12218 if (size == 3)
12219 parts[2] = adjust_address (operand, SImode, 8);
12220 }
12221 else if (GET_CODE (operand) == CONST_DOUBLE)
12222 {
12223 REAL_VALUE_TYPE r;
12224 long l[4];
12225
12226 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12227 switch (mode)
12228 {
12229 case XFmode:
12230 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12231 parts[2] = gen_int_mode (l[2], SImode);
12232 break;
12233 case DFmode:
12234 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12235 break;
12236 default:
12237 gcc_unreachable ();
12238 }
12239 parts[1] = gen_int_mode (l[1], SImode);
12240 parts[0] = gen_int_mode (l[0], SImode);
12241 }
12242 else
12243 gcc_unreachable ();
12244 }
12245 }
12246 else
12247 {
12248 if (mode == TImode)
12249 split_ti (&operand, 1, &parts[0], &parts[1]);
12250 if (mode == XFmode || mode == TFmode)
12251 {
12252 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12253 if (REG_P (operand))
12254 {
12255 gcc_assert (reload_completed);
12256 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12257 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12258 }
12259 else if (offsettable_memref_p (operand))
12260 {
12261 operand = adjust_address (operand, DImode, 0);
12262 parts[0] = operand;
12263 parts[1] = adjust_address (operand, upper_mode, 8);
12264 }
12265 else if (GET_CODE (operand) == CONST_DOUBLE)
12266 {
12267 REAL_VALUE_TYPE r;
12268 long l[4];
12269
12270 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12271 real_to_target (l, &r, mode);
12272
12273 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12274 if (HOST_BITS_PER_WIDE_INT >= 64)
12275 parts[0]
12276 = gen_int_mode
12277 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12278 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12279 DImode);
12280 else
12281 parts[0] = immed_double_const (l[0], l[1], DImode);
12282
12283 if (upper_mode == SImode)
12284 parts[1] = gen_int_mode (l[2], SImode);
12285 else if (HOST_BITS_PER_WIDE_INT >= 64)
12286 parts[1]
12287 = gen_int_mode
12288 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12289 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12290 DImode);
12291 else
12292 parts[1] = immed_double_const (l[2], l[3], DImode);
12293 }
12294 else
12295 gcc_unreachable ();
12296 }
12297 }
12298
12299 return size;
12300 }
12301
12302 /* Emit insns to perform a move or push of DI, DF, and XF values.
12303 Return false when normal moves are needed; true when all required
12304 insns have been emitted. Operands 2-4 contain the input values
12305 in the correct order; operands 5-7 contain the output values. */
12306
12307 void
12308 ix86_split_long_move (rtx operands[])
12309 {
12310 rtx part[2][3];
12311 int nparts;
12312 int push = 0;
12313 int collisions = 0;
12314 enum machine_mode mode = GET_MODE (operands[0]);
12315
12316 /* The DFmode expanders may ask us to move a double.
12317 For a 64-bit target this is a single move. By hiding that fact
12318 here we simplify the i386.md splitters. */
12319 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12320 {
12321 /* Optimize constant pool references to immediates. This is used by
12322 fp moves that force all constants to memory to allow combining. */
12323
12324 if (GET_CODE (operands[1]) == MEM
12325 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12326 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12327 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12328 if (push_operand (operands[0], VOIDmode))
12329 {
12330 operands[0] = copy_rtx (operands[0]);
12331 PUT_MODE (operands[0], Pmode);
12332 }
12333 else
12334 operands[0] = gen_lowpart (DImode, operands[0]);
12335 operands[1] = gen_lowpart (DImode, operands[1]);
12336 emit_move_insn (operands[0], operands[1]);
12337 return;
12338 }
12339
12340 /* The only non-offsettable memory we handle is push. */
12341 if (push_operand (operands[0], VOIDmode))
12342 push = 1;
12343 else
12344 gcc_assert (GET_CODE (operands[0]) != MEM
12345 || offsettable_memref_p (operands[0]));
12346
12347 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12348 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12349
12350 /* When emitting a push, take care of source operands on the stack. */
12351 if (push && GET_CODE (operands[1]) == MEM
12352 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12353 {
12354 if (nparts == 3)
12355 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12356 XEXP (part[1][2], 0));
12357 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12358 XEXP (part[1][1], 0));
12359 }
12360
12361 /* We need to do the copy in the right order in case an address register
12362 of the source overlaps the destination. */
12363 if (REG_P (part[0][0]) && GET_CODE (part[1][0]) == MEM)
12364 {
12365 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12366 collisions++;
12367 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12368 collisions++;
12369 if (nparts == 3
12370 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12371 collisions++;
12372
12373 /* Collision in the middle part can be handled by reordering. */
12374 if (collisions == 1 && nparts == 3
12375 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12376 {
12377 rtx tmp;
12378 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12379 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12380 }
12381
12382 /* If there are more collisions, we can't handle it by reordering.
12383 Do an lea to the last part and use only one colliding move. */
12384 else if (collisions > 1)
12385 {
12386 rtx base;
12387
12388 collisions = 1;
12389
12390 base = part[0][nparts - 1];
12391
12392 /* Handle the case when the last part isn't valid for lea.
12393 Happens in 64-bit mode storing the 12-byte XFmode. */
12394 if (GET_MODE (base) != Pmode)
12395 base = gen_rtx_REG (Pmode, REGNO (base));
12396
12397 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12398 part[1][0] = replace_equiv_address (part[1][0], base);
12399 part[1][1] = replace_equiv_address (part[1][1],
12400 plus_constant (base, UNITS_PER_WORD));
12401 if (nparts == 3)
12402 part[1][2] = replace_equiv_address (part[1][2],
12403 plus_constant (base, 8));
12404 }
12405 }
12406
12407 if (push)
12408 {
12409 if (!TARGET_64BIT)
12410 {
12411 if (nparts == 3)
12412 {
12413 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12414 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12415 emit_move_insn (part[0][2], part[1][2]);
12416 }
12417 }
12418 else
12419 {
12420 /* In 64-bit mode we don't have a 32-bit push available. If this is a
12421 register, that is OK; we will just use the larger counterpart. We also
12422 retype memory; this comes from an attempt to avoid a REX prefix when
12423 moving the second half of a TFmode value. */
12424 if (GET_MODE (part[1][1]) == SImode)
12425 {
12426 switch (GET_CODE (part[1][1]))
12427 {
12428 case MEM:
12429 part[1][1] = adjust_address (part[1][1], DImode, 0);
12430 break;
12431
12432 case REG:
12433 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12434 break;
12435
12436 default:
12437 gcc_unreachable ();
12438 }
12439
12440 if (GET_MODE (part[1][0]) == SImode)
12441 part[1][0] = part[1][1];
12442 }
12443 }
12444 emit_move_insn (part[0][1], part[1][1]);
12445 emit_move_insn (part[0][0], part[1][0]);
12446 return;
12447 }
12448
12449 /* Choose the correct order so we do not overwrite the source before it is copied. */
12450 if ((REG_P (part[0][0])
12451 && REG_P (part[1][1])
12452 && (REGNO (part[0][0]) == REGNO (part[1][1])
12453 || (nparts == 3
12454 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12455 || (collisions > 0
12456 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12457 {
12458 if (nparts == 3)
12459 {
12460 operands[2] = part[0][2];
12461 operands[3] = part[0][1];
12462 operands[4] = part[0][0];
12463 operands[5] = part[1][2];
12464 operands[6] = part[1][1];
12465 operands[7] = part[1][0];
12466 }
12467 else
12468 {
12469 operands[2] = part[0][1];
12470 operands[3] = part[0][0];
12471 operands[5] = part[1][1];
12472 operands[6] = part[1][0];
12473 }
12474 }
12475 else
12476 {
12477 if (nparts == 3)
12478 {
12479 operands[2] = part[0][0];
12480 operands[3] = part[0][1];
12481 operands[4] = part[0][2];
12482 operands[5] = part[1][0];
12483 operands[6] = part[1][1];
12484 operands[7] = part[1][2];
12485 }
12486 else
12487 {
12488 operands[2] = part[0][0];
12489 operands[3] = part[0][1];
12490 operands[5] = part[1][0];
12491 operands[6] = part[1][1];
12492 }
12493 }
12494
12495 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12496 if (optimize_size)
12497 {
12498 if (GET_CODE (operands[5]) == CONST_INT
12499 && operands[5] != const0_rtx
12500 && REG_P (operands[2]))
12501 {
12502 if (GET_CODE (operands[6]) == CONST_INT
12503 && INTVAL (operands[6]) == INTVAL (operands[5]))
12504 operands[6] = operands[2];
12505
12506 if (nparts == 3
12507 && GET_CODE (operands[7]) == CONST_INT
12508 && INTVAL (operands[7]) == INTVAL (operands[5]))
12509 operands[7] = operands[2];
12510 }
12511
12512 if (nparts == 3
12513 && GET_CODE (operands[6]) == CONST_INT
12514 && operands[6] != const0_rtx
12515 && REG_P (operands[3])
12516 && GET_CODE (operands[7]) == CONST_INT
12517 && INTVAL (operands[7]) == INTVAL (operands[6]))
12518 operands[7] = operands[3];
12519 }
12520
12521 emit_move_insn (operands[2], operands[5]);
12522 emit_move_insn (operands[3], operands[6]);
12523 if (nparts == 3)
12524 emit_move_insn (operands[4], operands[7]);
12525
12526 return;
12527 }
12528
12529 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12530 left shift by a constant, either using a single shift or
12531 a sequence of add instructions. */
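/* Editorial example (not part of the original sources): with hypothetical
   costs where an add is 1 and a constant shift is 3, a shift left by 2
   (count == 2) is emitted as two self-adds (each doubling the operand);
   otherwise a single shift-by-count instruction is used, and count == 1 is
   always a single add. */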
12532
12533 static void
12534 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12535 {
12536 if (count == 1)
12537 {
12538 emit_insn ((mode == DImode
12539 ? gen_addsi3
12540 : gen_adddi3) (operand, operand, operand));
12541 }
12542 else if (!optimize_size
12543 && count * ix86_cost->add <= ix86_cost->shift_const)
12544 {
12545 int i;
12546 for (i=0; i<count; i++)
12547 {
12548 emit_insn ((mode == DImode
12549 ? gen_addsi3
12550 : gen_adddi3) (operand, operand, operand));
12551 }
12552 }
12553 else
12554 emit_insn ((mode == DImode
12555 ? gen_ashlsi3
12556 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12557 }
12558
12559 void
12560 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12561 {
12562 rtx low[2], high[2];
12563 int count;
12564 const int single_width = mode == DImode ? 32 : 64;
12565
12566 if (GET_CODE (operands[2]) == CONST_INT)
12567 {
12568 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12569 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12570
12571 if (count >= single_width)
12572 {
12573 emit_move_insn (high[0], low[1]);
12574 emit_move_insn (low[0], const0_rtx);
12575
12576 if (count > single_width)
12577 ix86_expand_ashl_const (high[0], count - single_width, mode);
12578 }
12579 else
12580 {
12581 if (!rtx_equal_p (operands[0], operands[1]))
12582 emit_move_insn (operands[0], operands[1]);
12583 emit_insn ((mode == DImode
12584 ? gen_x86_shld_1
12585 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12586 ix86_expand_ashl_const (low[0], count, mode);
12587 }
12588 return;
12589 }
12590
12591 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12592
12593 if (operands[1] == const1_rtx)
12594 {
12595 /* Assuming we've chosen QImode-capable registers, 1 << N
12596 can be done with two 32/64-bit shifts, no branches, no cmoves. */
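/* Editorial worked example (not part of the original sources): for a DImode
   1 << 40 on a 32-bit target, bit 5 of the count is set, so the setcc pair
   below leaves high:low = 1:0; the final shifts by 40 use only the low five
   bits of the count (8), giving high = 1 << 8 and low = 0, i.e. 2**40. */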
12597 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12598 {
12599 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12600
12601 ix86_expand_clear (low[0]);
12602 ix86_expand_clear (high[0]);
12603 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12604
12605 d = gen_lowpart (QImode, low[0]);
12606 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12607 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12608 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12609
12610 d = gen_lowpart (QImode, high[0]);
12611 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12612 s = gen_rtx_NE (QImode, flags, const0_rtx);
12613 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12614 }
12615
12616 /* Otherwise, we can get the same results by manually performing
12617 a bit extract operation on bit 5/6, and then performing the two
12618 shifts. The two methods of getting 0/1 into low/high are exactly
12619 the same size. Avoiding the shift in the bit extract case helps
12620 pentium4 a bit; no one else seems to care much either way. */
12621 else
12622 {
12623 rtx x;
12624
12625 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12626 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12627 else
12628 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12629 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12630
12631 emit_insn ((mode == DImode
12632 ? gen_lshrsi3
12633 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12634 emit_insn ((mode == DImode
12635 ? gen_andsi3
12636 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12637 emit_move_insn (low[0], high[0]);
12638 emit_insn ((mode == DImode
12639 ? gen_xorsi3
12640 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12641 }
12642
12643 emit_insn ((mode == DImode
12644 ? gen_ashlsi3
12645 : gen_ashldi3) (low[0], low[0], operands[2]));
12646 emit_insn ((mode == DImode
12647 ? gen_ashlsi3
12648 : gen_ashldi3) (high[0], high[0], operands[2]));
12649 return;
12650 }
12651
12652 if (operands[1] == constm1_rtx)
12653 {
12654 /* For -1 << N, we can avoid the shld instruction, because we
12655 know that we're shifting 0...31/63 ones into a -1. */
12656 emit_move_insn (low[0], constm1_rtx);
12657 if (optimize_size)
12658 emit_move_insn (high[0], low[0]);
12659 else
12660 emit_move_insn (high[0], constm1_rtx);
12661 }
12662 else
12663 {
12664 if (!rtx_equal_p (operands[0], operands[1]))
12665 emit_move_insn (operands[0], operands[1]);
12666
12667 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12668 emit_insn ((mode == DImode
12669 ? gen_x86_shld_1
12670 : gen_x86_64_shld) (high[0], low[0], operands[2]));
12671 }
12672
12673 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
12674
12675 if (TARGET_CMOVE && scratch)
12676 {
12677 ix86_expand_clear (scratch);
12678 emit_insn ((mode == DImode
12679 ? gen_x86_shift_adj_1
12680 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
12681 }
12682 else
12683 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
12684 }
12685
12686 void
12687 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
12688 {
12689 rtx low[2], high[2];
12690 int count;
12691 const int single_width = mode == DImode ? 32 : 64;
12692
12693 if (GET_CODE (operands[2]) == CONST_INT)
12694 {
12695 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12696 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12697
12698 if (count == single_width * 2 - 1)
12699 {
12700 emit_move_insn (high[0], high[1]);
12701 emit_insn ((mode == DImode
12702 ? gen_ashrsi3
12703 : gen_ashrdi3) (high[0], high[0],
12704 GEN_INT (single_width - 1)));
12705 emit_move_insn (low[0], high[0]);
12706
12707 }
12708 else if (count >= single_width)
12709 {
12710 emit_move_insn (low[0], high[1]);
12711 emit_move_insn (high[0], low[0]);
12712 emit_insn ((mode == DImode
12713 ? gen_ashrsi3
12714 : gen_ashrdi3) (high[0], high[0],
12715 GEN_INT (single_width - 1)));
12716 if (count > single_width)
12717 emit_insn ((mode == DImode
12718 ? gen_ashrsi3
12719 : gen_ashrdi3) (low[0], low[0],
12720 GEN_INT (count - single_width)));
12721 }
12722 else
12723 {
12724 if (!rtx_equal_p (operands[0], operands[1]))
12725 emit_move_insn (operands[0], operands[1]);
12726 emit_insn ((mode == DImode
12727 ? gen_x86_shrd_1
12728 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12729 emit_insn ((mode == DImode
12730 ? gen_ashrsi3
12731 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
12732 }
12733 }
12734 else
12735 {
12736 if (!rtx_equal_p (operands[0], operands[1]))
12737 emit_move_insn (operands[0], operands[1]);
12738
12739 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12740
12741 emit_insn ((mode == DImode
12742 ? gen_x86_shrd_1
12743 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12744 emit_insn ((mode == DImode
12745 ? gen_ashrsi3
12746 : gen_ashrdi3) (high[0], high[0], operands[2]));
12747
12748 if (TARGET_CMOVE && scratch)
12749 {
12750 emit_move_insn (scratch, high[0]);
12751 emit_insn ((mode == DImode
12752 ? gen_ashrsi3
12753 : gen_ashrdi3) (scratch, scratch,
12754 GEN_INT (single_width - 1)));
12755 emit_insn ((mode == DImode
12756 ? gen_x86_shift_adj_1
12757 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12758 scratch));
12759 }
12760 else
12761 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
12762 }
12763 }
12764
12765 void
12766 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
12767 {
12768 rtx low[2], high[2];
12769 int count;
12770 const int single_width = mode == DImode ? 32 : 64;
12771
12772 if (GET_CODE (operands[2]) == CONST_INT)
12773 {
12774 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12775 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12776
12777 if (count >= single_width)
12778 {
12779 emit_move_insn (low[0], high[1]);
12780 ix86_expand_clear (high[0]);
12781
12782 if (count > single_width)
12783 emit_insn ((mode == DImode
12784 ? gen_lshrsi3
12785 : gen_lshrdi3) (low[0], low[0],
12786 GEN_INT (count - single_width)));
12787 }
12788 else
12789 {
12790 if (!rtx_equal_p (operands[0], operands[1]))
12791 emit_move_insn (operands[0], operands[1]);
12792 emit_insn ((mode == DImode
12793 ? gen_x86_shrd_1
12794 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
12795 emit_insn ((mode == DImode
12796 ? gen_lshrsi3
12797 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
12798 }
12799 }
12800 else
12801 {
12802 if (!rtx_equal_p (operands[0], operands[1]))
12803 emit_move_insn (operands[0], operands[1]);
12804
12805 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12806
12807 emit_insn ((mode == DImode
12808 ? gen_x86_shrd_1
12809 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
12810 emit_insn ((mode == DImode
12811 ? gen_lshrsi3
12812 : gen_lshrdi3) (high[0], high[0], operands[2]));
12813
12814 /* Heh. By reversing the arguments, we can reuse this pattern. */
12815 if (TARGET_CMOVE && scratch)
12816 {
12817 ix86_expand_clear (scratch);
12818 emit_insn ((mode == DImode
12819 ? gen_x86_shift_adj_1
12820 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
12821 scratch));
12822 }
12823 else
12824 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
12825 }
12826 }
12827
12828 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
12829 static void
12830 predict_jump (int prob)
12831 {
12832 rtx insn = get_last_insn ();
12833 gcc_assert (GET_CODE (insn) == JUMP_INSN);
12834 REG_NOTES (insn)
12835 = gen_rtx_EXPR_LIST (REG_BR_PROB,
12836 GEN_INT (prob),
12837 REG_NOTES (insn));
12838 }
12839
12840 /* Helper function for the string operations below. Test whether the VALUE
12841 bits of VARIABLE are clear; if they are, jump to the label that is returned. */
12842 static rtx
12843 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
12844 {
12845 rtx label = gen_label_rtx ();
12846 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
12847 if (GET_MODE (variable) == DImode)
12848 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
12849 else
12850 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
12851 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
12852 1, label);
12853 if (epilogue)
12854 predict_jump (REG_BR_PROB_BASE * 50 / 100);
12855 else
12856 predict_jump (REG_BR_PROB_BASE * 90 / 100);
12857 return label;
12858 }
12859
12860 /* Decrease COUNTREG by VALUE. */
12861 static void
12862 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
12863 {
12864 if (GET_MODE (countreg) == DImode)
12865 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
12866 else
12867 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
12868 }
12869
12870 /* Zero extend possibly SImode EXP to Pmode register. */
12871 rtx
12872 ix86_zero_extend_to_Pmode (rtx exp)
12873 {
12874 rtx r;
12875 if (GET_MODE (exp) == VOIDmode)
12876 return force_reg (Pmode, exp);
12877 if (GET_MODE (exp) == Pmode)
12878 return copy_to_mode_reg (Pmode, exp);
12879 r = gen_reg_rtx (Pmode);
12880 emit_insn (gen_zero_extendsidi2 (r, exp));
12881 return r;
12882 }
12883
12884 /* Divide COUNTREG by SCALE, which must be a power of two. */
12885 static rtx
12886 scale_counter (rtx countreg, int scale)
12887 {
12888 rtx sc;
12889 rtx piece_size_mask;
12890
12891 if (scale == 1)
12892 return countreg;
12893 if (GET_CODE (countreg) == CONST_INT)
12894 return GEN_INT (INTVAL (countreg) / scale);
12895 gcc_assert (REG_P (countreg));
12896
12897 piece_size_mask = GEN_INT (scale - 1);
12898 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
12899 GEN_INT (exact_log2 (scale)),
12900 NULL, 1, OPTAB_DIRECT);
12901 return sc;
12902 }
12903
12904 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed to
12905 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
12906 size is COUNT, specified in bytes. When SRCPTR is NULL, output the
12907 equivalent loop to set memory to VALUE (assumed to be in MODE).
12908
12909 The size is rounded down to a whole number of chunks moved at once.
12910 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
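/* Rough shape of the generated code for the memcpy case with UNROLL == 1
   (the memset case just replaces the load with VALUE):

       size = count & -piece_size;
       iter = 0;
     top:
       *(dest + iter) = *(src + iter);
       iter += piece_size;
       if (iter < size) goto top;
       dest += iter;  src += iter;
     out:  */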
12911
12912
12913 static void
12914 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
12915 rtx destptr, rtx srcptr, rtx value,
12916 rtx count, enum machine_mode mode, int unroll,
12917 int expected_size)
12918 {
12919 rtx out_label, top_label, iter, tmp;
12920 enum machine_mode iter_mode;
12921 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
12922 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
12923 rtx size;
12924 rtx x_addr;
12925 rtx y_addr;
12926 int i;
12927
12928 iter_mode = GET_MODE (count);
12929 if (iter_mode == VOIDmode)
12930 iter_mode = word_mode;
12931
12932 top_label = gen_label_rtx ();
12933 out_label = gen_label_rtx ();
12934 iter = gen_reg_rtx (iter_mode);
12935
12936 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
12937 NULL, 1, OPTAB_DIRECT);
12938 /* Those two should combine. */
12939 if (piece_size == const1_rtx)
12940 {
12941 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
12942 true, out_label);
12943 predict_jump (REG_BR_PROB_BASE * 10 / 100);
12944 }
12945 emit_move_insn (iter, const0_rtx);
12946
12947 emit_label (top_label);
12948
12949 tmp = convert_modes (Pmode, iter_mode, iter, true);
12950 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
12951 destmem = change_address (destmem, mode, x_addr);
12952
12953 if (srcmem)
12954 {
12955 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
12956 srcmem = change_address (srcmem, mode, y_addr);
12957
12958 /* When unrolling for chips that reorder memory reads and writes,
12959 we can save registers by using a single temporary.
12960 Also, using 4 temporaries is overkill in 32-bit mode. */
12961 if (!TARGET_64BIT && 0)
12962 {
12963 for (i = 0; i < unroll; i++)
12964 {
12965 if (i)
12966 {
12967 destmem =
12968 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12969 srcmem =
12970 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12971 }
12972 emit_move_insn (destmem, srcmem);
12973 }
12974 }
12975 else
12976 {
12977 rtx tmpreg[4];
12978 gcc_assert (unroll <= 4);
12979 for (i = 0; i < unroll; i++)
12980 {
12981 tmpreg[i] = gen_reg_rtx (mode);
12982 if (i)
12983 {
12984 srcmem =
12985 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
12986 }
12987 emit_move_insn (tmpreg[i], srcmem);
12988 }
12989 for (i = 0; i < unroll; i++)
12990 {
12991 if (i)
12992 {
12993 destmem =
12994 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
12995 }
12996 emit_move_insn (destmem, tmpreg[i]);
12997 }
12998 }
12999 }
13000 else
13001 for (i = 0; i < unroll; i++)
13002 {
13003 if (i)
13004 destmem =
13005 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13006 emit_move_insn (destmem, value);
13007 }
13008
13009 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13010 true, OPTAB_LIB_WIDEN);
13011 if (tmp != iter)
13012 emit_move_insn (iter, tmp);
13013
13014 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13015 true, top_label);
13016 if (expected_size != -1)
13017 {
13018 expected_size /= GET_MODE_SIZE (mode) * unroll;
13019 if (expected_size == 0)
13020 predict_jump (0);
13021 else if (expected_size > REG_BR_PROB_BASE)
13022 predict_jump (REG_BR_PROB_BASE - 1);
13023 else
13024 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13025 }
13026 else
13027 predict_jump (REG_BR_PROB_BASE * 80 / 100);
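/* Note on the formula above: for an expected iteration count N the branch
   back to TOP_LABEL is taken roughly (N - 1) / N of the time, and the
   expression computes REG_BR_PROB_BASE * (N - 1) / N with rounding; e.g.
   N == 4 gives 7500 out of REG_BR_PROB_BASE == 10000.  */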
13028 iter = ix86_zero_extend_to_Pmode (iter);
13029 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13030 true, OPTAB_LIB_WIDEN);
13031 if (tmp != destptr)
13032 emit_move_insn (destptr, tmp);
13033 if (srcptr)
13034 {
13035 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13036 true, OPTAB_LIB_WIDEN);
13037 if (tmp != srcptr)
13038 emit_move_insn (srcptr, tmp);
13039 }
13040 emit_label (out_label);
13041 }
13042
13043 /* Output "rep; mov" instruction.
13044 Arguments have the same meaning as for the previous function. */
13045 static void
13046 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13047 rtx destptr, rtx srcptr,
13048 rtx count,
13049 enum machine_mode mode)
13050 {
13051 rtx destexp;
13052 rtx srcexp;
13053 rtx countreg;
13054
13055 /* If the size is known and a multiple of 4, it is cheaper to use SImode rep movs. */
13056 if (mode == QImode && GET_CODE (count) == CONST_INT
13057 && !(INTVAL (count) & 3))
13058 mode = SImode;
13059
13060 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13061 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13062 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13063 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13064 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13065 if (mode != QImode)
13066 {
13067 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13068 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13069 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13070 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13071 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13072 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13073 }
13074 else
13075 {
13076 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13077 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13078 }
13079 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13080 destexp, srcexp));
13081 }
13082
13083 /* Output "rep; stos" instruction.
13084 Arguments have the same meaning as for the previous function. */
13085 static void
13086 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13087 rtx count,
13088 enum machine_mode mode)
13089 {
13090 rtx destexp;
13091 rtx countreg;
13092
13093 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13094 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13095 value = force_reg (mode, gen_lowpart (mode, value));
13096 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13097 if (mode != QImode)
13098 {
13099 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13100 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13101 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13102 }
13103 else
13104 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13105 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13106 }
13107
13108 static void
13109 emit_strmov (rtx destmem, rtx srcmem,
13110 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13111 {
13112 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13113 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13114 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13115 }
13116
13117 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
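/* For a constant COUNT, the individual bits of COUNT & (MAX_SIZE - 1) select
   which moves are emitted below: e.g. a residue of 7 produces one SImode, one
   HImode and one QImode move, while the 8- and 16-byte bits select the wider
   moves above them.  */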
13118 static void
13119 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13120 rtx destptr, rtx srcptr, rtx count, int max_size)
13121 {
13122 rtx src, dest;
13123 if (GET_CODE (count) == CONST_INT)
13124 {
13125 HOST_WIDE_INT countval = INTVAL (count);
13126 int offset = 0;
13127
13128 if ((countval & 0x10) && max_size > 16)
13129 {
13130 if (TARGET_64BIT)
13131 {
13132 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13133 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13134 }
13135 else
13136 gcc_unreachable ();
13137 offset += 16;
13138 }
13139 if ((countval & 0x08) && max_size > 8)
13140 {
13141 if (TARGET_64BIT)
13142 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13143 else
13144 {
13145 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13146 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13147 }
13148 offset += 8;
13149 }
13150 if ((countval & 0x04) && max_size > 4)
13151 {
13152 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13153 offset += 4;
13154 }
13155 if ((countval & 0x02) && max_size > 2)
13156 {
13157 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13158 offset += 2;
13159 }
13160 if ((countval & 0x01) && max_size > 1)
13161 {
13162 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13163 offset += 1;
13164 }
13165 return;
13166 }
13167 if (max_size > 8)
13168 {
13169 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13170 count, 1, OPTAB_DIRECT);
13171 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13172 count, QImode, 1, 4);
13173 return;
13174 }
13175
13176 /* When single stringop instructions are available, we can cheaply advance
13177 the dest and src pointers. Otherwise we save code size by maintaining an
13178 offset (zero is readily available from the preceding rep operation) and
13179 using x86 addressing modes. */
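/* In the non-stringop variant below, the generated loads and stores use
   (srcptr + offset) and (destptr + offset) addresses and only OFFSET is
   updated, while the stringop variant lets movs advance the pointers.  */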
13180 if (TARGET_SINGLE_STRINGOP)
13181 {
13182 if (max_size > 4)
13183 {
13184 rtx label = ix86_expand_aligntest (count, 4, true);
13185 src = change_address (srcmem, SImode, srcptr);
13186 dest = change_address (destmem, SImode, destptr);
13187 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13188 emit_label (label);
13189 LABEL_NUSES (label) = 1;
13190 }
13191 if (max_size > 2)
13192 {
13193 rtx label = ix86_expand_aligntest (count, 2, true);
13194 src = change_address (srcmem, HImode, srcptr);
13195 dest = change_address (destmem, HImode, destptr);
13196 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13197 emit_label (label);
13198 LABEL_NUSES (label) = 1;
13199 }
13200 if (max_size > 1)
13201 {
13202 rtx label = ix86_expand_aligntest (count, 1, true);
13203 src = change_address (srcmem, QImode, srcptr);
13204 dest = change_address (destmem, QImode, destptr);
13205 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13206 emit_label (label);
13207 LABEL_NUSES (label) = 1;
13208 }
13209 }
13210 else
13211 {
13212 rtx offset = force_reg (Pmode, const0_rtx);
13213 rtx tmp;
13214
13215 if (max_size > 4)
13216 {
13217 rtx label = ix86_expand_aligntest (count, 4, true);
13218 src = change_address (srcmem, SImode, srcptr);
13219 dest = change_address (destmem, SImode, destptr);
13220 emit_move_insn (dest, src);
13221 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13222 true, OPTAB_LIB_WIDEN);
13223 if (tmp != offset)
13224 emit_move_insn (offset, tmp);
13225 emit_label (label);
13226 LABEL_NUSES (label) = 1;
13227 }
13228 if (max_size > 2)
13229 {
13230 rtx label = ix86_expand_aligntest (count, 2, true);
13231 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13232 src = change_address (srcmem, HImode, tmp);
13233 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13234 dest = change_address (destmem, HImode, tmp);
13235 emit_move_insn (dest, src);
13236 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13237 true, OPTAB_LIB_WIDEN);
13238 if (tmp != offset)
13239 emit_move_insn (offset, tmp);
13240 emit_label (label);
13241 LABEL_NUSES (label) = 1;
13242 }
13243 if (max_size > 1)
13244 {
13245 rtx label = ix86_expand_aligntest (count, 1, true);
13246 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13247 src = change_address (srcmem, QImode, tmp);
13248 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13249 dest = change_address (destmem, QImode, tmp);
13250 emit_move_insn (dest, src);
13251 emit_label (label);
13252 LABEL_NUSES (label) = 1;
13253 }
13254 }
13255 }
13256
13257 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13258 static void
13259 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13260 rtx count, int max_size)
13261 {
13262 count =
13263 expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13264 count, 1, OPTAB_DIRECT);
13265 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13266 gen_lowpart (QImode, value), count, QImode,
13267 1, max_size / 2);
13268 }
13269
13270 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13271 static void
13272 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13273 {
13274 rtx dest;
13275 if (GET_CODE (count) == CONST_INT)
13276 {
13277 HOST_WIDE_INT countval = INTVAL (count);
13278 int offset = 0;
13279
13280 if ((countval & 0x10) && max_size > 16)
13281 {
13282 if (TARGET_64BIT)
13283 {
13284 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13285 emit_insn (gen_strset (destptr, dest, value));
13286 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13287 emit_insn (gen_strset (destptr, dest, value));
13288 }
13289 else
13290 gcc_unreachable ();
13291 offset += 16;
13292 }
13293 if ((countval & 0x08) && max_size > 8)
13294 {
13295 if (TARGET_64BIT)
13296 {
13297 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13298 emit_insn (gen_strset (destptr, dest, value));
13299 }
13300 else
13301 {
13302 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13303 emit_insn (gen_strset (destptr, dest, value));
13304 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13305 emit_insn (gen_strset (destptr, dest, value));
13306 }
13307 offset += 8;
13308 }
13309 if ((countval & 0x04) && max_size > 4)
13310 {
13311 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13312 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13313 offset += 4;
13314 }
13315 if ((countval & 0x02) && max_size > 2)
13316 {
13317 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13318 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13319 offset += 2;
13320 }
13321 if ((countval & 0x01) && max_size > 1)
13322 {
13323 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13324 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13325 offset += 1;
13326 }
13327 return;
13328 }
13329 if (max_size > 32)
13330 {
13331 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13332 return;
13333 }
13334 if (max_size > 16)
13335 {
13336 rtx label = ix86_expand_aligntest (count, 16, true);
13337 if (TARGET_64BIT)
13338 {
13339 dest = change_address (destmem, DImode, destptr);
13340 emit_insn (gen_strset (destptr, dest, value));
13341 emit_insn (gen_strset (destptr, dest, value));
13342 }
13343 else
13344 {
13345 dest = change_address (destmem, SImode, destptr);
13346 emit_insn (gen_strset (destptr, dest, value));
13347 emit_insn (gen_strset (destptr, dest, value));
13348 emit_insn (gen_strset (destptr, dest, value));
13349 emit_insn (gen_strset (destptr, dest, value));
13350 }
13351 emit_label (label);
13352 LABEL_NUSES (label) = 1;
13353 }
13354 if (max_size > 8)
13355 {
13356 rtx label = ix86_expand_aligntest (count, 8, true);
13357 if (TARGET_64BIT)
13358 {
13359 dest = change_address (destmem, DImode, destptr);
13360 emit_insn (gen_strset (destptr, dest, value));
13361 }
13362 else
13363 {
13364 dest = change_address (destmem, SImode, destptr);
13365 emit_insn (gen_strset (destptr, dest, value));
13366 emit_insn (gen_strset (destptr, dest, value));
13367 }
13368 emit_label (label);
13369 LABEL_NUSES (label) = 1;
13370 }
13371 if (max_size > 4)
13372 {
13373 rtx label = ix86_expand_aligntest (count, 4, true);
13374 dest = change_address (destmem, SImode, destptr);
13375 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13376 emit_label (label);
13377 LABEL_NUSES (label) = 1;
13378 }
13379 if (max_size > 2)
13380 {
13381 rtx label = ix86_expand_aligntest (count, 2, true);
13382 dest = change_address (destmem, HImode, destptr);
13383 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13384 emit_label (label);
13385 LABEL_NUSES (label) = 1;
13386 }
13387 if (max_size > 1)
13388 {
13389 rtx label = ix86_expand_aligntest (count, 1, true);
13390 dest = change_address (destmem, QImode, destptr);
13391 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13392 emit_label (label);
13393 LABEL_NUSES (label) = 1;
13394 }
13395 }
13396
13397 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned by ALIGN,
13398 to DESIRED_ALIGNMENT. */
13399 static void
13400 expand_movmem_prologue (rtx destmem, rtx srcmem,
13401 rtx destptr, rtx srcptr, rtx count,
13402 int align, int desired_alignment)
13403 {
13404 if (align <= 1 && desired_alignment > 1)
13405 {
13406 rtx label = ix86_expand_aligntest (destptr, 1, false);
13407 srcmem = change_address (srcmem, QImode, srcptr);
13408 destmem = change_address (destmem, QImode, destptr);
13409 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13410 ix86_adjust_counter (count, 1);
13411 emit_label (label);
13412 LABEL_NUSES (label) = 1;
13413 }
13414 if (align <= 2 && desired_alignment > 2)
13415 {
13416 rtx label = ix86_expand_aligntest (destptr, 2, false);
13417 srcmem = change_address (srcmem, HImode, srcptr);
13418 destmem = change_address (destmem, HImode, destptr);
13419 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13420 ix86_adjust_counter (count, 2);
13421 emit_label (label);
13422 LABEL_NUSES (label) = 1;
13423 }
13424 if (align <= 4 && desired_alignment > 4)
13425 {
13426 rtx label = ix86_expand_aligntest (destptr, 4, false);
13427 srcmem = change_address (srcmem, SImode, srcptr);
13428 destmem = change_address (destmem, SImode, destptr);
13429 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13430 ix86_adjust_counter (count, 4);
13431 emit_label (label);
13432 LABEL_NUSES (label) = 1;
13433 }
13434 gcc_assert (desired_alignment <= 8);
13435 }
13436
13437 /* Set enough bytes of DEST, known to be aligned by ALIGN, to align it to
13438 DESIRED_ALIGNMENT. */
13439 static void
13440 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13441 int align, int desired_alignment)
13442 {
13443 if (align <= 1 && desired_alignment > 1)
13444 {
13445 rtx label = ix86_expand_aligntest (destptr, 1, false);
13446 destmem = change_address (destmem, QImode, destptr);
13447 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13448 ix86_adjust_counter (count, 1);
13449 emit_label (label);
13450 LABEL_NUSES (label) = 1;
13451 }
13452 if (align <= 2 && desired_alignment > 2)
13453 {
13454 rtx label = ix86_expand_aligntest (destptr, 2, false);
13455 destmem = change_address (destmem, HImode, destptr);
13456 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13457 ix86_adjust_counter (count, 2);
13458 emit_label (label);
13459 LABEL_NUSES (label) = 1;
13460 }
13461 if (align <= 4 && desired_alignment > 4)
13462 {
13463 rtx label = ix86_expand_aligntest (destptr, 4, false);
13464 destmem = change_address (destmem, SImode, destptr);
13465 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13466 ix86_adjust_counter (count, 4);
13467 emit_label (label);
13468 LABEL_NUSES (label) = 1;
13469 }
13470 gcc_assert (desired_alignment <= 8);
13471 }
13472
13473 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
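/* The per-CPU stringop_algs table (ix86_cost->memcpy / ix86_cost->memset, with
   separate 32-bit and 64-bit variants) lists {max, alg} pairs consulted in
   order; the first entry whose max covers EXPECTED_SIZE wins, and unknown_size
   is used when the size is not known at all.  */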
13474 static enum stringop_alg
13475 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13476 int *dynamic_check)
13477 {
13478 const struct stringop_algs * algs;
13479
13480 *dynamic_check = -1;
13481 if (memset)
13482 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13483 else
13484 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13485 if (stringop_alg != no_stringop)
13486 return stringop_alg;
13487 /* rep; movq or rep; movl is the smallest variant. */
13488 else if (optimize_size)
13489 {
13490 if (!count || (count & 3))
13491 return rep_prefix_1_byte;
13492 else
13493 return rep_prefix_4_byte;
13494 }
13495 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
13496 */
13497 else if (expected_size != -1 && expected_size < 4)
13498 return loop_1_byte;
13499 else if (expected_size != -1)
13500 {
13501 unsigned int i;
13502 enum stringop_alg alg = libcall;
13503 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13504 {
13505 gcc_assert (algs->size[i].max);
13506 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13507 {
13508 if (algs->size[i].alg != libcall)
13509 alg = algs->size[i].alg;
13510 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13511 last non-libcall inline algorithm. */
13512 if (TARGET_INLINE_ALL_STRINGOPS)
13513 {
13514 /* When the current size is best copied by a libcall, but we
13515 are still forced to inline, run the heuristic below that
13516 will pick code for medium-sized blocks. */
13517 if (alg != libcall)
13518 return alg;
13519 break;
13520 }
13521 else
13522 return algs->size[i].alg;
13523 }
13524 }
13525 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13526 }
13527 /* When asked to inline the call anyway, try to pick a meaningful choice.
13528 We look for the maximal size of a block that is faster to copy by hand and
13529 take blocks of at most that size, guessing that the average size will
13530 be roughly half of the block.
13531
13532 If this turns out to be bad, we might simply specify the preferred
13533 choice in ix86_costs. */
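/* For example, if the largest non-libcall table entry covers blocks of up to
   4096 bytes, the recursive call below asks decide_alg what it would pick for
   a 2048-byte block and uses that algorithm for the inlined copy.  */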
13534 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13535 && algs->unknown_size == libcall)
13536 {
13537 int max = -1;
13538 enum stringop_alg alg;
13539 int i;
13540
13541 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13542 if (algs->size[i].alg != libcall && algs->size[i].alg)
13543 max = algs->size[i].max;
13544 if (max == -1)
13545 max = 4096;
13546 alg = decide_alg (count, max / 2, memset, dynamic_check);
13547 gcc_assert (*dynamic_check == -1);
13548 gcc_assert (alg != libcall);
13549 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13550 *dynamic_check = max;
13551 return alg;
13552 }
13553 return algs->unknown_size;
13554 }
13555
13556 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13557 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13558 static int
13559 decide_alignment (int align,
13560 enum stringop_alg alg,
13561 int expected_size)
13562 {
13563 int desired_align = 0;
13564 switch (alg)
13565 {
13566 case no_stringop:
13567 gcc_unreachable ();
13568 case loop:
13569 case unrolled_loop:
13570 desired_align = GET_MODE_SIZE (Pmode);
13571 break;
13572 case rep_prefix_8_byte:
13573 desired_align = 8;
13574 break;
13575 case rep_prefix_4_byte:
13576 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
13577 copying a whole cache line at once. */
13578 if (TARGET_PENTIUMPRO)
13579 desired_align = 8;
13580 else
13581 desired_align = 4;
13582 break;
13583 case rep_prefix_1_byte:
13584 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
13585 copying a whole cache line at once. */
13586 if (TARGET_PENTIUMPRO)
13587 desired_align = 8;
13588 else
13589 desired_align = 1;
13590 break;
13591 case loop_1_byte:
13592 desired_align = 1;
13593 break;
13594 case libcall:
13595 return 0;
13596 }
13597
13598 if (optimize_size)
13599 desired_align = 1;
13600 if (desired_align < align)
13601 desired_align = align;
13602 if (expected_size != -1 && expected_size < 4)
13603 desired_align = align;
13604 return desired_align;
13605 }
13606
13607 /* Expand string move (memcpy) operation. Use i386 string operations when
13608 profitable. ix86_expand_setmem contains similar code. */
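/* Overall shape of the expansion below, roughly: a guard for small blocks, an
   optional runtime size check (dynamic_check) that calls the library for large
   blocks, an alignment prologue, a main body selected by decide_alg, and an
   epilogue that copies the remaining bytes.  */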
13609 int
13610 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
13611 rtx expected_align_exp, rtx expected_size_exp)
13612 {
13613 rtx destreg;
13614 rtx srcreg;
13615 rtx label = NULL;
13616 rtx tmp;
13617 rtx jump_around_label = NULL;
13618 HOST_WIDE_INT align = 1;
13619 unsigned HOST_WIDE_INT count = 0;
13620 HOST_WIDE_INT expected_size = -1;
13621 int size_needed = 0;
13622 int desired_align = 0;
13623 enum stringop_alg alg;
13624 int dynamic_check;
13625
13626 if (GET_CODE (align_exp) == CONST_INT)
13627 align = INTVAL (align_exp);
13628 /* i386 can do misaligned access at reasonably increased cost. */
13629 if (GET_CODE (expected_align_exp) == CONST_INT
13630 && INTVAL (expected_align_exp) > align)
13631 align = INTVAL (expected_align_exp);
13632 if (GET_CODE (count_exp) == CONST_INT)
13633 count = expected_size = INTVAL (count_exp);
13634 if (GET_CODE (expected_size_exp) == CONST_INT && count == 0)
13635 {
13636 expected_size = INTVAL (expected_size_exp);
13637 }
13638
13639 alg = decide_alg (count, expected_size, false, &dynamic_check);
13640 desired_align = decide_alignment (align, alg, expected_size);
13641
13642 if (!TARGET_ALIGN_STRINGOPS)
13643 align = desired_align;
13644
13645 if (alg == libcall)
13646 return 0;
13647 gcc_assert (alg != no_stringop);
13648 if (!count)
13649 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13650 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13651 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
13652 switch (alg)
13653 {
13654 case libcall:
13655 case no_stringop:
13656 gcc_unreachable ();
13657 case loop:
13658 size_needed = GET_MODE_SIZE (Pmode);
13659 break;
13660 case unrolled_loop:
13661 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
13662 break;
13663 case rep_prefix_8_byte:
13664 size_needed = 8;
13665 break;
13666 case rep_prefix_4_byte:
13667 size_needed = 4;
13668 break;
13669 case rep_prefix_1_byte:
13670 case loop_1_byte:
13671 size_needed = 1;
13672 break;
13673 }
13674
13675 /* Alignment code needs count to be in register. */
13676 if (GET_CODE (count_exp) == CONST_INT && desired_align > align)
13677 {
13678 enum machine_mode mode = SImode;
13679 if (TARGET_64BIT && (count & ~0xffffffff))
13680 mode = DImode;
13681 count_exp = force_reg (mode, count_exp);
13682 }
13683 gcc_assert (desired_align >= 1 && align >= 1);
13684 /* Ensure that alignment prologue won't copy past end of block. */
13685 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13686 && !count)
13687 {
13688 int size = MAX (size_needed - 1, desired_align - align);
13689
13690 label = gen_label_rtx ();
13691 emit_cmp_and_jump_insns (count_exp,
13692 GEN_INT (size),
13693 LEU, 0, GET_MODE (count_exp), 1, label);
13694 if (expected_size == -1 || expected_size < size)
13695 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13696 else
13697 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13698 }
13699 /* Emit code to decide at runtime whether a library call or the inline code
13700 should be used. */
13701 if (dynamic_check != -1)
13702 {
13703 rtx hot_label = gen_label_rtx ();
13704 jump_around_label = gen_label_rtx ();
13705 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13706 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13707 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13708 emit_block_move_via_libcall (dst, src, count_exp, false);
13709 emit_jump (jump_around_label);
13710 emit_label (hot_label);
13711 }
13712
13713
13714 /* Alignment prologue. */
13715 if (desired_align > align)
13716 {
13717 /* Except for the first move in the epilogue, we no longer know
13718 the constant offset in the aliasing info. It doesn't seem worth
13719 the pain to maintain it for the first move, so throw away
13720 the info early. */
13721 src = change_address (src, BLKmode, srcreg);
13722 dst = change_address (dst, BLKmode, destreg);
13723 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
13724 desired_align);
13725 }
13726 if (label && size_needed == 1)
13727 {
13728 emit_label (label);
13729 LABEL_NUSES (label) = 1;
13730 label = NULL;
13731 }
13732
13733 /* Main body. */
13734 switch (alg)
13735 {
13736 case libcall:
13737 case no_stringop:
13738 gcc_unreachable ();
13739 case loop_1_byte:
13740 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13741 count_exp, QImode, 1, expected_size);
13742 break;
13743 case loop:
13744 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13745 count_exp, Pmode, 1, expected_size);
13746 break;
13747 case unrolled_loop:
13748 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
13749 registers for 4 temporaries anyway. */
13750 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
13751 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
13752 expected_size);
13753 break;
13754 case rep_prefix_8_byte:
13755 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13756 DImode);
13757 break;
13758 case rep_prefix_4_byte:
13759 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13760 SImode);
13761 break;
13762 case rep_prefix_1_byte:
13763 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
13764 QImode);
13765 break;
13766 }
13767 /* Properly adjust the offsets of the src and dest memory for aliasing. */
13768 if (GET_CODE (count_exp) == CONST_INT)
13769 {
13770 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
13771 (count / size_needed) * size_needed);
13772 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
13773 (count / size_needed) * size_needed);
13774 }
13775 else
13776 {
13777 src = change_address (src, BLKmode, srcreg);
13778 dst = change_address (dst, BLKmode, destreg);
13779 }
13780
13781 /* Epilogue to copy the remaining bytes. */
13782 if (label)
13783 {
13784 if (size_needed < desired_align - align)
13785 {
13786 tmp =
13787 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
13788 GEN_INT (size_needed - 1), count_exp, 1,
13789 OPTAB_DIRECT);
13790 size_needed = desired_align - align + 1;
13791 if (tmp != count_exp)
13792 emit_move_insn (count_exp, tmp);
13793 }
13794 emit_label (label);
13795 LABEL_NUSES (label) = 1;
13796 }
13797 if (count_exp != const0_rtx && size_needed > 1)
13798 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
13799 size_needed);
13800 if (jump_around_label)
13801 emit_label (jump_around_label);
13802 return 1;
13803 }
13804
13805 /* Helper function for memset. For the QImode value 0xXY produce
13806 0xXYXYXYXY of the width specified by MODE. This is essentially
13807 a * 0x01010101, but we can do slightly better than
13808 synth_mult by unwinding the sequence by hand on CPUs with
13809 slow multiply. */
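/* For instance, with a constant val of 0x5A and MODE == SImode the code below
   computes 0x5A -> 0x5A5A -> 0x5A5A5A5A; the DImode case adds one more
   doubling step to get 0x5A5A5A5A5A5A5A5A.  */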
13810 static rtx
13811 promote_duplicated_reg (enum machine_mode mode, rtx val)
13812 {
13813 enum machine_mode valmode = GET_MODE (val);
13814 rtx tmp;
13815 int nops = mode == DImode ? 3 : 2;
13816
13817 gcc_assert (mode == SImode || mode == DImode);
13818 if (val == const0_rtx)
13819 return copy_to_mode_reg (mode, const0_rtx);
13820 if (GET_CODE (val) == CONST_INT)
13821 {
13822 HOST_WIDE_INT v = INTVAL (val) & 255;
13823
13824 v |= v << 8;
13825 v |= v << 16;
13826 if (mode == DImode)
13827 v |= (v << 16) << 16;
13828 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
13829 }
13830
13831 if (valmode == VOIDmode)
13832 valmode = QImode;
13833 if (valmode != QImode)
13834 val = gen_lowpart (QImode, val);
13835 if (mode == QImode)
13836 return val;
13837 if (!TARGET_PARTIAL_REG_STALL)
13838 nops--;
13839 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
13840 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
13841 <= (ix86_cost->shift_const + ix86_cost->add) * nops
13842 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
13843 {
13844 rtx reg = convert_modes (mode, QImode, val, true);
13845 tmp = promote_duplicated_reg (mode, const1_rtx);
13846 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
13847 OPTAB_DIRECT);
13848 }
13849 else
13850 {
13851 rtx reg = convert_modes (mode, QImode, val, true);
13852
13853 if (!TARGET_PARTIAL_REG_STALL)
13854 if (mode == SImode)
13855 emit_insn (gen_movsi_insv_1 (reg, reg));
13856 else
13857 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
13858 else
13859 {
13860 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
13861 NULL, 1, OPTAB_DIRECT);
13862 reg =
13863 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13864 }
13865 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
13866 NULL, 1, OPTAB_DIRECT);
13867 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13868 if (mode == SImode)
13869 return reg;
13870 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
13871 NULL, 1, OPTAB_DIRECT);
13872 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
13873 return reg;
13874 }
13875 }
13876
13877 /* Expand string set operation (memset). Use i386 string operations when
13878 profitable. ix86_expand_movmem contains similar code. */
13879 int
13880 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
13881 rtx expected_align_exp, rtx expected_size_exp)
13882 {
13883 rtx destreg;
13884 rtx label = NULL;
13885 rtx tmp;
13886 rtx jump_around_label = NULL;
13887 HOST_WIDE_INT align = 1;
13888 unsigned HOST_WIDE_INT count = 0;
13889 HOST_WIDE_INT expected_size = -1;
13890 int size_needed = 0;
13891 int desired_align = 0;
13892 enum stringop_alg alg;
13893 rtx promoted_val = val_exp;
13894 bool force_loopy_epilogue = false;
13895 int dynamic_check;
13896
13897 if (GET_CODE (align_exp) == CONST_INT)
13898 align = INTVAL (align_exp);
13899 /* i386 can do misaligned access at reasonably increased cost. */
13900 if (GET_CODE (expected_align_exp) == CONST_INT
13901 && INTVAL (expected_align_exp) > align)
13902 align = INTVAL (expected_align_exp);
13903 if (GET_CODE (count_exp) == CONST_INT)
13904 count = expected_size = INTVAL (count_exp);
13905 if (GET_CODE (expected_size_exp) == CONST_INT && count == 0)
13906 expected_size = INTVAL (expected_size_exp);
13907
13908 alg = decide_alg (count, expected_size, true, &dynamic_check);
13909 desired_align = decide_alignment (align, alg, expected_size);
13910
13911 if (!TARGET_ALIGN_STRINGOPS)
13912 align = desired_align;
13913
13914 if (alg == libcall)
13915 return 0;
13916 gcc_assert (alg != no_stringop);
13917 if (!count)
13918 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
13919 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
13920 switch (alg)
13921 {
13922 case libcall:
13923 case no_stringop:
13924 gcc_unreachable ();
13925 case loop:
13926 size_needed = GET_MODE_SIZE (Pmode);
13927 break;
13928 case unrolled_loop:
13929 size_needed = GET_MODE_SIZE (Pmode) * 4;
13930 break;
13931 case rep_prefix_8_byte:
13932 size_needed = 8;
13933 break;
13934 case rep_prefix_4_byte:
13935 size_needed = 4;
13936 break;
13937 case rep_prefix_1_byte:
13938 case loop_1_byte:
13939 size_needed = 1;
13940 break;
13941 }
13942 /* Alignment code needs count to be in register. */
13943 if (GET_CODE (count_exp) == CONST_INT && desired_align > align)
13944 {
13945 enum machine_mode mode = SImode;
13946 if (TARGET_64BIT && (count & ~0xffffffff))
13947 mode = DImode;
13948 count_exp = force_reg (mode, count_exp);
13949 }
13950 /* Ensure that alignment prologue won't copy past end of block. */
13951 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13952 && !count)
13953 {
13954 int size = MAX (size_needed - 1, desired_align - align);
13955 /* To improve performance of small blocks, we jump around the promoting
13956 code, so we need to use QImode accesses in epilogue. */
13957 if (GET_CODE (val_exp) != CONST_INT && size_needed > 1)
13958 force_loopy_epilogue = true;
13959 label = gen_label_rtx ();
13960 emit_cmp_and_jump_insns (count_exp,
13961 GEN_INT (size),
13962 LEU, 0, GET_MODE (count_exp), 1, label);
13963 if (expected_size == -1 || expected_size <= size)
13964 predict_jump (REG_BR_PROB_BASE * 60 / 100);
13965 else
13966 predict_jump (REG_BR_PROB_BASE * 20 / 100);
13967 }
13968 if (dynamic_check != -1)
13969 {
13970 rtx hot_label = gen_label_rtx ();
13971 jump_around_label = gen_label_rtx ();
13972 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
13973 LEU, 0, GET_MODE (count_exp), 1, hot_label);
13974 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13975 set_storage_via_libcall (dst, count_exp, val_exp, false);
13976 emit_jump (jump_around_label);
13977 emit_label (hot_label);
13978 }
13979 if (TARGET_64BIT
13980 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
13981 promoted_val = promote_duplicated_reg (DImode, val_exp);
13982 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
13983 promoted_val = promote_duplicated_reg (SImode, val_exp);
13984 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
13985 promoted_val = promote_duplicated_reg (HImode, val_exp);
13986 else
13987 promoted_val = val_exp;
13988 gcc_assert (desired_align >= 1 && align >= 1);
13989 if ((size_needed > 1 || (desired_align > 1 && desired_align > align))
13990 && !count && !label)
13991 {
13992 int size = MAX (size_needed - 1, desired_align - align);
13993
13994 label = gen_label_rtx ();
13995 emit_cmp_and_jump_insns (count_exp,
13996 GEN_INT (size),
13997 LEU, 0, GET_MODE (count_exp), 1, label);
13998 if (expected_size == -1 || expected_size <= size)
13999 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14000 else
14001 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14002 }
14003 if (desired_align > align)
14004 {
14005 /* Except for the first move in the epilogue, we no longer know
14006 the constant offset in the aliasing info. It doesn't seem worth
14007 the pain to maintain it for the first move, so throw away
14008 the info early. */
14009 dst = change_address (dst, BLKmode, destreg);
14010 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14011 desired_align);
14012 }
14013 if (label && size_needed == 1)
14014 {
14015 emit_label (label);
14016 LABEL_NUSES (label) = 1;
14017 label = NULL;
14018 }
14019 switch (alg)
14020 {
14021 case libcall:
14022 case no_stringop:
14023 gcc_unreachable ();
14024 case loop_1_byte:
14025 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14026 count_exp, QImode, 1, expected_size);
14027 break;
14028 case loop:
14029 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14030 count_exp, Pmode, 1, expected_size);
14031 break;
14032 case unrolled_loop:
14033 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14034 count_exp, Pmode, 4, expected_size);
14035 break;
14036 case rep_prefix_8_byte:
14037 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14038 DImode);
14039 break;
14040 case rep_prefix_4_byte:
14041 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14042 SImode);
14043 break;
14044 case rep_prefix_1_byte:
14045 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14046 QImode);
14047 break;
14048 }
14049 /* Properly adjust the offset of the dest memory for aliasing. */
14050 if (GET_CODE (count_exp) == CONST_INT)
14051 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14052 (count / size_needed) * size_needed);
14053 else
14054 dst = change_address (dst, BLKmode, destreg);
14055
14056 if (label)
14057 {
14058 if (size_needed < desired_align - align)
14059 {
14060 tmp =
14061 expand_simple_binop (GET_MODE (count_exp), AND, count_exp,
14062 GEN_INT (size_needed - 1), count_exp, 1,
14063 OPTAB_DIRECT);
14064 size_needed = desired_align - align + 1;
14065 if (tmp != count_exp)
14066 emit_move_insn (count_exp, tmp);
14067 }
14068 emit_label (label);
14069 LABEL_NUSES (label) = 1;
14070 }
14071 if (count_exp != const0_rtx && size_needed > 1)
14072 {
14073 if (force_loopy_epilogue)
14074 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14075 size_needed);
14076 else
14077 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14078 size_needed);
14079 }
14080 if (jump_around_label)
14081 emit_label (jump_around_label);
14082 return 1;
14083 }
14084
14085 /* Expand strlen. */
14086 int
14087 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14088 {
14089 rtx addr, scratch1, scratch2, scratch3, scratch4;
14090
14091 /* The generic case of the strlen expander is long. Avoid expanding
14092 it unless TARGET_INLINE_ALL_STRINGOPS. */
14093
14094 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14095 && !TARGET_INLINE_ALL_STRINGOPS
14096 && !optimize_size
14097 && (GET_CODE (align) != CONST_INT || INTVAL (align) < 4))
14098 return 0;
14099
14100 addr = force_reg (Pmode, XEXP (src, 0));
14101 scratch1 = gen_reg_rtx (Pmode);
14102
14103 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14104 && !optimize_size)
14105 {
14106 /* Well it seems that some optimizer does not combine a call like
14107 foo(strlen(bar), strlen(bar));
14108 when the move and the subtraction are done here. It does calculate
14109 the length just once when these instructions are done inside
14110 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14111 often used and I use one fewer register for the lifetime of
14112 output_strlen_unroll() this is better. */
14113
14114 emit_move_insn (out, addr);
14115
14116 ix86_expand_strlensi_unroll_1 (out, src, align);
14117
14118 /* strlensi_unroll_1 returns the address of the zero at the end of
14119 the string, like memchr(), so compute the length by subtracting
14120 the start address. */
14121 if (TARGET_64BIT)
14122 emit_insn (gen_subdi3 (out, out, addr));
14123 else
14124 emit_insn (gen_subsi3 (out, out, addr));
14125 }
14126 else
14127 {
14128 rtx unspec;
14129 scratch2 = gen_reg_rtx (Pmode);
14130 scratch3 = gen_reg_rtx (Pmode);
14131 scratch4 = force_reg (Pmode, constm1_rtx);
14132
14133 emit_move_insn (scratch3, addr);
14134 eoschar = force_reg (QImode, eoschar);
14135
14136 src = replace_equiv_address_nv (src, scratch3);
14137
14138 /* If .md starts supporting :P, this can be done in .md. */
14139 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14140 scratch4), UNSPEC_SCAS);
14141 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14142 if (TARGET_64BIT)
14143 {
14144 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14145 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14146 }
14147 else
14148 {
14149 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14150 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14151 }
14152 }
14153 return 1;
14154 }
14155
14156 /* Expand the appropriate insns for doing strlen if not just doing
14157 repnz; scasb
14158
14159 out = result, initialized with the start address
14160 align_rtx = alignment of the address.
14161 scratch = scratch register, initialized with the start address when
14162 not aligned, otherwise undefined
14163
14164 This is just the body. It needs the initializations mentioned above and
14165 some address computing at the end. These things are done in i386.md. */
14166
14167 static void
14168 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14169 {
14170 int align;
14171 rtx tmp;
14172 rtx align_2_label = NULL_RTX;
14173 rtx align_3_label = NULL_RTX;
14174 rtx align_4_label = gen_label_rtx ();
14175 rtx end_0_label = gen_label_rtx ();
14176 rtx mem;
14177 rtx tmpreg = gen_reg_rtx (SImode);
14178 rtx scratch = gen_reg_rtx (SImode);
14179 rtx cmp;
14180
14181 align = 0;
14182 if (GET_CODE (align_rtx) == CONST_INT)
14183 align = INTVAL (align_rtx);
14184
14185 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14186
14187 /* Is there a known alignment and is it less than 4? */
14188 if (align < 4)
14189 {
14190 rtx scratch1 = gen_reg_rtx (Pmode);
14191 emit_move_insn (scratch1, out);
14192 /* Is there a known alignment and is it not 2? */
14193 if (align != 2)
14194 {
14195 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14196 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14197
14198 /* Leave just the 3 lower bits. */
14199 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14200 NULL_RTX, 0, OPTAB_WIDEN);
14201
14202 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14203 Pmode, 1, align_4_label);
14204 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14205 Pmode, 1, align_2_label);
14206 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14207 Pmode, 1, align_3_label);
14208 }
14209 else
14210 {
14211 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14212 check whether it is aligned to 4 bytes. */
14213
14214 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14215 NULL_RTX, 0, OPTAB_WIDEN);
14216
14217 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14218 Pmode, 1, align_4_label);
14219 }
14220
14221 mem = change_address (src, QImode, out);
14222
14223 /* Now compare the bytes. */
14224
14225 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14226 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14227 QImode, 1, end_0_label);
14228
14229 /* Increment the address. */
14230 if (TARGET_64BIT)
14231 emit_insn (gen_adddi3 (out, out, const1_rtx));
14232 else
14233 emit_insn (gen_addsi3 (out, out, const1_rtx));
14234
14235 /* Not needed with an alignment of 2 */
14236 if (align != 2)
14237 {
14238 emit_label (align_2_label);
14239
14240 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14241 end_0_label);
14242
14243 if (TARGET_64BIT)
14244 emit_insn (gen_adddi3 (out, out, const1_rtx));
14245 else
14246 emit_insn (gen_addsi3 (out, out, const1_rtx));
14247
14248 emit_label (align_3_label);
14249 }
14250
14251 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14252 end_0_label);
14253
14254 if (TARGET_64BIT)
14255 emit_insn (gen_adddi3 (out, out, const1_rtx));
14256 else
14257 emit_insn (gen_addsi3 (out, out, const1_rtx));
14258 }
14259
14260 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14261 align this loop; it only makes the program larger and does not help to
14262 speed it up. */
14263 emit_label (align_4_label);
14264
14265 mem = change_address (src, SImode, out);
14266 emit_move_insn (scratch, mem);
14267 if (TARGET_64BIT)
14268 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14269 else
14270 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14271
14272 /* This formula yields a nonzero result iff one of the bytes is zero.
14273 This saves three branches inside the loop and many cycles. */
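/* Concretely, the insns below compute
      (x - 0x01010101) & ~x & 0x80808080
   which is the classic zero-byte test: e.g. x = 0x12345600 gives
   0x113354ff & 0xedcba9ff & 0x80808080 = 0x00000080, flagging the low byte.  */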
14274
14275 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14276 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14277 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14278 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14279 gen_int_mode (0x80808080, SImode)));
14280 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14281 align_4_label);
14282
14283 if (TARGET_CMOVE)
14284 {
14285 rtx reg = gen_reg_rtx (SImode);
14286 rtx reg2 = gen_reg_rtx (Pmode);
14287 emit_move_insn (reg, tmpreg);
14288 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14289
14290 /* If zero is not in the first two bytes, move two bytes forward. */
14291 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14292 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14293 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14294 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14295 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14296 reg,
14297 tmpreg)));
14298 /* Emit lea manually to avoid clobbering of flags. */
14299 emit_insn (gen_rtx_SET (SImode, reg2,
14300 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14301
14302 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14303 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14304 emit_insn (gen_rtx_SET (VOIDmode, out,
14305 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14306 reg2,
14307 out)));
14308
14309 }
14310 else
14311 {
14312 rtx end_2_label = gen_label_rtx ();
14313 /* Is zero in the first two bytes? */
14314
14315 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14316 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14317 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14318 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14319 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14320 pc_rtx);
14321 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14322 JUMP_LABEL (tmp) = end_2_label;
14323
14324 /* Not in the first two. Move two bytes forward. */
14325 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14326 if (TARGET_64BIT)
14327 emit_insn (gen_adddi3 (out, out, const2_rtx));
14328 else
14329 emit_insn (gen_addsi3 (out, out, const2_rtx));
14330
14331 emit_label (end_2_label);
14332
14333 }
14334
14335 /* Avoid branch in fixing the byte. */
14336 tmpreg = gen_lowpart (QImode, tmpreg);
14337 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14338 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14339 if (TARGET_64BIT)
14340 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14341 else
14342 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14343
14344 emit_label (end_0_label);
14345 }
14346
14347 void
14348 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14349 rtx callarg2 ATTRIBUTE_UNUSED,
14350 rtx pop, int sibcall)
14351 {
14352 rtx use = NULL, call;
14353
14354 if (pop == const0_rtx)
14355 pop = NULL;
14356 gcc_assert (!TARGET_64BIT || !pop);
14357
14358 if (TARGET_MACHO && !TARGET_64BIT)
14359 {
14360 #if TARGET_MACHO
14361 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14362 fnaddr = machopic_indirect_call_target (fnaddr);
14363 #endif
14364 }
14365 else
14366 {
14367 /* Static functions and indirect calls don't need the pic register. */
14368 if (! TARGET_64BIT && flag_pic
14369 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14370 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14371 use_reg (&use, pic_offset_table_rtx);
14372 }
14373
14374 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14375 {
14376 rtx al = gen_rtx_REG (QImode, 0);
14377 emit_move_insn (al, callarg2);
14378 use_reg (&use, al);
14379 }
14380
14381 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14382 {
14383 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14384 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14385 }
14386 if (sibcall && TARGET_64BIT
14387 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14388 {
14389 rtx addr;
14390 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14391 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14392 emit_move_insn (fnaddr, addr);
14393 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14394 }
14395
14396 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14397 if (retval)
14398 call = gen_rtx_SET (VOIDmode, retval, call);
14399 if (pop)
14400 {
14401 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14402 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14403 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14404 }
14405
14406 call = emit_call_insn (call);
14407 if (use)
14408 CALL_INSN_FUNCTION_USAGE (call) = use;
14409 }
14410
14411 \f
14412 /* Clear stack slot assignments remembered from previous functions.
14413 This is called from INIT_EXPANDERS once before RTL is emitted for each
14414 function. */
14415
14416 static struct machine_function *
14417 ix86_init_machine_status (void)
14418 {
14419 struct machine_function *f;
14420
14421 f = ggc_alloc_cleared (sizeof (struct machine_function));
14422 f->use_fast_prologue_epilogue_nregs = -1;
14423 f->tls_descriptor_call_expanded_p = 0;
14424
14425 return f;
14426 }
14427
14428 /* Return a MEM corresponding to a stack slot with mode MODE.
14429 Allocate a new slot if necessary.
14430
14431 The RTL for a function can have several slots available: N is
14432 which slot to use. */
14433
14434 rtx
14435 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14436 {
14437 struct stack_local_entry *s;
14438
14439 gcc_assert (n < MAX_386_STACK_LOCALS);
14440
14441 for (s = ix86_stack_locals; s; s = s->next)
14442 if (s->mode == mode && s->n == n)
14443 return copy_rtx (s->rtl);
14444
14445 s = (struct stack_local_entry *)
14446 ggc_alloc (sizeof (struct stack_local_entry));
14447 s->n = n;
14448 s->mode = mode;
14449 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14450
14451 s->next = ix86_stack_locals;
14452 ix86_stack_locals = s;
14453 return s->rtl;
14454 }
14455
14456 /* Construct the SYMBOL_REF for the tls_get_addr function. */
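/* Under the GNU TLS convention on IA-32 the helper is the ___tls_get_addr
   variant (note the extra leading underscore), which takes its argument in
   %eax rather than on the stack; otherwise the standard __tls_get_addr
   entry point is used.  */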
14457
14458 static GTY(()) rtx ix86_tls_symbol;
14459 rtx
14460 ix86_tls_get_addr (void)
14461 {
14462
14463 if (!ix86_tls_symbol)
14464 {
14465 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14466 (TARGET_ANY_GNU_TLS
14467 && !TARGET_64BIT)
14468 ? "___tls_get_addr"
14469 : "__tls_get_addr");
14470 }
14471
14472 return ix86_tls_symbol;
14473 }
14474
14475 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14476
14477 static GTY(()) rtx ix86_tls_module_base_symbol;
14478 rtx
14479 ix86_tls_module_base (void)
14480 {
14481
14482 if (!ix86_tls_module_base_symbol)
14483 {
14484 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14485 "_TLS_MODULE_BASE_");
14486 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14487 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14488 }
14489
14490 return ix86_tls_module_base_symbol;
14491 }
14492 \f
14493 /* Calculate the length of the memory address in the instruction
14494 encoding. Does not include the one-byte modrm, opcode, or prefix. */
14495
14496 int
14497 memory_address_length (rtx addr)
14498 {
14499 struct ix86_address parts;
14500 rtx base, index, disp;
14501 int len;
14502 int ok;
14503
14504 if (GET_CODE (addr) == PRE_DEC
14505 || GET_CODE (addr) == POST_INC
14506 || GET_CODE (addr) == PRE_MODIFY
14507 || GET_CODE (addr) == POST_MODIFY)
14508 return 0;
14509
14510 ok = ix86_decompose_address (addr, &parts);
14511 gcc_assert (ok);
14512
14513 if (parts.base && GET_CODE (parts.base) == SUBREG)
14514 parts.base = SUBREG_REG (parts.base);
14515 if (parts.index && GET_CODE (parts.index) == SUBREG)
14516 parts.index = SUBREG_REG (parts.index);
14517
14518 base = parts.base;
14519 index = parts.index;
14520 disp = parts.disp;
14521 len = 0;
14522
14523 /* Rule of thumb:
14524 - esp as the base always wants an index,
14525 - ebp as the base always wants a displacement. */
14526
14527 /* Register Indirect. */
14528 if (base && !index && !disp)
14529 {
14530 /* esp (for its index) and ebp (for its displacement) need
14531 the two-byte modrm form. */
14532 if (addr == stack_pointer_rtx
14533 || addr == arg_pointer_rtx
14534 || addr == frame_pointer_rtx
14535 || addr == hard_frame_pointer_rtx)
14536 len = 1;
14537 }
14538
14539 /* Direct Addressing. */
14540 else if (disp && !base && !index)
14541 len = 4;
14542
14543 else
14544 {
14545 /* Find the length of the displacement constant. */
14546 if (disp)
14547 {
14548 if (base && satisfies_constraint_K (disp))
14549 len = 1;
14550 else
14551 len = 4;
14552 }
14553 /* ebp always wants a displacement. */
14554 else if (base == hard_frame_pointer_rtx)
14555 len = 1;
14556
14557 /* An index requires the two-byte modrm form.... */
14558 if (index
14559 /* ...like esp, which always wants an index. */
14560 || base == stack_pointer_rtx
14561 || base == arg_pointer_rtx
14562 || base == frame_pointer_rtx)
14563 len += 1;
14564 }
14565
14566 return len;
14567 }
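/* Worked examples of the value computed above (counts exclude the modrm
   byte itself):  (%eax) -> 0;  (%esp) -> 1 (SIB byte);  (%ebp) -> 1
   (zero disp8);  a bare symbol or disp32 -> 4;  8(%eax) -> 1 (disp8);
   0x12345678(%eax) -> 4;  (%eax,%ebx,2) -> 1 (SIB);  8(%eax,%ebx,2) -> 2
   (SIB + disp8).  */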
14568
14569 /* Compute default value for "length_immediate" attribute. When SHORTFORM
14570 is set, expect that the insn has an 8-bit immediate alternative. */
14571 int
14572 ix86_attr_length_immediate_default (rtx insn, int shortform)
14573 {
14574 int len = 0;
14575 int i;
14576 extract_insn_cached (insn);
14577 for (i = recog_data.n_operands - 1; i >= 0; --i)
14578 if (CONSTANT_P (recog_data.operand[i]))
14579 {
14580 gcc_assert (!len);
14581 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
14582 len = 1;
14583 else
14584 {
14585 switch (get_attr_mode (insn))
14586 {
14587 case MODE_QI:
14588 len += 1;
14589 break;
14590 case MODE_HI:
14591 len += 2;
14592 break;
14593 case MODE_SI:
14594 len += 4;
14595 break;
14596 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
14597 case MODE_DI:
14598 len += 4;
14599 break;
14600 default:
14601 fatal_insn ("unknown insn mode", insn);
14602 }
14603 }
14604 }
14605 return len;
14606 }
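/* For example, with SHORTFORM set `add $12, %eax' counts 1 byte for the
   immediate (the constant satisfies the K constraint), `add $1000, %eax'
   counts 4, and an HImode immediate counts 2.  */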
14607 /* Compute default value for "length_address" attribute. */
14608 int
14609 ix86_attr_length_address_default (rtx insn)
14610 {
14611 int i;
14612
14613 if (get_attr_type (insn) == TYPE_LEA)
14614 {
14615 rtx set = PATTERN (insn);
14616
14617 if (GET_CODE (set) == PARALLEL)
14618 set = XVECEXP (set, 0, 0);
14619
14620 gcc_assert (GET_CODE (set) == SET);
14621
14622 return memory_address_length (SET_SRC (set));
14623 }
14624
14625 extract_insn_cached (insn);
14626 for (i = recog_data.n_operands - 1; i >= 0; --i)
14627 if (GET_CODE (recog_data.operand[i]) == MEM)
14628 {
14629 return memory_address_length (XEXP (recog_data.operand[i], 0));
14631 }
14632 return 0;
14633 }
14634 \f
14635 /* Return the maximum number of instructions a cpu can issue. */
14636
14637 static int
14638 ix86_issue_rate (void)
14639 {
14640 switch (ix86_tune)
14641 {
14642 case PROCESSOR_PENTIUM:
14643 case PROCESSOR_K6:
14644 return 2;
14645
14646 case PROCESSOR_PENTIUMPRO:
14647 case PROCESSOR_PENTIUM4:
14648 case PROCESSOR_ATHLON:
14649 case PROCESSOR_K8:
14650 case PROCESSOR_NOCONA:
14651 case PROCESSOR_GENERIC32:
14652 case PROCESSOR_GENERIC64:
14653 return 3;
14654
14655 case PROCESSOR_CORE2:
14656 return 4;
14657
14658 default:
14659 return 1;
14660 }
14661 }
14662
14663 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
14664 by DEP_INSN and nothing else set by DEP_INSN. */
14665
14666 static int
14667 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14668 {
14669 rtx set, set2;
14670
14671 /* Simplify the test for uninteresting insns. */
14672 if (insn_type != TYPE_SETCC
14673 && insn_type != TYPE_ICMOV
14674 && insn_type != TYPE_FCMOV
14675 && insn_type != TYPE_IBR)
14676 return 0;
14677
14678 if ((set = single_set (dep_insn)) != 0)
14679 {
14680 set = SET_DEST (set);
14681 set2 = NULL_RTX;
14682 }
14683 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
14684 && XVECLEN (PATTERN (dep_insn), 0) == 2
14685 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
14686 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
14687 {
14688 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
14689 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
14690 }
14691 else
14692 return 0;
14693
14694 if (GET_CODE (set) != REG || REGNO (set) != FLAGS_REG)
14695 return 0;
14696
14697 /* This test is true if the dependent insn reads the flags but
14698 not any other potentially set register. */
14699 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
14700 return 0;
14701
14702 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
14703 return 0;
14704
14705 return 1;
14706 }
14707
14708 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
14709 address with operands set by DEP_INSN. */
14710
14711 static int
14712 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
14713 {
14714 rtx addr;
14715
14716 if (insn_type == TYPE_LEA
14717 && TARGET_PENTIUM)
14718 {
14719 addr = PATTERN (insn);
14720
14721 if (GET_CODE (addr) == PARALLEL)
14722 addr = XVECEXP (addr, 0, 0);
14723
14724 gcc_assert (GET_CODE (addr) == SET);
14725
14726 addr = SET_SRC (addr);
14727 }
14728 else
14729 {
14730 int i;
14731 extract_insn_cached (insn);
14732 for (i = recog_data.n_operands - 1; i >= 0; --i)
14733 if (GET_CODE (recog_data.operand[i]) == MEM)
14734 {
14735 addr = XEXP (recog_data.operand[i], 0);
14736 goto found;
14737 }
14738 return 0;
14739 found:;
14740 }
14741
14742 return modified_in_p (addr, dep_insn);
14743 }
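/* Classic address-generation-interlock case on the Pentium: a load such as
   `movl (%esi), %eax' issued right after `addl %ebx, %esi' pays an extra
   cycle because its address register was only just computed; the PENTIUM
   case of ix86_adjust_cost below accounts for that cycle.  */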
14744
14745 static int
14746 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
14747 {
14748 enum attr_type insn_type, dep_insn_type;
14749 enum attr_memory memory;
14750 rtx set, set2;
14751 int dep_insn_code_number;
14752
14753 /* Anti and output dependencies have zero cost on all CPUs. */
14754 if (REG_NOTE_KIND (link) != 0)
14755 return 0;
14756
14757 dep_insn_code_number = recog_memoized (dep_insn);
14758
14759 /* If we can't recognize the insns, we can't really do anything. */
14760 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
14761 return cost;
14762
14763 insn_type = get_attr_type (insn);
14764 dep_insn_type = get_attr_type (dep_insn);
14765
14766 switch (ix86_tune)
14767 {
14768 case PROCESSOR_PENTIUM:
14769 /* Address Generation Interlock adds a cycle of latency. */
14770 if (ix86_agi_dependent (insn, dep_insn, insn_type))
14771 cost += 1;
14772
14773 /* ??? Compares pair with jump/setcc. */
14774 if (ix86_flags_dependent (insn, dep_insn, insn_type))
14775 cost = 0;
14776
14777 /* Floating point stores require the value to be ready one cycle earlier. */
14778 if (insn_type == TYPE_FMOV
14779 && get_attr_memory (insn) == MEMORY_STORE
14780 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14781 cost += 1;
14782 break;
14783
14784 case PROCESSOR_PENTIUMPRO:
14785 memory = get_attr_memory (insn);
14786
14787 /* INT->FP conversion is expensive. */
14788 if (get_attr_fp_int_src (dep_insn))
14789 cost += 5;
14790
14791 /* There is one extra cycle of latency between an FP op and a store. */
14792 if (insn_type == TYPE_FMOV
14793 && (set = single_set (dep_insn)) != NULL_RTX
14794 && (set2 = single_set (insn)) != NULL_RTX
14795 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
14796 && GET_CODE (SET_DEST (set2)) == MEM)
14797 cost += 1;
14798
14799 /* Show the ability of the reorder buffer to hide the latency of a load
14800 by executing it in parallel with the previous instruction when the
14801 previous instruction is not needed to compute the address. */
14802 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14803 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14804 {
14805 /* Claim moves take one cycle, as the core can issue one load
14806 at a time and the next load can start a cycle later. */
14807 if (dep_insn_type == TYPE_IMOV
14808 || dep_insn_type == TYPE_FMOV)
14809 cost = 1;
14810 else if (cost > 1)
14811 cost--;
14812 }
14813 break;
14814
14815 case PROCESSOR_K6:
14816 memory = get_attr_memory (insn);
14817
14818 /* The esp dependency is resolved before the instruction is really
14819 finished. */
14820 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
14821 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
14822 return 1;
14823
14824 /* INT->FP conversion is expensive. */
14825 if (get_attr_fp_int_src (dep_insn))
14826 cost += 5;
14827
14828 /* Show the ability of the reorder buffer to hide the latency of a load
14829 by executing it in parallel with the previous instruction when the
14830 previous instruction is not needed to compute the address. */
14831 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14832 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14833 {
14834 /* Claim moves take one cycle, as the core can issue one load
14835 at a time and the next load can start a cycle later. */
14836 if (dep_insn_type == TYPE_IMOV
14837 || dep_insn_type == TYPE_FMOV)
14838 cost = 1;
14839 else if (cost > 2)
14840 cost -= 2;
14841 else
14842 cost = 1;
14843 }
14844 break;
14845
14846 case PROCESSOR_ATHLON:
14847 case PROCESSOR_K8:
14848 case PROCESSOR_GENERIC32:
14849 case PROCESSOR_GENERIC64:
14850 memory = get_attr_memory (insn);
14851
14852 /* Show the ability of the reorder buffer to hide the latency of a load
14853 by executing it in parallel with the previous instruction when the
14854 previous instruction is not needed to compute the address. */
14855 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
14856 && !ix86_agi_dependent (insn, dep_insn, insn_type))
14857 {
14858 enum attr_unit unit = get_attr_unit (insn);
14859 int loadcost = 3;
14860
14861 /* Because of the difference between the length of integer and
14862 floating unit pipeline preparation stages, the memory operands
14863 for floating point are cheaper.
14864
14865 ??? For Athlon the difference is most probably 2. */
14866 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
14867 loadcost = 3;
14868 else
14869 loadcost = TARGET_ATHLON ? 2 : 0;
14870
14871 if (cost >= loadcost)
14872 cost -= loadcost;
14873 else
14874 cost = 0;
14875 }
14876
14877 default:
14878 break;
14879 }
14880
14881 return cost;
14882 }
14883
14884 /* How many alternative schedules to try. This should be as wide as the
14885 scheduling freedom in the DFA, but no wider. Making this value too
14886 large results in extra work for the scheduler. */
14887
14888 static int
14889 ia32_multipass_dfa_lookahead (void)
14890 {
14891 if (ix86_tune == PROCESSOR_PENTIUM)
14892 return 2;
14893
14894 if (ix86_tune == PROCESSOR_PENTIUMPRO
14895 || ix86_tune == PROCESSOR_K6)
14896 return 1;
14897
14898 else
14899 return 0;
14900 }
14901
14902 \f
14903 /* Compute the alignment given to a constant that is being placed in memory.
14904 EXP is the constant and ALIGN is the alignment that the object would
14905 ordinarily have.
14906 The value of this function is used instead of that alignment to align
14907 the object. */
14908
14909 int
14910 ix86_constant_alignment (tree exp, int align)
14911 {
14912 if (TREE_CODE (exp) == REAL_CST)
14913 {
14914 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
14915 return 64;
14916 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
14917 return 128;
14918 }
14919 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
14920 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
14921 return BITS_PER_WORD;
14922
14923 return align;
14924 }
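/* For instance, a `double' constant in the pool is given 64-bit rather than
   32-bit alignment, and string constants at least 31 bytes long are aligned
   to a word boundary, presumably so that block moves of them can use
   aligned word accesses.  */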
14925
14926 /* Compute the alignment for a static variable.
14927 TYPE is the data type, and ALIGN is the alignment that
14928 the object would ordinarily have. The value of this function is used
14929 instead of that alignment to align the object. */
14930
14931 int
14932 ix86_data_alignment (tree type, int align)
14933 {
14934 int max_align = optimize_size ? BITS_PER_WORD : 256;
14935
14936 if (AGGREGATE_TYPE_P (type)
14937 && TYPE_SIZE (type)
14938 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14939 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
14940 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
14941 && align < max_align)
14942 align = max_align;
14943
14944 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
14945 to a 16-byte boundary. */
14946 if (TARGET_64BIT)
14947 {
14948 if (AGGREGATE_TYPE_P (type)
14949 && TYPE_SIZE (type)
14950 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
14951 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
14952 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
14953 return 128;
14954 }
14955
14956 if (TREE_CODE (type) == ARRAY_TYPE)
14957 {
14958 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
14959 return 64;
14960 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
14961 return 128;
14962 }
14963 else if (TREE_CODE (type) == COMPLEX_TYPE)
14964 {
14965
14966 if (TYPE_MODE (type) == DCmode && align < 64)
14967 return 64;
14968 if (TYPE_MODE (type) == XCmode && align < 128)
14969 return 128;
14970 }
14971 else if ((TREE_CODE (type) == RECORD_TYPE
14972 || TREE_CODE (type) == UNION_TYPE
14973 || TREE_CODE (type) == QUAL_UNION_TYPE)
14974 && TYPE_FIELDS (type))
14975 {
14976 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
14977 return 64;
14978 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
14979 return 128;
14980 }
14981 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
14982 || TREE_CODE (type) == INTEGER_TYPE)
14983 {
14984 if (TYPE_MODE (type) == DFmode && align < 64)
14985 return 64;
14986 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
14987 return 128;
14988 }
14989
14990 return align;
14991 }
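/* As an illustration of the rules above: when optimizing for speed,
   `static char buf[256];' (an aggregate of at least 32 bytes) is raised to
   the 256-bit max_align, which can let memset/memcpy expansions use wider
   aligned accesses on it.  */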
14992
14993 /* Compute the alignment for a local variable.
14994 TYPE is the data type, and ALIGN is the alignment that
14995 the object would ordinarily have. The value of this macro is used
14996 instead of that alignment to align the object. */
14997
14998 int
14999 ix86_local_alignment (tree type, int align)
15000 {
15001 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
15002 to a 16-byte boundary. */
15003 if (TARGET_64BIT)
15004 {
15005 if (AGGREGATE_TYPE_P (type)
15006 && TYPE_SIZE (type)
15007 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15008 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15009 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15010 return 128;
15011 }
15012 if (TREE_CODE (type) == ARRAY_TYPE)
15013 {
15014 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15015 return 64;
15016 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15017 return 128;
15018 }
15019 else if (TREE_CODE (type) == COMPLEX_TYPE)
15020 {
15021 if (TYPE_MODE (type) == DCmode && align < 64)
15022 return 64;
15023 if (TYPE_MODE (type) == XCmode && align < 128)
15024 return 128;
15025 }
15026 else if ((TREE_CODE (type) == RECORD_TYPE
15027 || TREE_CODE (type) == UNION_TYPE
15028 || TREE_CODE (type) == QUAL_UNION_TYPE)
15029 && TYPE_FIELDS (type))
15030 {
15031 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15032 return 64;
15033 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15034 return 128;
15035 }
15036 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15037 || TREE_CODE (type) == INTEGER_TYPE)
15038 {
15039
15040 if (TYPE_MODE (type) == DFmode && align < 64)
15041 return 64;
15042 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15043 return 128;
15044 }
15045 return align;
15046 }
15047 \f
15048 /* Emit RTL insns to initialize the variable parts of a trampoline.
15049 FNADDR is an RTX for the address of the function's pure code.
15050 CXT is an RTX for the static chain value for the function. */
15051 void
15052 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15053 {
15054 if (!TARGET_64BIT)
15055 {
15056 /* Compute offset from the end of the jmp to the target function. */
15057 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15058 plus_constant (tramp, 10),
15059 NULL_RTX, 1, OPTAB_DIRECT);
15060 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15061 gen_int_mode (0xb9, QImode));
15062 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15063 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15064 gen_int_mode (0xe9, QImode));
15065 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15066 }
15067 else
15068 {
15069 int offset = 0;
15070 /* Try to load the address using the shorter movl instead of movabs.
15071 We may want to support movq for kernel mode, but the kernel does not
15072 use trampolines at the moment. */
15073 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15074 {
15075 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15076 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15077 gen_int_mode (0xbb41, HImode));
15078 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15079 gen_lowpart (SImode, fnaddr));
15080 offset += 6;
15081 }
15082 else
15083 {
15084 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15085 gen_int_mode (0xbb49, HImode));
15086 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15087 fnaddr);
15088 offset += 10;
15089 }
15090 /* Load static chain using movabs to r10. */
15091 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15092 gen_int_mode (0xba49, HImode));
15093 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15094 cxt);
15095 offset += 10;
15096 /* Jump to r11. */
15097 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15098 gen_int_mode (0xff49, HImode));
15099 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15100 gen_int_mode (0xe3, QImode));
15101 offset += 3;
15102 gcc_assert (offset <= TRAMPOLINE_SIZE);
15103 }
15104
15105 #ifdef ENABLE_EXECUTE_STACK
15106 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15107 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15108 #endif
15109 }
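/* For reference, the trampoline bytes emitted above decode as follows
   (immediate operands stored little-endian):

   32-bit:  b9 <cxt:4>         movl   $CXT, %ecx
            e9 <disp:4>        jmp    FNADDR      (rel32 from tramp+10)

   64-bit:  41 bb <fnaddr:4>   movl   $FNADDR, %r11d   (zero-extending form)
       or   49 bb <fnaddr:8>   movabs $FNADDR, %r11
            49 ba <cxt:8>      movabs $CXT, %r10
            49 ff e3           jmp    *%r11  */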
15110 \f
15111 /* Codes for all the SSE/MMX builtins. */
15112 enum ix86_builtins
15113 {
15114 IX86_BUILTIN_ADDPS,
15115 IX86_BUILTIN_ADDSS,
15116 IX86_BUILTIN_DIVPS,
15117 IX86_BUILTIN_DIVSS,
15118 IX86_BUILTIN_MULPS,
15119 IX86_BUILTIN_MULSS,
15120 IX86_BUILTIN_SUBPS,
15121 IX86_BUILTIN_SUBSS,
15122
15123 IX86_BUILTIN_CMPEQPS,
15124 IX86_BUILTIN_CMPLTPS,
15125 IX86_BUILTIN_CMPLEPS,
15126 IX86_BUILTIN_CMPGTPS,
15127 IX86_BUILTIN_CMPGEPS,
15128 IX86_BUILTIN_CMPNEQPS,
15129 IX86_BUILTIN_CMPNLTPS,
15130 IX86_BUILTIN_CMPNLEPS,
15131 IX86_BUILTIN_CMPNGTPS,
15132 IX86_BUILTIN_CMPNGEPS,
15133 IX86_BUILTIN_CMPORDPS,
15134 IX86_BUILTIN_CMPUNORDPS,
15135 IX86_BUILTIN_CMPEQSS,
15136 IX86_BUILTIN_CMPLTSS,
15137 IX86_BUILTIN_CMPLESS,
15138 IX86_BUILTIN_CMPNEQSS,
15139 IX86_BUILTIN_CMPNLTSS,
15140 IX86_BUILTIN_CMPNLESS,
15141 IX86_BUILTIN_CMPNGTSS,
15142 IX86_BUILTIN_CMPNGESS,
15143 IX86_BUILTIN_CMPORDSS,
15144 IX86_BUILTIN_CMPUNORDSS,
15145
15146 IX86_BUILTIN_COMIEQSS,
15147 IX86_BUILTIN_COMILTSS,
15148 IX86_BUILTIN_COMILESS,
15149 IX86_BUILTIN_COMIGTSS,
15150 IX86_BUILTIN_COMIGESS,
15151 IX86_BUILTIN_COMINEQSS,
15152 IX86_BUILTIN_UCOMIEQSS,
15153 IX86_BUILTIN_UCOMILTSS,
15154 IX86_BUILTIN_UCOMILESS,
15155 IX86_BUILTIN_UCOMIGTSS,
15156 IX86_BUILTIN_UCOMIGESS,
15157 IX86_BUILTIN_UCOMINEQSS,
15158
15159 IX86_BUILTIN_CVTPI2PS,
15160 IX86_BUILTIN_CVTPS2PI,
15161 IX86_BUILTIN_CVTSI2SS,
15162 IX86_BUILTIN_CVTSI642SS,
15163 IX86_BUILTIN_CVTSS2SI,
15164 IX86_BUILTIN_CVTSS2SI64,
15165 IX86_BUILTIN_CVTTPS2PI,
15166 IX86_BUILTIN_CVTTSS2SI,
15167 IX86_BUILTIN_CVTTSS2SI64,
15168
15169 IX86_BUILTIN_MAXPS,
15170 IX86_BUILTIN_MAXSS,
15171 IX86_BUILTIN_MINPS,
15172 IX86_BUILTIN_MINSS,
15173
15174 IX86_BUILTIN_LOADUPS,
15175 IX86_BUILTIN_STOREUPS,
15176 IX86_BUILTIN_MOVSS,
15177
15178 IX86_BUILTIN_MOVHLPS,
15179 IX86_BUILTIN_MOVLHPS,
15180 IX86_BUILTIN_LOADHPS,
15181 IX86_BUILTIN_LOADLPS,
15182 IX86_BUILTIN_STOREHPS,
15183 IX86_BUILTIN_STORELPS,
15184
15185 IX86_BUILTIN_MASKMOVQ,
15186 IX86_BUILTIN_MOVMSKPS,
15187 IX86_BUILTIN_PMOVMSKB,
15188
15189 IX86_BUILTIN_MOVNTPS,
15190 IX86_BUILTIN_MOVNTQ,
15191
15192 IX86_BUILTIN_LOADDQU,
15193 IX86_BUILTIN_STOREDQU,
15194
15195 IX86_BUILTIN_PACKSSWB,
15196 IX86_BUILTIN_PACKSSDW,
15197 IX86_BUILTIN_PACKUSWB,
15198
15199 IX86_BUILTIN_PADDB,
15200 IX86_BUILTIN_PADDW,
15201 IX86_BUILTIN_PADDD,
15202 IX86_BUILTIN_PADDQ,
15203 IX86_BUILTIN_PADDSB,
15204 IX86_BUILTIN_PADDSW,
15205 IX86_BUILTIN_PADDUSB,
15206 IX86_BUILTIN_PADDUSW,
15207 IX86_BUILTIN_PSUBB,
15208 IX86_BUILTIN_PSUBW,
15209 IX86_BUILTIN_PSUBD,
15210 IX86_BUILTIN_PSUBQ,
15211 IX86_BUILTIN_PSUBSB,
15212 IX86_BUILTIN_PSUBSW,
15213 IX86_BUILTIN_PSUBUSB,
15214 IX86_BUILTIN_PSUBUSW,
15215
15216 IX86_BUILTIN_PAND,
15217 IX86_BUILTIN_PANDN,
15218 IX86_BUILTIN_POR,
15219 IX86_BUILTIN_PXOR,
15220
15221 IX86_BUILTIN_PAVGB,
15222 IX86_BUILTIN_PAVGW,
15223
15224 IX86_BUILTIN_PCMPEQB,
15225 IX86_BUILTIN_PCMPEQW,
15226 IX86_BUILTIN_PCMPEQD,
15227 IX86_BUILTIN_PCMPGTB,
15228 IX86_BUILTIN_PCMPGTW,
15229 IX86_BUILTIN_PCMPGTD,
15230
15231 IX86_BUILTIN_PMADDWD,
15232
15233 IX86_BUILTIN_PMAXSW,
15234 IX86_BUILTIN_PMAXUB,
15235 IX86_BUILTIN_PMINSW,
15236 IX86_BUILTIN_PMINUB,
15237
15238 IX86_BUILTIN_PMULHUW,
15239 IX86_BUILTIN_PMULHW,
15240 IX86_BUILTIN_PMULLW,
15241
15242 IX86_BUILTIN_PSADBW,
15243 IX86_BUILTIN_PSHUFW,
15244
15245 IX86_BUILTIN_PSLLW,
15246 IX86_BUILTIN_PSLLD,
15247 IX86_BUILTIN_PSLLQ,
15248 IX86_BUILTIN_PSRAW,
15249 IX86_BUILTIN_PSRAD,
15250 IX86_BUILTIN_PSRLW,
15251 IX86_BUILTIN_PSRLD,
15252 IX86_BUILTIN_PSRLQ,
15253 IX86_BUILTIN_PSLLWI,
15254 IX86_BUILTIN_PSLLDI,
15255 IX86_BUILTIN_PSLLQI,
15256 IX86_BUILTIN_PSRAWI,
15257 IX86_BUILTIN_PSRADI,
15258 IX86_BUILTIN_PSRLWI,
15259 IX86_BUILTIN_PSRLDI,
15260 IX86_BUILTIN_PSRLQI,
15261
15262 IX86_BUILTIN_PUNPCKHBW,
15263 IX86_BUILTIN_PUNPCKHWD,
15264 IX86_BUILTIN_PUNPCKHDQ,
15265 IX86_BUILTIN_PUNPCKLBW,
15266 IX86_BUILTIN_PUNPCKLWD,
15267 IX86_BUILTIN_PUNPCKLDQ,
15268
15269 IX86_BUILTIN_SHUFPS,
15270
15271 IX86_BUILTIN_RCPPS,
15272 IX86_BUILTIN_RCPSS,
15273 IX86_BUILTIN_RSQRTPS,
15274 IX86_BUILTIN_RSQRTSS,
15275 IX86_BUILTIN_SQRTPS,
15276 IX86_BUILTIN_SQRTSS,
15277
15278 IX86_BUILTIN_UNPCKHPS,
15279 IX86_BUILTIN_UNPCKLPS,
15280
15281 IX86_BUILTIN_ANDPS,
15282 IX86_BUILTIN_ANDNPS,
15283 IX86_BUILTIN_ORPS,
15284 IX86_BUILTIN_XORPS,
15285
15286 IX86_BUILTIN_EMMS,
15287 IX86_BUILTIN_LDMXCSR,
15288 IX86_BUILTIN_STMXCSR,
15289 IX86_BUILTIN_SFENCE,
15290
15291 /* 3DNow! Original */
15292 IX86_BUILTIN_FEMMS,
15293 IX86_BUILTIN_PAVGUSB,
15294 IX86_BUILTIN_PF2ID,
15295 IX86_BUILTIN_PFACC,
15296 IX86_BUILTIN_PFADD,
15297 IX86_BUILTIN_PFCMPEQ,
15298 IX86_BUILTIN_PFCMPGE,
15299 IX86_BUILTIN_PFCMPGT,
15300 IX86_BUILTIN_PFMAX,
15301 IX86_BUILTIN_PFMIN,
15302 IX86_BUILTIN_PFMUL,
15303 IX86_BUILTIN_PFRCP,
15304 IX86_BUILTIN_PFRCPIT1,
15305 IX86_BUILTIN_PFRCPIT2,
15306 IX86_BUILTIN_PFRSQIT1,
15307 IX86_BUILTIN_PFRSQRT,
15308 IX86_BUILTIN_PFSUB,
15309 IX86_BUILTIN_PFSUBR,
15310 IX86_BUILTIN_PI2FD,
15311 IX86_BUILTIN_PMULHRW,
15312
15313 /* 3DNow! Athlon Extensions */
15314 IX86_BUILTIN_PF2IW,
15315 IX86_BUILTIN_PFNACC,
15316 IX86_BUILTIN_PFPNACC,
15317 IX86_BUILTIN_PI2FW,
15318 IX86_BUILTIN_PSWAPDSI,
15319 IX86_BUILTIN_PSWAPDSF,
15320
15321 /* SSE2 */
15322 IX86_BUILTIN_ADDPD,
15323 IX86_BUILTIN_ADDSD,
15324 IX86_BUILTIN_DIVPD,
15325 IX86_BUILTIN_DIVSD,
15326 IX86_BUILTIN_MULPD,
15327 IX86_BUILTIN_MULSD,
15328 IX86_BUILTIN_SUBPD,
15329 IX86_BUILTIN_SUBSD,
15330
15331 IX86_BUILTIN_CMPEQPD,
15332 IX86_BUILTIN_CMPLTPD,
15333 IX86_BUILTIN_CMPLEPD,
15334 IX86_BUILTIN_CMPGTPD,
15335 IX86_BUILTIN_CMPGEPD,
15336 IX86_BUILTIN_CMPNEQPD,
15337 IX86_BUILTIN_CMPNLTPD,
15338 IX86_BUILTIN_CMPNLEPD,
15339 IX86_BUILTIN_CMPNGTPD,
15340 IX86_BUILTIN_CMPNGEPD,
15341 IX86_BUILTIN_CMPORDPD,
15342 IX86_BUILTIN_CMPUNORDPD,
15343 IX86_BUILTIN_CMPNEPD,
15344 IX86_BUILTIN_CMPEQSD,
15345 IX86_BUILTIN_CMPLTSD,
15346 IX86_BUILTIN_CMPLESD,
15347 IX86_BUILTIN_CMPNEQSD,
15348 IX86_BUILTIN_CMPNLTSD,
15349 IX86_BUILTIN_CMPNLESD,
15350 IX86_BUILTIN_CMPORDSD,
15351 IX86_BUILTIN_CMPUNORDSD,
15352 IX86_BUILTIN_CMPNESD,
15353
15354 IX86_BUILTIN_COMIEQSD,
15355 IX86_BUILTIN_COMILTSD,
15356 IX86_BUILTIN_COMILESD,
15357 IX86_BUILTIN_COMIGTSD,
15358 IX86_BUILTIN_COMIGESD,
15359 IX86_BUILTIN_COMINEQSD,
15360 IX86_BUILTIN_UCOMIEQSD,
15361 IX86_BUILTIN_UCOMILTSD,
15362 IX86_BUILTIN_UCOMILESD,
15363 IX86_BUILTIN_UCOMIGTSD,
15364 IX86_BUILTIN_UCOMIGESD,
15365 IX86_BUILTIN_UCOMINEQSD,
15366
15367 IX86_BUILTIN_MAXPD,
15368 IX86_BUILTIN_MAXSD,
15369 IX86_BUILTIN_MINPD,
15370 IX86_BUILTIN_MINSD,
15371
15372 IX86_BUILTIN_ANDPD,
15373 IX86_BUILTIN_ANDNPD,
15374 IX86_BUILTIN_ORPD,
15375 IX86_BUILTIN_XORPD,
15376
15377 IX86_BUILTIN_SQRTPD,
15378 IX86_BUILTIN_SQRTSD,
15379
15380 IX86_BUILTIN_UNPCKHPD,
15381 IX86_BUILTIN_UNPCKLPD,
15382
15383 IX86_BUILTIN_SHUFPD,
15384
15385 IX86_BUILTIN_LOADUPD,
15386 IX86_BUILTIN_STOREUPD,
15387 IX86_BUILTIN_MOVSD,
15388
15389 IX86_BUILTIN_LOADHPD,
15390 IX86_BUILTIN_LOADLPD,
15391
15392 IX86_BUILTIN_CVTDQ2PD,
15393 IX86_BUILTIN_CVTDQ2PS,
15394
15395 IX86_BUILTIN_CVTPD2DQ,
15396 IX86_BUILTIN_CVTPD2PI,
15397 IX86_BUILTIN_CVTPD2PS,
15398 IX86_BUILTIN_CVTTPD2DQ,
15399 IX86_BUILTIN_CVTTPD2PI,
15400
15401 IX86_BUILTIN_CVTPI2PD,
15402 IX86_BUILTIN_CVTSI2SD,
15403 IX86_BUILTIN_CVTSI642SD,
15404
15405 IX86_BUILTIN_CVTSD2SI,
15406 IX86_BUILTIN_CVTSD2SI64,
15407 IX86_BUILTIN_CVTSD2SS,
15408 IX86_BUILTIN_CVTSS2SD,
15409 IX86_BUILTIN_CVTTSD2SI,
15410 IX86_BUILTIN_CVTTSD2SI64,
15411
15412 IX86_BUILTIN_CVTPS2DQ,
15413 IX86_BUILTIN_CVTPS2PD,
15414 IX86_BUILTIN_CVTTPS2DQ,
15415
15416 IX86_BUILTIN_MOVNTI,
15417 IX86_BUILTIN_MOVNTPD,
15418 IX86_BUILTIN_MOVNTDQ,
15419
15420 /* SSE2 MMX */
15421 IX86_BUILTIN_MASKMOVDQU,
15422 IX86_BUILTIN_MOVMSKPD,
15423 IX86_BUILTIN_PMOVMSKB128,
15424
15425 IX86_BUILTIN_PACKSSWB128,
15426 IX86_BUILTIN_PACKSSDW128,
15427 IX86_BUILTIN_PACKUSWB128,
15428
15429 IX86_BUILTIN_PADDB128,
15430 IX86_BUILTIN_PADDW128,
15431 IX86_BUILTIN_PADDD128,
15432 IX86_BUILTIN_PADDQ128,
15433 IX86_BUILTIN_PADDSB128,
15434 IX86_BUILTIN_PADDSW128,
15435 IX86_BUILTIN_PADDUSB128,
15436 IX86_BUILTIN_PADDUSW128,
15437 IX86_BUILTIN_PSUBB128,
15438 IX86_BUILTIN_PSUBW128,
15439 IX86_BUILTIN_PSUBD128,
15440 IX86_BUILTIN_PSUBQ128,
15441 IX86_BUILTIN_PSUBSB128,
15442 IX86_BUILTIN_PSUBSW128,
15443 IX86_BUILTIN_PSUBUSB128,
15444 IX86_BUILTIN_PSUBUSW128,
15445
15446 IX86_BUILTIN_PAND128,
15447 IX86_BUILTIN_PANDN128,
15448 IX86_BUILTIN_POR128,
15449 IX86_BUILTIN_PXOR128,
15450
15451 IX86_BUILTIN_PAVGB128,
15452 IX86_BUILTIN_PAVGW128,
15453
15454 IX86_BUILTIN_PCMPEQB128,
15455 IX86_BUILTIN_PCMPEQW128,
15456 IX86_BUILTIN_PCMPEQD128,
15457 IX86_BUILTIN_PCMPGTB128,
15458 IX86_BUILTIN_PCMPGTW128,
15459 IX86_BUILTIN_PCMPGTD128,
15460
15461 IX86_BUILTIN_PMADDWD128,
15462
15463 IX86_BUILTIN_PMAXSW128,
15464 IX86_BUILTIN_PMAXUB128,
15465 IX86_BUILTIN_PMINSW128,
15466 IX86_BUILTIN_PMINUB128,
15467
15468 IX86_BUILTIN_PMULUDQ,
15469 IX86_BUILTIN_PMULUDQ128,
15470 IX86_BUILTIN_PMULHUW128,
15471 IX86_BUILTIN_PMULHW128,
15472 IX86_BUILTIN_PMULLW128,
15473
15474 IX86_BUILTIN_PSADBW128,
15475 IX86_BUILTIN_PSHUFHW,
15476 IX86_BUILTIN_PSHUFLW,
15477 IX86_BUILTIN_PSHUFD,
15478
15479 IX86_BUILTIN_PSLLW128,
15480 IX86_BUILTIN_PSLLD128,
15481 IX86_BUILTIN_PSLLQ128,
15482 IX86_BUILTIN_PSRAW128,
15483 IX86_BUILTIN_PSRAD128,
15484 IX86_BUILTIN_PSRLW128,
15485 IX86_BUILTIN_PSRLD128,
15486 IX86_BUILTIN_PSRLQ128,
15487 IX86_BUILTIN_PSLLDQI128,
15488 IX86_BUILTIN_PSLLWI128,
15489 IX86_BUILTIN_PSLLDI128,
15490 IX86_BUILTIN_PSLLQI128,
15491 IX86_BUILTIN_PSRAWI128,
15492 IX86_BUILTIN_PSRADI128,
15493 IX86_BUILTIN_PSRLDQI128,
15494 IX86_BUILTIN_PSRLWI128,
15495 IX86_BUILTIN_PSRLDI128,
15496 IX86_BUILTIN_PSRLQI128,
15497
15498 IX86_BUILTIN_PUNPCKHBW128,
15499 IX86_BUILTIN_PUNPCKHWD128,
15500 IX86_BUILTIN_PUNPCKHDQ128,
15501 IX86_BUILTIN_PUNPCKHQDQ128,
15502 IX86_BUILTIN_PUNPCKLBW128,
15503 IX86_BUILTIN_PUNPCKLWD128,
15504 IX86_BUILTIN_PUNPCKLDQ128,
15505 IX86_BUILTIN_PUNPCKLQDQ128,
15506
15507 IX86_BUILTIN_CLFLUSH,
15508 IX86_BUILTIN_MFENCE,
15509 IX86_BUILTIN_LFENCE,
15510
15511 /* Prescott New Instructions. */
15512 IX86_BUILTIN_ADDSUBPS,
15513 IX86_BUILTIN_HADDPS,
15514 IX86_BUILTIN_HSUBPS,
15515 IX86_BUILTIN_MOVSHDUP,
15516 IX86_BUILTIN_MOVSLDUP,
15517 IX86_BUILTIN_ADDSUBPD,
15518 IX86_BUILTIN_HADDPD,
15519 IX86_BUILTIN_HSUBPD,
15520 IX86_BUILTIN_LDDQU,
15521
15522 IX86_BUILTIN_MONITOR,
15523 IX86_BUILTIN_MWAIT,
15524
15525 /* SSSE3. */
15526 IX86_BUILTIN_PHADDW,
15527 IX86_BUILTIN_PHADDD,
15528 IX86_BUILTIN_PHADDSW,
15529 IX86_BUILTIN_PHSUBW,
15530 IX86_BUILTIN_PHSUBD,
15531 IX86_BUILTIN_PHSUBSW,
15532 IX86_BUILTIN_PMADDUBSW,
15533 IX86_BUILTIN_PMULHRSW,
15534 IX86_BUILTIN_PSHUFB,
15535 IX86_BUILTIN_PSIGNB,
15536 IX86_BUILTIN_PSIGNW,
15537 IX86_BUILTIN_PSIGND,
15538 IX86_BUILTIN_PALIGNR,
15539 IX86_BUILTIN_PABSB,
15540 IX86_BUILTIN_PABSW,
15541 IX86_BUILTIN_PABSD,
15542
15543 IX86_BUILTIN_PHADDW128,
15544 IX86_BUILTIN_PHADDD128,
15545 IX86_BUILTIN_PHADDSW128,
15546 IX86_BUILTIN_PHSUBW128,
15547 IX86_BUILTIN_PHSUBD128,
15548 IX86_BUILTIN_PHSUBSW128,
15549 IX86_BUILTIN_PMADDUBSW128,
15550 IX86_BUILTIN_PMULHRSW128,
15551 IX86_BUILTIN_PSHUFB128,
15552 IX86_BUILTIN_PSIGNB128,
15553 IX86_BUILTIN_PSIGNW128,
15554 IX86_BUILTIN_PSIGND128,
15555 IX86_BUILTIN_PALIGNR128,
15556 IX86_BUILTIN_PABSB128,
15557 IX86_BUILTIN_PABSW128,
15558 IX86_BUILTIN_PABSD128,
15559
15560 IX86_BUILTIN_VEC_INIT_V2SI,
15561 IX86_BUILTIN_VEC_INIT_V4HI,
15562 IX86_BUILTIN_VEC_INIT_V8QI,
15563 IX86_BUILTIN_VEC_EXT_V2DF,
15564 IX86_BUILTIN_VEC_EXT_V2DI,
15565 IX86_BUILTIN_VEC_EXT_V4SF,
15566 IX86_BUILTIN_VEC_EXT_V4SI,
15567 IX86_BUILTIN_VEC_EXT_V8HI,
15568 IX86_BUILTIN_VEC_EXT_V2SI,
15569 IX86_BUILTIN_VEC_EXT_V4HI,
15570 IX86_BUILTIN_VEC_SET_V8HI,
15571 IX86_BUILTIN_VEC_SET_V4HI,
15572
15573 IX86_BUILTIN_MAX
15574 };
15575
15576 /* Table for the ix86 builtin decls. */
15577 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
15578
15579 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
15580 * only if the target_flags include one of MASK. Stores the function decl
15581 * in the ix86_builtins array.
15582 * Returns the function decl or NULL_TREE if the builtin was not added. */
15583
15584 static inline tree
15585 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
15586 {
15587 tree decl = NULL_TREE;
15588
15589 if (mask & target_flags
15590 && (!(mask & MASK_64BIT) || TARGET_64BIT))
15591 {
15592 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
15593 NULL, NULL_TREE);
15594 ix86_builtins[(int) code] = decl;
15595 }
15596
15597 return decl;
15598 }
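/* A typical use later in this file looks roughly like

     def_builtin (MASK_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   (the function-type node name here is only illustrative); when the
   required bits are missing from target_flags the builtin is simply not
   registered and NULL_TREE is returned.  */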
15599
15600 /* Like def_builtin, but also marks the function decl "const". */
15601
15602 static inline tree
15603 def_builtin_const (int mask, const char *name, tree type,
15604 enum ix86_builtins code)
15605 {
15606 tree decl = def_builtin (mask, name, type, code);
15607 if (decl)
15608 TREE_READONLY (decl) = 1;
15609 return decl;
15610 }
15611
15612 /* Bits for builtin_description.flag. */
15613
15614 /* Set when we don't support the comparison natively, and should
15615 swap the comparison operands in order to support it. */
15616 #define BUILTIN_DESC_SWAP_OPERANDS 1
15617
15618 struct builtin_description
15619 {
15620 const unsigned int mask;
15621 const enum insn_code icode;
15622 const char *const name;
15623 const enum ix86_builtins code;
15624 const enum rtx_code comparison;
15625 const unsigned int flag;
15626 };
15627
15628 static const struct builtin_description bdesc_comi[] =
15629 {
15630 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
15631 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
15632 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
15633 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
15634 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
15635 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
15636 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
15637 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
15638 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
15639 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
15640 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
15641 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
15642 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
15643 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
15644 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
15645 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
15646 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
15647 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
15648 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
15649 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
15650 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
15651 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
15652 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
15653 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
15654 };
15655
15656 static const struct builtin_description bdesc_2arg[] =
15657 {
15658 /* SSE */
15659 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
15660 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
15661 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
15662 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
15663 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
15664 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
15665 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
15666 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
15667
15668 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
15669 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
15670 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
15671 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
15672 BUILTIN_DESC_SWAP_OPERANDS },
15673 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
15674 BUILTIN_DESC_SWAP_OPERANDS },
15675 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
15676 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
15677 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
15678 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
15679 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
15680 BUILTIN_DESC_SWAP_OPERANDS },
15681 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
15682 BUILTIN_DESC_SWAP_OPERANDS },
15683 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
15684 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
15685 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
15686 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
15687 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
15688 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
15689 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
15690 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
15691 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
15692 BUILTIN_DESC_SWAP_OPERANDS },
15693 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
15694 BUILTIN_DESC_SWAP_OPERANDS },
15695 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
15696
15697 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
15698 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
15699 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
15700 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
15701
15702 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
15703 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
15704 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
15705 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
15706
15707 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
15708 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
15709 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
15710 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
15711 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
15712
15713 /* MMX */
15714 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
15715 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
15716 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
15717 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
15718 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
15719 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
15720 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
15721 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
15722
15723 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
15724 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
15725 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
15726 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
15727 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
15728 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
15729 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
15730 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
15731
15732 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
15733 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
15734 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
15735
15736 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
15737 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
15738 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
15739 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
15740
15741 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
15742 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
15743
15744 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
15745 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
15746 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
15747 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
15748 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
15749 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
15750
15751 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
15752 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
15753 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
15754 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
15755
15756 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
15757 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
15758 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
15759 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
15760 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
15761 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
15762
15763 /* Special. */
15764 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
15765 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
15766 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
15767
15768 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
15769 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
15770 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
15771
15772 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
15773 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
15774 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
15775 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
15776 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
15777 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
15778
15779 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
15780 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
15781 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
15782 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
15783 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
15784 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
15785
15786 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
15787 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
15788 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
15789 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
15790
15791 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
15792 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
15793
15794 /* SSE2 */
15795 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
15796 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
15797 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
15798 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
15799 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
15800 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
15801 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
15802 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
15803
15804 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
15805 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
15806 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
15807 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
15808 BUILTIN_DESC_SWAP_OPERANDS },
15809 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
15810 BUILTIN_DESC_SWAP_OPERANDS },
15811 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
15812 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
15813 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
15814 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
15815 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
15816 BUILTIN_DESC_SWAP_OPERANDS },
15817 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
15818 BUILTIN_DESC_SWAP_OPERANDS },
15819 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
15820 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
15821 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
15822 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
15823 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
15824 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
15825 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
15826 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
15827 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
15828
15829 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
15830 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
15831 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
15832 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
15833
15834 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
15835 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
15836 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
15837 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
15838
15839 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
15840 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
15841 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
15842
15843 /* SSE2 MMX */
15844 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
15845 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
15846 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
15847 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
15848 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
15849 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
15850 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
15851 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
15852
15853 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
15854 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
15855 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
15856 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
15857 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
15858 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
15859 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
15860 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
15861
15862 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
15863 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
15864
15865 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
15866 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
15867 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
15868 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
15869
15870 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
15871 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
15872
15873 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
15874 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
15875 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
15876 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
15877 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
15878 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
15879
15880 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
15881 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
15882 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
15883 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
15884
15885 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
15886 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
15887 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
15888 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
15889 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
15890 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
15891 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
15892 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
15893
15894 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
15895 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
15896 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
15897
15898 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
15899 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
15900
15901 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
15902 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
15903
15904 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
15905 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
15906 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
15907
15908 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
15909 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
15910 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
15911
15912 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
15913 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
15914
15915 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
15916
15917 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
15918 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
15919 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
15920 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
15921
15922 /* SSE3 MMX */
15923 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
15924 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
15925 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
15926 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
15927 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
15928 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
15929
15930 /* SSSE3 */
15931 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
15932 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
15933 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
15934 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
15935 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
15936 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
15937 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
15938 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
15939 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
15940 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
15941 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
15942 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
15943 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
15944 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
15945 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
15946 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
15947 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
15948 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
15949 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
15950 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
15951 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
15952 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
15953 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
15954 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
15955 };
15956
15957 static const struct builtin_description bdesc_1arg[] =
15958 {
15959 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
15960 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
15961
15962 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
15963 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
15964 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
15965
15966 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
15967 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
15968 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
15969 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
15970 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
15971 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
15972
15973 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
15974 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
15975
15976 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
15977
15978 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
15979 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
15980
15981 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
15982 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
15983 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
15984 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
15985 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
15986
15987 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
15988
15989 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
15990 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
15991 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
15992 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
15993
15994 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
15995 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
15996 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
15997
15998 /* SSE3 */
15999 { MASK_SSE3, CODE_FOR_sse3_movshdup, 0, IX86_BUILTIN_MOVSHDUP, 0, 0 },
16000 { MASK_SSE3, CODE_FOR_sse3_movsldup, 0, IX86_BUILTIN_MOVSLDUP, 0, 0 },
16001
16002 /* SSSE3 */
16003 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16004 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16005 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16006 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16007 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16008 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16009 };
16010
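/* Note: table entries whose name field is 0 (for instance the
   IX86_BUILTIN_PSADBW128 and CVT* records above) are skipped by the
   generic registration loops in ix86_init_mmx_sse_builtins; those
   builtins get hand-written prototypes through explicit def_builtin
   calls instead, e.g.

       def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128",
                    v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);

   as done further down in this file.  */
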
16011 static void
16012 ix86_init_builtins (void)
16013 {
16014 if (TARGET_MMX)
16015 ix86_init_mmx_sse_builtins ();
16016 }
16017
16018 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16019 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16020 builtins. */
16021 static void
16022 ix86_init_mmx_sse_builtins (void)
16023 {
16024 const struct builtin_description * d;
16025 size_t i;
16026
16027 tree V16QI_type_node = build_vector_type_for_mode (intQI_type_node, V16QImode);
16028 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16029 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16030 tree V2DI_type_node
16031 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16032 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16033 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16034 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16035 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16036 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
16037 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16038
16039 tree pchar_type_node = build_pointer_type (char_type_node);
16040 tree pcchar_type_node = build_pointer_type (
16041 build_type_variant (char_type_node, 1, 0));
16042 tree pfloat_type_node = build_pointer_type (float_type_node);
16043 tree pcfloat_type_node = build_pointer_type (
16044 build_type_variant (float_type_node, 1, 0));
16045 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16046 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16047 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16048
16049 /* Comparisons. */
16050 tree int_ftype_v4sf_v4sf
16051 = build_function_type_list (integer_type_node,
16052 V4SF_type_node, V4SF_type_node, NULL_TREE);
16053 tree v4si_ftype_v4sf_v4sf
16054 = build_function_type_list (V4SI_type_node,
16055 V4SF_type_node, V4SF_type_node, NULL_TREE);
16056 /* MMX/SSE/integer conversions. */
16057 tree int_ftype_v4sf
16058 = build_function_type_list (integer_type_node,
16059 V4SF_type_node, NULL_TREE);
16060 tree int64_ftype_v4sf
16061 = build_function_type_list (long_long_integer_type_node,
16062 V4SF_type_node, NULL_TREE);
16063 tree int_ftype_v8qi
16064 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16065 tree v4sf_ftype_v4sf_int
16066 = build_function_type_list (V4SF_type_node,
16067 V4SF_type_node, integer_type_node, NULL_TREE);
16068 tree v4sf_ftype_v4sf_int64
16069 = build_function_type_list (V4SF_type_node,
16070 V4SF_type_node, long_long_integer_type_node,
16071 NULL_TREE);
16072 tree v4sf_ftype_v4sf_v2si
16073 = build_function_type_list (V4SF_type_node,
16074 V4SF_type_node, V2SI_type_node, NULL_TREE);
16075
16076 /* Miscellaneous. */
16077 tree v8qi_ftype_v4hi_v4hi
16078 = build_function_type_list (V8QI_type_node,
16079 V4HI_type_node, V4HI_type_node, NULL_TREE);
16080 tree v4hi_ftype_v2si_v2si
16081 = build_function_type_list (V4HI_type_node,
16082 V2SI_type_node, V2SI_type_node, NULL_TREE);
16083 tree v4sf_ftype_v4sf_v4sf_int
16084 = build_function_type_list (V4SF_type_node,
16085 V4SF_type_node, V4SF_type_node,
16086 integer_type_node, NULL_TREE);
16087 tree v2si_ftype_v4hi_v4hi
16088 = build_function_type_list (V2SI_type_node,
16089 V4HI_type_node, V4HI_type_node, NULL_TREE);
16090 tree v4hi_ftype_v4hi_int
16091 = build_function_type_list (V4HI_type_node,
16092 V4HI_type_node, integer_type_node, NULL_TREE);
16093 tree v4hi_ftype_v4hi_di
16094 = build_function_type_list (V4HI_type_node,
16095 V4HI_type_node, long_long_unsigned_type_node,
16096 NULL_TREE);
16097 tree v2si_ftype_v2si_di
16098 = build_function_type_list (V2SI_type_node,
16099 V2SI_type_node, long_long_unsigned_type_node,
16100 NULL_TREE);
16101 tree void_ftype_void
16102 = build_function_type (void_type_node, void_list_node);
16103 tree void_ftype_unsigned
16104 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16105 tree void_ftype_unsigned_unsigned
16106 = build_function_type_list (void_type_node, unsigned_type_node,
16107 unsigned_type_node, NULL_TREE);
16108 tree void_ftype_pcvoid_unsigned_unsigned
16109 = build_function_type_list (void_type_node, const_ptr_type_node,
16110 unsigned_type_node, unsigned_type_node,
16111 NULL_TREE);
16112 tree unsigned_ftype_void
16113 = build_function_type (unsigned_type_node, void_list_node);
16114 tree v2si_ftype_v4sf
16115 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16116 /* Loads/stores. */
16117 tree void_ftype_v8qi_v8qi_pchar
16118 = build_function_type_list (void_type_node,
16119 V8QI_type_node, V8QI_type_node,
16120 pchar_type_node, NULL_TREE);
16121 tree v4sf_ftype_pcfloat
16122 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16123 /* @@@ the type is bogus */
16124 tree v4sf_ftype_v4sf_pv2si
16125 = build_function_type_list (V4SF_type_node,
16126 V4SF_type_node, pv2si_type_node, NULL_TREE);
16127 tree void_ftype_pv2si_v4sf
16128 = build_function_type_list (void_type_node,
16129 pv2si_type_node, V4SF_type_node, NULL_TREE);
16130 tree void_ftype_pfloat_v4sf
16131 = build_function_type_list (void_type_node,
16132 pfloat_type_node, V4SF_type_node, NULL_TREE);
16133 tree void_ftype_pdi_di
16134 = build_function_type_list (void_type_node,
16135 pdi_type_node, long_long_unsigned_type_node,
16136 NULL_TREE);
16137 tree void_ftype_pv2di_v2di
16138 = build_function_type_list (void_type_node,
16139 pv2di_type_node, V2DI_type_node, NULL_TREE);
16140 /* Normal vector unops. */
16141 tree v4sf_ftype_v4sf
16142 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16143 tree v16qi_ftype_v16qi
16144 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16145 tree v8hi_ftype_v8hi
16146 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16147 tree v4si_ftype_v4si
16148 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16149 tree v8qi_ftype_v8qi
16150 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16151 tree v4hi_ftype_v4hi
16152 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16153
16154 /* Normal vector binops. */
16155 tree v4sf_ftype_v4sf_v4sf
16156 = build_function_type_list (V4SF_type_node,
16157 V4SF_type_node, V4SF_type_node, NULL_TREE);
16158 tree v8qi_ftype_v8qi_v8qi
16159 = build_function_type_list (V8QI_type_node,
16160 V8QI_type_node, V8QI_type_node, NULL_TREE);
16161 tree v4hi_ftype_v4hi_v4hi
16162 = build_function_type_list (V4HI_type_node,
16163 V4HI_type_node, V4HI_type_node, NULL_TREE);
16164 tree v2si_ftype_v2si_v2si
16165 = build_function_type_list (V2SI_type_node,
16166 V2SI_type_node, V2SI_type_node, NULL_TREE);
16167 tree di_ftype_di_di
16168 = build_function_type_list (long_long_unsigned_type_node,
16169 long_long_unsigned_type_node,
16170 long_long_unsigned_type_node, NULL_TREE);
16171
16172 tree di_ftype_di_di_int
16173 = build_function_type_list (long_long_unsigned_type_node,
16174 long_long_unsigned_type_node,
16175 long_long_unsigned_type_node,
16176 integer_type_node, NULL_TREE);
16177
16178 tree v2si_ftype_v2sf
16179 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16180 tree v2sf_ftype_v2si
16181 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16182 tree v2si_ftype_v2si
16183 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16184 tree v2sf_ftype_v2sf
16185 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16186 tree v2sf_ftype_v2sf_v2sf
16187 = build_function_type_list (V2SF_type_node,
16188 V2SF_type_node, V2SF_type_node, NULL_TREE);
16189 tree v2si_ftype_v2sf_v2sf
16190 = build_function_type_list (V2SI_type_node,
16191 V2SF_type_node, V2SF_type_node, NULL_TREE);
16192 tree pint_type_node = build_pointer_type (integer_type_node);
16193 tree pdouble_type_node = build_pointer_type (double_type_node);
16194 tree pcdouble_type_node = build_pointer_type (
16195 build_type_variant (double_type_node, 1, 0));
16196 tree int_ftype_v2df_v2df
16197 = build_function_type_list (integer_type_node,
16198 V2DF_type_node, V2DF_type_node, NULL_TREE);
16199
16200 tree void_ftype_pcvoid
16201 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16202 tree v4sf_ftype_v4si
16203 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16204 tree v4si_ftype_v4sf
16205 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16206 tree v2df_ftype_v4si
16207 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16208 tree v4si_ftype_v2df
16209 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16210 tree v2si_ftype_v2df
16211 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16212 tree v4sf_ftype_v2df
16213 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16214 tree v2df_ftype_v2si
16215 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16216 tree v2df_ftype_v4sf
16217 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16218 tree int_ftype_v2df
16219 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16220 tree int64_ftype_v2df
16221 = build_function_type_list (long_long_integer_type_node,
16222 V2DF_type_node, NULL_TREE);
16223 tree v2df_ftype_v2df_int
16224 = build_function_type_list (V2DF_type_node,
16225 V2DF_type_node, integer_type_node, NULL_TREE);
16226 tree v2df_ftype_v2df_int64
16227 = build_function_type_list (V2DF_type_node,
16228 V2DF_type_node, long_long_integer_type_node,
16229 NULL_TREE);
16230 tree v4sf_ftype_v4sf_v2df
16231 = build_function_type_list (V4SF_type_node,
16232 V4SF_type_node, V2DF_type_node, NULL_TREE);
16233 tree v2df_ftype_v2df_v4sf
16234 = build_function_type_list (V2DF_type_node,
16235 V2DF_type_node, V4SF_type_node, NULL_TREE);
16236 tree v2df_ftype_v2df_v2df_int
16237 = build_function_type_list (V2DF_type_node,
16238 V2DF_type_node, V2DF_type_node,
16239 integer_type_node,
16240 NULL_TREE);
16241 tree v2df_ftype_v2df_pcdouble
16242 = build_function_type_list (V2DF_type_node,
16243 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16244 tree void_ftype_pdouble_v2df
16245 = build_function_type_list (void_type_node,
16246 pdouble_type_node, V2DF_type_node, NULL_TREE);
16247 tree void_ftype_pint_int
16248 = build_function_type_list (void_type_node,
16249 pint_type_node, integer_type_node, NULL_TREE);
16250 tree void_ftype_v16qi_v16qi_pchar
16251 = build_function_type_list (void_type_node,
16252 V16QI_type_node, V16QI_type_node,
16253 pchar_type_node, NULL_TREE);
16254 tree v2df_ftype_pcdouble
16255 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16256 tree v2df_ftype_v2df_v2df
16257 = build_function_type_list (V2DF_type_node,
16258 V2DF_type_node, V2DF_type_node, NULL_TREE);
16259 tree v16qi_ftype_v16qi_v16qi
16260 = build_function_type_list (V16QI_type_node,
16261 V16QI_type_node, V16QI_type_node, NULL_TREE);
16262 tree v8hi_ftype_v8hi_v8hi
16263 = build_function_type_list (V8HI_type_node,
16264 V8HI_type_node, V8HI_type_node, NULL_TREE);
16265 tree v4si_ftype_v4si_v4si
16266 = build_function_type_list (V4SI_type_node,
16267 V4SI_type_node, V4SI_type_node, NULL_TREE);
16268 tree v2di_ftype_v2di_v2di
16269 = build_function_type_list (V2DI_type_node,
16270 V2DI_type_node, V2DI_type_node, NULL_TREE);
16271 tree v2di_ftype_v2df_v2df
16272 = build_function_type_list (V2DI_type_node,
16273 V2DF_type_node, V2DF_type_node, NULL_TREE);
16274 tree v2df_ftype_v2df
16275 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16276 tree v2di_ftype_v2di_int
16277 = build_function_type_list (V2DI_type_node,
16278 V2DI_type_node, integer_type_node, NULL_TREE);
16279 tree v2di_ftype_v2di_v2di_int
16280 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16281 V2DI_type_node, integer_type_node, NULL_TREE);
16282 tree v4si_ftype_v4si_int
16283 = build_function_type_list (V4SI_type_node,
16284 V4SI_type_node, integer_type_node, NULL_TREE);
16285 tree v8hi_ftype_v8hi_int
16286 = build_function_type_list (V8HI_type_node,
16287 V8HI_type_node, integer_type_node, NULL_TREE);
16288 tree v8hi_ftype_v8hi_v2di
16289 = build_function_type_list (V8HI_type_node,
16290 V8HI_type_node, V2DI_type_node, NULL_TREE);
16291 tree v4si_ftype_v4si_v2di
16292 = build_function_type_list (V4SI_type_node,
16293 V4SI_type_node, V2DI_type_node, NULL_TREE);
16294 tree v4si_ftype_v8hi_v8hi
16295 = build_function_type_list (V4SI_type_node,
16296 V8HI_type_node, V8HI_type_node, NULL_TREE);
16297 tree di_ftype_v8qi_v8qi
16298 = build_function_type_list (long_long_unsigned_type_node,
16299 V8QI_type_node, V8QI_type_node, NULL_TREE);
16300 tree di_ftype_v2si_v2si
16301 = build_function_type_list (long_long_unsigned_type_node,
16302 V2SI_type_node, V2SI_type_node, NULL_TREE);
16303 tree v2di_ftype_v16qi_v16qi
16304 = build_function_type_list (V2DI_type_node,
16305 V16QI_type_node, V16QI_type_node, NULL_TREE);
16306 tree v2di_ftype_v4si_v4si
16307 = build_function_type_list (V2DI_type_node,
16308 V4SI_type_node, V4SI_type_node, NULL_TREE);
16309 tree int_ftype_v16qi
16310 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16311 tree v16qi_ftype_pcchar
16312 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16313 tree void_ftype_pchar_v16qi
16314 = build_function_type_list (void_type_node,
16315 pchar_type_node, V16QI_type_node, NULL_TREE);
16316
16317 tree float80_type;
16318 tree float128_type;
16319 tree ftype;
16320
16321 /* The __float80 type. */
16322 if (TYPE_MODE (long_double_type_node) == XFmode)
16323 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16324 "__float80");
16325 else
16326 {
16327 /* The __float80 type. */
16328 float80_type = make_node (REAL_TYPE);
16329 TYPE_PRECISION (float80_type) = 80;
16330 layout_type (float80_type);
16331 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16332 }
16333
16334 if (TARGET_64BIT)
16335 {
16336 float128_type = make_node (REAL_TYPE);
16337 TYPE_PRECISION (float128_type) = 128;
16338 layout_type (float128_type);
16339 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16340 }
16341
16342 /* Add all builtins that are more or less simple operations on two
16343 operands. */
16344 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16345 {
16346 /* Use one of the operands; the target can have a different mode for
16347 mask-generating compares. */
16348 enum machine_mode mode;
16349 tree type;
16350
16351 if (d->name == 0)
16352 continue;
16353 mode = insn_data[d->icode].operand[1].mode;
16354
16355 switch (mode)
16356 {
16357 case V16QImode:
16358 type = v16qi_ftype_v16qi_v16qi;
16359 break;
16360 case V8HImode:
16361 type = v8hi_ftype_v8hi_v8hi;
16362 break;
16363 case V4SImode:
16364 type = v4si_ftype_v4si_v4si;
16365 break;
16366 case V2DImode:
16367 type = v2di_ftype_v2di_v2di;
16368 break;
16369 case V2DFmode:
16370 type = v2df_ftype_v2df_v2df;
16371 break;
16372 case V4SFmode:
16373 type = v4sf_ftype_v4sf_v4sf;
16374 break;
16375 case V8QImode:
16376 type = v8qi_ftype_v8qi_v8qi;
16377 break;
16378 case V4HImode:
16379 type = v4hi_ftype_v4hi_v4hi;
16380 break;
16381 case V2SImode:
16382 type = v2si_ftype_v2si_v2si;
16383 break;
16384 case DImode:
16385 type = di_ftype_di_di;
16386 break;
16387
16388 default:
16389 gcc_unreachable ();
16390 }
16391
16392 /* Override for comparisons. */
16393 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16394 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16395 type = v4si_ftype_v4sf_v4sf;
16396
16397 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16398 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16399 type = v2di_ftype_v2df_v2df;
16400
16401 def_builtin (d->mask, d->name, type, d->code);
16402 }
16403
16404 /* Add all builtins that are more or less simple operations on 1 operand. */
16405 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16406 {
16407 enum machine_mode mode;
16408 tree type;
16409
16410 if (d->name == 0)
16411 continue;
16412 mode = insn_data[d->icode].operand[1].mode;
16413
16414 switch (mode)
16415 {
16416 case V16QImode:
16417 type = v16qi_ftype_v16qi;
16418 break;
16419 case V8HImode:
16420 type = v8hi_ftype_v8hi;
16421 break;
16422 case V4SImode:
16423 type = v4si_ftype_v4si;
16424 break;
16425 case V2DFmode:
16426 type = v2df_ftype_v2df;
16427 break;
16428 case V4SFmode:
16429 type = v4sf_ftype_v4sf;
16430 break;
16431 case V8QImode:
16432 type = v8qi_ftype_v8qi;
16433 break;
16434 case V4HImode:
16435 type = v4hi_ftype_v4hi;
16436 break;
16437 case V2SImode:
16438 type = v2si_ftype_v2si;
16439 break;
16440
16441 default:
16442 	  gcc_unreachable ();
16443 }
16444
16445 def_builtin (d->mask, d->name, type, d->code);
16446 }
16447
16448 /* Add the remaining MMX insns with somewhat more complicated types. */
16449 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16450 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16451 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16452 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16453
16454 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16455 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16456 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16457
16458 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16459 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16460
16461 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16462 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16463
16464 /* comi/ucomi insns. */
16465 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16466 if (d->mask == MASK_SSE2)
16467 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16468 else
16469 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16470
16471 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16472 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16473 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16474
16475 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16476 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16477 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16478 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16479 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16480 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16481 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16482 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16483 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16484 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16485 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16486
16487 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16488
16489 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16490 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16491
16492 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16493 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16494 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16495 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
16496
16497 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
16498 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
16499 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
16500 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
16501
16502 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
16503
16504 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
16505
16506 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
16507 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
16508 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
16509 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
16510 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
16511 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
16512
16513 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
16514
16515 /* Original 3DNow! */
16516 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
16517 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
16518 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
16519 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
16520 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
16521 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
16522 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
16523 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
16524 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
16525 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
16526 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
16527 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
16528 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
16529 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
16530 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
16531 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
16532 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
16533 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
16534 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
16535 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
16536
16537 /* 3DNow! extension as used in the Athlon CPU. */
16538 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
16539 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
16540 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
16541 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
16542 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
16543 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
16544
16545 /* SSE2 */
16546 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
16547
16548 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
16549 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
16550
16551 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
16552 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
16553
16554 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
16555 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
16556 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
16557 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
16558 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
16559
16560 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
16561 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
16562 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
16563 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
16564
16565 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
16566 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
16567
16568 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
16569
16570 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
16571 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
16572
16573 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
16574 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
16575 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
16576 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
16577 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
16578
16579 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
16580
16581 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
16582 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
16583 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
16584 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
16585
16586 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
16587 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
16588 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
16589
16590 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
16591 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
16592 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
16593 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
16594
16595 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
16596 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
16597 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
16598
16599 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
16600 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
16601
16602 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
16603 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
16604
16605 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
16606 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
16607 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
16608
16609 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
16610 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
16611 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
16612
16613 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
16614 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
16615
16616 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
16617 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
16618 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
16619 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
16620
16621 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
16622 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
16623 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
16624 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
16625
16626 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
16627 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
16628
16629 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
16630
16631 /* Prescott New Instructions. */
16632 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
16633 void_ftype_pcvoid_unsigned_unsigned,
16634 IX86_BUILTIN_MONITOR);
16635 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
16636 void_ftype_unsigned_unsigned,
16637 IX86_BUILTIN_MWAIT);
16638 def_builtin (MASK_SSE3, "__builtin_ia32_movshdup",
16639 v4sf_ftype_v4sf,
16640 IX86_BUILTIN_MOVSHDUP);
16641 def_builtin (MASK_SSE3, "__builtin_ia32_movsldup",
16642 v4sf_ftype_v4sf,
16643 IX86_BUILTIN_MOVSLDUP);
16644 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
16645 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
16646
16647 /* SSSE3. */
16648 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
16649 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
16650 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
16651 IX86_BUILTIN_PALIGNR);
16652
16653 /* Access to the vec_init patterns. */
16654 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
16655 integer_type_node, NULL_TREE);
16656 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
16657 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
16658
16659 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
16660 short_integer_type_node,
16661 short_integer_type_node,
16662 short_integer_type_node, NULL_TREE);
16663 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
16664 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
16665
16666 ftype = build_function_type_list (V8QI_type_node, char_type_node,
16667 char_type_node, char_type_node,
16668 char_type_node, char_type_node,
16669 char_type_node, char_type_node,
16670 char_type_node, NULL_TREE);
16671 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
16672 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
16673
16674 /* Access to the vec_extract patterns. */
16675 ftype = build_function_type_list (double_type_node, V2DF_type_node,
16676 integer_type_node, NULL_TREE);
16677 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
16678 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
16679
16680 ftype = build_function_type_list (long_long_integer_type_node,
16681 V2DI_type_node, integer_type_node,
16682 NULL_TREE);
16683 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
16684 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
16685
16686 ftype = build_function_type_list (float_type_node, V4SF_type_node,
16687 integer_type_node, NULL_TREE);
16688 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
16689 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
16690
16691 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
16692 integer_type_node, NULL_TREE);
16693 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
16694 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
16695
16696 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
16697 integer_type_node, NULL_TREE);
16698 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
16699 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
16700
16701 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
16702 integer_type_node, NULL_TREE);
16703 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
16704 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
16705
16706 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
16707 integer_type_node, NULL_TREE);
16708 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
16709 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
16710
16711 /* Access to the vec_set patterns. */
16712 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
16713 intHI_type_node,
16714 integer_type_node, NULL_TREE);
16715 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
16716 ftype, IX86_BUILTIN_VEC_SET_V8HI);
16717
16718 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
16719 intHI_type_node,
16720 integer_type_node, NULL_TREE);
16721 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
16722 ftype, IX86_BUILTIN_VEC_SET_V4HI);
16723 }
16724
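/* Illustrative sketch (not part of the compiled sources): the builtins
   registered above are normally reached through the intrinsic headers
   rather than called directly.  A wrapper along these hypothetical lines

       static __inline short
       my_extract_epi16 (__v8hi __a, const int __n)
       {
         return __builtin_ia32_vec_ext_v8hi (__a, __n);
       }

   is how <emmintrin.h>-style code is expected to use
   IX86_BUILTIN_VEC_EXT_V8HI.  */
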
16725 /* Errors in the source file can cause expand_expr to return const0_rtx
16726 where we expect a vector. To avoid crashing, use one of the vector
16727 clear instructions. */
16728 static rtx
16729 safe_vector_operand (rtx x, enum machine_mode mode)
16730 {
16731 if (x == const0_rtx)
16732 x = CONST0_RTX (mode);
16733 return x;
16734 }
16735
16736 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
16737
16738 static rtx
16739 ix86_expand_binop_builtin (enum insn_code icode, tree arglist, rtx target)
16740 {
16741 rtx pat, xops[3];
16742 tree arg0 = TREE_VALUE (arglist);
16743 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16744 rtx op0 = expand_normal (arg0);
16745 rtx op1 = expand_normal (arg1);
16746 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16747 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16748 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
16749
16750 if (VECTOR_MODE_P (mode0))
16751 op0 = safe_vector_operand (op0, mode0);
16752 if (VECTOR_MODE_P (mode1))
16753 op1 = safe_vector_operand (op1, mode1);
16754
16755 if (optimize || !target
16756 || GET_MODE (target) != tmode
16757 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16758 target = gen_reg_rtx (tmode);
16759
16760 if (GET_MODE (op1) == SImode && mode1 == TImode)
16761 {
16762 rtx x = gen_reg_rtx (V4SImode);
16763 emit_insn (gen_sse2_loadd (x, op1));
16764 op1 = gen_lowpart (TImode, x);
16765 }
16766
16767 /* The insn must want input operands in the same modes as the
16768 result. */
16769 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
16770 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
16771
16772 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
16773 op0 = copy_to_mode_reg (mode0, op0);
16774 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
16775 op1 = copy_to_mode_reg (mode1, op1);
16776
16777 /* ??? Using ix86_fixup_binary_operands is problematic when
16778 we've got mismatched modes. Fake it. */
16779
16780 xops[0] = target;
16781 xops[1] = op0;
16782 xops[2] = op1;
16783
16784 if (tmode == mode0 && tmode == mode1)
16785 {
16786 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
16787 op0 = xops[1];
16788 op1 = xops[2];
16789 }
16790 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
16791 {
16792 op0 = force_reg (mode0, op0);
16793 op1 = force_reg (mode1, op1);
16794 target = gen_reg_rtx (tmode);
16795 }
16796
16797 pat = GEN_FCN (icode) (target, op0, op1);
16798 if (! pat)
16799 return 0;
16800 emit_insn (pat);
16801 return target;
16802 }
16803
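/* Sketch of a typical path through the routine above: a source-level call
   such as

       __v8hi r = __builtin_ia32_pmullw128 (a, b);

   arrives with icode == CODE_FOR_mulv8hi3 taken from its bdesc_2arg entry;
   both operands are forced to satisfy the insn predicates and a single
   (set (reg r) (mult:V8HI (reg a) (reg b))) style pattern is emitted.
   Variable names in the example are illustrative only.  */
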
16804 /* Subroutine of ix86_expand_builtin to take care of stores. */
16805
16806 static rtx
16807 ix86_expand_store_builtin (enum insn_code icode, tree arglist)
16808 {
16809 rtx pat;
16810 tree arg0 = TREE_VALUE (arglist);
16811 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16812 rtx op0 = expand_normal (arg0);
16813 rtx op1 = expand_normal (arg1);
16814 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
16815 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
16816
16817 if (VECTOR_MODE_P (mode1))
16818 op1 = safe_vector_operand (op1, mode1);
16819
16820 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16821 op1 = copy_to_mode_reg (mode1, op1);
16822
16823 pat = GEN_FCN (icode) (op0, op1);
16824 if (pat)
16825 emit_insn (pat);
16826 return 0;
16827 }
16828
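/* Sketch: __builtin_ia32_storeups (p, v) reaches the store helper above
   with the address in op0 and the vector in op1; op0 is wrapped in a MEM
   of the insn's operand 0 mode and the movups pattern is emitted with that
   MEM as the destination.  Store builtins have no meaningful value, hence
   the constant 0 return.  */
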
16829 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
16830
16831 static rtx
16832 ix86_expand_unop_builtin (enum insn_code icode, tree arglist,
16833 rtx target, int do_load)
16834 {
16835 rtx pat;
16836 tree arg0 = TREE_VALUE (arglist);
16837 rtx op0 = expand_normal (arg0);
16838 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16839 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16840
16841 if (optimize || !target
16842 || GET_MODE (target) != tmode
16843 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16844 target = gen_reg_rtx (tmode);
16845 if (do_load)
16846 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
16847 else
16848 {
16849 if (VECTOR_MODE_P (mode0))
16850 op0 = safe_vector_operand (op0, mode0);
16851
16852 if ((optimize && !register_operand (op0, mode0))
16853 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16854 op0 = copy_to_mode_reg (mode0, op0);
16855 }
16856
16857 pat = GEN_FCN (icode) (target, op0);
16858 if (! pat)
16859 return 0;
16860 emit_insn (pat);
16861 return target;
16862 }
16863
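/* Sketch: IX86_BUILTIN_LOADUPS is routed through the unop helper above
   with do_load == 1 (see ix86_expand_builtin below), so its pointer
   argument is turned into a MEM; a register unop such as
   IX86_BUILTIN_SQRTPS is instead expanded with do_load == 0 and its
   operand copied to a register if the predicate demands it.  */
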
16864 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
16865 sqrtss, rsqrtss, rcpss. */
16866
16867 static rtx
16868 ix86_expand_unop1_builtin (enum insn_code icode, tree arglist, rtx target)
16869 {
16870 rtx pat;
16871 tree arg0 = TREE_VALUE (arglist);
16872 rtx op1, op0 = expand_normal (arg0);
16873 enum machine_mode tmode = insn_data[icode].operand[0].mode;
16874 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
16875
16876 if (optimize || !target
16877 || GET_MODE (target) != tmode
16878 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
16879 target = gen_reg_rtx (tmode);
16880
16881 if (VECTOR_MODE_P (mode0))
16882 op0 = safe_vector_operand (op0, mode0);
16883
16884 if ((optimize && !register_operand (op0, mode0))
16885 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
16886 op0 = copy_to_mode_reg (mode0, op0);
16887
16888 op1 = op0;
16889 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
16890 op1 = copy_to_mode_reg (mode0, op1);
16891
16892 pat = GEN_FCN (icode) (target, op0, op1);
16893 if (! pat)
16894 return 0;
16895 emit_insn (pat);
16896 return target;
16897 }
16898
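/* Sketch: for __builtin_ia32_rcpss (x) the vm pattern takes the source
   twice: operand 1 feeds the approximation of the low element, while
   operand 2 supplies the vector whose upper elements pass through
   unchanged.  That is why the single argument is duplicated into op1
   above.  */
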
16899 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
16900
16901 static rtx
16902 ix86_expand_sse_compare (const struct builtin_description *d, tree arglist,
16903 rtx target)
16904 {
16905 rtx pat;
16906 tree arg0 = TREE_VALUE (arglist);
16907 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16908 rtx op0 = expand_normal (arg0);
16909 rtx op1 = expand_normal (arg1);
16910 rtx op2;
16911 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
16912 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
16913 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
16914 enum rtx_code comparison = d->comparison;
16915
16916 if (VECTOR_MODE_P (mode0))
16917 op0 = safe_vector_operand (op0, mode0);
16918 if (VECTOR_MODE_P (mode1))
16919 op1 = safe_vector_operand (op1, mode1);
16920
16921 /* Swap operands if we have a comparison that isn't available in
16922 hardware. */
16923 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16924 {
16925 rtx tmp = gen_reg_rtx (mode1);
16926 emit_move_insn (tmp, op1);
16927 op1 = op0;
16928 op0 = tmp;
16929 }
16930
16931 if (optimize || !target
16932 || GET_MODE (target) != tmode
16933 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
16934 target = gen_reg_rtx (tmode);
16935
16936 if ((optimize && !register_operand (op0, mode0))
16937 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
16938 op0 = copy_to_mode_reg (mode0, op0);
16939 if ((optimize && !register_operand (op1, mode1))
16940 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
16941 op1 = copy_to_mode_reg (mode1, op1);
16942
16943 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16944 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
16945 if (! pat)
16946 return 0;
16947 emit_insn (pat);
16948 return target;
16949 }
16950
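/* Sketch: comparisons with no direct SSE encoding are handled by the swap
   above.  A "greater than" builtin, for example, can be expanded as the
   corresponding "less than" maskcmp pattern with its operands exchanged
   (BUILTIN_DESC_SWAP_OPERANDS set in its descriptor); either way a single
   insn producing a per-element all-ones/all-zeros mask is emitted.  */
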
16951 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
16952
16953 static rtx
16954 ix86_expand_sse_comi (const struct builtin_description *d, tree arglist,
16955 rtx target)
16956 {
16957 rtx pat;
16958 tree arg0 = TREE_VALUE (arglist);
16959 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
16960 rtx op0 = expand_normal (arg0);
16961 rtx op1 = expand_normal (arg1);
16962 rtx op2;
16963 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
16964 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
16965 enum rtx_code comparison = d->comparison;
16966
16967 if (VECTOR_MODE_P (mode0))
16968 op0 = safe_vector_operand (op0, mode0);
16969 if (VECTOR_MODE_P (mode1))
16970 op1 = safe_vector_operand (op1, mode1);
16971
16972 /* Swap operands if we have a comparison that isn't available in
16973 hardware. */
16974 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
16975 {
16976 rtx tmp = op1;
16977 op1 = op0;
16978 op0 = tmp;
16979 }
16980
16981 target = gen_reg_rtx (SImode);
16982 emit_move_insn (target, const0_rtx);
16983 target = gen_rtx_SUBREG (QImode, target, 0);
16984
16985 if ((optimize && !register_operand (op0, mode0))
16986 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
16987 op0 = copy_to_mode_reg (mode0, op0);
16988 if ((optimize && !register_operand (op1, mode1))
16989 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
16990 op1 = copy_to_mode_reg (mode1, op1);
16991
16992 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
16993 pat = GEN_FCN (d->icode) (op0, op1);
16994 if (! pat)
16995 return 0;
16996 emit_insn (pat);
16997 emit_insn (gen_rtx_SET (VOIDmode,
16998 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
16999 gen_rtx_fmt_ee (comparison, QImode,
17000 SET_DEST (pat),
17001 const0_rtx)));
17002
17003 return SUBREG_REG (target);
17004 }
17005
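/* Sketch: for a comi builtin such as __builtin_ia32_comilt (a, b) the
   comiss/comisd pattern only sets the flags, so the code above emits
   roughly

       (set (reg:CCFP flags) (compare a b))
       (set (strict_low_part (subreg:QI tmp 0)) (lt (reg:CCFP flags) 0))

   and returns the SImode pseudo (zeroed beforehand) that holds the
   resulting 0/1 value.  */
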
17006 /* Return the integer constant in ARG. Constrain it to be in the range
17007 of the subparts of VEC_TYPE; issue an error if not. */
17008
17009 static int
17010 get_element_number (tree vec_type, tree arg)
17011 {
17012 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17013
17014 if (!host_integerp (arg, 1)
17015 || (elt = tree_low_cst (arg, 1), elt > max))
17016 {
17017 error ("selector must be an integer constant in the range 0..%wi", max);
17018 return 0;
17019 }
17020
17021 return elt;
17022 }
17023
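/* Sketch: for a V4SF argument TYPE_VECTOR_SUBPARTS is 4, so only the
   selectors 0..3 are accepted; something like
   __builtin_ia32_vec_ext_v4sf (v, 7) triggers the error above and quietly
   falls back to element 0.  */
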
17024 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17025 ix86_expand_vector_init. We DO have language-level syntax for this, in
17026 the form of (type){ init-list }. Except that since we can't place emms
17027 instructions from inside the compiler, we can't allow the use of MMX
17028 registers unless the user explicitly asks for it. So we do *not* define
17029 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17030    we have builtins invoked by mmintrin.h that give us license to emit
17031 these sorts of instructions. */
17032
17033 static rtx
17034 ix86_expand_vec_init_builtin (tree type, tree arglist, rtx target)
17035 {
17036 enum machine_mode tmode = TYPE_MODE (type);
17037 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17038 int i, n_elt = GET_MODE_NUNITS (tmode);
17039 rtvec v = rtvec_alloc (n_elt);
17040
17041 gcc_assert (VECTOR_MODE_P (tmode));
17042
17043 for (i = 0; i < n_elt; ++i, arglist = TREE_CHAIN (arglist))
17044 {
17045 rtx x = expand_normal (TREE_VALUE (arglist));
17046 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17047 }
17048
17049 gcc_assert (arglist == NULL);
17050
17051 if (!target || !register_operand (target, tmode))
17052 target = gen_reg_rtx (tmode);
17053
17054 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17055 return target;
17056 }
17057
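/* Sketch: __builtin_ia32_vec_init_v2si (1, 2) reaches the routine above
   with tmode == V2SImode; both arguments are gathered into a PARALLEL and
   ix86_expand_vector_init chooses how to materialize the vector.  MMX use
   is allowed (the leading true argument) because the call necessarily came
   through the intrinsic header, per the comment above.  */
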
17058 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17059 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17060 had a language-level syntax for referencing vector elements. */
17061
17062 static rtx
17063 ix86_expand_vec_ext_builtin (tree arglist, rtx target)
17064 {
17065 enum machine_mode tmode, mode0;
17066 tree arg0, arg1;
17067 int elt;
17068 rtx op0;
17069
17070 arg0 = TREE_VALUE (arglist);
17071 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17072
17073 op0 = expand_normal (arg0);
17074 elt = get_element_number (TREE_TYPE (arg0), arg1);
17075
17076 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17077 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17078 gcc_assert (VECTOR_MODE_P (mode0));
17079
17080 op0 = force_reg (mode0, op0);
17081
17082 if (optimize || !target || !register_operand (target, tmode))
17083 target = gen_reg_rtx (tmode);
17084
17085 ix86_expand_vector_extract (true, target, op0, elt);
17086
17087 return target;
17088 }
17089
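/* Sketch: __builtin_ia32_vec_ext_v4sf (v, 2) forces V into a register and
   asks ix86_expand_vector_extract for element 2; depending on the target
   that may come out as a shuffle followed by a scalar move, while element
   0 is simply the low part of the vector register.  */
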
17090 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17091 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17092 a language-level syntax for referencing vector elements. */
17093
17094 static rtx
17095 ix86_expand_vec_set_builtin (tree arglist)
17096 {
17097 enum machine_mode tmode, mode1;
17098 tree arg0, arg1, arg2;
17099 int elt;
17100 rtx op0, op1;
17101
17102 arg0 = TREE_VALUE (arglist);
17103 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17104 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17105
17106 tmode = TYPE_MODE (TREE_TYPE (arg0));
17107 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17108 gcc_assert (VECTOR_MODE_P (tmode));
17109
17110 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17111 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17112 elt = get_element_number (TREE_TYPE (arg0), arg2);
17113
17114 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17115 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17116
17117 op0 = force_reg (tmode, op0);
17118 op1 = force_reg (mode1, op1);
17119
17120 ix86_expand_vector_set (true, op0, op1, elt);
17121
17122 return op0;
17123 }
17124
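/* Sketch: __builtin_ia32_vec_set_v8hi (v, x, 3) loads V and X into
   registers, validates the lane number through get_element_number, and
   lets ix86_expand_vector_set emit a pinsrw-style sequence; the updated
   vector is returned in op0.  */
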
17125 /* Expand an expression EXP that calls a built-in function,
17126 with result going to TARGET if that's convenient
17127 (and in mode MODE if that's convenient).
17128 SUBTARGET may be used as the target for computing one of EXP's operands.
17129 IGNORE is nonzero if the value is to be ignored. */
17130
17131 static rtx
17132 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17133 enum machine_mode mode ATTRIBUTE_UNUSED,
17134 int ignore ATTRIBUTE_UNUSED)
17135 {
17136 const struct builtin_description *d;
17137 size_t i;
17138 enum insn_code icode;
17139 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
17140 tree arglist = TREE_OPERAND (exp, 1);
17141 tree arg0, arg1, arg2;
17142 rtx op0, op1, op2, pat;
17143 enum machine_mode tmode, mode0, mode1, mode2, mode3;
17144 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17145
17146 switch (fcode)
17147 {
17148 case IX86_BUILTIN_EMMS:
17149 emit_insn (gen_mmx_emms ());
17150 return 0;
17151
17152 case IX86_BUILTIN_SFENCE:
17153 emit_insn (gen_sse_sfence ());
17154 return 0;
17155
17156 case IX86_BUILTIN_MASKMOVQ:
17157 case IX86_BUILTIN_MASKMOVDQU:
17158 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17159 ? CODE_FOR_mmx_maskmovq
17160 : CODE_FOR_sse2_maskmovdqu);
17161 /* Note the arg order is different from the operand order. */
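      /* Illustrative: at the user level this is _mm_maskmove_si64 (data,
	 mask, addr) - builtin args (data, mask, addr) - while the
	 maskmovq/maskmovdqu patterns take operands as (mem, data, mask);
	 hence arg0 below is the address and arg1/arg2 the data and mask.  */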
17162 arg1 = TREE_VALUE (arglist);
17163 arg2 = TREE_VALUE (TREE_CHAIN (arglist));
17164 arg0 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17165 op0 = expand_normal (arg0);
17166 op1 = expand_normal (arg1);
17167 op2 = expand_normal (arg2);
17168 mode0 = insn_data[icode].operand[0].mode;
17169 mode1 = insn_data[icode].operand[1].mode;
17170 mode2 = insn_data[icode].operand[2].mode;
17171
17172 op0 = force_reg (Pmode, op0);
17173 op0 = gen_rtx_MEM (mode1, op0);
17174
17175 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17176 op0 = copy_to_mode_reg (mode0, op0);
17177 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17178 op1 = copy_to_mode_reg (mode1, op1);
17179 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17180 op2 = copy_to_mode_reg (mode2, op2);
17181 pat = GEN_FCN (icode) (op0, op1, op2);
17182 if (! pat)
17183 return 0;
17184 emit_insn (pat);
17185 return 0;
17186
17187 case IX86_BUILTIN_SQRTSS:
17188 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, arglist, target);
17189 case IX86_BUILTIN_RSQRTSS:
17190 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, arglist, target);
17191 case IX86_BUILTIN_RCPSS:
17192 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, arglist, target);
17193
17194 case IX86_BUILTIN_LOADUPS:
17195 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, arglist, target, 1);
17196
17197 case IX86_BUILTIN_STOREUPS:
17198 return ix86_expand_store_builtin (CODE_FOR_sse_movups, arglist);
17199
17200 case IX86_BUILTIN_LOADHPS:
17201 case IX86_BUILTIN_LOADLPS:
17202 case IX86_BUILTIN_LOADHPD:
17203 case IX86_BUILTIN_LOADLPD:
17204 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17205 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17206 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17207 : CODE_FOR_sse2_loadlpd);
17208 arg0 = TREE_VALUE (arglist);
17209 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17210 op0 = expand_normal (arg0);
17211 op1 = expand_normal (arg1);
17212 tmode = insn_data[icode].operand[0].mode;
17213 mode0 = insn_data[icode].operand[1].mode;
17214 mode1 = insn_data[icode].operand[2].mode;
17215
17216 op0 = force_reg (mode0, op0);
17217 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17218 if (optimize || target == 0
17219 || GET_MODE (target) != tmode
17220 || !register_operand (target, tmode))
17221 target = gen_reg_rtx (tmode);
17222 pat = GEN_FCN (icode) (target, op0, op1);
17223 if (! pat)
17224 return 0;
17225 emit_insn (pat);
17226 return target;
17227
17228 case IX86_BUILTIN_STOREHPS:
17229 case IX86_BUILTIN_STORELPS:
17230 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17231 : CODE_FOR_sse_storelps);
17232 arg0 = TREE_VALUE (arglist);
17233 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17234 op0 = expand_normal (arg0);
17235 op1 = expand_normal (arg1);
17236 mode0 = insn_data[icode].operand[0].mode;
17237 mode1 = insn_data[icode].operand[1].mode;
17238
17239 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17240 op1 = force_reg (mode1, op1);
17241
17242 pat = GEN_FCN (icode) (op0, op1);
17243 if (! pat)
17244 return 0;
17245 emit_insn (pat);
17246 return const0_rtx;
17247
17248 case IX86_BUILTIN_MOVNTPS:
17249 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, arglist);
17250 case IX86_BUILTIN_MOVNTQ:
17251 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, arglist);
17252
17253 case IX86_BUILTIN_LDMXCSR:
17254 op0 = expand_normal (TREE_VALUE (arglist));
17255 target = assign_386_stack_local (SImode, SLOT_TEMP);
17256 emit_move_insn (target, op0);
17257 emit_insn (gen_sse_ldmxcsr (target));
17258 return 0;
17259
17260 case IX86_BUILTIN_STMXCSR:
17261 target = assign_386_stack_local (SImode, SLOT_TEMP);
17262 emit_insn (gen_sse_stmxcsr (target));
17263 return copy_to_mode_reg (SImode, target);
17264
17265 case IX86_BUILTIN_SHUFPS:
17266 case IX86_BUILTIN_SHUFPD:
17267 icode = (fcode == IX86_BUILTIN_SHUFPS
17268 ? CODE_FOR_sse_shufps
17269 : CODE_FOR_sse2_shufpd);
17270 arg0 = TREE_VALUE (arglist);
17271 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17272 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17273 op0 = expand_normal (arg0);
17274 op1 = expand_normal (arg1);
17275 op2 = expand_normal (arg2);
17276 tmode = insn_data[icode].operand[0].mode;
17277 mode0 = insn_data[icode].operand[1].mode;
17278 mode1 = insn_data[icode].operand[2].mode;
17279 mode2 = insn_data[icode].operand[3].mode;
17280
17281 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17282 op0 = copy_to_mode_reg (mode0, op0);
17283 if ((optimize && !register_operand (op1, mode1))
17284 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17285 op1 = copy_to_mode_reg (mode1, op1);
17286 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17287 {
17288 /* @@@ better error message */
17289 error ("mask must be an immediate");
17290 return gen_reg_rtx (tmode);
17291 }
17292 if (optimize || target == 0
17293 || GET_MODE (target) != tmode
17294 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17295 target = gen_reg_rtx (tmode);
17296 pat = GEN_FCN (icode) (target, op0, op1, op2);
17297 if (! pat)
17298 return 0;
17299 emit_insn (pat);
17300 return target;
17301
17302 case IX86_BUILTIN_PSHUFW:
17303 case IX86_BUILTIN_PSHUFD:
17304 case IX86_BUILTIN_PSHUFHW:
17305 case IX86_BUILTIN_PSHUFLW:
17306 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17307 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17308 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17309 : CODE_FOR_mmx_pshufw);
17310 arg0 = TREE_VALUE (arglist);
17311 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17312 op0 = expand_normal (arg0);
17313 op1 = expand_normal (arg1);
17314 tmode = insn_data[icode].operand[0].mode;
17315 mode1 = insn_data[icode].operand[1].mode;
17316 mode2 = insn_data[icode].operand[2].mode;
17317
17318 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17319 op0 = copy_to_mode_reg (mode1, op0);
17320 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17321 {
17322 /* @@@ better error message */
17323 error ("mask must be an immediate");
17324 return const0_rtx;
17325 }
17326 if (target == 0
17327 || GET_MODE (target) != tmode
17328 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17329 target = gen_reg_rtx (tmode);
17330 pat = GEN_FCN (icode) (target, op0, op1);
17331 if (! pat)
17332 return 0;
17333 emit_insn (pat);
17334 return target;
17335
17336 case IX86_BUILTIN_PSLLDQI128:
17337 case IX86_BUILTIN_PSRLDQI128:
17338 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17339 : CODE_FOR_sse2_lshrti3);
17340 arg0 = TREE_VALUE (arglist);
17341 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17342 op0 = expand_normal (arg0);
17343 op1 = expand_normal (arg1);
17344 tmode = insn_data[icode].operand[0].mode;
17345 mode1 = insn_data[icode].operand[1].mode;
17346 mode2 = insn_data[icode].operand[2].mode;
17347
17348 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17349 {
17350 op0 = copy_to_reg (op0);
17351 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17352 }
17353 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17354 {
17355 error ("shift must be an immediate");
17356 return const0_rtx;
17357 }
17358 target = gen_reg_rtx (V2DImode);
17359 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17360 if (! pat)
17361 return 0;
17362 emit_insn (pat);
17363 return target;
17364
17365 case IX86_BUILTIN_FEMMS:
17366 emit_insn (gen_mmx_femms ());
17367 return NULL_RTX;
17368
17369 case IX86_BUILTIN_PAVGUSB:
17370 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, arglist, target);
17371
17372 case IX86_BUILTIN_PF2ID:
17373 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, arglist, target, 0);
17374
17375 case IX86_BUILTIN_PFACC:
17376 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, arglist, target);
17377
17378 case IX86_BUILTIN_PFADD:
17379 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, arglist, target);
17380
17381 case IX86_BUILTIN_PFCMPEQ:
17382 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, arglist, target);
17383
17384 case IX86_BUILTIN_PFCMPGE:
17385 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, arglist, target);
17386
17387 case IX86_BUILTIN_PFCMPGT:
17388 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, arglist, target);
17389
17390 case IX86_BUILTIN_PFMAX:
17391 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, arglist, target);
17392
17393 case IX86_BUILTIN_PFMIN:
17394 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, arglist, target);
17395
17396 case IX86_BUILTIN_PFMUL:
17397 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, arglist, target);
17398
17399 case IX86_BUILTIN_PFRCP:
17400 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, arglist, target, 0);
17401
17402 case IX86_BUILTIN_PFRCPIT1:
17403 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, arglist, target);
17404
17405 case IX86_BUILTIN_PFRCPIT2:
17406 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, arglist, target);
17407
17408 case IX86_BUILTIN_PFRSQIT1:
17409 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, arglist, target);
17410
17411 case IX86_BUILTIN_PFRSQRT:
17412 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, arglist, target, 0);
17413
17414 case IX86_BUILTIN_PFSUB:
17415 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, arglist, target);
17416
17417 case IX86_BUILTIN_PFSUBR:
17418 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, arglist, target);
17419
17420 case IX86_BUILTIN_PI2FD:
17421 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, arglist, target, 0);
17422
17423 case IX86_BUILTIN_PMULHRW:
17424 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, arglist, target);
17425
17426 case IX86_BUILTIN_PF2IW:
17427 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, arglist, target, 0);
17428
17429 case IX86_BUILTIN_PFNACC:
17430 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, arglist, target);
17431
17432 case IX86_BUILTIN_PFPNACC:
17433 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, arglist, target);
17434
17435 case IX86_BUILTIN_PI2FW:
17436 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, arglist, target, 0);
17437
17438 case IX86_BUILTIN_PSWAPDSI:
17439 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, arglist, target, 0);
17440
17441 case IX86_BUILTIN_PSWAPDSF:
17442 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, arglist, target, 0);
17443
17444 case IX86_BUILTIN_SQRTSD:
17445 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, arglist, target);
17446 case IX86_BUILTIN_LOADUPD:
17447 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, arglist, target, 1);
17448 case IX86_BUILTIN_STOREUPD:
17449 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, arglist);
17450
17451 case IX86_BUILTIN_MFENCE:
17452 emit_insn (gen_sse2_mfence ());
17453 return 0;
17454 case IX86_BUILTIN_LFENCE:
17455 emit_insn (gen_sse2_lfence ());
17456 return 0;
17457
17458 case IX86_BUILTIN_CLFLUSH:
17459 arg0 = TREE_VALUE (arglist);
17460 op0 = expand_normal (arg0);
17461 icode = CODE_FOR_sse2_clflush;
17462 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
17463 op0 = copy_to_mode_reg (Pmode, op0);
17464
17465 emit_insn (gen_sse2_clflush (op0));
17466 return 0;
17467
17468 case IX86_BUILTIN_MOVNTPD:
17469 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, arglist);
17470 case IX86_BUILTIN_MOVNTDQ:
17471 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, arglist);
17472 case IX86_BUILTIN_MOVNTI:
17473 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, arglist);
17474
17475 case IX86_BUILTIN_LOADDQU:
17476 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, arglist, target, 1);
17477 case IX86_BUILTIN_STOREDQU:
17478 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, arglist);
17479
17480 case IX86_BUILTIN_MONITOR:
17481 arg0 = TREE_VALUE (arglist);
17482 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17483 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17484 op0 = expand_normal (arg0);
17485 op1 = expand_normal (arg1);
17486 op2 = expand_normal (arg2);
17487 if (!REG_P (op0))
17488 op0 = copy_to_mode_reg (Pmode, op0);
17489 if (!REG_P (op1))
17490 op1 = copy_to_mode_reg (SImode, op1);
17491 if (!REG_P (op2))
17492 op2 = copy_to_mode_reg (SImode, op2);
17493 if (!TARGET_64BIT)
17494 emit_insn (gen_sse3_monitor (op0, op1, op2));
17495 else
17496 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
17497 return 0;
17498
17499 case IX86_BUILTIN_MWAIT:
17500 arg0 = TREE_VALUE (arglist);
17501 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17502 op0 = expand_normal (arg0);
17503 op1 = expand_normal (arg1);
17504 if (!REG_P (op0))
17505 op0 = copy_to_mode_reg (SImode, op0);
17506 if (!REG_P (op1))
17507 op1 = copy_to_mode_reg (SImode, op1);
17508 emit_insn (gen_sse3_mwait (op0, op1));
17509 return 0;
17510
17511 case IX86_BUILTIN_LDDQU:
17512 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, arglist,
17513 target, 1);
17514
17515 case IX86_BUILTIN_PALIGNR:
17516 case IX86_BUILTIN_PALIGNR128:
17517 if (fcode == IX86_BUILTIN_PALIGNR)
17518 {
17519 icode = CODE_FOR_ssse3_palignrdi;
17520 mode = DImode;
17521 }
17522 else
17523 {
17524 icode = CODE_FOR_ssse3_palignrti;
17525 mode = V2DImode;
17526 }
17527 arg0 = TREE_VALUE (arglist);
17528 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
17529 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
17530 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
17531 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
17532 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
17533 tmode = insn_data[icode].operand[0].mode;
17534 mode1 = insn_data[icode].operand[1].mode;
17535 mode2 = insn_data[icode].operand[2].mode;
17536 mode3 = insn_data[icode].operand[3].mode;
17537
17538 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17539 {
17540 op0 = copy_to_reg (op0);
17541 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17542 }
17543 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17544 {
17545 op1 = copy_to_reg (op1);
17546 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
17547 }
17548 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
17549 {
17550 error ("shift must be an immediate");
17551 return const0_rtx;
17552 }
17553 target = gen_reg_rtx (mode);
17554 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
17555 op0, op1, op2);
17556 if (! pat)
17557 return 0;
17558 emit_insn (pat);
17559 return target;
17560
17561 case IX86_BUILTIN_VEC_INIT_V2SI:
17562 case IX86_BUILTIN_VEC_INIT_V4HI:
17563 case IX86_BUILTIN_VEC_INIT_V8QI:
17564 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), arglist, target);
17565
17566 case IX86_BUILTIN_VEC_EXT_V2DF:
17567 case IX86_BUILTIN_VEC_EXT_V2DI:
17568 case IX86_BUILTIN_VEC_EXT_V4SF:
17569 case IX86_BUILTIN_VEC_EXT_V4SI:
17570 case IX86_BUILTIN_VEC_EXT_V8HI:
17571 case IX86_BUILTIN_VEC_EXT_V2SI:
17572 case IX86_BUILTIN_VEC_EXT_V4HI:
17573 return ix86_expand_vec_ext_builtin (arglist, target);
17574
17575 case IX86_BUILTIN_VEC_SET_V8HI:
17576 case IX86_BUILTIN_VEC_SET_V4HI:
17577 return ix86_expand_vec_set_builtin (arglist);
17578
17579 default:
17580 break;
17581 }
17582
17583 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17584 if (d->code == fcode)
17585 {
17586 /* Compares are treated specially. */
17587 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17588 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
17589 || d->icode == CODE_FOR_sse2_maskcmpv2df3
17590 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17591 return ix86_expand_sse_compare (d, arglist, target);
17592
17593 return ix86_expand_binop_builtin (d->icode, arglist, target);
17594 }
17595
17596 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17597 if (d->code == fcode)
17598 return ix86_expand_unop_builtin (d->icode, arglist, target, 0);
17599
17600 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17601 if (d->code == fcode)
17602 return ix86_expand_sse_comi (d, arglist, target);
17603
17604 gcc_unreachable ();
17605 }
17606
17607 /* Returns a function decl for a vectorized version of the builtin function
17608 with builtin function code FN and the result vector type TYPE, or NULL_TREE
17609 if it is not available. */
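/* For example (illustrative only): when vectorizing

	for (i = 0; i < n; i++)
	  out[i] = sqrt (in[i]);

   over double elements with V2DFmode vectors, the vectorizer asks for a
   vector variant of BUILT_IN_SQRT and is handed the decl recorded for
   IX86_BUILTIN_SQRTPD below.  */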
17610
17611 static tree
17612 ix86_builtin_vectorized_function (enum built_in_function fn, tree type)
17613 {
17614 enum machine_mode el_mode;
17615 int n;
17616
17617 if (TREE_CODE (type) != VECTOR_TYPE)
17618 return NULL_TREE;
17619
17620 el_mode = TYPE_MODE (TREE_TYPE (type));
17621 n = TYPE_VECTOR_SUBPARTS (type);
17622
17623 switch (fn)
17624 {
17625 case BUILT_IN_SQRT:
17626 if (el_mode == DFmode && n == 2)
17627 return ix86_builtins[IX86_BUILTIN_SQRTPD];
17628 return NULL_TREE;
17629
17630 case BUILT_IN_SQRTF:
17631 if (el_mode == SFmode && n == 4)
17632 return ix86_builtins[IX86_BUILTIN_SQRTPS];
17633 return NULL_TREE;
17634
17635 default:
17636 ;
17637 }
17638
17639 return NULL_TREE;
17640 }
17641
17642 /* Store OPERAND to the memory after reload is completed. This means
17643 that we can't easily use assign_stack_local. */
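/* Sketch of the RTL this emits (illustrative): on 64-bit targets without
   a red zone, an SImode OPERAND is widened to DImode and pushed,

	(set (mem:DI (pre_dec:DI (reg:DI sp))) (reg:DI operand))

   and the returned MEM of the requested mode points at the new stack
   top; ix86_free_from_memory below releases the slot again.  */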
17644 rtx
17645 ix86_force_to_memory (enum machine_mode mode, rtx operand)
17646 {
17647 rtx result;
17648
17649 gcc_assert (reload_completed);
17650 if (TARGET_RED_ZONE)
17651 {
17652 result = gen_rtx_MEM (mode,
17653 gen_rtx_PLUS (Pmode,
17654 stack_pointer_rtx,
17655 GEN_INT (-RED_ZONE_SIZE)));
17656 emit_move_insn (result, operand);
17657 }
17658 else if (!TARGET_RED_ZONE && TARGET_64BIT)
17659 {
17660 switch (mode)
17661 {
17662 case HImode:
17663 case SImode:
17664 operand = gen_lowpart (DImode, operand);
17665 /* FALLTHRU */
17666 case DImode:
17667 emit_insn (
17668 gen_rtx_SET (VOIDmode,
17669 gen_rtx_MEM (DImode,
17670 gen_rtx_PRE_DEC (DImode,
17671 stack_pointer_rtx)),
17672 operand));
17673 break;
17674 default:
17675 gcc_unreachable ();
17676 }
17677 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17678 }
17679 else
17680 {
17681 switch (mode)
17682 {
17683 case DImode:
17684 {
17685 rtx operands[2];
17686 split_di (&operand, 1, operands, operands + 1);
17687 emit_insn (
17688 gen_rtx_SET (VOIDmode,
17689 gen_rtx_MEM (SImode,
17690 gen_rtx_PRE_DEC (Pmode,
17691 stack_pointer_rtx)),
17692 operands[1]));
17693 emit_insn (
17694 gen_rtx_SET (VOIDmode,
17695 gen_rtx_MEM (SImode,
17696 gen_rtx_PRE_DEC (Pmode,
17697 stack_pointer_rtx)),
17698 operands[0]));
17699 }
17700 break;
17701 case HImode:
17702 /* Store HImodes as SImodes. */
17703 operand = gen_lowpart (SImode, operand);
17704 /* FALLTHRU */
17705 case SImode:
17706 emit_insn (
17707 gen_rtx_SET (VOIDmode,
17708 gen_rtx_MEM (GET_MODE (operand),
17709 gen_rtx_PRE_DEC (SImode,
17710 stack_pointer_rtx)),
17711 operand));
17712 break;
17713 default:
17714 gcc_unreachable ();
17715 }
17716 result = gen_rtx_MEM (mode, stack_pointer_rtx);
17717 }
17718 return result;
17719 }
17720
17721 /* Free operand from the memory. */
17722 void
17723 ix86_free_from_memory (enum machine_mode mode)
17724 {
17725 if (!TARGET_RED_ZONE)
17726 {
17727 int size;
17728
17729 if (mode == DImode || TARGET_64BIT)
17730 size = 8;
17731 else
17732 size = 4;
17733 /* Use LEA to deallocate stack space. In peephole2 it will be converted
17734 to a pop or add instruction if registers are available. */
17735 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
17736 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
17737 GEN_INT (size))));
17738 }
17739 }
17740
17741 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
17742 QImode must go into class Q_REGS.
17743 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
17744 movdf to do mem-to-mem moves through integer regs. */
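/* Example (illustrative): asked to load (const_double:SF 1.0) into an SSE
   class we return NO_REGS below, so the constant is forced into the
   constant pool and loaded from memory; with x87 math the same constant
   may stay register-preferred because standard_80387_constant_p
   recognizes it as fld1.  */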
17745 enum reg_class
17746 ix86_preferred_reload_class (rtx x, enum reg_class class)
17747 {
17748 enum machine_mode mode = GET_MODE (x);
17749
17750 /* We're only allowed to return a subclass of CLASS. Many of the
17751 following checks fail for NO_REGS, so eliminate that early. */
17752 if (class == NO_REGS)
17753 return NO_REGS;
17754
17755 /* All classes can load zeros. */
17756 if (x == CONST0_RTX (mode))
17757 return class;
17758
17759 /* Force constants into memory if we are loading a (nonzero) constant into
17760 an MMX or SSE register. This is because there are no MMX/SSE instructions
17761 to load from a constant. */
17762 if (CONSTANT_P (x)
17763 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
17764 return NO_REGS;
17765
17766 /* Prefer SSE regs only, if we can use them for math. */
17767 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
17768 return SSE_CLASS_P (class) ? class : NO_REGS;
17769
17770 /* Floating-point constants need more complex checks. */
17771 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
17772 {
17773 /* General regs can load everything. */
17774 if (reg_class_subset_p (class, GENERAL_REGS))
17775 return class;
17776
17777 /* Floats can load 0 and 1 plus some others. Note that we eliminated
17778 zero above. We only want to wind up preferring 80387 registers if
17779 we plan on doing computation with them. */
17780 if (TARGET_80387
17781 && standard_80387_constant_p (x))
17782 {
17783 /* Limit class to non-sse. */
17784 if (class == FLOAT_SSE_REGS)
17785 return FLOAT_REGS;
17786 if (class == FP_TOP_SSE_REGS)
17787 return FP_TOP_REG;
17788 if (class == FP_SECOND_SSE_REGS)
17789 return FP_SECOND_REG;
17790 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
17791 return class;
17792 }
17793
17794 return NO_REGS;
17795 }
17796
17797 /* Generally when we see PLUS here, it's the function invariant
17798 (plus soft-fp const_int). Which can only be computed into general
17799 regs. */
17800 if (GET_CODE (x) == PLUS)
17801 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
17802
17803 /* QImode constants are easy to load, but non-constant QImode data
17804 must go into Q_REGS. */
17805 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
17806 {
17807 if (reg_class_subset_p (class, Q_REGS))
17808 return class;
17809 if (reg_class_subset_p (Q_REGS, class))
17810 return Q_REGS;
17811 return NO_REGS;
17812 }
17813
17814 return class;
17815 }
17816
17817 /* Discourage putting floating-point values in SSE registers unless
17818 SSE math is being used, and likewise for the 387 registers. */
17819 enum reg_class
17820 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
17821 {
17822 enum machine_mode mode = GET_MODE (x);
17823
17824 /* Restrict the output reload class to the register bank that we are doing
17825 math on. If we would like not to return a subset of CLASS, reject this
17826 alternative: if reload cannot do this, it will still use its choice. */
17828 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
17829 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
17830
17831 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
17832 {
17833 if (class == FP_TOP_SSE_REGS)
17834 return FP_TOP_REG;
17835 else if (class == FP_SECOND_SSE_REGS)
17836 return FP_SECOND_REG;
17837 else
17838 return FLOAT_CLASS_P (class) ? class : NO_REGS;
17839 }
17840
17841 return class;
17842 }
17843
17844 /* If we are copying between general and FP registers, we need a memory
17845 location. The same is true for SSE and MMX registers.
17846
17847 The macro can't work reliably when one of the CLASSES is a class containing
17848 registers from multiple units (SSE, MMX, integer). We avoid this by never
17849 combining those units in a single alternative in the machine description.
17850 Ensure that this constraint holds to avoid unexpected surprises.
17851
17852 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
17853 enforce these sanity checks. */
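/* Example (illustrative): moving a DFmode value between SSE_REGS and
   GENERAL_REGS reports "secondary memory needed" below even on SSE2
   targets, because of the SFmode/DFmode restriction at the end, so the
   copy is routed through a stack slot.  */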
17854
17855 int
17856 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
17857 enum machine_mode mode, int strict)
17858 {
17859 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
17860 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
17861 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
17862 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
17863 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
17864 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
17865 {
17866 gcc_assert (!strict);
17867 return true;
17868 }
17869
17870 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
17871 return true;
17872
17873 /* ??? This is a lie. We do have moves between mmx/general and between
17874 mmx/sse2. But by saying we need secondary memory we discourage the
17875 register allocator from using the mmx registers unless needed. */
17876 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
17877 return true;
17878
17879 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17880 {
17881 /* SSE1 doesn't have any direct moves from other classes. */
17882 if (!TARGET_SSE2)
17883 return true;
17884
17885 /* If the target says that inter-unit moves are more expensive
17886 than moving through memory, then don't generate them. */
17887 if (!TARGET_INTER_UNIT_MOVES && !optimize_size)
17888 return true;
17889
17890 /* Between SSE and general, we have moves no larger than word size. */
17891 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
17892 return true;
17893
17894 /* ??? For the cost of one register reformat penalty, we could use
17895 the same instructions to move SFmode and DFmode data, but the
17896 relevant move patterns don't support those alternatives. */
17897 if (mode == SFmode || mode == DFmode)
17898 return true;
17899 }
17900
17901 return false;
17902 }
17903
17904 /* Return true if the registers in CLASS cannot represent the change from
17905 modes FROM to TO. */
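/* Example (illustrative): (subreg:SI (reg:HI 100) 0) is rejected for SSE
   and MMX classes below because GET_MODE_SIZE (HImode) < 4, which stops
   reload from dropping the subreg as described for vec_dupv4hi.  */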
17906
17907 bool
17908 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
17909 enum reg_class class)
17910 {
17911 if (from == to)
17912 return false;
17913
17914 /* x87 registers can't do subreg at all, as all values are reformatted
17915 to extended precision. */
17916 if (MAYBE_FLOAT_CLASS_P (class))
17917 return true;
17918
17919 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
17920 {
17921 /* Vector registers do not support QI or HImode loads. If we don't
17922 disallow a change to these modes, reload will assume it's ok to
17923 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
17924 the vec_dupv4hi pattern. */
17925 if (GET_MODE_SIZE (from) < 4)
17926 return true;
17927
17928 /* Vector registers do not support subreg with nonzero offsets, which
17929 are otherwise valid for integer registers. Since we can't see
17930 whether we have a nonzero offset from here, prohibit all
17931 nonparadoxical subregs changing size. */
17932 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
17933 return true;
17934 }
17935
17936 return false;
17937 }
17938
17939 /* Return the cost of moving data from a register in class CLASS1 to
17940 one in class CLASS2.
17941
17942 It is not required that the cost always equal 2 when FROM is the same as TO;
17943 on some machines it is expensive to move between registers if they are not
17944 general registers. */
17945
17946 int
17947 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
17948 enum reg_class class2)
17949 {
17950 /* In case we require secondary memory, compute cost of the store followed
17951 by load. In order to avoid bad register allocation choices, we need
17952 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
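  /* Illustrative: for GENERAL_REGS <-> SSE_REGS in a mode that needs
     secondary memory, the figure below is roughly
     1 + MAX (int load, int store) + MAX (SSE load, SSE store), plus 20
     when CLASS1 needs more hard registers for MODE than CLASS2 does.  */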
17953
17954 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
17955 {
17956 int cost = 1;
17957
17958 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
17959 MEMORY_MOVE_COST (mode, class1, 1));
17960 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
17961 MEMORY_MOVE_COST (mode, class2, 1));
17962
17963 /* When copying from a general purpose register we may emit multiple
17964 stores followed by a single load, causing a memory size mismatch stall.
17965 Count this as an arbitrarily high cost of 20. */
17966 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
17967 cost += 20;
17968
17969 /* In the case of FP/MMX moves, the registers actually overlap, and we
17970 have to switch modes in order to treat them differently. */
17971 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
17972 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
17973 cost += 20;
17974
17975 return cost;
17976 }
17977
17978 /* Moves between SSE/MMX and integer unit are expensive. */
17979 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
17980 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
17981 return ix86_cost->mmxsse_to_integer;
17982 if (MAYBE_FLOAT_CLASS_P (class1))
17983 return ix86_cost->fp_move;
17984 if (MAYBE_SSE_CLASS_P (class1))
17985 return ix86_cost->sse_move;
17986 if (MAYBE_MMX_CLASS_P (class1))
17987 return ix86_cost->mmx_move;
17988 return 2;
17989 }
17990
17991 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
17992
17993 bool
17994 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
17995 {
17996 /* Flags and only flags can only hold CCmode values. */
17997 if (CC_REGNO_P (regno))
17998 return GET_MODE_CLASS (mode) == MODE_CC;
17999 if (GET_MODE_CLASS (mode) == MODE_CC
18000 || GET_MODE_CLASS (mode) == MODE_RANDOM
18001 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18002 return 0;
18003 if (FP_REGNO_P (regno))
18004 return VALID_FP_MODE_P (mode);
18005 if (SSE_REGNO_P (regno))
18006 {
18007 /* We implement the move patterns for all vector modes into and
18008 out of SSE registers, even when no operation instructions
18009 are available. */
18010 return (VALID_SSE_REG_MODE (mode)
18011 || VALID_SSE2_REG_MODE (mode)
18012 || VALID_MMX_REG_MODE (mode)
18013 || VALID_MMX_REG_MODE_3DNOW (mode));
18014 }
18015 if (MMX_REGNO_P (regno))
18016 {
18017 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18018 so if the register is available at all, then we can move data of
18019 the given mode into or out of it. */
18020 return (VALID_MMX_REG_MODE (mode)
18021 || VALID_MMX_REG_MODE_3DNOW (mode));
18022 }
18023
18024 if (mode == QImode)
18025 {
18026 /* Take care with QImode values - they can be in non-QI regs,
18027 but then they do cause partial register stalls. */
18028 if (regno < 4 || TARGET_64BIT)
18029 return 1;
18030 if (!TARGET_PARTIAL_REG_STALL)
18031 return 1;
18032 return reload_in_progress || reload_completed;
18033 }
18034 /* We handle both integer and floats in the general purpose registers. */
18035 else if (VALID_INT_MODE_P (mode))
18036 return 1;
18037 else if (VALID_FP_MODE_P (mode))
18038 return 1;
18039 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18040 on to use that value in smaller contexts, this can easily force a
18041 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18042 supporting DImode, allow it. */
18043 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18044 return 1;
18045
18046 return 0;
18047 }
18048
18049 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18050 tieable integer mode. */
18051
18052 static bool
18053 ix86_tieable_integer_mode_p (enum machine_mode mode)
18054 {
18055 switch (mode)
18056 {
18057 case HImode:
18058 case SImode:
18059 return true;
18060
18061 case QImode:
18062 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18063
18064 case DImode:
18065 return TARGET_64BIT;
18066
18067 default:
18068 return false;
18069 }
18070 }
18071
18072 /* Return true if MODE1 is accessible in a register that can hold MODE2
18073 without copying. That is, all register classes that can hold MODE2
18074 can also hold MODE1. */
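/* Examples (illustrative): SImode and HImode always tie; QImode ties only
   on 64-bit or when partial register stalls are not a concern; DFmode
   ties only with SFmode; and V4SFmode ties with any other mode the SSE
   registers accept.  */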
18075
18076 bool
18077 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18078 {
18079 if (mode1 == mode2)
18080 return true;
18081
18082 if (ix86_tieable_integer_mode_p (mode1)
18083 && ix86_tieable_integer_mode_p (mode2))
18084 return true;
18085
18086 /* MODE2 being XFmode implies fp stack or general regs, which means we
18087 can tie any smaller floating point modes to it. Note that we do not
18088 tie this with TFmode. */
18089 if (mode2 == XFmode)
18090 return mode1 == SFmode || mode1 == DFmode;
18091
18092 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18093 that we can tie it with SFmode. */
18094 if (mode2 == DFmode)
18095 return mode1 == SFmode;
18096
18097 /* If MODE2 is only appropriate for an SSE register, then tie with
18098 any other mode acceptable to SSE registers. */
18099 if (GET_MODE_SIZE (mode2) >= 8
18100 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18101 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18102
18103 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18104 with any other mode acceptable to MMX registers. */
18105 if (GET_MODE_SIZE (mode2) == 8
18106 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18107 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18108
18109 return false;
18110 }
18111
18112 /* Return the cost of moving data of mode M between a
18113 register and memory. A value of 2 is the default; this cost is
18114 relative to those in `REGISTER_MOVE_COST'.
18115
18116 If moving between registers and memory is more expensive than
18117 between two registers, you should define this macro to express the
18118 relative cost.
18119
18120 Also model the increased cost of moving QImode registers in
18121 non-Q_REGS classes.
18122 */
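/* Worked example (illustrative): loading a QImode value into a non-Q
   class is charged the movzbl_load cost, and storing it costs
   int_store[0] + 4 - both deliberately worse than the plain Q_REGS
   figures, so QImode pseudos prefer Q_REGS.  */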
18123 int
18124 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18125 {
18126 if (FLOAT_CLASS_P (class))
18127 {
18128 int index;
18129 switch (mode)
18130 {
18131 case SFmode:
18132 index = 0;
18133 break;
18134 case DFmode:
18135 index = 1;
18136 break;
18137 case XFmode:
18138 index = 2;
18139 break;
18140 default:
18141 return 100;
18142 }
18143 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18144 }
18145 if (SSE_CLASS_P (class))
18146 {
18147 int index;
18148 switch (GET_MODE_SIZE (mode))
18149 {
18150 case 4:
18151 index = 0;
18152 break;
18153 case 8:
18154 index = 1;
18155 break;
18156 case 16:
18157 index = 2;
18158 break;
18159 default:
18160 return 100;
18161 }
18162 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18163 }
18164 if (MMX_CLASS_P (class))
18165 {
18166 int index;
18167 switch (GET_MODE_SIZE (mode))
18168 {
18169 case 4:
18170 index = 0;
18171 break;
18172 case 8:
18173 index = 1;
18174 break;
18175 default:
18176 return 100;
18177 }
18178 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18179 }
18180 switch (GET_MODE_SIZE (mode))
18181 {
18182 case 1:
18183 if (in)
18184 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18185 : ix86_cost->movzbl_load);
18186 else
18187 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18188 : ix86_cost->int_store[0] + 4);
18189 break;
18190 case 2:
18191 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18192 default:
18193 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18194 if (mode == TFmode)
18195 mode = XFmode;
18196 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18197 * (((int) GET_MODE_SIZE (mode)
18198 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18199 }
18200 }
18201
18202 /* Compute a (partial) cost for rtx X. Return true if the complete
18203 cost has been computed, and false if subexpressions should be
18204 scanned. In either case, *TOTAL contains the cost result. */
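/* Worked example (illustrative): for (mult:SI (reg:SI r) (const_int 10))
   the MULT case counts the two bits set in 10 and computes

	*total = mult_init[MODE_INDEX (SImode)] + 2 * mult_bit
		 + rtx_cost (reg) + rtx_cost (const_int)

   returning true, so the operands are not rescanned separately.  */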
18205
18206 static bool
18207 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18208 {
18209 enum machine_mode mode = GET_MODE (x);
18210
18211 switch (code)
18212 {
18213 case CONST_INT:
18214 case CONST:
18215 case LABEL_REF:
18216 case SYMBOL_REF:
18217 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18218 *total = 3;
18219 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18220 *total = 2;
18221 else if (flag_pic && SYMBOLIC_CONST (x)
18222 && (!TARGET_64BIT
18223 || (GET_CODE (x) != LABEL_REF
18224 && (GET_CODE (x) != SYMBOL_REF
18225 || !SYMBOL_REF_LOCAL_P (x)))))
18226 *total = 1;
18227 else
18228 *total = 0;
18229 return true;
18230
18231 case CONST_DOUBLE:
18232 if (mode == VOIDmode)
18233 *total = 0;
18234 else
18235 switch (standard_80387_constant_p (x))
18236 {
18237 case 1: /* 0.0 */
18238 *total = 1;
18239 break;
18240 default: /* Other constants */
18241 *total = 2;
18242 break;
18243 case 0:
18244 case -1:
18245 /* Start with (MEM (SYMBOL_REF)), since that's where
18246 it'll probably end up. Add a penalty for size. */
18247 *total = (COSTS_N_INSNS (1)
18248 + (flag_pic != 0 && !TARGET_64BIT)
18249 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18250 break;
18251 }
18252 return true;
18253
18254 case ZERO_EXTEND:
18255 /* The zero extension is often completely free on x86_64, so make
18256 it as cheap as possible. */
18257 if (TARGET_64BIT && mode == DImode
18258 && GET_MODE (XEXP (x, 0)) == SImode)
18259 *total = 1;
18260 else if (TARGET_ZERO_EXTEND_WITH_AND)
18261 *total = ix86_cost->add;
18262 else
18263 *total = ix86_cost->movzx;
18264 return false;
18265
18266 case SIGN_EXTEND:
18267 *total = ix86_cost->movsx;
18268 return false;
18269
18270 case ASHIFT:
18271 if (GET_CODE (XEXP (x, 1)) == CONST_INT
18272 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18273 {
18274 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18275 if (value == 1)
18276 {
18277 *total = ix86_cost->add;
18278 return false;
18279 }
18280 if ((value == 2 || value == 3)
18281 && ix86_cost->lea <= ix86_cost->shift_const)
18282 {
18283 *total = ix86_cost->lea;
18284 return false;
18285 }
18286 }
18287 /* FALLTHRU */
18288
18289 case ROTATE:
18290 case ASHIFTRT:
18291 case LSHIFTRT:
18292 case ROTATERT:
18293 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18294 {
18295 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18296 {
18297 if (INTVAL (XEXP (x, 1)) > 32)
18298 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18299 else
18300 *total = ix86_cost->shift_const * 2;
18301 }
18302 else
18303 {
18304 if (GET_CODE (XEXP (x, 1)) == AND)
18305 *total = ix86_cost->shift_var * 2;
18306 else
18307 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18308 }
18309 }
18310 else
18311 {
18312 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18313 *total = ix86_cost->shift_const;
18314 else
18315 *total = ix86_cost->shift_var;
18316 }
18317 return false;
18318
18319 case MULT:
18320 if (FLOAT_MODE_P (mode))
18321 {
18322 *total = ix86_cost->fmul;
18323 return false;
18324 }
18325 else
18326 {
18327 rtx op0 = XEXP (x, 0);
18328 rtx op1 = XEXP (x, 1);
18329 int nbits;
18330 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
18331 {
18332 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18333 for (nbits = 0; value != 0; value &= value - 1)
18334 nbits++;
18335 }
18336 else
18337 /* This is arbitrary. */
18338 nbits = 7;
18339
18340 /* Compute costs correctly for widening multiplication. */
18341 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
18342 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
18343 == GET_MODE_SIZE (mode))
18344 {
18345 int is_mulwiden = 0;
18346 enum machine_mode inner_mode = GET_MODE (op0);
18347
18348 if (GET_CODE (op0) == GET_CODE (op1))
18349 is_mulwiden = 1, op1 = XEXP (op1, 0);
18350 else if (GET_CODE (op1) == CONST_INT)
18351 {
18352 if (GET_CODE (op0) == SIGN_EXTEND)
18353 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
18354 == INTVAL (op1);
18355 else
18356 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
18357 }
18358
18359 if (is_mulwiden)
18360 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
18361 }
18362
18363 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
18364 + nbits * ix86_cost->mult_bit
18365 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
18366
18367 return true;
18368 }
18369
18370 case DIV:
18371 case UDIV:
18372 case MOD:
18373 case UMOD:
18374 if (FLOAT_MODE_P (mode))
18375 *total = ix86_cost->fdiv;
18376 else
18377 *total = ix86_cost->divide[MODE_INDEX (mode)];
18378 return false;
18379
18380 case PLUS:
18381 if (FLOAT_MODE_P (mode))
18382 *total = ix86_cost->fadd;
18383 else if (GET_MODE_CLASS (mode) == MODE_INT
18384 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
18385 {
18386 if (GET_CODE (XEXP (x, 0)) == PLUS
18387 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
18388 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == CONST_INT
18389 && CONSTANT_P (XEXP (x, 1)))
18390 {
18391 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
18392 if (val == 2 || val == 4 || val == 8)
18393 {
18394 *total = ix86_cost->lea;
18395 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18396 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
18397 outer_code);
18398 *total += rtx_cost (XEXP (x, 1), outer_code);
18399 return true;
18400 }
18401 }
18402 else if (GET_CODE (XEXP (x, 0)) == MULT
18403 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
18404 {
18405 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
18406 if (val == 2 || val == 4 || val == 8)
18407 {
18408 *total = ix86_cost->lea;
18409 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18410 *total += rtx_cost (XEXP (x, 1), outer_code);
18411 return true;
18412 }
18413 }
18414 else if (GET_CODE (XEXP (x, 0)) == PLUS)
18415 {
18416 *total = ix86_cost->lea;
18417 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
18418 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
18419 *total += rtx_cost (XEXP (x, 1), outer_code);
18420 return true;
18421 }
18422 }
18423 /* FALLTHRU */
18424
18425 case MINUS:
18426 if (FLOAT_MODE_P (mode))
18427 {
18428 *total = ix86_cost->fadd;
18429 return false;
18430 }
18431 /* FALLTHRU */
18432
18433 case AND:
18434 case IOR:
18435 case XOR:
18436 if (!TARGET_64BIT && mode == DImode)
18437 {
18438 *total = (ix86_cost->add * 2
18439 + (rtx_cost (XEXP (x, 0), outer_code)
18440 << (GET_MODE (XEXP (x, 0)) != DImode))
18441 + (rtx_cost (XEXP (x, 1), outer_code)
18442 << (GET_MODE (XEXP (x, 1)) != DImode)));
18443 return true;
18444 }
18445 /* FALLTHRU */
18446
18447 case NEG:
18448 if (FLOAT_MODE_P (mode))
18449 {
18450 *total = ix86_cost->fchs;
18451 return false;
18452 }
18453 /* FALLTHRU */
18454
18455 case NOT:
18456 if (!TARGET_64BIT && mode == DImode)
18457 *total = ix86_cost->add * 2;
18458 else
18459 *total = ix86_cost->add;
18460 return false;
18461
18462 case COMPARE:
18463 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
18464 && XEXP (XEXP (x, 0), 1) == const1_rtx
18465 && GET_CODE (XEXP (XEXP (x, 0), 2)) == CONST_INT
18466 && XEXP (x, 1) == const0_rtx)
18467 {
18468 /* This kind of construct is implemented using test[bwl].
18469 Treat it as if we had an AND. */
18470 *total = (ix86_cost->add
18471 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
18472 + rtx_cost (const1_rtx, outer_code));
18473 return true;
18474 }
18475 return false;
18476
18477 case FLOAT_EXTEND:
18478 if (!TARGET_SSE_MATH
18479 || mode == XFmode
18480 || (mode == DFmode && !TARGET_SSE2))
18481 *total = 0;
18482 return false;
18483
18484 case ABS:
18485 if (FLOAT_MODE_P (mode))
18486 *total = ix86_cost->fabs;
18487 return false;
18488
18489 case SQRT:
18490 if (FLOAT_MODE_P (mode))
18491 *total = ix86_cost->fsqrt;
18492 return false;
18493
18494 case UNSPEC:
18495 if (XINT (x, 1) == UNSPEC_TP)
18496 *total = 0;
18497 return false;
18498
18499 default:
18500 return false;
18501 }
18502 }
18503
18504 #if TARGET_MACHO
18505
18506 static int current_machopic_label_num;
18507
18508 /* Given a symbol name and its associated stub, write out the
18509 definition of the stub. */
18510
18511 void
18512 machopic_output_stub (FILE *file, const char *symb, const char *stub)
18513 {
18514 unsigned int length;
18515 char *binder_name, *symbol_name, lazy_ptr_name[32];
18516 int label = ++current_machopic_label_num;
18517
18518 /* For 64-bit we shouldn't get here. */
18519 gcc_assert (!TARGET_64BIT);
18520
18521 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
18522 symb = (*targetm.strip_name_encoding) (symb);
18523
18524 length = strlen (stub);
18525 binder_name = alloca (length + 32);
18526 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
18527
18528 length = strlen (symb);
18529 symbol_name = alloca (length + 32);
18530 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
18531
18532 sprintf (lazy_ptr_name, "L%d$lz", label);
18533
18534 if (MACHOPIC_PURE)
18535 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
18536 else
18537 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
18538
18539 fprintf (file, "%s:\n", stub);
18540 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18541
18542 if (MACHOPIC_PURE)
18543 {
18544 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
18545 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
18546 fprintf (file, "\tjmp\t*%%edx\n");
18547 }
18548 else
18549 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
18550
18551 fprintf (file, "%s:\n", binder_name);
18552
18553 if (MACHOPIC_PURE)
18554 {
18555 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
18556 fprintf (file, "\tpushl\t%%eax\n");
18557 }
18558 else
18559 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
18560
18561 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
18562
18563 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
18564 fprintf (file, "%s:\n", lazy_ptr_name);
18565 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
18566 fprintf (file, "\t.long %s\n", binder_name);
18567 }
18568
18569 void
18570 darwin_x86_file_end (void)
18571 {
18572 darwin_file_end ();
18573 ix86_file_end ();
18574 }
18575 #endif /* TARGET_MACHO */
18576
18577 /* Order the registers for register allocator. */
18578
18579 void
18580 x86_order_regs_for_local_alloc (void)
18581 {
18582 int pos = 0;
18583 int i;
18584
18585 /* First allocate the local general purpose registers. */
18586 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18587 if (GENERAL_REGNO_P (i) && call_used_regs[i])
18588 reg_alloc_order [pos++] = i;
18589
18590 /* Global general purpose registers. */
18591 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
18592 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
18593 reg_alloc_order [pos++] = i;
18594
18595 /* x87 registers come first in case we are doing FP math
18596 using them. */
18597 if (!TARGET_SSE_MATH)
18598 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18599 reg_alloc_order [pos++] = i;
18600
18601 /* SSE registers. */
18602 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
18603 reg_alloc_order [pos++] = i;
18604 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
18605 reg_alloc_order [pos++] = i;
18606
18607 /* x87 registers. */
18608 if (TARGET_SSE_MATH)
18609 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
18610 reg_alloc_order [pos++] = i;
18611
18612 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
18613 reg_alloc_order [pos++] = i;
18614
18615 /* Initialize the rest of the array, as some registers are never
18616 allocated at all. */
18617 while (pos < FIRST_PSEUDO_REGISTER)
18618 reg_alloc_order [pos++] = 0;
18619 }
18620
18621 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
18622 struct attribute_spec.handler. */
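/* Illustrative user-level usage this handler validates:

	struct __attribute__ ((ms_struct)) S { char c; int i; };

   The attribute is accepted on struct and union types only; combining
   ms_struct and gcc_struct on one type draws the "incompatible attribute
   ignored" warning below.  */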
18623 static tree
18624 ix86_handle_struct_attribute (tree *node, tree name,
18625 tree args ATTRIBUTE_UNUSED,
18626 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
18627 {
18628 tree *type = NULL;
18629 if (DECL_P (*node))
18630 {
18631 if (TREE_CODE (*node) == TYPE_DECL)
18632 type = &TREE_TYPE (*node);
18633 }
18634 else
18635 type = node;
18636
18637 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
18638 || TREE_CODE (*type) == UNION_TYPE)))
18639 {
18640 warning (OPT_Wattributes, "%qs attribute ignored",
18641 IDENTIFIER_POINTER (name));
18642 *no_add_attrs = true;
18643 }
18644
18645 else if ((is_attribute_p ("ms_struct", name)
18646 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
18647 || ((is_attribute_p ("gcc_struct", name)
18648 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
18649 {
18650 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
18651 IDENTIFIER_POINTER (name));
18652 *no_add_attrs = true;
18653 }
18654
18655 return NULL_TREE;
18656 }
18657
18658 static bool
18659 ix86_ms_bitfield_layout_p (tree record_type)
18660 {
18661 return (TARGET_MS_BITFIELD_LAYOUT &&
18662 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
18663 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
18664 }
18665
18666 /* Returns an expression indicating where the this parameter is
18667 located on entry to the FUNCTION. */
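/* Examples (illustrative): a plain 32-bit method finds "this" at
   (mem:SI (plus sp 4)), or sp+8 when the return value is passed by
   hidden reference; a fastcall method gets (reg:SI cx); 64-bit code gets
   the first integer argument register (%rdi, or %rsi with a hidden
   return pointer).  */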
18668
18669 static rtx
18670 x86_this_parameter (tree function)
18671 {
18672 tree type = TREE_TYPE (function);
18673
18674 if (TARGET_64BIT)
18675 {
18676 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
18677 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
18678 }
18679
18680 if (ix86_function_regparm (type, function) > 0)
18681 {
18682 tree parm;
18683
18684 parm = TYPE_ARG_TYPES (type);
18685 /* Figure out whether or not the function has a variable number of
18686 arguments. */
18687 for (; parm; parm = TREE_CHAIN (parm))
18688 if (TREE_VALUE (parm) == void_type_node)
18689 break;
18690 /* If not, the this parameter is in the first argument. */
18691 if (parm)
18692 {
18693 int regno = 0;
18694 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
18695 regno = 2;
18696 return gen_rtx_REG (SImode, regno);
18697 }
18698 }
18699
18700 if (aggregate_value_p (TREE_TYPE (type), type))
18701 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
18702 else
18703 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
18704 }
18705
18706 /* Determine whether x86_output_mi_thunk can succeed. */
18707
18708 static bool
18709 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
18710 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
18711 HOST_WIDE_INT vcall_offset, tree function)
18712 {
18713 /* 64-bit can handle anything. */
18714 if (TARGET_64BIT)
18715 return true;
18716
18717 /* For 32-bit, everything's fine if we have one free register. */
18718 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
18719 return true;
18720
18721 /* Need a free register for vcall_offset. */
18722 if (vcall_offset)
18723 return false;
18724
18725 /* Need a free register for GOT references. */
18726 if (flag_pic && !(*targetm.binds_local_p) (function))
18727 return false;
18728
18729 /* Otherwise ok. */
18730 return true;
18731 }
18732
18733 /* Output the assembler code for a thunk function. THUNK_DECL is the
18734 declaration for the thunk function itself, FUNCTION is the decl for
18735 the target function. DELTA is an immediate constant offset to be
18736 added to THIS. If VCALL_OFFSET is nonzero, the word at
18737 *(*this + vcall_offset) should be added to THIS. */
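/* Sketch of the 32-bit, non-PIC output for a DELTA-only thunk
   (illustrative; the exact operands depend on the calling convention):

	addl	$DELTA, 4(%esp)
	jmp	target_function

   With a nonzero VCALL_OFFSET, THIS is first pulled into a register and
   *(*this + vcall_offset) is added via a scratch register.  */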
18738
18739 static void
18740 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
18741 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
18742 HOST_WIDE_INT vcall_offset, tree function)
18743 {
18744 rtx xops[3];
18745 rtx this = x86_this_parameter (function);
18746 rtx this_reg, tmp;
18747
18748 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
18749 pull it in now and let DELTA benefit. */
18750 if (REG_P (this))
18751 this_reg = this;
18752 else if (vcall_offset)
18753 {
18754 /* Put the this parameter into %eax. */
18755 xops[0] = this;
18756 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
18757 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18758 }
18759 else
18760 this_reg = NULL_RTX;
18761
18762 /* Adjust the this parameter by a fixed constant. */
18763 if (delta)
18764 {
18765 xops[0] = GEN_INT (delta);
18766 xops[1] = this_reg ? this_reg : this;
18767 if (TARGET_64BIT)
18768 {
18769 if (!x86_64_general_operand (xops[0], DImode))
18770 {
18771 tmp = gen_rtx_REG (DImode, R10_REG);
18772 xops[1] = tmp;
18773 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
18774 xops[0] = tmp;
18775 xops[1] = this;
18776 }
18777 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18778 }
18779 else
18780 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18781 }
18782
18783 /* Adjust the this parameter by a value stored in the vtable. */
18784 if (vcall_offset)
18785 {
18786 if (TARGET_64BIT)
18787 tmp = gen_rtx_REG (DImode, R10_REG);
18788 else
18789 {
18790 int tmp_regno = 2 /* ECX */;
18791 if (lookup_attribute ("fastcall",
18792 TYPE_ATTRIBUTES (TREE_TYPE (function))))
18793 tmp_regno = 0 /* EAX */;
18794 tmp = gen_rtx_REG (SImode, tmp_regno);
18795 }
18796
18797 xops[0] = gen_rtx_MEM (Pmode, this_reg);
18798 xops[1] = tmp;
18799 if (TARGET_64BIT)
18800 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18801 else
18802 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18803
18804 /* Adjust the this parameter. */
18805 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
18806 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
18807 {
18808 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
18809 xops[0] = GEN_INT (vcall_offset);
18810 xops[1] = tmp2;
18811 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
18812 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
18813 }
18814 xops[1] = this_reg;
18815 if (TARGET_64BIT)
18816 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
18817 else
18818 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
18819 }
18820
18821 /* If necessary, drop THIS back to its stack slot. */
18822 if (this_reg && this_reg != this)
18823 {
18824 xops[0] = this_reg;
18825 xops[1] = this;
18826 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
18827 }
18828
18829 xops[0] = XEXP (DECL_RTL (function), 0);
18830 if (TARGET_64BIT)
18831 {
18832 if (!flag_pic || (*targetm.binds_local_p) (function))
18833 output_asm_insn ("jmp\t%P0", xops);
18834 else
18835 {
18836 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
18837 tmp = gen_rtx_CONST (Pmode, tmp);
18838 tmp = gen_rtx_MEM (QImode, tmp);
18839 xops[0] = tmp;
18840 output_asm_insn ("jmp\t%A0", xops);
18841 }
18842 }
18843 else
18844 {
18845 if (!flag_pic || (*targetm.binds_local_p) (function))
18846 output_asm_insn ("jmp\t%P0", xops);
18847 else
18848 #if TARGET_MACHO
18849 if (TARGET_MACHO)
18850 {
18851 rtx sym_ref = XEXP (DECL_RTL (function), 0);
18852 tmp = (gen_rtx_SYMBOL_REF
18853 (Pmode,
18854 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
18855 tmp = gen_rtx_MEM (QImode, tmp);
18856 xops[0] = tmp;
18857 output_asm_insn ("jmp\t%0", xops);
18858 }
18859 else
18860 #endif /* TARGET_MACHO */
18861 {
18862 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
18863 output_set_got (tmp, NULL_RTX);
18864
18865 xops[1] = tmp;
18866 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
18867 output_asm_insn ("jmp\t{*}%1", xops);
18868 }
18869 }
18870 }
18871
18872 static void
18873 x86_file_start (void)
18874 {
18875 default_file_start ();
18876 #if TARGET_MACHO
18877 darwin_file_start ();
18878 #endif
18879 if (X86_FILE_START_VERSION_DIRECTIVE)
18880 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
18881 if (X86_FILE_START_FLTUSED)
18882 fputs ("\t.global\t__fltused\n", asm_out_file);
18883 if (ix86_asm_dialect == ASM_INTEL)
18884 fputs ("\t.intel_syntax\n", asm_out_file);
18885 }
18886
18887 int
18888 x86_field_alignment (tree field, int computed)
18889 {
18890 enum machine_mode mode;
18891 tree type = TREE_TYPE (field);
18892
18893 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
18894 return computed;
18895 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
18896 ? get_inner_array_type (type) : type);
18897 if (mode == DFmode || mode == DCmode
18898 || GET_MODE_CLASS (mode) == MODE_INT
18899 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
18900 return MIN (32, computed);
18901 return computed;
18902 }
18903
18904 /* Output assembler code to FILE to increment profiler label # LABELNO
18905 for profiling a function entry. */
18906 void
18907 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
18908 {
18909 if (TARGET_64BIT)
18910 if (flag_pic)
18911 {
18912 #ifndef NO_PROFILE_COUNTERS
18913 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
18914 #endif
18915 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
18916 }
18917 else
18918 {
18919 #ifndef NO_PROFILE_COUNTERS
18920 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
18921 #endif
18922 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18923 }
18924 else if (flag_pic)
18925 {
18926 #ifndef NO_PROFILE_COUNTERS
18927 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
18928 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
18929 #endif
18930 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
18931 }
18932 else
18933 {
18934 #ifndef NO_PROFILE_COUNTERS
18935 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
18936 PROFILE_COUNT_REGISTER);
18937 #endif
18938 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
18939 }
18940 }
18941
18942 /* We don't have exact information about the insn sizes, but we may assume
18943 quite safely that we are informed about all 1-byte insns and memory
18944 address sizes. This is enough to eliminate unnecessary padding in
18945 99% of cases. */
18946
18947 static int
18948 min_insn_size (rtx insn)
18949 {
18950 int l = 0;
18951
18952 if (!INSN_P (insn) || !active_insn_p (insn))
18953 return 0;
18954
18955 /* Discard alignments we've emitted, and jump instructions. */
18956 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
18957 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
18958 return 0;
18959 if (GET_CODE (insn) == JUMP_INSN
18960 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
18961 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
18962 return 0;
18963
18964 /* Important case - calls are always 5 bytes.
18965 It is common to have many calls in a row. */
18966 if (GET_CODE (insn) == CALL_INSN
18967 && symbolic_reference_mentioned_p (PATTERN (insn))
18968 && !SIBLING_CALL_P (insn))
18969 return 5;
18970 if (get_attr_length (insn) <= 1)
18971 return 1;
18972
18973 /* For normal instructions we may rely on the sizes of addresses
18974 and the presence of a symbol to require 4 bytes of encoding.
18975 This is not the case for jumps, where references are PC-relative. */
18976 if (GET_CODE (insn) != JUMP_INSN)
18977 {
18978 l = get_attr_length_address (insn);
18979 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
18980 l = 4;
18981 }
18982 if (l)
18983 return 1+l;
18984 else
18985 return 2;
18986 }
18987
18988 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16-byte
18989 window. */
18990
18991 static void
18992 ix86_avoid_jump_misspredicts (void)
18993 {
18994 rtx insn, start = get_insns ();
18995 int nbytes = 0, njumps = 0;
18996 int isjump = 0;
18997
18998 /* Look for all minimal intervals of instructions containing 4 jumps.
18999 The intervals are bounded by START and INSN. NBYTES is the total
19000 size of instructions in the interval including INSN and not including
19001 START. When NBYTES is smaller than 16, it is possible
19002 that the end of START and the end of INSN fall in the same 16-byte page.
19003
19004 The smallest offset in the page at which INSN can start is the case where
19005 START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19006 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
19007 */
19008 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19009 {
19010
19011 nbytes += min_insn_size (insn);
19012 if (dump_file)
19013 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19014 INSN_UID (insn), min_insn_size (insn));
19015 if ((GET_CODE (insn) == JUMP_INSN
19016 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19017 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19018 || GET_CODE (insn) == CALL_INSN)
19019 njumps++;
19020 else
19021 continue;
19022
19023 while (njumps > 3)
19024 {
19025 start = NEXT_INSN (start);
19026 if ((GET_CODE (start) == JUMP_INSN
19027 && GET_CODE (PATTERN (start)) != ADDR_VEC
19028 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19029 || GET_CODE (start) == CALL_INSN)
19030 njumps--, isjump = 1;
19031 else
19032 isjump = 0;
19033 nbytes -= min_insn_size (start);
19034 }
19035 gcc_assert (njumps >= 0);
19036 if (dump_file)
19037 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19038 INSN_UID (start), INSN_UID (insn), nbytes);
19039
19040 if (njumps == 3 && isjump && nbytes < 16)
19041 {
19042 int padsize = 15 - nbytes + min_insn_size (insn);
19043
19044 if (dump_file)
19045 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19046 INSN_UID (insn), padsize);
19047 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19048 }
19049 }
19050 }
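/* Editor's sketch (not part of i386.c): the pass above slides a window
   bounded by START and INSN over the insn stream and pads as soon as a
   fourth jump or call would land in the same 16-byte chunk.  The
   standalone C below mirrors that bookkeeping over an abstract stream;
   struct insn_rec, pad_at and the sample sizes are hypothetical, only
   the njumps/nbytes logic is taken from ix86_avoid_jump_misspredicts.  */

#include <stdio.h>

struct insn_rec { int size; int is_jump; };

static void
pad_at (const struct insn_rec *insns, int n)
{
  int start = 0;                /* left edge of the window */
  int nbytes = 0, njumps = 0;
  int isjump = 0;

  for (int i = 0; i < n; i++)
    {
      nbytes += insns[i].size;
      if (!insns[i].is_jump)
        continue;
      njumps++;

      /* Shrink the window from the left until it holds at most 3 jumps.  */
      while (njumps > 3)
        {
          start++;
          if (insns[start].is_jump)
            njumps--, isjump = 1;
          else
            isjump = 0;
          nbytes -= insns[start].size;
        }

      /* A fourth jump would start in the same 16-byte chunk: pad.  */
      if (njumps == 3 && isjump && nbytes < 16)
        printf ("pad before insn %d by %d bytes\n",
                i, 15 - nbytes + insns[i].size);
    }
}

int
main (void)
{
  struct insn_rec stream[] =
    { {2, 1}, {3, 0}, {2, 1}, {2, 1}, {2, 1}, {5, 0} };
  pad_at (stream, 6);           /* reports padding before insn 4 */
  return 0;
}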
19051
19052 /* AMD Athlon works faster
19053 when RET is not the destination of a conditional jump or directly preceded
19054 by another jump instruction. We avoid the penalty by inserting a NOP just
19055 before the RET instruction in such cases. */
19056 static void
19057 ix86_pad_returns (void)
19058 {
19059 edge e;
19060 edge_iterator ei;
19061
19062 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19063 {
19064 basic_block bb = e->src;
19065 rtx ret = BB_END (bb);
19066 rtx prev;
19067 bool replace = false;
19068
19069 if (GET_CODE (ret) != JUMP_INSN || GET_CODE (PATTERN (ret)) != RETURN
19070 || !maybe_hot_bb_p (bb))
19071 continue;
19072 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19073 if (active_insn_p (prev) || GET_CODE (prev) == CODE_LABEL)
19074 break;
19075 if (prev && GET_CODE (prev) == CODE_LABEL)
19076 {
19077 edge e;
19078 edge_iterator ei;
19079
19080 FOR_EACH_EDGE (e, ei, bb->preds)
19081 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19082 && !(e->flags & EDGE_FALLTHRU))
19083 replace = true;
19084 }
19085 if (!replace)
19086 {
19087 prev = prev_active_insn (ret);
19088 if (prev
19089 && ((GET_CODE (prev) == JUMP_INSN && any_condjump_p (prev))
19090 || GET_CODE (prev) == CALL_INSN))
19091 replace = true;
19092 /* Empty functions get a branch mispredict even when the jump destination
19093 is not visible to us. */
19094 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19095 replace = true;
19096 }
19097 if (replace)
19098 {
19099 emit_insn_before (gen_return_internal_long (), ret);
19100 delete_insn (ret);
19101 }
19102 }
19103 }
19104
19105 /* Implement machine-specific optimizations. We implement padding of returns
19106 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19107 static void
19108 ix86_reorg (void)
19109 {
19110 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19111 ix86_pad_returns ();
19112 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19113 ix86_avoid_jump_misspredicts ();
19114 }
19115
19116 /* Return nonzero when a QImode register that must be represented via a REX
19117 prefix is used. */
19118 bool
19119 x86_extended_QIreg_mentioned_p (rtx insn)
19120 {
19121 int i;
19122 extract_insn_cached (insn);
19123 for (i = 0; i < recog_data.n_operands; i++)
19124 if (REG_P (recog_data.operand[i])
19125 && REGNO (recog_data.operand[i]) >= 4)
19126 return true;
19127 return false;
19128 }
19129
19130 /* Return nonzero when P points to a register encoded via a REX prefix.
19131 Called via for_each_rtx. */
19132 static int
19133 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19134 {
19135 unsigned int regno;
19136 if (!REG_P (*p))
19137 return 0;
19138 regno = REGNO (*p);
19139 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19140 }
19141
19142 /* Return true when INSN mentions a register that must be encoded using a
19143 REX prefix. */
19144 bool
19145 x86_extended_reg_mentioned_p (rtx insn)
19146 {
19147 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19148 }
19149
19150 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19151 optabs would emit if we didn't have TFmode patterns. */
19152
19153 void
19154 x86_emit_floatuns (rtx operands[2])
19155 {
19156 rtx neglab, donelab, i0, i1, f0, in, out;
19157 enum machine_mode mode, inmode;
19158
19159 inmode = GET_MODE (operands[1]);
19160 gcc_assert (inmode == SImode || inmode == DImode);
19161
19162 out = operands[0];
19163 in = force_reg (inmode, operands[1]);
19164 mode = GET_MODE (out);
19165 neglab = gen_label_rtx ();
19166 donelab = gen_label_rtx ();
19167 i1 = gen_reg_rtx (Pmode);
19168 f0 = gen_reg_rtx (mode);
19169
19170 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, Pmode, 0, neglab);
19171
19172 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_FLOAT (mode, in)));
19173 emit_jump_insn (gen_jump (donelab));
19174 emit_barrier ();
19175
19176 emit_label (neglab);
19177
19178 i0 = expand_simple_binop (Pmode, LSHIFTRT, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19179 i1 = expand_simple_binop (Pmode, AND, in, const1_rtx, NULL, 1, OPTAB_DIRECT);
19180 i0 = expand_simple_binop (Pmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19181 expand_float (f0, i0, 0);
19182 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19183
19184 emit_label (donelab);
19185 }
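/* Editor's sketch (not part of i386.c): scalar equivalent of the sequence
   emitted above.  When the input has its top bit set, the expander halves
   it with a logical shift, ORs the dropped bit back in so the final
   rounding still sees it, converts the now-nonnegative value with a signed
   conversion, and doubles the result.  u64_to_double is a hypothetical
   name; the example assumes DImode input and a double result.  */

#include <stdint.h>
#include <stdio.h>

static double
u64_to_double (uint64_t x)
{
  if ((int64_t) x >= 0)                  /* fits a signed conversion */
    return (double) (int64_t) x;

  uint64_t half = (x >> 1) | (x & 1);    /* halve, keep a sticky low bit */
  double d = (double) (int64_t) half;
  return d + d;                          /* undo the halving */
}

int
main (void)
{
  /* 2**64 - 1 converts to 18446744073709551616.0.  */
  printf ("%.1f\n", u64_to_double (0xffffffffffffffffull));
  return 0;
}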
19186 \f
19187 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19188 with all elements equal to VAR. Return true if successful. */
19189
19190 static bool
19191 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19192 rtx target, rtx val)
19193 {
19194 enum machine_mode smode, wsmode, wvmode;
19195 rtx x;
19196
19197 switch (mode)
19198 {
19199 case V2SImode:
19200 case V2SFmode:
19201 if (!mmx_ok)
19202 return false;
19203 /* FALLTHRU */
19204
19205 case V2DFmode:
19206 case V2DImode:
19207 case V4SFmode:
19208 case V4SImode:
19209 val = force_reg (GET_MODE_INNER (mode), val);
19210 x = gen_rtx_VEC_DUPLICATE (mode, val);
19211 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19212 return true;
19213
19214 case V4HImode:
19215 if (!mmx_ok)
19216 return false;
19217 if (TARGET_SSE || TARGET_3DNOW_A)
19218 {
19219 val = gen_lowpart (SImode, val);
19220 x = gen_rtx_TRUNCATE (HImode, val);
19221 x = gen_rtx_VEC_DUPLICATE (mode, x);
19222 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19223 return true;
19224 }
19225 else
19226 {
19227 smode = HImode;
19228 wsmode = SImode;
19229 wvmode = V2SImode;
19230 goto widen;
19231 }
19232
19233 case V8QImode:
19234 if (!mmx_ok)
19235 return false;
19236 smode = QImode;
19237 wsmode = HImode;
19238 wvmode = V4HImode;
19239 goto widen;
19240 case V8HImode:
19241 if (TARGET_SSE2)
19242 {
19243 rtx tmp1, tmp2;
19244 /* Extend HImode to SImode using a paradoxical SUBREG. */
19245 tmp1 = gen_reg_rtx (SImode);
19246 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19247 /* Insert the SImode value as low element of V4SImode vector. */
19248 tmp2 = gen_reg_rtx (V4SImode);
19249 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19250 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19251 CONST0_RTX (V4SImode),
19252 const1_rtx);
19253 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19254 /* Cast the V4SImode vector back to a V8HImode vector. */
19255 tmp1 = gen_reg_rtx (V8HImode);
19256 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19257 /* Duplicate the low short through the whole low SImode word. */
19258 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19259 /* Cast the V8HImode vector back to a V4SImode vector. */
19260 tmp2 = gen_reg_rtx (V4SImode);
19261 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19262 /* Replicate the low element of the V4SImode vector. */
19263 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19264 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19265 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19266 return true;
19267 }
19268 smode = HImode;
19269 wsmode = SImode;
19270 wvmode = V4SImode;
19271 goto widen;
19272 case V16QImode:
19273 if (TARGET_SSE2)
19274 {
19275 rtx tmp1, tmp2;
19276 /* Extend QImode to SImode using a paradoxical SUBREG. */
19277 tmp1 = gen_reg_rtx (SImode);
19278 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19279 /* Insert the SImode value as low element of V4SImode vector. */
19280 tmp2 = gen_reg_rtx (V4SImode);
19281 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19282 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19283 CONST0_RTX (V4SImode),
19284 const1_rtx);
19285 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19286 /* Cast the V4SImode vector back to a V16QImode vector. */
19287 tmp1 = gen_reg_rtx (V16QImode);
19288 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19289 /* Duplicate the low byte through the whole low SImode word. */
19290 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19291 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19292 /* Cast the V16QImode vector back to a V4SImode vector. */
19293 tmp2 = gen_reg_rtx (V4SImode);
19294 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19295 /* Replicate the low element of the V4SImode vector. */
19296 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19297 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19298 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19299 return true;
19300 }
19301 smode = QImode;
19302 wsmode = HImode;
19303 wvmode = V8HImode;
19304 goto widen;
19305 widen:
19306 /* Replicate the value once into the next wider mode and recurse. */
19307 val = convert_modes (wsmode, smode, val, true);
19308 x = expand_simple_binop (wsmode, ASHIFT, val,
19309 GEN_INT (GET_MODE_BITSIZE (smode)),
19310 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19311 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
19312
19313 x = gen_reg_rtx (wvmode);
19314 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
19315 gcc_unreachable ();
19316 emit_move_insn (target, gen_lowpart (mode, x));
19317 return true;
19318
19319 default:
19320 return false;
19321 }
19322 }
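/* Editor's sketch (not part of i386.c): the "widen" path above broadcasts
   a scalar by repeatedly doubling its width with a shift and an IOR before
   recursing with the wider vector mode.  The same trick in plain C, for a
   byte broadcast into a 32-bit word; broadcast_byte is a hypothetical
   name.  */

#include <stdint.h>
#include <stdio.h>

static uint32_t
broadcast_byte (uint8_t b)
{
  uint32_t v = b;
  v |= v << 8;    /* QImode -> HImode: two copies   */
  v |= v << 16;   /* HImode -> SImode: four copies  */
  return v;
}

int
main (void)
{
  printf ("0x%08x\n", broadcast_byte (0xab));   /* prints 0xabababab */
  return 0;
}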
19323
19324 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19325 whose ONE_VAR element is VAR, and other elements are zero. Return true
19326 if successful. */
19327
19328 static bool
19329 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
19330 rtx target, rtx var, int one_var)
19331 {
19332 enum machine_mode vsimode;
19333 rtx new_target;
19334 rtx x, tmp;
19335
19336 switch (mode)
19337 {
19338 case V2SFmode:
19339 case V2SImode:
19340 if (!mmx_ok)
19341 return false;
19342 /* FALLTHRU */
19343
19344 case V2DFmode:
19345 case V2DImode:
19346 if (one_var != 0)
19347 return false;
19348 var = force_reg (GET_MODE_INNER (mode), var);
19349 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
19350 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19351 return true;
19352
19353 case V4SFmode:
19354 case V4SImode:
19355 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
19356 new_target = gen_reg_rtx (mode);
19357 else
19358 new_target = target;
19359 var = force_reg (GET_MODE_INNER (mode), var);
19360 x = gen_rtx_VEC_DUPLICATE (mode, var);
19361 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
19362 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
19363 if (one_var != 0)
19364 {
19365 /* We need to shuffle the value to the correct position, so
19366 create a new pseudo to store the intermediate result. */
19367
19368 /* With SSE2, we can use the integer shuffle insns. */
19369 if (mode != V4SFmode && TARGET_SSE2)
19370 {
19371 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
19372 GEN_INT (1),
19373 GEN_INT (one_var == 1 ? 0 : 1),
19374 GEN_INT (one_var == 2 ? 0 : 1),
19375 GEN_INT (one_var == 3 ? 0 : 1)));
19376 if (target != new_target)
19377 emit_move_insn (target, new_target);
19378 return true;
19379 }
19380
19381 /* Otherwise convert the intermediate result to V4SFmode and
19382 use the SSE1 shuffle instructions. */
19383 if (mode != V4SFmode)
19384 {
19385 tmp = gen_reg_rtx (V4SFmode);
19386 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
19387 }
19388 else
19389 tmp = new_target;
19390
19391 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
19392 GEN_INT (1),
19393 GEN_INT (one_var == 1 ? 0 : 1),
19394 GEN_INT (one_var == 2 ? 0+4 : 1+4),
19395 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
19396
19397 if (mode != V4SFmode)
19398 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
19399 else if (tmp != target)
19400 emit_move_insn (target, tmp);
19401 }
19402 else if (target != new_target)
19403 emit_move_insn (target, new_target);
19404 return true;
19405
19406 case V8HImode:
19407 case V16QImode:
19408 vsimode = V4SImode;
19409 goto widen;
19410 case V4HImode:
19411 case V8QImode:
19412 if (!mmx_ok)
19413 return false;
19414 vsimode = V2SImode;
19415 goto widen;
19416 widen:
19417 if (one_var != 0)
19418 return false;
19419
19420 /* Zero extend the variable element to SImode and recurse. */
19421 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
19422
19423 x = gen_reg_rtx (vsimode);
19424 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
19425 var, one_var))
19426 gcc_unreachable ();
19427
19428 emit_move_insn (target, gen_lowpart (mode, x));
19429 return true;
19430
19431 default:
19432 return false;
19433 }
19434 }
19435
19436 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19437 consisting of the values in VALS. It is known that all elements
19438 except ONE_VAR are constants. Return true if successful. */
19439
19440 static bool
19441 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
19442 rtx target, rtx vals, int one_var)
19443 {
19444 rtx var = XVECEXP (vals, 0, one_var);
19445 enum machine_mode wmode;
19446 rtx const_vec, x;
19447
19448 const_vec = copy_rtx (vals);
19449 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
19450 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
19451
19452 switch (mode)
19453 {
19454 case V2DFmode:
19455 case V2DImode:
19456 case V2SFmode:
19457 case V2SImode:
19458 /* For the two element vectors, it's just as easy to use
19459 the general case. */
19460 return false;
19461
19462 case V4SFmode:
19463 case V4SImode:
19464 case V8HImode:
19465 case V4HImode:
19466 break;
19467
19468 case V16QImode:
19469 wmode = V8HImode;
19470 goto widen;
19471 case V8QImode:
19472 wmode = V4HImode;
19473 goto widen;
19474 widen:
19475 /* There's no way to set one QImode entry easily. Combine
19476 the variable value with its adjacent constant value, and
19477 promote to an HImode set. */
19478 x = XVECEXP (vals, 0, one_var ^ 1);
19479 if (one_var & 1)
19480 {
19481 var = convert_modes (HImode, QImode, var, true);
19482 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
19483 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19484 x = GEN_INT (INTVAL (x) & 0xff);
19485 }
19486 else
19487 {
19488 var = convert_modes (HImode, QImode, var, true);
19489 x = gen_int_mode (INTVAL (x) << 8, HImode);
19490 }
19491 if (x != const0_rtx)
19492 var = expand_simple_binop (HImode, IOR, var, x, var,
19493 1, OPTAB_LIB_WIDEN);
19494
19495 x = gen_reg_rtx (wmode);
19496 emit_move_insn (x, gen_lowpart (wmode, const_vec));
19497 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
19498
19499 emit_move_insn (target, gen_lowpart (mode, x));
19500 return true;
19501
19502 default:
19503 return false;
19504 }
19505
19506 emit_move_insn (target, const_vec);
19507 ix86_expand_vector_set (mmx_ok, target, var, one_var);
19508 return true;
19509 }
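/* Editor's sketch (not part of i386.c): the V16QI/V8QI path above cannot
   insert a single byte, so it fuses the variable byte with its constant
   neighbour into one HImode value and inserts that at slot one_var >> 1.
   Plain C model of the fusion, assuming little-endian element order;
   merge_byte_pair is a hypothetical name.  */

#include <stdint.h>
#include <stdio.h>

static uint16_t
merge_byte_pair (uint8_t var, uint8_t neighbour, int byte_index)
{
  if (byte_index & 1)
    return (uint16_t) ((var << 8) | neighbour);   /* odd index: high half */
  else
    return (uint16_t) ((neighbour << 8) | var);   /* even index: low half */
}

int
main (void)
{
  /* Setting byte 5 of a vector: its HImode slot is 5 >> 1 == 2.  */
  printf ("slot %d holds 0x%04x\n", 5 >> 1, merge_byte_pair (0xaa, 0x11, 5));
  return 0;
}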
19510
19511 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
19512 all values variable, and none identical. */
19513
19514 static void
19515 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
19516 rtx target, rtx vals)
19517 {
19518 enum machine_mode half_mode = GET_MODE_INNER (mode);
19519 rtx op0 = NULL, op1 = NULL;
19520 bool use_vec_concat = false;
19521
19522 switch (mode)
19523 {
19524 case V2SFmode:
19525 case V2SImode:
19526 if (!mmx_ok && !TARGET_SSE)
19527 break;
19528 /* FALLTHRU */
19529
19530 case V2DFmode:
19531 case V2DImode:
19532 /* For the two element vectors, we always implement VEC_CONCAT. */
19533 op0 = XVECEXP (vals, 0, 0);
19534 op1 = XVECEXP (vals, 0, 1);
19535 use_vec_concat = true;
19536 break;
19537
19538 case V4SFmode:
19539 half_mode = V2SFmode;
19540 goto half;
19541 case V4SImode:
19542 half_mode = V2SImode;
19543 goto half;
19544 half:
19545 {
19546 rtvec v;
19547
19548 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
19549 Recurse to load the two halves. */
19550
19551 op0 = gen_reg_rtx (half_mode);
19552 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
19553 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
19554
19555 op1 = gen_reg_rtx (half_mode);
19556 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
19557 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
19558
19559 use_vec_concat = true;
19560 }
19561 break;
19562
19563 case V8HImode:
19564 case V16QImode:
19565 case V4HImode:
19566 case V8QImode:
19567 break;
19568
19569 default:
19570 gcc_unreachable ();
19571 }
19572
19573 if (use_vec_concat)
19574 {
19575 if (!register_operand (op0, half_mode))
19576 op0 = force_reg (half_mode, op0);
19577 if (!register_operand (op1, half_mode))
19578 op1 = force_reg (half_mode, op1);
19579
19580 emit_insn (gen_rtx_SET (VOIDmode, target,
19581 gen_rtx_VEC_CONCAT (mode, op0, op1)));
19582 }
19583 else
19584 {
19585 int i, j, n_elts, n_words, n_elt_per_word;
19586 enum machine_mode inner_mode;
19587 rtx words[4], shift;
19588
19589 inner_mode = GET_MODE_INNER (mode);
19590 n_elts = GET_MODE_NUNITS (mode);
19591 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
19592 n_elt_per_word = n_elts / n_words;
19593 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
19594
19595 for (i = 0; i < n_words; ++i)
19596 {
19597 rtx word = NULL_RTX;
19598
19599 for (j = 0; j < n_elt_per_word; ++j)
19600 {
19601 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
19602 elt = convert_modes (word_mode, inner_mode, elt, true);
19603
19604 if (j == 0)
19605 word = elt;
19606 else
19607 {
19608 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
19609 word, 1, OPTAB_LIB_WIDEN);
19610 word = expand_simple_binop (word_mode, IOR, word, elt,
19611 word, 1, OPTAB_LIB_WIDEN);
19612 }
19613 }
19614
19615 words[i] = word;
19616 }
19617
19618 if (n_words == 1)
19619 emit_move_insn (target, gen_lowpart (mode, words[0]));
19620 else if (n_words == 2)
19621 {
19622 rtx tmp = gen_reg_rtx (mode);
19623 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
19624 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
19625 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
19626 emit_move_insn (target, tmp);
19627 }
19628 else if (n_words == 4)
19629 {
19630 rtx tmp = gen_reg_rtx (V4SImode);
19631 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
19632 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
19633 emit_move_insn (target, gen_lowpart (mode, tmp));
19634 }
19635 else
19636 gcc_unreachable ();
19637 }
19638 }
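/* Editor's sketch (not part of i386.c): the scalar fallback above builds
   each machine word by walking the word's elements from the highest index
   down, shifting the accumulator left by one element and ORing the next
   element in, so element 0 lands in the least significant bits.  The same
   loop in plain C for four byte-sized elements; pack_word is a
   hypothetical name.  */

#include <stdint.h>
#include <stdio.h>

static uint32_t
pack_word (const uint8_t elts[4])
{
  uint32_t word = 0;
  for (int j = 0; j < 4; j++)
    {
      uint8_t elt = elts[4 - j - 1];    /* highest-numbered element first */
      if (j == 0)
        word = elt;
      else
        word = (word << 8) | elt;
    }
  return word;
}

int
main (void)
{
  uint8_t elts[4] = { 0x11, 0x22, 0x33, 0x44 };   /* elements 0..3 */
  printf ("0x%08x\n", pack_word (elts));          /* prints 0x44332211 */
  return 0;
}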
19639
19640 /* Initialize vector TARGET via VALS. Suppress the use of MMX
19641 instructions unless MMX_OK is true. */
19642
19643 void
19644 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
19645 {
19646 enum machine_mode mode = GET_MODE (target);
19647 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19648 int n_elts = GET_MODE_NUNITS (mode);
19649 int n_var = 0, one_var = -1;
19650 bool all_same = true, all_const_zero = true;
19651 int i;
19652 rtx x;
19653
19654 for (i = 0; i < n_elts; ++i)
19655 {
19656 x = XVECEXP (vals, 0, i);
19657 if (!CONSTANT_P (x))
19658 n_var++, one_var = i;
19659 else if (x != CONST0_RTX (inner_mode))
19660 all_const_zero = false;
19661 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
19662 all_same = false;
19663 }
19664
19665 /* Constants are best loaded from the constant pool. */
19666 if (n_var == 0)
19667 {
19668 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
19669 return;
19670 }
19671
19672 /* If all values are identical, broadcast the value. */
19673 if (all_same
19674 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
19675 XVECEXP (vals, 0, 0)))
19676 return;
19677
19678 /* Values where only one field is non-constant are best loaded from
19679 the pool and overwritten via move later. */
19680 if (n_var == 1)
19681 {
19682 if (all_const_zero
19683 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
19684 XVECEXP (vals, 0, one_var),
19685 one_var))
19686 return;
19687
19688 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
19689 return;
19690 }
19691
19692 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
19693 }
19694
19695 void
19696 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
19697 {
19698 enum machine_mode mode = GET_MODE (target);
19699 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19700 bool use_vec_merge = false;
19701 rtx tmp;
19702
19703 switch (mode)
19704 {
19705 case V2SFmode:
19706 case V2SImode:
19707 if (mmx_ok)
19708 {
19709 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
19710 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
19711 if (elt == 0)
19712 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
19713 else
19714 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
19715 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19716 return;
19717 }
19718 break;
19719
19720 case V2DFmode:
19721 case V2DImode:
19722 {
19723 rtx op0, op1;
19724
19725 /* For the two element vectors, we implement a VEC_CONCAT with
19726 the extraction of the other element. */
19727
19728 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
19729 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
19730
19731 if (elt == 0)
19732 op0 = val, op1 = tmp;
19733 else
19734 op0 = tmp, op1 = val;
19735
19736 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
19737 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19738 }
19739 return;
19740
19741 case V4SFmode:
19742 switch (elt)
19743 {
19744 case 0:
19745 use_vec_merge = true;
19746 break;
19747
19748 case 1:
19749 /* tmp = target = A B C D */
19750 tmp = copy_to_reg (target);
19751 /* target = A A B B */
19752 emit_insn (gen_sse_unpcklps (target, target, target));
19753 /* target = X A B B */
19754 ix86_expand_vector_set (false, target, val, 0);
19755 /* target = A X C D */
19756 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19757 GEN_INT (1), GEN_INT (0),
19758 GEN_INT (2+4), GEN_INT (3+4)));
19759 return;
19760
19761 case 2:
19762 /* tmp = target = A B C D */
19763 tmp = copy_to_reg (target);
19764 /* tmp = X B C D */
19765 ix86_expand_vector_set (false, tmp, val, 0);
19766 /* target = A B X D */
19767 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19768 GEN_INT (0), GEN_INT (1),
19769 GEN_INT (0+4), GEN_INT (3+4)));
19770 return;
19771
19772 case 3:
19773 /* tmp = target = A B C D */
19774 tmp = copy_to_reg (target);
19775 /* tmp = X B C D */
19776 ix86_expand_vector_set (false, tmp, val, 0);
19777 /* target = A B C X */
19778 emit_insn (gen_sse_shufps_1 (target, target, tmp,
19779 GEN_INT (0), GEN_INT (1),
19780 GEN_INT (2+4), GEN_INT (0+4)));
19781 return;
19782
19783 default:
19784 gcc_unreachable ();
19785 }
19786 break;
19787
19788 case V4SImode:
19789 /* Element 0 handled by vec_merge below. */
19790 if (elt == 0)
19791 {
19792 use_vec_merge = true;
19793 break;
19794 }
19795
19796 if (TARGET_SSE2)
19797 {
19798 /* With SSE2, use integer shuffles to swap element 0 and ELT,
19799 store into element 0, then shuffle them back. */
19800
19801 rtx order[4];
19802
19803 order[0] = GEN_INT (elt);
19804 order[1] = const1_rtx;
19805 order[2] = const2_rtx;
19806 order[3] = GEN_INT (3);
19807 order[elt] = const0_rtx;
19808
19809 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19810 order[1], order[2], order[3]));
19811
19812 ix86_expand_vector_set (false, target, val, 0);
19813
19814 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
19815 order[1], order[2], order[3]));
19816 }
19817 else
19818 {
19819 /* For SSE1, we have to reuse the V4SF code. */
19820 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
19821 gen_lowpart (SFmode, val), elt);
19822 }
19823 return;
19824
19825 case V8HImode:
19826 use_vec_merge = TARGET_SSE2;
19827 break;
19828 case V4HImode:
19829 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19830 break;
19831
19832 case V16QImode:
19833 case V8QImode:
19834 default:
19835 break;
19836 }
19837
19838 if (use_vec_merge)
19839 {
19840 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
19841 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
19842 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19843 }
19844 else
19845 {
19846 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19847
19848 emit_move_insn (mem, target);
19849
19850 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19851 emit_move_insn (tmp, val);
19852
19853 emit_move_insn (target, mem);
19854 }
19855 }
19856
19857 void
19858 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
19859 {
19860 enum machine_mode mode = GET_MODE (vec);
19861 enum machine_mode inner_mode = GET_MODE_INNER (mode);
19862 bool use_vec_extr = false;
19863 rtx tmp;
19864
19865 switch (mode)
19866 {
19867 case V2SImode:
19868 case V2SFmode:
19869 if (!mmx_ok)
19870 break;
19871 /* FALLTHRU */
19872
19873 case V2DFmode:
19874 case V2DImode:
19875 use_vec_extr = true;
19876 break;
19877
19878 case V4SFmode:
19879 switch (elt)
19880 {
19881 case 0:
19882 tmp = vec;
19883 break;
19884
19885 case 1:
19886 case 3:
19887 tmp = gen_reg_rtx (mode);
19888 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
19889 GEN_INT (elt), GEN_INT (elt),
19890 GEN_INT (elt+4), GEN_INT (elt+4)));
19891 break;
19892
19893 case 2:
19894 tmp = gen_reg_rtx (mode);
19895 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
19896 break;
19897
19898 default:
19899 gcc_unreachable ();
19900 }
19901 vec = tmp;
19902 use_vec_extr = true;
19903 elt = 0;
19904 break;
19905
19906 case V4SImode:
19907 if (TARGET_SSE2)
19908 {
19909 switch (elt)
19910 {
19911 case 0:
19912 tmp = vec;
19913 break;
19914
19915 case 1:
19916 case 3:
19917 tmp = gen_reg_rtx (mode);
19918 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
19919 GEN_INT (elt), GEN_INT (elt),
19920 GEN_INT (elt), GEN_INT (elt)));
19921 break;
19922
19923 case 2:
19924 tmp = gen_reg_rtx (mode);
19925 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
19926 break;
19927
19928 default:
19929 gcc_unreachable ();
19930 }
19931 vec = tmp;
19932 use_vec_extr = true;
19933 elt = 0;
19934 }
19935 else
19936 {
19937 /* For SSE1, we have to reuse the V4SF code. */
19938 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
19939 gen_lowpart (V4SFmode, vec), elt);
19940 return;
19941 }
19942 break;
19943
19944 case V8HImode:
19945 use_vec_extr = TARGET_SSE2;
19946 break;
19947 case V4HImode:
19948 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
19949 break;
19950
19951 case V16QImode:
19952 case V8QImode:
19953 /* ??? Could extract the appropriate HImode element and shift. */
19954 default:
19955 break;
19956 }
19957
19958 if (use_vec_extr)
19959 {
19960 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
19961 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
19962
19963 /* Let the rtl optimizers know about the zero extension performed. */
19964 if (inner_mode == HImode)
19965 {
19966 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
19967 target = gen_lowpart (SImode, target);
19968 }
19969
19970 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
19971 }
19972 else
19973 {
19974 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
19975
19976 emit_move_insn (mem, vec);
19977
19978 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
19979 emit_move_insn (target, tmp);
19980 }
19981 }
19982
19983 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
19984 pattern to reduce; DEST is the destination; IN is the input vector. */
19985
19986 void
19987 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
19988 {
19989 rtx tmp1, tmp2, tmp3;
19990
19991 tmp1 = gen_reg_rtx (V4SFmode);
19992 tmp2 = gen_reg_rtx (V4SFmode);
19993 tmp3 = gen_reg_rtx (V4SFmode);
19994
19995 emit_insn (gen_sse_movhlps (tmp1, in, in));
19996 emit_insn (fn (tmp2, tmp1, in));
19997
19998 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
19999 GEN_INT (1), GEN_INT (1),
20000 GEN_INT (1+4), GEN_INT (1+4)));
20001 emit_insn (fn (dest, tmp2, tmp3));
20002 }
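/* Editor's sketch (not part of i386.c): the reduction above uses movhlps
   to line elements 2 and 3 up with elements 0 and 1, applies FN once to
   get two partial results, then uses shufps to pair those and applies FN
   again.  For a commutative FN this is the usual pairwise reduction tree.
   Scalar model below with max as the example FN; reduce_v4sf_model and
   maxf are hypothetical names.  */

#include <stdio.h>

static float
maxf (float a, float b) { return a > b ? a : b; }

static float
reduce_v4sf_model (const float v[4], float (*fn) (float, float))
{
  float p0 = fn (v[2], v[0]);   /* lane 0 after movhlps + first FN  */
  float p1 = fn (v[3], v[1]);   /* lane 1 after movhlps + first FN  */
  return fn (p0, p1);           /* after shufps + second FN         */
}

int
main (void)
{
  float v[4] = { 1.0f, 7.0f, 3.0f, 5.0f };
  printf ("%f\n", reduce_v4sf_model (v, maxf));   /* prints 7.000000 */
  return 0;
}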
20003 \f
20004 /* Target hook for scalar_mode_supported_p. */
20005 static bool
20006 ix86_scalar_mode_supported_p (enum machine_mode mode)
20007 {
20008 if (DECIMAL_FLOAT_MODE_P (mode))
20009 return true;
20010 else
20011 return default_scalar_mode_supported_p (mode);
20012 }
20013
20014 /* Implements target hook vector_mode_supported_p. */
20015 static bool
20016 ix86_vector_mode_supported_p (enum machine_mode mode)
20017 {
20018 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20019 return true;
20020 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20021 return true;
20022 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20023 return true;
20024 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20025 return true;
20026 return false;
20027 }
20028
20029 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20030
20031 We do this in the new i386 backend to maintain source compatibility
20032 with the old cc0-based compiler. */
20033
20034 static tree
20035 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20036 tree inputs ATTRIBUTE_UNUSED,
20037 tree clobbers)
20038 {
20039 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20040 clobbers);
20041 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20042 clobbers);
20043 clobbers = tree_cons (NULL_TREE, build_string (7, "dirflag"),
20044 clobbers);
20045 return clobbers;
20046 }
20047
20048 /* Return true if this goes in large data/bss. */
20049
20050 static bool
20051 ix86_in_large_data_p (tree exp)
20052 {
20053 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20054 return false;
20055
20056 /* Functions are never large data. */
20057 if (TREE_CODE (exp) == FUNCTION_DECL)
20058 return false;
20059
20060 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20061 {
20062 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20063 if (strcmp (section, ".ldata") == 0
20064 || strcmp (section, ".lbss") == 0)
20065 return true;
20066 return false;
20067 }
20068 else
20069 {
20070 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20071
20072 /* If this is an incomplete type with size 0, then we can't put it
20073 in data because it might be too big when completed. */
20074 if (!size || size > ix86_section_threshold)
20075 return true;
20076 }
20077
20078 return false;
20079 }
20080 static void
20081 ix86_encode_section_info (tree decl, rtx rtl, int first)
20082 {
20083 default_encode_section_info (decl, rtl, first);
20084
20085 if (TREE_CODE (decl) == VAR_DECL
20086 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20087 && ix86_in_large_data_p (decl))
20088 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20089 }
20090
20091 /* Worker function for REVERSE_CONDITION. */
20092
20093 enum rtx_code
20094 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20095 {
20096 return (mode != CCFPmode && mode != CCFPUmode
20097 ? reverse_condition (code)
20098 : reverse_condition_maybe_unordered (code));
20099 }
20100
20101 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20102 to OPERANDS[0]. */
20103
20104 const char *
20105 output_387_reg_move (rtx insn, rtx *operands)
20106 {
20107 if (REG_P (operands[1])
20108 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20109 {
20110 if (REGNO (operands[0]) == FIRST_STACK_REG)
20111 return output_387_ffreep (operands, 0);
20112 return "fstp\t%y0";
20113 }
20114 if (STACK_TOP_P (operands[0]))
20115 return "fld%z1\t%y1";
20116 return "fst\t%y0";
20117 }
20118
20119 /* Output code to perform a conditional jump to LABEL, if the C2 flag in
20120 the FP status register is set. */
20121
20122 void
20123 ix86_emit_fp_unordered_jump (rtx label)
20124 {
20125 rtx reg = gen_reg_rtx (HImode);
20126 rtx temp;
20127
20128 emit_insn (gen_x86_fnstsw_1 (reg));
20129
20130 if (TARGET_USE_SAHF)
20131 {
20132 emit_insn (gen_x86_sahf_1 (reg));
20133
20134 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20135 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20136 }
20137 else
20138 {
20139 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20140
20141 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20142 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20143 }
20144
20145 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20146 gen_rtx_LABEL_REF (VOIDmode, label),
20147 pc_rtx);
20148 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20149 emit_jump_insn (temp);
20150 }
20151
20152 /* Output code to perform a log1p XFmode calculation. */
20153
20154 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20155 {
20156 rtx label1 = gen_label_rtx ();
20157 rtx label2 = gen_label_rtx ();
20158
20159 rtx tmp = gen_reg_rtx (XFmode);
20160 rtx tmp2 = gen_reg_rtx (XFmode);
20161
20162 emit_insn (gen_absxf2 (tmp, op1));
20163 emit_insn (gen_cmpxf (tmp,
20164 CONST_DOUBLE_FROM_REAL_VALUE (
20165 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20166 XFmode)));
20167 emit_jump_insn (gen_bge (label1));
20168
20169 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20170 emit_insn (gen_fyl2xp1_xf3 (op0, tmp2, op1));
20171 emit_jump (label2);
20172
20173 emit_label (label1);
20174 emit_move_insn (tmp, CONST1_RTX (XFmode));
20175 emit_insn (gen_addxf3 (tmp, op1, tmp));
20176 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20177 emit_insn (gen_fyl2x_xf3 (op0, tmp2, tmp));
20178
20179 emit_label (label2);
20180 }
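/* Editor's sketch (not part of i386.c): scalar model of the branch above.
   fyl2xp1 computes Y * log2 (X + 1) and is only meant for small |X|, so
   inputs at or above 1 - sqrt(2)/2 (about 0.2929) take the fyl2x path on
   1 + X instead.  M_LN2 stands in for the fldln2 constant and libm's
   log1p/log2 stand in for the x87 instructions; i387_style_log1p and the
   *_model helpers are hypothetical names.  */

#include <math.h>
#include <stdio.h>

#ifndef M_LN2
#define M_LN2 0.69314718055994530942
#endif

static double
fyl2xp1_model (double y, double x) { return y * (log1p (x) / M_LN2); }

static double
fyl2x_model (double y, double x) { return y * log2 (x); }

static double
i387_style_log1p (double x)
{
  if (fabs (x) < 0.29289321881345247561810596348408353)
    return fyl2xp1_model (M_LN2, x);       /* ln2 * log2 (x + 1) */
  return fyl2x_model (M_LN2, 1.0 + x);     /* ln2 * log2 (1 + x) */
}

int
main (void)
{
  printf ("%.17g\n", i387_style_log1p (1e-12));   /* ~1e-12 */
  printf ("%.17g\n", i387_style_log1p (1.0));     /* ~0.693147... */
  return 0;
}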
20181
20182 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20183
20184 static void
20185 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20186 tree decl)
20187 {
20188 /* With Binutils 2.15, the "@unwind" marker must be specified on
20189 every occurrence of the ".eh_frame" section, not just the first
20190 one. */
20191 if (TARGET_64BIT
20192 && strcmp (name, ".eh_frame") == 0)
20193 {
20194 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20195 flags & SECTION_WRITE ? "aw" : "a");
20196 return;
20197 }
20198 default_elf_asm_named_section (name, flags, decl);
20199 }
20200
20201 /* Return the mangling of TYPE if it is an extended fundamental type. */
20202
20203 static const char *
20204 ix86_mangle_fundamental_type (tree type)
20205 {
20206 switch (TYPE_MODE (type))
20207 {
20208 case TFmode:
20209 /* __float128 is "g". */
20210 return "g";
20211 case XFmode:
20212 /* "long double" or __float80 is "e". */
20213 return "e";
20214 default:
20215 return NULL;
20216 }
20217 }
20218
20219 /* For 32-bit code we can save PIC register setup by using
20220 the __stack_chk_fail_local hidden function instead of calling
20221 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20222 register, so it is better to call __stack_chk_fail directly. */
20223
20224 static tree
20225 ix86_stack_protect_fail (void)
20226 {
20227 return TARGET_64BIT
20228 ? default_external_stack_protect_fail ()
20229 : default_hidden_stack_protect_fail ();
20230 }
20231
20232 /* Select a format to encode pointers in exception handling data. CODE
20233 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20234 true if the symbol may be affected by dynamic relocations.
20235
20236 ??? All x86 object file formats are capable of representing this.
20237 After all, the relocation needed is the same as for the call insn.
20238 Whether or not a particular assembler allows us to enter such, I
20239 guess we'll have to see. */
20240 int
20241 asm_preferred_eh_data_format (int code, int global)
20242 {
20243 if (flag_pic)
20244 {
20245 int type = DW_EH_PE_sdata8;
20246 if (!TARGET_64BIT
20247 || ix86_cmodel == CM_SMALL_PIC
20248 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20249 type = DW_EH_PE_sdata4;
20250 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20251 }
20252 if (ix86_cmodel == CM_SMALL
20253 || (ix86_cmodel == CM_MEDIUM && code))
20254 return DW_EH_PE_udata4;
20255 return DW_EH_PE_absptr;
20256 }
20257 \f
20258 /* Expand copysign from SIGN to the positive value ABS_VALUE
20259 storing it in RESULT. If MASK is non-null, it shall be a mask to mask out
20260 the sign bit. */
20261 static void
20262 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20263 {
20264 enum machine_mode mode = GET_MODE (sign);
20265 rtx sgn = gen_reg_rtx (mode);
20266 if (mask == NULL_RTX)
20267 {
20268 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20269 if (!VECTOR_MODE_P (mode))
20270 {
20271 /* We need to generate a scalar mode mask in this case. */
20272 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20273 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20274 mask = gen_reg_rtx (mode);
20275 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20276 }
20277 }
20278 else
20279 mask = gen_rtx_NOT (mode, mask);
20280 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20281 gen_rtx_AND (mode, mask, sign)));
20282 emit_insn (gen_rtx_SET (VOIDmode, result,
20283 gen_rtx_IOR (mode, abs_value, sgn)));
20284 }
20285
20286 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20287 mask for masking out the sign-bit is stored in *SMASK, if that is
20288 non-null. */
20289 static rtx
20290 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20291 {
20292 enum machine_mode mode = GET_MODE (op0);
20293 rtx xa, mask;
20294
20295 xa = gen_reg_rtx (mode);
20296 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20297 if (!VECTOR_MODE_P (mode))
20298 {
20299 /* We need to generate a scalar mode mask in this case. */
20300 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20301 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20302 mask = gen_reg_rtx (mode);
20303 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20304 }
20305 emit_insn (gen_rtx_SET (VOIDmode, xa,
20306 gen_rtx_AND (mode, op0, mask)));
20307
20308 if (smask)
20309 *smask = mask;
20310
20311 return xa;
20312 }
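/* Editor's sketch (not part of i386.c): the two helpers above do fabs and
   copysign purely with bit masks - fabs is an AND that clears the sign
   bit, and copysign-to-positive ORs the sign bit of SIGN into an already
   nonnegative value.  Plain C model for doubles; memcpy provides the
   bit casts and the function names are hypothetical.  */

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static uint64_t bits_of (double d)    { uint64_t u; memcpy (&u, &d, 8); return u; }
static double   double_of (uint64_t u) { double d; memcpy (&d, &u, 8); return d; }

static double
sse_style_fabs (double x)
{
  return double_of (bits_of (x) & 0x7fffffffffffffffull);   /* clear sign bit */
}

static double
sse_style_copysign_to_positive (double abs_value, double sign)
{
  uint64_t sgn = bits_of (sign) & 0x8000000000000000ull;    /* isolate sign  */
  return double_of (bits_of (abs_value) | sgn);             /* OR it back in */
}

int
main (void)
{
  double xa = sse_style_fabs (-2.5);
  printf ("%g %g\n", xa, sse_style_copysign_to_positive (xa, -1.0));  /* 2.5 -2.5 */
  return 0;
}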
20313
20314 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
20315 swapping the operands if SWAP_OPERANDS is true. The expanded
20316 code is a forward jump to a newly created label in case the
20317 comparison is true. The generated label rtx is returned. */
20318 static rtx
20319 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
20320 bool swap_operands)
20321 {
20322 rtx label, tmp;
20323
20324 if (swap_operands)
20325 {
20326 tmp = op0;
20327 op0 = op1;
20328 op1 = tmp;
20329 }
20330
20331 label = gen_label_rtx ();
20332 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
20333 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20334 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
20335 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
20336 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
20337 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
20338 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
20339 JUMP_LABEL (tmp) = label;
20340
20341 return label;
20342 }
20343
20344 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
20345 using comparison code CODE. Operands are swapped for the comparison if
20346 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
20347 static rtx
20348 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
20349 bool swap_operands)
20350 {
20351 enum machine_mode mode = GET_MODE (op0);
20352 rtx mask = gen_reg_rtx (mode);
20353
20354 if (swap_operands)
20355 {
20356 rtx tmp = op0;
20357 op0 = op1;
20358 op1 = tmp;
20359 }
20360
20361 if (mode == DFmode)
20362 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
20363 gen_rtx_fmt_ee (code, mode, op0, op1)));
20364 else
20365 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
20366 gen_rtx_fmt_ee (code, mode, op0, op1)));
20367
20368 return mask;
20369 }
20370
20371 /* Generate and return a rtx of mode MODE for 2**n where n is the number
20372 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
20373 static rtx
20374 ix86_gen_TWO52 (enum machine_mode mode)
20375 {
20376 REAL_VALUE_TYPE TWO52r;
20377 rtx TWO52;
20378
20379 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
20380 TWO52 = const_double_from_real_value (TWO52r, mode);
20381 TWO52 = force_reg (mode, TWO52);
20382
20383 return TWO52;
20384 }
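/* Editor's sketch (not part of i386.c): why 2**52.  A double carries 52
   fraction bits, so for 0 <= x < 2**52 the sum x + 2**52 has no bits below
   the units place; the FPU's round-to-nearest therefore rounds x to an
   integer for free, and subtracting 2**52 recovers it.  The expanders
   below apply this to fabs (x) and restore the sign afterwards.  The C
   below assumes the default rounding mode and double-precision (not x87
   extended) arithmetic; round_via_two52 is a hypothetical name.  */

#include <stdio.h>

static double
round_via_two52 (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  volatile double t = x + two52;             /* volatile: force a real
                                                double-precision store */
  return t - two52;
}

int
main (void)
{
  printf ("%g %g %g\n",
          round_via_two52 (2.3),    /* 2 */
          round_via_two52 (2.5),    /* 2, ties round to even */
          round_via_two52 (3.7));   /* 4 */
  return 0;
}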
20385
20386 /* Expand SSE sequence for computing lround from OP1 storing
20387 into OP0. */
20388 void
20389 ix86_expand_lround (rtx op0, rtx op1)
20390 {
20391 /* C code for the stuff we're doing below:
20392 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
20393 return (long)tmp;
20394 */
20395 enum machine_mode mode = GET_MODE (op1);
20396 const struct real_format *fmt;
20397 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20398 rtx adj;
20399
20400 /* load nextafter (0.5, 0.0) */
20401 fmt = REAL_MODE_FORMAT (mode);
20402 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20403 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20404
20405 /* adj = copysign (0.5, op1) */
20406 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
20407 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
20408
20409 /* adj = op1 + adj */
20410 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
20411
20412 /* op0 = (imode)adj */
20413 expand_fix (op0, adj, 0);
20414 }
20415
20416 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
20417 into OPERAND0. */
20418 void
20419 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
20420 {
20421 /* C code for the stuff we're doing below (for do_floor):
20422 xi = (long)op1;
20423 xi -= (double)xi > op1 ? 1 : 0;
20424 return xi;
20425 */
20426 enum machine_mode fmode = GET_MODE (op1);
20427 enum machine_mode imode = GET_MODE (op0);
20428 rtx ireg, freg, label, tmp;
20429
20430 /* reg = (long)op1 */
20431 ireg = gen_reg_rtx (imode);
20432 expand_fix (ireg, op1, 0);
20433
20434 /* freg = (double)reg */
20435 freg = gen_reg_rtx (fmode);
20436 expand_float (freg, ireg, 0);
20437
20438 /* ireg = (freg > op1) ? ireg - 1 : ireg */
20439 label = ix86_expand_sse_compare_and_jump (UNLE,
20440 freg, op1, !do_floor);
20441 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
20442 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
20443 emit_move_insn (ireg, tmp);
20444
20445 emit_label (label);
20446 LABEL_NUSES (label) = 1;
20447
20448 emit_move_insn (op0, ireg);
20449 }
20450
20451 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
20452 result in OPERAND0. */
20453 void
20454 ix86_expand_rint (rtx operand0, rtx operand1)
20455 {
20456 /* C code for the stuff we're doing below:
20457 xa = fabs (operand1);
20458 if (!isless (xa, 2**52))
20459 return operand1;
20460 xa = xa + 2**52 - 2**52;
20461 return copysign (xa, operand1);
20462 */
20463 enum machine_mode mode = GET_MODE (operand0);
20464 rtx res, xa, label, TWO52, mask;
20465
20466 res = gen_reg_rtx (mode);
20467 emit_move_insn (res, operand1);
20468
20469 /* xa = abs (operand1) */
20470 xa = ix86_expand_sse_fabs (res, &mask);
20471
20472 /* if (!isless (xa, TWO52)) goto label; */
20473 TWO52 = ix86_gen_TWO52 (mode);
20474 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20475
20476 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20477 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20478
20479 ix86_sse_copysign_to_positive (res, xa, res, mask);
20480
20481 emit_label (label);
20482 LABEL_NUSES (label) = 1;
20483
20484 emit_move_insn (operand0, res);
20485 }
20486
20487 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20488 into OPERAND0. */
20489 void
20490 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
20491 {
20492 /* C code for the stuff we expand below.
20493 double xa = fabs (x), x2;
20494 if (!isless (xa, TWO52))
20495 return x;
20496 xa = xa + TWO52 - TWO52;
20497 x2 = copysign (xa, x);
20498 Compensate. Floor:
20499 if (x2 > x)
20500 x2 -= 1;
20501 Compensate. Ceil:
20502 if (x2 < x)
20503 x2 -= -1;
20504 return x2;
20505 */
20506 enum machine_mode mode = GET_MODE (operand0);
20507 rtx xa, TWO52, tmp, label, one, res, mask;
20508
20509 TWO52 = ix86_gen_TWO52 (mode);
20510
20511 /* Temporary for holding the result, initialized to the input
20512 operand to ease control flow. */
20513 res = gen_reg_rtx (mode);
20514 emit_move_insn (res, operand1);
20515
20516 /* xa = abs (operand1) */
20517 xa = ix86_expand_sse_fabs (res, &mask);
20518
20519 /* if (!isless (xa, TWO52)) goto label; */
20520 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20521
20522 /* xa = xa + TWO52 - TWO52; */
20523 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20524 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
20525
20526 /* xa = copysign (xa, operand1) */
20527 ix86_sse_copysign_to_positive (xa, xa, res, mask);
20528
20529 /* generate 1.0 or -1.0 */
20530 one = force_reg (mode,
20531 const_double_from_real_value (do_floor
20532 ? dconst1 : dconstm1, mode));
20533
20534 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20535 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20536 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20537 gen_rtx_AND (mode, one, tmp)));
20538 /* We always need to subtract here to preserve signed zero. */
20539 tmp = expand_simple_binop (mode, MINUS,
20540 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20541 emit_move_insn (res, tmp);
20542
20543 emit_label (label);
20544 LABEL_NUSES (label) = 1;
20545
20546 emit_move_insn (operand0, res);
20547 }
20548
20549 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
20550 into OPERAND0. */
20551 void
20552 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
20553 {
20554 /* C code for the stuff we expand below.
20555 double xa = fabs (x), x2;
20556 if (!isless (xa, TWO52))
20557 return x;
20558 x2 = (double)(long)x;
20559 Compensate. Floor:
20560 if (x2 > x)
20561 x2 -= 1;
20562 Compensate. Ceil:
20563 if (x2 < x)
20564 x2 += 1;
20565 if (HONOR_SIGNED_ZEROS (mode))
20566 return copysign (x2, x);
20567 return x2;
20568 */
20569 enum machine_mode mode = GET_MODE (operand0);
20570 rtx xa, xi, TWO52, tmp, label, one, res, mask;
20571
20572 TWO52 = ix86_gen_TWO52 (mode);
20573
20574 /* Temporary for holding the result, initialized to the input
20575 operand to ease control flow. */
20576 res = gen_reg_rtx (mode);
20577 emit_move_insn (res, operand1);
20578
20579 /* xa = abs (operand1) */
20580 xa = ix86_expand_sse_fabs (res, &mask);
20581
20582 /* if (!isless (xa, TWO52)) goto label; */
20583 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20584
20585 /* xa = (double)(long)x */
20586 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20587 expand_fix (xi, res, 0);
20588 expand_float (xa, xi, 0);
20589
20590 /* generate 1.0 */
20591 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20592
20593 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
20594 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
20595 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20596 gen_rtx_AND (mode, one, tmp)));
20597 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
20598 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20599 emit_move_insn (res, tmp);
20600
20601 if (HONOR_SIGNED_ZEROS (mode))
20602 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20603
20604 emit_label (label);
20605 LABEL_NUSES (label) = 1;
20606
20607 emit_move_insn (operand0, res);
20608 }
20609
20610 /* Expand SSE sequence for computing round from OPERAND1 storing
20611 into OPERAND0. This sequence works without relying on DImode truncation
20612 via cvttsd2siq, which is only available on 64-bit targets. */
20613 void
20614 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
20615 {
20616 /* C code for the stuff we expand below.
20617 double xa = fabs (x), xa2, x2;
20618 if (!isless (xa, TWO52))
20619 return x;
20620 Using the absolute value and copying back sign makes
20621 -0.0 -> -0.0 correct.
20622 xa2 = xa + TWO52 - TWO52;
20623 Compensate.
20624 dxa = xa2 - xa;
20625 if (dxa <= -0.5)
20626 xa2 += 1;
20627 else if (dxa > 0.5)
20628 xa2 -= 1;
20629 x2 = copysign (xa2, x);
20630 return x2;
20631 */
20632 enum machine_mode mode = GET_MODE (operand0);
20633 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
20634
20635 TWO52 = ix86_gen_TWO52 (mode);
20636
20637 /* Temporary for holding the result, initialized to the input
20638 operand to ease control flow. */
20639 res = gen_reg_rtx (mode);
20640 emit_move_insn (res, operand1);
20641
20642 /* xa = abs (operand1) */
20643 xa = ix86_expand_sse_fabs (res, &mask);
20644
20645 /* if (!isless (xa, TWO52)) goto label; */
20646 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20647
20648 /* xa2 = xa + TWO52 - TWO52; */
20649 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20650 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
20651
20652 /* dxa = xa2 - xa; */
20653 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
20654
20655 /* generate 0.5, 1.0 and -0.5 */
20656 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
20657 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
20658 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
20659 0, OPTAB_DIRECT);
20660
20661 /* Compensate. */
20662 tmp = gen_reg_rtx (mode);
20663 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
20664 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
20665 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20666 gen_rtx_AND (mode, one, tmp)));
20667 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20668 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
20669 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
20670 emit_insn (gen_rtx_SET (VOIDmode, tmp,
20671 gen_rtx_AND (mode, one, tmp)));
20672 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
20673
20674 /* res = copysign (xa2, operand1) */
20675 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
20676
20677 emit_label (label);
20678 LABEL_NUSES (label) = 1;
20679
20680 emit_move_insn (operand0, res);
20681 }
20682
20683 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20684 into OPERAND0. */
20685 void
20686 ix86_expand_trunc (rtx operand0, rtx operand1)
20687 {
20688 /* C code for SSE variant we expand below.
20689 double xa = fabs (x), x2;
20690 if (!isless (xa, TWO52))
20691 return x;
20692 x2 = (double)(long)x;
20693 if (HONOR_SIGNED_ZEROS (mode))
20694 return copysign (x2, x);
20695 return x2;
20696 */
20697 enum machine_mode mode = GET_MODE (operand0);
20698 rtx xa, xi, TWO52, label, res, mask;
20699
20700 TWO52 = ix86_gen_TWO52 (mode);
20701
20702 /* Temporary for holding the result, initialized to the input
20703 operand to ease control flow. */
20704 res = gen_reg_rtx (mode);
20705 emit_move_insn (res, operand1);
20706
20707 /* xa = abs (operand1) */
20708 xa = ix86_expand_sse_fabs (res, &mask);
20709
20710 /* if (!isless (xa, TWO52)) goto label; */
20711 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20712
20713 /* x = (double)(long)x */
20714 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20715 expand_fix (xi, res, 0);
20716 expand_float (res, xi, 0);
20717
20718 if (HONOR_SIGNED_ZEROS (mode))
20719 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
20720
20721 emit_label (label);
20722 LABEL_NUSES (label) = 1;
20723
20724 emit_move_insn (operand0, res);
20725 }
20726
20727 /* Expand SSE sequence for computing trunc from OPERAND1 storing
20728 into OPERAND0. */
20729 void
20730 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
20731 {
20732 enum machine_mode mode = GET_MODE (operand0);
20733 rtx xa, mask, TWO52, label, one, res, smask, tmp;
20734
20735 /* C code for SSE variant we expand below.
20736 double xa = fabs (x), x2;
20737 if (!isless (xa, TWO52))
20738 return x;
20739 xa2 = xa + TWO52 - TWO52;
20740 Compensate:
20741 if (xa2 > xa)
20742 xa2 -= 1.0;
20743 x2 = copysign (xa2, x);
20744 return x2;
20745 */
20746
20747 TWO52 = ix86_gen_TWO52 (mode);
20748
20749 /* Temporary for holding the result, initialized to the input
20750 operand to ease control flow. */
20751 res = gen_reg_rtx (mode);
20752 emit_move_insn (res, operand1);
20753
20754 /* xa = abs (operand1) */
20755 xa = ix86_expand_sse_fabs (res, &smask);
20756
20757 /* if (!isless (xa, TWO52)) goto label; */
20758 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20759
20760 /* res = xa + TWO52 - TWO52; */
20761 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
20762 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
20763 emit_move_insn (res, tmp);
20764
20765 /* generate 1.0 */
20766 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
20767
20768 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
20769 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
20770 emit_insn (gen_rtx_SET (VOIDmode, mask,
20771 gen_rtx_AND (mode, mask, one)));
20772 tmp = expand_simple_binop (mode, MINUS,
20773 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
20774 emit_move_insn (res, tmp);
20775
20776 /* res = copysign (res, operand1) */
20777 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
20778
20779 emit_label (label);
20780 LABEL_NUSES (label) = 1;
20781
20782 emit_move_insn (operand0, res);
20783 }
20784
20785 /* Expand SSE sequence for computing round from OPERAND1 storing
20786 into OPERAND0. */
20787 void
20788 ix86_expand_round (rtx operand0, rtx operand1)
20789 {
20790 /* C code for the stuff we're doing below:
20791 double xa = fabs (x);
20792 if (!isless (xa, TWO52))
20793 return x;
20794 xa = (double)(long)(xa + nextafter (0.5, 0.0));
20795 return copysign (xa, x);
20796 */
20797 enum machine_mode mode = GET_MODE (operand0);
20798 rtx res, TWO52, xa, label, xi, half, mask;
20799 const struct real_format *fmt;
20800 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
20801
20802 /* Temporary for holding the result, initialized to the input
20803 operand to ease control flow. */
20804 res = gen_reg_rtx (mode);
20805 emit_move_insn (res, operand1);
20806
20807 TWO52 = ix86_gen_TWO52 (mode);
20808 xa = ix86_expand_sse_fabs (res, &mask);
20809 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
20810
20811 /* load nextafter (0.5, 0.0) */
20812 fmt = REAL_MODE_FORMAT (mode);
20813 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
20814 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
20815
20816 /* xa = xa + 0.5 */
20817 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
20818 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
20819
20820 /* xa = (double)(int64_t)xa */
20821 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
20822 expand_fix (xi, xa, 0);
20823 expand_float (xa, xi, 0);
20824
20825 /* res = copysign (xa, operand1) */
20826 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
20827
20828 emit_label (label);
20829 LABEL_NUSES (label) = 1;
20830
20831 emit_move_insn (operand0, res);
20832 }
20833
20834 #include "gt-i386.h"