1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return the index of the given mode in the mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
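/* For example, MODE_INDEX (SImode) evaluates to 2, so an SImode lookup picks
   the third entry of the per-mode multiply and divide cost arrays below,
   while any mode not listed falls through to index 4, the "other" slot.  */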
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
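/* Worked example of the scaling above, assuming COSTS_N_INSNS (N) really is
   (N) * 4: an add costs COSTS_N_INSNS (1) == 4 in the speed tables and
   COSTS_N_BYTES (2) == 4 (its 2-byte encoding) in the size table below, so
   entries on the two scales stay directly comparable.  */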
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
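/* How to read the stringop_algs entries that end each cost table below (a
   sketch based on the struct stringop_algs layout in i386.h, not a definitive
   reference): each table carries a memcpy strategy followed by a memset
   strategy, each given first for 32-bit and then for 64-bit code.  An entry
   such as
     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}
   means: use a libcall when the block size is unknown; for known sizes up to
   256 bytes use a 4-byte rep prefix (rep movsl / rep stosl), and for anything
   larger (-1 terminates the list) fall back to a libcall.  DUMMY_STRINGOP_ALGS
   above fills in the variant a given table does not bother to tune.  */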
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea not
587 to limit the number of prefetches at all, as their execution also takes some
588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline expansion considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea not
660 to limit the number of prefetches at all, as their execution also takes some
661 time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline expansion considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea takes 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
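/* The active cost table is consulted through this pointer by the rtx cost
   hooks, e.g. (assuming the usual field names of struct processor_costs in
   i386.h)
     ix86_cost->add                              cost of a PLUS
     ix86_cost->mult_init[MODE_INDEX (SImode)]   start cost of an SImode MULT
   and it is repointed at the table matching -mtune (or at size_cost under
   -Os) when options are processed.  */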
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be the common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
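/* A minimal sketch of how these masks are consumed (the real accessor macros
   live in i386.h): each entry of ix86_tune_features[] below is a bitmask of
   the processors a tuning applies to, so a test boils down to something like

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       ... emit leave ...

   where ix86_tune holds the PROCESSOR_* value selected by -mtune.  */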
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1030
1031 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1032 on simulation results. But after P4 was made, no performance benefit
1033 was observed with branch hints; they also increase code size.
1034 As a result, icc never generates branch hints. */
1035 0,
1036
1037 /* X86_TUNE_DOUBLE_WITH_ADD */
1038 ~m_386,
1039
1040 /* X86_TUNE_USE_SAHF */
1041 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1042 | m_NOCONA | m_CORE2 | m_GENERIC,
1043
1044 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1045 partial dependencies. */
1046 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1047 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1048
1049 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1050 register stalls on the Generic32 compilation setting as well. However,
1051 in the current implementation the partial register stalls are not eliminated
1052 very well - they can be introduced via subregs synthesized by combine
1053 and can happen in caller/callee saving sequences. Because this option
1054 pays back little on PPro based chips and conflicts with the partial reg
1055 dependencies used by Athlon/P4 based chips, it is better to leave it off
1056 for generic32 for now. */
1057 m_PPRO,
1058
1059 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1060 m_CORE2 | m_GENERIC,
1061
1062 /* X86_TUNE_USE_HIMODE_FIOP */
1063 m_386 | m_486 | m_K6_GEODE,
1064
1065 /* X86_TUNE_USE_SIMODE_FIOP */
1066 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1067
1068 /* X86_TUNE_USE_MOV0 */
1069 m_K6,
1070
1071 /* X86_TUNE_USE_CLTD */
1072 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1073
1074 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1075 m_PENT4,
1076
1077 /* X86_TUNE_SPLIT_LONG_MOVES */
1078 m_PPRO,
1079
1080 /* X86_TUNE_READ_MODIFY_WRITE */
1081 ~m_PENT,
1082
1083 /* X86_TUNE_READ_MODIFY */
1084 ~(m_PENT | m_PPRO),
1085
1086 /* X86_TUNE_PROMOTE_QIMODE */
1087 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1088 | m_GENERIC /* | m_PENT4 ? */,
1089
1090 /* X86_TUNE_FAST_PREFIX */
1091 ~(m_PENT | m_486 | m_386),
1092
1093 /* X86_TUNE_SINGLE_STRINGOP */
1094 m_386 | m_PENT4 | m_NOCONA,
1095
1096 /* X86_TUNE_QIMODE_MATH */
1097 ~0,
1098
1099 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1100 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1101 might be considered for Generic32 if our scheme for avoiding partial
1102 stalls were more effective. */
1103 ~m_PPRO,
1104
1105 /* X86_TUNE_PROMOTE_QI_REGS */
1106 0,
1107
1108 /* X86_TUNE_PROMOTE_HI_REGS */
1109 m_PPRO,
1110
1111 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1112 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1113
1114 /* X86_TUNE_ADD_ESP_8 */
1115 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1116 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1117
1118 /* X86_TUNE_SUB_ESP_4 */
1119 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1120
1121 /* X86_TUNE_SUB_ESP_8 */
1122 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1123 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1124
1125 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1126 for DFmode copies */
1127 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1128 | m_GENERIC | m_GEODE),
1129
1130 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1131 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1132
1133 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1134 conflict here between PPro/Pentium4 based chips that treat 128bit
1135 SSE registers as single units and K8 based chips that divide SSE
1136 registers into two 64bit halves. This knob promotes all store destinations
1137 to be 128bit to allow register renaming on 128bit SSE units, but usually
1138 results in one extra microop on 64bit SSE units. Experimental results
1139 show that disabling this option on P4 brings over a 20% SPECfp regression,
1140 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1141 masked by careful scheduling of moves. */
1142 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1143
1144 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1145 m_AMDFAM10,
1146
1147 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1148 are resolved on SSE register parts instead of whole registers, so we may
1149 maintain just the lower part of scalar values in the proper format, leaving the
1150 upper part undefined. */
1151 m_ATHLON_K8,
1152
1153 /* X86_TUNE_SSE_TYPELESS_STORES */
1154 m_ATHLON_K8_AMDFAM10,
1155
1156 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1157 m_PPRO | m_PENT4 | m_NOCONA,
1158
1159 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1160 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1161
1162 /* X86_TUNE_PROLOGUE_USING_MOVE */
1163 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1164
1165 /* X86_TUNE_EPILOGUE_USING_MOVE */
1166 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SHIFT1 */
1169 ~m_486,
1170
1171 /* X86_TUNE_USE_FFREEP */
1172 m_ATHLON_K8_AMDFAM10,
1173
1174 /* X86_TUNE_INTER_UNIT_MOVES */
1175 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1176
1177 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1178 than 4 branch instructions in the 16 byte window. */
1179 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1180
1181 /* X86_TUNE_SCHEDULE */
1182 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1183
1184 /* X86_TUNE_USE_BT */
1185 m_ATHLON_K8_AMDFAM10,
1186
1187 /* X86_TUNE_USE_INCDEC */
1188 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1189
1190 /* X86_TUNE_PAD_RETURNS */
1191 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1192
1193 /* X86_TUNE_EXT_80387_CONSTANTS */
1194 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1195
1196 /* X86_TUNE_SHORTEN_X87_SSE */
1197 ~m_K8,
1198
1199 /* X86_TUNE_AVOID_VECTOR_DECODE */
1200 m_K8 | m_GENERIC64,
1201
1202 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1203 and SImode multiply, but the 386 and 486 do HImode multiply faster. */
1204 ~(m_386 | m_486),
1205
1206 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1207 vector path on AMD machines. */
1208 m_K8 | m_GENERIC64 | m_AMDFAM10,
1209
1210 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1211 machines. */
1212 m_K8 | m_GENERIC64 | m_AMDFAM10,
1213
1214 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1215 than via a MOV. */
1216 m_PENT,
1217
1218 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1219 but one byte longer. */
1220 m_PENT,
1221
1222 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1223 operand that cannot be represented using a modRM byte. The XOR
1224 replacement is long decoded, so this split helps here as well. */
1225 m_K6,
1226 };
1227
1228 /* Feature tests against the various architecture variations. */
1229 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1230 /* X86_ARCH_CMOVE */
1231 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1232
1233 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1234 ~m_386,
1235
1236 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1237 ~(m_386 | m_486),
1238
1239 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1240 ~m_386,
1241
1242 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1243 ~m_386,
1244 };
1245
1246 static const unsigned int x86_accumulate_outgoing_args
1247 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1248
1249 static const unsigned int x86_arch_always_fancy_math_387
1250 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1251 | m_NOCONA | m_CORE2 | m_GENERIC;
1252
1253 static enum stringop_alg stringop_alg = no_stringop;
1254
1255 /* If the average insn count for a single function invocation is
1256 lower than this constant, emit fast (but longer) prologue and
1257 epilogue code. */
1258 #define FAST_PROLOGUE_INSN_COUNT 20
1259
1260 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1261 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1262 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1263 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1264
1265 /* Array of the smallest class containing reg number REGNO, indexed by
1266 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1267
1268 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1269 {
1270 /* ax, dx, cx, bx */
1271 AREG, DREG, CREG, BREG,
1272 /* si, di, bp, sp */
1273 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1274 /* FP registers */
1275 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1276 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1277 /* arg pointer */
1278 NON_Q_REGS,
1279 /* flags, fpsr, fpcr, frame */
1280 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1281 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1282 SSE_REGS, SSE_REGS,
1283 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1284 MMX_REGS, MMX_REGS,
1285 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1288 SSE_REGS, SSE_REGS,
1289 };
1290
1291 /* The "default" register map used in 32bit mode. */
1292
1293 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1294 {
1295 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1296 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1297 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1298 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1299 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1300 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1302 };
1303
1304 static int const x86_64_int_parameter_registers[6] =
1305 {
1306 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1307 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1308 };
1309
1310 static int const x86_64_ms_abi_int_parameter_registers[4] =
1311 {
1312 2 /*RCX*/, 1 /*RDX*/,
1313 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1314 };
1315
1316 static int const x86_64_int_return_registers[4] =
1317 {
1318 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1319 };
1320
1321 /* The "default" register map used in 64bit mode. */
1322 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1323 {
1324 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1325 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1326 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1327 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1328 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1329 8,9,10,11,12,13,14,15, /* extended integer registers */
1330 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1331 };
1332
1333 /* Define the register numbers to be used in Dwarf debugging information.
1334 The SVR4 reference port C compiler uses the following register numbers
1335 in its Dwarf output code:
1336 0 for %eax (gcc regno = 0)
1337 1 for %ecx (gcc regno = 2)
1338 2 for %edx (gcc regno = 1)
1339 3 for %ebx (gcc regno = 3)
1340 4 for %esp (gcc regno = 7)
1341 5 for %ebp (gcc regno = 6)
1342 6 for %esi (gcc regno = 4)
1343 7 for %edi (gcc regno = 5)
1344 The following three DWARF register numbers are never generated by
1345 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1346 believes these numbers have these meanings.
1347 8 for %eip (no gcc equivalent)
1348 9 for %eflags (gcc regno = 17)
1349 10 for %trapno (no gcc equivalent)
1350 It is not at all clear how we should number the FP stack registers
1351 for the x86 architecture. If the version of SDB on x86/svr4 were
1352 a bit less brain dead with respect to floating-point then we would
1353 have a precedent to follow with respect to DWARF register numbers
1354 for x86 FP registers, but the SDB on x86/svr4 is so completely
1355 broken with respect to FP registers that it is hardly worth thinking
1356 of it as something to strive for compatibility with.
1357 The version of x86/svr4 SDB I have at the moment does (partially)
1358 seem to believe that DWARF register number 11 is associated with
1359 the x86 register %st(0), but that's about all. Higher DWARF
1360 register numbers don't seem to be associated with anything in
1361 particular, and even for DWARF regno 11, SDB only seems to under-
1362 stand that it should say that a variable lives in %st(0) (when
1363 asked via an `=' command) if we said it was in DWARF regno 11,
1364 but SDB still prints garbage when asked for the value of the
1365 variable in question (via a `/' command).
1366 (Also note that the labels SDB prints for various FP stack regs
1367 when doing an `x' command are all wrong.)
1368 Note that these problems generally don't affect the native SVR4
1369 C compiler because it doesn't allow the use of -O with -g and
1370 because when it is *not* optimizing, it allocates a memory
1371 location for each floating-point variable, and the memory
1372 location is what gets described in the DWARF AT_location
1373 attribute for the variable in question.
1374 Regardless of the severe mental illness of the x86/svr4 SDB, we
1375 do something sensible here and we use the following DWARF
1376 register numbers. Note that these are all stack-top-relative
1377 numbers.
1378 11 for %st(0) (gcc regno = 8)
1379 12 for %st(1) (gcc regno = 9)
1380 13 for %st(2) (gcc regno = 10)
1381 14 for %st(3) (gcc regno = 11)
1382 15 for %st(4) (gcc regno = 12)
1383 16 for %st(5) (gcc regno = 13)
1384 17 for %st(6) (gcc regno = 14)
1385 18 for %st(7) (gcc regno = 15)
1386 */
1387 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1388 {
1389 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1390 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1391 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1392 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1393 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1394 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1395 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1396 };
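
/* Illustrative sketch, not part of GCC and kept out of the build: with a
   table laid out like svr4_dbx_register_map above, translating a gcc hard
   register number into its SVR4 DWARF number is a bounds-checked array
   lookup -- e.g. gcc regno 2 (%ecx) maps to DWARF regno 1, and registers
   with no DWARF equivalent come back as -1.  The helper name is made up.  */
#if 0
static int
example_dwarf_regno (int gcc_regno, const int *map, int nregs)
{
  if (gcc_regno < 0 || gcc_regno >= nregs)
    return -1;			/* Not a hard register we know about.  */
  return map[gcc_regno];	/* -1 entries mean "no DWARF number".  */
}
#endif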
1397
1398 /* Test and compare insns in i386.md store the information needed to
1399 generate branch and scc insns here. */
1400
1401 rtx ix86_compare_op0 = NULL_RTX;
1402 rtx ix86_compare_op1 = NULL_RTX;
1403 rtx ix86_compare_emitted = NULL_RTX;
1404
1405 /* Size of the register save area. */
1406 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1407
1408 /* Define the structure for the machine field in struct function. */
1409
1410 struct stack_local_entry GTY(())
1411 {
1412 unsigned short mode;
1413 unsigned short n;
1414 rtx rtl;
1415 struct stack_local_entry *next;
1416 };
1417
1418 /* Structure describing stack frame layout.
1419 Stack grows downward:
1420
1421 [arguments]
1422 <- ARG_POINTER
1423 saved pc
1424
1425 saved frame pointer if frame_pointer_needed
1426 <- HARD_FRAME_POINTER
1427 [saved regs]
1428
1429 [padding1] \
1430 )
1431 [va_arg registers] (
1432 > to_allocate <- FRAME_POINTER
1433 [frame] (
1434 )
1435 [padding2] /
1436 */
1437 struct ix86_frame
1438 {
1439 int nregs;
1440 int padding1;
1441 int va_arg_size;
1442 HOST_WIDE_INT frame;
1443 int padding2;
1444 int outgoing_arguments_size;
1445 int red_zone_size;
1446
1447 HOST_WIDE_INT to_allocate;
1448 /* The offsets relative to ARG_POINTER. */
1449 HOST_WIDE_INT frame_pointer_offset;
1450 HOST_WIDE_INT hard_frame_pointer_offset;
1451 HOST_WIDE_INT stack_pointer_offset;
1452
1453 /* When save_regs_using_mov is set, emit prologue using
1454 move instead of push instructions. */
1455 bool save_regs_using_mov;
1456 };
1457
1458 /* Code model option. */
1459 enum cmodel ix86_cmodel;
1460 /* Asm dialect. */
1461 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1462 /* TLS dialects. */
1463 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1464
1465 /* Which unit we are generating floating point math for. */
1466 enum fpmath_unit ix86_fpmath;
1467
1468 /* Which cpu are we scheduling for. */
1469 enum processor_type ix86_tune;
1470
1471 /* Which instruction set architecture to use. */
1472 enum processor_type ix86_arch;
1473
1474 /* True if the SSE prefetch instruction is not a NOP. */
1475 int x86_prefetch_sse;
1476
1477 /* ix86_regparm_string as a number */
1478 static int ix86_regparm;
1479
1480 /* -mstackrealign option */
1481 extern int ix86_force_align_arg_pointer;
1482 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1483
1484 /* Preferred alignment for stack boundary in bits. */
1485 unsigned int ix86_preferred_stack_boundary;
1486
1487 /* Values 1-5: see jump.c */
1488 int ix86_branch_cost;
1489
1490 /* Variables which are this size or smaller are put in the data/bss
1491 or ldata/lbss sections. */
1492
1493 int ix86_section_threshold = 65536;
1494
1495 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1496 char internal_label_prefix[16];
1497 int internal_label_prefix_len;
1498
1499 /* Register class used for passing a given 64-bit part of the argument.
1500 These represent classes as documented by the PS ABI, with the exception
1501 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1502 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1503
1504 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1505 whenever possible (upper half does contain padding). */
1506 enum x86_64_reg_class
1507 {
1508 X86_64_NO_CLASS,
1509 X86_64_INTEGER_CLASS,
1510 X86_64_INTEGERSI_CLASS,
1511 X86_64_SSE_CLASS,
1512 X86_64_SSESF_CLASS,
1513 X86_64_SSEDF_CLASS,
1514 X86_64_SSEUP_CLASS,
1515 X86_64_X87_CLASS,
1516 X86_64_X87UP_CLASS,
1517 X86_64_COMPLEX_X87_CLASS,
1518 X86_64_MEMORY_CLASS
1519 };
1520 static const char * const x86_64_reg_class_name[] =
1521 {
1522 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1523 "sseup", "x87", "x87up", "cplx87", "no"
1524 };
1525
1526 #define MAX_CLASSES 4
1527
1528 /* Table of constants used by fldpi, fldln2, etc. */
1529 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1530 static bool ext_80387_constants_init = 0;
1531
1532 \f
1533 static struct machine_function * ix86_init_machine_status (void);
1534 static rtx ix86_function_value (tree, tree, bool);
1535 static int ix86_function_regparm (tree, tree);
1536 static void ix86_compute_frame_layout (struct ix86_frame *);
1537 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1538 rtx, rtx, int);
1539
1540 \f
1541 /* The svr4 ABI for the i386 says that records and unions are returned
1542 in memory. */
1543 #ifndef DEFAULT_PCC_STRUCT_RETURN
1544 #define DEFAULT_PCC_STRUCT_RETURN 1
1545 #endif
1546
1547 /* Implement TARGET_HANDLE_OPTION. */
1548
1549 static bool
1550 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1551 {
1552 switch (code)
1553 {
1554 case OPT_m3dnow:
1555 if (!value)
1556 {
1557 target_flags &= ~MASK_3DNOW_A;
1558 target_flags_explicit |= MASK_3DNOW_A;
1559 }
1560 return true;
1561
1562 case OPT_mmmx:
1563 if (!value)
1564 {
1565 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1566 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1567 }
1568 return true;
1569
1570 case OPT_msse:
1571 if (!value)
1572 {
1573 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1574 | MASK_SSE4A);
1575 target_flags_explicit |= (MASK_SSE2 | MASK_SSE3 | MASK_SSSE3
1576 | MASK_SSE4A);
1577 }
1578 return true;
1579
1580 case OPT_msse2:
1581 if (!value)
1582 {
1583 target_flags &= ~(MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A);
1584 target_flags_explicit |= MASK_SSE3 | MASK_SSSE3 | MASK_SSE4A;
1585 }
1586 return true;
1587
1588 case OPT_msse3:
1589 if (!value)
1590 {
1591 target_flags &= ~(MASK_SSSE3 | MASK_SSE4A);
1592 target_flags_explicit |= MASK_SSSE3 | MASK_SSE4A;
1593 }
1594 return true;
1595
1596 case OPT_mssse3:
1597 if (!value)
1598 {
1599 target_flags &= ~MASK_SSE4A;
1600 target_flags_explicit |= MASK_SSE4A;
1601 }
1602 return true;
1603
1604 default:
1605 return true;
1606 }
1607 }
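
/* Illustrative sketch, not part of GCC and kept out of the build: the
   handler above implements a one-way dependency cascade -- explicitly
   disabling a base ISA extension also clears every extension that implies
   it, and records those bits in target_flags_explicit so later defaulting
   code will not turn them back on.  The masks and helper below are made up
   to show the idiom.  */
#if 0
#define EX_MASK_SSE2   (1 << 0)
#define EX_MASK_SSE3   (1 << 1)
#define EX_MASK_SSSE3  (1 << 2)

static void
example_handle_no_sse (int *flags, int *flags_explicit)
{
  /* -mno-sse: everything that requires SSE goes away as well.  */
  *flags &= ~(EX_MASK_SSE2 | EX_MASK_SSE3 | EX_MASK_SSSE3);
  *flags_explicit |= EX_MASK_SSE2 | EX_MASK_SSE3 | EX_MASK_SSSE3;
}
#endif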
1608
1609 /* Sometimes certain combinations of command options do not make
1610 sense on a particular target machine. You can define a macro
1611 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1612 defined, is executed once just after all the command options have
1613 been parsed.
1614
1615 Don't use this macro to turn on various extra optimizations for
1616 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1617
1618 void
1619 override_options (void)
1620 {
1621 int i;
1622 int ix86_tune_defaulted = 0;
1623 unsigned int ix86_arch_mask, ix86_tune_mask;
1624
1625 /* Comes from final.c -- no real reason to change it. */
1626 #define MAX_CODE_ALIGN 16
1627
1628 static struct ptt
1629 {
1630 const struct processor_costs *cost; /* Processor costs */
1631 const int target_enable; /* Target flags to enable. */
1632 const int target_disable; /* Target flags to disable. */
1633 const int align_loop; /* Default alignments. */
1634 const int align_loop_max_skip;
1635 const int align_jump;
1636 const int align_jump_max_skip;
1637 const int align_func;
1638 }
1639 const processor_target_table[PROCESSOR_max] =
1640 {
1641 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1642 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1643 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1644 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1645 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1646 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1647 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1648 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1649 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1650 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1651 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1652 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1653 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1654 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1655 };
1656
1657 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1658 static struct pta
1659 {
1660 const char *const name; /* processor name or nickname. */
1661 const enum processor_type processor;
1662 const enum pta_flags
1663 {
1664 PTA_SSE = 1 << 0,
1665 PTA_SSE2 = 1 << 1,
1666 PTA_SSE3 = 1 << 2,
1667 PTA_MMX = 1 << 3,
1668 PTA_PREFETCH_SSE = 1 << 4,
1669 PTA_3DNOW = 1 << 5,
1670 PTA_3DNOW_A = 1 << 6,
1671 PTA_64BIT = 1 << 7,
1672 PTA_SSSE3 = 1 << 8,
1673 PTA_CX16 = 1 << 9,
1674 PTA_POPCNT = 1 << 10,
1675 PTA_ABM = 1 << 11,
1676 PTA_SSE4A = 1 << 12,
1677 PTA_NO_SAHF = 1 << 13
1678 } flags;
1679 }
1680 const processor_alias_table[] =
1681 {
1682 {"i386", PROCESSOR_I386, 0},
1683 {"i486", PROCESSOR_I486, 0},
1684 {"i586", PROCESSOR_PENTIUM, 0},
1685 {"pentium", PROCESSOR_PENTIUM, 0},
1686 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1687 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1688 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1689 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1690 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1691 {"i686", PROCESSOR_PENTIUMPRO, 0},
1692 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1693 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1694 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1695 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1696 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1697 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1698 | PTA_MMX | PTA_PREFETCH_SSE},
1699 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1700 | PTA_MMX | PTA_PREFETCH_SSE},
1701 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1702 | PTA_MMX | PTA_PREFETCH_SSE},
1703 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1704 | PTA_MMX | PTA_PREFETCH_SSE
1705 | PTA_CX16 | PTA_NO_SAHF},
1706 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1707 | PTA_64BIT | PTA_MMX
1708 | PTA_PREFETCH_SSE | PTA_CX16},
1709 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1710 | PTA_3DNOW_A},
1711 {"k6", PROCESSOR_K6, PTA_MMX},
1712 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1713 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1714 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1715 | PTA_3DNOW_A},
1716 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1717 | PTA_3DNOW | PTA_3DNOW_A},
1718 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1719 | PTA_3DNOW_A | PTA_SSE},
1720 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_3DNOW_A | PTA_SSE},
1722 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1723 | PTA_3DNOW_A | PTA_SSE},
1724 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1725 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1726 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1727 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1728 | PTA_NO_SAHF},
1729 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1730 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1731 | PTA_SSE2 | PTA_NO_SAHF},
1732 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1733 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1734 | PTA_SSE2 | PTA_NO_SAHF},
1735 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1736 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1737 | PTA_SSE2 | PTA_NO_SAHF},
1738 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1739 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1740 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1741 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1742 {"barcelona", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1743 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1744 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1745 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1746 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1747 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1748 };
1749
1750 int const pta_size = ARRAY_SIZE (processor_alias_table);
1751
1752 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1753 SUBTARGET_OVERRIDE_OPTIONS;
1754 #endif
1755
1756 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1757 SUBSUBTARGET_OVERRIDE_OPTIONS;
1758 #endif
1759
1760 /* -fPIC is the default for 64-bit Mach-O (x86_64 Darwin). */
1761 if (TARGET_MACHO && TARGET_64BIT)
1762 flag_pic = 2;
1763
1764 /* Set the default values for switches whose default depends on TARGET_64BIT
1765 in case they weren't overridden by command line options. */
1766 if (TARGET_64BIT)
1767 {
1768 /* Mach-O doesn't support omitting the frame pointer for now. */
1769 if (flag_omit_frame_pointer == 2)
1770 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1771 if (flag_asynchronous_unwind_tables == 2)
1772 flag_asynchronous_unwind_tables = 1;
1773 if (flag_pcc_struct_return == 2)
1774 flag_pcc_struct_return = 0;
1775 }
1776 else
1777 {
1778 if (flag_omit_frame_pointer == 2)
1779 flag_omit_frame_pointer = 0;
1780 if (flag_asynchronous_unwind_tables == 2)
1781 flag_asynchronous_unwind_tables = 0;
1782 if (flag_pcc_struct_return == 2)
1783 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1784 }
1785
1786 /* Need to check -mtune=generic first. */
1787 if (ix86_tune_string)
1788 {
1789 if (!strcmp (ix86_tune_string, "generic")
1790 || !strcmp (ix86_tune_string, "i686")
1791 /* As special support for cross compilers we read -mtune=native
1792 as -mtune=generic. With native compilers we won't see
1793 -mtune=native, as it will already have been rewritten by the driver. */
1794 || !strcmp (ix86_tune_string, "native"))
1795 {
1796 if (TARGET_64BIT)
1797 ix86_tune_string = "generic64";
1798 else
1799 ix86_tune_string = "generic32";
1800 }
1801 else if (!strncmp (ix86_tune_string, "generic", 7))
1802 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1803 }
1804 else
1805 {
1806 if (ix86_arch_string)
1807 ix86_tune_string = ix86_arch_string;
1808 if (!ix86_tune_string)
1809 {
1810 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1811 ix86_tune_defaulted = 1;
1812 }
1813
1814 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1815 need to use a sensible tune option. */
1816 if (!strcmp (ix86_tune_string, "generic")
1817 || !strcmp (ix86_tune_string, "x86-64")
1818 || !strcmp (ix86_tune_string, "i686"))
1819 {
1820 if (TARGET_64BIT)
1821 ix86_tune_string = "generic64";
1822 else
1823 ix86_tune_string = "generic32";
1824 }
1825 }
1826 if (ix86_stringop_string)
1827 {
1828 if (!strcmp (ix86_stringop_string, "rep_byte"))
1829 stringop_alg = rep_prefix_1_byte;
1830 else if (!strcmp (ix86_stringop_string, "libcall"))
1831 stringop_alg = libcall;
1832 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1833 stringop_alg = rep_prefix_4_byte;
1834 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1835 stringop_alg = rep_prefix_8_byte;
1836 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1837 stringop_alg = loop_1_byte;
1838 else if (!strcmp (ix86_stringop_string, "loop"))
1839 stringop_alg = loop;
1840 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1841 stringop_alg = unrolled_loop;
1842 else
1843 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1844 }
1845 if (!strcmp (ix86_tune_string, "x86-64"))
1846 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1847 "-mtune=generic instead as appropriate.");
1848
1849 if (!ix86_arch_string)
1850 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1851 if (!strcmp (ix86_arch_string, "generic"))
1852 error ("generic CPU can be used only for -mtune= switch");
1853 if (!strncmp (ix86_arch_string, "generic", 7))
1854 error ("bad value (%s) for -march= switch", ix86_arch_string);
1855
1856 if (ix86_cmodel_string != 0)
1857 {
1858 if (!strcmp (ix86_cmodel_string, "small"))
1859 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1860 else if (!strcmp (ix86_cmodel_string, "medium"))
1861 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1862 else if (!strcmp (ix86_cmodel_string, "large"))
1863 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1864 else if (flag_pic)
1865 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1866 else if (!strcmp (ix86_cmodel_string, "32"))
1867 ix86_cmodel = CM_32;
1868 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1869 ix86_cmodel = CM_KERNEL;
1870 else
1871 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1872 }
1873 else
1874 {
1875 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
1876 use of rip-relative addressing. This eliminates fixups that
1877 would otherwise be needed if this object is to be placed in a
1878 DLL, and is essentially just as efficient as direct addressing. */
1879 if (TARGET_64BIT_MS_ABI)
1880 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
1881 else if (TARGET_64BIT)
1882 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1883 else
1884 ix86_cmodel = CM_32;
1885 }
1886 if (ix86_asm_string != 0)
1887 {
1888 if (! TARGET_MACHO
1889 && !strcmp (ix86_asm_string, "intel"))
1890 ix86_asm_dialect = ASM_INTEL;
1891 else if (!strcmp (ix86_asm_string, "att"))
1892 ix86_asm_dialect = ASM_ATT;
1893 else
1894 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1895 }
1896 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1897 error ("code model %qs not supported in the %s bit mode",
1898 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1899 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1900 sorry ("%i-bit mode not compiled in",
1901 (target_flags & MASK_64BIT) ? 64 : 32);
1902
1903 for (i = 0; i < pta_size; i++)
1904 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1905 {
1906 ix86_arch = processor_alias_table[i].processor;
1907 /* Default cpu tuning to the architecture. */
1908 ix86_tune = ix86_arch;
1909 if (processor_alias_table[i].flags & PTA_MMX
1910 && !(target_flags_explicit & MASK_MMX))
1911 target_flags |= MASK_MMX;
1912 if (processor_alias_table[i].flags & PTA_3DNOW
1913 && !(target_flags_explicit & MASK_3DNOW))
1914 target_flags |= MASK_3DNOW;
1915 if (processor_alias_table[i].flags & PTA_3DNOW_A
1916 && !(target_flags_explicit & MASK_3DNOW_A))
1917 target_flags |= MASK_3DNOW_A;
1918 if (processor_alias_table[i].flags & PTA_SSE
1919 && !(target_flags_explicit & MASK_SSE))
1920 target_flags |= MASK_SSE;
1921 if (processor_alias_table[i].flags & PTA_SSE2
1922 && !(target_flags_explicit & MASK_SSE2))
1923 target_flags |= MASK_SSE2;
1924 if (processor_alias_table[i].flags & PTA_SSE3
1925 && !(target_flags_explicit & MASK_SSE3))
1926 target_flags |= MASK_SSE3;
1927 if (processor_alias_table[i].flags & PTA_SSSE3
1928 && !(target_flags_explicit & MASK_SSSE3))
1929 target_flags |= MASK_SSSE3;
1930 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1931 x86_prefetch_sse = true;
1932 if (processor_alias_table[i].flags & PTA_CX16)
1933 x86_cmpxchg16b = true;
1934 if (processor_alias_table[i].flags & PTA_POPCNT
1935 && !(target_flags_explicit & MASK_POPCNT))
1936 target_flags |= MASK_POPCNT;
1937 if (processor_alias_table[i].flags & PTA_ABM
1938 && !(target_flags_explicit & MASK_ABM))
1939 target_flags |= MASK_ABM;
1940 if (processor_alias_table[i].flags & PTA_SSE4A
1941 && !(target_flags_explicit & MASK_SSE4A))
1942 target_flags |= MASK_SSE4A;
1943 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1944 x86_sahf = true;
1945 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1946 error ("CPU you selected does not support x86-64 "
1947 "instruction set");
1948 break;
1949 }
1950
1951 if (i == pta_size)
1952 error ("bad value (%s) for -march= switch", ix86_arch_string);
1953
1954 ix86_arch_mask = 1u << ix86_arch;
1955 for (i = 0; i < X86_ARCH_LAST; ++i)
1956 ix86_arch_features[i] &= ix86_arch_mask;
1957
1958 for (i = 0; i < pta_size; i++)
1959 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1960 {
1961 ix86_tune = processor_alias_table[i].processor;
1962 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1963 {
1964 if (ix86_tune_defaulted)
1965 {
1966 ix86_tune_string = "x86-64";
1967 for (i = 0; i < pta_size; i++)
1968 if (! strcmp (ix86_tune_string,
1969 processor_alias_table[i].name))
1970 break;
1971 ix86_tune = processor_alias_table[i].processor;
1972 }
1973 else
1974 error ("CPU you selected does not support x86-64 "
1975 "instruction set");
1976 }
1977 /* Intel CPUs have always interpreted SSE prefetch instructions as
1978 NOPs; so, we can enable SSE prefetch instructions even when
1979 -mtune (rather than -march) points us to a processor that has them.
1980 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1981 higher processors. */
1982 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1983 x86_prefetch_sse = true;
1984 break;
1985 }
1986 if (i == pta_size)
1987 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1988
1989 ix86_tune_mask = 1u << ix86_tune;
1990 for (i = 0; i < X86_TUNE_LAST; ++i)
1991 ix86_tune_features[i] &= ix86_tune_mask;
1992
1993 if (optimize_size)
1994 ix86_cost = &size_cost;
1995 else
1996 ix86_cost = processor_target_table[ix86_tune].cost;
1997 target_flags |= processor_target_table[ix86_tune].target_enable;
1998 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1999
2000 /* Arrange to set up i386_stack_locals for all functions. */
2001 init_machine_status = ix86_init_machine_status;
2002
2003 /* Validate -mregparm= value. */
2004 if (ix86_regparm_string)
2005 {
2006 if (TARGET_64BIT)
2007 warning (0, "-mregparm is ignored in 64-bit mode");
2008 i = atoi (ix86_regparm_string);
2009 if (i < 0 || i > REGPARM_MAX)
2010 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2011 else
2012 ix86_regparm = i;
2013 }
2014 if (TARGET_64BIT)
2015 ix86_regparm = REGPARM_MAX;
2016
2017 /* If the user has provided any of the -malign-* options,
2018 warn and use that value only if -falign-* is not set.
2019 Remove this code in GCC 3.2 or later. */
2020 if (ix86_align_loops_string)
2021 {
2022 warning (0, "-malign-loops is obsolete, use -falign-loops");
2023 if (align_loops == 0)
2024 {
2025 i = atoi (ix86_align_loops_string);
2026 if (i < 0 || i > MAX_CODE_ALIGN)
2027 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2028 else
2029 align_loops = 1 << i;
2030 }
2031 }
2032
2033 if (ix86_align_jumps_string)
2034 {
2035 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2036 if (align_jumps == 0)
2037 {
2038 i = atoi (ix86_align_jumps_string);
2039 if (i < 0 || i > MAX_CODE_ALIGN)
2040 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2041 else
2042 align_jumps = 1 << i;
2043 }
2044 }
2045
2046 if (ix86_align_funcs_string)
2047 {
2048 warning (0, "-malign-functions is obsolete, use -falign-functions");
2049 if (align_functions == 0)
2050 {
2051 i = atoi (ix86_align_funcs_string);
2052 if (i < 0 || i > MAX_CODE_ALIGN)
2053 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2054 else
2055 align_functions = 1 << i;
2056 }
2057 }
2058
2059 /* Default align_* from the processor table. */
2060 if (align_loops == 0)
2061 {
2062 align_loops = processor_target_table[ix86_tune].align_loop;
2063 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2064 }
2065 if (align_jumps == 0)
2066 {
2067 align_jumps = processor_target_table[ix86_tune].align_jump;
2068 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2069 }
2070 if (align_functions == 0)
2071 {
2072 align_functions = processor_target_table[ix86_tune].align_func;
2073 }
2074
2075 /* Validate -mbranch-cost= value, or provide default. */
2076 ix86_branch_cost = ix86_cost->branch_cost;
2077 if (ix86_branch_cost_string)
2078 {
2079 i = atoi (ix86_branch_cost_string);
2080 if (i < 0 || i > 5)
2081 error ("-mbranch-cost=%d is not between 0 and 5", i);
2082 else
2083 ix86_branch_cost = i;
2084 }
2085 if (ix86_section_threshold_string)
2086 {
2087 i = atoi (ix86_section_threshold_string);
2088 if (i < 0)
2089 error ("-mlarge-data-threshold=%d is negative", i);
2090 else
2091 ix86_section_threshold = i;
2092 }
2093
2094 if (ix86_tls_dialect_string)
2095 {
2096 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2097 ix86_tls_dialect = TLS_DIALECT_GNU;
2098 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2099 ix86_tls_dialect = TLS_DIALECT_GNU2;
2100 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2101 ix86_tls_dialect = TLS_DIALECT_SUN;
2102 else
2103 error ("bad value (%s) for -mtls-dialect= switch",
2104 ix86_tls_dialect_string);
2105 }
2106
2107 if (ix87_precision_string)
2108 {
2109 i = atoi (ix87_precision_string);
2110 if (i != 32 && i != 64 && i != 80)
2111 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2112 }
2113
2114 /* Keep nonleaf frame pointers. */
2115 if (flag_omit_frame_pointer)
2116 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2117 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2118 flag_omit_frame_pointer = 1;
2119
2120 /* If we're doing fast math, we don't care about comparison order
2121 wrt NaNs. This lets us use a shorter comparison sequence. */
2122 if (flag_finite_math_only)
2123 target_flags &= ~MASK_IEEE_FP;
2124
2125 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2126 since the insns won't need emulation. */
2127 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2128 target_flags &= ~MASK_NO_FANCY_MATH_387;
2129
2130 /* Likewise, if the target doesn't have a 387, or we've specified
2131 software floating point, don't use 387 inline intrinsics. */
2132 if (!TARGET_80387)
2133 target_flags |= MASK_NO_FANCY_MATH_387;
2134
2135 /* Turn on SSE3 builtins for -mssse3. */
2136 if (TARGET_SSSE3)
2137 target_flags |= MASK_SSE3;
2138
2139 /* Turn on SSE3 builtins for -msse4a. */
2140 if (TARGET_SSE4A)
2141 target_flags |= MASK_SSE3;
2142
2143 /* Turn on SSE2 builtins for -msse3. */
2144 if (TARGET_SSE3)
2145 target_flags |= MASK_SSE2;
2146
2147 /* Turn on SSE builtins for -msse2. */
2148 if (TARGET_SSE2)
2149 target_flags |= MASK_SSE;
2150
2151 /* Turn on MMX builtins for -msse. */
2152 if (TARGET_SSE)
2153 {
2154 target_flags |= MASK_MMX & ~target_flags_explicit;
2155 x86_prefetch_sse = true;
2156 }
2157
2158 /* Turn on MMX builtins for 3Dnow. */
2159 if (TARGET_3DNOW)
2160 target_flags |= MASK_MMX;
2161
2162 /* Turn on POPCNT builtins for -mabm. */
2163 if (TARGET_ABM)
2164 target_flags |= MASK_POPCNT;
2165
2166 if (TARGET_64BIT)
2167 {
2168 if (TARGET_RTD)
2169 warning (0, "-mrtd is ignored in 64-bit mode");
2170
2171 /* Enable by default the SSE and MMX builtins. Do allow the user to
2172 explicitly disable any of these. In particular, disabling SSE and
2173 MMX for kernel code is extremely useful. */
2174 target_flags
2175 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | TARGET_SUBTARGET64_DEFAULT)
2176 & ~target_flags_explicit);
2177 }
2178 else
2179 {
2180 /* The i386 ABI does not specify a red zone. It still makes sense to use one
2181 when the programmer takes care to keep the stack from being destroyed. */
2182 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2183 target_flags |= MASK_NO_RED_ZONE;
2184 }
2185
2186 /* Validate -mpreferred-stack-boundary= value, or provide default.
2187 The default of 128 bits is for Pentium III's SSE __m128. We do not
2188 lower it for optimize_size, since otherwise object files compiled
2189 with -Os and -On could not be mixed. */
2190 ix86_preferred_stack_boundary = 128;
2191 if (ix86_preferred_stack_boundary_string)
2192 {
2193 i = atoi (ix86_preferred_stack_boundary_string);
2194 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2195 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2196 TARGET_64BIT ? 4 : 2);
2197 else
2198 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2199 }
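
  /* Worked example (illustrative only): -mpreferred-stack-boundary=4 gives
     (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128 bits, the 16-byte alignment
     that SSE __m128 spills want.  The 64-bit lower bound of 4 therefore
     never allows dropping below 16 bytes, while 32-bit code may go as low
     as (1 << 2) * 8 = 32 bits.  */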
2200
2201 /* Accept -msseregparm only if at least SSE support is enabled. */
2202 if (TARGET_SSEREGPARM
2203 && ! TARGET_SSE)
2204 error ("-msseregparm used without SSE enabled");
2205
2206 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2207 if (ix86_fpmath_string != 0)
2208 {
2209 if (! strcmp (ix86_fpmath_string, "387"))
2210 ix86_fpmath = FPMATH_387;
2211 else if (! strcmp (ix86_fpmath_string, "sse"))
2212 {
2213 if (!TARGET_SSE)
2214 {
2215 warning (0, "SSE instruction set disabled, using 387 arithmetic");
2216 ix86_fpmath = FPMATH_387;
2217 }
2218 else
2219 ix86_fpmath = FPMATH_SSE;
2220 }
2221 else if (! strcmp (ix86_fpmath_string, "387,sse")
2222 || ! strcmp (ix86_fpmath_string, "sse,387"))
2223 {
2224 if (!TARGET_SSE)
2225 {
2226 warning (0, "SSE instruction set disabled, using 387 arithmetic");
2227 ix86_fpmath = FPMATH_387;
2228 }
2229 else if (!TARGET_80387)
2230 {
2231 warning (0, "387 instruction set disabled, using SSE arithmetic");
2232 ix86_fpmath = FPMATH_SSE;
2233 }
2234 else
2235 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2236 }
2237 else
2238 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2239 }
2240
2241 /* If the i387 is disabled, then do not return values in it. */
2242 if (!TARGET_80387)
2243 target_flags &= ~MASK_FLOAT_RETURNS;
2244
2245 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2246 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2247 && !optimize_size)
2248 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2249
2250 /* ??? Unwind info is not correct around the CFG unless either a frame
2251 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2252 unwind info generation to be aware of the CFG and propagating states
2253 around edges. */
2254 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2255 || flag_exceptions || flag_non_call_exceptions)
2256 && flag_omit_frame_pointer
2257 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2258 {
2259 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2260 warning (0, "unwind tables currently require either a frame pointer "
2261 "or -maccumulate-outgoing-args for correctness");
2262 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2263 }
2264
2265 /* For sane SSE instruction set generation we need the fcomi instruction.
2266 It is safe to enable all CMOVE instructions. */
2267 if (TARGET_SSE)
2268 TARGET_CMOVE = 1;
2269
2270 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2271 {
2272 char *p;
2273 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2274 p = strchr (internal_label_prefix, 'X');
2275 internal_label_prefix_len = p - internal_label_prefix;
2276 *p = '\0';
2277 }
2278
2279 /* When no scheduling description is available, disable the scheduler pass
2280 so it won't slow down compilation and make x87 code slower. */
2281 if (!TARGET_SCHEDULE)
2282 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2283
2284 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2285 set_param_value ("simultaneous-prefetches",
2286 ix86_cost->simultaneous_prefetches);
2287 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2288 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2289 }
2290 \f
2291 /* Return true if this goes in large data/bss. */
2292
2293 static bool
2294 ix86_in_large_data_p (tree exp)
2295 {
2296 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2297 return false;
2298
2299 /* Functions are never large data. */
2300 if (TREE_CODE (exp) == FUNCTION_DECL)
2301 return false;
2302
2303 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2304 {
2305 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2306 if (strcmp (section, ".ldata") == 0
2307 || strcmp (section, ".lbss") == 0)
2308 return true;
2309 return false;
2310 }
2311 else
2312 {
2313 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2314
2315 /* If this is an incomplete type with size 0, then we can't put it
2316 in data because it might be too big when completed. */
2317 if (!size || size > ix86_section_threshold)
2318 return true;
2319 }
2320
2321 return false;
2322 }
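
/* Example of the effect (illustrative only): with -mcmodel=medium and the
   default -mlarge-data-threshold of 65536, a 1 MB array counts as large and
   is placed in .ldata/.lbss, while a 4 KB array stays in the ordinary
   .data/.bss sections; an incomplete type of size 0 is treated as large
   because its completed size is unknown.  */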
2323
2324 /* Switch to the appropriate section for output of DECL.
2325 DECL is either a `VAR_DECL' node or a constant of some sort.
2326 RELOC indicates whether forming the initial value of DECL requires
2327 link-time relocations. */
2328
2329 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2330 ATTRIBUTE_UNUSED;
2331
2332 static section *
2333 x86_64_elf_select_section (tree decl, int reloc,
2334 unsigned HOST_WIDE_INT align)
2335 {
2336 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2337 && ix86_in_large_data_p (decl))
2338 {
2339 const char *sname = NULL;
2340 unsigned int flags = SECTION_WRITE;
2341 switch (categorize_decl_for_section (decl, reloc))
2342 {
2343 case SECCAT_DATA:
2344 sname = ".ldata";
2345 break;
2346 case SECCAT_DATA_REL:
2347 sname = ".ldata.rel";
2348 break;
2349 case SECCAT_DATA_REL_LOCAL:
2350 sname = ".ldata.rel.local";
2351 break;
2352 case SECCAT_DATA_REL_RO:
2353 sname = ".ldata.rel.ro";
2354 break;
2355 case SECCAT_DATA_REL_RO_LOCAL:
2356 sname = ".ldata.rel.ro.local";
2357 break;
2358 case SECCAT_BSS:
2359 sname = ".lbss";
2360 flags |= SECTION_BSS;
2361 break;
2362 case SECCAT_RODATA:
2363 case SECCAT_RODATA_MERGE_STR:
2364 case SECCAT_RODATA_MERGE_STR_INIT:
2365 case SECCAT_RODATA_MERGE_CONST:
2366 sname = ".lrodata";
2367 flags = 0;
2368 break;
2369 case SECCAT_SRODATA:
2370 case SECCAT_SDATA:
2371 case SECCAT_SBSS:
2372 gcc_unreachable ();
2373 case SECCAT_TEXT:
2374 case SECCAT_TDATA:
2375 case SECCAT_TBSS:
2376 /* We don't split these for the medium model. Place them into
2377 default sections and hope for the best. */
2378 break;
2379 }
2380 if (sname)
2381 {
2382 /* We might get called with string constants, but get_named_section
2383 doesn't like them as they are not DECLs. Also, we need to set
2384 flags in that case. */
2385 if (!DECL_P (decl))
2386 return get_section (sname, flags, NULL);
2387 return get_named_section (decl, sname, reloc);
2388 }
2389 }
2390 return default_elf_select_section (decl, reloc, align);
2391 }
2392
2393 /* Build up a unique section name, expressed as a
2394 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2395 RELOC indicates whether the initial value of EXP requires
2396 link-time relocations. */
2397
2398 static void ATTRIBUTE_UNUSED
2399 x86_64_elf_unique_section (tree decl, int reloc)
2400 {
2401 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2402 && ix86_in_large_data_p (decl))
2403 {
2404 const char *prefix = NULL;
2405 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2406 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2407
2408 switch (categorize_decl_for_section (decl, reloc))
2409 {
2410 case SECCAT_DATA:
2411 case SECCAT_DATA_REL:
2412 case SECCAT_DATA_REL_LOCAL:
2413 case SECCAT_DATA_REL_RO:
2414 case SECCAT_DATA_REL_RO_LOCAL:
2415 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2416 break;
2417 case SECCAT_BSS:
2418 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2419 break;
2420 case SECCAT_RODATA:
2421 case SECCAT_RODATA_MERGE_STR:
2422 case SECCAT_RODATA_MERGE_STR_INIT:
2423 case SECCAT_RODATA_MERGE_CONST:
2424 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2425 break;
2426 case SECCAT_SRODATA:
2427 case SECCAT_SDATA:
2428 case SECCAT_SBSS:
2429 gcc_unreachable ();
2430 case SECCAT_TEXT:
2431 case SECCAT_TDATA:
2432 case SECCAT_TBSS:
2433 /* We don't split these for the medium model. Place them into
2434 default sections and hope for the best. */
2435 break;
2436 }
2437 if (prefix)
2438 {
2439 const char *name;
2440 size_t nlen, plen;
2441 char *string;
2442 plen = strlen (prefix);
2443
2444 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2445 name = targetm.strip_name_encoding (name);
2446 nlen = strlen (name);
2447
2448 string = alloca (nlen + plen + 1);
2449 memcpy (string, prefix, plen);
2450 memcpy (string + plen, name, nlen + 1);
2451
2452 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2453 return;
2454 }
2455 }
2456 default_unique_section (decl, reloc);
2457 }
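
/* Example of the names produced (illustrative only): for a large variable
   big_table categorized as SECCAT_BSS, the code above builds the section
   name ".lbss.big_table", or ".gnu.linkonce.lb.big_table" when one-only
   semantics have to be expressed without COMDAT group support.  */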
2458
2459 #ifdef COMMON_ASM_OP
2460 /* This says how to output assembler code to declare an
2461 uninitialized external linkage data object.
2462
2463 For medium-model x86-64 we need to use the .largecomm pseudo-op for
2464 large objects. */
2465 void
2466 x86_elf_aligned_common (FILE *file,
2467 const char *name, unsigned HOST_WIDE_INT size,
2468 int align)
2469 {
2470 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2471 && size > (unsigned int)ix86_section_threshold)
2472 fprintf (file, ".largecomm\t");
2473 else
2474 fprintf (file, "%s", COMMON_ASM_OP);
2475 assemble_name (file, name);
2476 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2477 size, align / BITS_PER_UNIT);
2478 }
2479 #endif
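
/* Example of the output (illustrative only, assuming the usual "\t.comm\t"
   definition of COMMON_ASM_OP): a 100000-byte common object big_buf with
   256-bit alignment under -mcmodel=medium is emitted as
       .largecomm	big_buf,100000,32
   while an 8-byte object small_buf with 32-bit alignment is emitted as
       .comm	small_buf,8,4  */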
2480
2481 /* Utility function for targets to use in implementing
2482 ASM_OUTPUT_ALIGNED_BSS. */
2483
2484 void
2485 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2486 const char *name, unsigned HOST_WIDE_INT size,
2487 int align)
2488 {
2489 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2490 && size > (unsigned int)ix86_section_threshold)
2491 switch_to_section (get_named_section (decl, ".lbss", 0));
2492 else
2493 switch_to_section (bss_section);
2494 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2495 #ifdef ASM_DECLARE_OBJECT_NAME
2496 last_assemble_variable_decl = decl;
2497 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2498 #else
2499 /* The standard thing is just to output a label for the object. */
2500 ASM_OUTPUT_LABEL (file, name);
2501 #endif /* ASM_DECLARE_OBJECT_NAME */
2502 ASM_OUTPUT_SKIP (file, size ? size : 1);
2503 }
2504 \f
2505 void
2506 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2507 {
2508 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2509 make the problem with not enough registers even worse. */
2510 #ifdef INSN_SCHEDULING
2511 if (level > 1)
2512 flag_schedule_insns = 0;
2513 #endif
2514
2515 if (TARGET_MACHO)
2516 /* The Darwin libraries never set errno, so we might as well
2517 avoid calling them when that's the only reason we would. */
2518 flag_errno_math = 0;
2519
2520 /* The default values of these switches depend on TARGET_64BIT, which
2521 is not known at this point. Mark these values with 2 and let the
2522 user override them. If no command line option specifies them, we
2523 will set the defaults in override_options. */
2524 if (optimize >= 1)
2525 flag_omit_frame_pointer = 2;
2526 flag_pcc_struct_return = 2;
2527 flag_asynchronous_unwind_tables = 2;
2528 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2529 SUBTARGET_OPTIMIZATION_OPTIONS;
2530 #endif
2531 }
2532 \f
2533 /* Decide whether we can make a sibling call to a function. DECL is the
2534 declaration of the function being targeted by the call and EXP is the
2535 CALL_EXPR representing the call. */
2536
2537 static bool
2538 ix86_function_ok_for_sibcall (tree decl, tree exp)
2539 {
2540 tree func;
2541 rtx a, b;
2542
2543 /* If we are generating position-independent code, we cannot sibcall
2544 optimize any indirect call, or a direct call to a global function,
2545 as the PLT requires %ebx be live. */
2546 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2547 return false;
2548
2549 if (decl)
2550 func = decl;
2551 else
2552 {
2553 func = TREE_TYPE (CALL_EXPR_FN (exp));
2554 if (POINTER_TYPE_P (func))
2555 func = TREE_TYPE (func);
2556 }
2557
2558 /* Check that the return value locations are the same. For instance,
2559 if we are returning floats on the 80387 register stack, we cannot
2560 make a sibcall from a function that doesn't return a float to a
2561 function that does or, conversely, from a function that does return
2562 a float to a function that doesn't; the necessary stack adjustment
2563 would not be executed. This is also the place we notice
2564 differences in the return value ABI. Note that it is ok for one
2565 of the functions to have void return type as long as the return
2566 value of the other is passed in a register. */
2567 a = ix86_function_value (TREE_TYPE (exp), func, false);
2568 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2569 cfun->decl, false);
2570 if (STACK_REG_P (a) || STACK_REG_P (b))
2571 {
2572 if (!rtx_equal_p (a, b))
2573 return false;
2574 }
2575 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2576 ;
2577 else if (!rtx_equal_p (a, b))
2578 return false;
2579
2580 /* If this call is indirect, we'll need to be able to use a call-clobbered
2581 register for the address of the target function. Make sure that all
2582 such registers are not used for passing parameters. */
2583 if (!decl && !TARGET_64BIT)
2584 {
2585 tree type;
2586
2587 /* We're looking at the CALL_EXPR, we need the type of the function. */
2588 type = CALL_EXPR_FN (exp); /* pointer expression */
2589 type = TREE_TYPE (type); /* pointer type */
2590 type = TREE_TYPE (type); /* function type */
2591
2592 if (ix86_function_regparm (type, NULL) >= 3)
2593 {
2594 /* ??? Need to count the actual number of registers to be used,
2595 not the possible number of registers. Fix later. */
2596 return false;
2597 }
2598 }
2599
2600 /* Dllimport'd functions are also called indirectly. */
2601 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2602 && decl && DECL_DLLIMPORT_P (decl)
2603 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2604 return false;
2605
2606 /* If we force-aligned the stack, then sibcalling would unalign the
2607 stack, which may break the called function. */
2608 if (cfun->machine->force_align_arg_pointer)
2609 return false;
2610
2611 /* Otherwise okay. That also includes certain types of indirect calls. */
2612 return true;
2613 }
2614
2615 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2616 calling convention attributes;
2617 arguments as in struct attribute_spec.handler. */
2618
2619 static tree
2620 ix86_handle_cconv_attribute (tree *node, tree name,
2621 tree args,
2622 int flags ATTRIBUTE_UNUSED,
2623 bool *no_add_attrs)
2624 {
2625 if (TREE_CODE (*node) != FUNCTION_TYPE
2626 && TREE_CODE (*node) != METHOD_TYPE
2627 && TREE_CODE (*node) != FIELD_DECL
2628 && TREE_CODE (*node) != TYPE_DECL)
2629 {
2630 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2631 IDENTIFIER_POINTER (name));
2632 *no_add_attrs = true;
2633 return NULL_TREE;
2634 }
2635
2636 /* Can combine regparm with all attributes but fastcall. */
2637 if (is_attribute_p ("regparm", name))
2638 {
2639 tree cst;
2640
2641 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2642 {
2643 error ("fastcall and regparm attributes are not compatible");
2644 }
2645
2646 cst = TREE_VALUE (args);
2647 if (TREE_CODE (cst) != INTEGER_CST)
2648 {
2649 warning (OPT_Wattributes,
2650 "%qs attribute requires an integer constant argument",
2651 IDENTIFIER_POINTER (name));
2652 *no_add_attrs = true;
2653 }
2654 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2655 {
2656 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2657 IDENTIFIER_POINTER (name), REGPARM_MAX);
2658 *no_add_attrs = true;
2659 }
2660
2661 if (!TARGET_64BIT
2662 && lookup_attribute (ix86_force_align_arg_pointer_string,
2663 TYPE_ATTRIBUTES (*node))
2664 && compare_tree_int (cst, REGPARM_MAX-1))
2665 {
2666 error ("%s functions limited to %d register parameters",
2667 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2668 }
2669
2670 return NULL_TREE;
2671 }
2672
2673 if (TARGET_64BIT)
2674 {
2675 /* Do not warn when emulating the MS ABI. */
2676 if (!TARGET_64BIT_MS_ABI)
2677 warning (OPT_Wattributes, "%qs attribute ignored",
2678 IDENTIFIER_POINTER (name));
2679 *no_add_attrs = true;
2680 return NULL_TREE;
2681 }
2682
2683 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2684 if (is_attribute_p ("fastcall", name))
2685 {
2686 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2687 {
2688 error ("fastcall and cdecl attributes are not compatible");
2689 }
2690 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2691 {
2692 error ("fastcall and stdcall attributes are not compatible");
2693 }
2694 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2695 {
2696 error ("fastcall and regparm attributes are not compatible");
2697 }
2698 }
2699
2700 /* Can combine stdcall with fastcall (redundant), regparm and
2701 sseregparm. */
2702 else if (is_attribute_p ("stdcall", name))
2703 {
2704 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2705 {
2706 error ("stdcall and cdecl attributes are not compatible");
2707 }
2708 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2709 {
2710 error ("stdcall and fastcall attributes are not compatible");
2711 }
2712 }
2713
2714 /* Can combine cdecl with regparm and sseregparm. */
2715 else if (is_attribute_p ("cdecl", name))
2716 {
2717 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2718 {
2719 error ("stdcall and cdecl attributes are not compatible");
2720 }
2721 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2722 {
2723 error ("fastcall and cdecl attributes are not compatible");
2724 }
2725 }
2726
2727 /* Can combine sseregparm with all attributes. */
2728
2729 return NULL_TREE;
2730 }
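
/* Illustrative examples, not part of GCC and kept out of the build, of the
   calling convention attributes this handler validates:  */
#if 0
/* First two integer arguments in %ecx/%edx; any stack arguments are popped
   by the callee.  */
extern int sum2 (int a, int b) __attribute__ ((fastcall));
/* All arguments on the stack; the callee pops them (12 bytes here).  */
extern int sum3 (int a, int b, int c) __attribute__ ((stdcall));
/* Invalid: fastcall and regparm cannot be combined, so the handler above
   reports an error for a declaration like this.  */
extern int bad (int a) __attribute__ ((fastcall, regparm (3)));
#endif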
2731
2732 /* Return 0 if the attributes for two types are incompatible, 1 if they
2733 are compatible, and 2 if they are nearly compatible (which causes a
2734 warning to be generated). */
2735
2736 static int
2737 ix86_comp_type_attributes (tree type1, tree type2)
2738 {
2739 /* Check for mismatch of non-default calling convention. */
2740 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2741
2742 if (TREE_CODE (type1) != FUNCTION_TYPE)
2743 return 1;
2744
2745 /* Check for mismatched fastcall/regparm types. */
2746 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2747 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2748 || (ix86_function_regparm (type1, NULL)
2749 != ix86_function_regparm (type2, NULL)))
2750 return 0;
2751
2752 /* Check for mismatched sseregparm types. */
2753 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2754 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2755 return 0;
2756
2757 /* Check for mismatched return types (cdecl vs stdcall). */
2758 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2759 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2760 return 0;
2761
2762 return 1;
2763 }
2764 \f
2765 /* Return the regparm value for a function with the indicated TYPE and DECL.
2766 DECL may be NULL when calling function indirectly
2767 or considering a libcall. */
2768
2769 static int
2770 ix86_function_regparm (tree type, tree decl)
2771 {
2772 tree attr;
2773 int regparm = ix86_regparm;
2774
2775 if (TARGET_64BIT)
2776 return regparm;
2777
2778 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2779 if (attr)
2780 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2781
2782 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2783 return 2;
2784
2785 /* Use register calling convention for local functions when possible. */
2786 if (decl && flag_unit_at_a_time && !profile_flag)
2787 {
2788 struct cgraph_local_info *i = cgraph_local_info (decl);
2789 if (i && i->local)
2790 {
2791 int local_regparm, globals = 0, regno;
2792 struct function *f;
2793
2794 /* Make sure no regparm register is taken by a
2795 global register variable. */
2796 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2797 if (global_regs[local_regparm])
2798 break;
2799
2800 /* We can't use regparm(3) for nested functions as these use
2801 static chain pointer in third argument. */
2802 if (local_regparm == 3
2803 && decl_function_context (decl)
2804 && !DECL_NO_STATIC_CHAIN (decl))
2805 local_regparm = 2;
2806
2807 /* If the function realigns its stack pointer, the prologue will
2808 clobber %ecx. If we've already generated code for the callee,
2809 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2810 scanning the attributes for the self-realigning property. */
2811 f = DECL_STRUCT_FUNCTION (decl);
2812 if (local_regparm == 3
2813 && (f ? !!f->machine->force_align_arg_pointer
2814 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2815 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2816 local_regparm = 2;
2817
2818 /* Each global register variable increases register pressure, so the
2819 more global register variables there are, the less the regparm
2820 optimization can be used, unless requested by the user explicitly. */
2821 for (regno = 0; regno < 6; regno++)
2822 if (global_regs[regno])
2823 globals++;
2824 local_regparm
2825 = globals < local_regparm ? local_regparm - globals : 0;
2826
2827 if (local_regparm > regparm)
2828 regparm = local_regparm;
2829 }
2830 }
2831
2832 return regparm;
2833 }
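
/* Illustrative example, not part of GCC: an explicit regparm attribute such
   as the one below is what the TYPE_ATTRIBUTES lookup above picks up,
   forcing the first three integer arguments into %eax, %edx and %ecx.  */
#if 0
extern int dot3 (int a, int b, int c) __attribute__ ((regparm (3)));
#endif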
2834
2835 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2836 DFmode (2) arguments in SSE registers for a function with the
2837 indicated TYPE and DECL. DECL may be NULL when calling function
2838 indirectly or considering a libcall. Otherwise return 0. */
2839
2840 static int
2841 ix86_function_sseregparm (tree type, tree decl)
2842 {
2843 gcc_assert (!TARGET_64BIT);
2844
2845 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2846 by the sseregparm attribute. */
2847 if (TARGET_SSEREGPARM
2848 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2849 {
2850 if (!TARGET_SSE)
2851 {
2852 if (decl)
2853 error ("Calling %qD with attribute sseregparm without "
2854 "SSE/SSE2 enabled", decl);
2855 else
2856 error ("Calling %qT with attribute sseregparm without "
2857 "SSE/SSE2 enabled", type);
2858 return 0;
2859 }
2860
2861 return 2;
2862 }
2863
2864 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2865 (and DFmode for SSE2) arguments in SSE registers. */
2866 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2867 {
2868 struct cgraph_local_info *i = cgraph_local_info (decl);
2869 if (i && i->local)
2870 return TARGET_SSE2 ? 2 : 1;
2871 }
2872
2873 return 0;
2874 }
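
/* Illustrative example, not part of GCC: with SSE2 enabled, the attribute
   below makes SFmode and DFmode arguments travel in SSE registers (%xmm0
   upwards) instead of on the stack.  */
#if 0
extern double scale (double x, double factor) __attribute__ ((sseregparm));
#endif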
2875
2876 /* Return true if EAX is live at the start of the function. Used by
2877 ix86_expand_prologue to determine if we need special help before
2878 calling allocate_stack_worker. */
2879
2880 static bool
2881 ix86_eax_live_at_start_p (void)
2882 {
2883 /* Cheat. Don't bother working forward from ix86_function_regparm
2884 to the function type to whether an actual argument is located in
2885 eax. Instead just look at cfg info, which is still close enough
2886 to correct at this point. This gives false positives for broken
2887 functions that might use uninitialized data that happens to be
2888 allocated in eax, but who cares? */
2889 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2890 }
2891
2892 /* Return true if TYPE has a variable argument list. */
2893
2894 static bool
2895 type_has_variadic_args_p (tree type)
2896 {
2897 tree n, t = TYPE_ARG_TYPES (type);
2898
2899 if (t == NULL)
2900 return false;
2901
2902 while ((n = TREE_CHAIN (t)) != NULL)
2903 t = n;
2904
2905 return TREE_VALUE (t) != void_type_node;
2906 }
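
/* Examples of what the walk above decides (illustrative only): for
   "int f (int, ...)" the last TREE_VALUE is not void_type_node, so the
   result is true; for "int g (int)" the argument list is terminated by
   void_type_node, so the result is false; an unprototyped "int h ()" has
   TYPE_ARG_TYPES == NULL and is likewise reported as non-variadic.  */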
2907
2908 /* Value is the number of bytes of arguments automatically
2909 popped when returning from a subroutine call.
2910 FUNDECL is the declaration node of the function (as a tree),
2911 FUNTYPE is the data type of the function (as a tree),
2912 or for a library call it is an identifier node for the subroutine name.
2913 SIZE is the number of bytes of arguments passed on the stack.
2914
2915 On the 80386, the RTD insn may be used to pop them if the number
2916 of args is fixed, but if the number is variable then the caller
2917 must pop them all. RTD can't be used for library calls now
2918 because the library is compiled with the Unix compiler.
2919 Use of RTD is a selectable option, since it is incompatible with
2920 standard Unix calling sequences. If the option is not selected,
2921 the caller must always pop the args.
2922
2923 The attribute stdcall is equivalent to RTD on a per module basis. */
2924
2925 int
2926 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2927 {
2928 int rtd;
2929
2930 /* None of the 64-bit ABIs pop arguments. */
2931 if (TARGET_64BIT)
2932 return 0;
2933
2934 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2935
2936 /* Cdecl functions override -mrtd, and never pop the stack. */
2937 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2938 {
2939 /* Stdcall and fastcall functions will pop the stack if not
2940 variable args. */
2941 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2942 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2943 rtd = 1;
2944
2945 if (rtd && ! type_has_variadic_args_p (funtype))
2946 return size;
2947 }
2948
2949 /* Lose any fake structure return argument if it is passed on the stack. */
2950 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2951 && !KEEP_AGGREGATE_RETURN_POINTER)
2952 {
2953 int nregs = ix86_function_regparm (funtype, fundecl);
2954 if (nregs == 0)
2955 return GET_MODE_SIZE (Pmode);
2956 }
2957
2958 return 0;
2959 }
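
/* Worked example (illustrative only): for a 32-bit call to
   "void __attribute__ ((stdcall)) f (int, int)" the callee pops its 8 bytes
   of arguments, so this function returns 8; declaring the same function
   with a trailing "..." makes the caller responsible again and the result
   drops back to 0.  In 64-bit mode the result is always 0.  */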
2960 \f
2961 /* Argument support functions. */
2962
2963 /* Return true when register may be used to pass function parameters. */
2964 bool
2965 ix86_function_arg_regno_p (int regno)
2966 {
2967 int i;
2968 const int *parm_regs;
2969
2970 if (!TARGET_64BIT)
2971 {
2972 if (TARGET_MACHO)
2973 return (regno < REGPARM_MAX
2974 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2975 else
2976 return (regno < REGPARM_MAX
2977 || (TARGET_MMX && MMX_REGNO_P (regno)
2978 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2979 || (TARGET_SSE && SSE_REGNO_P (regno)
2980 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2981 }
2982
2983 if (TARGET_MACHO)
2984 {
2985 if (SSE_REGNO_P (regno) && TARGET_SSE)
2986 return true;
2987 }
2988 else
2989 {
2990 if (TARGET_SSE && SSE_REGNO_P (regno)
2991 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2992 return true;
2993 }
2994
2995 /* RAX is used as a hidden argument to varargs functions. */
2996 if (!TARGET_64BIT_MS_ABI && regno == 0)
2997 return true;
2998
2999 if (TARGET_64BIT_MS_ABI)
3000 parm_regs = x86_64_ms_abi_int_parameter_registers;
3001 else
3002 parm_regs = x86_64_int_parameter_registers;
3003 for (i = 0; i < REGPARM_MAX; i++)
3004 if (regno == parm_regs[i])
3005 return true;
3006 return false;
3007 }
3008
3009 /* Return true if we do not know how to pass TYPE solely in registers. */
3010
3011 static bool
3012 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3013 {
3014 if (must_pass_in_stack_var_size_or_pad (mode, type))
3015 return true;
3016
3017 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3018 The layout_type routine is crafty and tries to trick us into passing
3019 currently unsupported vector types on the stack by using TImode. */
3020 return (!TARGET_64BIT && mode == TImode
3021 && type && TREE_CODE (type) != VECTOR_TYPE);
3022 }
3023
3024 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3025 for a call to a function whose data type is FNTYPE.
3026 For a library call, FNTYPE is 0. */
3027
3028 void
3029 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3030 tree fntype, /* tree ptr for function decl */
3031 rtx libname, /* SYMBOL_REF of library name or 0 */
3032 tree fndecl)
3033 {
3034 memset (cum, 0, sizeof (*cum));
3035
3036 /* Set up the number of registers to use for passing arguments. */
3037 cum->nregs = ix86_regparm;
3038 if (TARGET_SSE)
3039 cum->sse_nregs = SSE_REGPARM_MAX;
3040 if (TARGET_MMX)
3041 cum->mmx_nregs = MMX_REGPARM_MAX;
3042 cum->warn_sse = true;
3043 cum->warn_mmx = true;
3044 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3045
3046 if (!TARGET_64BIT)
3047 {
3048 /* If there are variable arguments, then we won't pass anything
3049 in registers in 32-bit mode. */
3050 if (cum->maybe_vaarg)
3051 {
3052 cum->nregs = 0;
3053 cum->sse_nregs = 0;
3054 cum->mmx_nregs = 0;
3055 cum->warn_sse = 0;
3056 cum->warn_mmx = 0;
3057 return;
3058 }
3059
3060 /* Use ecx and edx registers if function has fastcall attribute,
3061 else look for regparm information. */
3062 if (fntype)
3063 {
3064 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3065 {
3066 cum->nregs = 2;
3067 cum->fastcall = 1;
3068 }
3069 else
3070 cum->nregs = ix86_function_regparm (fntype, fndecl);
3071 }
3072
3073 /* Set up the number of SSE registers used for passing SFmode
3074 and DFmode arguments. Warn for mismatching ABI. */
3075 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3076 }
3077 }
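
/* Examples of the resulting state (illustrative only): in 32-bit code a
   prototype carrying the fastcall attribute leaves cum->nregs = 2 and
   cum->fastcall = 1, whereas any varargs prototype zeroes nregs, sse_nregs
   and mmx_nregs so that everything is passed on the stack.  */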
3078
3079 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3080 But in the case of vector types, it is some vector mode.
3081
3082 When we have only some of our vector isa extensions enabled, then there
3083 are some modes for which vector_mode_supported_p is false. For these
3084 modes, the generic vector support in gcc will choose some non-vector mode
3085 in order to implement the type. By computing the natural mode, we'll
3086 select the proper ABI location for the operand and not depend on whatever
3087 the middle-end decides to do with these vector types. */
3088
3089 static enum machine_mode
3090 type_natural_mode (tree type)
3091 {
3092 enum machine_mode mode = TYPE_MODE (type);
3093
3094 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3095 {
3096 HOST_WIDE_INT size = int_size_in_bytes (type);
3097 if ((size == 8 || size == 16)
3098 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3099 && TYPE_VECTOR_SUBPARTS (type) > 1)
3100 {
3101 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3102
3103 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3104 mode = MIN_MODE_VECTOR_FLOAT;
3105 else
3106 mode = MIN_MODE_VECTOR_INT;
3107
3108 /* Get the mode which has this inner mode and number of units. */
3109 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3110 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3111 && GET_MODE_INNER (mode) == innermode)
3112 return mode;
3113
3114 gcc_unreachable ();
3115 }
3116 }
3117
3118 return mode;
3119 }
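
/* Illustrative example (not taken from the ABI document): given

     typedef float v2sf __attribute__ ((vector_size (8)));

   with MMX/3dNOW! disabled, vector_mode_supported_p (V2SFmode) is false and
   the middle-end may lay the type out in a scalar 8-byte mode.  This routine
   still reports V2SFmode, so the argument passing code below makes the same
   ABI decision regardless of which vector extensions happen to be enabled.  */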
3120
3121 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3122 this may not agree with the mode that the type system has chosen for the
3123 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3124 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3125
3126 static rtx
3127 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3128 unsigned int regno)
3129 {
3130 rtx tmp;
3131
3132 if (orig_mode != BLKmode)
3133 tmp = gen_rtx_REG (orig_mode, regno);
3134 else
3135 {
3136 tmp = gen_rtx_REG (mode, regno);
3137 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3138 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3139 }
3140
3141 return tmp;
3142 }
3143
3144 /* x86-64 register passing implementation.  See the x86-64 psABI for details.
3145 The goal of this code is to classify each eightbyte (8 bytes) of an incoming
3146 argument by register class and assign registers accordingly. */
3147
3148 /* Return the union class of CLASS1 and CLASS2.
3149 See the x86-64 PS ABI for details. */
3150
3151 static enum x86_64_reg_class
3152 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3153 {
3154 /* Rule #1: If both classes are equal, this is the resulting class. */
3155 if (class1 == class2)
3156 return class1;
3157
3158 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3159 the other class. */
3160 if (class1 == X86_64_NO_CLASS)
3161 return class2;
3162 if (class2 == X86_64_NO_CLASS)
3163 return class1;
3164
3165 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3166 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3167 return X86_64_MEMORY_CLASS;
3168
3169 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3170 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3171 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3172 return X86_64_INTEGERSI_CLASS;
3173 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3174 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3175 return X86_64_INTEGER_CLASS;
3176
3177 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3178 MEMORY is used. */
3179 if (class1 == X86_64_X87_CLASS
3180 || class1 == X86_64_X87UP_CLASS
3181 || class1 == X86_64_COMPLEX_X87_CLASS
3182 || class2 == X86_64_X87_CLASS
3183 || class2 == X86_64_X87UP_CLASS
3184 || class2 == X86_64_COMPLEX_X87_CLASS)
3185 return X86_64_MEMORY_CLASS;
3186
3187 /* Rule #6: Otherwise class SSE is used. */
3188 return X86_64_SSE_CLASS;
3189 }
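
/* Worked example (illustrative): for a struct such as

     struct s { float f; int i; };

   which occupies a single eightbyte, the float half classifies as
   X86_64_SSESF_CLASS and the int half as X86_64_INTEGER_CLASS; rule #4
   merges them to X86_64_INTEGER_CLASS, so the whole struct travels in one
   general purpose register.  */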
3190
3191 /* Classify the argument of type TYPE and mode MODE.
3192 CLASSES will be filled by the register class used to pass each word
3193 of the operand. The number of words is returned. In case the parameter
3194 should be passed in memory, 0 is returned. As a special case for zero
3195 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3196
3197 BIT_OFFSET is used internally for handling records; it specifies the
3198 offset of the argument in bits, modulo 256, to avoid overflow cases.
3199
3200 See the x86-64 PS ABI for details.
3201 */
3202
3203 static int
3204 classify_argument (enum machine_mode mode, tree type,
3205 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3206 {
3207 HOST_WIDE_INT bytes =
3208 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3209 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3210
3211 /* Variable sized entities are always passed/returned in memory. */
3212 if (bytes < 0)
3213 return 0;
3214
3215 if (mode != VOIDmode
3216 && targetm.calls.must_pass_in_stack (mode, type))
3217 return 0;
3218
3219 if (type && AGGREGATE_TYPE_P (type))
3220 {
3221 int i;
3222 tree field;
3223 enum x86_64_reg_class subclasses[MAX_CLASSES];
3224
3225 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3226 if (bytes > 16)
3227 return 0;
3228
3229 for (i = 0; i < words; i++)
3230 classes[i] = X86_64_NO_CLASS;
3231
3232 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3233 signal the memory class, so handle this as a special case. */
3234 if (!words)
3235 {
3236 classes[0] = X86_64_NO_CLASS;
3237 return 1;
3238 }
3239
3240 /* Classify each field of record and merge classes. */
3241 switch (TREE_CODE (type))
3242 {
3243 case RECORD_TYPE:
3244 /* And now merge the fields of structure. */
3245 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3246 {
3247 if (TREE_CODE (field) == FIELD_DECL)
3248 {
3249 int num;
3250
3251 if (TREE_TYPE (field) == error_mark_node)
3252 continue;
3253
3254 /* Bitfields are always classified as integer. Handle them
3255 early, since later code would consider them to be
3256 misaligned integers. */
3257 if (DECL_BIT_FIELD (field))
3258 {
3259 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3260 i < ((int_bit_position (field) + (bit_offset % 64))
3261 + tree_low_cst (DECL_SIZE (field), 0)
3262 + 63) / 8 / 8; i++)
3263 classes[i] =
3264 merge_classes (X86_64_INTEGER_CLASS,
3265 classes[i]);
3266 }
3267 else
3268 {
3269 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3270 TREE_TYPE (field), subclasses,
3271 (int_bit_position (field)
3272 + bit_offset) % 256);
3273 if (!num)
3274 return 0;
3275 for (i = 0; i < num; i++)
3276 {
3277 int pos =
3278 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3279 classes[i + pos] =
3280 merge_classes (subclasses[i], classes[i + pos]);
3281 }
3282 }
3283 }
3284 }
3285 break;
3286
3287 case ARRAY_TYPE:
3288 /* Arrays are handled as small records. */
3289 {
3290 int num;
3291 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3292 TREE_TYPE (type), subclasses, bit_offset);
3293 if (!num)
3294 return 0;
3295
3296 /* The partial classes are now full classes. */
3297 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3298 subclasses[0] = X86_64_SSE_CLASS;
3299 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3300 subclasses[0] = X86_64_INTEGER_CLASS;
3301
3302 for (i = 0; i < words; i++)
3303 classes[i] = subclasses[i % num];
3304
3305 break;
3306 }
3307 case UNION_TYPE:
3308 case QUAL_UNION_TYPE:
3309 /* Unions are similar to RECORD_TYPE, but the offset is
3310 always 0. */
3311 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3312 {
3313 if (TREE_CODE (field) == FIELD_DECL)
3314 {
3315 int num;
3316
3317 if (TREE_TYPE (field) == error_mark_node)
3318 continue;
3319
3320 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3321 TREE_TYPE (field), subclasses,
3322 bit_offset);
3323 if (!num)
3324 return 0;
3325 for (i = 0; i < num; i++)
3326 classes[i] = merge_classes (subclasses[i], classes[i]);
3327 }
3328 }
3329 break;
3330
3331 default:
3332 gcc_unreachable ();
3333 }
3334
3335 /* Final merger cleanup. */
3336 for (i = 0; i < words; i++)
3337 {
3338 /* If one class is MEMORY, everything should be passed in
3339 memory. */
3340 if (classes[i] == X86_64_MEMORY_CLASS)
3341 return 0;
3342
3343 /* The X86_64_SSEUP_CLASS should always be preceded by
3344 X86_64_SSE_CLASS. */
3345 if (classes[i] == X86_64_SSEUP_CLASS
3346 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3347 classes[i] = X86_64_SSE_CLASS;
3348
3349 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3350 if (classes[i] == X86_64_X87UP_CLASS
3351 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3352 classes[i] = X86_64_SSE_CLASS;
3353 }
3354 return words;
3355 }
3356
3357 /* Compute the alignment needed. We align all types to their natural
3358 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3359 if (mode != VOIDmode && mode != BLKmode)
3360 {
3361 int mode_alignment = GET_MODE_BITSIZE (mode);
3362
3363 if (mode == XFmode)
3364 mode_alignment = 128;
3365 else if (mode == XCmode)
3366 mode_alignment = 256;
3367 if (COMPLEX_MODE_P (mode))
3368 mode_alignment /= 2;
3369 /* Misaligned fields are always returned in memory. */
3370 if (bit_offset % mode_alignment)
3371 return 0;
3372 }
3373
3374 /* For V1xx modes, just use the base mode. */
3375 if (VECTOR_MODE_P (mode)
3376 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3377 mode = GET_MODE_INNER (mode);
3378
3379 /* Classification of atomic types. */
3380 switch (mode)
3381 {
3382 case SDmode:
3383 case DDmode:
3384 classes[0] = X86_64_SSE_CLASS;
3385 return 1;
3386 case TDmode:
3387 classes[0] = X86_64_SSE_CLASS;
3388 classes[1] = X86_64_SSEUP_CLASS;
3389 return 2;
3390 case DImode:
3391 case SImode:
3392 case HImode:
3393 case QImode:
3394 case CSImode:
3395 case CHImode:
3396 case CQImode:
3397 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3398 classes[0] = X86_64_INTEGERSI_CLASS;
3399 else
3400 classes[0] = X86_64_INTEGER_CLASS;
3401 return 1;
3402 case CDImode:
3403 case TImode:
3404 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3405 return 2;
3406 case CTImode:
3407 return 0;
3408 case SFmode:
3409 if (!(bit_offset % 64))
3410 classes[0] = X86_64_SSESF_CLASS;
3411 else
3412 classes[0] = X86_64_SSE_CLASS;
3413 return 1;
3414 case DFmode:
3415 classes[0] = X86_64_SSEDF_CLASS;
3416 return 1;
3417 case XFmode:
3418 classes[0] = X86_64_X87_CLASS;
3419 classes[1] = X86_64_X87UP_CLASS;
3420 return 2;
3421 case TFmode:
3422 classes[0] = X86_64_SSE_CLASS;
3423 classes[1] = X86_64_SSEUP_CLASS;
3424 return 2;
3425 case SCmode:
3426 classes[0] = X86_64_SSE_CLASS;
3427 return 1;
3428 case DCmode:
3429 classes[0] = X86_64_SSEDF_CLASS;
3430 classes[1] = X86_64_SSEDF_CLASS;
3431 return 2;
3432 case XCmode:
3433 classes[0] = X86_64_COMPLEX_X87_CLASS;
3434 return 1;
3435 case TCmode:
3436 /* This mode is larger than 16 bytes. */
3437 return 0;
3438 case V4SFmode:
3439 case V4SImode:
3440 case V16QImode:
3441 case V8HImode:
3442 case V2DFmode:
3443 case V2DImode:
3444 classes[0] = X86_64_SSE_CLASS;
3445 classes[1] = X86_64_SSEUP_CLASS;
3446 return 2;
3447 case V2SFmode:
3448 case V2SImode:
3449 case V4HImode:
3450 case V8QImode:
3451 classes[0] = X86_64_SSE_CLASS;
3452 return 1;
3453 case BLKmode:
3454 case VOIDmode:
3455 return 0;
3456 default:
3457 gcc_assert (VECTOR_MODE_P (mode));
3458
3459 if (bytes > 16)
3460 return 0;
3461
3462 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3463
3464 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3465 classes[0] = X86_64_INTEGERSI_CLASS;
3466 else
3467 classes[0] = X86_64_INTEGER_CLASS;
3468 classes[1] = X86_64_INTEGER_CLASS;
3469 return 1 + (bytes > 8);
3470 }
3471 }
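
/* Worked example (illustrative): for

     struct s { double d; int i; };

   (16 bytes, two eightbytes) classify_argument returns 2 with
   classes[0] = X86_64_SSEDF_CLASS and classes[1] = X86_64_INTEGER_CLASS,
   so the struct is passed in one SSE register and one integer register.
   A 24-byte struct instead trips the "bytes > 16" check and yields 0,
   i.e. it is passed in memory.  */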
3472
3473 /* Examine the argument and set the number of registers required in each
3474 class. Return 0 iff the parameter should be passed in memory. */
3475 static int
3476 examine_argument (enum machine_mode mode, tree type, int in_return,
3477 int *int_nregs, int *sse_nregs)
3478 {
3479 enum x86_64_reg_class class[MAX_CLASSES];
3480 int n = classify_argument (mode, type, class, 0);
3481
3482 *int_nregs = 0;
3483 *sse_nregs = 0;
3484 if (!n)
3485 return 0;
3486 for (n--; n >= 0; n--)
3487 switch (class[n])
3488 {
3489 case X86_64_INTEGER_CLASS:
3490 case X86_64_INTEGERSI_CLASS:
3491 (*int_nregs)++;
3492 break;
3493 case X86_64_SSE_CLASS:
3494 case X86_64_SSESF_CLASS:
3495 case X86_64_SSEDF_CLASS:
3496 (*sse_nregs)++;
3497 break;
3498 case X86_64_NO_CLASS:
3499 case X86_64_SSEUP_CLASS:
3500 break;
3501 case X86_64_X87_CLASS:
3502 case X86_64_X87UP_CLASS:
3503 if (!in_return)
3504 return 0;
3505 break;
3506 case X86_64_COMPLEX_X87_CLASS:
3507 return in_return ? 2 : 0;
3508 case X86_64_MEMORY_CLASS:
3509 gcc_unreachable ();
3510 }
3511 return 1;
3512 }
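
/* Hypothetical caller sketch (mirrors function_arg_advance_64 below):

     int int_nregs, sse_nregs;
     if (examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
       ... the argument needs int_nregs GPRs plus sse_nregs SSE registers ...
     else
       ... the argument must be passed in memory ...  */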
3513
3514 /* Construct container for the argument used by GCC interface. See
3515 FUNCTION_ARG for the detailed description. */
3516
3517 static rtx
3518 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3519 tree type, int in_return, int nintregs, int nsseregs,
3520 const int *intreg, int sse_regno)
3521 {
3522 /* The following variables hold the static issued_error state. */
3523 static bool issued_sse_arg_error;
3524 static bool issued_sse_ret_error;
3525 static bool issued_x87_ret_error;
3526
3527 enum machine_mode tmpmode;
3528 int bytes =
3529 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3530 enum x86_64_reg_class class[MAX_CLASSES];
3531 int n;
3532 int i;
3533 int nexps = 0;
3534 int needed_sseregs, needed_intregs;
3535 rtx exp[MAX_CLASSES];
3536 rtx ret;
3537
3538 n = classify_argument (mode, type, class, 0);
3539 if (!n)
3540 return NULL;
3541 if (!examine_argument (mode, type, in_return, &needed_intregs,
3542 &needed_sseregs))
3543 return NULL;
3544 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3545 return NULL;
3546
3547 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3548 some less clueful developer tries to use floating-point anyway. */
3549 if (needed_sseregs && !TARGET_SSE)
3550 {
3551 if (in_return)
3552 {
3553 if (!issued_sse_ret_error)
3554 {
3555 error ("SSE register return with SSE disabled");
3556 issued_sse_ret_error = true;
3557 }
3558 }
3559 else if (!issued_sse_arg_error)
3560 {
3561 error ("SSE register argument with SSE disabled");
3562 issued_sse_arg_error = true;
3563 }
3564 return NULL;
3565 }
3566
3567 /* Likewise, error if the ABI requires us to return values in the
3568 x87 registers and the user specified -mno-80387. */
3569 if (!TARGET_80387 && in_return)
3570 for (i = 0; i < n; i++)
3571 if (class[i] == X86_64_X87_CLASS
3572 || class[i] == X86_64_X87UP_CLASS
3573 || class[i] == X86_64_COMPLEX_X87_CLASS)
3574 {
3575 if (!issued_x87_ret_error)
3576 {
3577 error ("x87 register return with x87 disabled");
3578 issued_x87_ret_error = true;
3579 }
3580 return NULL;
3581 }
3582
3583 /* First construct simple cases. Avoid SCmode, since we want to use
3584 a single register to pass this type. */
3585 if (n == 1 && mode != SCmode)
3586 switch (class[0])
3587 {
3588 case X86_64_INTEGER_CLASS:
3589 case X86_64_INTEGERSI_CLASS:
3590 return gen_rtx_REG (mode, intreg[0]);
3591 case X86_64_SSE_CLASS:
3592 case X86_64_SSESF_CLASS:
3593 case X86_64_SSEDF_CLASS:
3594 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3595 case X86_64_X87_CLASS:
3596 case X86_64_COMPLEX_X87_CLASS:
3597 return gen_rtx_REG (mode, FIRST_STACK_REG);
3598 case X86_64_NO_CLASS:
3599 /* Zero sized array, struct or class. */
3600 return NULL;
3601 default:
3602 gcc_unreachable ();
3603 }
3604 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3605 && mode != BLKmode)
3606 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3607
3608 if (n == 2
3609 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3610 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3611 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3612 && class[1] == X86_64_INTEGER_CLASS
3613 && (mode == CDImode || mode == TImode || mode == TFmode)
3614 && intreg[0] + 1 == intreg[1])
3615 return gen_rtx_REG (mode, intreg[0]);
3616
3617 /* Otherwise figure out the entries of the PARALLEL. */
3618 for (i = 0; i < n; i++)
3619 {
3620 switch (class[i])
3621 {
3622 case X86_64_NO_CLASS:
3623 break;
3624 case X86_64_INTEGER_CLASS:
3625 case X86_64_INTEGERSI_CLASS:
3626 /* Merge TImodes on aligned occasions here too. */
3627 if (i * 8 + 8 > bytes)
3628 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3629 else if (class[i] == X86_64_INTEGERSI_CLASS)
3630 tmpmode = SImode;
3631 else
3632 tmpmode = DImode;
3633 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3634 if (tmpmode == BLKmode)
3635 tmpmode = DImode;
3636 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3637 gen_rtx_REG (tmpmode, *intreg),
3638 GEN_INT (i*8));
3639 intreg++;
3640 break;
3641 case X86_64_SSESF_CLASS:
3642 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3643 gen_rtx_REG (SFmode,
3644 SSE_REGNO (sse_regno)),
3645 GEN_INT (i*8));
3646 sse_regno++;
3647 break;
3648 case X86_64_SSEDF_CLASS:
3649 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3650 gen_rtx_REG (DFmode,
3651 SSE_REGNO (sse_regno)),
3652 GEN_INT (i*8));
3653 sse_regno++;
3654 break;
3655 case X86_64_SSE_CLASS:
3656 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3657 tmpmode = TImode;
3658 else
3659 tmpmode = DImode;
3660 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3661 gen_rtx_REG (tmpmode,
3662 SSE_REGNO (sse_regno)),
3663 GEN_INT (i*8));
3664 if (tmpmode == TImode)
3665 i++;
3666 sse_regno++;
3667 break;
3668 default:
3669 gcc_unreachable ();
3670 }
3671 }
3672
3673 /* Empty aligned struct, union or class. */
3674 if (nexps == 0)
3675 return NULL;
3676
3677 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3678 for (i = 0; i < nexps; i++)
3679 XVECEXP (ret, 0, i) = exp [i];
3680 return ret;
3681 }
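
/* Illustrative result (register choices depend on the free registers passed
   in): for the two-eightbyte struct { double d; int i; } classified as
   SSEDF + INTEGER above, the function builds roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte lives in an SSE register and the second in a
   general purpose register, each tagged with its byte offset.  */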
3682
3683 /* Update the data in CUM to advance over an argument of mode MODE
3684 and data type TYPE. (TYPE is null for libcalls where that information
3685 may not be available.) */
3686
3687 static void
3688 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3689 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3690 {
3691 switch (mode)
3692 {
3693 default:
3694 break;
3695
3696 case BLKmode:
3697 if (bytes < 0)
3698 break;
3699 /* FALLTHRU */
3700
3701 case DImode:
3702 case SImode:
3703 case HImode:
3704 case QImode:
3705 cum->words += words;
3706 cum->nregs -= words;
3707 cum->regno += words;
3708
3709 if (cum->nregs <= 0)
3710 {
3711 cum->nregs = 0;
3712 cum->regno = 0;
3713 }
3714 break;
3715
3716 case DFmode:
3717 if (cum->float_in_sse < 2)
3718 break;
3719 case SFmode:
3720 if (cum->float_in_sse < 1)
3721 break;
3722 /* FALLTHRU */
3723
3724 case TImode:
3725 case V16QImode:
3726 case V8HImode:
3727 case V4SImode:
3728 case V2DImode:
3729 case V4SFmode:
3730 case V2DFmode:
3731 if (!type || !AGGREGATE_TYPE_P (type))
3732 {
3733 cum->sse_words += words;
3734 cum->sse_nregs -= 1;
3735 cum->sse_regno += 1;
3736 if (cum->sse_nregs <= 0)
3737 {
3738 cum->sse_nregs = 0;
3739 cum->sse_regno = 0;
3740 }
3741 }
3742 break;
3743
3744 case V8QImode:
3745 case V4HImode:
3746 case V2SImode:
3747 case V2SFmode:
3748 if (!type || !AGGREGATE_TYPE_P (type))
3749 {
3750 cum->mmx_words += words;
3751 cum->mmx_nregs -= 1;
3752 cum->mmx_regno += 1;
3753 if (cum->mmx_nregs <= 0)
3754 {
3755 cum->mmx_nregs = 0;
3756 cum->mmx_regno = 0;
3757 }
3758 }
3759 break;
3760 }
3761 }
3762
3763 static void
3764 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3765 tree type, HOST_WIDE_INT words)
3766 {
3767 int int_nregs, sse_nregs;
3768
3769 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3770 cum->words += words;
3771 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3772 {
3773 cum->nregs -= int_nregs;
3774 cum->sse_nregs -= sse_nregs;
3775 cum->regno += int_nregs;
3776 cum->sse_regno += sse_nregs;
3777 }
3778 else
3779 cum->words += words;
3780 }
3781
3782 static void
3783 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3784 HOST_WIDE_INT words)
3785 {
3786 /* Otherwise, this should be passed indirectly. */
3787 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3788
3789 cum->words += words;
3790 if (cum->nregs > 0)
3791 {
3792 cum->nregs -= 1;
3793 cum->regno += 1;
3794 }
3795 }
3796
3797 void
3798 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3799 tree type, int named ATTRIBUTE_UNUSED)
3800 {
3801 HOST_WIDE_INT bytes, words;
3802
3803 if (mode == BLKmode)
3804 bytes = int_size_in_bytes (type);
3805 else
3806 bytes = GET_MODE_SIZE (mode);
3807 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3808
3809 if (type)
3810 mode = type_natural_mode (type);
3811
3812 if (TARGET_64BIT_MS_ABI)
3813 function_arg_advance_ms_64 (cum, bytes, words);
3814 else if (TARGET_64BIT)
3815 function_arg_advance_64 (cum, mode, type, words);
3816 else
3817 function_arg_advance_32 (cum, mode, type, bytes, words);
3818 }
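
/* Illustrative effect on CUM for x86-64 (the 32-bit and MS variants above
   behave analogously): advancing over an int consumes one GPR slot
   (cum->nregs--, cum->regno++); advancing over a double consumes one SSE
   slot; advancing over a 32-byte struct, which classify_argument refuses,
   leaves the register counts alone and only bumps cum->words, i.e. the
   argument lives on the stack.  */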
3819
3820 /* Define where to put the arguments to a function.
3821 Value is zero to push the argument on the stack,
3822 or a hard register in which to store the argument.
3823
3824 MODE is the argument's machine mode.
3825 TYPE is the data type of the argument (as a tree).
3826 This is null for libcalls where that information may
3827 not be available.
3828 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3829 the preceding args and about the function being called.
3830 NAMED is nonzero if this argument is a named parameter
3831 (otherwise it is an extra parameter matching an ellipsis). */
3832
3833 static rtx
3834 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3835 enum machine_mode orig_mode, tree type,
3836 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3837 {
3838 static bool warnedsse, warnedmmx;
3839
3840 /* Avoid the AL settings for the Unix64 ABI. */
3841 if (mode == VOIDmode)
3842 return constm1_rtx;
3843
3844 switch (mode)
3845 {
3846 default:
3847 break;
3848
3849 case BLKmode:
3850 if (bytes < 0)
3851 break;
3852 /* FALLTHRU */
3853 case DImode:
3854 case SImode:
3855 case HImode:
3856 case QImode:
3857 if (words <= cum->nregs)
3858 {
3859 int regno = cum->regno;
3860
3861 /* Fastcall allocates the first two DWORD (SImode) or
3862 smaller arguments to ECX and EDX. */
3863 if (cum->fastcall)
3864 {
3865 if (mode == BLKmode || mode == DImode)
3866 break;
3867
3868 /* ECX, not EAX, is the first allocated register. */
3869 if (regno == 0)
3870 regno = 2;
3871 }
3872 return gen_rtx_REG (mode, regno);
3873 }
3874 break;
3875
3876 case DFmode:
3877 if (cum->float_in_sse < 2)
3878 break;
3879 case SFmode:
3880 if (cum->float_in_sse < 1)
3881 break;
3882 /* FALLTHRU */
3883 case TImode:
3884 case V16QImode:
3885 case V8HImode:
3886 case V4SImode:
3887 case V2DImode:
3888 case V4SFmode:
3889 case V2DFmode:
3890 if (!type || !AGGREGATE_TYPE_P (type))
3891 {
3892 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3893 {
3894 warnedsse = true;
3895 warning (0, "SSE vector argument without SSE enabled "
3896 "changes the ABI");
3897 }
3898 if (cum->sse_nregs)
3899 return gen_reg_or_parallel (mode, orig_mode,
3900 cum->sse_regno + FIRST_SSE_REG);
3901 }
3902 break;
3903
3904 case V8QImode:
3905 case V4HImode:
3906 case V2SImode:
3907 case V2SFmode:
3908 if (!type || !AGGREGATE_TYPE_P (type))
3909 {
3910 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3911 {
3912 warnedmmx = true;
3913 warning (0, "MMX vector argument without MMX enabled "
3914 "changes the ABI");
3915 }
3916 if (cum->mmx_nregs)
3917 return gen_reg_or_parallel (mode, orig_mode,
3918 cum->mmx_regno + FIRST_MMX_REG);
3919 }
3920 break;
3921 }
3922
3923 return NULL_RTX;
3924 }
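
/* Illustrative example for the fastcall path above: with

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   init_cumulative_args sets nregs = 2 and fastcall = 1, so A is assigned to
   ECX (regno 0 is remapped to 2), B to EDX, and C, the register slots being
   exhausted, is pushed on the stack.  */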
3925
3926 static rtx
3927 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3928 enum machine_mode orig_mode, tree type)
3929 {
3930 /* Handle a hidden AL argument containing the number of SSE registers
3931 used by varargs x86-64 functions. */
3932 if (mode == VOIDmode)
3933 return GEN_INT (cum->maybe_vaarg
3934 ? (cum->sse_nregs < 0
3935 ? SSE_REGPARM_MAX
3936 : cum->sse_regno)
3937 : -1);
3938
3939 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3940 cum->sse_nregs,
3941 &x86_64_int_parameter_registers [cum->regno],
3942 cum->sse_regno);
3943 }
3944
3945 static rtx
3946 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3947 enum machine_mode orig_mode, int named)
3948 {
3949 unsigned int regno;
3950
3951 /* Avoid the AL settings for the Unix64 ABI. */
3952 if (mode == VOIDmode)
3953 return constm1_rtx;
3954
3955 /* If we've run out of registers, it goes on the stack. */
3956 if (cum->nregs == 0)
3957 return NULL_RTX;
3958
3959 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
3960
3961 /* Only floating point modes are passed in anything but integer regs. */
3962 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
3963 {
3964 if (named)
3965 regno = cum->regno + FIRST_SSE_REG;
3966 else
3967 {
3968 rtx t1, t2;
3969
3970 /* Unnamed floating parameters are passed in both the
3971 SSE and integer registers. */
3972 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
3973 t2 = gen_rtx_REG (mode, regno);
3974 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
3975 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
3976 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
3977 }
3978 }
3979
3980 return gen_reg_or_parallel (mode, orig_mode, regno);
3981 }
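
/* Illustrative example for the unnamed-float case above (64-bit MS ABI,
   varargs call): a double passed as the third, unnamed argument has
   cum->regno == 2, so it is described as a PARALLEL of (reg:DF xmm2) and
   (reg:DF r8); the caller materializes it in both registers because the
   callee may fetch it from either one.  */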
3982
3983 rtx
3984 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3985 tree type, int named)
3986 {
3987 enum machine_mode mode = omode;
3988 HOST_WIDE_INT bytes, words;
3989
3990 if (mode == BLKmode)
3991 bytes = int_size_in_bytes (type);
3992 else
3993 bytes = GET_MODE_SIZE (mode);
3994 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3995
3996 /* To simplify the code below, represent vector types with a vector mode
3997 even if MMX/SSE are not active. */
3998 if (type && TREE_CODE (type) == VECTOR_TYPE)
3999 mode = type_natural_mode (type);
4000
4001 if (TARGET_64BIT_MS_ABI)
4002 return function_arg_ms_64 (cum, mode, omode, named);
4003 else if (TARGET_64BIT)
4004 return function_arg_64 (cum, mode, omode, type);
4005 else
4006 return function_arg_32 (cum, mode, omode, type, bytes, words);
4007 }
4008
4009 /* A C expression that indicates when an argument must be passed by
4010 reference. If nonzero for an argument, a copy of that argument is
4011 made in memory and a pointer to the argument is passed instead of
4012 the argument itself. The pointer is passed in whatever way is
4013 appropriate for passing a pointer to that type. */
4014
4015 static bool
4016 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4017 enum machine_mode mode ATTRIBUTE_UNUSED,
4018 tree type, bool named ATTRIBUTE_UNUSED)
4019 {
4020 if (TARGET_64BIT_MS_ABI)
4021 {
4022 if (type)
4023 {
4024 /* Arrays are passed by reference. */
4025 if (TREE_CODE (type) == ARRAY_TYPE)
4026 return true;
4027
4028 if (AGGREGATE_TYPE_P (type))
4029 {
4030 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4031 are passed by reference. */
4032 int el2 = exact_log2 (int_size_in_bytes (type));
4033 return !(el2 >= 0 && el2 <= 3);
4034 }
4035 }
4036
4037 /* __m128 is passed by reference. */
4038 /* ??? How to handle complex? For now treat them as structs,
4039 and pass them by reference if they're too large. */
4040 if (GET_MODE_SIZE (mode) > 8)
4041 return true;
4042 }
4043 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4044 return 1;
4045
4046 return 0;
4047 }
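
/* Illustrative consequences of the MS ABI rules above: an 8-byte struct
   (exact_log2 == 3) is passed by value in a register, a 12-byte struct
   (exact_log2 == -1) is passed by reference, and __m128, being a 16-byte
   mode, is passed by reference as well.  */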
4048
4049 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4050 ABI. Only called if TARGET_SSE. */
4051 static bool
4052 contains_128bit_aligned_vector_p (tree type)
4053 {
4054 enum machine_mode mode = TYPE_MODE (type);
4055 if (SSE_REG_MODE_P (mode)
4056 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4057 return true;
4058 if (TYPE_ALIGN (type) < 128)
4059 return false;
4060
4061 if (AGGREGATE_TYPE_P (type))
4062 {
4063 /* Walk the aggregates recursively. */
4064 switch (TREE_CODE (type))
4065 {
4066 case RECORD_TYPE:
4067 case UNION_TYPE:
4068 case QUAL_UNION_TYPE:
4069 {
4070 tree field;
4071
4072 /* Walk all the structure fields. */
4073 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4074 {
4075 if (TREE_CODE (field) == FIELD_DECL
4076 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4077 return true;
4078 }
4079 break;
4080 }
4081
4082 case ARRAY_TYPE:
4083 /* Just for use if some languages pass arrays by value. */
4084 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4085 return true;
4086 break;
4087
4088 default:
4089 gcc_unreachable ();
4090 }
4091 }
4092 return false;
4093 }
4094
4095 /* Gives the alignment boundary, in bits, of an argument with the
4096 specified mode and type. */
4097
4098 int
4099 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4100 {
4101 int align;
4102 if (type)
4103 align = TYPE_ALIGN (type);
4104 else
4105 align = GET_MODE_ALIGNMENT (mode);
4106 if (align < PARM_BOUNDARY)
4107 align = PARM_BOUNDARY;
4108 if (!TARGET_64BIT)
4109 {
4110 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4111 make an exception for SSE modes since these require 128bit
4112 alignment.
4113
4114 The handling here differs from field_alignment. ICC aligns MMX
4115 arguments to 4 byte boundaries, while structure fields are aligned
4116 to 8 byte boundaries. */
4117 if (!TARGET_SSE)
4118 align = PARM_BOUNDARY;
4119 else if (!type)
4120 {
4121 if (!SSE_REG_MODE_P (mode))
4122 align = PARM_BOUNDARY;
4123 }
4124 else
4125 {
4126 if (!contains_128bit_aligned_vector_p (type))
4127 align = PARM_BOUNDARY;
4128 }
4129 }
4130 if (align > 128)
4131 align = 128;
4132 return align;
4133 }
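
/* Illustrative results for the 32-bit case (assuming TARGET_SSE): a double
   argument reports PARM_BOUNDARY (32 bits) even though its type alignment
   is 64, while an __m128 argument, or a struct containing one, reports 128
   bits because contains_128bit_aligned_vector_p finds the SSE vector.  */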
4134
4135 /* Return true if REGNO is a possible register number of a function value. */
4136
4137 bool
4138 ix86_function_value_regno_p (int regno)
4139 {
4140 switch (regno)
4141 {
4142 case 0:
4143 return true;
4144
4145 case FIRST_FLOAT_REG:
4146 if (TARGET_64BIT_MS_ABI)
4147 return false;
4148 return TARGET_FLOAT_RETURNS_IN_80387;
4149
4150 case FIRST_SSE_REG:
4151 return TARGET_SSE;
4152
4153 case FIRST_MMX_REG:
4154 if (TARGET_MACHO || TARGET_64BIT)
4155 return false;
4156 return TARGET_MMX;
4157 }
4158
4159 return false;
4160 }
4161
4162 /* Define how to find the value returned by a function.
4163 VALTYPE is the data type of the value (as a tree).
4164 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4165 otherwise, FUNC is 0. */
4166
4167 static rtx
4168 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4169 tree fntype, tree fn)
4170 {
4171 unsigned int regno;
4172
4173 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4174 we normally prevent this case when mmx is not available. However
4175 some ABIs may require the result to be returned like DImode. */
4176 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4177 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4178
4179 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4180 we prevent this case when sse is not available. However some ABIs
4181 may require the result to be returned like integer TImode. */
4182 else if (mode == TImode
4183 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4184 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4185
4186 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4187 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4188 regno = FIRST_FLOAT_REG;
4189 else
4190 /* Most things go in %eax. */
4191 regno = 0;
4192
4193 /* Override FP return register with %xmm0 for local functions when
4194 SSE math is enabled or for functions with sseregparm attribute. */
4195 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4196 {
4197 int sse_level = ix86_function_sseregparm (fntype, fn);
4198 if ((sse_level >= 1 && mode == SFmode)
4199 || (sse_level == 2 && mode == DFmode))
4200 regno = FIRST_SSE_REG;
4201 }
4202
4203 return gen_rtx_REG (orig_mode, regno);
4204 }
4205
4206 static rtx
4207 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4208 tree valtype)
4209 {
4210 rtx ret;
4211
4212 /* Handle libcalls, which don't provide a type node. */
4213 if (valtype == NULL)
4214 {
4215 switch (mode)
4216 {
4217 case SFmode:
4218 case SCmode:
4219 case DFmode:
4220 case DCmode:
4221 case TFmode:
4222 case SDmode:
4223 case DDmode:
4224 case TDmode:
4225 return gen_rtx_REG (mode, FIRST_SSE_REG);
4226 case XFmode:
4227 case XCmode:
4228 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4229 case TCmode:
4230 return NULL;
4231 default:
4232 return gen_rtx_REG (mode, 0);
4233 }
4234 }
4235
4236 ret = construct_container (mode, orig_mode, valtype, 1,
4237 REGPARM_MAX, SSE_REGPARM_MAX,
4238 x86_64_int_return_registers, 0);
4239
4240 /* For zero sized structures, construct_container returns NULL, but we
4241 need to keep the rest of the compiler happy by returning a meaningful value. */
4242 if (!ret)
4243 ret = gen_rtx_REG (orig_mode, 0);
4244
4245 return ret;
4246 }
4247
4248 static rtx
4249 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4250 {
4251 unsigned int regno = 0;
4252
4253 if (TARGET_SSE)
4254 {
4255 if (mode == SFmode || mode == DFmode)
4256 regno = FIRST_SSE_REG;
4257 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4258 regno = FIRST_SSE_REG;
4259 }
4260
4261 return gen_rtx_REG (orig_mode, regno);
4262 }
4263
4264 static rtx
4265 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4266 enum machine_mode orig_mode, enum machine_mode mode)
4267 {
4268 tree fn, fntype;
4269
4270 fn = NULL_TREE;
4271 if (fntype_or_decl && DECL_P (fntype_or_decl))
4272 fn = fntype_or_decl;
4273 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4274
4275 if (TARGET_64BIT_MS_ABI)
4276 return function_value_ms_64 (orig_mode, mode);
4277 else if (TARGET_64BIT)
4278 return function_value_64 (orig_mode, mode, valtype);
4279 else
4280 return function_value_32 (orig_mode, mode, fntype, fn);
4281 }
4282
4283 static rtx
4284 ix86_function_value (tree valtype, tree fntype_or_decl,
4285 bool outgoing ATTRIBUTE_UNUSED)
4286 {
4287 enum machine_mode mode, orig_mode;
4288
4289 orig_mode = TYPE_MODE (valtype);
4290 mode = type_natural_mode (valtype);
4291 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4292 }
4293
4294 rtx
4295 ix86_libcall_value (enum machine_mode mode)
4296 {
4297 return ix86_function_value_1 (NULL, NULL, mode, mode);
4298 }
4299
4300 /* Return true iff type is returned in memory. */
4301
4302 static int
4303 return_in_memory_32 (tree type, enum machine_mode mode)
4304 {
4305 HOST_WIDE_INT size;
4306
4307 if (mode == BLKmode)
4308 return 1;
4309
4310 size = int_size_in_bytes (type);
4311
4312 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4313 return 0;
4314
4315 if (VECTOR_MODE_P (mode) || mode == TImode)
4316 {
4317 /* User-created vectors small enough to fit in EAX. */
4318 if (size < 8)
4319 return 0;
4320
4321 /* MMX/3dNow values are returned in MM0,
4322 except when it doesn't exist. */
4323 if (size == 8)
4324 return (TARGET_MMX ? 0 : 1);
4325
4326 /* SSE values are returned in XMM0, except when it doesn't exist. */
4327 if (size == 16)
4328 return (TARGET_SSE ? 0 : 1);
4329 }
4330
4331 if (mode == XFmode)
4332 return 0;
4333
4334 if (mode == TDmode)
4335 return 1;
4336
4337 if (size > 12)
4338 return 1;
4339 return 0;
4340 }
4341
4342 static int
4343 return_in_memory_64 (tree type, enum machine_mode mode)
4344 {
4345 int needed_intregs, needed_sseregs;
4346 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4347 }
4348
4349 static int
4350 return_in_memory_ms_64 (tree type, enum machine_mode mode)
4351 {
4352 HOST_WIDE_INT size = int_size_in_bytes (type);
4353
4354 /* __m128 and friends are returned in xmm0. */
4355 if (size == 16 && VECTOR_MODE_P (mode))
4356 return 0;
4357
4358 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4359 return (size != 1 && size != 2 && size != 4 && size != 8);
4360 }
4361
4362 int
4363 ix86_return_in_memory (tree type)
4364 {
4365 enum machine_mode mode = type_natural_mode (type);
4366
4367 if (TARGET_64BIT_MS_ABI)
4368 return return_in_memory_ms_64 (type, mode);
4369 else if (TARGET_64BIT)
4370 return return_in_memory_64 (type, mode);
4371 else
4372 return return_in_memory_32 (type, mode);
4373 }
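
/* Illustrative decisions: on 32-bit targets a long double (XFmode) comes
   back in %st(0) and anything larger than 12 bytes goes to memory; on
   x86-64 a 16-byte struct { double; int; } comes back in registers while a
   32-byte struct goes to memory; under the 64-bit MS ABI only sizes 1, 2,
   4, 8 and 16-byte vectors are returned in registers.  */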
4374
4375 /* When returning SSE vector types, we have a choice of either
4376 (1) being abi incompatible with a -march switch, or
4377 (2) generating an error.
4378 Given no good solution, I think the safest thing is one warning.
4379 The user won't be able to use -Werror, but....
4380
4381 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4382 called in response to actually generating a caller or callee that
4383 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4384 via aggregate_value_p for general type probing from tree-ssa. */
4385
4386 static rtx
4387 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4388 {
4389 static bool warnedsse, warnedmmx;
4390
4391 if (!TARGET_64BIT && type)
4392 {
4393 /* Look at the return type of the function, not the function type. */
4394 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4395
4396 if (!TARGET_SSE && !warnedsse)
4397 {
4398 if (mode == TImode
4399 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4400 {
4401 warnedsse = true;
4402 warning (0, "SSE vector return without SSE enabled "
4403 "changes the ABI");
4404 }
4405 }
4406
4407 if (!TARGET_MMX && !warnedmmx)
4408 {
4409 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4410 {
4411 warnedmmx = true;
4412 warning (0, "MMX vector return without MMX enabled "
4413 "changes the ABI");
4414 }
4415 }
4416 }
4417
4418 return NULL;
4419 }
4420
4421 \f
4422 /* Create the va_list data type. */
4423
4424 static tree
4425 ix86_build_builtin_va_list (void)
4426 {
4427 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4428
4429 /* For i386 we use a plain pointer to the argument area. */
4430 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4431 return build_pointer_type (char_type_node);
4432
4433 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4434 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4435
4436 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4437 unsigned_type_node);
4438 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4439 unsigned_type_node);
4440 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4441 ptr_type_node);
4442 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4443 ptr_type_node);
4444
4445 va_list_gpr_counter_field = f_gpr;
4446 va_list_fpr_counter_field = f_fpr;
4447
4448 DECL_FIELD_CONTEXT (f_gpr) = record;
4449 DECL_FIELD_CONTEXT (f_fpr) = record;
4450 DECL_FIELD_CONTEXT (f_ovf) = record;
4451 DECL_FIELD_CONTEXT (f_sav) = record;
4452
4453 TREE_CHAIN (record) = type_decl;
4454 TYPE_NAME (record) = type_decl;
4455 TYPE_FIELDS (record) = f_gpr;
4456 TREE_CHAIN (f_gpr) = f_fpr;
4457 TREE_CHAIN (f_fpr) = f_ovf;
4458 TREE_CHAIN (f_ovf) = f_sav;
4459
4460 layout_type (record);
4461
4462 /* The correct type is an array type of one element. */
4463 return build_array_type (record, build_index_type (size_zero_node));
4464 }
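
/* For reference, the record built above corresponds to the familiar x86-64
   va_list layout; as a C sketch (not the exact tree constructed here):

     struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];

   gp_offset and fp_offset index into reg_save_area (general registers first,
   then SSE registers), and overflow_arg_area points at arguments that were
   passed on the stack.  */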
4465
4466 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4467
4468 static void
4469 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4470 {
4471 rtx save_area, mem;
4472 rtx label;
4473 rtx label_ref;
4474 rtx tmp_reg;
4475 rtx nsse_reg;
4476 int set;
4477 int i;
4478
4479 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4480 return;
4481
4482 /* Indicate that space should be allocated on the stack for the varargs save area. */
4483 ix86_save_varrargs_registers = 1;
4484 cfun->stack_alignment_needed = 128;
4485
4486 save_area = frame_pointer_rtx;
4487 set = get_varargs_alias_set ();
4488
4489 for (i = cum->regno;
4490 i < ix86_regparm
4491 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4492 i++)
4493 {
4494 mem = gen_rtx_MEM (Pmode,
4495 plus_constant (save_area, i * UNITS_PER_WORD));
4496 MEM_NOTRAP_P (mem) = 1;
4497 set_mem_alias_set (mem, set);
4498 emit_move_insn (mem, gen_rtx_REG (Pmode,
4499 x86_64_int_parameter_registers[i]));
4500 }
4501
4502 if (cum->sse_nregs && cfun->va_list_fpr_size)
4503 {
4504 /* Now emit code to save SSE registers. The AX parameter contains the number
4505 of SSE parameter registers used to call this function. We use the
4506 sse_prologue_save insn template, which produces a computed jump across
4507 the SSE saves. We need some preparation work to get this working. */
4508
4509 label = gen_label_rtx ();
4510 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4511
4512 /* Compute the address to jump to:
4513 label - eax*4 + nnamed_sse_arguments*4 */
4514 tmp_reg = gen_reg_rtx (Pmode);
4515 nsse_reg = gen_reg_rtx (Pmode);
4516 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4517 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4518 gen_rtx_MULT (Pmode, nsse_reg,
4519 GEN_INT (4))));
4520 if (cum->sse_regno)
4521 emit_move_insn
4522 (nsse_reg,
4523 gen_rtx_CONST (DImode,
4524 gen_rtx_PLUS (DImode,
4525 label_ref,
4526 GEN_INT (cum->sse_regno * 4))));
4527 else
4528 emit_move_insn (nsse_reg, label_ref);
4529 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4530
4531 /* Compute the address of the memory block we save into. We always use a
4532 pointer pointing 127 bytes after the first byte to store; this is needed
4533 to keep each save instruction's size limited to 4 bytes. */
4534 tmp_reg = gen_reg_rtx (Pmode);
4535 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4536 plus_constant (save_area,
4537 8 * REGPARM_MAX + 127)));
4538 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4539 MEM_NOTRAP_P (mem) = 1;
4540 set_mem_alias_set (mem, set);
4541 set_mem_align (mem, BITS_PER_WORD);
4542
4543 /* And finally do the dirty job! */
4544 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4545 GEN_INT (cum->sse_regno), label));
4546 }
4547 }
4548
4549 static void
4550 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4551 {
4552 int set = get_varargs_alias_set ();
4553 int i;
4554
4555 for (i = cum->regno; i < REGPARM_MAX; i++)
4556 {
4557 rtx reg, mem;
4558
4559 mem = gen_rtx_MEM (Pmode,
4560 plus_constant (virtual_incoming_args_rtx,
4561 i * UNITS_PER_WORD));
4562 MEM_NOTRAP_P (mem) = 1;
4563 set_mem_alias_set (mem, set);
4564
4565 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4566 emit_move_insn (mem, reg);
4567 }
4568 }
4569
4570 static void
4571 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4572 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4573 int no_rtl)
4574 {
4575 CUMULATIVE_ARGS next_cum;
4576 tree fntype;
4577 int stdarg_p;
4578
4579 /* This argument doesn't appear to be used anymore. Which is good,
4580 because the old code here didn't suppress rtl generation. */
4581 gcc_assert (!no_rtl);
4582
4583 if (!TARGET_64BIT)
4584 return;
4585
4586 fntype = TREE_TYPE (current_function_decl);
4587 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4588 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4589 != void_type_node));
4590
4591 /* For varargs, we do not want to skip the dummy va_dcl argument.
4592 For stdargs, we do want to skip the last named argument. */
4593 next_cum = *cum;
4594 if (stdarg_p)
4595 function_arg_advance (&next_cum, mode, type, 1);
4596
4597 if (TARGET_64BIT_MS_ABI)
4598 setup_incoming_varargs_ms_64 (&next_cum);
4599 else
4600 setup_incoming_varargs_64 (&next_cum);
4601 }
4602
4603 /* Implement va_start. */
4604
4605 void
4606 ix86_va_start (tree valist, rtx nextarg)
4607 {
4608 HOST_WIDE_INT words, n_gpr, n_fpr;
4609 tree f_gpr, f_fpr, f_ovf, f_sav;
4610 tree gpr, fpr, ovf, sav, t;
4611 tree type;
4612
4613 /* Only the 64-bit target needs something special. */
4614 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4615 {
4616 std_expand_builtin_va_start (valist, nextarg);
4617 return;
4618 }
4619
4620 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4621 f_fpr = TREE_CHAIN (f_gpr);
4622 f_ovf = TREE_CHAIN (f_fpr);
4623 f_sav = TREE_CHAIN (f_ovf);
4624
4625 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4626 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4627 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4628 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4629 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4630
4631 /* Count number of gp and fp argument registers used. */
4632 words = current_function_args_info.words;
4633 n_gpr = current_function_args_info.regno;
4634 n_fpr = current_function_args_info.sse_regno;
4635
4636 if (cfun->va_list_gpr_size)
4637 {
4638 type = TREE_TYPE (gpr);
4639 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4640 build_int_cst (type, n_gpr * 8));
4641 TREE_SIDE_EFFECTS (t) = 1;
4642 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4643 }
4644
4645 if (cfun->va_list_fpr_size)
4646 {
4647 type = TREE_TYPE (fpr);
4648 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4649 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4650 TREE_SIDE_EFFECTS (t) = 1;
4651 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4652 }
4653
4654 /* Find the overflow area. */
4655 type = TREE_TYPE (ovf);
4656 t = make_tree (type, virtual_incoming_args_rtx);
4657 if (words != 0)
4658 t = build2 (PLUS_EXPR, type, t,
4659 build_int_cst (type, words * UNITS_PER_WORD));
4660 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4661 TREE_SIDE_EFFECTS (t) = 1;
4662 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4663
4664 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4665 {
4666 /* Find the register save area.
4667 The function prologue saves it right above the stack frame. */
4668 type = TREE_TYPE (sav);
4669 t = make_tree (type, frame_pointer_rtx);
4670 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4671 TREE_SIDE_EFFECTS (t) = 1;
4672 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4673 }
4674 }
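
/* Illustrative values (standard 64-bit ABI, REGPARM_MAX == 6): for
   "void f (int a, ...)" the one named argument uses one GPR, so va_start
   leaves gp_offset == 8, fp_offset == 8 * REGPARM_MAX == 48 (no named SSE
   arguments), overflow_arg_area pointing just past any named stack
   arguments, and reg_save_area pointing at the block saved by
   setup_incoming_varargs_64 above.  */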
4675
4676 /* Implement va_arg. */
4677
4678 static tree
4679 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4680 {
4681 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4682 tree f_gpr, f_fpr, f_ovf, f_sav;
4683 tree gpr, fpr, ovf, sav, t;
4684 int size, rsize;
4685 tree lab_false, lab_over = NULL_TREE;
4686 tree addr, t2;
4687 rtx container;
4688 int indirect_p = 0;
4689 tree ptrtype;
4690 enum machine_mode nat_mode;
4691
4692 /* Only the 64-bit target needs something special. */
4693 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4694 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4695
4696 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4697 f_fpr = TREE_CHAIN (f_gpr);
4698 f_ovf = TREE_CHAIN (f_fpr);
4699 f_sav = TREE_CHAIN (f_ovf);
4700
4701 valist = build_va_arg_indirect_ref (valist);
4702 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4703 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4704 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4705 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4706
4707 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4708 if (indirect_p)
4709 type = build_pointer_type (type);
4710 size = int_size_in_bytes (type);
4711 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4712
4713 nat_mode = type_natural_mode (type);
4714 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4715 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4716
4717 /* Pull the value out of the saved registers. */
4718
4719 addr = create_tmp_var (ptr_type_node, "addr");
4720 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4721
4722 if (container)
4723 {
4724 int needed_intregs, needed_sseregs;
4725 bool need_temp;
4726 tree int_addr, sse_addr;
4727
4728 lab_false = create_artificial_label ();
4729 lab_over = create_artificial_label ();
4730
4731 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4732
4733 need_temp = (!REG_P (container)
4734 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4735 || TYPE_ALIGN (type) > 128));
4736
4737 /* In case we are passing a structure, verify that it is a consecutive block
4738 in the register save area. If not, we need to do moves. */
4739 if (!need_temp && !REG_P (container))
4740 {
4741 /* Verify that all registers are strictly consecutive. */
4742 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4743 {
4744 int i;
4745
4746 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4747 {
4748 rtx slot = XVECEXP (container, 0, i);
4749 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4750 || INTVAL (XEXP (slot, 1)) != i * 16)
4751 need_temp = 1;
4752 }
4753 }
4754 else
4755 {
4756 int i;
4757
4758 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4759 {
4760 rtx slot = XVECEXP (container, 0, i);
4761 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4762 || INTVAL (XEXP (slot, 1)) != i * 8)
4763 need_temp = 1;
4764 }
4765 }
4766 }
4767 if (!need_temp)
4768 {
4769 int_addr = addr;
4770 sse_addr = addr;
4771 }
4772 else
4773 {
4774 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4775 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4776 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4777 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4778 }
4779
4780 /* First ensure that we fit completely in registers. */
4781 if (needed_intregs)
4782 {
4783 t = build_int_cst (TREE_TYPE (gpr),
4784 (REGPARM_MAX - needed_intregs + 1) * 8);
4785 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4786 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4787 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4788 gimplify_and_add (t, pre_p);
4789 }
4790 if (needed_sseregs)
4791 {
4792 t = build_int_cst (TREE_TYPE (fpr),
4793 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4794 + REGPARM_MAX * 8);
4795 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4796 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4797 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4798 gimplify_and_add (t, pre_p);
4799 }
4800
4801 /* Compute index to start of area used for integer regs. */
4802 if (needed_intregs)
4803 {
4804 /* int_addr = gpr + sav; */
4805 t = fold_convert (ptr_type_node, gpr);
4806 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4807 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4808 gimplify_and_add (t, pre_p);
4809 }
4810 if (needed_sseregs)
4811 {
4812 /* sse_addr = fpr + sav; */
4813 t = fold_convert (ptr_type_node, fpr);
4814 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4815 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4816 gimplify_and_add (t, pre_p);
4817 }
4818 if (need_temp)
4819 {
4820 int i;
4821 tree temp = create_tmp_var (type, "va_arg_tmp");
4822
4823 /* addr = &temp; */
4824 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4825 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4826 gimplify_and_add (t, pre_p);
4827
4828 for (i = 0; i < XVECLEN (container, 0); i++)
4829 {
4830 rtx slot = XVECEXP (container, 0, i);
4831 rtx reg = XEXP (slot, 0);
4832 enum machine_mode mode = GET_MODE (reg);
4833 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4834 tree addr_type = build_pointer_type (piece_type);
4835 tree src_addr, src;
4836 int src_offset;
4837 tree dest_addr, dest;
4838
4839 if (SSE_REGNO_P (REGNO (reg)))
4840 {
4841 src_addr = sse_addr;
4842 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4843 }
4844 else
4845 {
4846 src_addr = int_addr;
4847 src_offset = REGNO (reg) * 8;
4848 }
4849 src_addr = fold_convert (addr_type, src_addr);
4850 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4851 size_int (src_offset));
4852 src = build_va_arg_indirect_ref (src_addr);
4853
4854 dest_addr = fold_convert (addr_type, addr);
4855 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4856 size_int (INTVAL (XEXP (slot, 1))));
4857 dest = build_va_arg_indirect_ref (dest_addr);
4858
4859 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4860 gimplify_and_add (t, pre_p);
4861 }
4862 }
4863
4864 if (needed_intregs)
4865 {
4866 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4867 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4868 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4869 gimplify_and_add (t, pre_p);
4870 }
4871 if (needed_sseregs)
4872 {
4873 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4874 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4875 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4876 gimplify_and_add (t, pre_p);
4877 }
4878
4879 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4880 gimplify_and_add (t, pre_p);
4881
4882 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4883 append_to_statement_list (t, pre_p);
4884 }
4885
4886 /* ... otherwise out of the overflow area. */
4887
4888 /* Care for on-stack alignment if needed. */
4889 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4890 || integer_zerop (TYPE_SIZE (type)))
4891 t = ovf;
4892 else
4893 {
4894 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4895 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4896 build_int_cst (TREE_TYPE (ovf), align - 1));
4897 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4898 build_int_cst (TREE_TYPE (t), -align));
4899 }
4900 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4901
4902 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4903 gimplify_and_add (t2, pre_p);
4904
4905 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4906 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4907 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4908 gimplify_and_add (t, pre_p);
4909
4910 if (container)
4911 {
4912 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4913 append_to_statement_list (t, pre_p);
4914 }
4915
4916 ptrtype = build_pointer_type (type);
4917 addr = fold_convert (ptrtype, addr);
4918
4919 if (indirect_p)
4920 addr = build_va_arg_indirect_ref (addr);
4921 return build_va_arg_indirect_ref (addr);
4922 }
4923 \f
4924 /* Return nonzero if OPNUM's MEM should be matched
4925 in movabs* patterns. */
4926
4927 int
4928 ix86_check_movabs (rtx insn, int opnum)
4929 {
4930 rtx set, mem;
4931
4932 set = PATTERN (insn);
4933 if (GET_CODE (set) == PARALLEL)
4934 set = XVECEXP (set, 0, 0);
4935 gcc_assert (GET_CODE (set) == SET);
4936 mem = XEXP (set, opnum);
4937 while (GET_CODE (mem) == SUBREG)
4938 mem = SUBREG_REG (mem);
4939 gcc_assert (MEM_P (mem));
4940 return (volatile_ok || !MEM_VOLATILE_P (mem));
4941 }
4942 \f
4943 /* Initialize the table of extra 80387 mathematical constants. */
4944
4945 static void
4946 init_ext_80387_constants (void)
4947 {
4948 static const char * cst[5] =
4949 {
4950 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4951 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4952 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4953 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4954 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4955 };
4956 int i;
4957
4958 for (i = 0; i < 5; i++)
4959 {
4960 real_from_string (&ext_80387_constants_table[i], cst[i]);
4961 /* Ensure each constant is rounded to XFmode precision. */
4962 real_convert (&ext_80387_constants_table[i],
4963 XFmode, &ext_80387_constants_table[i]);
4964 }
4965
4966 ext_80387_constants_init = 1;
4967 }
4968
4969 /* Return true if the constant is something that can be loaded with
4970 a special instruction. */
4971
4972 int
4973 standard_80387_constant_p (rtx x)
4974 {
4975 enum machine_mode mode = GET_MODE (x);
4976
4977 REAL_VALUE_TYPE r;
4978
4979 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
4980 return -1;
4981
4982 if (x == CONST0_RTX (mode))
4983 return 1;
4984 if (x == CONST1_RTX (mode))
4985 return 2;
4986
4987 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4988
4989 /* For XFmode constants, try to find a special 80387 instruction when
4990 optimizing for size or on those CPUs that benefit from them. */
4991 if (mode == XFmode
4992 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
4993 {
4994 int i;
4995
4996 if (! ext_80387_constants_init)
4997 init_ext_80387_constants ();
4998
4999 for (i = 0; i < 5; i++)
5000 if (real_identical (&r, &ext_80387_constants_table[i]))
5001 return i + 3;
5002 }
5003
5004 /* A load of the constant -0.0 or -1.0 will be split into an
5005 fldz;fchs or fld1;fchs sequence. */
5006 if (real_isnegzero (&r))
5007 return 8;
5008 if (real_identical (&r, &dconstm1))
5009 return 9;
5010
5011 return 0;
5012 }
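
/* Summary of the return values above (for readers of the callers below):
   1 is +0.0 (fldz), 2 is +1.0 (fld1), 3..7 select the ext_80387_constants
   table (log10(2), ln(2), log2(e), log2(10), pi), 8 is -0.0 and 9 is -1.0,
   both later split into a load plus fchs; 0 means "not a special constant"
   and -1 means X was not an x87 CONST_DOUBLE at all.  */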
5013
5014 /* Return the opcode of the special instruction to be used to load
5015 the constant X. */
5016
5017 const char *
5018 standard_80387_constant_opcode (rtx x)
5019 {
5020 switch (standard_80387_constant_p (x))
5021 {
5022 case 1:
5023 return "fldz";
5024 case 2:
5025 return "fld1";
5026 case 3:
5027 return "fldlg2";
5028 case 4:
5029 return "fldln2";
5030 case 5:
5031 return "fldl2e";
5032 case 6:
5033 return "fldl2t";
5034 case 7:
5035 return "fldpi";
5036 case 8:
5037 case 9:
5038 return "#";
5039 default:
5040 gcc_unreachable ();
5041 }
5042 }
5043
5044 /* Return the CONST_DOUBLE representing the 80387 constant that is
5045 loaded by the specified special instruction. The argument IDX
5046 matches the return value from standard_80387_constant_p. */
5047
5048 rtx
5049 standard_80387_constant_rtx (int idx)
5050 {
5051 int i;
5052
5053 if (! ext_80387_constants_init)
5054 init_ext_80387_constants ();
5055
5056 switch (idx)
5057 {
5058 case 3:
5059 case 4:
5060 case 5:
5061 case 6:
5062 case 7:
5063 i = idx - 3;
5064 break;
5065
5066 default:
5067 gcc_unreachable ();
5068 }
5069
5070 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5071 XFmode);
5072 }
5073
5074 /* Return 1 if MODE is a valid mode for SSE. */
5075 static int
5076 standard_sse_mode_p (enum machine_mode mode)
5077 {
5078 switch (mode)
5079 {
5080 case V16QImode:
5081 case V8HImode:
5082 case V4SImode:
5083 case V2DImode:
5084 case V4SFmode:
5085 case V2DFmode:
5086 return 1;
5087
5088 default:
5089 return 0;
5090 }
5091 }
5092
5093 /* Return 1 if X is an FP constant that we can load into an SSE
5094 register without using memory. */
5095 int
5096 standard_sse_constant_p (rtx x)
5097 {
5098 enum machine_mode mode = GET_MODE (x);
5099
5100 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5101 return 1;
5102 if (vector_all_ones_operand (x, mode)
5103 && standard_sse_mode_p (mode))
5104 return TARGET_SSE2 ? 2 : -1;
5105
5106 return 0;
5107 }
5108
5109 /* Return the opcode of the special instruction to be used to load
5110 the constant X. */
5111
5112 const char *
5113 standard_sse_constant_opcode (rtx insn, rtx x)
5114 {
5115 switch (standard_sse_constant_p (x))
5116 {
5117 case 1:
5118 if (get_attr_mode (insn) == MODE_V4SF)
5119 return "xorps\t%0, %0";
5120 else if (get_attr_mode (insn) == MODE_V2DF)
5121 return "xorpd\t%0, %0";
5122 else
5123 return "pxor\t%0, %0";
5124 case 2:
5125 return "pcmpeqd\t%0, %0";
5126 }
5127 gcc_unreachable ();
5128 }
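
/* Illustrative note, derived from the two routines above: an all-zero
   SSE constant is loaded by xoring the destination with itself
   (xorps/xorpd/pxor depending on the insn mode), while an all-ones
   constant, available with SSE2, is materialized by comparing the
   destination against itself with pcmpeqd.  */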
5129
5130 /* Returns 1 if OP contains a symbol reference. */
5131
5132 int
5133 symbolic_reference_mentioned_p (rtx op)
5134 {
5135 const char *fmt;
5136 int i;
5137
5138 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5139 return 1;
5140
5141 fmt = GET_RTX_FORMAT (GET_CODE (op));
5142 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5143 {
5144 if (fmt[i] == 'E')
5145 {
5146 int j;
5147
5148 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5149 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5150 return 1;
5151 }
5152
5153 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5154 return 1;
5155 }
5156
5157 return 0;
5158 }
5159
5160 /* Return 1 if it is appropriate to emit `ret' instructions in the
5161 body of a function. Do this only if the epilogue is simple, needing a
5162 couple of insns. Prior to reloading, we can't tell how many registers
5163 must be saved, so return 0 then. Return 0 if there is no frame
5164 marker to de-allocate. */
5165
5166 int
5167 ix86_can_use_return_insn_p (void)
5168 {
5169 struct ix86_frame frame;
5170
5171 if (! reload_completed || frame_pointer_needed)
5172 return 0;
5173
5174 /* Don't allow more than 32k of popped arguments, since that's all
5175 we can do with one instruction. */
5176 if (current_function_pops_args
5177 && current_function_args_size >= 32768)
5178 return 0;
5179
5180 ix86_compute_frame_layout (&frame);
5181 return frame.to_allocate == 0 && frame.nregs == 0;
5182 }
5183 \f
5184 /* Value should be nonzero if functions must have frame pointers.
5185 Zero means the frame pointer need not be set up (and parms may
5186 be accessed via the stack pointer) in functions that seem suitable. */
5187
5188 int
5189 ix86_frame_pointer_required (void)
5190 {
5191 /* If we accessed previous frames, then the generated code expects
5192 to be able to access the saved ebp value in our frame. */
5193 if (cfun->machine->accesses_prev_frame)
5194 return 1;
5195
5196 /* Several x86 OSes need a frame pointer for other reasons,
5197 usually pertaining to setjmp. */
5198 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5199 return 1;
5200
5201 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5202 the frame pointer by default. Turn it back on now if this is
5203 not a leaf function. */
5204 if (TARGET_OMIT_LEAF_FRAME_POINTER
5205 && (!current_function_is_leaf
5206 || ix86_current_function_calls_tls_descriptor))
5207 return 1;
5208
5209 if (current_function_profile)
5210 return 1;
5211
5212 return 0;
5213 }
5214
5215 /* Record that the current function accesses previous call frames. */
5216
5217 void
5218 ix86_setup_frame_addresses (void)
5219 {
5220 cfun->machine->accesses_prev_frame = 1;
5221 }
5222 \f
5223 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5224 # define USE_HIDDEN_LINKONCE 1
5225 #else
5226 # define USE_HIDDEN_LINKONCE 0
5227 #endif
5228
5229 static int pic_labels_used;
5230
5231 /* Fills in the label name that should be used for a pc thunk for
5232 the given register. */
5233
5234 static void
5235 get_pc_thunk_name (char name[32], unsigned int regno)
5236 {
5237 gcc_assert (!TARGET_64BIT);
5238
5239 if (USE_HIDDEN_LINKONCE)
5240 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5241 else
5242 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5243 }
5244
5245
5246 /* This function outputs the pc thunks used by -fpic code: each thunk
5247 loads its register with the return address of the caller and then returns. */
5248
5249 void
5250 ix86_file_end (void)
5251 {
5252 rtx xops[2];
5253 int regno;
5254
5255 for (regno = 0; regno < 8; ++regno)
5256 {
5257 char name[32];
5258
5259 if (! ((pic_labels_used >> regno) & 1))
5260 continue;
5261
5262 get_pc_thunk_name (name, regno);
5263
5264 #if TARGET_MACHO
5265 if (TARGET_MACHO)
5266 {
5267 switch_to_section (darwin_sections[text_coal_section]);
5268 fputs ("\t.weak_definition\t", asm_out_file);
5269 assemble_name (asm_out_file, name);
5270 fputs ("\n\t.private_extern\t", asm_out_file);
5271 assemble_name (asm_out_file, name);
5272 fputs ("\n", asm_out_file);
5273 ASM_OUTPUT_LABEL (asm_out_file, name);
5274 }
5275 else
5276 #endif
5277 if (USE_HIDDEN_LINKONCE)
5278 {
5279 tree decl;
5280
5281 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5282 error_mark_node);
5283 TREE_PUBLIC (decl) = 1;
5284 TREE_STATIC (decl) = 1;
5285 DECL_ONE_ONLY (decl) = 1;
5286
5287 (*targetm.asm_out.unique_section) (decl, 0);
5288 switch_to_section (get_named_section (decl, NULL, 0));
5289
5290 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5291 fputs ("\t.hidden\t", asm_out_file);
5292 assemble_name (asm_out_file, name);
5293 fputc ('\n', asm_out_file);
5294 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5295 }
5296 else
5297 {
5298 switch_to_section (text_section);
5299 ASM_OUTPUT_LABEL (asm_out_file, name);
5300 }
5301
5302 xops[0] = gen_rtx_REG (SImode, regno);
5303 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5304 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5305 output_asm_insn ("ret", xops);
5306 }
5307
5308 if (NEED_INDICATE_EXEC_STACK)
5309 file_end_indicate_exec_stack ();
5310 }
5311
5312 /* Emit code for the SET_GOT patterns. */
5313
5314 const char *
5315 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5316 {
5317 rtx xops[3];
5318
5319 xops[0] = dest;
5320
5321 if (TARGET_VXWORKS_RTP && flag_pic)
5322 {
5323 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5324 xops[2] = gen_rtx_MEM (Pmode,
5325 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5326 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5327
5328 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5329 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5330 an unadorned address. */
5331 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5332 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5333 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5334 return "";
5335 }
5336
5337 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5338
5339 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5340 {
5341 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5342
5343 if (!flag_pic)
5344 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5345 else
5346 output_asm_insn ("call\t%a2", xops);
5347
5348 #if TARGET_MACHO
5349 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5350 is what will be referenced by the Mach-O PIC subsystem. */
5351 if (!label)
5352 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5353 #endif
5354
5355 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5356 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5357
5358 if (flag_pic)
5359 output_asm_insn ("pop{l}\t%0", xops);
5360 }
5361 else
5362 {
5363 char name[32];
5364 get_pc_thunk_name (name, REGNO (dest));
5365 pic_labels_used |= 1 << REGNO (dest);
5366
5367 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5368 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5369 output_asm_insn ("call\t%X2", xops);
5370 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5371 is what will be referenced by the Mach-O PIC subsystem. */
5372 #if TARGET_MACHO
5373 if (!label)
5374 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5375 else
5376 targetm.asm_out.internal_label (asm_out_file, "L",
5377 CODE_LABEL_NUMBER (label));
5378 #endif
5379 }
5380
5381 if (TARGET_MACHO)
5382 return "";
5383
5384 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5385 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5386 else
5387 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5388
5389 return "";
5390 }
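
/* Illustrative note (a rough sketch of the output produced above for
   32-bit ELF with -fpic; the exact text comes from the templates above):

     without deep branch prediction:      with deep branch prediction:
       call   <label>                       call  __i686.get_pc_thunk.bx
     <label>:                               addl  $_GLOBAL_OFFSET_TABLE_, %ebx
       popl   %ebx
       addl   $_GLOBAL_OFFSET_TABLE_+[.-<label>], %ebx

   Either way the PIC register ends up pointing at the GOT, via an
   explicit call/pop pair or via the per-register pc thunk emitted by
   ix86_file_end.  */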
5391
5392 /* Generate a "push" pattern for input ARG. */
5393
5394 static rtx
5395 gen_push (rtx arg)
5396 {
5397 return gen_rtx_SET (VOIDmode,
5398 gen_rtx_MEM (Pmode,
5399 gen_rtx_PRE_DEC (Pmode,
5400 stack_pointer_rtx)),
5401 arg);
5402 }
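
/* Illustrative note: the RTL produced by gen_push above has the shape

     (set (mem:SI (pre_dec:SI (reg:SI esp))) arg)

   (DImode/%rsp in 64-bit mode), which is how a single push instruction
   is represented and matched by the machine description's push patterns.  */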
5403
5404 /* Return >= 0 if there is an unused call-clobbered register available
5405 for the entire function. */
5406
5407 static unsigned int
5408 ix86_select_alt_pic_regnum (void)
5409 {
5410 if (current_function_is_leaf && !current_function_profile
5411 && !ix86_current_function_calls_tls_descriptor)
5412 {
5413 int i;
5414 for (i = 2; i >= 0; --i)
5415 if (!regs_ever_live[i])
5416 return i;
5417 }
5418
5419 return INVALID_REGNUM;
5420 }
5421
5422 /* Return 1 if we need to save REGNO. */
5423 static int
5424 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5425 {
5426 if (pic_offset_table_rtx
5427 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5428 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5429 || current_function_profile
5430 || current_function_calls_eh_return
5431 || current_function_uses_const_pool))
5432 {
5433 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5434 return 0;
5435 return 1;
5436 }
5437
5438 if (current_function_calls_eh_return && maybe_eh_return)
5439 {
5440 unsigned i;
5441 for (i = 0; ; i++)
5442 {
5443 unsigned test = EH_RETURN_DATA_REGNO (i);
5444 if (test == INVALID_REGNUM)
5445 break;
5446 if (test == regno)
5447 return 1;
5448 }
5449 }
5450
5451 if (cfun->machine->force_align_arg_pointer
5452 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5453 return 1;
5454
5455 return (regs_ever_live[regno]
5456 && !call_used_regs[regno]
5457 && !fixed_regs[regno]
5458 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5459 }
5460
5461 /* Return number of registers to be saved on the stack. */
5462
5463 static int
5464 ix86_nsaved_regs (void)
5465 {
5466 int nregs = 0;
5467 int regno;
5468
5469 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5470 if (ix86_save_reg (regno, true))
5471 nregs++;
5472 return nregs;
5473 }
5474
5475 /* Return the offset between two registers, one to be eliminated, and the other
5476 its replacement, at the start of a routine. */
5477
5478 HOST_WIDE_INT
5479 ix86_initial_elimination_offset (int from, int to)
5480 {
5481 struct ix86_frame frame;
5482 ix86_compute_frame_layout (&frame);
5483
5484 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5485 return frame.hard_frame_pointer_offset;
5486 else if (from == FRAME_POINTER_REGNUM
5487 && to == HARD_FRAME_POINTER_REGNUM)
5488 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5489 else
5490 {
5491 gcc_assert (to == STACK_POINTER_REGNUM);
5492
5493 if (from == ARG_POINTER_REGNUM)
5494 return frame.stack_pointer_offset;
5495
5496 gcc_assert (from == FRAME_POINTER_REGNUM);
5497 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5498 }
5499 }
5500
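/* For orientation, a rough, non-authoritative sketch (derived from the
   code below) of the layout ix86_compute_frame_layout computes, from
   higher to lower addresses:

       <incoming arguments>
       return address
       saved %ebp (if frame_pointer_needed)      <-- hard_frame_pointer_offset
       saved registers (frame->nregs words)
       va-arg register save area (if any)
       padding1 (to stack_alignment_needed)
                                                 <-- frame_pointer_offset
       local variables (get_frame_size ())
       outgoing arguments area (if any)
       padding2 (to preferred_alignment)
                                                 <-- %esp, stack_pointer_offset

   to_allocate is the part below the register save area that the prologue
   must subtract from the stack pointer; when the red zone is usable it is
   carved out of to_allocate at the end of this function.  */
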
5501 /* Fill structure ix86_frame about frame of currently computed function. */
5502
5503 static void
5504 ix86_compute_frame_layout (struct ix86_frame *frame)
5505 {
5506 HOST_WIDE_INT total_size;
5507 unsigned int stack_alignment_needed;
5508 HOST_WIDE_INT offset;
5509 unsigned int preferred_alignment;
5510 HOST_WIDE_INT size = get_frame_size ();
5511
5512 frame->nregs = ix86_nsaved_regs ();
5513 total_size = size;
5514
5515 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5516 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5517
5518 /* During reload iteration the number of registers saved can change.
5519 Recompute the value as needed. Do not recompute when the number of
5520 registers didn't change, as reload does multiple calls to the function
5521 and does not expect the decision to change within a single iteration. */
5522 if (!optimize_size
5523 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5524 {
5525 int count = frame->nregs;
5526
5527 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5528 /* The fast prologue uses moves instead of pushes to save registers.
5529 This is significantly longer, but also executes faster, as modern
5530 hardware can execute the moves in parallel but can't do so for push/pop.
5531
5532 Be careful about choosing which prologue to emit: when a function takes
5533 many instructions to execute, we may as well use the slow version, and
5534 likewise when the function is known to be outside a hot spot (known only
5535 with profile feedback). Weight the size of the function by the number of
5536 registers to save, as it is cheap to use one or two push instructions
5537 but very slow to use many of them. */
5538 if (count)
5539 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5540 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5541 || (flag_branch_probabilities
5542 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5543 cfun->machine->use_fast_prologue_epilogue = false;
5544 else
5545 cfun->machine->use_fast_prologue_epilogue
5546 = !expensive_function_p (count);
5547 }
5548 if (TARGET_PROLOGUE_USING_MOVE
5549 && cfun->machine->use_fast_prologue_epilogue)
5550 frame->save_regs_using_mov = true;
5551 else
5552 frame->save_regs_using_mov = false;
5553
5554
5555 /* Skip return address and saved base pointer. */
5556 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5557
5558 frame->hard_frame_pointer_offset = offset;
5559
5560 /* Do some sanity checking of stack_alignment_needed and
5561 preferred_alignment, since the i386 port is the only one using
5562 these features, which may break easily. */
5563
5564 gcc_assert (!size || stack_alignment_needed);
5565 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5566 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5567 gcc_assert (stack_alignment_needed
5568 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5569
5570 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5571 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5572
5573 /* Register save area */
5574 offset += frame->nregs * UNITS_PER_WORD;
5575
5576 /* Va-arg area */
5577 if (ix86_save_varrargs_registers)
5578 {
5579 offset += X86_64_VARARGS_SIZE;
5580 frame->va_arg_size = X86_64_VARARGS_SIZE;
5581 }
5582 else
5583 frame->va_arg_size = 0;
5584
5585 /* Align start of frame for local function. */
5586 frame->padding1 = ((offset + stack_alignment_needed - 1)
5587 & -stack_alignment_needed) - offset;
5588
5589 offset += frame->padding1;
5590
5591 /* Frame pointer points here. */
5592 frame->frame_pointer_offset = offset;
5593
5594 offset += size;
5595
5596 /* Add the outgoing arguments area. This can be skipped if we
5597 eliminated all the function calls as dead code.
5598 Skipping is, however, impossible when the function calls alloca:
5599 the alloca expander assumes that the last current_function_outgoing_args_size
5600 bytes of the stack frame are unused. */
5601 if (ACCUMULATE_OUTGOING_ARGS
5602 && (!current_function_is_leaf || current_function_calls_alloca
5603 || ix86_current_function_calls_tls_descriptor))
5604 {
5605 offset += current_function_outgoing_args_size;
5606 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5607 }
5608 else
5609 frame->outgoing_arguments_size = 0;
5610
5611 /* Align stack boundary. Only needed if we're calling another function
5612 or using alloca. */
5613 if (!current_function_is_leaf || current_function_calls_alloca
5614 || ix86_current_function_calls_tls_descriptor)
5615 frame->padding2 = ((offset + preferred_alignment - 1)
5616 & -preferred_alignment) - offset;
5617 else
5618 frame->padding2 = 0;
5619
5620 offset += frame->padding2;
5621
5622 /* We've reached the end of the stack frame. */
5623 frame->stack_pointer_offset = offset;
5624
5625 /* Size the prologue needs to allocate. */
5626 frame->to_allocate =
5627 (size + frame->padding1 + frame->padding2
5628 + frame->outgoing_arguments_size + frame->va_arg_size);
5629
5630 if ((!frame->to_allocate && frame->nregs <= 1)
5631 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5632 frame->save_regs_using_mov = false;
5633
5634 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5635 && current_function_is_leaf
5636 && !ix86_current_function_calls_tls_descriptor)
5637 {
5638 frame->red_zone_size = frame->to_allocate;
5639 if (frame->save_regs_using_mov)
5640 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5641 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5642 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5643 }
5644 else
5645 frame->red_zone_size = 0;
5646 frame->to_allocate -= frame->red_zone_size;
5647 frame->stack_pointer_offset -= frame->red_zone_size;
5648 #if 0
5649 fprintf (stderr, "\n");
5650 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5651 fprintf (stderr, "size: %ld\n", (long)size);
5652 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5653 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5654 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5655 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5656 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5657 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5658 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5659 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5660 (long)frame->hard_frame_pointer_offset);
5661 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5662 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5663 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5664 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5665 #endif
5666 }
5667
5668 /* Emit code to save registers in the prologue. */
5669
5670 static void
5671 ix86_emit_save_regs (void)
5672 {
5673 unsigned int regno;
5674 rtx insn;
5675
5676 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5677 if (ix86_save_reg (regno, true))
5678 {
5679 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5680 RTX_FRAME_RELATED_P (insn) = 1;
5681 }
5682 }
5683
5684 /* Emit code to save registers using MOV insns. The first register
5685 is stored at POINTER + OFFSET. */
5686 static void
5687 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5688 {
5689 unsigned int regno;
5690 rtx insn;
5691
5692 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5693 if (ix86_save_reg (regno, true))
5694 {
5695 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5696 Pmode, offset),
5697 gen_rtx_REG (Pmode, regno));
5698 RTX_FRAME_RELATED_P (insn) = 1;
5699 offset += UNITS_PER_WORD;
5700 }
5701 }
5702
5703 /* Expand prologue or epilogue stack adjustment.
5704 The pattern exists to put a dependency on all ebp-based memory accesses.
5705 STYLE should be negative if instructions should be marked as frame related,
5706 zero if the %r11 register is live and cannot be freely used, and positive
5707 otherwise. */
5708
5709 static void
5710 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5711 {
5712 rtx insn;
5713
5714 if (! TARGET_64BIT)
5715 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5716 else if (x86_64_immediate_operand (offset, DImode))
5717 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5718 else
5719 {
5720 rtx r11;
5721 /* r11 is used by indirect sibcall return as well, set before the
5722 epilogue and used after the epilogue. ATM indirect sibcall
5723 shouldn't be used together with huge frame sizes in one
5724 function because of the frame_size check in sibcall.c. */
5725 gcc_assert (style);
5726 r11 = gen_rtx_REG (DImode, R11_REG);
5727 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5728 if (style < 0)
5729 RTX_FRAME_RELATED_P (insn) = 1;
5730 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5731 offset));
5732 }
5733 if (style < 0)
5734 RTX_FRAME_RELATED_P (insn) = 1;
5735 }
5736
5737 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5738
5739 static rtx
5740 ix86_internal_arg_pointer (void)
5741 {
5742 bool has_force_align_arg_pointer =
5743 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5744 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5745 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5746 && DECL_NAME (current_function_decl)
5747 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5748 && DECL_FILE_SCOPE_P (current_function_decl))
5749 || ix86_force_align_arg_pointer
5750 || has_force_align_arg_pointer)
5751 {
5752 /* Nested functions can't realign the stack due to a register
5753 conflict. */
5754 if (DECL_CONTEXT (current_function_decl)
5755 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5756 {
5757 if (ix86_force_align_arg_pointer)
5758 warning (0, "-mstackrealign ignored for nested functions");
5759 if (has_force_align_arg_pointer)
5760 error ("%s not supported for nested functions",
5761 ix86_force_align_arg_pointer_string);
5762 return virtual_incoming_args_rtx;
5763 }
5764 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5765 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5766 }
5767 else
5768 return virtual_incoming_args_rtx;
5769 }
5770
5771 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5772 This is called from dwarf2out.c to emit call frame instructions
5773 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5774 static void
5775 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5776 {
5777 rtx unspec = SET_SRC (pattern);
5778 gcc_assert (GET_CODE (unspec) == UNSPEC);
5779
5780 switch (index)
5781 {
5782 case UNSPEC_REG_SAVE:
5783 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5784 SET_DEST (pattern));
5785 break;
5786 case UNSPEC_DEF_CFA:
5787 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5788 INTVAL (XVECEXP (unspec, 0, 0)));
5789 break;
5790 default:
5791 gcc_unreachable ();
5792 }
5793 }
5794
5795 /* Expand the prologue into a bunch of separate insns. */
5796
5797 void
5798 ix86_expand_prologue (void)
5799 {
5800 rtx insn;
5801 bool pic_reg_used;
5802 struct ix86_frame frame;
5803 HOST_WIDE_INT allocate;
5804
5805 ix86_compute_frame_layout (&frame);
5806
5807 if (cfun->machine->force_align_arg_pointer)
5808 {
5809 rtx x, y;
5810
5811 /* Grab the argument pointer. */
5812 x = plus_constant (stack_pointer_rtx, 4);
5813 y = cfun->machine->force_align_arg_pointer;
5814 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5815 RTX_FRAME_RELATED_P (insn) = 1;
5816
5817 /* The unwind info consists of two parts: install the fafp as the cfa,
5818 and record the fafp as the "save register" of the stack pointer.
5819 The latter is there so that the unwinder can see where it should
5820 restore the stack pointer across the stack-aligning and insn. */
5821 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5822 x = gen_rtx_SET (VOIDmode, y, x);
5823 RTX_FRAME_RELATED_P (x) = 1;
5824 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5825 UNSPEC_REG_SAVE);
5826 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5827 RTX_FRAME_RELATED_P (y) = 1;
5828 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5829 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5830 REG_NOTES (insn) = x;
5831
5832 /* Align the stack. */
5833 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5834 GEN_INT (-16)));
5835
5836 /* And here we cheat like madmen with the unwind info. We force the
5837 cfa register back to sp+4, which is exactly what it was at the
5838 start of the function. Re-pushing the return address results in
5839 the return at the same spot relative to the cfa, and thus is
5840 correct wrt the unwind info. */
5841 x = cfun->machine->force_align_arg_pointer;
5842 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5843 insn = emit_insn (gen_push (x));
5844 RTX_FRAME_RELATED_P (insn) = 1;
5845
5846 x = GEN_INT (4);
5847 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5848 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5849 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5850 REG_NOTES (insn) = x;
5851 }
5852
5853 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5854 slower on all targets. Also sdb doesn't like it. */
5855
5856 if (frame_pointer_needed)
5857 {
5858 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5859 RTX_FRAME_RELATED_P (insn) = 1;
5860
5861 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5862 RTX_FRAME_RELATED_P (insn) = 1;
5863 }
5864
5865 allocate = frame.to_allocate;
5866
5867 if (!frame.save_regs_using_mov)
5868 ix86_emit_save_regs ();
5869 else
5870 allocate += frame.nregs * UNITS_PER_WORD;
5871
5872 /* When using the red zone we may start saving registers before
5873 allocating the stack frame, saving one cycle of the prologue. */
5874 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5875 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5876 : stack_pointer_rtx,
5877 -frame.nregs * UNITS_PER_WORD);
5878
5879 if (allocate == 0)
5880 ;
5881 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5882 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5883 GEN_INT (-allocate), -1);
5884 else
5885 {
5886 /* Only valid for Win32 and the 64-bit MS ABI. */
5887 rtx eax = gen_rtx_REG (Pmode, 0);
5888 bool eax_live;
5889 rtx t;
5890
5891 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
5892
5893 if (TARGET_64BIT_MS_ABI)
5894 eax_live = false;
5895 else
5896 eax_live = ix86_eax_live_at_start_p ();
5897
5898 if (eax_live)
5899 {
5900 emit_insn (gen_push (eax));
5901 allocate -= UNITS_PER_WORD;
5902 }
5903
5904 emit_move_insn (eax, GEN_INT (allocate));
5905
5906 if (TARGET_64BIT)
5907 insn = gen_allocate_stack_worker_64 (eax);
5908 else
5909 insn = gen_allocate_stack_worker_32 (eax);
5910 insn = emit_insn (insn);
5911 RTX_FRAME_RELATED_P (insn) = 1;
5912 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5913 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5914 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5915 t, REG_NOTES (insn));
5916
5917 if (eax_live)
5918 {
5919 if (frame_pointer_needed)
5920 t = plus_constant (hard_frame_pointer_rtx,
5921 allocate
5922 - frame.to_allocate
5923 - frame.nregs * UNITS_PER_WORD);
5924 else
5925 t = plus_constant (stack_pointer_rtx, allocate);
5926 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
5927 }
5928 }
5929
5930 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5931 {
5932 if (!frame_pointer_needed || !frame.to_allocate)
5933 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5934 else
5935 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5936 -frame.nregs * UNITS_PER_WORD);
5937 }
5938
5939 pic_reg_used = false;
5940 if (pic_offset_table_rtx
5941 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5942 || current_function_profile))
5943 {
5944 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5945
5946 if (alt_pic_reg_used != INVALID_REGNUM)
5947 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5948
5949 pic_reg_used = true;
5950 }
5951
5952 if (pic_reg_used)
5953 {
5954 if (TARGET_64BIT)
5955 {
5956 if (ix86_cmodel == CM_LARGE_PIC)
5957 {
5958 rtx tmp_reg = gen_rtx_REG (DImode,
5959 FIRST_REX_INT_REG + 3 /* R11 */);
5960 rtx label = gen_label_rtx ();
5961 emit_label (label);
5962 LABEL_PRESERVE_P (label) = 1;
5963 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5964 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5965 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5966 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5967 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5968 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5969 pic_offset_table_rtx, tmp_reg));
5970 }
5971 else
5972 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5973 }
5974 else
5975 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5976
5977 /* Even with accurate pre-reload life analysis, we can wind up
5978 deleting all references to the pic register after reload.
5979 Consider the case where cross-jumping unifies two sides of a
5980 branch controlled by a comparison against the only read from a
5981 global. In that case, allow the set_got to be deleted, though
5982 we're too late to do anything about the ebx save in the prologue. */
5983 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5984 }
5985
5986 /* Prevent function calls from being scheduled before the call to mcount.
5987 In the pic_reg_used case, make sure that the got load isn't deleted. */
5988 if (current_function_profile)
5989 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5990 }
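
/* Illustrative note (a rough, non-authoritative summary of the prologue
   expanded above): for the common case the emitted code amounts to

     pushl %ebp ; movl %esp, %ebp      (only if frame_pointer_needed)
     save the call-saved registers     (pushes, or mov-based saves; see above)
     subl  $frame.to_allocate, %esp    (or a stack-probing call through %eax
                                        when TARGET_STACK_PROBE and the
                                        allocation exceeds CHECK_STACK_LIMIT)
     set up the PIC register           (set_got / pc thunk, if needed)  */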
5991
5992 /* Emit code to restore saved registers using MOV insns. First register
5993 is restored from POINTER + OFFSET. */
5994 static void
5995 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5996 int maybe_eh_return)
5997 {
5998 int regno;
5999 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6000
6001 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6002 if (ix86_save_reg (regno, maybe_eh_return))
6003 {
6004 /* Ensure that adjust_address won't be forced to produce a pointer
6005 outside the range allowed by the x86-64 instruction set. */
6006 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6007 {
6008 rtx r11;
6009
6010 r11 = gen_rtx_REG (DImode, R11_REG);
6011 emit_move_insn (r11, GEN_INT (offset));
6012 emit_insn (gen_adddi3 (r11, r11, pointer));
6013 base_address = gen_rtx_MEM (Pmode, r11);
6014 offset = 0;
6015 }
6016 emit_move_insn (gen_rtx_REG (Pmode, regno),
6017 adjust_address (base_address, Pmode, offset));
6018 offset += UNITS_PER_WORD;
6019 }
6020 }
6021
6022 /* Restore function stack, frame, and registers. */
6023
6024 void
6025 ix86_expand_epilogue (int style)
6026 {
6027 int regno;
6028 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6029 struct ix86_frame frame;
6030 HOST_WIDE_INT offset;
6031
6032 ix86_compute_frame_layout (&frame);
6033
6034 /* Calculate start of saved registers relative to ebp. Special care
6035 must be taken for the normal return case of a function using
6036 eh_return: the eax and edx registers are marked as saved, but not
6037 restored along this path. */
6038 offset = frame.nregs;
6039 if (current_function_calls_eh_return && style != 2)
6040 offset -= 2;
6041 offset *= -UNITS_PER_WORD;
6042
6043 /* If we're only restoring one register and sp is not valid, then
6044 use a move instruction to restore the register, since it's
6045 less work than reloading sp and popping the register.
6046
6047 The default code results in a stack adjustment using an add/lea
6048 instruction, while this code results in a LEAVE instruction (or
6049 discrete equivalent), so it is profitable in some other cases as
6050 well, especially when there are no registers to restore. We also use
6051 this code when TARGET_USE_LEAVE is set and there is exactly one
6052 register to pop. This heuristic may need some tuning in the future. */
6053 if ((!sp_valid && frame.nregs <= 1)
6054 || (TARGET_EPILOGUE_USING_MOVE
6055 && cfun->machine->use_fast_prologue_epilogue
6056 && (frame.nregs > 1 || frame.to_allocate))
6057 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6058 || (frame_pointer_needed && TARGET_USE_LEAVE
6059 && cfun->machine->use_fast_prologue_epilogue
6060 && frame.nregs == 1)
6061 || current_function_calls_eh_return)
6062 {
6063 /* Restore registers. We can use ebp or esp to address the memory
6064 locations. If both are available, default to ebp, since offsets
6065 are known to be small. The only exception is esp pointing directly
6066 to the end of the block of saved registers, where we may simplify
6067 the addressing mode. */
6068
6069 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6070 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6071 frame.to_allocate, style == 2);
6072 else
6073 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6074 offset, style == 2);
6075
6076 /* eh_return epilogues need %ecx added to the stack pointer. */
6077 if (style == 2)
6078 {
6079 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6080
6081 if (frame_pointer_needed)
6082 {
6083 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6084 tmp = plus_constant (tmp, UNITS_PER_WORD);
6085 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6086
6087 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6088 emit_move_insn (hard_frame_pointer_rtx, tmp);
6089
6090 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6091 const0_rtx, style);
6092 }
6093 else
6094 {
6095 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6096 tmp = plus_constant (tmp, (frame.to_allocate
6097 + frame.nregs * UNITS_PER_WORD));
6098 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6099 }
6100 }
6101 else if (!frame_pointer_needed)
6102 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6103 GEN_INT (frame.to_allocate
6104 + frame.nregs * UNITS_PER_WORD),
6105 style);
6106 /* If not an i386, mov & pop is faster than "leave". */
6107 else if (TARGET_USE_LEAVE || optimize_size
6108 || !cfun->machine->use_fast_prologue_epilogue)
6109 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6110 else
6111 {
6112 pro_epilogue_adjust_stack (stack_pointer_rtx,
6113 hard_frame_pointer_rtx,
6114 const0_rtx, style);
6115 if (TARGET_64BIT)
6116 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6117 else
6118 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6119 }
6120 }
6121 else
6122 {
6123 /* First step is to deallocate the stack frame so that we can
6124 pop the registers. */
6125 if (!sp_valid)
6126 {
6127 gcc_assert (frame_pointer_needed);
6128 pro_epilogue_adjust_stack (stack_pointer_rtx,
6129 hard_frame_pointer_rtx,
6130 GEN_INT (offset), style);
6131 }
6132 else if (frame.to_allocate)
6133 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6134 GEN_INT (frame.to_allocate), style);
6135
6136 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6137 if (ix86_save_reg (regno, false))
6138 {
6139 if (TARGET_64BIT)
6140 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6141 else
6142 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6143 }
6144 if (frame_pointer_needed)
6145 {
6146 /* Leave results in shorter dependency chains on CPUs that are
6147 able to grok it fast. */
6148 if (TARGET_USE_LEAVE)
6149 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6150 else if (TARGET_64BIT)
6151 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6152 else
6153 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6154 }
6155 }
6156
6157 if (cfun->machine->force_align_arg_pointer)
6158 {
6159 emit_insn (gen_addsi3 (stack_pointer_rtx,
6160 cfun->machine->force_align_arg_pointer,
6161 GEN_INT (-4)));
6162 }
6163
6164 /* Sibcall epilogues don't want a return instruction. */
6165 if (style == 0)
6166 return;
6167
6168 if (current_function_pops_args && current_function_args_size)
6169 {
6170 rtx popc = GEN_INT (current_function_pops_args);
6171
6172 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6173 return address, do an explicit add, and jump indirectly to the
6174 caller. */
6175
6176 if (current_function_pops_args >= 65536)
6177 {
6178 rtx ecx = gen_rtx_REG (SImode, 2);
6179
6180 /* There is no "pascal" calling convention in any 64bit ABI. */
6181 gcc_assert (!TARGET_64BIT);
6182
6183 emit_insn (gen_popsi1 (ecx));
6184 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6185 emit_jump_insn (gen_return_indirect_internal (ecx));
6186 }
6187 else
6188 emit_jump_insn (gen_return_pop_internal (popc));
6189 }
6190 else
6191 emit_jump_insn (gen_return_internal ());
6192 }
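
/* Illustrative note (a rough, non-authoritative summary of the epilogue
   expanded above): the epilogue either restores registers with moves and
   rewinds the stack in one step (add/lea, or leave when the frame pointer
   is in use), or it first deallocates the local frame and then pops the
   saved registers one by one.  eh_return epilogues additionally fold
   EH_RETURN_STACKADJ_RTX (%ecx) into the stack pointer, and functions that
   pop their own arguments (e.g. stdcall) finish with a "ret $N".  */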
6193
6194 /* Reset from the function's potential modifications. */
6195
6196 static void
6197 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6198 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6199 {
6200 if (pic_offset_table_rtx)
6201 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6202 #if TARGET_MACHO
6203 /* Mach-O doesn't support labels at the end of objects, so if
6204 it looks like we might want one, insert a NOP. */
6205 {
6206 rtx insn = get_last_insn ();
6207 while (insn
6208 && NOTE_P (insn)
6209 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6210 insn = PREV_INSN (insn);
6211 if (insn
6212 && (LABEL_P (insn)
6213 || (NOTE_P (insn)
6214 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6215 fputs ("\tnop\n", file);
6216 }
6217 #endif
6218
6219 }
6220 \f
6221 /* Extract the parts of an RTL expression that is a valid memory address
6222 for an instruction. Return 0 if the structure of the address is
6223 grossly off. Return -1 if the address contains ASHIFT, so it is not
6224 strictly valid, but is still used for computing the length of a lea instruction. */
6225
6226 int
6227 ix86_decompose_address (rtx addr, struct ix86_address *out)
6228 {
6229 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6230 rtx base_reg, index_reg;
6231 HOST_WIDE_INT scale = 1;
6232 rtx scale_rtx = NULL_RTX;
6233 int retval = 1;
6234 enum ix86_address_seg seg = SEG_DEFAULT;
6235
6236 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6237 base = addr;
6238 else if (GET_CODE (addr) == PLUS)
6239 {
6240 rtx addends[4], op;
6241 int n = 0, i;
6242
6243 op = addr;
6244 do
6245 {
6246 if (n >= 4)
6247 return 0;
6248 addends[n++] = XEXP (op, 1);
6249 op = XEXP (op, 0);
6250 }
6251 while (GET_CODE (op) == PLUS);
6252 if (n >= 4)
6253 return 0;
6254 addends[n] = op;
6255
6256 for (i = n; i >= 0; --i)
6257 {
6258 op = addends[i];
6259 switch (GET_CODE (op))
6260 {
6261 case MULT:
6262 if (index)
6263 return 0;
6264 index = XEXP (op, 0);
6265 scale_rtx = XEXP (op, 1);
6266 break;
6267
6268 case UNSPEC:
6269 if (XINT (op, 1) == UNSPEC_TP
6270 && TARGET_TLS_DIRECT_SEG_REFS
6271 && seg == SEG_DEFAULT)
6272 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6273 else
6274 return 0;
6275 break;
6276
6277 case REG:
6278 case SUBREG:
6279 if (!base)
6280 base = op;
6281 else if (!index)
6282 index = op;
6283 else
6284 return 0;
6285 break;
6286
6287 case CONST:
6288 case CONST_INT:
6289 case SYMBOL_REF:
6290 case LABEL_REF:
6291 if (disp)
6292 return 0;
6293 disp = op;
6294 break;
6295
6296 default:
6297 return 0;
6298 }
6299 }
6300 }
6301 else if (GET_CODE (addr) == MULT)
6302 {
6303 index = XEXP (addr, 0); /* index*scale */
6304 scale_rtx = XEXP (addr, 1);
6305 }
6306 else if (GET_CODE (addr) == ASHIFT)
6307 {
6308 rtx tmp;
6309
6310 /* We're called for lea too, which implements ashift on occasion. */
6311 index = XEXP (addr, 0);
6312 tmp = XEXP (addr, 1);
6313 if (!CONST_INT_P (tmp))
6314 return 0;
6315 scale = INTVAL (tmp);
6316 if ((unsigned HOST_WIDE_INT) scale > 3)
6317 return 0;
6318 scale = 1 << scale;
6319 retval = -1;
6320 }
6321 else
6322 disp = addr; /* displacement */
6323
6324 /* Extract the integral value of scale. */
6325 if (scale_rtx)
6326 {
6327 if (!CONST_INT_P (scale_rtx))
6328 return 0;
6329 scale = INTVAL (scale_rtx);
6330 }
6331
6332 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6333 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6334
6335 /* Allow the arg pointer and stack pointer as the index if there is no scaling. */
6336 if (base_reg && index_reg && scale == 1
6337 && (index_reg == arg_pointer_rtx
6338 || index_reg == frame_pointer_rtx
6339 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6340 {
6341 rtx tmp;
6342 tmp = base, base = index, index = tmp;
6343 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6344 }
6345
6346 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6347 if ((base_reg == hard_frame_pointer_rtx
6348 || base_reg == frame_pointer_rtx
6349 || base_reg == arg_pointer_rtx) && !disp)
6350 disp = const0_rtx;
6351
6352 /* Special case: on K6, [%esi] causes the instruction to be vector
6353 decoded. Avoid this by transforming it to [%esi+0]. */
6354 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6355 && base_reg && !index_reg && !disp
6356 && REG_P (base_reg)
6357 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6358 disp = const0_rtx;
6359
6360 /* Special case: encode reg+reg instead of reg*2. */
6361 if (!base && index && scale && scale == 2)
6362 base = index, base_reg = index_reg, scale = 1;
6363
6364 /* Special case: scaling cannot be encoded without base or displacement. */
6365 if (!base && !disp && index && scale != 1)
6366 disp = const0_rtx;
6367
6368 out->base = base;
6369 out->index = index;
6370 out->disp = disp;
6371 out->scale = scale;
6372 out->seg = seg;
6373
6374 return retval;
6375 }
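
/* Illustrative example (not from the original sources): the address RTL

     (plus:SI (plus:SI (reg:SI %ebx)
                       (mult:SI (reg:SI %ecx) (const_int 4)))
              (const_int 12))

   decomposes into base = %ebx, index = %ecx, scale = 4, disp = 12,
   i.e. the operand written 12(%ebx,%ecx,4) in AT&T syntax.  */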
6376 \f
6377 /* Return the cost of the memory address X.
6378 For i386, it is better to use a complex address than let gcc copy
6379 the address into a reg and make a new pseudo. But not if the address
6380 requires two regs - that would mean more pseudos with longer
6381 lifetimes. */
6382 static int
6383 ix86_address_cost (rtx x)
6384 {
6385 struct ix86_address parts;
6386 int cost = 1;
6387 int ok = ix86_decompose_address (x, &parts);
6388
6389 gcc_assert (ok);
6390
6391 if (parts.base && GET_CODE (parts.base) == SUBREG)
6392 parts.base = SUBREG_REG (parts.base);
6393 if (parts.index && GET_CODE (parts.index) == SUBREG)
6394 parts.index = SUBREG_REG (parts.index);
6395
6396 /* More complex memory references are better. */
6397 if (parts.disp && parts.disp != const0_rtx)
6398 cost--;
6399 if (parts.seg != SEG_DEFAULT)
6400 cost--;
6401
6402 /* Attempt to minimize number of registers in the address. */
6403 if ((parts.base
6404 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6405 || (parts.index
6406 && (!REG_P (parts.index)
6407 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6408 cost++;
6409
6410 if (parts.base
6411 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6412 && parts.index
6413 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6414 && parts.base != parts.index)
6415 cost++;
6416
6417 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6418 since its predecode logic can't detect the length of instructions
6419 and decoding degenerates to vector decoding. Increase the cost of
6420 such addresses here. The penalty is at least 2 cycles. It may be
6421 worthwhile to split such addresses or even to refuse them entirely.
6422
6423 The following addressing modes are affected:
6424 [base+scale*index]
6425 [scale*index+disp]
6426 [base+index]
6427
6428 The first and last cases may be avoidable by explicitly coding the zero
6429 into the memory address, but I don't have an AMD-K6 machine handy to
6430 check this theory. */
6431
6432 if (TARGET_K6
6433 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6434 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6435 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6436 cost += 10;
6437
6438 return cost;
6439 }
6440 \f
6441 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6442 this is used to form addresses of local data when -fPIC is in
6443 use. */
6444
6445 static bool
6446 darwin_local_data_pic (rtx disp)
6447 {
6448 if (GET_CODE (disp) == MINUS)
6449 {
6450 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6451 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6452 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6453 {
6454 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6455 if (! strcmp (sym_name, "<pic base>"))
6456 return true;
6457 }
6458 }
6459
6460 return false;
6461 }
6462
6463 /* Determine if a given RTX is a valid constant. We already know this
6464 satisfies CONSTANT_P. */
6465
6466 bool
6467 legitimate_constant_p (rtx x)
6468 {
6469 switch (GET_CODE (x))
6470 {
6471 case CONST:
6472 x = XEXP (x, 0);
6473
6474 if (GET_CODE (x) == PLUS)
6475 {
6476 if (!CONST_INT_P (XEXP (x, 1)))
6477 return false;
6478 x = XEXP (x, 0);
6479 }
6480
6481 if (TARGET_MACHO && darwin_local_data_pic (x))
6482 return true;
6483
6484 /* Only some unspecs are valid as "constants". */
6485 if (GET_CODE (x) == UNSPEC)
6486 switch (XINT (x, 1))
6487 {
6488 case UNSPEC_GOT:
6489 case UNSPEC_GOTOFF:
6490 case UNSPEC_PLTOFF:
6491 return TARGET_64BIT;
6492 case UNSPEC_TPOFF:
6493 case UNSPEC_NTPOFF:
6494 x = XVECEXP (x, 0, 0);
6495 return (GET_CODE (x) == SYMBOL_REF
6496 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6497 case UNSPEC_DTPOFF:
6498 x = XVECEXP (x, 0, 0);
6499 return (GET_CODE (x) == SYMBOL_REF
6500 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6501 default:
6502 return false;
6503 }
6504
6505 /* We must have drilled down to a symbol. */
6506 if (GET_CODE (x) == LABEL_REF)
6507 return true;
6508 if (GET_CODE (x) != SYMBOL_REF)
6509 return false;
6510 /* FALLTHRU */
6511
6512 case SYMBOL_REF:
6513 /* TLS symbols are never valid. */
6514 if (SYMBOL_REF_TLS_MODEL (x))
6515 return false;
6516
6517 /* DLLIMPORT symbols are never valid. */
6518 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6519 && SYMBOL_REF_DLLIMPORT_P (x))
6520 return false;
6521 break;
6522
6523 case CONST_DOUBLE:
6524 if (GET_MODE (x) == TImode
6525 && x != CONST0_RTX (TImode)
6526 && !TARGET_64BIT)
6527 return false;
6528 break;
6529
6530 case CONST_VECTOR:
6531 if (x == CONST0_RTX (GET_MODE (x)))
6532 return true;
6533 return false;
6534
6535 default:
6536 break;
6537 }
6538
6539 /* Otherwise we handle everything else in the move patterns. */
6540 return true;
6541 }
6542
6543 /* Determine if it's legal to put X into the constant pool. This
6544 is not possible for the address of thread-local symbols, which
6545 is checked above. */
6546
6547 static bool
6548 ix86_cannot_force_const_mem (rtx x)
6549 {
6550 /* We can always put integral constants and vectors in memory. */
6551 switch (GET_CODE (x))
6552 {
6553 case CONST_INT:
6554 case CONST_DOUBLE:
6555 case CONST_VECTOR:
6556 return false;
6557
6558 default:
6559 break;
6560 }
6561 return !legitimate_constant_p (x);
6562 }
6563
6564 /* Determine if a given RTX is a valid constant address. */
6565
6566 bool
6567 constant_address_p (rtx x)
6568 {
6569 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6570 }
6571
6572 /* Nonzero if the constant value X is a legitimate general operand
6573 when generating PIC code. It is given that flag_pic is on and
6574 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6575
6576 bool
6577 legitimate_pic_operand_p (rtx x)
6578 {
6579 rtx inner;
6580
6581 switch (GET_CODE (x))
6582 {
6583 case CONST:
6584 inner = XEXP (x, 0);
6585 if (GET_CODE (inner) == PLUS
6586 && CONST_INT_P (XEXP (inner, 1)))
6587 inner = XEXP (inner, 0);
6588
6589 /* Only some unspecs are valid as "constants". */
6590 if (GET_CODE (inner) == UNSPEC)
6591 switch (XINT (inner, 1))
6592 {
6593 case UNSPEC_GOT:
6594 case UNSPEC_GOTOFF:
6595 case UNSPEC_PLTOFF:
6596 return TARGET_64BIT;
6597 case UNSPEC_TPOFF:
6598 x = XVECEXP (inner, 0, 0);
6599 return (GET_CODE (x) == SYMBOL_REF
6600 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6601 default:
6602 return false;
6603 }
6604 /* FALLTHRU */
6605
6606 case SYMBOL_REF:
6607 case LABEL_REF:
6608 return legitimate_pic_address_disp_p (x);
6609
6610 default:
6611 return true;
6612 }
6613 }
6614
6615 /* Determine if a given CONST RTX is a valid memory displacement
6616 in PIC mode. */
6617
6618 int
6619 legitimate_pic_address_disp_p (rtx disp)
6620 {
6621 bool saw_plus;
6622
6623 /* In 64bit mode we can allow direct addresses of symbols and labels
6624 when they are not dynamic symbols. */
6625 if (TARGET_64BIT)
6626 {
6627 rtx op0 = disp, op1;
6628
6629 switch (GET_CODE (disp))
6630 {
6631 case LABEL_REF:
6632 return true;
6633
6634 case CONST:
6635 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6636 break;
6637 op0 = XEXP (XEXP (disp, 0), 0);
6638 op1 = XEXP (XEXP (disp, 0), 1);
6639 if (!CONST_INT_P (op1)
6640 || INTVAL (op1) >= 16*1024*1024
6641 || INTVAL (op1) < -16*1024*1024)
6642 break;
6643 if (GET_CODE (op0) == LABEL_REF)
6644 return true;
6645 if (GET_CODE (op0) != SYMBOL_REF)
6646 break;
6647 /* FALLTHRU */
6648
6649 case SYMBOL_REF:
6650 /* TLS references should always be enclosed in UNSPEC. */
6651 if (SYMBOL_REF_TLS_MODEL (op0))
6652 return false;
6653 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6654 && ix86_cmodel != CM_LARGE_PIC)
6655 return true;
6656 break;
6657
6658 default:
6659 break;
6660 }
6661 }
6662 if (GET_CODE (disp) != CONST)
6663 return 0;
6664 disp = XEXP (disp, 0);
6665
6666 if (TARGET_64BIT)
6667 {
6668 /* It is unsafe to allow PLUS expressions here: an addend could push the
6669 reference outside the allowed distance of the GOT. We should not need these anyway. */
6670 if (GET_CODE (disp) != UNSPEC
6671 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6672 && XINT (disp, 1) != UNSPEC_GOTOFF
6673 && XINT (disp, 1) != UNSPEC_PLTOFF))
6674 return 0;
6675
6676 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6677 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6678 return 0;
6679 return 1;
6680 }
6681
6682 saw_plus = false;
6683 if (GET_CODE (disp) == PLUS)
6684 {
6685 if (!CONST_INT_P (XEXP (disp, 1)))
6686 return 0;
6687 disp = XEXP (disp, 0);
6688 saw_plus = true;
6689 }
6690
6691 if (TARGET_MACHO && darwin_local_data_pic (disp))
6692 return 1;
6693
6694 if (GET_CODE (disp) != UNSPEC)
6695 return 0;
6696
6697 switch (XINT (disp, 1))
6698 {
6699 case UNSPEC_GOT:
6700 if (saw_plus)
6701 return false;
6702 /* We need to check for both symbols and labels because VxWorks loads
6703 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6704 details. */
6705 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6706 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6707 case UNSPEC_GOTOFF:
6708 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6709 While the ABI also specifies a 32bit relocation, we don't produce it
6710 in the small PIC model at all. */
6711 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6712 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6713 && !TARGET_64BIT)
6714 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6715 return false;
6716 case UNSPEC_GOTTPOFF:
6717 case UNSPEC_GOTNTPOFF:
6718 case UNSPEC_INDNTPOFF:
6719 if (saw_plus)
6720 return false;
6721 disp = XVECEXP (disp, 0, 0);
6722 return (GET_CODE (disp) == SYMBOL_REF
6723 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6724 case UNSPEC_NTPOFF:
6725 disp = XVECEXP (disp, 0, 0);
6726 return (GET_CODE (disp) == SYMBOL_REF
6727 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6728 case UNSPEC_DTPOFF:
6729 disp = XVECEXP (disp, 0, 0);
6730 return (GET_CODE (disp) == SYMBOL_REF
6731 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6732 }
6733
6734 return 0;
6735 }
6736
6737 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6738 memory address for an instruction. The MODE argument is the machine mode
6739 for the MEM expression that wants to use this address.
6740
6741 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6742 convert common non-canonical forms to canonical form so that they will
6743 be recognized. */
6744
6745 int
6746 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6747 rtx addr, int strict)
6748 {
6749 struct ix86_address parts;
6750 rtx base, index, disp;
6751 HOST_WIDE_INT scale;
6752 const char *reason = NULL;
6753 rtx reason_rtx = NULL_RTX;
6754
6755 if (ix86_decompose_address (addr, &parts) <= 0)
6756 {
6757 reason = "decomposition failed";
6758 goto report_error;
6759 }
6760
6761 base = parts.base;
6762 index = parts.index;
6763 disp = parts.disp;
6764 scale = parts.scale;
6765
6766 /* Validate base register.
6767
6768 Don't allow SUBREG's that span more than a word here. It can lead to spill
6769 failures when the base is one word out of a two word structure, which is
6770 represented internally as a DImode int. */
6771
6772 if (base)
6773 {
6774 rtx reg;
6775 reason_rtx = base;
6776
6777 if (REG_P (base))
6778 reg = base;
6779 else if (GET_CODE (base) == SUBREG
6780 && REG_P (SUBREG_REG (base))
6781 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6782 <= UNITS_PER_WORD)
6783 reg = SUBREG_REG (base);
6784 else
6785 {
6786 reason = "base is not a register";
6787 goto report_error;
6788 }
6789
6790 if (GET_MODE (base) != Pmode)
6791 {
6792 reason = "base is not in Pmode";
6793 goto report_error;
6794 }
6795
6796 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6797 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6798 {
6799 reason = "base is not valid";
6800 goto report_error;
6801 }
6802 }
6803
6804 /* Validate index register.
6805
6806 Don't allow SUBREG's that span more than a word here -- same as above. */
6807
6808 if (index)
6809 {
6810 rtx reg;
6811 reason_rtx = index;
6812
6813 if (REG_P (index))
6814 reg = index;
6815 else if (GET_CODE (index) == SUBREG
6816 && REG_P (SUBREG_REG (index))
6817 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6818 <= UNITS_PER_WORD)
6819 reg = SUBREG_REG (index);
6820 else
6821 {
6822 reason = "index is not a register";
6823 goto report_error;
6824 }
6825
6826 if (GET_MODE (index) != Pmode)
6827 {
6828 reason = "index is not in Pmode";
6829 goto report_error;
6830 }
6831
6832 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6833 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6834 {
6835 reason = "index is not valid";
6836 goto report_error;
6837 }
6838 }
6839
6840 /* Validate scale factor. */
6841 if (scale != 1)
6842 {
6843 reason_rtx = GEN_INT (scale);
6844 if (!index)
6845 {
6846 reason = "scale without index";
6847 goto report_error;
6848 }
6849
6850 if (scale != 2 && scale != 4 && scale != 8)
6851 {
6852 reason = "scale is not a valid multiplier";
6853 goto report_error;
6854 }
6855 }
6856
6857 /* Validate displacement. */
6858 if (disp)
6859 {
6860 reason_rtx = disp;
6861
6862 if (GET_CODE (disp) == CONST
6863 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6864 switch (XINT (XEXP (disp, 0), 1))
6865 {
6866 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6867 used. While the ABI also specifies 32bit relocations, we don't
6868 produce them at all and use IP-relative addressing instead. */
6869 case UNSPEC_GOT:
6870 case UNSPEC_GOTOFF:
6871 gcc_assert (flag_pic);
6872 if (!TARGET_64BIT)
6873 goto is_legitimate_pic;
6874 reason = "64bit address unspec";
6875 goto report_error;
6876
6877 case UNSPEC_GOTPCREL:
6878 gcc_assert (flag_pic);
6879 goto is_legitimate_pic;
6880
6881 case UNSPEC_GOTTPOFF:
6882 case UNSPEC_GOTNTPOFF:
6883 case UNSPEC_INDNTPOFF:
6884 case UNSPEC_NTPOFF:
6885 case UNSPEC_DTPOFF:
6886 break;
6887
6888 default:
6889 reason = "invalid address unspec";
6890 goto report_error;
6891 }
6892
6893 else if (SYMBOLIC_CONST (disp)
6894 && (flag_pic
6895 || (TARGET_MACHO
6896 #if TARGET_MACHO
6897 && MACHOPIC_INDIRECT
6898 && !machopic_operand_p (disp)
6899 #endif
6900 )))
6901 {
6902
6903 is_legitimate_pic:
6904 if (TARGET_64BIT && (index || base))
6905 {
6906 /* foo@dtpoff(%rX) is ok. */
6907 if (GET_CODE (disp) != CONST
6908 || GET_CODE (XEXP (disp, 0)) != PLUS
6909 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6910 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6911 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6912 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6913 {
6914 reason = "non-constant pic memory reference";
6915 goto report_error;
6916 }
6917 }
6918 else if (! legitimate_pic_address_disp_p (disp))
6919 {
6920 reason = "displacement is an invalid pic construct";
6921 goto report_error;
6922 }
6923
6924 /* This code used to verify that a symbolic pic displacement
6925 includes the pic_offset_table_rtx register.
6926
6927 While this is a good idea, unfortunately these constructs may
6928 be created by the "adds using lea" optimization for incorrect
6929 code like:
6930
6931 int a;
6932 int foo(int i)
6933 {
6934 return *(&a+i);
6935 }
6936
6937 This code is nonsensical, but results in addressing the
6938 GOT table with a pic_offset_table_rtx base. We can't
6939 just refuse it easily, since it gets matched by the
6940 "addsi3" pattern, which later gets split into an lea when
6941 the output register differs from the input. While this
6942 could be handled by a separate addsi pattern for this case
6943 that never results in an lea, disabling this test seems to be
6944 the easier and correct fix for the crash. */
6945 }
6946 else if (GET_CODE (disp) != LABEL_REF
6947 && !CONST_INT_P (disp)
6948 && (GET_CODE (disp) != CONST
6949 || !legitimate_constant_p (disp))
6950 && (GET_CODE (disp) != SYMBOL_REF
6951 || !legitimate_constant_p (disp)))
6952 {
6953 reason = "displacement is not constant";
6954 goto report_error;
6955 }
6956 else if (TARGET_64BIT
6957 && !x86_64_immediate_operand (disp, VOIDmode))
6958 {
6959 reason = "displacement is out of range";
6960 goto report_error;
6961 }
6962 }
6963
6964 /* Everything looks valid. */
6965 return TRUE;
6966
6967 report_error:
6968 return FALSE;
6969 }
6970 \f
6971 /* Return a unique alias set for the GOT. */
6972
6973 static HOST_WIDE_INT
6974 ix86_GOT_alias_set (void)
6975 {
6976 static HOST_WIDE_INT set = -1;
6977 if (set == -1)
6978 set = new_alias_set ();
6979 return set;
6980 }
6981
6982 /* Return a legitimate reference for ORIG (an address) using the
6983 register REG. If REG is 0, a new pseudo is generated.
6984
6985 There are two types of references that must be handled:
6986
6987 1. Global data references must load the address from the GOT, via
6988 the PIC reg. An insn is emitted to do this load, and the reg is
6989 returned.
6990
6991 2. Static data references, constant pool addresses, and code labels
6992 compute the address as an offset from the GOT, whose base is in
6993 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6994 differentiate them from global data objects. The returned
6995 address is the PIC reg + an unspec constant.
6996
6997 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6998 reg also appears in the address. */
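/* For example (roughly -- the exact RTL depends on target flags), a global
   symbol FOO in 32-bit PIC code comes back as a load from the GOT,
   (mem (plus pic_offset_table_rtx (const (unspec [FOO] UNSPEC_GOT)))),
   while a local symbol becomes pic_offset_table_rtx plus a @GOTOFF
   constant. */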
6999
7000 static rtx
7001 legitimize_pic_address (rtx orig, rtx reg)
7002 {
7003 rtx addr = orig;
7004 rtx new = orig;
7005 rtx base;
7006
7007 #if TARGET_MACHO
7008 if (TARGET_MACHO && !TARGET_64BIT)
7009 {
7010 if (reg == 0)
7011 reg = gen_reg_rtx (Pmode);
7012 /* Use the generic Mach-O PIC machinery. */
7013 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7014 }
7015 #endif
7016
7017 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7018 new = addr;
7019 else if (TARGET_64BIT
7020 && ix86_cmodel != CM_SMALL_PIC
7021 && gotoff_operand (addr, Pmode))
7022 {
7023 rtx tmpreg;
7024 /* This symbol may be referenced via a displacement from the PIC
7025 base address (@GOTOFF). */
7026
7027 if (reload_in_progress)
7028 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7029 if (GET_CODE (addr) == CONST)
7030 addr = XEXP (addr, 0);
7031 if (GET_CODE (addr) == PLUS)
7032 {
7033 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7034 UNSPEC_GOTOFF);
7035 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7036 }
7037 else
7038 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7039 new = gen_rtx_CONST (Pmode, new);
7040 if (!reg)
7041 tmpreg = gen_reg_rtx (Pmode);
7042 else
7043 tmpreg = reg;
7044 emit_move_insn (tmpreg, new);
7045
7046 if (reg != 0)
7047 {
7048 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7049 tmpreg, 1, OPTAB_DIRECT);
7050 new = reg;
7051 }
7052 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7053 }
7054 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7055 {
7056 /* This symbol may be referenced via a displacement from the PIC
7057 base address (@GOTOFF). */
7058
7059 if (reload_in_progress)
7060 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7061 if (GET_CODE (addr) == CONST)
7062 addr = XEXP (addr, 0);
7063 if (GET_CODE (addr) == PLUS)
7064 {
7065 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7066 UNSPEC_GOTOFF);
7067 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7068 }
7069 else
7070 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7071 new = gen_rtx_CONST (Pmode, new);
7072 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7073
7074 if (reg != 0)
7075 {
7076 emit_move_insn (reg, new);
7077 new = reg;
7078 }
7079 }
7080 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7081 /* We can't use @GOTOFF for text labels on VxWorks;
7082 see gotoff_operand. */
7083 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7084 {
7085 /* Given that we've already handled dllimport variables separately
7086 in legitimize_address, and all other variables should satisfy
7087 legitimate_pic_address_disp_p, we should never arrive here. */
7088 gcc_assert (!TARGET_64BIT_MS_ABI);
7089
7090 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7091 {
7092 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7093 new = gen_rtx_CONST (Pmode, new);
7094 new = gen_const_mem (Pmode, new);
7095 set_mem_alias_set (new, ix86_GOT_alias_set ());
7096
7097 if (reg == 0)
7098 reg = gen_reg_rtx (Pmode);
7099 /* Use gen_movsi directly, otherwise the address is loaded
7100 into a register for CSE. We don't want to CSE these addresses;
7101 instead we CSE the addresses loaded from the GOT table, so skip this. */
7102 emit_insn (gen_movsi (reg, new));
7103 new = reg;
7104 }
7105 else
7106 {
7107 /* This symbol must be referenced via a load from the
7108 Global Offset Table (@GOT). */
7109
7110 if (reload_in_progress)
7111 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7112 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7113 new = gen_rtx_CONST (Pmode, new);
7114 if (TARGET_64BIT)
7115 new = force_reg (Pmode, new);
7116 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7117 new = gen_const_mem (Pmode, new);
7118 set_mem_alias_set (new, ix86_GOT_alias_set ());
7119
7120 if (reg == 0)
7121 reg = gen_reg_rtx (Pmode);
7122 emit_move_insn (reg, new);
7123 new = reg;
7124 }
7125 }
7126 else
7127 {
7128 if (CONST_INT_P (addr)
7129 && !x86_64_immediate_operand (addr, VOIDmode))
7130 {
7131 if (reg)
7132 {
7133 emit_move_insn (reg, addr);
7134 new = reg;
7135 }
7136 else
7137 new = force_reg (Pmode, addr);
7138 }
7139 else if (GET_CODE (addr) == CONST)
7140 {
7141 addr = XEXP (addr, 0);
7142
7143 /* We must match stuff we generate before. Assume the only
7144 unspecs that can get here are ours. Not that we could do
7145 anything with them anyway.... */
7146 if (GET_CODE (addr) == UNSPEC
7147 || (GET_CODE (addr) == PLUS
7148 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7149 return orig;
7150 gcc_assert (GET_CODE (addr) == PLUS);
7151 }
7152 if (GET_CODE (addr) == PLUS)
7153 {
7154 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7155
7156 /* Check first to see if this is a constant offset from a @GOTOFF
7157 symbol reference. */
7158 if (gotoff_operand (op0, Pmode)
7159 && CONST_INT_P (op1))
7160 {
7161 if (!TARGET_64BIT)
7162 {
7163 if (reload_in_progress)
7164 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7165 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7166 UNSPEC_GOTOFF);
7167 new = gen_rtx_PLUS (Pmode, new, op1);
7168 new = gen_rtx_CONST (Pmode, new);
7169 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7170
7171 if (reg != 0)
7172 {
7173 emit_move_insn (reg, new);
7174 new = reg;
7175 }
7176 }
7177 else
7178 {
7179 if (INTVAL (op1) < -16*1024*1024
7180 || INTVAL (op1) >= 16*1024*1024)
7181 {
7182 if (!x86_64_immediate_operand (op1, Pmode))
7183 op1 = force_reg (Pmode, op1);
7184 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7185 }
7186 }
7187 }
7188 else
7189 {
7190 base = legitimize_pic_address (XEXP (addr, 0), reg);
7191 new = legitimize_pic_address (XEXP (addr, 1),
7192 base == reg ? NULL_RTX : reg);
7193
7194 if (CONST_INT_P (new))
7195 new = plus_constant (base, INTVAL (new));
7196 else
7197 {
7198 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7199 {
7200 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7201 new = XEXP (new, 1);
7202 }
7203 new = gen_rtx_PLUS (Pmode, base, new);
7204 }
7205 }
7206 }
7207 }
7208 return new;
7209 }
7210 \f
7211 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7212
7213 static rtx
7214 get_thread_pointer (int to_reg)
7215 {
7216 rtx tp, reg, insn;
7217
7218 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7219 if (!to_reg)
7220 return tp;
7221
7222 reg = gen_reg_rtx (Pmode);
7223 insn = gen_rtx_SET (VOIDmode, reg, tp);
7224 insn = emit_insn (insn);
7225
7226 return reg;
7227 }
7228
7229 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7230 false if we expect this to be used for a memory address and true if
7231 we expect to load the address into a register. */
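/* As a rough illustration of the cases handled below: TLS_MODEL_LOCAL_EXEC
   combines the thread pointer with a @TPOFF/@NTPOFF offset,
   TLS_MODEL_INITIAL_EXEC adds an offset loaded through the GOT, and the
   dynamic models emit a call whose result gives the address (plus a
   @DTPOFF offset in the local-dynamic case). */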
7232
7233 static rtx
7234 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7235 {
7236 rtx dest, base, off, pic, tp;
7237 int type;
7238
7239 switch (model)
7240 {
7241 case TLS_MODEL_GLOBAL_DYNAMIC:
7242 dest = gen_reg_rtx (Pmode);
7243 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7244
7245 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7246 {
7247 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7248
7249 start_sequence ();
7250 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7251 insns = get_insns ();
7252 end_sequence ();
7253
7254 emit_libcall_block (insns, dest, rax, x);
7255 }
7256 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7257 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7258 else
7259 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7260
7261 if (TARGET_GNU2_TLS)
7262 {
7263 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7264
7265 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7266 }
7267 break;
7268
7269 case TLS_MODEL_LOCAL_DYNAMIC:
7270 base = gen_reg_rtx (Pmode);
7271 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7272
7273 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7274 {
7275 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7276
7277 start_sequence ();
7278 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7279 insns = get_insns ();
7280 end_sequence ();
7281
7282 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7283 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7284 emit_libcall_block (insns, base, rax, note);
7285 }
7286 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7287 emit_insn (gen_tls_local_dynamic_base_64 (base));
7288 else
7289 emit_insn (gen_tls_local_dynamic_base_32 (base));
7290
7291 if (TARGET_GNU2_TLS)
7292 {
7293 rtx x = ix86_tls_module_base ();
7294
7295 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7296 gen_rtx_MINUS (Pmode, x, tp));
7297 }
7298
7299 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7300 off = gen_rtx_CONST (Pmode, off);
7301
7302 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7303
7304 if (TARGET_GNU2_TLS)
7305 {
7306 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7307
7308 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7309 }
7310
7311 break;
7312
7313 case TLS_MODEL_INITIAL_EXEC:
7314 if (TARGET_64BIT)
7315 {
7316 pic = NULL;
7317 type = UNSPEC_GOTNTPOFF;
7318 }
7319 else if (flag_pic)
7320 {
7321 if (reload_in_progress)
7322 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7323 pic = pic_offset_table_rtx;
7324 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7325 }
7326 else if (!TARGET_ANY_GNU_TLS)
7327 {
7328 pic = gen_reg_rtx (Pmode);
7329 emit_insn (gen_set_got (pic));
7330 type = UNSPEC_GOTTPOFF;
7331 }
7332 else
7333 {
7334 pic = NULL;
7335 type = UNSPEC_INDNTPOFF;
7336 }
7337
7338 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7339 off = gen_rtx_CONST (Pmode, off);
7340 if (pic)
7341 off = gen_rtx_PLUS (Pmode, pic, off);
7342 off = gen_const_mem (Pmode, off);
7343 set_mem_alias_set (off, ix86_GOT_alias_set ());
7344
7345 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7346 {
7347 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7348 off = force_reg (Pmode, off);
7349 return gen_rtx_PLUS (Pmode, base, off);
7350 }
7351 else
7352 {
7353 base = get_thread_pointer (true);
7354 dest = gen_reg_rtx (Pmode);
7355 emit_insn (gen_subsi3 (dest, base, off));
7356 }
7357 break;
7358
7359 case TLS_MODEL_LOCAL_EXEC:
7360 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7361 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7362 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7363 off = gen_rtx_CONST (Pmode, off);
7364
7365 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7366 {
7367 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7368 return gen_rtx_PLUS (Pmode, base, off);
7369 }
7370 else
7371 {
7372 base = get_thread_pointer (true);
7373 dest = gen_reg_rtx (Pmode);
7374 emit_insn (gen_subsi3 (dest, base, off));
7375 }
7376 break;
7377
7378 default:
7379 gcc_unreachable ();
7380 }
7381
7382 return dest;
7383 }
7384
7385 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7386 to symbol DECL. */
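/* Roughly: for an ordinary dllimported symbol "foo" this builds a VAR_DECL
   whose RTL is a load from the "__imp__foo" pointer; a fastcall-prefixed
   name drops its prefix character and uses the single-underscore "__imp_"
   prefix instead. */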
7387
7388 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7389 htab_t dllimport_map;
7390
7391 static tree
7392 get_dllimport_decl (tree decl)
7393 {
7394 struct tree_map *h, in;
7395 void **loc;
7396 const char *name;
7397 const char *prefix;
7398 size_t namelen, prefixlen;
7399 char *imp_name;
7400 tree to;
7401 rtx rtl;
7402
7403 if (!dllimport_map)
7404 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7405
7406 in.hash = htab_hash_pointer (decl);
7407 in.base.from = decl;
7408 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7409 h = *loc;
7410 if (h)
7411 return h->to;
7412
7413 *loc = h = ggc_alloc (sizeof (struct tree_map));
7414 h->hash = in.hash;
7415 h->base.from = decl;
7416 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7417 DECL_ARTIFICIAL (to) = 1;
7418 DECL_IGNORED_P (to) = 1;
7419 DECL_EXTERNAL (to) = 1;
7420 TREE_READONLY (to) = 1;
7421
7422 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7423 name = targetm.strip_name_encoding (name);
7424 if (name[0] == FASTCALL_PREFIX)
7425 {
7426 name++;
7427 prefix = "*__imp_";
7428 }
7429 else
7430 prefix = "*__imp__";
7431
7432 namelen = strlen (name);
7433 prefixlen = strlen (prefix);
7434 imp_name = alloca (namelen + prefixlen + 1);
7435 memcpy (imp_name, prefix, prefixlen);
7436 memcpy (imp_name + prefixlen, name, namelen + 1);
7437
7438 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7439 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7440 SET_SYMBOL_REF_DECL (rtl, to);
7441 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7442
7443 rtl = gen_const_mem (Pmode, rtl);
7444 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7445
7446 SET_DECL_RTL (to, rtl);
7447
7448 return to;
7449 }
7450
7451 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7452 true if we require the result be a register. */
7453
7454 static rtx
7455 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7456 {
7457 tree imp_decl;
7458 rtx x;
7459
7460 gcc_assert (SYMBOL_REF_DECL (symbol));
7461 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7462
7463 x = DECL_RTL (imp_decl);
7464 if (want_reg)
7465 x = force_reg (Pmode, x);
7466 return x;
7467 }
7468
7469 /* Try machine-dependent ways of modifying an illegitimate address
7470 to be legitimate. If we find one, return the new, valid address.
7471 This macro is used in only one place: `memory_address' in explow.c.
7472
7473 OLDX is the address as it was before break_out_memory_refs was called.
7474 In some cases it is useful to look at this to decide what needs to be done.
7475
7476 MODE and WIN are passed so that this macro can use
7477 GO_IF_LEGITIMATE_ADDRESS.
7478
7479 It is always safe for this macro to do nothing. It exists to recognize
7480 opportunities to optimize the output.
7481
7482 For the 80386, we handle X+REG by loading X into a register R and
7483 using R+REG. R will go in a general reg and indexing will be used.
7484 However, if REG is a broken-out memory address or multiplication,
7485 nothing needs to be done because REG can certainly go in a general reg.
7486
7487 When -fpic is used, special handling is needed for symbolic references.
7488 See comments by legitimize_pic_address in i386.c for details. */
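/* For instance, an address like (plus (ashift (reg) (const_int 2)) (reg))
   is rewritten below into (plus (mult (reg) (const_int 4)) (reg)) so that
   it matches the scaled-index form the addressing modes expect. */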
7489
7490 rtx
7491 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7492 {
7493 int changed = 0;
7494 unsigned log;
7495
7496 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7497 if (log)
7498 return legitimize_tls_address (x, log, false);
7499 if (GET_CODE (x) == CONST
7500 && GET_CODE (XEXP (x, 0)) == PLUS
7501 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7502 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7503 {
7504 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7505 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7506 }
7507
7508 if (flag_pic && SYMBOLIC_CONST (x))
7509 return legitimize_pic_address (x, 0);
7510
7511 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7512 {
7513 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7514 return legitimize_dllimport_symbol (x, true);
7515 if (GET_CODE (x) == CONST
7516 && GET_CODE (XEXP (x, 0)) == PLUS
7517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7518 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7519 {
7520 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7521 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7522 }
7523 }
7524
7525 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7526 if (GET_CODE (x) == ASHIFT
7527 && CONST_INT_P (XEXP (x, 1))
7528 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7529 {
7530 changed = 1;
7531 log = INTVAL (XEXP (x, 1));
7532 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7533 GEN_INT (1 << log));
7534 }
7535
7536 if (GET_CODE (x) == PLUS)
7537 {
7538 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7539
7540 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7541 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7542 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7543 {
7544 changed = 1;
7545 log = INTVAL (XEXP (XEXP (x, 0), 1));
7546 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7547 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7548 GEN_INT (1 << log));
7549 }
7550
7551 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7552 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7553 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7554 {
7555 changed = 1;
7556 log = INTVAL (XEXP (XEXP (x, 1), 1));
7557 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7558 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7559 GEN_INT (1 << log));
7560 }
7561
7562 /* Put multiply first if it isn't already. */
7563 if (GET_CODE (XEXP (x, 1)) == MULT)
7564 {
7565 rtx tmp = XEXP (x, 0);
7566 XEXP (x, 0) = XEXP (x, 1);
7567 XEXP (x, 1) = tmp;
7568 changed = 1;
7569 }
7570
7571 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7572 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7573 created by virtual register instantiation, register elimination, and
7574 similar optimizations. */
7575 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7576 {
7577 changed = 1;
7578 x = gen_rtx_PLUS (Pmode,
7579 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7580 XEXP (XEXP (x, 1), 0)),
7581 XEXP (XEXP (x, 1), 1));
7582 }
7583
7584 /* Canonicalize
7585 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7586 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7587 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7588 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7589 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7590 && CONSTANT_P (XEXP (x, 1)))
7591 {
7592 rtx constant;
7593 rtx other = NULL_RTX;
7594
7595 if (CONST_INT_P (XEXP (x, 1)))
7596 {
7597 constant = XEXP (x, 1);
7598 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7599 }
7600 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7601 {
7602 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7603 other = XEXP (x, 1);
7604 }
7605 else
7606 constant = 0;
7607
7608 if (constant)
7609 {
7610 changed = 1;
7611 x = gen_rtx_PLUS (Pmode,
7612 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7613 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7614 plus_constant (other, INTVAL (constant)));
7615 }
7616 }
7617
7618 if (changed && legitimate_address_p (mode, x, FALSE))
7619 return x;
7620
7621 if (GET_CODE (XEXP (x, 0)) == MULT)
7622 {
7623 changed = 1;
7624 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7625 }
7626
7627 if (GET_CODE (XEXP (x, 1)) == MULT)
7628 {
7629 changed = 1;
7630 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7631 }
7632
7633 if (changed
7634 && REG_P (XEXP (x, 1))
7635 && REG_P (XEXP (x, 0)))
7636 return x;
7637
7638 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7639 {
7640 changed = 1;
7641 x = legitimize_pic_address (x, 0);
7642 }
7643
7644 if (changed && legitimate_address_p (mode, x, FALSE))
7645 return x;
7646
7647 if (REG_P (XEXP (x, 0)))
7648 {
7649 rtx temp = gen_reg_rtx (Pmode);
7650 rtx val = force_operand (XEXP (x, 1), temp);
7651 if (val != temp)
7652 emit_move_insn (temp, val);
7653
7654 XEXP (x, 1) = temp;
7655 return x;
7656 }
7657
7658 else if (REG_P (XEXP (x, 1)))
7659 {
7660 rtx temp = gen_reg_rtx (Pmode);
7661 rtx val = force_operand (XEXP (x, 0), temp);
7662 if (val != temp)
7663 emit_move_insn (temp, val);
7664
7665 XEXP (x, 0) = temp;
7666 return x;
7667 }
7668 }
7669
7670 return x;
7671 }
7672 \f
7673 /* Print an integer constant expression in assembler syntax. Addition
7674 and subtraction are the only arithmetic that may appear in these
7675 expressions. FILE is the stdio stream to write to, X is the rtx, and
7676 CODE is the operand print code from the output string. */
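/* E.g. an operand of the form (const (unspec [foo] UNSPEC_GOTOFF)) is
   printed as "foo@GOTOFF", and (unspec [bar] UNSPEC_GOTPCREL) as
   "bar@GOTPCREL(%rip)". */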
7677
7678 static void
7679 output_pic_addr_const (FILE *file, rtx x, int code)
7680 {
7681 char buf[256];
7682
7683 switch (GET_CODE (x))
7684 {
7685 case PC:
7686 gcc_assert (flag_pic);
7687 putc ('.', file);
7688 break;
7689
7690 case SYMBOL_REF:
7691 if (! TARGET_MACHO || TARGET_64BIT)
7692 output_addr_const (file, x);
7693 else
7694 {
7695 const char *name = XSTR (x, 0);
7696
7697 /* Mark the decl as referenced so that cgraph will
7698 output the function. */
7699 if (SYMBOL_REF_DECL (x))
7700 mark_decl_referenced (SYMBOL_REF_DECL (x));
7701
7702 #if TARGET_MACHO
7703 if (MACHOPIC_INDIRECT
7704 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7705 name = machopic_indirection_name (x, /*stub_p=*/true);
7706 #endif
7707 assemble_name (file, name);
7708 }
7709 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7710 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7711 fputs ("@PLT", file);
7712 break;
7713
7714 case LABEL_REF:
7715 x = XEXP (x, 0);
7716 /* FALLTHRU */
7717 case CODE_LABEL:
7718 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7719 assemble_name (asm_out_file, buf);
7720 break;
7721
7722 case CONST_INT:
7723 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7724 break;
7725
7726 case CONST:
7727 /* This used to output parentheses around the expression,
7728 but that does not work on the 386 (either ATT or BSD assembler). */
7729 output_pic_addr_const (file, XEXP (x, 0), code);
7730 break;
7731
7732 case CONST_DOUBLE:
7733 if (GET_MODE (x) == VOIDmode)
7734 {
7735 /* We can use %d if the number is <32 bits and positive. */
7736 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7737 fprintf (file, "0x%lx%08lx",
7738 (unsigned long) CONST_DOUBLE_HIGH (x),
7739 (unsigned long) CONST_DOUBLE_LOW (x));
7740 else
7741 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7742 }
7743 else
7744 /* We can't handle floating point constants;
7745 PRINT_OPERAND must handle them. */
7746 output_operand_lossage ("floating constant misused");
7747 break;
7748
7749 case PLUS:
7750 /* Some assemblers need integer constants to appear first. */
7751 if (CONST_INT_P (XEXP (x, 0)))
7752 {
7753 output_pic_addr_const (file, XEXP (x, 0), code);
7754 putc ('+', file);
7755 output_pic_addr_const (file, XEXP (x, 1), code);
7756 }
7757 else
7758 {
7759 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7760 output_pic_addr_const (file, XEXP (x, 1), code);
7761 putc ('+', file);
7762 output_pic_addr_const (file, XEXP (x, 0), code);
7763 }
7764 break;
7765
7766 case MINUS:
7767 if (!TARGET_MACHO)
7768 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7769 output_pic_addr_const (file, XEXP (x, 0), code);
7770 putc ('-', file);
7771 output_pic_addr_const (file, XEXP (x, 1), code);
7772 if (!TARGET_MACHO)
7773 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7774 break;
7775
7776 case UNSPEC:
7777 gcc_assert (XVECLEN (x, 0) == 1);
7778 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7779 switch (XINT (x, 1))
7780 {
7781 case UNSPEC_GOT:
7782 fputs ("@GOT", file);
7783 break;
7784 case UNSPEC_GOTOFF:
7785 fputs ("@GOTOFF", file);
7786 break;
7787 case UNSPEC_PLTOFF:
7788 fputs ("@PLTOFF", file);
7789 break;
7790 case UNSPEC_GOTPCREL:
7791 fputs ("@GOTPCREL(%rip)", file);
7792 break;
7793 case UNSPEC_GOTTPOFF:
7794 /* FIXME: This might be @TPOFF in Sun ld too. */
7795 fputs ("@GOTTPOFF", file);
7796 break;
7797 case UNSPEC_TPOFF:
7798 fputs ("@TPOFF", file);
7799 break;
7800 case UNSPEC_NTPOFF:
7801 if (TARGET_64BIT)
7802 fputs ("@TPOFF", file);
7803 else
7804 fputs ("@NTPOFF", file);
7805 break;
7806 case UNSPEC_DTPOFF:
7807 fputs ("@DTPOFF", file);
7808 break;
7809 case UNSPEC_GOTNTPOFF:
7810 if (TARGET_64BIT)
7811 fputs ("@GOTTPOFF(%rip)", file);
7812 else
7813 fputs ("@GOTNTPOFF", file);
7814 break;
7815 case UNSPEC_INDNTPOFF:
7816 fputs ("@INDNTPOFF", file);
7817 break;
7818 default:
7819 output_operand_lossage ("invalid UNSPEC as operand");
7820 break;
7821 }
7822 break;
7823
7824 default:
7825 output_operand_lossage ("invalid expression as operand");
7826 }
7827 }
7828
7829 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7830 We need to emit DTP-relative relocations. */
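/* For a 4-byte request this typically emits something like
   ".long foo@DTPOFF"; the 8-byte case appends ", 0" for the upper half. */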
7831
7832 static void ATTRIBUTE_UNUSED
7833 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7834 {
7835 fputs (ASM_LONG, file);
7836 output_addr_const (file, x);
7837 fputs ("@DTPOFF", file);
7838 switch (size)
7839 {
7840 case 4:
7841 break;
7842 case 8:
7843 fputs (", 0", file);
7844 break;
7845 default:
7846 gcc_unreachable ();
7847 }
7848 }
7849
7850 /* In the name of slightly smaller debug output, and to cater to
7851 general assembler lossage, recognize PIC+GOTOFF and turn it back
7852 into a direct symbol reference.
7853
7854 On Darwin, this is necessary to avoid a crash, because Darwin
7855 has a different PIC label for each routine but the DWARF debugging
7856 information is not associated with any particular routine, so it's
7857 necessary to remove references to the PIC label from RTL stored by
7858 the DWARF output code. */
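/* E.g. (plus (reg %ebx) (const (unspec [foo] UNSPEC_GOTOFF))) is turned
   back into the plain symbol_ref "foo" (plus any constant addend). */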
7859
7860 static rtx
7861 ix86_delegitimize_address (rtx orig_x)
7862 {
7863 rtx x = orig_x;
7864 /* reg_addend is NULL or a multiple of some register. */
7865 rtx reg_addend = NULL_RTX;
7866 /* const_addend is NULL or a const_int. */
7867 rtx const_addend = NULL_RTX;
7868 /* This is the result, or NULL. */
7869 rtx result = NULL_RTX;
7870
7871 if (MEM_P (x))
7872 x = XEXP (x, 0);
7873
7874 if (TARGET_64BIT)
7875 {
7876 if (GET_CODE (x) != CONST
7877 || GET_CODE (XEXP (x, 0)) != UNSPEC
7878 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7879 || !MEM_P (orig_x))
7880 return orig_x;
7881 return XVECEXP (XEXP (x, 0), 0, 0);
7882 }
7883
7884 if (GET_CODE (x) != PLUS
7885 || GET_CODE (XEXP (x, 1)) != CONST)
7886 return orig_x;
7887
7888 if (REG_P (XEXP (x, 0))
7889 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7890 /* %ebx + GOT/GOTOFF */
7891 ;
7892 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7893 {
7894 /* %ebx + %reg * scale + GOT/GOTOFF */
7895 reg_addend = XEXP (x, 0);
7896 if (REG_P (XEXP (reg_addend, 0))
7897 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7898 reg_addend = XEXP (reg_addend, 1);
7899 else if (REG_P (XEXP (reg_addend, 1))
7900 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7901 reg_addend = XEXP (reg_addend, 0);
7902 else
7903 return orig_x;
7904 if (!REG_P (reg_addend)
7905 && GET_CODE (reg_addend) != MULT
7906 && GET_CODE (reg_addend) != ASHIFT)
7907 return orig_x;
7908 }
7909 else
7910 return orig_x;
7911
7912 x = XEXP (XEXP (x, 1), 0);
7913 if (GET_CODE (x) == PLUS
7914 && CONST_INT_P (XEXP (x, 1)))
7915 {
7916 const_addend = XEXP (x, 1);
7917 x = XEXP (x, 0);
7918 }
7919
7920 if (GET_CODE (x) == UNSPEC
7921 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7922 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7923 result = XVECEXP (x, 0, 0);
7924
7925 if (TARGET_MACHO && darwin_local_data_pic (x)
7926 && !MEM_P (orig_x))
7927 result = XEXP (x, 0);
7928
7929 if (! result)
7930 return orig_x;
7931
7932 if (const_addend)
7933 result = gen_rtx_PLUS (Pmode, result, const_addend);
7934 if (reg_addend)
7935 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7936 return result;
7937 }
7938
7939 /* If X is a machine specific address (i.e. a symbol or label being
7940 referenced as a displacement from the GOT implemented using an
7941 UNSPEC), then return the base term. Otherwise return X. */
7942
7943 rtx
7944 ix86_find_base_term (rtx x)
7945 {
7946 rtx term;
7947
7948 if (TARGET_64BIT)
7949 {
7950 if (GET_CODE (x) != CONST)
7951 return x;
7952 term = XEXP (x, 0);
7953 if (GET_CODE (term) == PLUS
7954 && (CONST_INT_P (XEXP (term, 1))
7955 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
7956 term = XEXP (term, 0);
7957 if (GET_CODE (term) != UNSPEC
7958 || XINT (term, 1) != UNSPEC_GOTPCREL)
7959 return x;
7960
7961 term = XVECEXP (term, 0, 0);
7962
7963 if (GET_CODE (term) != SYMBOL_REF
7964 && GET_CODE (term) != LABEL_REF)
7965 return x;
7966
7967 return term;
7968 }
7969
7970 term = ix86_delegitimize_address (x);
7971
7972 if (GET_CODE (term) != SYMBOL_REF
7973 && GET_CODE (term) != LABEL_REF)
7974 return x;
7975
7976 return term;
7977 }
7978 \f
7979 static void
7980 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7981 int fp, FILE *file)
7982 {
7983 const char *suffix;
7984
7985 if (mode == CCFPmode || mode == CCFPUmode)
7986 {
7987 enum rtx_code second_code, bypass_code;
7988 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7989 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7990 code = ix86_fp_compare_code_to_integer (code);
7991 mode = CCmode;
7992 }
7993 if (reverse)
7994 code = reverse_condition (code);
7995
7996 switch (code)
7997 {
7998 case EQ:
7999 suffix = "e";
8000 break;
8001 case NE:
8002 suffix = "ne";
8003 break;
8004 case GT:
8005 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8006 suffix = "g";
8007 break;
8008 case GTU:
8009 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8010 Those same assemblers have the same but opposite lossage on cmov. */
8011 gcc_assert (mode == CCmode);
8012 suffix = fp ? "nbe" : "a";
8013 break;
8014 case LT:
8015 switch (mode)
8016 {
8017 case CCNOmode:
8018 case CCGOCmode:
8019 suffix = "s";
8020 break;
8021
8022 case CCmode:
8023 case CCGCmode:
8024 suffix = "l";
8025 break;
8026
8027 default:
8028 gcc_unreachable ();
8029 }
8030 break;
8031 case LTU:
8032 gcc_assert (mode == CCmode);
8033 suffix = "b";
8034 break;
8035 case GE:
8036 switch (mode)
8037 {
8038 case CCNOmode:
8039 case CCGOCmode:
8040 suffix = "ns";
8041 break;
8042
8043 case CCmode:
8044 case CCGCmode:
8045 suffix = "ge";
8046 break;
8047
8048 default:
8049 gcc_unreachable ();
8050 }
8051 break;
8052 case GEU:
8053 /* ??? As above. */
8054 gcc_assert (mode == CCmode);
8055 suffix = fp ? "nb" : "ae";
8056 break;
8057 case LE:
8058 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8059 suffix = "le";
8060 break;
8061 case LEU:
8062 gcc_assert (mode == CCmode);
8063 suffix = "be";
8064 break;
8065 case UNORDERED:
8066 suffix = fp ? "u" : "p";
8067 break;
8068 case ORDERED:
8069 suffix = fp ? "nu" : "np";
8070 break;
8071 default:
8072 gcc_unreachable ();
8073 }
8074 fputs (suffix, file);
8075 }
8076
8077 /* Print the name of register X to FILE based on its machine mode and number.
8078 If CODE is 'w', pretend the mode is HImode.
8079 If CODE is 'b', pretend the mode is QImode.
8080 If CODE is 'k', pretend the mode is SImode.
8081 If CODE is 'q', pretend the mode is DImode.
8082 If CODE is 'h', pretend the reg is the 'high' byte register.
8083 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
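/* E.g. in AT&T syntax, register 0 (ax) prints as "%al" with code 'b',
   "%ax" with 'w', "%eax" with 'k' and "%rax" with 'q'. */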
8084
8085 void
8086 print_reg (rtx x, int code, FILE *file)
8087 {
8088 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8089 && REGNO (x) != FRAME_POINTER_REGNUM
8090 && REGNO (x) != FLAGS_REG
8091 && REGNO (x) != FPSR_REG
8092 && REGNO (x) != FPCR_REG);
8093
8094 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8095 putc ('%', file);
8096
8097 if (code == 'w' || MMX_REG_P (x))
8098 code = 2;
8099 else if (code == 'b')
8100 code = 1;
8101 else if (code == 'k')
8102 code = 4;
8103 else if (code == 'q')
8104 code = 8;
8105 else if (code == 'y')
8106 code = 3;
8107 else if (code == 'h')
8108 code = 0;
8109 else
8110 code = GET_MODE_SIZE (GET_MODE (x));
8111
8112 /* Irritatingly, AMD extended registers use a different naming convention
8113 from the normal registers. */
8114 if (REX_INT_REG_P (x))
8115 {
8116 gcc_assert (TARGET_64BIT);
8117 switch (code)
8118 {
8119 case 0:
8120 error ("extended registers have no high halves");
8121 break;
8122 case 1:
8123 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8124 break;
8125 case 2:
8126 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8127 break;
8128 case 4:
8129 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8130 break;
8131 case 8:
8132 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8133 break;
8134 default:
8135 error ("unsupported operand size for extended register");
8136 break;
8137 }
8138 return;
8139 }
8140 switch (code)
8141 {
8142 case 3:
8143 if (STACK_TOP_P (x))
8144 {
8145 fputs ("st(0)", file);
8146 break;
8147 }
8148 /* FALLTHRU */
8149 case 8:
8150 case 4:
8151 case 12:
8152 if (! ANY_FP_REG_P (x))
8153 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8154 /* FALLTHRU */
8155 case 16:
8156 case 2:
8157 normal:
8158 fputs (hi_reg_name[REGNO (x)], file);
8159 break;
8160 case 1:
8161 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8162 goto normal;
8163 fputs (qi_reg_name[REGNO (x)], file);
8164 break;
8165 case 0:
8166 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8167 goto normal;
8168 fputs (qi_high_reg_name[REGNO (x)], file);
8169 break;
8170 default:
8171 gcc_unreachable ();
8172 }
8173 }
8174
8175 /* Locate some local-dynamic symbol still in use by this function
8176 so that we can print its name in some tls_local_dynamic_base
8177 pattern. */
8178
8179 static int
8180 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8181 {
8182 rtx x = *px;
8183
8184 if (GET_CODE (x) == SYMBOL_REF
8185 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8186 {
8187 cfun->machine->some_ld_name = XSTR (x, 0);
8188 return 1;
8189 }
8190
8191 return 0;
8192 }
8193
8194 static const char *
8195 get_some_local_dynamic_name (void)
8196 {
8197 rtx insn;
8198
8199 if (cfun->machine->some_ld_name)
8200 return cfun->machine->some_ld_name;
8201
8202 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8203 if (INSN_P (insn)
8204 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8205 return cfun->machine->some_ld_name;
8206
8207 gcc_unreachable ();
8208 }
8209
8210 /* Meaning of CODE:
8211 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8212 C -- print opcode suffix for set/cmov insn.
8213 c -- like C, but print reversed condition
8214 F,f -- likewise, but for floating-point.
8215 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8216 otherwise nothing
8217 R -- print the prefix for register names.
8218 z -- print the opcode suffix for the size of the current operand.
8219 * -- print a star (in certain assembler syntax)
8220 A -- print an absolute memory reference.
8221 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8222 s -- print a shift double count, followed by the assembler's argument
8223 delimiter.
8224 b -- print the QImode name of the register for the indicated operand.
8225 %b0 would print %al if operands[0] is reg 0.
8226 w -- likewise, print the HImode name of the register.
8227 k -- likewise, print the SImode name of the register.
8228 q -- likewise, print the DImode name of the register.
8229 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8230 y -- print "st(0)" instead of "st" as a register.
8231 D -- print condition for SSE cmp instruction.
8232 P -- if PIC, print an @PLT suffix.
8233 X -- don't print any sort of PIC '@' suffix for a symbol.
8234 & -- print some in-use local-dynamic symbol name.
8235 H -- print a memory address offset by 8; used for sse high-parts
8236 */
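/* So, for instance, with operands[0] being (reg:QI 0), "%b0" in a template
   prints "%al" while "%k0" prints "%eax" (AT&T syntax); "%z1" picks the
   size suffix from the mode of operands[1]. */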
8237
8238 void
8239 print_operand (FILE *file, rtx x, int code)
8240 {
8241 if (code)
8242 {
8243 switch (code)
8244 {
8245 case '*':
8246 if (ASSEMBLER_DIALECT == ASM_ATT)
8247 putc ('*', file);
8248 return;
8249
8250 case '&':
8251 assemble_name (file, get_some_local_dynamic_name ());
8252 return;
8253
8254 case 'A':
8255 switch (ASSEMBLER_DIALECT)
8256 {
8257 case ASM_ATT:
8258 putc ('*', file);
8259 break;
8260
8261 case ASM_INTEL:
8262 /* Intel syntax. For absolute addresses, registers should not
8263 be surrounded by brackets. */
8264 if (!REG_P (x))
8265 {
8266 putc ('[', file);
8267 PRINT_OPERAND (file, x, 0);
8268 putc (']', file);
8269 return;
8270 }
8271 break;
8272
8273 default:
8274 gcc_unreachable ();
8275 }
8276
8277 PRINT_OPERAND (file, x, 0);
8278 return;
8279
8280
8281 case 'L':
8282 if (ASSEMBLER_DIALECT == ASM_ATT)
8283 putc ('l', file);
8284 return;
8285
8286 case 'W':
8287 if (ASSEMBLER_DIALECT == ASM_ATT)
8288 putc ('w', file);
8289 return;
8290
8291 case 'B':
8292 if (ASSEMBLER_DIALECT == ASM_ATT)
8293 putc ('b', file);
8294 return;
8295
8296 case 'Q':
8297 if (ASSEMBLER_DIALECT == ASM_ATT)
8298 putc ('l', file);
8299 return;
8300
8301 case 'S':
8302 if (ASSEMBLER_DIALECT == ASM_ATT)
8303 putc ('s', file);
8304 return;
8305
8306 case 'T':
8307 if (ASSEMBLER_DIALECT == ASM_ATT)
8308 putc ('t', file);
8309 return;
8310
8311 case 'z':
8312 /* 387 opcodes don't get size suffixes if the operands are
8313 registers. */
8314 if (STACK_REG_P (x))
8315 return;
8316
8317 /* Likewise if using Intel opcodes. */
8318 if (ASSEMBLER_DIALECT == ASM_INTEL)
8319 return;
8320
8321 /* This is the size of op from size of operand. */
8322 switch (GET_MODE_SIZE (GET_MODE (x)))
8323 {
8324 case 1:
8325 putc ('b', file);
8326 return;
8327
8328 case 2:
8329 #ifdef HAVE_GAS_FILDS_FISTS
8330 putc ('s', file);
8331 #endif
8332 return;
8333
8334 case 4:
8335 if (GET_MODE (x) == SFmode)
8336 {
8337 putc ('s', file);
8338 return;
8339 }
8340 else
8341 putc ('l', file);
8342 return;
8343
8344 case 12:
8345 case 16:
8346 putc ('t', file);
8347 return;
8348
8349 case 8:
8350 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8351 {
8352 #ifdef GAS_MNEMONICS
8353 putc ('q', file);
8354 #else
8355 putc ('l', file);
8356 putc ('l', file);
8357 #endif
8358 }
8359 else
8360 putc ('l', file);
8361 return;
8362
8363 default:
8364 gcc_unreachable ();
8365 }
8366
8367 case 'b':
8368 case 'w':
8369 case 'k':
8370 case 'q':
8371 case 'h':
8372 case 'y':
8373 case 'X':
8374 case 'P':
8375 break;
8376
8377 case 's':
8378 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8379 {
8380 PRINT_OPERAND (file, x, 0);
8381 putc (',', file);
8382 }
8383 return;
8384
8385 case 'D':
8386 /* Little bit of braindamage here. The SSE compare instructions
8387 use completely different names for the comparisons than the
8388 fp conditional moves do. */
8389 switch (GET_CODE (x))
8390 {
8391 case EQ:
8392 case UNEQ:
8393 fputs ("eq", file);
8394 break;
8395 case LT:
8396 case UNLT:
8397 fputs ("lt", file);
8398 break;
8399 case LE:
8400 case UNLE:
8401 fputs ("le", file);
8402 break;
8403 case UNORDERED:
8404 fputs ("unord", file);
8405 break;
8406 case NE:
8407 case LTGT:
8408 fputs ("neq", file);
8409 break;
8410 case UNGE:
8411 case GE:
8412 fputs ("nlt", file);
8413 break;
8414 case UNGT:
8415 case GT:
8416 fputs ("nle", file);
8417 break;
8418 case ORDERED:
8419 fputs ("ord", file);
8420 break;
8421 default:
8422 gcc_unreachable ();
8423 }
8424 return;
8425 case 'O':
8426 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8427 if (ASSEMBLER_DIALECT == ASM_ATT)
8428 {
8429 switch (GET_MODE (x))
8430 {
8431 case HImode: putc ('w', file); break;
8432 case SImode:
8433 case SFmode: putc ('l', file); break;
8434 case DImode:
8435 case DFmode: putc ('q', file); break;
8436 default: gcc_unreachable ();
8437 }
8438 putc ('.', file);
8439 }
8440 #endif
8441 return;
8442 case 'C':
8443 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8444 return;
8445 case 'F':
8446 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8447 if (ASSEMBLER_DIALECT == ASM_ATT)
8448 putc ('.', file);
8449 #endif
8450 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8451 return;
8452
8453 /* Like above, but reverse condition */
8454 case 'c':
8455 /* Check to see if argument to %c is really a constant
8456 and not a condition code which needs to be reversed. */
8457 if (!COMPARISON_P (x))
8458 {
8459 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8460 return;
8461 }
8462 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8463 return;
8464 case 'f':
8465 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8466 if (ASSEMBLER_DIALECT == ASM_ATT)
8467 putc ('.', file);
8468 #endif
8469 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8470 return;
8471
8472 case 'H':
8473 /* It doesn't actually matter what mode we use here, as we're
8474 only going to use this for printing. */
8475 x = adjust_address_nv (x, DImode, 8);
8476 break;
8477
8478 case '+':
8479 {
8480 rtx x;
8481
8482 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8483 return;
8484
8485 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8486 if (x)
8487 {
8488 int pred_val = INTVAL (XEXP (x, 0));
8489
8490 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8491 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8492 {
8493 int taken = pred_val > REG_BR_PROB_BASE / 2;
8494 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8495
8496 /* Emit hints only in the case default branch prediction
8497 heuristics would fail. */
8498 if (taken != cputaken)
8499 {
8500 /* We use 3e (DS) prefix for taken branches and
8501 2e (CS) prefix for not taken branches. */
8502 if (taken)
8503 fputs ("ds ; ", file);
8504 else
8505 fputs ("cs ; ", file);
8506 }
8507 }
8508 }
8509 return;
8510 }
8511 default:
8512 output_operand_lossage ("invalid operand code '%c'", code);
8513 }
8514 }
8515
8516 if (REG_P (x))
8517 print_reg (x, code, file);
8518
8519 else if (MEM_P (x))
8520 {
8521 /* No `byte ptr' prefix for call instructions. */
8522 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8523 {
8524 const char * size;
8525 switch (GET_MODE_SIZE (GET_MODE (x)))
8526 {
8527 case 1: size = "BYTE"; break;
8528 case 2: size = "WORD"; break;
8529 case 4: size = "DWORD"; break;
8530 case 8: size = "QWORD"; break;
8531 case 12: size = "XWORD"; break;
8532 case 16: size = "XMMWORD"; break;
8533 default:
8534 gcc_unreachable ();
8535 }
8536
8537 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8538 if (code == 'b')
8539 size = "BYTE";
8540 else if (code == 'w')
8541 size = "WORD";
8542 else if (code == 'k')
8543 size = "DWORD";
8544
8545 fputs (size, file);
8546 fputs (" PTR ", file);
8547 }
8548
8549 x = XEXP (x, 0);
8550 /* Avoid (%rip) for call operands. */
8551 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8552 && !CONST_INT_P (x))
8553 output_addr_const (file, x);
8554 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8555 output_operand_lossage ("invalid constraints for operand");
8556 else
8557 output_address (x);
8558 }
8559
8560 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8561 {
8562 REAL_VALUE_TYPE r;
8563 long l;
8564
8565 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8566 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8567
8568 if (ASSEMBLER_DIALECT == ASM_ATT)
8569 putc ('$', file);
8570 fprintf (file, "0x%08lx", l);
8571 }
8572
8573 /* These float cases don't actually occur as immediate operands. */
8574 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8575 {
8576 char dstr[30];
8577
8578 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8579 fprintf (file, "%s", dstr);
8580 }
8581
8582 else if (GET_CODE (x) == CONST_DOUBLE
8583 && GET_MODE (x) == XFmode)
8584 {
8585 char dstr[30];
8586
8587 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8588 fprintf (file, "%s", dstr);
8589 }
8590
8591 else
8592 {
8593 /* We have patterns that allow zero sets of memory, for instance.
8594 In 64-bit mode, we should probably support all 8-byte vectors,
8595 since we can in fact encode that into an immediate. */
8596 if (GET_CODE (x) == CONST_VECTOR)
8597 {
8598 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8599 x = const0_rtx;
8600 }
8601
8602 if (code != 'P')
8603 {
8604 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8605 {
8606 if (ASSEMBLER_DIALECT == ASM_ATT)
8607 putc ('$', file);
8608 }
8609 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8610 || GET_CODE (x) == LABEL_REF)
8611 {
8612 if (ASSEMBLER_DIALECT == ASM_ATT)
8613 putc ('$', file);
8614 else
8615 fputs ("OFFSET FLAT:", file);
8616 }
8617 }
8618 if (CONST_INT_P (x))
8619 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8620 else if (flag_pic)
8621 output_pic_addr_const (file, x, code);
8622 else
8623 output_addr_const (file, x);
8624 }
8625 }
8626 \f
8627 /* Print a memory operand whose address is ADDR. */
8628
8629 void
8630 print_operand_address (FILE *file, rtx addr)
8631 {
8632 struct ix86_address parts;
8633 rtx base, index, disp;
8634 int scale;
8635 int ok = ix86_decompose_address (addr, &parts);
8636
8637 gcc_assert (ok);
8638
8639 base = parts.base;
8640 index = parts.index;
8641 disp = parts.disp;
8642 scale = parts.scale;
8643
8644 switch (parts.seg)
8645 {
8646 case SEG_DEFAULT:
8647 break;
8648 case SEG_FS:
8649 case SEG_GS:
8650 if (USER_LABEL_PREFIX[0] == 0)
8651 putc ('%', file);
8652 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8653 break;
8654 default:
8655 gcc_unreachable ();
8656 }
8657
8658 if (!base && !index)
8659 {
8660 /* A displacement-only address requires special attention. */
8661
8662 if (CONST_INT_P (disp))
8663 {
8664 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8665 {
8666 if (USER_LABEL_PREFIX[0] == 0)
8667 putc ('%', file);
8668 fputs ("ds:", file);
8669 }
8670 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8671 }
8672 else if (flag_pic)
8673 output_pic_addr_const (file, disp, 0);
8674 else
8675 output_addr_const (file, disp);
8676
8677 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8678 if (TARGET_64BIT)
8679 {
8680 if (GET_CODE (disp) == CONST
8681 && GET_CODE (XEXP (disp, 0)) == PLUS
8682 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8683 disp = XEXP (XEXP (disp, 0), 0);
8684 if (GET_CODE (disp) == LABEL_REF
8685 || (GET_CODE (disp) == SYMBOL_REF
8686 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8687 fputs ("(%rip)", file);
8688 }
8689 }
8690 else
8691 {
8692 if (ASSEMBLER_DIALECT == ASM_ATT)
8693 {
8694 if (disp)
8695 {
8696 if (flag_pic)
8697 output_pic_addr_const (file, disp, 0);
8698 else if (GET_CODE (disp) == LABEL_REF)
8699 output_asm_label (disp);
8700 else
8701 output_addr_const (file, disp);
8702 }
8703
8704 putc ('(', file);
8705 if (base)
8706 print_reg (base, 0, file);
8707 if (index)
8708 {
8709 putc (',', file);
8710 print_reg (index, 0, file);
8711 if (scale != 1)
8712 fprintf (file, ",%d", scale);
8713 }
8714 putc (')', file);
8715 }
8716 else
8717 {
8718 rtx offset = NULL_RTX;
8719
8720 if (disp)
8721 {
8722 /* Pull out the offset of a symbol; print any symbol itself. */
8723 if (GET_CODE (disp) == CONST
8724 && GET_CODE (XEXP (disp, 0)) == PLUS
8725 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8726 {
8727 offset = XEXP (XEXP (disp, 0), 1);
8728 disp = gen_rtx_CONST (VOIDmode,
8729 XEXP (XEXP (disp, 0), 0));
8730 }
8731
8732 if (flag_pic)
8733 output_pic_addr_const (file, disp, 0);
8734 else if (GET_CODE (disp) == LABEL_REF)
8735 output_asm_label (disp);
8736 else if (CONST_INT_P (disp))
8737 offset = disp;
8738 else
8739 output_addr_const (file, disp);
8740 }
8741
8742 putc ('[', file);
8743 if (base)
8744 {
8745 print_reg (base, 0, file);
8746 if (offset)
8747 {
8748 if (INTVAL (offset) >= 0)
8749 putc ('+', file);
8750 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8751 }
8752 }
8753 else if (offset)
8754 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8755 else
8756 putc ('0', file);
8757
8758 if (index)
8759 {
8760 putc ('+', file);
8761 print_reg (index, 0, file);
8762 if (scale != 1)
8763 fprintf (file, "*%d", scale);
8764 }
8765 putc (']', file);
8766 }
8767 }
8768 }
8769
8770 bool
8771 output_addr_const_extra (FILE *file, rtx x)
8772 {
8773 rtx op;
8774
8775 if (GET_CODE (x) != UNSPEC)
8776 return false;
8777
8778 op = XVECEXP (x, 0, 0);
8779 switch (XINT (x, 1))
8780 {
8781 case UNSPEC_GOTTPOFF:
8782 output_addr_const (file, op);
8783 /* FIXME: This might be @TPOFF in Sun ld. */
8784 fputs ("@GOTTPOFF", file);
8785 break;
8786 case UNSPEC_TPOFF:
8787 output_addr_const (file, op);
8788 fputs ("@TPOFF", file);
8789 break;
8790 case UNSPEC_NTPOFF:
8791 output_addr_const (file, op);
8792 if (TARGET_64BIT)
8793 fputs ("@TPOFF", file);
8794 else
8795 fputs ("@NTPOFF", file);
8796 break;
8797 case UNSPEC_DTPOFF:
8798 output_addr_const (file, op);
8799 fputs ("@DTPOFF", file);
8800 break;
8801 case UNSPEC_GOTNTPOFF:
8802 output_addr_const (file, op);
8803 if (TARGET_64BIT)
8804 fputs ("@GOTTPOFF(%rip)", file);
8805 else
8806 fputs ("@GOTNTPOFF", file);
8807 break;
8808 case UNSPEC_INDNTPOFF:
8809 output_addr_const (file, op);
8810 fputs ("@INDNTPOFF", file);
8811 break;
8812
8813 default:
8814 return false;
8815 }
8816
8817 return true;
8818 }
8819 \f
8820 /* Split one or more DImode RTL references into pairs of SImode
8821 references. The RTL can be REG, offsettable MEM, integer constant, or
8822 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8823 split and "num" is its length. lo_half and hi_half are output arrays
8824 that parallel "operands". */
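/* E.g. a DImode pseudo is split into (subreg:SI ... 0) and (subreg:SI ... 4),
   while an offsettable MEM is split into two SImode memory references at
   offsets 0 and 4. */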
8825
8826 void
8827 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8828 {
8829 while (num--)
8830 {
8831 rtx op = operands[num];
8832
8833 /* simplify_subreg refuses to split volatile memory addresses,
8834 but we still have to handle them. */
8835 if (MEM_P (op))
8836 {
8837 lo_half[num] = adjust_address (op, SImode, 0);
8838 hi_half[num] = adjust_address (op, SImode, 4);
8839 }
8840 else
8841 {
8842 lo_half[num] = simplify_gen_subreg (SImode, op,
8843 GET_MODE (op) == VOIDmode
8844 ? DImode : GET_MODE (op), 0);
8845 hi_half[num] = simplify_gen_subreg (SImode, op,
8846 GET_MODE (op) == VOIDmode
8847 ? DImode : GET_MODE (op), 4);
8848 }
8849 }
8850 }
8851 /* Split one or more TImode RTL references into pairs of DImode
8852 references. The RTL can be REG, offsettable MEM, integer constant, or
8853 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8854 split and "num" is its length. lo_half and hi_half are output arrays
8855 that parallel "operands". */
8856
8857 void
8858 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8859 {
8860 while (num--)
8861 {
8862 rtx op = operands[num];
8863
8864 /* simplify_subreg refuses to split volatile memory addresses, but we
8865 still have to handle them. */
8866 if (MEM_P (op))
8867 {
8868 lo_half[num] = adjust_address (op, DImode, 0);
8869 hi_half[num] = adjust_address (op, DImode, 8);
8870 }
8871 else
8872 {
8873 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8874 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8875 }
8876 }
8877 }
8878 \f
8879 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8880 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8881 is the expression of the binary operation. The output may either be
8882 emitted here, or returned to the caller, like all output_* functions.
8883
8884 There is no guarantee that the operands are the same mode, as they
8885 might be within FLOAT or FLOAT_EXTEND expressions. */
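/* As a rough example, for (set (reg st0) (plus (reg st0) (mem ...))) this
   returns "fadd%z2\t%2", while the SSE case returns "addss"/"addsd" with
   a two-operand template. */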
8886
8887 #ifndef SYSV386_COMPAT
8888 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8889 wants to fix the assemblers because that causes incompatibility
8890 with gcc. No-one wants to fix gcc because that causes
8891 incompatibility with assemblers... You can use the option of
8892 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8893 #define SYSV386_COMPAT 1
8894 #endif
8895
8896 const char *
8897 output_387_binary_op (rtx insn, rtx *operands)
8898 {
8899 static char buf[30];
8900 const char *p;
8901 const char *ssep;
8902 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8903
8904 #ifdef ENABLE_CHECKING
8905 /* Even if we do not want to check the inputs, this documents the input
8906 constraints, which helps in understanding the following code. */
8907 if (STACK_REG_P (operands[0])
8908 && ((REG_P (operands[1])
8909 && REGNO (operands[0]) == REGNO (operands[1])
8910 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8911 || (REG_P (operands[2])
8912 && REGNO (operands[0]) == REGNO (operands[2])
8913 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8914 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8915 ; /* ok */
8916 else
8917 gcc_assert (is_sse);
8918 #endif
8919
8920 switch (GET_CODE (operands[3]))
8921 {
8922 case PLUS:
8923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8925 p = "fiadd";
8926 else
8927 p = "fadd";
8928 ssep = "add";
8929 break;
8930
8931 case MINUS:
8932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8934 p = "fisub";
8935 else
8936 p = "fsub";
8937 ssep = "sub";
8938 break;
8939
8940 case MULT:
8941 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8942 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8943 p = "fimul";
8944 else
8945 p = "fmul";
8946 ssep = "mul";
8947 break;
8948
8949 case DIV:
8950 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8951 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8952 p = "fidiv";
8953 else
8954 p = "fdiv";
8955 ssep = "div";
8956 break;
8957
8958 default:
8959 gcc_unreachable ();
8960 }
8961
8962 if (is_sse)
8963 {
8964 strcpy (buf, ssep);
8965 if (GET_MODE (operands[0]) == SFmode)
8966 strcat (buf, "ss\t{%2, %0|%0, %2}");
8967 else
8968 strcat (buf, "sd\t{%2, %0|%0, %2}");
8969 return buf;
8970 }
8971 strcpy (buf, p);
8972
8973 switch (GET_CODE (operands[3]))
8974 {
8975 case MULT:
8976 case PLUS:
8977 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8978 {
8979 rtx temp = operands[2];
8980 operands[2] = operands[1];
8981 operands[1] = temp;
8982 }
8983
8984 /* We know operands[0] == operands[1]. */
8985
8986 if (MEM_P (operands[2]))
8987 {
8988 p = "%z2\t%2";
8989 break;
8990 }
8991
8992 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8993 {
8994 if (STACK_TOP_P (operands[0]))
8995 /* How is it that we are storing to a dead operand[2]?
8996 Well, presumably operands[1] is dead too. We can't
8997 store the result to st(0) as st(0) gets popped on this
8998 instruction. Instead store to operands[2] (which I
8999 think has to be st(1)). st(1) will be popped later.
9000 gcc <= 2.8.1 didn't have this check and generated
9001 assembly code that the Unixware assembler rejected. */
9002 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9003 else
9004 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9005 break;
9006 }
9007
9008 if (STACK_TOP_P (operands[0]))
9009 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9010 else
9011 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9012 break;
9013
9014 case MINUS:
9015 case DIV:
9016 if (MEM_P (operands[1]))
9017 {
9018 p = "r%z1\t%1";
9019 break;
9020 }
9021
9022 if (MEM_P (operands[2]))
9023 {
9024 p = "%z2\t%2";
9025 break;
9026 }
9027
9028 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9029 {
9030 #if SYSV386_COMPAT
9031 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9032 derived assemblers, confusingly reverse the direction of
9033 the operation for fsub{r} and fdiv{r} when the
9034 destination register is not st(0). The Intel assembler
9035 doesn't have this brain damage. Read !SYSV386_COMPAT to
9036 figure out what the hardware really does. */
9037 if (STACK_TOP_P (operands[0]))
9038 p = "{p\t%0, %2|rp\t%2, %0}";
9039 else
9040 p = "{rp\t%2, %0|p\t%0, %2}";
9041 #else
9042 if (STACK_TOP_P (operands[0]))
9043 /* As above for fmul/fadd, we can't store to st(0). */
9044 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9045 else
9046 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9047 #endif
9048 break;
9049 }
9050
9051 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9052 {
9053 #if SYSV386_COMPAT
9054 if (STACK_TOP_P (operands[0]))
9055 p = "{rp\t%0, %1|p\t%1, %0}";
9056 else
9057 p = "{p\t%1, %0|rp\t%0, %1}";
9058 #else
9059 if (STACK_TOP_P (operands[0]))
9060 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9061 else
9062 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9063 #endif
9064 break;
9065 }
9066
9067 if (STACK_TOP_P (operands[0]))
9068 {
9069 if (STACK_TOP_P (operands[1]))
9070 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9071 else
9072 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9073 break;
9074 }
9075 else if (STACK_TOP_P (operands[1]))
9076 {
9077 #if SYSV386_COMPAT
9078 p = "{\t%1, %0|r\t%0, %1}";
9079 #else
9080 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9081 #endif
9082 }
9083 else
9084 {
9085 #if SYSV386_COMPAT
9086 p = "{r\t%2, %0|\t%0, %2}";
9087 #else
9088 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9089 #endif
9090 }
9091 break;
9092
9093 default:
9094 gcc_unreachable ();
9095 }
9096
9097 strcat (buf, p);
9098 return buf;
9099 }
9100
9101 /* Return needed mode for entity in optimize_mode_switching pass. */
9102
9103 int
9104 ix86_mode_needed (int entity, rtx insn)
9105 {
9106 enum attr_i387_cw mode;
9107
9108 /* The mode UNINITIALIZED is used to store the control word after a
9109 function call or ASM pattern. The mode ANY specifies that the function
9110 has no requirements on the control word and makes no changes in the
9111 bits we are interested in. */
9112
9113 if (CALL_P (insn)
9114 || (NONJUMP_INSN_P (insn)
9115 && (asm_noperands (PATTERN (insn)) >= 0
9116 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9117 return I387_CW_UNINITIALIZED;
9118
9119 if (recog_memoized (insn) < 0)
9120 return I387_CW_ANY;
9121
9122 mode = get_attr_i387_cw (insn);
9123
9124 switch (entity)
9125 {
9126 case I387_TRUNC:
9127 if (mode == I387_CW_TRUNC)
9128 return mode;
9129 break;
9130
9131 case I387_FLOOR:
9132 if (mode == I387_CW_FLOOR)
9133 return mode;
9134 break;
9135
9136 case I387_CEIL:
9137 if (mode == I387_CW_CEIL)
9138 return mode;
9139 break;
9140
9141 case I387_MASK_PM:
9142 if (mode == I387_CW_MASK_PM)
9143 return mode;
9144 break;
9145
9146 default:
9147 gcc_unreachable ();
9148 }
9149
9150 return I387_CW_ANY;
9151 }
9152
9153 /* Output code to initialize the control word copies used by trunc?f?i and
9154 rounding patterns. MODE selects the rounding mode: the current control
9155 word is saved and a modified copy is stored in the stack slot for MODE. */
9156
9157 void
9158 emit_i387_cw_initialization (int mode)
9159 {
9160 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9161 rtx new_mode;
9162
9163 int slot;
9164
9165 rtx reg = gen_reg_rtx (HImode);
9166
9167 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9168 emit_move_insn (reg, copy_rtx (stored_mode));
9169
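/* In the x87 control word, bits 10-11 select the rounding mode
   (00 = nearest, 01 = down, 10 = up, 11 = truncate) and bit 5 masks
   the precision exception; the constants below set those fields.  */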
9170 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9171 {
9172 switch (mode)
9173 {
9174 case I387_CW_TRUNC:
9175 /* round toward zero (truncate) */
9176 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9177 slot = SLOT_CW_TRUNC;
9178 break;
9179
9180 case I387_CW_FLOOR:
9181 /* round down toward -oo */
9182 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9183 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9184 slot = SLOT_CW_FLOOR;
9185 break;
9186
9187 case I387_CW_CEIL:
9188 /* round up toward +oo */
9189 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9190 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9191 slot = SLOT_CW_CEIL;
9192 break;
9193
9194 case I387_CW_MASK_PM:
9195 /* mask precision exception for nearbyint() */
9196 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9197 slot = SLOT_CW_MASK_PM;
9198 break;
9199
9200 default:
9201 gcc_unreachable ();
9202 }
9203 }
9204 else
9205 {
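/* Here the rounding bits are written straight into the high byte of
   the control word via the insv pattern, so each value below is the
   corresponding 0x0?00 constant from above shifted right by 8.  */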
9206 switch (mode)
9207 {
9208 case I387_CW_TRUNC:
9209 /* round toward zero (truncate) */
9210 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9211 slot = SLOT_CW_TRUNC;
9212 break;
9213
9214 case I387_CW_FLOOR:
9215 /* round down toward -oo */
9216 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9217 slot = SLOT_CW_FLOOR;
9218 break;
9219
9220 case I387_CW_CEIL:
9221 /* round up toward +oo */
9222 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9223 slot = SLOT_CW_CEIL;
9224 break;
9225
9226 case I387_CW_MASK_PM:
9227 /* mask precision exception for nearbyint() */
9228 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9229 slot = SLOT_CW_MASK_PM;
9230 break;
9231
9232 default:
9233 gcc_unreachable ();
9234 }
9235 }
9236
9237 gcc_assert (slot < MAX_386_STACK_LOCALS);
9238
9239 new_mode = assign_386_stack_local (HImode, slot);
9240 emit_move_insn (new_mode, reg);
9241 }
9242
9243 /* Output code for INSN to convert a float to a signed int. OPERANDS
9244 are the insn operands. The output may be [HSD]Imode and the input
9245 operand may be [SDX]Fmode. */
9246
9247 const char *
9248 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9249 {
9250 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9251 int dimode_p = GET_MODE (operands[0]) == DImode;
9252 int round_mode = get_attr_i387_cw (insn);
9253
9254 /* Jump through a hoop or two for DImode, since the hardware has no
9255 non-popping instruction. We used to do this a different way, but
9256 that was somewhat fragile and broke with post-reload splitters. */
9257 if ((dimode_p || fisttp) && !stack_top_dies)
9258 output_asm_insn ("fld\t%y1", operands);
9259
9260 gcc_assert (STACK_TOP_P (operands[1]));
9261 gcc_assert (MEM_P (operands[0]));
9262 gcc_assert (GET_MODE (operands[1]) != TFmode);
9263
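/* operands[2] and operands[3] are the control word stack slots set up by
   emit_i387_cw_initialization: %3 loads the new rounding mode and %2
   restores the original control word afterwards.  */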
9264 if (fisttp)
9265 output_asm_insn ("fisttp%z0\t%0", operands);
9266 else
9267 {
9268 if (round_mode != I387_CW_ANY)
9269 output_asm_insn ("fldcw\t%3", operands);
9270 if (stack_top_dies || dimode_p)
9271 output_asm_insn ("fistp%z0\t%0", operands);
9272 else
9273 output_asm_insn ("fist%z0\t%0", operands);
9274 if (round_mode != I387_CW_ANY)
9275 output_asm_insn ("fldcw\t%2", operands);
9276 }
9277
9278 return "";
9279 }
9280
9281 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9282 have the values zero or one, indicates the ffreep insn's operand
9283 from the OPERANDS array. */
9284
9285 static const char *
9286 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9287 {
9288 if (TARGET_USE_FFREEP)
9289 #if HAVE_AS_IX86_FFREEP
9290 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9291 #else
9292 {
9293 static char retval[] = ".word\t0xc_df";
9294 int regno = REGNO (operands[opno]);
9295
9296 gcc_assert (FP_REGNO_P (regno));
9297
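/* ffreep st(i) is encoded as the bytes 0xdf 0xc0+i; patch the register
   digit into the .word template, which the assembler emits little-endian
   as exactly those two bytes.  */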
9298 retval[9] = '0' + (regno - FIRST_STACK_REG);
9299 return retval;
9300 }
9301 #endif
9302
9303 return opno ? "fstp\t%y1" : "fstp\t%y0";
9304 }
9305
9306
9307 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9308 should be used. UNORDERED_P is true when fucom should be used. */
9309
9310 const char *
9311 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9312 {
9313 int stack_top_dies;
9314 rtx cmp_op0, cmp_op1;
9315 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9316
9317 if (eflags_p)
9318 {
9319 cmp_op0 = operands[0];
9320 cmp_op1 = operands[1];
9321 }
9322 else
9323 {
9324 cmp_op0 = operands[1];
9325 cmp_op1 = operands[2];
9326 }
9327
9328 if (is_sse)
9329 {
9330 if (GET_MODE (operands[0]) == SFmode)
9331 if (unordered_p)
9332 return "ucomiss\t{%1, %0|%0, %1}";
9333 else
9334 return "comiss\t{%1, %0|%0, %1}";
9335 else
9336 if (unordered_p)
9337 return "ucomisd\t{%1, %0|%0, %1}";
9338 else
9339 return "comisd\t{%1, %0|%0, %1}";
9340 }
9341
9342 gcc_assert (STACK_TOP_P (cmp_op0));
9343
9344 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9345
9346 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9347 {
9348 if (stack_top_dies)
9349 {
9350 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9351 return output_387_ffreep (operands, 1);
9352 }
9353 else
9354 return "ftst\n\tfnstsw\t%0";
9355 }
9356
9357 if (STACK_REG_P (cmp_op1)
9358 && stack_top_dies
9359 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9360 && REGNO (cmp_op1) != FIRST_STACK_REG)
9361 {
9362 /* If the top of the 387 stack dies, and the other operand is
9363 also a stack register that dies, then this must be a
9364 `fcompp' float compare. */
9365
9366 if (eflags_p)
9367 {
9368 /* There is no double popping fcomi variant. Fortunately,
9369 eflags is immune from the fstp's cc clobbering. */
9370 if (unordered_p)
9371 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9372 else
9373 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9374 return output_387_ffreep (operands, 0);
9375 }
9376 else
9377 {
9378 if (unordered_p)
9379 return "fucompp\n\tfnstsw\t%0";
9380 else
9381 return "fcompp\n\tfnstsw\t%0";
9382 }
9383 }
9384 else
9385 {
9386 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9387
9388 static const char * const alt[16] =
9389 {
9390 "fcom%z2\t%y2\n\tfnstsw\t%0",
9391 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9392 "fucom%z2\t%y2\n\tfnstsw\t%0",
9393 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9394
9395 "ficom%z2\t%y2\n\tfnstsw\t%0",
9396 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9397 NULL,
9398 NULL,
9399
9400 "fcomi\t{%y1, %0|%0, %y1}",
9401 "fcomip\t{%y1, %0|%0, %y1}",
9402 "fucomi\t{%y1, %0|%0, %y1}",
9403 "fucomip\t{%y1, %0|%0, %y1}",
9404
9405 NULL,
9406 NULL,
9407 NULL,
9408 NULL
9409 };
9410
9411 int mask;
9412 const char *ret;
9413
9414 mask = eflags_p << 3;
9415 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9416 mask |= unordered_p << 1;
9417 mask |= stack_top_dies;
9418
9419 gcc_assert (mask < 16);
9420 ret = alt[mask];
9421 gcc_assert (ret);
9422
9423 return ret;
9424 }
9425 }
9426
9427 void
9428 ix86_output_addr_vec_elt (FILE *file, int value)
9429 {
9430 const char *directive = ASM_LONG;
9431
9432 #ifdef ASM_QUAD
9433 if (TARGET_64BIT)
9434 directive = ASM_QUAD;
9435 #else
9436 gcc_assert (!TARGET_64BIT);
9437 #endif
9438
9439 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9440 }
9441
9442 void
9443 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9444 {
9445 const char *directive = ASM_LONG;
9446
9447 #ifdef ASM_QUAD
9448 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9449 directive = ASM_QUAD;
9450 #else
9451 gcc_assert (!TARGET_64BIT);
9452 #endif
9453 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9454 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9455 fprintf (file, "%s%s%d-%s%d\n",
9456 directive, LPREFIX, value, LPREFIX, rel);
9457 else if (HAVE_AS_GOTOFF_IN_DATA)
9458 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9459 #if TARGET_MACHO
9460 else if (TARGET_MACHO)
9461 {
9462 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9463 machopic_output_function_base_name (file);
9464 fprintf(file, "\n");
9465 }
9466 #endif
9467 else
9468 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9469 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9470 }
9471 \f
9472 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9473 for the target. */
9474
9475 void
9476 ix86_expand_clear (rtx dest)
9477 {
9478 rtx tmp;
9479
9480 /* We play register width games, which are only valid after reload. */
9481 gcc_assert (reload_completed);
9482
9483 /* Avoid HImode and its attendant prefix byte. */
9484 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9485 dest = gen_rtx_REG (SImode, REGNO (dest));
9486 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9487
9488 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9489 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9490 {
9491 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9492 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9493 }
9494
9495 emit_insn (tmp);
9496 }
9497
9498 /* X is an unchanging MEM. If it is a constant pool reference, return
9499 the constant pool rtx, else NULL. */
9500
9501 rtx
9502 maybe_get_pool_constant (rtx x)
9503 {
9504 x = ix86_delegitimize_address (XEXP (x, 0));
9505
9506 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9507 return get_pool_constant (x);
9508
9509 return NULL_RTX;
9510 }
9511
9512 void
9513 ix86_expand_move (enum machine_mode mode, rtx operands[])
9514 {
9515 int strict = (reload_in_progress || reload_completed);
9516 rtx op0, op1;
9517 enum tls_model model;
9518
9519 op0 = operands[0];
9520 op1 = operands[1];
9521
9522 if (GET_CODE (op1) == SYMBOL_REF)
9523 {
9524 model = SYMBOL_REF_TLS_MODEL (op1);
9525 if (model)
9526 {
9527 op1 = legitimize_tls_address (op1, model, true);
9528 op1 = force_operand (op1, op0);
9529 if (op1 == op0)
9530 return;
9531 }
9532 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9533 && SYMBOL_REF_DLLIMPORT_P (op1))
9534 op1 = legitimize_dllimport_symbol (op1, false);
9535 }
9536 else if (GET_CODE (op1) == CONST
9537 && GET_CODE (XEXP (op1, 0)) == PLUS
9538 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9539 {
9540 rtx addend = XEXP (XEXP (op1, 0), 1);
9541 rtx symbol = XEXP (XEXP (op1, 0), 0);
9542 rtx tmp = NULL;
9543
9544 model = SYMBOL_REF_TLS_MODEL (symbol);
9545 if (model)
9546 tmp = legitimize_tls_address (symbol, model, true);
9547 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9548 && SYMBOL_REF_DLLIMPORT_P (symbol))
9549 tmp = legitimize_dllimport_symbol (symbol, true);
9550
9551 if (tmp)
9552 {
9553 tmp = force_operand (tmp, NULL);
9554 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9555 op0, 1, OPTAB_DIRECT);
9556 if (tmp == op0)
9557 return;
9558 }
9559 }
9560
9561 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9562 {
9563 if (TARGET_MACHO && !TARGET_64BIT)
9564 {
9565 #if TARGET_MACHO
9566 if (MACHOPIC_PURE)
9567 {
9568 rtx temp = ((reload_in_progress
9569 || ((op0 && REG_P (op0))
9570 && mode == Pmode))
9571 ? op0 : gen_reg_rtx (Pmode));
9572 op1 = machopic_indirect_data_reference (op1, temp);
9573 op1 = machopic_legitimize_pic_address (op1, mode,
9574 temp == op1 ? 0 : temp);
9575 }
9576 else if (MACHOPIC_INDIRECT)
9577 op1 = machopic_indirect_data_reference (op1, 0);
9578 if (op0 == op1)
9579 return;
9580 #endif
9581 }
9582 else
9583 {
9584 if (MEM_P (op0))
9585 op1 = force_reg (Pmode, op1);
9586 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9587 {
9588 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9589 op1 = legitimize_pic_address (op1, reg);
9590 if (op0 == op1)
9591 return;
9592 }
9593 }
9594 }
9595 else
9596 {
9597 if (MEM_P (op0)
9598 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9599 || !push_operand (op0, mode))
9600 && MEM_P (op1))
9601 op1 = force_reg (mode, op1);
9602
9603 if (push_operand (op0, mode)
9604 && ! general_no_elim_operand (op1, mode))
9605 op1 = copy_to_mode_reg (mode, op1);
9606
9607 /* Force large constants in 64bit compilation into register
9608 to get them CSEed. */
9609 if (TARGET_64BIT && mode == DImode
9610 && immediate_operand (op1, mode)
9611 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9612 && !register_operand (op0, mode)
9613 && optimize && !reload_completed && !reload_in_progress)
9614 op1 = copy_to_mode_reg (mode, op1);
9615
9616 if (FLOAT_MODE_P (mode))
9617 {
9618 /* If we are loading a floating point constant to a register,
9619 force the value to memory now, since we'll get better code
9620 out the back end. */
9621
9622 if (strict)
9623 ;
9624 else if (GET_CODE (op1) == CONST_DOUBLE)
9625 {
9626 op1 = validize_mem (force_const_mem (mode, op1));
9627 if (!register_operand (op0, mode))
9628 {
9629 rtx temp = gen_reg_rtx (mode);
9630 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9631 emit_move_insn (op0, temp);
9632 return;
9633 }
9634 }
9635 }
9636 }
9637
9638 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9639 }
9640
9641 void
9642 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9643 {
9644 rtx op0 = operands[0], op1 = operands[1];
9645
9646 /* Force constants other than zero into memory. We do not know how
9647 the instructions used to build constants modify the upper 64 bits
9648 of the register; once we have that information we may be able
9649 to handle some of them more efficiently. */
9650 if ((reload_in_progress | reload_completed) == 0
9651 && register_operand (op0, mode)
9652 && CONSTANT_P (op1)
9653 && standard_sse_constant_p (op1) <= 0)
9654 op1 = validize_mem (force_const_mem (mode, op1));
9655
9656 /* Make operand1 a register if it isn't already. */
9657 if (!no_new_pseudos
9658 && !register_operand (op0, mode)
9659 && !register_operand (op1, mode))
9660 {
9661 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9662 return;
9663 }
9664
9665 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9666 }
9667
9668 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9669 straight to ix86_expand_vector_move. */
9670 /* Code generation for scalar reg-reg moves of single and double precision data:
9671 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9672 movaps reg, reg
9673 else
9674 movss reg, reg
9675 if (x86_sse_partial_reg_dependency == true)
9676 movapd reg, reg
9677 else
9678 movsd reg, reg
9679
9680 Code generation for scalar loads of double precision data:
9681 if (x86_sse_split_regs == true)
9682 movlpd mem, reg (gas syntax)
9683 else
9684 movsd mem, reg
9685
9686 Code generation for unaligned packed loads of single precision data
9687 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9688 if (x86_sse_unaligned_move_optimal)
9689 movups mem, reg
9690
9691 if (x86_sse_partial_reg_dependency == true)
9692 {
9693 xorps reg, reg
9694 movlps mem, reg
9695 movhps mem+8, reg
9696 }
9697 else
9698 {
9699 movlps mem, reg
9700 movhps mem+8, reg
9701 }
9702
9703 Code generation for unaligned packed loads of double precision data
9704 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9705 if (x86_sse_unaligned_move_optimal)
9706 movupd mem, reg
9707
9708 if (x86_sse_split_regs == true)
9709 {
9710 movlpd mem, reg
9711 movhpd mem+8, reg
9712 }
9713 else
9714 {
9715 movsd mem, reg
9716 movhpd mem+8, reg
9717 }
9718 */
9719
9720 void
9721 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9722 {
9723 rtx op0, op1, m;
9724
9725 op0 = operands[0];
9726 op1 = operands[1];
9727
9728 if (MEM_P (op1))
9729 {
9730 /* If we're optimizing for size, movups is the smallest. */
9731 if (optimize_size)
9732 {
9733 op0 = gen_lowpart (V4SFmode, op0);
9734 op1 = gen_lowpart (V4SFmode, op1);
9735 emit_insn (gen_sse_movups (op0, op1));
9736 return;
9737 }
9738
9739 /* ??? If we have typed data, then it would appear that using
9740 movdqu is the only way to get unaligned data loaded with
9741 integer type. */
9742 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9743 {
9744 op0 = gen_lowpart (V16QImode, op0);
9745 op1 = gen_lowpart (V16QImode, op1);
9746 emit_insn (gen_sse2_movdqu (op0, op1));
9747 return;
9748 }
9749
9750 if (TARGET_SSE2 && mode == V2DFmode)
9751 {
9752 rtx zero;
9753
9754 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9755 {
9756 op0 = gen_lowpart (V2DFmode, op0);
9757 op1 = gen_lowpart (V2DFmode, op1);
9758 emit_insn (gen_sse2_movupd (op0, op1));
9759 return;
9760 }
9761
9762 /* When SSE registers are split into halves, we can avoid
9763 writing to the top half twice. */
9764 if (TARGET_SSE_SPLIT_REGS)
9765 {
9766 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9767 zero = op0;
9768 }
9769 else
9770 {
9771 /* ??? Not sure about the best option for the Intel chips.
9772 The following would seem to satisfy; the register is
9773 entirely cleared, breaking the dependency chain. We
9774 then store to the upper half, with a dependency depth
9775 of one. A rumor has it that Intel recommends two movsd
9776 followed by an unpacklpd, but this is unconfirmed. And
9777 given that the dependency depth of the unpacklpd would
9778 still be one, I'm not sure why this would be better. */
9779 zero = CONST0_RTX (V2DFmode);
9780 }
9781
9782 m = adjust_address (op1, DFmode, 0);
9783 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9784 m = adjust_address (op1, DFmode, 8);
9785 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9786 }
9787 else
9788 {
9789 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9790 {
9791 op0 = gen_lowpart (V4SFmode, op0);
9792 op1 = gen_lowpart (V4SFmode, op1);
9793 emit_insn (gen_sse_movups (op0, op1));
9794 return;
9795 }
9796
9797 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9798 emit_move_insn (op0, CONST0_RTX (mode));
9799 else
9800 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9801
9802 if (mode != V4SFmode)
9803 op0 = gen_lowpart (V4SFmode, op0);
9804 m = adjust_address (op1, V2SFmode, 0);
9805 emit_insn (gen_sse_loadlps (op0, op0, m));
9806 m = adjust_address (op1, V2SFmode, 8);
9807 emit_insn (gen_sse_loadhps (op0, op0, m));
9808 }
9809 }
9810 else if (MEM_P (op0))
9811 {
9812 /* If we're optimizing for size, movups is the smallest. */
9813 if (optimize_size)
9814 {
9815 op0 = gen_lowpart (V4SFmode, op0);
9816 op1 = gen_lowpart (V4SFmode, op1);
9817 emit_insn (gen_sse_movups (op0, op1));
9818 return;
9819 }
9820
9821 /* ??? Similar to above, only less clear because of quote
9822 typeless stores unquote. */
9823 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9824 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9825 {
9826 op0 = gen_lowpart (V16QImode, op0);
9827 op1 = gen_lowpart (V16QImode, op1);
9828 emit_insn (gen_sse2_movdqu (op0, op1));
9829 return;
9830 }
9831
9832 if (TARGET_SSE2 && mode == V2DFmode)
9833 {
9834 m = adjust_address (op0, DFmode, 0);
9835 emit_insn (gen_sse2_storelpd (m, op1));
9836 m = adjust_address (op0, DFmode, 8);
9837 emit_insn (gen_sse2_storehpd (m, op1));
9838 }
9839 else
9840 {
9841 if (mode != V4SFmode)
9842 op1 = gen_lowpart (V4SFmode, op1);
9843 m = adjust_address (op0, V2SFmode, 0);
9844 emit_insn (gen_sse_storelps (m, op1));
9845 m = adjust_address (op0, V2SFmode, 8);
9846 emit_insn (gen_sse_storehps (m, op1));
9847 }
9848 }
9849 else
9850 gcc_unreachable ();
9851 }
9852
9853 /* Expand a push in MODE. This is some mode for which we do not support
9854 proper push instructions, at least from the registers that we expect
9855 the value to live in. */
9856
9857 void
9858 ix86_expand_push (enum machine_mode mode, rtx x)
9859 {
9860 rtx tmp;
9861
9862 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9863 GEN_INT (-GET_MODE_SIZE (mode)),
9864 stack_pointer_rtx, 1, OPTAB_DIRECT);
9865 if (tmp != stack_pointer_rtx)
9866 emit_move_insn (stack_pointer_rtx, tmp);
9867
9868 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9869 emit_move_insn (tmp, x);
9870 }
9871
9872 /* Helper function of ix86_fixup_binary_operands to canonicalize
9873 operand order. Returns true if the operands should be swapped. */
9874
9875 static bool
9876 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9877 rtx operands[])
9878 {
9879 rtx dst = operands[0];
9880 rtx src1 = operands[1];
9881 rtx src2 = operands[2];
9882
9883 /* If the operation is not commutative, we can't do anything. */
9884 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9885 return false;
9886
9887 /* Highest priority is that src1 should match dst. */
9888 if (rtx_equal_p (dst, src1))
9889 return false;
9890 if (rtx_equal_p (dst, src2))
9891 return true;
9892
9893 /* Next highest priority is that immediate constants come second. */
9894 if (immediate_operand (src2, mode))
9895 return false;
9896 if (immediate_operand (src1, mode))
9897 return true;
9898
9899 /* Lowest priority is that memory references should come second. */
9900 if (MEM_P (src2))
9901 return false;
9902 if (MEM_P (src1))
9903 return true;
9904
9905 return false;
9906 }
9907
9908
9909 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9910 destination to use for the operation. If different from the true
9911 destination in operands[0], a copy operation will be required. */
9912
9913 rtx
9914 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9915 rtx operands[])
9916 {
9917 rtx dst = operands[0];
9918 rtx src1 = operands[1];
9919 rtx src2 = operands[2];
9920
9921 /* Canonicalize operand order. */
9922 if (ix86_swap_binary_operands_p (code, mode, operands))
9923 {
9924 rtx temp = src1;
9925 src1 = src2;
9926 src2 = temp;
9927 }
9928
9929 /* Both source operands cannot be in memory. */
9930 if (MEM_P (src1) && MEM_P (src2))
9931 {
9932 /* Optimization: Only read from memory once. */
9933 if (rtx_equal_p (src1, src2))
9934 {
9935 src2 = force_reg (mode, src2);
9936 src1 = src2;
9937 }
9938 else
9939 src2 = force_reg (mode, src2);
9940 }
9941
9942 /* If the destination is memory, and we do not have matching source
9943 operands, do things in registers. */
9944 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9945 dst = gen_reg_rtx (mode);
9946
9947 /* Source 1 cannot be a constant. */
9948 if (CONSTANT_P (src1))
9949 src1 = force_reg (mode, src1);
9950
9951 /* Source 1 cannot be a non-matching memory. */
9952 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9953 src1 = force_reg (mode, src1);
9954
9955 operands[1] = src1;
9956 operands[2] = src2;
9957 return dst;
9958 }
9959
9960 /* Similarly, but assume that the destination has already been
9961 set up properly. */
9962
9963 void
9964 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9965 enum machine_mode mode, rtx operands[])
9966 {
9967 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9968 gcc_assert (dst == operands[0]);
9969 }
9970
9971 /* Attempt to expand a binary operator. Make the expansion closer to the
9972 actual machine than just general_operand, which would allow 3 separate
9973 memory references (one output, two input) in a single insn. */
9974
9975 void
9976 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9977 rtx operands[])
9978 {
9979 rtx src1, src2, dst, op, clob;
9980
9981 dst = ix86_fixup_binary_operands (code, mode, operands);
9982 src1 = operands[1];
9983 src2 = operands[2];
9984
9985 /* Emit the instruction. */
9986
9987 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9988 if (reload_in_progress)
9989 {
9990 /* Reload doesn't know about the flags register, and doesn't know that
9991 it doesn't want to clobber it. We can only do this with PLUS. */
9992 gcc_assert (code == PLUS);
9993 emit_insn (op);
9994 }
9995 else
9996 {
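/* Integer ALU instructions clobber the flags register, so attach an
   explicit clobber of FLAGS_REG in a PARALLEL with the SET.  */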
9997 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9998 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9999 }
10000
10001 /* Fix up the destination if needed. */
10002 if (dst != operands[0])
10003 emit_move_insn (operands[0], dst);
10004 }
10005
10006 /* Return TRUE or FALSE depending on whether the binary operator meets the
10007 appropriate constraints. */
10008
10009 int
10010 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10011 rtx operands[3])
10012 {
10013 rtx dst = operands[0];
10014 rtx src1 = operands[1];
10015 rtx src2 = operands[2];
10016
10017 /* Both source operands cannot be in memory. */
10018 if (MEM_P (src1) && MEM_P (src2))
10019 return 0;
10020
10021 /* Canonicalize operand order for commutative operators. */
10022 if (ix86_swap_binary_operands_p (code, mode, operands))
10023 {
10024 rtx temp = src1;
10025 src1 = src2;
10026 src2 = temp;
10027 }
10028
10029 /* If the destination is memory, we must have a matching source operand. */
10030 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10031 return 0;
10032
10033 /* Source 1 cannot be a constant. */
10034 if (CONSTANT_P (src1))
10035 return 0;
10036
10037 /* Source 1 cannot be a non-matching memory. */
10038 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10039 return 0;
10040
10041 return 1;
10042 }
10043
10044 /* Attempt to expand a unary operator. Make the expansion closer to the
10045 actual machine than just general_operand, which would allow 2 separate
10046 memory references (one output, one input) in a single insn. */
10047
10048 void
10049 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10050 rtx operands[])
10051 {
10052 int matching_memory;
10053 rtx src, dst, op, clob;
10054
10055 dst = operands[0];
10056 src = operands[1];
10057
10058 /* If the destination is memory, and we do not have matching source
10059 operands, do things in registers. */
10060 matching_memory = 0;
10061 if (MEM_P (dst))
10062 {
10063 if (rtx_equal_p (dst, src))
10064 matching_memory = 1;
10065 else
10066 dst = gen_reg_rtx (mode);
10067 }
10068
10069 /* When source operand is memory, destination must match. */
10070 if (MEM_P (src) && !matching_memory)
10071 src = force_reg (mode, src);
10072
10073 /* Emit the instruction. */
10074
10075 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10076 if (reload_in_progress || code == NOT)
10077 {
10078 /* Reload doesn't know about the flags register, and doesn't know that
10079 it doesn't want to clobber it. */
10080 gcc_assert (code == NOT);
10081 emit_insn (op);
10082 }
10083 else
10084 {
10085 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10086 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10087 }
10088
10089 /* Fix up the destination if needed. */
10090 if (dst != operands[0])
10091 emit_move_insn (operands[0], dst);
10092 }
10093
10094 /* Return TRUE or FALSE depending on whether the unary operator meets the
10095 appropriate constraints. */
10096
10097 int
10098 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10099 enum machine_mode mode ATTRIBUTE_UNUSED,
10100 rtx operands[2] ATTRIBUTE_UNUSED)
10101 {
10102 /* If one of operands is memory, source and destination must match. */
10103 if ((MEM_P (operands[0])
10104 || MEM_P (operands[1]))
10105 && ! rtx_equal_p (operands[0], operands[1]))
10106 return FALSE;
10107 return TRUE;
10108 }
10109
10110 /* Post-reload splitter for converting an SF or DFmode value in an
10111 SSE register into an unsigned SImode. */
10112
10113 void
10114 ix86_split_convert_uns_si_sse (rtx operands[])
10115 {
10116 enum machine_mode vecmode;
10117 rtx value, large, zero_or_two31, input, two31, x;
10118
10119 large = operands[1];
10120 zero_or_two31 = operands[2];
10121 input = operands[3];
10122 two31 = operands[4];
10123 vecmode = GET_MODE (large);
10124 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10125
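/* Inputs below 2**31 convert directly; for larger inputs we subtract
   2**31 before the signed conversion and then set bit 31 of the result
   with the final XOR.  */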
10126 /* Load up the value into the low element. We must ensure that the other
10127 elements are valid floats -- zero is the easiest such value. */
10128 if (MEM_P (input))
10129 {
10130 if (vecmode == V4SFmode)
10131 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10132 else
10133 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10134 }
10135 else
10136 {
10137 input = gen_rtx_REG (vecmode, REGNO (input));
10138 emit_move_insn (value, CONST0_RTX (vecmode));
10139 if (vecmode == V4SFmode)
10140 emit_insn (gen_sse_movss (value, value, input));
10141 else
10142 emit_insn (gen_sse2_movsd (value, value, input));
10143 }
10144
10145 emit_move_insn (large, two31);
10146 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10147
10148 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10149 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10150
10151 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10152 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10153
10154 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10155 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10156
10157 large = gen_rtx_REG (V4SImode, REGNO (large));
10158 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10159
10160 x = gen_rtx_REG (V4SImode, REGNO (value));
10161 if (vecmode == V4SFmode)
10162 emit_insn (gen_sse2_cvttps2dq (x, value));
10163 else
10164 emit_insn (gen_sse2_cvttpd2dq (x, value));
10165 value = x;
10166
10167 emit_insn (gen_xorv4si3 (value, value, large));
10168 }
10169
10170 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10171 Expects the 64-bit DImode to be supplied in a pair of integral
10172 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10173 -mfpmath=sse, !optimize_size only. */
10174
10175 void
10176 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10177 {
10178 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10179 rtx int_xmm, fp_xmm;
10180 rtx biases, exponents;
10181 rtx x;
10182
10183 int_xmm = gen_reg_rtx (V4SImode);
10184 if (TARGET_INTER_UNIT_MOVES)
10185 emit_insn (gen_movdi_to_sse (int_xmm, input));
10186 else if (TARGET_SSE_SPLIT_REGS)
10187 {
10188 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10189 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10190 }
10191 else
10192 {
10193 x = gen_reg_rtx (V2DImode);
10194 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10195 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10196 }
10197
10198 x = gen_rtx_CONST_VECTOR (V4SImode,
10199 gen_rtvec (4, GEN_INT (0x43300000UL),
10200 GEN_INT (0x45300000UL),
10201 const0_rtx, const0_rtx));
10202 exponents = validize_mem (force_const_mem (V4SImode, x));
10203
10204 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10205 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10206
10207 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10208 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10209 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10210 (0x1.0p84 + double(fp_value_hi_xmm)).
10211 Note these exponents differ by 32. */
10212
10213 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10214
10215 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10216 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10217 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10218 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10219 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10220 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10221 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10222 biases = validize_mem (force_const_mem (V2DFmode, biases));
10223 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10224
10225 /* Add the upper and lower DFmode values together. */
10226 if (TARGET_SSE3)
10227 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10228 else
10229 {
10230 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10231 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10232 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10233 }
10234
10235 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10236 }
10237
10238 /* Convert an unsigned SImode value into a DFmode. Only currently used
10239 for SSE, but applicable anywhere. */
10240
10241 void
10242 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10243 {
10244 REAL_VALUE_TYPE TWO31r;
10245 rtx x, fp;
10246
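/* Bias the input by -2**31 so it fits in signed SImode, convert it,
   then add 2**31.0 back in DFmode.  */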
10247 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10248 NULL, 1, OPTAB_DIRECT);
10249
10250 fp = gen_reg_rtx (DFmode);
10251 emit_insn (gen_floatsidf2 (fp, x));
10252
10253 real_ldexp (&TWO31r, &dconst1, 31);
10254 x = const_double_from_real_value (TWO31r, DFmode);
10255
10256 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10257 if (x != target)
10258 emit_move_insn (target, x);
10259 }
10260
10261 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10262 32-bit mode; otherwise we have a direct convert instruction. */
10263
10264 void
10265 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10266 {
10267 REAL_VALUE_TYPE TWO32r;
10268 rtx fp_lo, fp_hi, x;
10269
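/* Compute (double) (signed) high_part * 2**32 + (double) (unsigned) low_part.  */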
10270 fp_lo = gen_reg_rtx (DFmode);
10271 fp_hi = gen_reg_rtx (DFmode);
10272
10273 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10274
10275 real_ldexp (&TWO32r, &dconst1, 32);
10276 x = const_double_from_real_value (TWO32r, DFmode);
10277 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10278
10279 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10280
10281 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10282 0, OPTAB_DIRECT);
10283 if (x != target)
10284 emit_move_insn (target, x);
10285 }
10286
10287 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10288 For x86_32, -mfpmath=sse, !optimize_size only. */
10289 void
10290 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10291 {
10292 REAL_VALUE_TYPE ONE16r;
10293 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10294
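/* Split the input into two 16-bit halves, each of which converts to
   SFmode exactly, then recombine as fp_hi * 2**16 + fp_lo.  */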
10295 real_ldexp (&ONE16r, &dconst1, 16);
10296 x = const_double_from_real_value (ONE16r, SFmode);
10297 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10298 NULL, 0, OPTAB_DIRECT);
10299 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10300 NULL, 0, OPTAB_DIRECT);
10301 fp_hi = gen_reg_rtx (SFmode);
10302 fp_lo = gen_reg_rtx (SFmode);
10303 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10304 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10305 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10306 0, OPTAB_DIRECT);
10307 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10308 0, OPTAB_DIRECT);
10309 if (!rtx_equal_p (target, fp_hi))
10310 emit_move_insn (target, fp_hi);
10311 }
10312
10313 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
10314 then replicate the value for all elements of the vector
10315 register. */
10316
10317 rtx
10318 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10319 {
10320 rtvec v;
10321 switch (mode)
10322 {
10323 case SFmode:
10324 if (vect)
10325 v = gen_rtvec (4, value, value, value, value);
10326 else
10327 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10328 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10329 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10330
10331 case DFmode:
10332 if (vect)
10333 v = gen_rtvec (2, value, value);
10334 else
10335 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10336 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10337
10338 default:
10339 gcc_unreachable ();
10340 }
10341 }
10342
10343 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10344 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10345 true, then replicate the mask for all elements of the vector register.
10346 If INVERT is true, then create a mask excluding the sign bit. */
10347
10348 rtx
10349 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10350 {
10351 enum machine_mode vec_mode;
10352 HOST_WIDE_INT hi, lo;
10353 int shift = 63;
10354 rtx v;
10355 rtx mask;
10356
10357 /* Find the sign bit, sign extended to 2*HWI. */
10358 if (mode == SFmode)
10359 lo = 0x80000000, hi = lo < 0;
10360 else if (HOST_BITS_PER_WIDE_INT >= 64)
10361 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10362 else
10363 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10364
10365 if (invert)
10366 lo = ~lo, hi = ~hi;
10367
10368 /* Force this value into the low part of a fp vector constant. */
10369 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10370 mask = gen_lowpart (mode, mask);
10371
10372 v = ix86_build_const_vector (mode, vect, mask);
10373 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10374 return force_reg (vec_mode, v);
10375 }
10376
10377 /* Generate code for floating point ABS or NEG. */
10378
10379 void
10380 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10381 rtx operands[])
10382 {
10383 rtx mask, set, use, clob, dst, src;
10384 bool matching_memory;
10385 bool use_sse = false;
10386 bool vector_mode = VECTOR_MODE_P (mode);
10387 enum machine_mode elt_mode = mode;
10388
10389 if (vector_mode)
10390 {
10391 elt_mode = GET_MODE_INNER (mode);
10392 use_sse = true;
10393 }
10394 else if (TARGET_SSE_MATH)
10395 use_sse = SSE_FLOAT_MODE_P (mode);
10396
10397 /* NEG and ABS performed with SSE use bitwise mask operations.
10398 Create the appropriate mask now. */
10399 if (use_sse)
10400 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10401 else
10402 mask = NULL_RTX;
10403
10404 dst = operands[0];
10405 src = operands[1];
10406
10407 /* If the destination is memory, and we don't have matching source
10408 operands or we're using the x87, do things in registers. */
10409 matching_memory = false;
10410 if (MEM_P (dst))
10411 {
10412 if (use_sse && rtx_equal_p (dst, src))
10413 matching_memory = true;
10414 else
10415 dst = gen_reg_rtx (mode);
10416 }
10417 if (MEM_P (src) && !matching_memory)
10418 src = force_reg (mode, src);
10419
10420 if (vector_mode)
10421 {
10422 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10423 set = gen_rtx_SET (VOIDmode, dst, set);
10424 emit_insn (set);
10425 }
10426 else
10427 {
10428 set = gen_rtx_fmt_e (code, mode, src);
10429 set = gen_rtx_SET (VOIDmode, dst, set);
10430 if (mask)
10431 {
10432 use = gen_rtx_USE (VOIDmode, mask);
10433 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10434 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10435 gen_rtvec (3, set, use, clob)));
10436 }
10437 else
10438 emit_insn (set);
10439 }
10440
10441 if (dst != operands[0])
10442 emit_move_insn (operands[0], dst);
10443 }
10444
10445 /* Expand a copysign operation. Special case operand 0 being a constant. */
10446
10447 void
10448 ix86_expand_copysign (rtx operands[])
10449 {
10450 enum machine_mode mode, vmode;
10451 rtx dest, op0, op1, mask, nmask;
10452
10453 dest = operands[0];
10454 op0 = operands[1];
10455 op1 = operands[2];
10456
10457 mode = GET_MODE (dest);
10458 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10459
10460 if (GET_CODE (op0) == CONST_DOUBLE)
10461 {
10462 rtvec v;
10463
10464 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10465 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10466
10467 if (op0 == CONST0_RTX (mode))
10468 op0 = CONST0_RTX (vmode);
10469 else
10470 {
10471 if (mode == SFmode)
10472 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10473 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10474 else
10475 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10476 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10477 }
10478
10479 mask = ix86_build_signbit_mask (mode, 0, 0);
10480
10481 if (mode == SFmode)
10482 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10483 else
10484 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10485 }
10486 else
10487 {
10488 nmask = ix86_build_signbit_mask (mode, 0, 1);
10489 mask = ix86_build_signbit_mask (mode, 0, 0);
10490
10491 if (mode == SFmode)
10492 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10493 else
10494 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10495 }
10496 }
10497
10498 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10499 be a constant, and so has already been expanded into a vector constant. */
10500
10501 void
10502 ix86_split_copysign_const (rtx operands[])
10503 {
10504 enum machine_mode mode, vmode;
10505 rtx dest, op0, op1, mask, x;
10506
10507 dest = operands[0];
10508 op0 = operands[1];
10509 op1 = operands[2];
10510 mask = operands[3];
10511
10512 mode = GET_MODE (dest);
10513 vmode = GET_MODE (mask);
10514
10515 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10516 x = gen_rtx_AND (vmode, dest, mask);
10517 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10518
10519 if (op0 != CONST0_RTX (vmode))
10520 {
10521 x = gen_rtx_IOR (vmode, dest, op0);
10522 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10523 }
10524 }
10525
10526 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10527 so we have to do two masks. */
10528
10529 void
10530 ix86_split_copysign_var (rtx operands[])
10531 {
10532 enum machine_mode mode, vmode;
10533 rtx dest, scratch, op0, op1, mask, nmask, x;
10534
10535 dest = operands[0];
10536 scratch = operands[1];
10537 op0 = operands[2];
10538 op1 = operands[3];
10539 nmask = operands[4];
10540 mask = operands[5];
10541
10542 mode = GET_MODE (dest);
10543 vmode = GET_MODE (mask);
10544
10545 if (rtx_equal_p (op0, op1))
10546 {
10547 /* Shouldn't happen often (it's useless, obviously), but when it does
10548 we'd generate incorrect code if we continue below. */
10549 emit_move_insn (dest, op0);
10550 return;
10551 }
10552
10553 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10554 {
10555 gcc_assert (REGNO (op1) == REGNO (scratch));
10556
10557 x = gen_rtx_AND (vmode, scratch, mask);
10558 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10559
10560 dest = mask;
10561 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10562 x = gen_rtx_NOT (vmode, dest);
10563 x = gen_rtx_AND (vmode, x, op0);
10564 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10565 }
10566 else
10567 {
10568 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10569 {
10570 x = gen_rtx_AND (vmode, scratch, mask);
10571 }
10572 else /* alternative 2,4 */
10573 {
10574 gcc_assert (REGNO (mask) == REGNO (scratch));
10575 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10576 x = gen_rtx_AND (vmode, scratch, op1);
10577 }
10578 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10579
10580 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10581 {
10582 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10583 x = gen_rtx_AND (vmode, dest, nmask);
10584 }
10585 else /* alternative 3,4 */
10586 {
10587 gcc_assert (REGNO (nmask) == REGNO (dest));
10588 dest = nmask;
10589 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10590 x = gen_rtx_AND (vmode, dest, op0);
10591 }
10592 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10593 }
10594
10595 x = gen_rtx_IOR (vmode, dest, scratch);
10596 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10597 }
10598
10599 /* Return TRUE or FALSE depending on whether the first SET in INSN
10600 has source and destination with matching CC modes, and that the
10601 CC mode is at least as constrained as REQ_MODE. */
10602
10603 int
10604 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10605 {
10606 rtx set;
10607 enum machine_mode set_mode;
10608
10609 set = PATTERN (insn);
10610 if (GET_CODE (set) == PARALLEL)
10611 set = XVECEXP (set, 0, 0);
10612 gcc_assert (GET_CODE (set) == SET);
10613 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10614
10615 set_mode = GET_MODE (SET_DEST (set));
10616 switch (set_mode)
10617 {
10618 case CCNOmode:
10619 if (req_mode != CCNOmode
10620 && (req_mode != CCmode
10621 || XEXP (SET_SRC (set), 1) != const0_rtx))
10622 return 0;
10623 break;
10624 case CCmode:
10625 if (req_mode == CCGCmode)
10626 return 0;
10627 /* FALLTHRU */
10628 case CCGCmode:
10629 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10630 return 0;
10631 /* FALLTHRU */
10632 case CCGOCmode:
10633 if (req_mode == CCZmode)
10634 return 0;
10635 /* FALLTHRU */
10636 case CCZmode:
10637 break;
10638
10639 default:
10640 gcc_unreachable ();
10641 }
10642
10643 return (GET_MODE (SET_SRC (set)) == set_mode);
10644 }
10645
10646 /* Generate insn patterns to do an integer compare of OPERANDS. */
10647
10648 static rtx
10649 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10650 {
10651 enum machine_mode cmpmode;
10652 rtx tmp, flags;
10653
10654 cmpmode = SELECT_CC_MODE (code, op0, op1);
10655 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10656
10657 /* This is very simple, but making the interface the same as in the
10658 FP case makes the rest of the code easier. */
10659 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10660 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10661
10662 /* Return the test that should be put into the flags user, i.e.
10663 the bcc, scc, or cmov instruction. */
10664 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10665 }
10666
10667 /* Figure out whether to use ordered or unordered fp comparisons.
10668 Return the appropriate mode to use. */
10669
10670 enum machine_mode
10671 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10672 {
10673 /* ??? In order to make all comparisons reversible, we do all comparisons
10674 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10675 all forms trapping and nontrapping comparisons, we can make inequality
10676 comparisons trapping again, since it results in better code when using
10677 FCOM based compares. */
10678 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10679 }
10680
10681 enum machine_mode
10682 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10683 {
10684 enum machine_mode mode = GET_MODE (op0);
10685
10686 if (SCALAR_FLOAT_MODE_P (mode))
10687 {
10688 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
10689 return ix86_fp_compare_mode (code);
10690 }
10691
10692 switch (code)
10693 {
10694 /* Only zero flag is needed. */
10695 case EQ: /* ZF=0 */
10696 case NE: /* ZF!=0 */
10697 return CCZmode;
10698 /* Codes needing carry flag. */
10699 case GEU: /* CF=0 */
10700 case GTU: /* CF=0 & ZF=0 */
10701 case LTU: /* CF=1 */
10702 case LEU: /* CF=1 | ZF=1 */
10703 return CCmode;
10704 /* Codes possibly doable only with sign flag when
10705 comparing against zero. */
10706 case GE: /* SF=OF or SF=0 */
10707 case LT: /* SF<>OF or SF=1 */
10708 if (op1 == const0_rtx)
10709 return CCGOCmode;
10710 else
10711 /* For other cases Carry flag is not required. */
10712 return CCGCmode;
10713 /* Codes doable only with sign flag when comparing
10714 against zero, but we miss jump instruction for it
10715 so we need to use relational tests against overflow
10716 that thus needs to be zero. */
10717 case GT: /* ZF=0 & SF=OF */
10718 case LE: /* ZF=1 | SF<>OF */
10719 if (op1 == const0_rtx)
10720 return CCNOmode;
10721 else
10722 return CCGCmode;
10723 /* The strcmp pattern does a (use flags), and combine may ask us for the
10724 proper mode. */
10725 case USE:
10726 return CCmode;
10727 default:
10728 gcc_unreachable ();
10729 }
10730 }
10731
10732 /* Return the fixed registers used for condition codes. */
10733
10734 static bool
10735 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10736 {
10737 *p1 = FLAGS_REG;
10738 *p2 = FPSR_REG;
10739 return true;
10740 }
10741
10742 /* If two condition code modes are compatible, return a condition code
10743 mode which is compatible with both. Otherwise, return
10744 VOIDmode. */
10745
10746 static enum machine_mode
10747 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10748 {
10749 if (m1 == m2)
10750 return m1;
10751
10752 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10753 return VOIDmode;
10754
10755 if ((m1 == CCGCmode && m2 == CCGOCmode)
10756 || (m1 == CCGOCmode && m2 == CCGCmode))
10757 return CCGCmode;
10758
10759 switch (m1)
10760 {
10761 default:
10762 gcc_unreachable ();
10763
10764 case CCmode:
10765 case CCGCmode:
10766 case CCGOCmode:
10767 case CCNOmode:
10768 case CCZmode:
10769 switch (m2)
10770 {
10771 default:
10772 return VOIDmode;
10773
10774 case CCmode:
10775 case CCGCmode:
10776 case CCGOCmode:
10777 case CCNOmode:
10778 case CCZmode:
10779 return CCmode;
10780 }
10781
10782 case CCFPmode:
10783 case CCFPUmode:
10784 /* These are only compatible with themselves, which we already
10785 checked above. */
10786 return VOIDmode;
10787 }
10788 }
10789
10790 /* Split comparison code CODE into comparisons we can do using branch
10791 instructions. BYPASS_CODE is the comparison code for a branch that will
10792 branch around FIRST_CODE and SECOND_CODE. If one of the branches is
10793 not required, its value is set to UNKNOWN.
10794 We never require more than two branches. */
10795
10796 void
10797 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10798 enum rtx_code *first_code,
10799 enum rtx_code *second_code)
10800 {
10801 *first_code = code;
10802 *bypass_code = UNKNOWN;
10803 *second_code = UNKNOWN;
10804
10805 /* The fcomi comparison sets flags as follows:
10806
10807 cmp ZF PF CF
10808 > 0 0 0
10809 < 0 0 1
10810 = 1 0 0
10811 un 1 1 1 */
10812
10813 switch (code)
10814 {
10815 case GT: /* GTU - CF=0 & ZF=0 */
10816 case GE: /* GEU - CF=0 */
10817 case ORDERED: /* PF=0 */
10818 case UNORDERED: /* PF=1 */
10819 case UNEQ: /* EQ - ZF=1 */
10820 case UNLT: /* LTU - CF=1 */
10821 case UNLE: /* LEU - CF=1 | ZF=1 */
10822 case LTGT: /* EQ - ZF=0 */
10823 break;
10824 case LT: /* LTU - CF=1 - fails on unordered */
10825 *first_code = UNLT;
10826 *bypass_code = UNORDERED;
10827 break;
10828 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10829 *first_code = UNLE;
10830 *bypass_code = UNORDERED;
10831 break;
10832 case EQ: /* EQ - ZF=1 - fails on unordered */
10833 *first_code = UNEQ;
10834 *bypass_code = UNORDERED;
10835 break;
10836 case NE: /* NE - ZF=0 - fails on unordered */
10837 *first_code = LTGT;
10838 *second_code = UNORDERED;
10839 break;
10840 case UNGE: /* GEU - CF=0 - fails on unordered */
10841 *first_code = GE;
10842 *second_code = UNORDERED;
10843 break;
10844 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10845 *first_code = GT;
10846 *second_code = UNORDERED;
10847 break;
10848 default:
10849 gcc_unreachable ();
10850 }
10851 if (!TARGET_IEEE_FP)
10852 {
10853 *second_code = UNKNOWN;
10854 *bypass_code = UNKNOWN;
10855 }
10856 }
10857
10858 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10859 All the following functions use the number of instructions as a cost metric.
10860 In the future this should be tweaked to compute bytes for optimize_size and
10861 to take into account the performance of various instructions on various CPUs. */
10862 static int
10863 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10864 {
10865 if (!TARGET_IEEE_FP)
10866 return 4;
10867 /* The cost of code output by ix86_expand_fp_compare. */
10868 switch (code)
10869 {
10870 case UNLE:
10871 case UNLT:
10872 case LTGT:
10873 case GT:
10874 case GE:
10875 case UNORDERED:
10876 case ORDERED:
10877 case UNEQ:
10878 return 4;
10879 break;
10880 case LT:
10881 case NE:
10882 case EQ:
10883 case UNGE:
10884 return 5;
10885 break;
10886 case LE:
10887 case UNGT:
10888 return 6;
10889 break;
10890 default:
10891 gcc_unreachable ();
10892 }
10893 }
10894
10895 /* Return cost of comparison done using fcomi operation.
10896 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10897 static int
10898 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10899 {
10900 enum rtx_code bypass_code, first_code, second_code;
10901 /* Return an arbitrarily high cost when the instruction is not supported - this
10902 prevents gcc from using it. */
10903 if (!TARGET_CMOVE)
10904 return 1024;
10905 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10906 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10907 }
10908
10909 /* Return cost of comparison done using sahf operation.
10910 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10911 static int
10912 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10913 {
10914 enum rtx_code bypass_code, first_code, second_code;
10915 /* Return an arbitrarily high cost when the instruction is not preferred - this
10916 prevents gcc from using it. */
10917 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10918 return 1024;
10919 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10920 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10921 }
10922
10923 /* Compute cost of the comparison done using any method.
10924 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10925 static int
10926 ix86_fp_comparison_cost (enum rtx_code code)
10927 {
10928 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10929 int min;
10930
10931 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10932 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10933
10934 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10935 if (min > sahf_cost)
10936 min = sahf_cost;
10937 if (min > fcomi_cost)
10938 min = fcomi_cost;
10939 return min;
10940 }
10941
10942 /* Return true if we should use an FCOMI instruction for this
10943 fp comparison. */
10944
10945 int
10946 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10947 {
10948 enum rtx_code swapped_code = swap_condition (code);
10949
10950 return ((ix86_fp_comparison_cost (code)
10951 == ix86_fp_comparison_fcomi_cost (code))
10952 || (ix86_fp_comparison_cost (swapped_code)
10953 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10954 }
10955
10956 /* Swap, force into registers, or otherwise massage the two operands
10957 to a fp comparison. The operands are updated in place; the new
10958 comparison code is returned. */
10959
10960 static enum rtx_code
10961 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10962 {
10963 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10964 rtx op0 = *pop0, op1 = *pop1;
10965 enum machine_mode op_mode = GET_MODE (op0);
10966 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10967
10968 /* All of the unordered compare instructions only work on registers.
10969 The same is true of the fcomi compare instructions. The XFmode
10970 compare instructions require registers except when comparing
10971 against zero or when converting operand 1 from fixed point to
10972 floating point. */
10973
10974 if (!is_sse
10975 && (fpcmp_mode == CCFPUmode
10976 || (op_mode == XFmode
10977 && ! (standard_80387_constant_p (op0) == 1
10978 || standard_80387_constant_p (op1) == 1)
10979 && GET_CODE (op1) != FLOAT)
10980 || ix86_use_fcomi_compare (code)))
10981 {
10982 op0 = force_reg (op_mode, op0);
10983 op1 = force_reg (op_mode, op1);
10984 }
10985 else
10986 {
10987 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10988 things around if they appear profitable, otherwise force op0
10989 into a register. */
10990
10991 if (standard_80387_constant_p (op0) == 0
10992 || (MEM_P (op0)
10993 && ! (standard_80387_constant_p (op1) == 0
10994 || MEM_P (op1))))
10995 {
10996 rtx tmp;
10997 tmp = op0, op0 = op1, op1 = tmp;
10998 code = swap_condition (code);
10999 }
11000
11001 if (!REG_P (op0))
11002 op0 = force_reg (op_mode, op0);
11003
11004 if (CONSTANT_P (op1))
11005 {
11006 int tmp = standard_80387_constant_p (op1);
11007 if (tmp == 0)
11008 op1 = validize_mem (force_const_mem (op_mode, op1));
11009 else if (tmp == 1)
11010 {
11011 if (TARGET_CMOVE)
11012 op1 = force_reg (op_mode, op1);
11013 }
11014 else
11015 op1 = force_reg (op_mode, op1);
11016 }
11017 }
11018
11019 /* Try to rearrange the comparison to make it cheaper. */
11020 if (ix86_fp_comparison_cost (code)
11021 > ix86_fp_comparison_cost (swap_condition (code))
11022 && (REG_P (op1) || !no_new_pseudos))
11023 {
11024 rtx tmp;
11025 tmp = op0, op0 = op1, op1 = tmp;
11026 code = swap_condition (code);
11027 if (!REG_P (op0))
11028 op0 = force_reg (op_mode, op0);
11029 }
11030
11031 *pop0 = op0;
11032 *pop1 = op1;
11033 return code;
11034 }
11035
11036 /* Convert the comparison codes we use to represent FP comparisons into the
11037 integer code that will result in a proper branch. Return UNKNOWN if no such
11038 code is available. */
11039
11040 enum rtx_code
11041 ix86_fp_compare_code_to_integer (enum rtx_code code)
11042 {
11043 switch (code)
11044 {
11045 case GT:
11046 return GTU;
11047 case GE:
11048 return GEU;
11049 case ORDERED:
11050 case UNORDERED:
11051 return code;
11052 break;
11053 case UNEQ:
11054 return EQ;
11055 break;
11056 case UNLT:
11057 return LTU;
11058 break;
11059 case UNLE:
11060 return LEU;
11061 break;
11062 case LTGT:
11063 return NE;
11064 break;
11065 default:
11066 return UNKNOWN;
11067 }
11068 }
11069
11070 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11071
11072 static rtx
11073 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11074 rtx *second_test, rtx *bypass_test)
11075 {
11076 enum machine_mode fpcmp_mode, intcmp_mode;
11077 rtx tmp, tmp2;
11078 int cost = ix86_fp_comparison_cost (code);
11079 enum rtx_code bypass_code, first_code, second_code;
11080
11081 fpcmp_mode = ix86_fp_compare_mode (code);
11082 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11083
11084 if (second_test)
11085 *second_test = NULL_RTX;
11086 if (bypass_test)
11087 *bypass_test = NULL_RTX;
11088
11089 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11090
11091 /* Do fcomi/sahf based test when profitable. */
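/* (TARGET_CMOVE is used as a proxy for fcomi availability here: fcomi and
   fcmov were introduced together on the i686, so a cmov-capable target can
   compare straight into the flags; otherwise we fall back to fnstsw + sahf
   or to the AH bit-twiddling sequences further below.)  */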
11092 if ((TARGET_CMOVE || TARGET_SAHF)
11093 && (bypass_code == UNKNOWN || bypass_test)
11094 && (second_code == UNKNOWN || second_test)
11095 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11096 {
11097 if (TARGET_CMOVE)
11098 {
11099 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11100 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11101 tmp);
11102 emit_insn (tmp);
11103 }
11104 else
11105 {
11106 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11107 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11108 if (!scratch)
11109 scratch = gen_reg_rtx (HImode);
11110 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11111 emit_insn (gen_x86_sahf_1 (scratch));
11112 }
11113
11114 /* The FP codes work out to act like unsigned. */
11115 intcmp_mode = fpcmp_mode;
11116 code = first_code;
11117 if (bypass_code != UNKNOWN)
11118 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11119 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11120 const0_rtx);
11121 if (second_code != UNKNOWN)
11122 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11123 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11124 const0_rtx);
11125 }
11126 else
11127 {
11128 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11129 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11130 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11131 if (!scratch)
11132 scratch = gen_reg_rtx (HImode);
11133 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11134
11135 /* In the unordered case, we have to check C2 for NaN's, which
11136 doesn't happen to work out to anything nice combination-wise.
11137 So do some bit twiddling on the value we've got in AH to come
11138 up with an appropriate set of condition codes. */
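/* After the fnstsw, AH holds the FPU condition bits: C0 in bit 0 (0x01),
   C2 in bit 2 (0x04) and C3 in bit 6 (0x40).  The masks below are
   combinations of these; e.g. 0x45 tests C3 | C2 | C0.  */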
11139
11140 intcmp_mode = CCNOmode;
11141 switch (code)
11142 {
11143 case GT:
11144 case UNGT:
11145 if (code == GT || !TARGET_IEEE_FP)
11146 {
11147 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11148 code = EQ;
11149 }
11150 else
11151 {
11152 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11153 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11154 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11155 intcmp_mode = CCmode;
11156 code = GEU;
11157 }
11158 break;
11159 case LT:
11160 case UNLT:
11161 if (code == LT && TARGET_IEEE_FP)
11162 {
11163 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11164 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11165 intcmp_mode = CCmode;
11166 code = EQ;
11167 }
11168 else
11169 {
11170 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11171 code = NE;
11172 }
11173 break;
11174 case GE:
11175 case UNGE:
11176 if (code == GE || !TARGET_IEEE_FP)
11177 {
11178 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11179 code = EQ;
11180 }
11181 else
11182 {
11183 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11184 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11185 GEN_INT (0x01)));
11186 code = NE;
11187 }
11188 break;
11189 case LE:
11190 case UNLE:
11191 if (code == LE && TARGET_IEEE_FP)
11192 {
11193 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11194 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11195 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11196 intcmp_mode = CCmode;
11197 code = LTU;
11198 }
11199 else
11200 {
11201 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11202 code = NE;
11203 }
11204 break;
11205 case EQ:
11206 case UNEQ:
11207 if (code == EQ && TARGET_IEEE_FP)
11208 {
11209 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11210 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11211 intcmp_mode = CCmode;
11212 code = EQ;
11213 }
11214 else
11215 {
11216 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11217 code = NE;
11218 break;
11219 }
11220 break;
11221 case NE:
11222 case LTGT:
11223 if (code == NE && TARGET_IEEE_FP)
11224 {
11225 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11226 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11227 GEN_INT (0x40)));
11228 code = NE;
11229 }
11230 else
11231 {
11232 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11233 code = EQ;
11234 }
11235 break;
11236
11237 case UNORDERED:
11238 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11239 code = NE;
11240 break;
11241 case ORDERED:
11242 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11243 code = EQ;
11244 break;
11245
11246 default:
11247 gcc_unreachable ();
11248 }
11249 }
11250
11251 /* Return the test that should be put into the flags user, i.e.
11252 the bcc, scc, or cmov instruction. */
11253 return gen_rtx_fmt_ee (code, VOIDmode,
11254 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11255 const0_rtx);
11256 }
11257
11258 rtx
11259 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11260 {
11261 rtx op0, op1, ret;
11262 op0 = ix86_compare_op0;
11263 op1 = ix86_compare_op1;
11264
11265 if (second_test)
11266 *second_test = NULL_RTX;
11267 if (bypass_test)
11268 *bypass_test = NULL_RTX;
11269
11270 if (ix86_compare_emitted)
11271 {
11272 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11273 ix86_compare_emitted = NULL_RTX;
11274 }
11275 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11276 {
11277 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11278 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11279 second_test, bypass_test);
11280 }
11281 else
11282 ret = ix86_expand_int_compare (code, op0, op1);
11283
11284 return ret;
11285 }
11286
11287 /* Return true if the CODE will result in a nontrivial jump sequence. */
11288 bool
11289 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11290 {
11291 enum rtx_code bypass_code, first_code, second_code;
11292 if (!TARGET_CMOVE)
11293 return true;
11294 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11295 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11296 }
11297
11298 void
11299 ix86_expand_branch (enum rtx_code code, rtx label)
11300 {
11301 rtx tmp;
11302
11303 /* If we have emitted a compare insn, go straight to simple.
11304 ix86_expand_compare won't emit anything if ix86_compare_emitted
11305 is non-NULL. */
11306 if (ix86_compare_emitted)
11307 goto simple;
11308
11309 switch (GET_MODE (ix86_compare_op0))
11310 {
11311 case QImode:
11312 case HImode:
11313 case SImode:
11314 simple:
11315 tmp = ix86_expand_compare (code, NULL, NULL);
11316 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11317 gen_rtx_LABEL_REF (VOIDmode, label),
11318 pc_rtx);
11319 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11320 return;
11321
11322 case SFmode:
11323 case DFmode:
11324 case XFmode:
11325 {
11326 rtvec vec;
11327 int use_fcomi;
11328 enum rtx_code bypass_code, first_code, second_code;
11329
11330 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11331 &ix86_compare_op1);
11332
11333 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11334
11335 /* Check whether we will use the natural sequence with one jump. If
11336 so, we can expand the jump early. Otherwise delay expansion by
11337 creating a compound insn so as not to confuse the optimizers. */
11338 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11339 && TARGET_CMOVE)
11340 {
11341 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11342 gen_rtx_LABEL_REF (VOIDmode, label),
11343 pc_rtx, NULL_RTX, NULL_RTX);
11344 }
11345 else
11346 {
11347 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11348 ix86_compare_op0, ix86_compare_op1);
11349 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11350 gen_rtx_LABEL_REF (VOIDmode, label),
11351 pc_rtx);
11352 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11353
11354 use_fcomi = ix86_use_fcomi_compare (code);
11355 vec = rtvec_alloc (3 + !use_fcomi);
11356 RTVEC_ELT (vec, 0) = tmp;
11357 RTVEC_ELT (vec, 1)
11358 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11359 RTVEC_ELT (vec, 2)
11360 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11361 if (! use_fcomi)
11362 RTVEC_ELT (vec, 3)
11363 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11364
11365 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11366 }
11367 return;
11368 }
11369
11370 case DImode:
11371 if (TARGET_64BIT)
11372 goto simple;
11373 case TImode:
11374 /* Expand DImode branch into multiple compare+branch. */
11375 {
11376 rtx lo[2], hi[2], label2;
11377 enum rtx_code code1, code2, code3;
11378 enum machine_mode submode;
11379
11380 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11381 {
11382 tmp = ix86_compare_op0;
11383 ix86_compare_op0 = ix86_compare_op1;
11384 ix86_compare_op1 = tmp;
11385 code = swap_condition (code);
11386 }
11387 if (GET_MODE (ix86_compare_op0) == DImode)
11388 {
11389 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11390 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11391 submode = SImode;
11392 }
11393 else
11394 {
11395 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11396 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11397 submode = DImode;
11398 }
11399
11400 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11401 avoid two branches. This costs one extra insn, so disable when
11402 optimizing for size. */
11403
11404 if ((code == EQ || code == NE)
11405 && (!optimize_size
11406 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11407 {
11408 rtx xor0, xor1;
11409
11410 xor1 = hi[0];
11411 if (hi[1] != const0_rtx)
11412 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11413 NULL_RTX, 0, OPTAB_WIDEN);
11414
11415 xor0 = lo[0];
11416 if (lo[1] != const0_rtx)
11417 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11418 NULL_RTX, 0, OPTAB_WIDEN);
11419
11420 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11421 NULL_RTX, 0, OPTAB_WIDEN);
11422
11423 ix86_compare_op0 = tmp;
11424 ix86_compare_op1 = const0_rtx;
11425 ix86_expand_branch (code, label);
11426 return;
11427 }
11428
11429 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
11430 op1 is a constant, and the low word is zero, then we can just
11431 examine the high word. */
11432
11433 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11434 switch (code)
11435 {
11436 case LT: case LTU: case GE: case GEU:
11437 ix86_compare_op0 = hi[0];
11438 ix86_compare_op1 = hi[1];
11439 ix86_expand_branch (code, label);
11440 return;
11441 default:
11442 break;
11443 }
11444
11445 /* Otherwise, we need two or three jumps. */
11446
11447 label2 = gen_label_rtx ();
11448
11449 code1 = code;
11450 code2 = swap_condition (code);
11451 code3 = unsigned_condition (code);
11452
11453 switch (code)
11454 {
11455 case LT: case GT: case LTU: case GTU:
11456 break;
11457
11458 case LE: code1 = LT; code2 = GT; break;
11459 case GE: code1 = GT; code2 = LT; break;
11460 case LEU: code1 = LTU; code2 = GTU; break;
11461 case GEU: code1 = GTU; code2 = LTU; break;
11462
11463 case EQ: code1 = UNKNOWN; code2 = NE; break;
11464 case NE: code2 = UNKNOWN; break;
11465
11466 default:
11467 gcc_unreachable ();
11468 }
11469
11470 /*
11471 * a < b =>
11472 * if (hi(a) < hi(b)) goto true;
11473 * if (hi(a) > hi(b)) goto false;
11474 * if (lo(a) < lo(b)) goto true;
11475 * false:
11476 */
11477
11478 ix86_compare_op0 = hi[0];
11479 ix86_compare_op1 = hi[1];
11480
11481 if (code1 != UNKNOWN)
11482 ix86_expand_branch (code1, label);
11483 if (code2 != UNKNOWN)
11484 ix86_expand_branch (code2, label2);
11485
11486 ix86_compare_op0 = lo[0];
11487 ix86_compare_op1 = lo[1];
11488 ix86_expand_branch (code3, label);
11489
11490 if (code2 != UNKNOWN)
11491 emit_label (label2);
11492 return;
11493 }
11494
11495 default:
11496 gcc_unreachable ();
11497 }
11498 }
11499
11500 /* Split branch based on floating point condition. */
11501 void
11502 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11503 rtx target1, rtx target2, rtx tmp, rtx pushed)
11504 {
11505 rtx second, bypass;
11506 rtx label = NULL_RTX;
11507 rtx condition;
11508 int bypass_probability = -1, second_probability = -1, probability = -1;
11509 rtx i;
11510
11511 if (target2 != pc_rtx)
11512 {
11513 rtx tmp = target2;
11514 code = reverse_condition_maybe_unordered (code);
11515 target2 = target1;
11516 target1 = tmp;
11517 }
11518
11519 condition = ix86_expand_fp_compare (code, op1, op2,
11520 tmp, &second, &bypass);
11521
11522 /* Remove pushed operand from stack. */
11523 if (pushed)
11524 ix86_free_from_memory (GET_MODE (pushed));
11525
11526 if (split_branch_probability >= 0)
11527 {
11528 /* Distribute the probabilities across the jumps.
11529 Assume that BYPASS and SECOND always test
11530 for UNORDERED. */
11531 probability = split_branch_probability;
11532
11533 /* A value of 1 is low enough that the probability does not need
11534 to be updated. Later we may run some experiments and see
11535 if unordered values are more frequent in practice. */
11536 if (bypass)
11537 bypass_probability = 1;
11538 if (second)
11539 second_probability = 1;
11540 }
11541 if (bypass != NULL_RTX)
11542 {
11543 label = gen_label_rtx ();
11544 i = emit_jump_insn (gen_rtx_SET
11545 (VOIDmode, pc_rtx,
11546 gen_rtx_IF_THEN_ELSE (VOIDmode,
11547 bypass,
11548 gen_rtx_LABEL_REF (VOIDmode,
11549 label),
11550 pc_rtx)));
11551 if (bypass_probability >= 0)
11552 REG_NOTES (i)
11553 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11554 GEN_INT (bypass_probability),
11555 REG_NOTES (i));
11556 }
11557 i = emit_jump_insn (gen_rtx_SET
11558 (VOIDmode, pc_rtx,
11559 gen_rtx_IF_THEN_ELSE (VOIDmode,
11560 condition, target1, target2)));
11561 if (probability >= 0)
11562 REG_NOTES (i)
11563 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11564 GEN_INT (probability),
11565 REG_NOTES (i));
11566 if (second != NULL_RTX)
11567 {
11568 i = emit_jump_insn (gen_rtx_SET
11569 (VOIDmode, pc_rtx,
11570 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11571 target2)));
11572 if (second_probability >= 0)
11573 REG_NOTES (i)
11574 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11575 GEN_INT (second_probability),
11576 REG_NOTES (i));
11577 }
11578 if (label != NULL_RTX)
11579 emit_label (label);
11580 }
11581
11582 int
11583 ix86_expand_setcc (enum rtx_code code, rtx dest)
11584 {
11585 rtx ret, tmp, tmpreg, equiv;
11586 rtx second_test, bypass_test;
11587
11588 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11589 return 0; /* FAIL */
11590
11591 gcc_assert (GET_MODE (dest) == QImode);
11592
11593 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11594 PUT_MODE (ret, QImode);
11595
11596 tmp = dest;
11597 tmpreg = dest;
11598
11599 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11600 if (bypass_test || second_test)
11601 {
11602 rtx test = second_test;
11603 int bypass = 0;
11604 rtx tmp2 = gen_reg_rtx (QImode);
11605 if (bypass_test)
11606 {
11607 gcc_assert (!second_test);
11608 test = bypass_test;
11609 bypass = 1;
11610 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11611 }
11612 PUT_MODE (test, QImode);
11613 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11614
11615 if (bypass)
11616 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11617 else
11618 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11619 }
11620
11621 /* Attach a REG_EQUAL note describing the comparison result. */
11622 if (ix86_compare_op0 && ix86_compare_op1)
11623 {
11624 equiv = simplify_gen_relational (code, QImode,
11625 GET_MODE (ix86_compare_op0),
11626 ix86_compare_op0, ix86_compare_op1);
11627 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11628 }
11629
11630 return 1; /* DONE */
11631 }
11632
11633 /* Expand a comparison setting or clearing the carry flag. Return true when
11634 successful and set *POP to the comparison operation. */
11635 static bool
11636 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11637 {
11638 enum machine_mode mode =
11639 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11640
11641 /* Do not handle DImode compares that go through the special path. */
11643 if (mode == (TARGET_64BIT ? TImode : DImode))
11644 return false;
11645
11646 if (SCALAR_FLOAT_MODE_P (mode))
11647 {
11648 rtx second_test = NULL, bypass_test = NULL;
11649 rtx compare_op, compare_seq;
11650
11651 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11652
11653 /* Shortcut: the following common codes never translate
11654 into carry-flag compares. */
11655 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11656 || code == ORDERED || code == UNORDERED)
11657 return false;
11658
11659 /* These comparisons also require the zero flag; swap the operands so they don't. */
11660 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11661 && !TARGET_IEEE_FP)
11662 {
11663 rtx tmp = op0;
11664 op0 = op1;
11665 op1 = tmp;
11666 code = swap_condition (code);
11667 }
11668
11669 /* Try to expand the comparison and verify that we end up with a carry-flag
11670 based comparison. This fails to be true only when we decide to expand the
11671 comparison using arithmetic, which is not a common scenario. */
11672 start_sequence ();
11673 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11674 &second_test, &bypass_test);
11675 compare_seq = get_insns ();
11676 end_sequence ();
11677
11678 if (second_test || bypass_test)
11679 return false;
11680 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11681 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11682 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11683 else
11684 code = GET_CODE (compare_op);
11685 if (code != LTU && code != GEU)
11686 return false;
11687 emit_insn (compare_seq);
11688 *pop = compare_op;
11689 return true;
11690 }
11691 if (!INTEGRAL_MODE_P (mode))
11692 return false;
11693 switch (code)
11694 {
11695 case LTU:
11696 case GEU:
11697 break;
11698
11699 /* Convert a==0 into (unsigned)a<1. */
11700 case EQ:
11701 case NE:
11702 if (op1 != const0_rtx)
11703 return false;
11704 op1 = const1_rtx;
11705 code = (code == EQ ? LTU : GEU);
11706 break;
11707
11708 /* Convert a>b into b<a or a>=b+1. */
11709 case GTU:
11710 case LEU:
11711 if (CONST_INT_P (op1))
11712 {
11713 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11714 /* Bail out on overflow. We could still swap the operands, but that
11715 would force loading the constant into a register. */
11716 if (op1 == const0_rtx
11717 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11718 return false;
11719 code = (code == GTU ? GEU : LTU);
11720 }
11721 else
11722 {
11723 rtx tmp = op1;
11724 op1 = op0;
11725 op0 = tmp;
11726 code = (code == GTU ? LTU : GEU);
11727 }
11728 break;
11729
11730 /* Convert a>=0 into (unsigned)a<0x80000000. */
11731 case LT:
11732 case GE:
11733 if (mode == DImode || op1 != const0_rtx)
11734 return false;
11735 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11736 code = (code == LT ? GEU : LTU);
11737 break;
11738 case LE:
11739 case GT:
11740 if (mode == DImode || op1 != constm1_rtx)
11741 return false;
11742 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11743 code = (code == LE ? GEU : LTU);
11744 break;
11745
11746 default:
11747 return false;
11748 }
11749 /* Swapping operands may cause a constant to appear as the first operand. */
11750 if (!nonimmediate_operand (op0, VOIDmode))
11751 {
11752 if (no_new_pseudos)
11753 return false;
11754 op0 = force_reg (mode, op0);
11755 }
11756 ix86_compare_op0 = op0;
11757 ix86_compare_op1 = op1;
11758 *pop = ix86_expand_compare (code, NULL, NULL);
11759 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11760 return true;
11761 }
11762
11763 int
11764 ix86_expand_int_movcc (rtx operands[])
11765 {
11766 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11767 rtx compare_seq, compare_op;
11768 rtx second_test, bypass_test;
11769 enum machine_mode mode = GET_MODE (operands[0]);
11770 bool sign_bit_compare_p = false;
11771
11772 start_sequence ();
11773 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11774 compare_seq = get_insns ();
11775 end_sequence ();
11776
11777 compare_code = GET_CODE (compare_op);
11778
11779 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11780 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11781 sign_bit_compare_p = true;
11782
11783 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11784 HImode insns, we'd be swallowed in word prefix ops. */
11785
11786 if ((mode != HImode || TARGET_FAST_PREFIX)
11787 && (mode != (TARGET_64BIT ? TImode : DImode))
11788 && CONST_INT_P (operands[2])
11789 && CONST_INT_P (operands[3]))
11790 {
11791 rtx out = operands[0];
11792 HOST_WIDE_INT ct = INTVAL (operands[2]);
11793 HOST_WIDE_INT cf = INTVAL (operands[3]);
11794 HOST_WIDE_INT diff;
11795
11796 diff = ct - cf;
11797 /* Sign bit compares are better done using shifts than by using
11798 sbb. */
11799 if (sign_bit_compare_p
11800 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11801 ix86_compare_op1, &compare_op))
11802 {
11803 /* Detect overlap between destination and compare sources. */
11804 rtx tmp = out;
11805
11806 if (!sign_bit_compare_p)
11807 {
11808 bool fpcmp = false;
11809
11810 compare_code = GET_CODE (compare_op);
11811
11812 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11813 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11814 {
11815 fpcmp = true;
11816 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11817 }
11818
11819 /* To simplify the rest of the code, restrict to the GEU case. */
11820 if (compare_code == LTU)
11821 {
11822 HOST_WIDE_INT tmp = ct;
11823 ct = cf;
11824 cf = tmp;
11825 compare_code = reverse_condition (compare_code);
11826 code = reverse_condition (code);
11827 }
11828 else
11829 {
11830 if (fpcmp)
11831 PUT_CODE (compare_op,
11832 reverse_condition_maybe_unordered
11833 (GET_CODE (compare_op)));
11834 else
11835 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11836 }
11837 diff = ct - cf;
11838
11839 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11840 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11841 tmp = gen_reg_rtx (mode);
11842
11843 if (mode == DImode)
11844 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11845 else
11846 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11847 }
11848 else
11849 {
11850 if (code == GT || code == GE)
11851 code = reverse_condition (code);
11852 else
11853 {
11854 HOST_WIDE_INT tmp = ct;
11855 ct = cf;
11856 cf = tmp;
11857 diff = ct - cf;
11858 }
11859 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11860 ix86_compare_op1, VOIDmode, 0, -1);
11861 }
11862
11863 if (diff == 1)
11864 {
11865 /*
11866 * cmpl op0,op1
11867 * sbbl dest,dest
11868 * [addl dest, ct]
11869 *
11870 * Size 5 - 8.
11871 */
11872 if (ct)
11873 tmp = expand_simple_binop (mode, PLUS,
11874 tmp, GEN_INT (ct),
11875 copy_rtx (tmp), 1, OPTAB_DIRECT);
11876 }
11877 else if (cf == -1)
11878 {
11879 /*
11880 * cmpl op0,op1
11881 * sbbl dest,dest
11882 * orl $ct, dest
11883 *
11884 * Size 8.
11885 */
11886 tmp = expand_simple_binop (mode, IOR,
11887 tmp, GEN_INT (ct),
11888 copy_rtx (tmp), 1, OPTAB_DIRECT);
11889 }
11890 else if (diff == -1 && ct)
11891 {
11892 /*
11893 * cmpl op0,op1
11894 * sbbl dest,dest
11895 * notl dest
11896 * [addl dest, cf]
11897 *
11898 * Size 8 - 11.
11899 */
11900 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11901 if (cf)
11902 tmp = expand_simple_binop (mode, PLUS,
11903 copy_rtx (tmp), GEN_INT (cf),
11904 copy_rtx (tmp), 1, OPTAB_DIRECT);
11905 }
11906 else
11907 {
11908 /*
11909 * cmpl op0,op1
11910 * sbbl dest,dest
11911 * [notl dest]
11912 * andl cf - ct, dest
11913 * [addl dest, ct]
11914 *
11915 * Size 8 - 11.
11916 */
11917
11918 if (cf == 0)
11919 {
11920 cf = ct;
11921 ct = 0;
11922 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11923 }
11924
11925 tmp = expand_simple_binop (mode, AND,
11926 copy_rtx (tmp),
11927 gen_int_mode (cf - ct, mode),
11928 copy_rtx (tmp), 1, OPTAB_DIRECT);
11929 if (ct)
11930 tmp = expand_simple_binop (mode, PLUS,
11931 copy_rtx (tmp), GEN_INT (ct),
11932 copy_rtx (tmp), 1, OPTAB_DIRECT);
11933 }
11934
11935 if (!rtx_equal_p (tmp, out))
11936 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11937
11938 return 1; /* DONE */
11939 }
11940
11941 if (diff < 0)
11942 {
11943 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
11944
11945 HOST_WIDE_INT tmp;
11946 tmp = ct, ct = cf, cf = tmp;
11947 diff = -diff;
11948
11949 if (SCALAR_FLOAT_MODE_P (cmp_mode))
11950 {
11951 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
11952
11953 /* We may be reversing an unordered compare to a normal compare, which
11954 is not valid in general (we may convert a non-trapping condition
11955 into a trapping one); however, on i386 we currently emit all
11956 comparisons unordered. */
11957 compare_code = reverse_condition_maybe_unordered (compare_code);
11958 code = reverse_condition_maybe_unordered (code);
11959 }
11960 else
11961 {
11962 compare_code = reverse_condition (compare_code);
11963 code = reverse_condition (code);
11964 }
11965 }
11966
11967 compare_code = UNKNOWN;
11968 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11969 && CONST_INT_P (ix86_compare_op1))
11970 {
11971 if (ix86_compare_op1 == const0_rtx
11972 && (code == LT || code == GE))
11973 compare_code = code;
11974 else if (ix86_compare_op1 == constm1_rtx)
11975 {
11976 if (code == LE)
11977 compare_code = LT;
11978 else if (code == GT)
11979 compare_code = GE;
11980 }
11981 }
11982
11983 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11984 if (compare_code != UNKNOWN
11985 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11986 && (cf == -1 || ct == -1))
11987 {
11988 /* If lea code below could be used, only optimize
11989 if it results in a 2 insn sequence. */
11990
11991 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11992 || diff == 3 || diff == 5 || diff == 9)
11993 || (compare_code == LT && ct == -1)
11994 || (compare_code == GE && cf == -1))
11995 {
11996 /*
11997 * notl op1 (if necessary)
11998 * sarl $31, op1
11999 * orl cf, op1
12000 */
12001 if (ct != -1)
12002 {
12003 cf = ct;
12004 ct = -1;
12005 code = reverse_condition (code);
12006 }
12007
12008 out = emit_store_flag (out, code, ix86_compare_op0,
12009 ix86_compare_op1, VOIDmode, 0, -1);
12010
12011 out = expand_simple_binop (mode, IOR,
12012 out, GEN_INT (cf),
12013 out, 1, OPTAB_DIRECT);
12014 if (out != operands[0])
12015 emit_move_insn (operands[0], out);
12016
12017 return 1; /* DONE */
12018 }
12019 }
12020
12021
12022 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12023 || diff == 3 || diff == 5 || diff == 9)
12024 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12025 && (mode != DImode
12026 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12027 {
12028 /*
12029 * xorl dest,dest
12030 * cmpl op1,op2
12031 * setcc dest
12032 * lea cf(dest*(ct-cf)),dest
12033 *
12034 * Size 14.
12035 *
12036 * This also catches the degenerate setcc-only case.
12037 */
12038
12039 rtx tmp;
12040 int nops;
12041
12042 out = emit_store_flag (out, code, ix86_compare_op0,
12043 ix86_compare_op1, VOIDmode, 0, 1);
12044
12045 nops = 0;
12046 /* On x86_64 the lea instruction operates on Pmode, so we need
12047 to get the arithmetic done in the proper mode to match. */
12048 if (diff == 1)
12049 tmp = copy_rtx (out);
12050 else
12051 {
12052 rtx out1;
12053 out1 = copy_rtx (out);
12054 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12055 nops++;
12056 if (diff & 1)
12057 {
12058 tmp = gen_rtx_PLUS (mode, tmp, out1);
12059 nops++;
12060 }
12061 }
12062 if (cf != 0)
12063 {
12064 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12065 nops++;
12066 }
12067 if (!rtx_equal_p (tmp, out))
12068 {
12069 if (nops == 1)
12070 out = force_operand (tmp, copy_rtx (out));
12071 else
12072 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12073 }
12074 if (!rtx_equal_p (out, operands[0]))
12075 emit_move_insn (operands[0], copy_rtx (out));
12076
12077 return 1; /* DONE */
12078 }
12079
12080 /*
12081 * General case: Jumpful:
12082 * xorl dest,dest cmpl op1, op2
12083 * cmpl op1, op2 movl ct, dest
12084 * setcc dest jcc 1f
12085 * decl dest movl cf, dest
12086 * andl (cf-ct),dest 1:
12087 * addl ct,dest
12088 *
12089 * Size 20. Size 14.
12090 *
12091 * This is reasonably steep, but branch mispredict costs are
12092 * high on modern cpus, so consider failing only if optimizing
12093 * for space.
12094 */
12095
12096 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12097 && BRANCH_COST >= 2)
12098 {
12099 if (cf == 0)
12100 {
12101 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12102
12103 cf = ct;
12104 ct = 0;
12105
12106 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12107 {
12108 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12109
12110 /* We may be reversing an unordered compare to a normal compare,
12111 which is not valid in general (we may convert a non-trapping
12112 condition into a trapping one); however, on i386 we currently
12113 emit all comparisons unordered. */
12114 code = reverse_condition_maybe_unordered (code);
12115 }
12116 else
12117 {
12118 code = reverse_condition (code);
12119 if (compare_code != UNKNOWN)
12120 compare_code = reverse_condition (compare_code);
12121 }
12122 }
12123
12124 if (compare_code != UNKNOWN)
12125 {
12126 /* notl op1 (if needed)
12127 sarl $31, op1
12128 andl (cf-ct), op1
12129 addl ct, op1
12130
12131 For x < 0 (resp. x <= -1) there will be no notl,
12132 so if possible swap the constants to get rid of the
12133 complement.
12134 True/false will be -1/0 while code below (store flag
12135 followed by decrement) is 0/-1, so the constants need
12136 to be exchanged once more. */
12137
12138 if (compare_code == GE || !cf)
12139 {
12140 code = reverse_condition (code);
12141 compare_code = LT;
12142 }
12143 else
12144 {
12145 HOST_WIDE_INT tmp = cf;
12146 cf = ct;
12147 ct = tmp;
12148 }
12149
12150 out = emit_store_flag (out, code, ix86_compare_op0,
12151 ix86_compare_op1, VOIDmode, 0, -1);
12152 }
12153 else
12154 {
12155 out = emit_store_flag (out, code, ix86_compare_op0,
12156 ix86_compare_op1, VOIDmode, 0, 1);
12157
12158 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12159 copy_rtx (out), 1, OPTAB_DIRECT);
12160 }
12161
12162 out = expand_simple_binop (mode, AND, copy_rtx (out),
12163 gen_int_mode (cf - ct, mode),
12164 copy_rtx (out), 1, OPTAB_DIRECT);
12165 if (ct)
12166 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12167 copy_rtx (out), 1, OPTAB_DIRECT);
12168 if (!rtx_equal_p (out, operands[0]))
12169 emit_move_insn (operands[0], copy_rtx (out));
12170
12171 return 1; /* DONE */
12172 }
12173 }
12174
12175 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12176 {
12177 /* Try a few things more with specific constants and a variable. */
12178
12179 optab op;
12180 rtx var, orig_out, out, tmp;
12181
12182 if (BRANCH_COST <= 2)
12183 return 0; /* FAIL */
12184
12185 /* If one of the two operands is an interesting constant, load a
12186 constant with the above and mask it in with a logical operation. */
12187
12188 if (CONST_INT_P (operands[2]))
12189 {
12190 var = operands[3];
12191 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12192 operands[3] = constm1_rtx, op = and_optab;
12193 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12194 operands[3] = const0_rtx, op = ior_optab;
12195 else
12196 return 0; /* FAIL */
12197 }
12198 else if (CONST_INT_P (operands[3]))
12199 {
12200 var = operands[2];
12201 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12202 operands[2] = constm1_rtx, op = and_optab;
12203 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12204 operands[2] = const0_rtx, op = ior_optab;
12205 else
12206 return 0; /* FAIL */
12207 }
12208 else
12209 return 0; /* FAIL */
12210
12211 orig_out = operands[0];
12212 tmp = gen_reg_rtx (mode);
12213 operands[0] = tmp;
12214
12215 /* Recurse to get the constant loaded. */
12216 if (ix86_expand_int_movcc (operands) == 0)
12217 return 0; /* FAIL */
12218
12219 /* Mask in the interesting variable. */
12220 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12221 OPTAB_WIDEN);
12222 if (!rtx_equal_p (out, orig_out))
12223 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12224
12225 return 1; /* DONE */
12226 }
12227
12228 /*
12229 * For comparison with above,
12230 *
12231 * movl cf,dest
12232 * movl ct,tmp
12233 * cmpl op1,op2
12234 * cmovcc tmp,dest
12235 *
12236 * Size 15.
12237 */
12238
12239 if (! nonimmediate_operand (operands[2], mode))
12240 operands[2] = force_reg (mode, operands[2]);
12241 if (! nonimmediate_operand (operands[3], mode))
12242 operands[3] = force_reg (mode, operands[3]);
12243
12244 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12245 {
12246 rtx tmp = gen_reg_rtx (mode);
12247 emit_move_insn (tmp, operands[3]);
12248 operands[3] = tmp;
12249 }
12250 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12251 {
12252 rtx tmp = gen_reg_rtx (mode);
12253 emit_move_insn (tmp, operands[2]);
12254 operands[2] = tmp;
12255 }
12256
12257 if (! register_operand (operands[2], VOIDmode)
12258 && (mode == QImode
12259 || ! register_operand (operands[3], VOIDmode)))
12260 operands[2] = force_reg (mode, operands[2]);
12261
12262 if (mode == QImode
12263 && ! register_operand (operands[3], VOIDmode))
12264 operands[3] = force_reg (mode, operands[3]);
12265
12266 emit_insn (compare_seq);
12267 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12268 gen_rtx_IF_THEN_ELSE (mode,
12269 compare_op, operands[2],
12270 operands[3])));
12271 if (bypass_test)
12272 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12273 gen_rtx_IF_THEN_ELSE (mode,
12274 bypass_test,
12275 copy_rtx (operands[3]),
12276 copy_rtx (operands[0]))));
12277 if (second_test)
12278 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12279 gen_rtx_IF_THEN_ELSE (mode,
12280 second_test,
12281 copy_rtx (operands[2]),
12282 copy_rtx (operands[0]))));
12283
12284 return 1; /* DONE */
12285 }
12286
12287 /* Swap, force into registers, or otherwise massage the two operands
12288 to an sse comparison with a mask result. Thus we differ a bit from
12289 ix86_prepare_fp_compare_args which expects to produce a flags result.
12290
12291 The DEST operand exists to help determine whether to commute commutative
12292 operators. The POP0/POP1 operands are updated in place. The new
12293 comparison code is returned, or UNKNOWN if not implementable. */
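/* (As a reminder: the SSE cmpss/cmpps immediates only encode EQ, LT, LE,
   UNORD, NEQ, NLT, NLE and ORD, which is why the GE/GT/UNLE/UNLT cases
   below are handled by swapping the operands.)  */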
12294
12295 static enum rtx_code
12296 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12297 rtx *pop0, rtx *pop1)
12298 {
12299 rtx tmp;
12300
12301 switch (code)
12302 {
12303 case LTGT:
12304 case UNEQ:
12305 /* We have no LTGT as an operator. We could implement it with
12306 NE & ORDERED, but this requires an extra temporary. It's
12307 not clear that it's worth it. */
12308 return UNKNOWN;
12309
12310 case LT:
12311 case LE:
12312 case UNGT:
12313 case UNGE:
12314 /* These are supported directly. */
12315 break;
12316
12317 case EQ:
12318 case NE:
12319 case UNORDERED:
12320 case ORDERED:
12321 /* For commutative operators, try to canonicalize the destination
12322 operand to be first in the comparison - this helps reload to
12323 avoid extra moves. */
12324 if (!dest || !rtx_equal_p (dest, *pop1))
12325 break;
12326 /* FALLTHRU */
12327
12328 case GE:
12329 case GT:
12330 case UNLE:
12331 case UNLT:
12332 /* These are not supported directly. Swap the comparison operands
12333 to transform into something that is supported. */
12334 tmp = *pop0;
12335 *pop0 = *pop1;
12336 *pop1 = tmp;
12337 code = swap_condition (code);
12338 break;
12339
12340 default:
12341 gcc_unreachable ();
12342 }
12343
12344 return code;
12345 }
12346
12347 /* Detect conditional moves that exactly match min/max operational
12348 semantics. Note that this is IEEE safe, as long as we don't
12349 interchange the operands.
12350
12351 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12352 and TRUE if the operation is successful and instructions are emitted. */
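/* (minss/minps and friends return the second source operand when either
   input is a NaN, and likewise pick one particular operand when comparing
   -0.0 with +0.0, so the operand order must not be swapped; the
   UNSPEC_IEEE_MIN/MAX path below is used when those semantics must be
   preserved exactly.)  */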
12353
12354 static bool
12355 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12356 rtx cmp_op1, rtx if_true, rtx if_false)
12357 {
12358 enum machine_mode mode;
12359 bool is_min;
12360 rtx tmp;
12361
12362 if (code == LT)
12363 ;
12364 else if (code == UNGE)
12365 {
12366 tmp = if_true;
12367 if_true = if_false;
12368 if_false = tmp;
12369 }
12370 else
12371 return false;
12372
12373 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12374 is_min = true;
12375 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12376 is_min = false;
12377 else
12378 return false;
12379
12380 mode = GET_MODE (dest);
12381
12382 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12383 but MODE may be a vector mode and thus not appropriate. */
12384 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12385 {
12386 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12387 rtvec v;
12388
12389 if_true = force_reg (mode, if_true);
12390 v = gen_rtvec (2, if_true, if_false);
12391 tmp = gen_rtx_UNSPEC (mode, v, u);
12392 }
12393 else
12394 {
12395 code = is_min ? SMIN : SMAX;
12396 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12397 }
12398
12399 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12400 return true;
12401 }
12402
12403 /* Expand an sse vector comparison. Return the register with the result. */
12404
12405 static rtx
12406 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12407 rtx op_true, rtx op_false)
12408 {
12409 enum machine_mode mode = GET_MODE (dest);
12410 rtx x;
12411
12412 cmp_op0 = force_reg (mode, cmp_op0);
12413 if (!nonimmediate_operand (cmp_op1, mode))
12414 cmp_op1 = force_reg (mode, cmp_op1);
12415
12416 if (optimize
12417 || reg_overlap_mentioned_p (dest, op_true)
12418 || reg_overlap_mentioned_p (dest, op_false))
12419 dest = gen_reg_rtx (mode);
12420
12421 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12422 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12423
12424 return dest;
12425 }
12426
12427 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12428 operations. This is used for both scalar and vector conditional moves. */
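/* CMP is an all-ones/all-zeros mask per element (the result of a
   cmpps-style comparison), so the select is computed bitwise as
   dest = (cmp & op_true) | (~cmp & op_false), with the single-AND
   shortcuts used when one arm is zero.  */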
12429
12430 static void
12431 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12432 {
12433 enum machine_mode mode = GET_MODE (dest);
12434 rtx t2, t3, x;
12435
12436 if (op_false == CONST0_RTX (mode))
12437 {
12438 op_true = force_reg (mode, op_true);
12439 x = gen_rtx_AND (mode, cmp, op_true);
12440 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12441 }
12442 else if (op_true == CONST0_RTX (mode))
12443 {
12444 op_false = force_reg (mode, op_false);
12445 x = gen_rtx_NOT (mode, cmp);
12446 x = gen_rtx_AND (mode, x, op_false);
12447 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12448 }
12449 else
12450 {
12451 op_true = force_reg (mode, op_true);
12452 op_false = force_reg (mode, op_false);
12453
12454 t2 = gen_reg_rtx (mode);
12455 if (optimize)
12456 t3 = gen_reg_rtx (mode);
12457 else
12458 t3 = dest;
12459
12460 x = gen_rtx_AND (mode, op_true, cmp);
12461 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12462
12463 x = gen_rtx_NOT (mode, cmp);
12464 x = gen_rtx_AND (mode, x, op_false);
12465 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12466
12467 x = gen_rtx_IOR (mode, t3, t2);
12468 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12469 }
12470 }
12471
12472 /* Expand a floating-point conditional move. Return true if successful. */
12473
12474 int
12475 ix86_expand_fp_movcc (rtx operands[])
12476 {
12477 enum machine_mode mode = GET_MODE (operands[0]);
12478 enum rtx_code code = GET_CODE (operands[1]);
12479 rtx tmp, compare_op, second_test, bypass_test;
12480
12481 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12482 {
12483 enum machine_mode cmode;
12484
12485 /* Since we've no cmove for sse registers, don't force bad register
12486 allocation just to gain access to it. Deny movcc when the
12487 comparison mode doesn't match the move mode. */
12488 cmode = GET_MODE (ix86_compare_op0);
12489 if (cmode == VOIDmode)
12490 cmode = GET_MODE (ix86_compare_op1);
12491 if (cmode != mode)
12492 return 0;
12493
12494 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12495 &ix86_compare_op0,
12496 &ix86_compare_op1);
12497 if (code == UNKNOWN)
12498 return 0;
12499
12500 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12501 ix86_compare_op1, operands[2],
12502 operands[3]))
12503 return 1;
12504
12505 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12506 ix86_compare_op1, operands[2], operands[3]);
12507 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12508 return 1;
12509 }
12510
12511 /* The floating point conditional move instructions don't directly
12512 support conditions resulting from a signed integer comparison. */
12513
12514 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12515
12519 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12520 {
12521 gcc_assert (!second_test && !bypass_test);
12522 tmp = gen_reg_rtx (QImode);
12523 ix86_expand_setcc (code, tmp);
12524 code = NE;
12525 ix86_compare_op0 = tmp;
12526 ix86_compare_op1 = const0_rtx;
12527 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12528 }
12529 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12530 {
12531 tmp = gen_reg_rtx (mode);
12532 emit_move_insn (tmp, operands[3]);
12533 operands[3] = tmp;
12534 }
12535 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12536 {
12537 tmp = gen_reg_rtx (mode);
12538 emit_move_insn (tmp, operands[2]);
12539 operands[2] = tmp;
12540 }
12541
12542 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12543 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12544 operands[2], operands[3])));
12545 if (bypass_test)
12546 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12547 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12548 operands[3], operands[0])));
12549 if (second_test)
12550 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12551 gen_rtx_IF_THEN_ELSE (mode, second_test,
12552 operands[2], operands[0])));
12553
12554 return 1;
12555 }
12556
12557 /* Expand a floating-point vector conditional move; a vcond operation
12558 rather than a movcc operation. */
12559
12560 bool
12561 ix86_expand_fp_vcond (rtx operands[])
12562 {
12563 enum rtx_code code = GET_CODE (operands[3]);
12564 rtx cmp;
12565
12566 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12567 &operands[4], &operands[5]);
12568 if (code == UNKNOWN)
12569 return false;
12570
12571 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12572 operands[5], operands[1], operands[2]))
12573 return true;
12574
12575 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12576 operands[1], operands[2]);
12577 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12578 return true;
12579 }
12580
12581 /* Expand a signed integral vector conditional move. */
12582
12583 bool
12584 ix86_expand_int_vcond (rtx operands[])
12585 {
12586 enum machine_mode mode = GET_MODE (operands[0]);
12587 enum rtx_code code = GET_CODE (operands[3]);
12588 bool negate = false;
12589 rtx x, cop0, cop1;
12590
12591 cop0 = operands[4];
12592 cop1 = operands[5];
12593
12594 /* Canonicalize the comparison to EQ, GT, GTU. */
12595 switch (code)
12596 {
12597 case EQ:
12598 case GT:
12599 case GTU:
12600 break;
12601
12602 case NE:
12603 case LE:
12604 case LEU:
12605 code = reverse_condition (code);
12606 negate = true;
12607 break;
12608
12609 case GE:
12610 case GEU:
12611 code = reverse_condition (code);
12612 negate = true;
12613 /* FALLTHRU */
12614
12615 case LT:
12616 case LTU:
12617 code = swap_condition (code);
12618 x = cop0, cop0 = cop1, cop1 = x;
12619 break;
12620
12621 default:
12622 gcc_unreachable ();
12623 }
12624
12625 /* Unsigned parallel compare is not supported by the hardware. Play some
12626 tricks to turn this into a signed comparison against 0. */
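/* For V16QI/V8HI this uses an unsigned saturating subtract: the result is
   zero exactly when op0 <=u op1, so the EQ-against-zero test computes LEU
   and NEGATE is flipped to recover GTU.  The V4SI case instead rewrites
   the compare in terms of a plain subtraction and op0's sign bit.  */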
12627 if (code == GTU)
12628 {
12629 cop0 = force_reg (mode, cop0);
12630
12631 switch (mode)
12632 {
12633 case V4SImode:
12634 {
12635 rtx t1, t2, mask;
12636
12637 /* Perform a parallel modulo subtraction. */
12638 t1 = gen_reg_rtx (mode);
12639 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12640
12641 /* Extract the original sign bit of op0. */
12642 mask = GEN_INT (-0x80000000);
12643 mask = gen_rtx_CONST_VECTOR (mode,
12644 gen_rtvec (4, mask, mask, mask, mask));
12645 mask = force_reg (mode, mask);
12646 t2 = gen_reg_rtx (mode);
12647 emit_insn (gen_andv4si3 (t2, cop0, mask));
12648
12649 /* XOR it back into the result of the subtraction. This results
12650 in the sign bit set iff we saw unsigned underflow. */
12651 x = gen_reg_rtx (mode);
12652 emit_insn (gen_xorv4si3 (x, t1, t2));
12653
12654 code = GT;
12655 }
12656 break;
12657
12658 case V16QImode:
12659 case V8HImode:
12660 /* Perform a parallel unsigned saturating subtraction. */
12661 x = gen_reg_rtx (mode);
12662 emit_insn (gen_rtx_SET (VOIDmode, x,
12663 gen_rtx_US_MINUS (mode, cop0, cop1)));
12664
12665 code = EQ;
12666 negate = !negate;
12667 break;
12668
12669 default:
12670 gcc_unreachable ();
12671 }
12672
12673 cop0 = x;
12674 cop1 = CONST0_RTX (mode);
12675 }
12676
12677 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12678 operands[1+negate], operands[2-negate]);
12679
12680 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12681 operands[2-negate]);
12682 return true;
12683 }
12684
12685 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12686 true if we should do zero extension, else sign extension. HIGH_P is
12687 true if we want the N/2 high elements, else the low elements. */
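/* The widening is done with the interleave (punpck) instructions: each
   source element is paired either with zero (zero extension) or with a
   per-element mask of its own sign bits, obtained from the 0 > OP[1]
   comparison below (sign extension).  */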
12688
12689 void
12690 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12691 {
12692 enum machine_mode imode = GET_MODE (operands[1]);
12693 rtx (*unpack)(rtx, rtx, rtx);
12694 rtx se, dest;
12695
12696 switch (imode)
12697 {
12698 case V16QImode:
12699 if (high_p)
12700 unpack = gen_vec_interleave_highv16qi;
12701 else
12702 unpack = gen_vec_interleave_lowv16qi;
12703 break;
12704 case V8HImode:
12705 if (high_p)
12706 unpack = gen_vec_interleave_highv8hi;
12707 else
12708 unpack = gen_vec_interleave_lowv8hi;
12709 break;
12710 case V4SImode:
12711 if (high_p)
12712 unpack = gen_vec_interleave_highv4si;
12713 else
12714 unpack = gen_vec_interleave_lowv4si;
12715 break;
12716 default:
12717 gcc_unreachable ();
12718 }
12719
12720 dest = gen_lowpart (imode, operands[0]);
12721
12722 if (unsigned_p)
12723 se = force_reg (imode, CONST0_RTX (imode));
12724 else
12725 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12726 operands[1], pc_rtx, pc_rtx);
12727
12728 emit_insn (unpack (dest, operands[1], se));
12729 }
12730
12731 /* Expand conditional increment or decrement using adc/sbb instructions.
12732 The default case using setcc followed by the conditional move can be
12733 done by generic code. */
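/* The sequence generated here is a compare that leaves the condition in
   the carry flag, followed by a single adc or sbb with an immediate of
   0 or -1, so the +/-1 increment is folded into the carry itself.  */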
12734 int
12735 ix86_expand_int_addcc (rtx operands[])
12736 {
12737 enum rtx_code code = GET_CODE (operands[1]);
12738 rtx compare_op;
12739 rtx val = const0_rtx;
12740 bool fpcmp = false;
12741 enum machine_mode mode = GET_MODE (operands[0]);
12742
12743 if (operands[3] != const1_rtx
12744 && operands[3] != constm1_rtx)
12745 return 0;
12746 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12747 ix86_compare_op1, &compare_op))
12748 return 0;
12749 code = GET_CODE (compare_op);
12750
12751 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12752 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12753 {
12754 fpcmp = true;
12755 code = ix86_fp_compare_code_to_integer (code);
12756 }
12757
12758 if (code != LTU)
12759 {
12760 val = constm1_rtx;
12761 if (fpcmp)
12762 PUT_CODE (compare_op,
12763 reverse_condition_maybe_unordered
12764 (GET_CODE (compare_op)));
12765 else
12766 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12767 }
12768 PUT_MODE (compare_op, mode);
12769
12770 /* Construct either adc or sbb insn. */
12771 if ((code == LTU) == (operands[3] == constm1_rtx))
12772 {
12773 switch (GET_MODE (operands[0]))
12774 {
12775 case QImode:
12776 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12777 break;
12778 case HImode:
12779 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12780 break;
12781 case SImode:
12782 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12783 break;
12784 case DImode:
12785 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12786 break;
12787 default:
12788 gcc_unreachable ();
12789 }
12790 }
12791 else
12792 {
12793 switch (GET_MODE (operands[0]))
12794 {
12795 case QImode:
12796 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12797 break;
12798 case HImode:
12799 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12800 break;
12801 case SImode:
12802 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12803 break;
12804 case DImode:
12805 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12806 break;
12807 default:
12808 gcc_unreachable ();
12809 }
12810 }
12811 return 1; /* DONE */
12812 }
12813
12814
12815 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12816 works for floating point parameters and non-offsettable memories.
12817 For pushes, it returns just stack offsets; the values will be saved
12818 in the right order. At most three parts are generated. */
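/* For example, on ia32 an XFmode operand is split into three SImode
   parts while DFmode and DImode operands yield two; on x86_64 an XFmode
   or TFmode operand is split into a DImode part plus an upper SImode or
   DImode part.  */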
12819
12820 static int
12821 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12822 {
12823 int size;
12824
12825 if (!TARGET_64BIT)
12826 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12827 else
12828 size = (GET_MODE_SIZE (mode) + 4) / 8;
12829
12830 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12831 gcc_assert (size >= 2 && size <= 3);
12832
12833 /* Optimize constant pool references to immediates. This is used by fp
12834 moves, which force all constants to memory to allow combining. */
12835 if (MEM_P (operand) && MEM_READONLY_P (operand))
12836 {
12837 rtx tmp = maybe_get_pool_constant (operand);
12838 if (tmp)
12839 operand = tmp;
12840 }
12841
12842 if (MEM_P (operand) && !offsettable_memref_p (operand))
12843 {
12844 /* The only non-offsettable memories we handle are pushes. */
12845 int ok = push_operand (operand, VOIDmode);
12846
12847 gcc_assert (ok);
12848
12849 operand = copy_rtx (operand);
12850 PUT_MODE (operand, Pmode);
12851 parts[0] = parts[1] = parts[2] = operand;
12852 return size;
12853 }
12854
12855 if (GET_CODE (operand) == CONST_VECTOR)
12856 {
12857 enum machine_mode imode = int_mode_for_mode (mode);
12858 /* Caution: if we looked through a constant pool memory above,
12859 the operand may actually have a different mode now. That's
12860 ok, since we want to pun this all the way back to an integer. */
12861 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12862 gcc_assert (operand != NULL);
12863 mode = imode;
12864 }
12865
12866 if (!TARGET_64BIT)
12867 {
12868 if (mode == DImode)
12869 split_di (&operand, 1, &parts[0], &parts[1]);
12870 else
12871 {
12872 if (REG_P (operand))
12873 {
12874 gcc_assert (reload_completed);
12875 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12876 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12877 if (size == 3)
12878 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12879 }
12880 else if (offsettable_memref_p (operand))
12881 {
12882 operand = adjust_address (operand, SImode, 0);
12883 parts[0] = operand;
12884 parts[1] = adjust_address (operand, SImode, 4);
12885 if (size == 3)
12886 parts[2] = adjust_address (operand, SImode, 8);
12887 }
12888 else if (GET_CODE (operand) == CONST_DOUBLE)
12889 {
12890 REAL_VALUE_TYPE r;
12891 long l[4];
12892
12893 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12894 switch (mode)
12895 {
12896 case XFmode:
12897 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12898 parts[2] = gen_int_mode (l[2], SImode);
12899 break;
12900 case DFmode:
12901 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12902 break;
12903 default:
12904 gcc_unreachable ();
12905 }
12906 parts[1] = gen_int_mode (l[1], SImode);
12907 parts[0] = gen_int_mode (l[0], SImode);
12908 }
12909 else
12910 gcc_unreachable ();
12911 }
12912 }
12913 else
12914 {
12915 if (mode == TImode)
12916 split_ti (&operand, 1, &parts[0], &parts[1]);
12917 if (mode == XFmode || mode == TFmode)
12918 {
12919 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12920 if (REG_P (operand))
12921 {
12922 gcc_assert (reload_completed);
12923 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12924 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12925 }
12926 else if (offsettable_memref_p (operand))
12927 {
12928 operand = adjust_address (operand, DImode, 0);
12929 parts[0] = operand;
12930 parts[1] = adjust_address (operand, upper_mode, 8);
12931 }
12932 else if (GET_CODE (operand) == CONST_DOUBLE)
12933 {
12934 REAL_VALUE_TYPE r;
12935 long l[4];
12936
12937 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12938 real_to_target (l, &r, mode);
12939
12940 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12941 if (HOST_BITS_PER_WIDE_INT >= 64)
12942 parts[0]
12943 = gen_int_mode
12944 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12945 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12946 DImode);
12947 else
12948 parts[0] = immed_double_const (l[0], l[1], DImode);
12949
12950 if (upper_mode == SImode)
12951 parts[1] = gen_int_mode (l[2], SImode);
12952 else if (HOST_BITS_PER_WIDE_INT >= 64)
12953 parts[1]
12954 = gen_int_mode
12955 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12956 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12957 DImode);
12958 else
12959 parts[1] = immed_double_const (l[2], l[3], DImode);
12960 }
12961 else
12962 gcc_unreachable ();
12963 }
12964 }
12965
12966 return size;
12967 }
12968
12969 /* Emit insns to perform a move or push of DI, DF, and XF values.
12970 All the required insns are emitted here. Operands 2-4 receive the
12971 input values in the correct order; operands 5-7 receive the
12972 corresponding output values. */
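/* A sketch of the common case: on a 32-bit target a DImode register move is
split into two SImode moves. If the low word of the destination happens to
be the high word of the source, the ordering code below emits the high move
first so the source is not clobbered; otherwise the low part is copied
first. */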
12973
12974 void
12975 ix86_split_long_move (rtx operands[])
12976 {
12977 rtx part[2][3];
12978 int nparts;
12979 int push = 0;
12980 int collisions = 0;
12981 enum machine_mode mode = GET_MODE (operands[0]);
12982
12983 /* The DFmode expanders may ask us to move a double.
12984 For a 64-bit target this is a single move. By hiding that fact
12985 here we simplify the i386.md splitters. */
12986 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12987 {
12988 /* Optimize constant pool reference to immediates. This is used by
12989 fp moves, that force all constants to memory to allow combining. */
12990
12991 if (MEM_P (operands[1])
12992 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12993 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12994 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12995 if (push_operand (operands[0], VOIDmode))
12996 {
12997 operands[0] = copy_rtx (operands[0]);
12998 PUT_MODE (operands[0], Pmode);
12999 }
13000 else
13001 operands[0] = gen_lowpart (DImode, operands[0]);
13002 operands[1] = gen_lowpart (DImode, operands[1]);
13003 emit_move_insn (operands[0], operands[1]);
13004 return;
13005 }
13006
13007 /* The only non-offsettable memory we handle is push. */
13008 if (push_operand (operands[0], VOIDmode))
13009 push = 1;
13010 else
13011 gcc_assert (!MEM_P (operands[0])
13012 || offsettable_memref_p (operands[0]));
13013
13014 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13015 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13016
13017 /* When emitting a push, take care of source operands on the stack. */
13018 if (push && MEM_P (operands[1])
13019 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13020 {
13021 if (nparts == 3)
13022 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13023 XEXP (part[1][2], 0));
13024 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13025 XEXP (part[1][1], 0));
13026 }
13027
13028 /* We need to do the copy in the right order in case an address register
13029 of the source overlaps the destination. */
13030 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13031 {
13032 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13033 collisions++;
13034 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13035 collisions++;
13036 if (nparts == 3
13037 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13038 collisions++;
13039
13040 /* Collision in the middle part can be handled by reordering. */
13041 if (collisions == 1 && nparts == 3
13042 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13043 {
13044 rtx tmp;
13045 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13046 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13047 }
13048
13049 /* If there are more collisions, we can't handle it by reordering.
13050 Do an lea to the last part and use only one colliding move. */
13051 else if (collisions > 1)
13052 {
13053 rtx base;
13054
13055 collisions = 1;
13056
13057 base = part[0][nparts - 1];
13058
13059 /* Handle the case when the last part isn't valid for lea.
13060 Happens in 64-bit mode storing the 12-byte XFmode. */
13061 if (GET_MODE (base) != Pmode)
13062 base = gen_rtx_REG (Pmode, REGNO (base));
13063
13064 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13065 part[1][0] = replace_equiv_address (part[1][0], base);
13066 part[1][1] = replace_equiv_address (part[1][1],
13067 plus_constant (base, UNITS_PER_WORD));
13068 if (nparts == 3)
13069 part[1][2] = replace_equiv_address (part[1][2],
13070 plus_constant (base, 8));
13071 }
13072 }
13073
13074 if (push)
13075 {
13076 if (!TARGET_64BIT)
13077 {
13078 if (nparts == 3)
13079 {
13080 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13081 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13082 emit_move_insn (part[0][2], part[1][2]);
13083 }
13084 }
13085 else
13086 {
13087 /* In 64-bit mode we don't have a 32-bit push available. If this is
13088 a register, it is OK - we will just use the larger counterpart. We also
13089 retype memory - this comes from an attempt to avoid the REX prefix on
13090 moving the second half of a TFmode value. */
13091 if (GET_MODE (part[1][1]) == SImode)
13092 {
13093 switch (GET_CODE (part[1][1]))
13094 {
13095 case MEM:
13096 part[1][1] = adjust_address (part[1][1], DImode, 0);
13097 break;
13098
13099 case REG:
13100 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13101 break;
13102
13103 default:
13104 gcc_unreachable ();
13105 }
13106
13107 if (GET_MODE (part[1][0]) == SImode)
13108 part[1][0] = part[1][1];
13109 }
13110 }
13111 emit_move_insn (part[0][1], part[1][1]);
13112 emit_move_insn (part[0][0], part[1][0]);
13113 return;
13114 }
13115
13116 /* Choose correct order to not overwrite the source before it is copied. */
13117 if ((REG_P (part[0][0])
13118 && REG_P (part[1][1])
13119 && (REGNO (part[0][0]) == REGNO (part[1][1])
13120 || (nparts == 3
13121 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13122 || (collisions > 0
13123 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13124 {
13125 if (nparts == 3)
13126 {
13127 operands[2] = part[0][2];
13128 operands[3] = part[0][1];
13129 operands[4] = part[0][0];
13130 operands[5] = part[1][2];
13131 operands[6] = part[1][1];
13132 operands[7] = part[1][0];
13133 }
13134 else
13135 {
13136 operands[2] = part[0][1];
13137 operands[3] = part[0][0];
13138 operands[5] = part[1][1];
13139 operands[6] = part[1][0];
13140 }
13141 }
13142 else
13143 {
13144 if (nparts == 3)
13145 {
13146 operands[2] = part[0][0];
13147 operands[3] = part[0][1];
13148 operands[4] = part[0][2];
13149 operands[5] = part[1][0];
13150 operands[6] = part[1][1];
13151 operands[7] = part[1][2];
13152 }
13153 else
13154 {
13155 operands[2] = part[0][0];
13156 operands[3] = part[0][1];
13157 operands[5] = part[1][0];
13158 operands[6] = part[1][1];
13159 }
13160 }
13161
13162 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13163 if (optimize_size)
13164 {
13165 if (CONST_INT_P (operands[5])
13166 && operands[5] != const0_rtx
13167 && REG_P (operands[2]))
13168 {
13169 if (CONST_INT_P (operands[6])
13170 && INTVAL (operands[6]) == INTVAL (operands[5]))
13171 operands[6] = operands[2];
13172
13173 if (nparts == 3
13174 && CONST_INT_P (operands[7])
13175 && INTVAL (operands[7]) == INTVAL (operands[5]))
13176 operands[7] = operands[2];
13177 }
13178
13179 if (nparts == 3
13180 && CONST_INT_P (operands[6])
13181 && operands[6] != const0_rtx
13182 && REG_P (operands[3])
13183 && CONST_INT_P (operands[7])
13184 && INTVAL (operands[7]) == INTVAL (operands[6]))
13185 operands[7] = operands[3];
13186 }
13187
13188 emit_move_insn (operands[2], operands[5]);
13189 emit_move_insn (operands[3], operands[6]);
13190 if (nparts == 3)
13191 emit_move_insn (operands[4], operands[7]);
13192
13193 return;
13194 }
13195
13196 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13197 left shift by a constant, either using a single shift or
13198 a sequence of add instructions. */
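/* For example (cost numbers hypothetical): if the per-insn add cost is 1 and
the constant-shift cost is 3, a shift of the SImode half (MODE here is the
full double-word mode) by 2 is emitted as two self-adds when not optimizing
for size; a shift by 1 is always a single self-add; anything else falls back
to a single shift insn. */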
13199
13200 static void
13201 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13202 {
13203 if (count == 1)
13204 {
13205 emit_insn ((mode == DImode
13206 ? gen_addsi3
13207 : gen_adddi3) (operand, operand, operand));
13208 }
13209 else if (!optimize_size
13210 && count * ix86_cost->add <= ix86_cost->shift_const)
13211 {
13212 int i;
13213 for (i=0; i<count; i++)
13214 {
13215 emit_insn ((mode == DImode
13216 ? gen_addsi3
13217 : gen_adddi3) (operand, operand, operand));
13218 }
13219 }
13220 else
13221 emit_insn ((mode == DImode
13222 ? gen_ashlsi3
13223 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13224 }
13225
13226 void
13227 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13228 {
13229 rtx low[2], high[2];
13230 int count;
13231 const int single_width = mode == DImode ? 32 : 64;
13232
13233 if (CONST_INT_P (operands[2]))
13234 {
13235 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13236 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13237
13238 if (count >= single_width)
13239 {
13240 emit_move_insn (high[0], low[1]);
13241 emit_move_insn (low[0], const0_rtx);
13242
13243 if (count > single_width)
13244 ix86_expand_ashl_const (high[0], count - single_width, mode);
13245 }
13246 else
13247 {
13248 if (!rtx_equal_p (operands[0], operands[1]))
13249 emit_move_insn (operands[0], operands[1]);
13250 emit_insn ((mode == DImode
13251 ? gen_x86_shld_1
13252 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13253 ix86_expand_ashl_const (low[0], count, mode);
13254 }
13255 return;
13256 }
13257
13258 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13259
13260 if (operands[1] == const1_rtx)
13261 {
13262 /* Assuming we've chosen QImode-capable registers, 1 << N
13263 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13264 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13265 {
13266 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13267
13268 ix86_expand_clear (low[0]);
13269 ix86_expand_clear (high[0]);
13270 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13271
13272 d = gen_lowpart (QImode, low[0]);
13273 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13274 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13275 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13276
13277 d = gen_lowpart (QImode, high[0]);
13278 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13279 s = gen_rtx_NE (QImode, flags, const0_rtx);
13280 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13281 }
13282
13283 /* Otherwise, we can get the same results by manually performing
13284 a bit extract operation on bit 5/6, and then performing the two
13285 shifts. The two methods of getting 0/1 into low/high are exactly
13286 the same size. Avoiding the shift in the bit extract case helps
13287 pentium4 a bit; no one else seems to care much either way. */
13288 else
13289 {
13290 rtx x;
13291
13292 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13293 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13294 else
13295 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13296 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13297
13298 emit_insn ((mode == DImode
13299 ? gen_lshrsi3
13300 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13301 emit_insn ((mode == DImode
13302 ? gen_andsi3
13303 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13304 emit_move_insn (low[0], high[0]);
13305 emit_insn ((mode == DImode
13306 ? gen_xorsi3
13307 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13308 }
13309
13310 emit_insn ((mode == DImode
13311 ? gen_ashlsi3
13312 : gen_ashldi3) (low[0], low[0], operands[2]));
13313 emit_insn ((mode == DImode
13314 ? gen_ashlsi3
13315 : gen_ashldi3) (high[0], high[0], operands[2]));
13316 return;
13317 }
13318
13319 if (operands[1] == constm1_rtx)
13320 {
13321 /* For -1 << N, we can avoid the shld instruction, because we
13322 know that we're shifting 0...31/63 ones into a -1. */
13323 emit_move_insn (low[0], constm1_rtx);
13324 if (optimize_size)
13325 emit_move_insn (high[0], low[0]);
13326 else
13327 emit_move_insn (high[0], constm1_rtx);
13328 }
13329 else
13330 {
13331 if (!rtx_equal_p (operands[0], operands[1]))
13332 emit_move_insn (operands[0], operands[1]);
13333
13334 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13335 emit_insn ((mode == DImode
13336 ? gen_x86_shld_1
13337 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13338 }
13339
13340 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13341
13342 if (TARGET_CMOVE && scratch)
13343 {
13344 ix86_expand_clear (scratch);
13345 emit_insn ((mode == DImode
13346 ? gen_x86_shift_adj_1
13347 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13348 }
13349 else
13350 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13351 }
13352
13353 void
13354 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13355 {
13356 rtx low[2], high[2];
13357 int count;
13358 const int single_width = mode == DImode ? 32 : 64;
13359
13360 if (CONST_INT_P (operands[2]))
13361 {
13362 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13363 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13364
13365 if (count == single_width * 2 - 1)
13366 {
13367 emit_move_insn (high[0], high[1]);
13368 emit_insn ((mode == DImode
13369 ? gen_ashrsi3
13370 : gen_ashrdi3) (high[0], high[0],
13371 GEN_INT (single_width - 1)));
13372 emit_move_insn (low[0], high[0]);
13373
13374 }
13375 else if (count >= single_width)
13376 {
13377 emit_move_insn (low[0], high[1]);
13378 emit_move_insn (high[0], low[0]);
13379 emit_insn ((mode == DImode
13380 ? gen_ashrsi3
13381 : gen_ashrdi3) (high[0], high[0],
13382 GEN_INT (single_width - 1)));
13383 if (count > single_width)
13384 emit_insn ((mode == DImode
13385 ? gen_ashrsi3
13386 : gen_ashrdi3) (low[0], low[0],
13387 GEN_INT (count - single_width)));
13388 }
13389 else
13390 {
13391 if (!rtx_equal_p (operands[0], operands[1]))
13392 emit_move_insn (operands[0], operands[1]);
13393 emit_insn ((mode == DImode
13394 ? gen_x86_shrd_1
13395 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13396 emit_insn ((mode == DImode
13397 ? gen_ashrsi3
13398 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13399 }
13400 }
13401 else
13402 {
13403 if (!rtx_equal_p (operands[0], operands[1]))
13404 emit_move_insn (operands[0], operands[1]);
13405
13406 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13407
13408 emit_insn ((mode == DImode
13409 ? gen_x86_shrd_1
13410 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13411 emit_insn ((mode == DImode
13412 ? gen_ashrsi3
13413 : gen_ashrdi3) (high[0], high[0], operands[2]));
13414
13415 if (TARGET_CMOVE && scratch)
13416 {
13417 emit_move_insn (scratch, high[0]);
13418 emit_insn ((mode == DImode
13419 ? gen_ashrsi3
13420 : gen_ashrdi3) (scratch, scratch,
13421 GEN_INT (single_width - 1)));
13422 emit_insn ((mode == DImode
13423 ? gen_x86_shift_adj_1
13424 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13425 scratch));
13426 }
13427 else
13428 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13429 }
13430 }
13431
13432 void
13433 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13434 {
13435 rtx low[2], high[2];
13436 int count;
13437 const int single_width = mode == DImode ? 32 : 64;
13438
13439 if (CONST_INT_P (operands[2]))
13440 {
13441 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13442 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13443
13444 if (count >= single_width)
13445 {
13446 emit_move_insn (low[0], high[1]);
13447 ix86_expand_clear (high[0]);
13448
13449 if (count > single_width)
13450 emit_insn ((mode == DImode
13451 ? gen_lshrsi3
13452 : gen_lshrdi3) (low[0], low[0],
13453 GEN_INT (count - single_width)));
13454 }
13455 else
13456 {
13457 if (!rtx_equal_p (operands[0], operands[1]))
13458 emit_move_insn (operands[0], operands[1]);
13459 emit_insn ((mode == DImode
13460 ? gen_x86_shrd_1
13461 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13462 emit_insn ((mode == DImode
13463 ? gen_lshrsi3
13464 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13465 }
13466 }
13467 else
13468 {
13469 if (!rtx_equal_p (operands[0], operands[1]))
13470 emit_move_insn (operands[0], operands[1]);
13471
13472 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13473
13474 emit_insn ((mode == DImode
13475 ? gen_x86_shrd_1
13476 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13477 emit_insn ((mode == DImode
13478 ? gen_lshrsi3
13479 : gen_lshrdi3) (high[0], high[0], operands[2]));
13480
13481 /* Heh. By reversing the arguments, we can reuse this pattern. */
13482 if (TARGET_CMOVE && scratch)
13483 {
13484 ix86_expand_clear (scratch);
13485 emit_insn ((mode == DImode
13486 ? gen_x86_shift_adj_1
13487 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13488 scratch));
13489 }
13490 else
13491 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13492 }
13493 }
13494
13495 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13496 static void
13497 predict_jump (int prob)
13498 {
13499 rtx insn = get_last_insn ();
13500 gcc_assert (JUMP_P (insn));
13501 REG_NOTES (insn)
13502 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13503 GEN_INT (prob),
13504 REG_NOTES (insn));
13505 }
13506
13507 /* Helper function for the string operations below. Test whether VARIABLE
13508 is aligned to VALUE bytes; if so, jump to the label that is returned. */
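/* For instance, ix86_expand_aligntest (destptr, 4, false) masks DESTPTR with 4
into a temporary and jumps to the returned label when that bit is clear,
i.e. when the pointer is already aligned on that boundary; the caller places
the fix-up code before emitting the label. (Illustrative use only; the
callers below follow this pattern.) */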
13509 static rtx
13510 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13511 {
13512 rtx label = gen_label_rtx ();
13513 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13514 if (GET_MODE (variable) == DImode)
13515 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13516 else
13517 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13518 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13519 1, label);
13520 if (epilogue)
13521 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13522 else
13523 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13524 return label;
13525 }
13526
13527 /* Decrement COUNTREG by VALUE. */
13528 static void
13529 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13530 {
13531 if (GET_MODE (countreg) == DImode)
13532 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13533 else
13534 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13535 }
13536
13537 /* Zero-extend EXP, which may be SImode, into a Pmode register. */
13538 rtx
13539 ix86_zero_extend_to_Pmode (rtx exp)
13540 {
13541 rtx r;
13542 if (GET_MODE (exp) == VOIDmode)
13543 return force_reg (Pmode, exp);
13544 if (GET_MODE (exp) == Pmode)
13545 return copy_to_mode_reg (Pmode, exp);
13546 r = gen_reg_rtx (Pmode);
13547 emit_insn (gen_zero_extendsidi2 (r, exp));
13548 return r;
13549 }
13550
13551 /* Divide COUNTREG by SCALE. */
13552 static rtx
13553 scale_counter (rtx countreg, int scale)
13554 {
13555 rtx sc;
13556 rtx piece_size_mask;
13557
13558 if (scale == 1)
13559 return countreg;
13560 if (CONST_INT_P (countreg))
13561 return GEN_INT (INTVAL (countreg) / scale);
13562 gcc_assert (REG_P (countreg));
13563
13564 piece_size_mask = GEN_INT (scale - 1);
13565 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13566 GEN_INT (exact_log2 (scale)),
13567 NULL, 1, OPTAB_DIRECT);
13568 return sc;
13569 }
13570
13571 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
13572 DImode for constant loop counts. */
13573
13574 static enum machine_mode
13575 counter_mode (rtx count_exp)
13576 {
13577 if (GET_MODE (count_exp) != VOIDmode)
13578 return GET_MODE (count_exp);
13579 if (GET_CODE (count_exp) != CONST_INT)
13580 return Pmode;
13581 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13582 return DImode;
13583 return SImode;
13584 }
13585
13586 /* When SRCPTR is non-NULL, output a simple loop that copies memory
13587 from SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13588 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13589 equivalent loop that sets memory to VALUE (assumed to be in MODE).
13590 
13591 The size is rounded down to a whole number of chunks moved at once.
13592 SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info. */
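/* Worked example (parameter values only illustrative): with MODE == SImode and
UNROLL == 4 each iteration handles 16 bytes, SIZE is COUNT & ~15, the loop
body loads four SImode temporaries and stores them (or stores VALUE four
times for memset), and after the loop both pointers are advanced by the
number of bytes actually processed. */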
13593
13594
13595 static void
13596 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13597 rtx destptr, rtx srcptr, rtx value,
13598 rtx count, enum machine_mode mode, int unroll,
13599 int expected_size)
13600 {
13601 rtx out_label, top_label, iter, tmp;
13602 enum machine_mode iter_mode = counter_mode (count);
13603 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13604 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13605 rtx size;
13606 rtx x_addr;
13607 rtx y_addr;
13608 int i;
13609
13610 top_label = gen_label_rtx ();
13611 out_label = gen_label_rtx ();
13612 iter = gen_reg_rtx (iter_mode);
13613
13614 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13615 NULL, 1, OPTAB_DIRECT);
13616 /* Those two should combine. */
13617 if (piece_size == const1_rtx)
13618 {
13619 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13620 true, out_label);
13621 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13622 }
13623 emit_move_insn (iter, const0_rtx);
13624
13625 emit_label (top_label);
13626
13627 tmp = convert_modes (Pmode, iter_mode, iter, true);
13628 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13629 destmem = change_address (destmem, mode, x_addr);
13630
13631 if (srcmem)
13632 {
13633 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13634 srcmem = change_address (srcmem, mode, y_addr);
13635
13636 /* When unrolling for chips that reorder memory reads and writes,
13637 we can save registers by using a single temporary.
13638 Also, using 4 temporaries is overkill in 32-bit mode. */
13639 if (!TARGET_64BIT && 0)
13640 {
13641 for (i = 0; i < unroll; i++)
13642 {
13643 if (i)
13644 {
13645 destmem =
13646 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13647 srcmem =
13648 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13649 }
13650 emit_move_insn (destmem, srcmem);
13651 }
13652 }
13653 else
13654 {
13655 rtx tmpreg[4];
13656 gcc_assert (unroll <= 4);
13657 for (i = 0; i < unroll; i++)
13658 {
13659 tmpreg[i] = gen_reg_rtx (mode);
13660 if (i)
13661 {
13662 srcmem =
13663 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13664 }
13665 emit_move_insn (tmpreg[i], srcmem);
13666 }
13667 for (i = 0; i < unroll; i++)
13668 {
13669 if (i)
13670 {
13671 destmem =
13672 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13673 }
13674 emit_move_insn (destmem, tmpreg[i]);
13675 }
13676 }
13677 }
13678 else
13679 for (i = 0; i < unroll; i++)
13680 {
13681 if (i)
13682 destmem =
13683 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13684 emit_move_insn (destmem, value);
13685 }
13686
13687 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13688 true, OPTAB_LIB_WIDEN);
13689 if (tmp != iter)
13690 emit_move_insn (iter, tmp);
13691
13692 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13693 true, top_label);
13694 if (expected_size != -1)
13695 {
13696 expected_size /= GET_MODE_SIZE (mode) * unroll;
13697 if (expected_size == 0)
13698 predict_jump (0);
13699 else if (expected_size > REG_BR_PROB_BASE)
13700 predict_jump (REG_BR_PROB_BASE - 1);
13701 else
13702 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13703 }
13704 else
13705 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13706 iter = ix86_zero_extend_to_Pmode (iter);
13707 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13708 true, OPTAB_LIB_WIDEN);
13709 if (tmp != destptr)
13710 emit_move_insn (destptr, tmp);
13711 if (srcptr)
13712 {
13713 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13714 true, OPTAB_LIB_WIDEN);
13715 if (tmp != srcptr)
13716 emit_move_insn (srcptr, tmp);
13717 }
13718 emit_label (out_label);
13719 }
13720
13721 /* Output a "rep; mov" instruction.
13722 The arguments have the same meaning as for the previous function. */
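/* Sketch of the generated pattern (illustrative): for MODE == SImode the byte
count is divided by 4 via scale_counter, zero-extended to Pmode, and
DESTEXP/SRCEXP describe the final pointer values destptr + (countreg << 2)
and srcptr + (countreg << 2); for QImode the count register is added to the
pointers directly. */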
13723 static void
13724 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13725 rtx destptr, rtx srcptr,
13726 rtx count,
13727 enum machine_mode mode)
13728 {
13729 rtx destexp;
13730 rtx srcexp;
13731 rtx countreg;
13732
13733 /* If the size is known to be a multiple of 4, rep movsl is shorter than rep movsb. */
13734 if (mode == QImode && CONST_INT_P (count)
13735 && !(INTVAL (count) & 3))
13736 mode = SImode;
13737
13738 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13739 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13740 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13741 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13742 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13743 if (mode != QImode)
13744 {
13745 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13746 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13747 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13748 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13749 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13750 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13751 }
13752 else
13753 {
13754 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13755 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13756 }
13757 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13758 destexp, srcexp));
13759 }
13760
13761 /* Output a "rep; stos" instruction.
13762 The arguments have the same meaning as for the previous function. */
13763 static void
13764 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13765 rtx count,
13766 enum machine_mode mode)
13767 {
13768 rtx destexp;
13769 rtx countreg;
13770
13771 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13772 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13773 value = force_reg (mode, gen_lowpart (mode, value));
13774 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13775 if (mode != QImode)
13776 {
13777 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13778 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13779 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13780 }
13781 else
13782 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13783 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13784 }
13785
13786 static void
13787 emit_strmov (rtx destmem, rtx srcmem,
13788 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13789 {
13790 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13791 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13792 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13793 }
13794
13795 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
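/* Worked example (byte count chosen for illustration): for a constant COUNT of
13 on a 64-bit target with MAX_SIZE == 16, the code below emits one DImode
move (offset 0), one SImode move (offset 8) and one QImode move (offset 12),
covering the 8 + 4 + 1 bytes selected by the set bits of the count. */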
13796 static void
13797 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13798 rtx destptr, rtx srcptr, rtx count, int max_size)
13799 {
13800 rtx src, dest;
13801 if (CONST_INT_P (count))
13802 {
13803 HOST_WIDE_INT countval = INTVAL (count);
13804 int offset = 0;
13805
13806 if ((countval & 0x10) && max_size > 16)
13807 {
13808 if (TARGET_64BIT)
13809 {
13810 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13811 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13812 }
13813 else
13814 gcc_unreachable ();
13815 offset += 16;
13816 }
13817 if ((countval & 0x08) && max_size > 8)
13818 {
13819 if (TARGET_64BIT)
13820 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13821 else
13822 {
13823 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13824 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13825 }
13826 offset += 8;
13827 }
13828 if ((countval & 0x04) && max_size > 4)
13829 {
13830 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13831 offset += 4;
13832 }
13833 if ((countval & 0x02) && max_size > 2)
13834 {
13835 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13836 offset += 2;
13837 }
13838 if ((countval & 0x01) && max_size > 1)
13839 {
13840 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13841 offset += 1;
13842 }
13843 return;
13844 }
13845 if (max_size > 8)
13846 {
13847 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13848 count, 1, OPTAB_DIRECT);
13849 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13850 count, QImode, 1, 4);
13851 return;
13852 }
13853
13854 /* When single stringop insns are available, they cheaply advance the dest
13855 and src pointers for us. Otherwise we save code size by maintaining an
13856 offset (zero is readily available from the preceding rep operation) and
13857 using x86 addressing modes. */
13858 if (TARGET_SINGLE_STRINGOP)
13859 {
13860 if (max_size > 4)
13861 {
13862 rtx label = ix86_expand_aligntest (count, 4, true);
13863 src = change_address (srcmem, SImode, srcptr);
13864 dest = change_address (destmem, SImode, destptr);
13865 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13866 emit_label (label);
13867 LABEL_NUSES (label) = 1;
13868 }
13869 if (max_size > 2)
13870 {
13871 rtx label = ix86_expand_aligntest (count, 2, true);
13872 src = change_address (srcmem, HImode, srcptr);
13873 dest = change_address (destmem, HImode, destptr);
13874 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13875 emit_label (label);
13876 LABEL_NUSES (label) = 1;
13877 }
13878 if (max_size > 1)
13879 {
13880 rtx label = ix86_expand_aligntest (count, 1, true);
13881 src = change_address (srcmem, QImode, srcptr);
13882 dest = change_address (destmem, QImode, destptr);
13883 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13884 emit_label (label);
13885 LABEL_NUSES (label) = 1;
13886 }
13887 }
13888 else
13889 {
13890 rtx offset = force_reg (Pmode, const0_rtx);
13891 rtx tmp;
13892
13893 if (max_size > 4)
13894 {
13895 rtx label = ix86_expand_aligntest (count, 4, true);
13896 src = change_address (srcmem, SImode, srcptr);
13897 dest = change_address (destmem, SImode, destptr);
13898 emit_move_insn (dest, src);
13899 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13900 true, OPTAB_LIB_WIDEN);
13901 if (tmp != offset)
13902 emit_move_insn (offset, tmp);
13903 emit_label (label);
13904 LABEL_NUSES (label) = 1;
13905 }
13906 if (max_size > 2)
13907 {
13908 rtx label = ix86_expand_aligntest (count, 2, true);
13909 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13910 src = change_address (srcmem, HImode, tmp);
13911 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13912 dest = change_address (destmem, HImode, tmp);
13913 emit_move_insn (dest, src);
13914 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13915 true, OPTAB_LIB_WIDEN);
13916 if (tmp != offset)
13917 emit_move_insn (offset, tmp);
13918 emit_label (label);
13919 LABEL_NUSES (label) = 1;
13920 }
13921 if (max_size > 1)
13922 {
13923 rtx label = ix86_expand_aligntest (count, 1, true);
13924 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13925 src = change_address (srcmem, QImode, tmp);
13926 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13927 dest = change_address (destmem, QImode, tmp);
13928 emit_move_insn (dest, src);
13929 emit_label (label);
13930 LABEL_NUSES (label) = 1;
13931 }
13932 }
13933 }
13934
13935 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13936 static void
13937 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13938 rtx count, int max_size)
13939 {
13940 count =
13941 expand_simple_binop (counter_mode (count), AND, count,
13942 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13943 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13944 gen_lowpart (QImode, value), count, QImode,
13945 1, max_size / 2);
13946 }
13947
13948 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
13949 static void
13950 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13951 {
13952 rtx dest;
13953
13954 if (CONST_INT_P (count))
13955 {
13956 HOST_WIDE_INT countval = INTVAL (count);
13957 int offset = 0;
13958
13959 if ((countval & 0x10) && max_size > 16)
13960 {
13961 if (TARGET_64BIT)
13962 {
13963 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13964 emit_insn (gen_strset (destptr, dest, value));
13965 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13966 emit_insn (gen_strset (destptr, dest, value));
13967 }
13968 else
13969 gcc_unreachable ();
13970 offset += 16;
13971 }
13972 if ((countval & 0x08) && max_size > 8)
13973 {
13974 if (TARGET_64BIT)
13975 {
13976 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13977 emit_insn (gen_strset (destptr, dest, value));
13978 }
13979 else
13980 {
13981 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13982 emit_insn (gen_strset (destptr, dest, value));
13983 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13984 emit_insn (gen_strset (destptr, dest, value));
13985 }
13986 offset += 8;
13987 }
13988 if ((countval & 0x04) && max_size > 4)
13989 {
13990 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13991 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13992 offset += 4;
13993 }
13994 if ((countval & 0x02) && max_size > 2)
13995 {
13996 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13997 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13998 offset += 2;
13999 }
14000 if ((countval & 0x01) && max_size > 1)
14001 {
14002 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14003 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14004 offset += 1;
14005 }
14006 return;
14007 }
14008 if (max_size > 32)
14009 {
14010 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14011 return;
14012 }
14013 if (max_size > 16)
14014 {
14015 rtx label = ix86_expand_aligntest (count, 16, true);
14016 if (TARGET_64BIT)
14017 {
14018 dest = change_address (destmem, DImode, destptr);
14019 emit_insn (gen_strset (destptr, dest, value));
14020 emit_insn (gen_strset (destptr, dest, value));
14021 }
14022 else
14023 {
14024 dest = change_address (destmem, SImode, destptr);
14025 emit_insn (gen_strset (destptr, dest, value));
14026 emit_insn (gen_strset (destptr, dest, value));
14027 emit_insn (gen_strset (destptr, dest, value));
14028 emit_insn (gen_strset (destptr, dest, value));
14029 }
14030 emit_label (label);
14031 LABEL_NUSES (label) = 1;
14032 }
14033 if (max_size > 8)
14034 {
14035 rtx label = ix86_expand_aligntest (count, 8, true);
14036 if (TARGET_64BIT)
14037 {
14038 dest = change_address (destmem, DImode, destptr);
14039 emit_insn (gen_strset (destptr, dest, value));
14040 }
14041 else
14042 {
14043 dest = change_address (destmem, SImode, destptr);
14044 emit_insn (gen_strset (destptr, dest, value));
14045 emit_insn (gen_strset (destptr, dest, value));
14046 }
14047 emit_label (label);
14048 LABEL_NUSES (label) = 1;
14049 }
14050 if (max_size > 4)
14051 {
14052 rtx label = ix86_expand_aligntest (count, 4, true);
14053 dest = change_address (destmem, SImode, destptr);
14054 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14055 emit_label (label);
14056 LABEL_NUSES (label) = 1;
14057 }
14058 if (max_size > 2)
14059 {
14060 rtx label = ix86_expand_aligntest (count, 2, true);
14061 dest = change_address (destmem, HImode, destptr);
14062 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14063 emit_label (label);
14064 LABEL_NUSES (label) = 1;
14065 }
14066 if (max_size > 1)
14067 {
14068 rtx label = ix86_expand_aligntest (count, 1, true);
14069 dest = change_address (destmem, QImode, destptr);
14070 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14071 emit_label (label);
14072 LABEL_NUSES (label) = 1;
14073 }
14074 }
14075
14076 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned to
14077 ALIGN bytes, up to DESIRED_ALIGNMENT. */
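/* For instance (alignment values illustrative): with ALIGN == 1 and
DESIRED_ALIGNMENT == 4 the code emits a QImode copy guarded by a test of
bit 0 of DESTPTR and an HImode copy guarded by bit 1, decrementing COUNT by
the bytes actually copied, so DEST is 4-byte aligned when the main loop
starts. */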
14078 static void
14079 expand_movmem_prologue (rtx destmem, rtx srcmem,
14080 rtx destptr, rtx srcptr, rtx count,
14081 int align, int desired_alignment)
14082 {
14083 if (align <= 1 && desired_alignment > 1)
14084 {
14085 rtx label = ix86_expand_aligntest (destptr, 1, false);
14086 srcmem = change_address (srcmem, QImode, srcptr);
14087 destmem = change_address (destmem, QImode, destptr);
14088 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14089 ix86_adjust_counter (count, 1);
14090 emit_label (label);
14091 LABEL_NUSES (label) = 1;
14092 }
14093 if (align <= 2 && desired_alignment > 2)
14094 {
14095 rtx label = ix86_expand_aligntest (destptr, 2, false);
14096 srcmem = change_address (srcmem, HImode, srcptr);
14097 destmem = change_address (destmem, HImode, destptr);
14098 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14099 ix86_adjust_counter (count, 2);
14100 emit_label (label);
14101 LABEL_NUSES (label) = 1;
14102 }
14103 if (align <= 4 && desired_alignment > 4)
14104 {
14105 rtx label = ix86_expand_aligntest (destptr, 4, false);
14106 srcmem = change_address (srcmem, SImode, srcptr);
14107 destmem = change_address (destmem, SImode, destptr);
14108 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14109 ix86_adjust_counter (count, 4);
14110 emit_label (label);
14111 LABEL_NUSES (label) = 1;
14112 }
14113 gcc_assert (desired_alignment <= 8);
14114 }
14115
14116 /* Store enough bytes at DEST to align DEST, known to be aligned to ALIGN
14117 bytes, up to DESIRED_ALIGNMENT. */
14118 static void
14119 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14120 int align, int desired_alignment)
14121 {
14122 if (align <= 1 && desired_alignment > 1)
14123 {
14124 rtx label = ix86_expand_aligntest (destptr, 1, false);
14125 destmem = change_address (destmem, QImode, destptr);
14126 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14127 ix86_adjust_counter (count, 1);
14128 emit_label (label);
14129 LABEL_NUSES (label) = 1;
14130 }
14131 if (align <= 2 && desired_alignment > 2)
14132 {
14133 rtx label = ix86_expand_aligntest (destptr, 2, false);
14134 destmem = change_address (destmem, HImode, destptr);
14135 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14136 ix86_adjust_counter (count, 2);
14137 emit_label (label);
14138 LABEL_NUSES (label) = 1;
14139 }
14140 if (align <= 4 && desired_alignment > 4)
14141 {
14142 rtx label = ix86_expand_aligntest (destptr, 4, false);
14143 destmem = change_address (destmem, SImode, destptr);
14144 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14145 ix86_adjust_counter (count, 4);
14146 emit_label (label);
14147 LABEL_NUSES (label) = 1;
14148 }
14149 gcc_assert (desired_alignment <= 8);
14150 }
14151
14152 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
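/* Example of the table walk below (table entries hypothetical): with a size
table of {{16, loop}, {128, rep_prefix_4_byte}, {-1, libcall}} and
EXPECTED_SIZE == 100, the scan stops at the 128 entry and returns
rep_prefix_4_byte; with TARGET_INLINE_ALL_STRINGOPS the last non-libcall
algorithm seen is used instead of a libcall entry. */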
14153 static enum stringop_alg
14154 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14155 int *dynamic_check)
14156 {
14157 const struct stringop_algs * algs;
14158
14159 *dynamic_check = -1;
14160 if (memset)
14161 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14162 else
14163 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14164 if (stringop_alg != no_stringop)
14165 return stringop_alg;
14166 /* rep; movq or rep; movl is the smallest variant. */
14167 else if (optimize_size)
14168 {
14169 if (!count || (count & 3))
14170 return rep_prefix_1_byte;
14171 else
14172 return rep_prefix_4_byte;
14173 }
14174 /* Very tiny blocks are best handled via the loop; REP is expensive
14175 to set up. */
14176 else if (expected_size != -1 && expected_size < 4)
14177 return loop_1_byte;
14178 else if (expected_size != -1)
14179 {
14180 unsigned int i;
14181 enum stringop_alg alg = libcall;
14182 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14183 {
14184 gcc_assert (algs->size[i].max);
14185 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14186 {
14187 if (algs->size[i].alg != libcall)
14188 alg = algs->size[i].alg;
14189 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14190 last non-libcall inline algorithm. */
14191 if (TARGET_INLINE_ALL_STRINGOPS)
14192 {
14193 /* When the current size is best copied by a libcall,
14194 but we are still forced to inline, run the heuristic below
14195 that picks code for medium-sized blocks. */
14196 if (alg != libcall)
14197 return alg;
14198 break;
14199 }
14200 else
14201 return algs->size[i].alg;
14202 }
14203 }
14204 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14205 }
14206 /* When asked to inline the call anyway, try to pick a meaningful choice.
14207 We look for the maximal size of block that is faster to copy by hand and
14208 take blocks of at most that size, guessing that the average size will
14209 be roughly half of the maximum.
14210
14211 If this turns out to be bad, we might simply specify the preferred
14212 choice in ix86_costs. */
14213 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14214 && algs->unknown_size == libcall)
14215 {
14216 int max = -1;
14217 enum stringop_alg alg;
14218 int i;
14219
14220 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14221 if (algs->size[i].alg != libcall && algs->size[i].alg)
14222 max = algs->size[i].max;
14223 if (max == -1)
14224 max = 4096;
14225 alg = decide_alg (count, max / 2, memset, dynamic_check);
14226 gcc_assert (*dynamic_check == -1);
14227 gcc_assert (alg != libcall);
14228 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14229 *dynamic_check = max;
14230 return alg;
14231 }
14232 return algs->unknown_size;
14233 }
14234
14235 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14236 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14237 static int
14238 decide_alignment (int align,
14239 enum stringop_alg alg,
14240 int expected_size)
14241 {
14242 int desired_align = 0;
14243 switch (alg)
14244 {
14245 case no_stringop:
14246 gcc_unreachable ();
14247 case loop:
14248 case unrolled_loop:
14249 desired_align = GET_MODE_SIZE (Pmode);
14250 break;
14251 case rep_prefix_8_byte:
14252 desired_align = 8;
14253 break;
14254 case rep_prefix_4_byte:
14255 /* PentiumPro has special logic that triggers for 8-byte aligned blocks,
14256 copying a whole cache line at once. */
14257 if (TARGET_PENTIUMPRO)
14258 desired_align = 8;
14259 else
14260 desired_align = 4;
14261 break;
14262 case rep_prefix_1_byte:
14263 /* PentiumPro has special logic that triggers for 8-byte aligned blocks,
14264 copying a whole cache line at once. */
14265 if (TARGET_PENTIUMPRO)
14266 desired_align = 8;
14267 else
14268 desired_align = 1;
14269 break;
14270 case loop_1_byte:
14271 desired_align = 1;
14272 break;
14273 case libcall:
14274 return 0;
14275 }
14276
14277 if (optimize_size)
14278 desired_align = 1;
14279 if (desired_align < align)
14280 desired_align = align;
14281 if (expected_size != -1 && expected_size < 4)
14282 desired_align = align;
14283 return desired_align;
14284 }
14285
14286 /* Return the smallest power of 2 greater than VAL. */
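/* E.g. 0 -> 1, 1 -> 2, 4 -> 8 and 7 -> 8; note that a power of two is mapped
to the next one, per the "greater than" in the name. */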
14287 static int
14288 smallest_pow2_greater_than (int val)
14289 {
14290 int ret = 1;
14291 while (ret <= val)
14292 ret <<= 1;
14293 return ret;
14294 }
14295
14296 /* Expand a string move (memcpy) operation. Use i386 string operations
14297 when profitable. ix86_expand_setmem contains similar code. The code
14298 depends upon the architecture, block size and alignment, but always has
14299 the same overall structure:
14300
14301 1) Prologue guard: Conditional that jumps up to the epilogue for small
14302 blocks that can be handled by the epilogue alone. This is faster but
14303 also needed for correctness, since the prologue assumes the block is
14304 larger than the desired alignment.
14305
14306 Optional dynamic check for size and libcall for large
14307 blocks is emitted here too, with -minline-stringops-dynamically.
14308
14309 2) Prologue: copy the first few bytes in order to get the destination
14310 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14311 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14312 We emit either a jump tree on power of two sized blocks, or a byte loop.
14313
14314 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14315 with specified algorithm.
14316
14317 4) Epilogue: code copying tail of the block that is too small to be
14318 handled by main body (or up to size guarded by prologue guard). */
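/* Putting the steps together on a concrete, purely illustrative case: a
memcpy of roughly 200 bytes with unknown alignment on a target whose cost
table picks rep_prefix_4_byte gives SIZE_NEEDED == 4; the prologue guard
branches to the epilogue for very small counts, the prologue aligns the
destination to 4 bytes, the main body is a single rep movsl, and the
epilogue copies the remaining count & 3 bytes. */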
14319
14320 int
14321 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14322 rtx expected_align_exp, rtx expected_size_exp)
14323 {
14324 rtx destreg;
14325 rtx srcreg;
14326 rtx label = NULL;
14327 rtx tmp;
14328 rtx jump_around_label = NULL;
14329 HOST_WIDE_INT align = 1;
14330 unsigned HOST_WIDE_INT count = 0;
14331 HOST_WIDE_INT expected_size = -1;
14332 int size_needed = 0, epilogue_size_needed;
14333 int desired_align = 0;
14334 enum stringop_alg alg;
14335 int dynamic_check;
14336
14337 if (CONST_INT_P (align_exp))
14338 align = INTVAL (align_exp);
14339 /* i386 can do misaligned access at a reasonably increased cost. */
14340 if (CONST_INT_P (expected_align_exp)
14341 && INTVAL (expected_align_exp) > align)
14342 align = INTVAL (expected_align_exp);
14343 if (CONST_INT_P (count_exp))
14344 count = expected_size = INTVAL (count_exp);
14345 if (CONST_INT_P (expected_size_exp) && count == 0)
14346 expected_size = INTVAL (expected_size_exp);
14347
14348 /* Step 0: Decide on preferred algorithm, desired alignment and
14349 size of chunks to be copied by main loop. */
14350
14351 alg = decide_alg (count, expected_size, false, &dynamic_check);
14352 desired_align = decide_alignment (align, alg, expected_size);
14353
14354 if (!TARGET_ALIGN_STRINGOPS)
14355 align = desired_align;
14356
14357 if (alg == libcall)
14358 return 0;
14359 gcc_assert (alg != no_stringop);
14360 if (!count)
14361 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14362 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14363 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14364 switch (alg)
14365 {
14366 case libcall:
14367 case no_stringop:
14368 gcc_unreachable ();
14369 case loop:
14370 size_needed = GET_MODE_SIZE (Pmode);
14371 break;
14372 case unrolled_loop:
14373 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14374 break;
14375 case rep_prefix_8_byte:
14376 size_needed = 8;
14377 break;
14378 case rep_prefix_4_byte:
14379 size_needed = 4;
14380 break;
14381 case rep_prefix_1_byte:
14382 case loop_1_byte:
14383 size_needed = 1;
14384 break;
14385 }
14386
14387 epilogue_size_needed = size_needed;
14388
14389 /* Step 1: Prologue guard. */
14390
14391 /* Alignment code needs count to be in register. */
14392 if (CONST_INT_P (count_exp) && desired_align > align)
14393 {
14394 enum machine_mode mode = SImode;
14395 if (TARGET_64BIT && (count & ~0xffffffff))
14396 mode = DImode;
14397 count_exp = force_reg (mode, count_exp);
14398 }
14399 gcc_assert (desired_align >= 1 && align >= 1);
14400
14401 /* Ensure that alignment prologue won't copy past end of block. */
14402 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14403 {
14404 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14405 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14406 Make sure it is power of 2. */
14407 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14408
14409 label = gen_label_rtx ();
14410 emit_cmp_and_jump_insns (count_exp,
14411 GEN_INT (epilogue_size_needed),
14412 LTU, 0, counter_mode (count_exp), 1, label);
14413 if (GET_CODE (count_exp) == CONST_INT)
14414 ;
14415 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14416 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14417 else
14418 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14419 }
14420 /* Emit code to decide at runtime whether a library call or inline code
14421 should be used. */
14422 if (dynamic_check != -1)
14423 {
14424 rtx hot_label = gen_label_rtx ();
14425 jump_around_label = gen_label_rtx ();
14426 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14427 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14428 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14429 emit_block_move_via_libcall (dst, src, count_exp, false);
14430 emit_jump (jump_around_label);
14431 emit_label (hot_label);
14432 }
14433
14434 /* Step 2: Alignment prologue. */
14435
14436 if (desired_align > align)
14437 {
14438 /* Except for the first move in the epilogue, we no longer know
14439 the constant offset in the aliasing info. It doesn't seem worth
14440 the pain to maintain it for the first move, so throw away
14441 the info early. */
14442 src = change_address (src, BLKmode, srcreg);
14443 dst = change_address (dst, BLKmode, destreg);
14444 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14445 desired_align);
14446 }
14447 if (label && size_needed == 1)
14448 {
14449 emit_label (label);
14450 LABEL_NUSES (label) = 1;
14451 label = NULL;
14452 }
14453
14454 /* Step 3: Main loop. */
14455
14456 switch (alg)
14457 {
14458 case libcall:
14459 case no_stringop:
14460 gcc_unreachable ();
14461 case loop_1_byte:
14462 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14463 count_exp, QImode, 1, expected_size);
14464 break;
14465 case loop:
14466 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14467 count_exp, Pmode, 1, expected_size);
14468 break;
14469 case unrolled_loop:
14470 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14471 registers for 4 temporaries anyway. */
14472 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14473 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14474 expected_size);
14475 break;
14476 case rep_prefix_8_byte:
14477 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14478 DImode);
14479 break;
14480 case rep_prefix_4_byte:
14481 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14482 SImode);
14483 break;
14484 case rep_prefix_1_byte:
14485 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14486 QImode);
14487 break;
14488 }
14489 /* Properly adjust the offsets of src and dest memory for aliasing. */
14490 if (CONST_INT_P (count_exp))
14491 {
14492 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14493 (count / size_needed) * size_needed);
14494 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14495 (count / size_needed) * size_needed);
14496 }
14497 else
14498 {
14499 src = change_address (src, BLKmode, srcreg);
14500 dst = change_address (dst, BLKmode, destreg);
14501 }
14502
14503 /* Step 4: Epilogue to copy the remaining bytes. */
14504
14505 if (label)
14506 {
14507 /* When the main loop is done, COUNT_EXP might hold the original count,
14508 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14509 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14510 bytes. Compensate if needed. */
14511
14512 if (size_needed < epilogue_size_needed)
14513 {
14514 tmp =
14515 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14516 GEN_INT (size_needed - 1), count_exp, 1,
14517 OPTAB_DIRECT);
14518 if (tmp != count_exp)
14519 emit_move_insn (count_exp, tmp);
14520 }
14521 emit_label (label);
14522 LABEL_NUSES (label) = 1;
14523 }
14524
14525 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14526 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14527 epilogue_size_needed);
14528 if (jump_around_label)
14529 emit_label (jump_around_label);
14530 return 1;
14531 }
14532
14533 /* Helper function for memset. For the QImode value 0xXY produce
14534 0xXYXYXYXY of the width specified by MODE. This is essentially
14535 a * 0x01010101, but we can do slightly better than
14536 synth_mult by unwinding the sequence by hand on CPUs with
14537 a slow multiply. */
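/* Worked example: for the constant byte value 0xAB and MODE == SImode the
constant path below computes 0xAB -> 0xABAB -> 0xABABABAB; the non-constant
path reaches the same result either by a multiply with the promoted constant
0x01010101 or by the explicit shift-and-or (or insv) sequence, whichever the
cost model prefers. */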
14538 static rtx
14539 promote_duplicated_reg (enum machine_mode mode, rtx val)
14540 {
14541 enum machine_mode valmode = GET_MODE (val);
14542 rtx tmp;
14543 int nops = mode == DImode ? 3 : 2;
14544
14545 gcc_assert (mode == SImode || mode == DImode);
14546 if (val == const0_rtx)
14547 return copy_to_mode_reg (mode, const0_rtx);
14548 if (CONST_INT_P (val))
14549 {
14550 HOST_WIDE_INT v = INTVAL (val) & 255;
14551
14552 v |= v << 8;
14553 v |= v << 16;
14554 if (mode == DImode)
14555 v |= (v << 16) << 16;
14556 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14557 }
14558
14559 if (valmode == VOIDmode)
14560 valmode = QImode;
14561 if (valmode != QImode)
14562 val = gen_lowpart (QImode, val);
14563 if (mode == QImode)
14564 return val;
14565 if (!TARGET_PARTIAL_REG_STALL)
14566 nops--;
14567 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14568 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14569 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14570 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14571 {
14572 rtx reg = convert_modes (mode, QImode, val, true);
14573 tmp = promote_duplicated_reg (mode, const1_rtx);
14574 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14575 OPTAB_DIRECT);
14576 }
14577 else
14578 {
14579 rtx reg = convert_modes (mode, QImode, val, true);
14580
14581 if (!TARGET_PARTIAL_REG_STALL)
14582 if (mode == SImode)
14583 emit_insn (gen_movsi_insv_1 (reg, reg));
14584 else
14585 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14586 else
14587 {
14588 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14589 NULL, 1, OPTAB_DIRECT);
14590 reg =
14591 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14592 }
14593 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14594 NULL, 1, OPTAB_DIRECT);
14595 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14596 if (mode == SImode)
14597 return reg;
14598 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14599 NULL, 1, OPTAB_DIRECT);
14600 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14601 return reg;
14602 }
14603 }
14604
14605 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14606 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14607 alignment from ALIGN to DESIRED_ALIGN. */
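/* For example (sizes illustrative): on a 64-bit target with SIZE_NEEDED == 8
the value is broadcast to DImode; with SIZE_NEEDED == 4 to SImode; when only
single bytes will ever be stored and no extra alignment is required, VAL is
returned unchanged. */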
14608 static rtx
14609 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14610 {
14611 rtx promoted_val;
14612
14613 if (TARGET_64BIT
14614 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14615 promoted_val = promote_duplicated_reg (DImode, val);
14616 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14617 promoted_val = promote_duplicated_reg (SImode, val);
14618 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14619 promoted_val = promote_duplicated_reg (HImode, val);
14620 else
14621 promoted_val = val;
14622
14623 return promoted_val;
14624 }
14625
14626 /* Expand a string set operation (memset). Use i386 string operations
14627 when profitable. See the ix86_expand_movmem comment for an explanation
14628 of the individual steps performed. */
14629 int
14630 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14631 rtx expected_align_exp, rtx expected_size_exp)
14632 {
14633 rtx destreg;
14634 rtx label = NULL;
14635 rtx tmp;
14636 rtx jump_around_label = NULL;
14637 HOST_WIDE_INT align = 1;
14638 unsigned HOST_WIDE_INT count = 0;
14639 HOST_WIDE_INT expected_size = -1;
14640 int size_needed = 0, epilogue_size_needed;
14641 int desired_align = 0;
14642 enum stringop_alg alg;
14643 rtx promoted_val = NULL;
14644 bool force_loopy_epilogue = false;
14645 int dynamic_check;
14646
14647 if (CONST_INT_P (align_exp))
14648 align = INTVAL (align_exp);
14649 /* i386 can do misaligned access at a reasonably increased cost. */
14650 if (CONST_INT_P (expected_align_exp)
14651 && INTVAL (expected_align_exp) > align)
14652 align = INTVAL (expected_align_exp);
14653 if (CONST_INT_P (count_exp))
14654 count = expected_size = INTVAL (count_exp);
14655 if (CONST_INT_P (expected_size_exp) && count == 0)
14656 expected_size = INTVAL (expected_size_exp);
14657
14658 /* Step 0: Decide on preferred algorithm, desired alignment and
14659 size of chunks to be copied by main loop. */
14660
14661 alg = decide_alg (count, expected_size, true, &dynamic_check);
14662 desired_align = decide_alignment (align, alg, expected_size);
14663
14664 if (!TARGET_ALIGN_STRINGOPS)
14665 align = desired_align;
14666
14667 if (alg == libcall)
14668 return 0;
14669 gcc_assert (alg != no_stringop);
14670 if (!count)
14671 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14672 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14673 switch (alg)
14674 {
14675 case libcall:
14676 case no_stringop:
14677 gcc_unreachable ();
14678 case loop:
14679 size_needed = GET_MODE_SIZE (Pmode);
14680 break;
14681 case unrolled_loop:
14682 size_needed = GET_MODE_SIZE (Pmode) * 4;
14683 break;
14684 case rep_prefix_8_byte:
14685 size_needed = 8;
14686 break;
14687 case rep_prefix_4_byte:
14688 size_needed = 4;
14689 break;
14690 case rep_prefix_1_byte:
14691 case loop_1_byte:
14692 size_needed = 1;
14693 break;
14694 }
14695 epilogue_size_needed = size_needed;
14696
14697 /* Step 1: Prologue guard. */
14698
14699 /* Alignment code needs count to be in register. */
14700 if (CONST_INT_P (count_exp) && desired_align > align)
14701 {
14702 enum machine_mode mode = SImode;
14703 if (TARGET_64BIT && (count & ~0xffffffff))
14704 mode = DImode;
14705 count_exp = force_reg (mode, count_exp);
14706 }
14707 /* Do the cheap promotion to allow better CSE across the
14708 main loop and epilogue (i.e. one load of the big constant in
14709 front of all the code). */
14710 if (CONST_INT_P (val_exp))
14711 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14712 desired_align, align);
14713 /* Ensure that alignment prologue won't copy past end of block. */
14714 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14715 {
14716 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14717 /* Epilogue always sets COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14718 Make sure EPILOGUE_SIZE_NEEDED is a power of 2. */
14719 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14720
14721 /* To improve performance of small blocks, we jump around the VAL
14722 promoting code. This means that if the promoted VAL is not constant,
14723 we might not use it in the epilogue and have to use the byte
14724 loop variant. */
14725 if (epilogue_size_needed > 2 && !promoted_val)
14726 force_loopy_epilogue = true;
14727 label = gen_label_rtx ();
14728 emit_cmp_and_jump_insns (count_exp,
14729 GEN_INT (epilogue_size_needed),
14730 LTU, 0, counter_mode (count_exp), 1, label);
14731 if (GET_CODE (count_exp) == CONST_INT)
14732 ;
14733 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14734 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14735 else
14736 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14737 }
14738 if (dynamic_check != -1)
14739 {
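/* Blocks of at least DYNAMIC_CHECK bytes are handed to the memset library
   call; smaller blocks branch to the inline code below, and that branch is
   predicted to be taken. */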
14740 rtx hot_label = gen_label_rtx ();
14741 jump_around_label = gen_label_rtx ();
14742 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14743 LEU, 0, counter_mode (count_exp), 1, hot_label);
14744 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14745 set_storage_via_libcall (dst, count_exp, val_exp, false);
14746 emit_jump (jump_around_label);
14747 emit_label (hot_label);
14748 }
14749
14750 /* Step 2: Alignment prologue. */
14751
14752 /* Do the expensive promotion once we branched off the small blocks. */
14753 if (!promoted_val)
14754 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14755 desired_align, align);
14756 gcc_assert (desired_align >= 1 && align >= 1);
14757
14758 if (desired_align > align)
14759 {
14760 /* Except for the first move in the epilogue, we no longer know
14761 the constant offset in aliasing info. It doesn't seem worth
14762 the pain to maintain it for the first move, so throw away
14763 the info early. */
14764 dst = change_address (dst, BLKmode, destreg);
14765 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14766 desired_align);
14767 }
14768 if (label && size_needed == 1)
14769 {
14770 emit_label (label);
14771 LABEL_NUSES (label) = 1;
14772 label = NULL;
14773 }
14774
14775 /* Step 3: Main loop. */
14776
14777 switch (alg)
14778 {
14779 case libcall:
14780 case no_stringop:
14781 gcc_unreachable ();
14782 case loop_1_byte:
14783 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14784 count_exp, QImode, 1, expected_size);
14785 break;
14786 case loop:
14787 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14788 count_exp, Pmode, 1, expected_size);
14789 break;
14790 case unrolled_loop:
14791 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14792 count_exp, Pmode, 4, expected_size);
14793 break;
14794 case rep_prefix_8_byte:
14795 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14796 DImode);
14797 break;
14798 case rep_prefix_4_byte:
14799 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14800 SImode);
14801 break;
14802 case rep_prefix_1_byte:
14803 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14804 QImode);
14805 break;
14806 }
14807 /* Properly adjust the offset of the destination memory for aliasing. */
14808 if (CONST_INT_P (count_exp))
14809 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14810 (count / size_needed) * size_needed);
14811 else
14812 dst = change_address (dst, BLKmode, destreg);
14813
14814 /* Step 4: Epilogue to copy the remaining bytes. */
14815
14816 if (label)
14817 {
14818 /* When the main loop is done, COUNT_EXP might hold the original count,
14819 while we want to set only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14820 Epilogue code will actually set COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14821 bytes. Compensate if needed. */
14822
14823 if (size_needed < desired_align - align)
14824 {
14825 tmp =
14826 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14827 GEN_INT (size_needed - 1), count_exp, 1,
14828 OPTAB_DIRECT);
14829 size_needed = desired_align - align + 1;
14830 if (tmp != count_exp)
14831 emit_move_insn (count_exp, tmp);
14832 }
14833 emit_label (label);
14834 LABEL_NUSES (label) = 1;
14835 }
14836 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14837 {
14838 if (force_loopy_epilogue)
14839 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14840 size_needed);
14841 else
14842 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14843 size_needed);
14844 }
14845 if (jump_around_label)
14846 emit_label (jump_around_label);
14847 return 1;
14848 }
14849
14850 /* Expand the appropriate insns for doing strlen if not just doing
14851 repnz; scasb
14852
14853 out = result, initialized with the start address
14854 align_rtx = alignment of the address.
14855 scratch = scratch register, initialized with the start address when
14856 not aligned, otherwise undefined
14857
14858 This is just the body. It needs the initializations mentioned above and
14859 some address computing at the end. These things are done in i386.md. */
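/* The overall strategy: advance OUT one byte at a time until it is 4-byte
   aligned, then scan a word at a time using a zero-byte test, and finally
   step OUT back to the exact position of the terminating zero byte. */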
14860
14861 static void
14862 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14863 {
14864 int align;
14865 rtx tmp;
14866 rtx align_2_label = NULL_RTX;
14867 rtx align_3_label = NULL_RTX;
14868 rtx align_4_label = gen_label_rtx ();
14869 rtx end_0_label = gen_label_rtx ();
14870 rtx mem;
14871 rtx tmpreg = gen_reg_rtx (SImode);
14872 rtx scratch = gen_reg_rtx (SImode);
14873 rtx cmp;
14874
14875 align = 0;
14876 if (CONST_INT_P (align_rtx))
14877 align = INTVAL (align_rtx);
14878
14879 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14880
14881 /* Is there a known alignment and is it less than 4? */
14882 if (align < 4)
14883 {
14884 rtx scratch1 = gen_reg_rtx (Pmode);
14885 emit_move_insn (scratch1, out);
14886 /* Is there a known alignment and is it not 2? */
14887 if (align != 2)
14888 {
14889 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14890 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14891
14892 /* Leave just the 3 lower bits. */
14893 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14894 NULL_RTX, 0, OPTAB_WIDEN);
14895
14896 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14897 Pmode, 1, align_4_label);
14898 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14899 Pmode, 1, align_2_label);
14900 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14901 Pmode, 1, align_3_label);
14902 }
14903 else
14904 {
14905 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14906 check whether it is aligned to a 4-byte boundary. */
14907
14908 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14909 NULL_RTX, 0, OPTAB_WIDEN);
14910
14911 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14912 Pmode, 1, align_4_label);
14913 }
14914
14915 mem = change_address (src, QImode, out);
14916
14917 /* Now compare the bytes. */
14918
14919 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14920 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14921 QImode, 1, end_0_label);
14922
14923 /* Increment the address. */
14924 if (TARGET_64BIT)
14925 emit_insn (gen_adddi3 (out, out, const1_rtx));
14926 else
14927 emit_insn (gen_addsi3 (out, out, const1_rtx));
14928
14929 /* Not needed with an alignment of 2 */
14930 if (align != 2)
14931 {
14932 emit_label (align_2_label);
14933
14934 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14935 end_0_label);
14936
14937 if (TARGET_64BIT)
14938 emit_insn (gen_adddi3 (out, out, const1_rtx));
14939 else
14940 emit_insn (gen_addsi3 (out, out, const1_rtx));
14941
14942 emit_label (align_3_label);
14943 }
14944
14945 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14946 end_0_label);
14947
14948 if (TARGET_64BIT)
14949 emit_insn (gen_adddi3 (out, out, const1_rtx));
14950 else
14951 emit_insn (gen_addsi3 (out, out, const1_rtx));
14952 }
14953
14954 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14955 align this loop; it only makes the program larger and does not
14956 speed it up. */
14957 emit_label (align_4_label);
14958
14959 mem = change_address (src, SImode, out);
14960 emit_move_insn (scratch, mem);
14961 if (TARGET_64BIT)
14962 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14963 else
14964 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14965
14966 /* This formula yields a nonzero result iff one of the bytes is zero.
14967 This saves three branches inside the loop and many cycles. */
14968
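/* The insns below compute (word - 0x01010101) & ~word & 0x80808080 in
   TMPREG. The least significant set 0x80 bit marks the first zero byte of
   the word; bytes above a zero byte may set spurious bits, but the code
   below only ever looks at the lowest one. */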
14969 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14970 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14971 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14972 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14973 gen_int_mode (0x80808080, SImode)));
14974 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14975 align_4_label);
14976
14977 if (TARGET_CMOVE)
14978 {
14979 rtx reg = gen_reg_rtx (SImode);
14980 rtx reg2 = gen_reg_rtx (Pmode);
14981 emit_move_insn (reg, tmpreg);
14982 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14983
14984 /* If zero is not in the first two bytes, move two bytes forward. */
14985 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14986 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14987 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14988 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14989 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14990 reg,
14991 tmpreg)));
14992 /* Emit lea manually to avoid clobbering of flags. */
14993 emit_insn (gen_rtx_SET (SImode, reg2,
14994 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14995
14996 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14997 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14998 emit_insn (gen_rtx_SET (VOIDmode, out,
14999 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15000 reg2,
15001 out)));
15002
15003 }
15004 else
15005 {
15006 rtx end_2_label = gen_label_rtx ();
15007 /* Is zero in the first two bytes? */
15008
15009 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15010 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15011 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15012 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15013 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15014 pc_rtx);
15015 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15016 JUMP_LABEL (tmp) = end_2_label;
15017
15018 /* Not in the first two. Move two bytes forward. */
15019 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15020 if (TARGET_64BIT)
15021 emit_insn (gen_adddi3 (out, out, const2_rtx));
15022 else
15023 emit_insn (gen_addsi3 (out, out, const2_rtx));
15024
15025 emit_label (end_2_label);
15026
15027 }
15028
15029 /* Avoid branch in fixing the byte. */
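/* At this point OUT points 3 or 4 bytes past the terminating zero, and bit 7
   of the low byte of TMPREG is set exactly when the zero is the farther of
   the two remaining candidates. Adding TMPREG to itself copies that bit into
   the carry flag, so the subtract-with-borrow below adjusts OUT by 4 or 3
   as appropriate. */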
15030 tmpreg = gen_lowpart (QImode, tmpreg);
15031 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15032 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15033 if (TARGET_64BIT)
15034 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15035 else
15036 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15037
15038 emit_label (end_0_label);
15039 }
15040
15041 /* Expand strlen. */
15042
15043 int
15044 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15045 {
15046 rtx addr, scratch1, scratch2, scratch3, scratch4;
15047
15048 /* The generic case of the strlen expander is long. Avoid expanding
15049 it unless TARGET_INLINE_ALL_STRINGOPS. */
15050
15051 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15052 && !TARGET_INLINE_ALL_STRINGOPS
15053 && !optimize_size
15054 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15055 return 0;
15056
15057 addr = force_reg (Pmode, XEXP (src, 0));
15058 scratch1 = gen_reg_rtx (Pmode);
15059
15060 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15061 && !optimize_size)
15062 {
15063 /* Well, it seems that some optimizer does not combine a call like
15064 foo(strlen(bar), strlen(bar));
15065 when the move and the subtraction are done here. It does calculate
15066 the length just once when these instructions are done inside of
15067 output_strlen_unroll(). But I think that since &bar[strlen(bar)] is
15068 often used and one fewer register is used for the lifetime of
15069 output_strlen_unroll(), this is better. */
15070
15071 emit_move_insn (out, addr);
15072
15073 ix86_expand_strlensi_unroll_1 (out, src, align);
15074
15075 /* strlensi_unroll_1 returns the address of the zero at the end of
15076 the string, like memchr(), so compute the length by subtracting
15077 the start address. */
15078 if (TARGET_64BIT)
15079 emit_insn (gen_subdi3 (out, out, addr));
15080 else
15081 emit_insn (gen_subsi3 (out, out, addr));
15082 }
15083 else
15084 {
15085 rtx unspec;
15086 scratch2 = gen_reg_rtx (Pmode);
15087 scratch3 = gen_reg_rtx (Pmode);
15088 scratch4 = force_reg (Pmode, constm1_rtx);
15089
15090 emit_move_insn (scratch3, addr);
15091 eoschar = force_reg (QImode, eoschar);
15092
15093 src = replace_equiv_address_nv (src, scratch3);
15094
15095 /* If .md starts supporting :P, this can be done in .md. */
15096 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15097 scratch4), UNSPEC_SCAS);
15098 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15099 if (TARGET_64BIT)
15100 {
15101 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15102 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15103 }
15104 else
15105 {
15106 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15107 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15108 }
15109 }
15110 return 1;
15111 }
15112
15113 /* For a given symbol (function), construct code to compute the address of
15114 its PLT entry in the large x86-64 PIC model. */
15115 rtx
15116 construct_plt_address (rtx symbol)
15117 {
15118 rtx tmp = gen_reg_rtx (Pmode);
15119 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15120
15121 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15122 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15123
15124 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15125 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15126 return tmp;
15127 }
15128
15129 void
15130 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15131 rtx callarg2 ATTRIBUTE_UNUSED,
15132 rtx pop, int sibcall)
15133 {
15134 rtx use = NULL, call;
15135
15136 if (pop == const0_rtx)
15137 pop = NULL;
15138 gcc_assert (!TARGET_64BIT || !pop);
15139
15140 if (TARGET_MACHO && !TARGET_64BIT)
15141 {
15142 #if TARGET_MACHO
15143 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15144 fnaddr = machopic_indirect_call_target (fnaddr);
15145 #endif
15146 }
15147 else
15148 {
15149 /* Static functions and indirect calls don't need the pic register. */
15150 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15151 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15152 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15153 use_reg (&use, pic_offset_table_rtx);
15154 }
15155
15156 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15157 {
15158 rtx al = gen_rtx_REG (QImode, 0);
15159 emit_move_insn (al, callarg2);
15160 use_reg (&use, al);
15161 }
15162
15163 if (ix86_cmodel == CM_LARGE_PIC
15164 && GET_CODE (fnaddr) == MEM
15165 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15166 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15167 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15168 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15169 {
15170 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15171 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15172 }
15173 if (sibcall && TARGET_64BIT
15174 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15175 {
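/* R11 is call-clobbered and not used for argument passing in the 64-bit
   ABI, so it is safe to use as a scratch for the sibcall target address. */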
15176 rtx addr;
15177 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15178 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15179 emit_move_insn (fnaddr, addr);
15180 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15181 }
15182
15183 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15184 if (retval)
15185 call = gen_rtx_SET (VOIDmode, retval, call);
15186 if (pop)
15187 {
15188 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15189 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15190 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15191 }
15192
15193 call = emit_call_insn (call);
15194 if (use)
15195 CALL_INSN_FUNCTION_USAGE (call) = use;
15196 }
15197
15198 \f
15199 /* Clear stack slot assignments remembered from previous functions.
15200 This is called from INIT_EXPANDERS once before RTL is emitted for each
15201 function. */
15202
15203 static struct machine_function *
15204 ix86_init_machine_status (void)
15205 {
15206 struct machine_function *f;
15207
15208 f = ggc_alloc_cleared (sizeof (struct machine_function));
15209 f->use_fast_prologue_epilogue_nregs = -1;
15210 f->tls_descriptor_call_expanded_p = 0;
15211
15212 return f;
15213 }
15214
15215 /* Return a MEM corresponding to a stack slot with mode MODE.
15216 Allocate a new slot if necessary.
15217
15218 The RTL for a function can have several slots available: N is
15219 which slot to use. */
15220
15221 rtx
15222 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15223 {
15224 struct stack_local_entry *s;
15225
15226 gcc_assert (n < MAX_386_STACK_LOCALS);
15227
15228 for (s = ix86_stack_locals; s; s = s->next)
15229 if (s->mode == mode && s->n == n)
15230 return copy_rtx (s->rtl);
15231
15232 s = (struct stack_local_entry *)
15233 ggc_alloc (sizeof (struct stack_local_entry));
15234 s->n = n;
15235 s->mode = mode;
15236 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15237
15238 s->next = ix86_stack_locals;
15239 ix86_stack_locals = s;
15240 return s->rtl;
15241 }
15242
15243 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15244
15245 static GTY(()) rtx ix86_tls_symbol;
15246 rtx
15247 ix86_tls_get_addr (void)
15248 {
15249
15250 if (!ix86_tls_symbol)
15251 {
15252 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15253 (TARGET_ANY_GNU_TLS
15254 && !TARGET_64BIT)
15255 ? "___tls_get_addr"
15256 : "__tls_get_addr");
15257 }
15258
15259 return ix86_tls_symbol;
15260 }
15261
15262 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15263
15264 static GTY(()) rtx ix86_tls_module_base_symbol;
15265 rtx
15266 ix86_tls_module_base (void)
15267 {
15268
15269 if (!ix86_tls_module_base_symbol)
15270 {
15271 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15272 "_TLS_MODULE_BASE_");
15273 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15274 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15275 }
15276
15277 return ix86_tls_module_base_symbol;
15278 }
15279 \f
15280 /* Calculate the length of the memory address in the instruction
15281 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15282
15283 int
15284 memory_address_length (rtx addr)
15285 {
15286 struct ix86_address parts;
15287 rtx base, index, disp;
15288 int len;
15289 int ok;
15290
15291 if (GET_CODE (addr) == PRE_DEC
15292 || GET_CODE (addr) == POST_INC
15293 || GET_CODE (addr) == PRE_MODIFY
15294 || GET_CODE (addr) == POST_MODIFY)
15295 return 0;
15296
15297 ok = ix86_decompose_address (addr, &parts);
15298 gcc_assert (ok);
15299
15300 if (parts.base && GET_CODE (parts.base) == SUBREG)
15301 parts.base = SUBREG_REG (parts.base);
15302 if (parts.index && GET_CODE (parts.index) == SUBREG)
15303 parts.index = SUBREG_REG (parts.index);
15304
15305 base = parts.base;
15306 index = parts.index;
15307 disp = parts.disp;
15308 len = 0;
15309
15310 /* Rule of thumb:
15311 - esp as the base always wants an index,
15312 - ebp as the base always wants a displacement. */
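/* For example, with 32-bit operands: (%eax) needs no extra bytes, (%esp)
   needs a SIB byte, 8(%ebp) needs a disp8, symbol(,%eax,4) needs a SIB byte
   plus a disp32, and a bare symbol reference needs a disp32. */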
15313
15314 /* Register Indirect. */
15315 if (base && !index && !disp)
15316 {
15317 /* esp (for its index) and ebp (for its displacement) need
15318 the two-byte modrm form. */
15319 if (addr == stack_pointer_rtx
15320 || addr == arg_pointer_rtx
15321 || addr == frame_pointer_rtx
15322 || addr == hard_frame_pointer_rtx)
15323 len = 1;
15324 }
15325
15326 /* Direct Addressing. */
15327 else if (disp && !base && !index)
15328 len = 4;
15329
15330 else
15331 {
15332 /* Find the length of the displacement constant. */
15333 if (disp)
15334 {
15335 if (base && satisfies_constraint_K (disp))
15336 len = 1;
15337 else
15338 len = 4;
15339 }
15340 /* ebp always wants a displacement. */
15341 else if (base == hard_frame_pointer_rtx)
15342 len = 1;
15343
15344 /* An index requires the two-byte modrm form.... */
15345 if (index
15346 /* ...like esp, which always wants an index. */
15347 || base == stack_pointer_rtx
15348 || base == arg_pointer_rtx
15349 || base == frame_pointer_rtx)
15350 len += 1;
15351 }
15352
15353 return len;
15354 }
15355
15356 /* Compute the default value for the "length_immediate" attribute. When
15357 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15358 int
15359 ix86_attr_length_immediate_default (rtx insn, int shortform)
15360 {
15361 int len = 0;
15362 int i;
15363 extract_insn_cached (insn);
15364 for (i = recog_data.n_operands - 1; i >= 0; --i)
15365 if (CONSTANT_P (recog_data.operand[i]))
15366 {
15367 gcc_assert (!len);
15368 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15369 len = 1;
15370 else
15371 {
15372 switch (get_attr_mode (insn))
15373 {
15374 case MODE_QI:
15375 len+=1;
15376 break;
15377 case MODE_HI:
15378 len+=2;
15379 break;
15380 case MODE_SI:
15381 len+=4;
15382 break;
15383 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15384 case MODE_DI:
15385 len+=4;
15386 break;
15387 default:
15388 fatal_insn ("unknown insn mode", insn);
15389 }
15390 }
15391 }
15392 return len;
15393 }
15394 /* Compute default value for "length_address" attribute. */
15395 int
15396 ix86_attr_length_address_default (rtx insn)
15397 {
15398 int i;
15399
15400 if (get_attr_type (insn) == TYPE_LEA)
15401 {
15402 rtx set = PATTERN (insn);
15403
15404 if (GET_CODE (set) == PARALLEL)
15405 set = XVECEXP (set, 0, 0);
15406
15407 gcc_assert (GET_CODE (set) == SET);
15408
15409 return memory_address_length (SET_SRC (set));
15410 }
15411
15412 extract_insn_cached (insn);
15413 for (i = recog_data.n_operands - 1; i >= 0; --i)
15414 if (MEM_P (recog_data.operand[i]))
15415 {
15416 return memory_address_length (XEXP (recog_data.operand[i], 0));
15418 }
15419 return 0;
15420 }
15421 \f
15422 /* Return the maximum number of instructions a cpu can issue. */
15423
15424 static int
15425 ix86_issue_rate (void)
15426 {
15427 switch (ix86_tune)
15428 {
15429 case PROCESSOR_PENTIUM:
15430 case PROCESSOR_K6:
15431 return 2;
15432
15433 case PROCESSOR_PENTIUMPRO:
15434 case PROCESSOR_PENTIUM4:
15435 case PROCESSOR_ATHLON:
15436 case PROCESSOR_K8:
15437 case PROCESSOR_AMDFAM10:
15438 case PROCESSOR_NOCONA:
15439 case PROCESSOR_GENERIC32:
15440 case PROCESSOR_GENERIC64:
15441 return 3;
15442
15443 case PROCESSOR_CORE2:
15444 return 4;
15445
15446 default:
15447 return 1;
15448 }
15449 }
15450
15451 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15452 by DEP_INSN and nothing else set by DEP_INSN. */
15453
15454 static int
15455 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15456 {
15457 rtx set, set2;
15458
15459 /* Simplify the test for uninteresting insns. */
15460 if (insn_type != TYPE_SETCC
15461 && insn_type != TYPE_ICMOV
15462 && insn_type != TYPE_FCMOV
15463 && insn_type != TYPE_IBR)
15464 return 0;
15465
15466 if ((set = single_set (dep_insn)) != 0)
15467 {
15468 set = SET_DEST (set);
15469 set2 = NULL_RTX;
15470 }
15471 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15472 && XVECLEN (PATTERN (dep_insn), 0) == 2
15473 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15474 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15475 {
15476 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15477 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15478 }
15479 else
15480 return 0;
15481
15482 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15483 return 0;
15484
15485 /* This test is true if the dependent insn reads the flags but
15486 not any other potentially set register. */
15487 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15488 return 0;
15489
15490 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15491 return 0;
15492
15493 return 1;
15494 }
15495
15496 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15497 address with operands set by DEP_INSN. */
15498
15499 static int
15500 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15501 {
15502 rtx addr;
15503
15504 if (insn_type == TYPE_LEA
15505 && TARGET_PENTIUM)
15506 {
15507 addr = PATTERN (insn);
15508
15509 if (GET_CODE (addr) == PARALLEL)
15510 addr = XVECEXP (addr, 0, 0);
15511
15512 gcc_assert (GET_CODE (addr) == SET);
15513
15514 addr = SET_SRC (addr);
15515 }
15516 else
15517 {
15518 int i;
15519 extract_insn_cached (insn);
15520 for (i = recog_data.n_operands - 1; i >= 0; --i)
15521 if (MEM_P (recog_data.operand[i]))
15522 {
15523 addr = XEXP (recog_data.operand[i], 0);
15524 goto found;
15525 }
15526 return 0;
15527 found:;
15528 }
15529
15530 return modified_in_p (addr, dep_insn);
15531 }
15532
15533 static int
15534 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15535 {
15536 enum attr_type insn_type, dep_insn_type;
15537 enum attr_memory memory;
15538 rtx set, set2;
15539 int dep_insn_code_number;
15540
15541 /* Anti and output dependencies have zero cost on all CPUs. */
15542 if (REG_NOTE_KIND (link) != 0)
15543 return 0;
15544
15545 dep_insn_code_number = recog_memoized (dep_insn);
15546
15547 /* If we can't recognize the insns, we can't really do anything. */
15548 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15549 return cost;
15550
15551 insn_type = get_attr_type (insn);
15552 dep_insn_type = get_attr_type (dep_insn);
15553
15554 switch (ix86_tune)
15555 {
15556 case PROCESSOR_PENTIUM:
15557 /* Address Generation Interlock adds a cycle of latency. */
15558 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15559 cost += 1;
15560
15561 /* ??? Compares pair with jump/setcc. */
15562 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15563 cost = 0;
15564
15565 /* Floating point stores require value to be ready one cycle earlier. */
15566 if (insn_type == TYPE_FMOV
15567 && get_attr_memory (insn) == MEMORY_STORE
15568 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15569 cost += 1;
15570 break;
15571
15572 case PROCESSOR_PENTIUMPRO:
15573 memory = get_attr_memory (insn);
15574
15575 /* INT->FP conversion is expensive. */
15576 if (get_attr_fp_int_src (dep_insn))
15577 cost += 5;
15578
15579 /* There is one cycle extra latency between an FP op and a store. */
15580 if (insn_type == TYPE_FMOV
15581 && (set = single_set (dep_insn)) != NULL_RTX
15582 && (set2 = single_set (insn)) != NULL_RTX
15583 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15584 && MEM_P (SET_DEST (set2)))
15585 cost += 1;
15586
15587 /* Show the ability of the reorder buffer to hide the latency of a load
15588 by executing it in parallel with the previous instruction in case the
15589 previous instruction is not needed to compute the address. */
15590 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15591 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15592 {
15593 /* Claim moves to take one cycle, as the core can issue one load
15594 at a time and the next load can start a cycle later. */
15595 if (dep_insn_type == TYPE_IMOV
15596 || dep_insn_type == TYPE_FMOV)
15597 cost = 1;
15598 else if (cost > 1)
15599 cost--;
15600 }
15601 break;
15602
15603 case PROCESSOR_K6:
15604 memory = get_attr_memory (insn);
15605
15606 /* The esp dependency is resolved before the instruction is really
15607 finished. */
15608 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15609 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15610 return 1;
15611
15612 /* INT->FP conversion is expensive. */
15613 if (get_attr_fp_int_src (dep_insn))
15614 cost += 5;
15615
15616 /* Show the ability of the reorder buffer to hide the latency of a load
15617 by executing it in parallel with the previous instruction in case the
15618 previous instruction is not needed to compute the address. */
15619 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15620 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15621 {
15622 /* Claim moves to take one cycle, as the core can issue one load
15623 at a time and the next load can start a cycle later. */
15624 if (dep_insn_type == TYPE_IMOV
15625 || dep_insn_type == TYPE_FMOV)
15626 cost = 1;
15627 else if (cost > 2)
15628 cost -= 2;
15629 else
15630 cost = 1;
15631 }
15632 break;
15633
15634 case PROCESSOR_ATHLON:
15635 case PROCESSOR_K8:
15636 case PROCESSOR_AMDFAM10:
15637 case PROCESSOR_GENERIC32:
15638 case PROCESSOR_GENERIC64:
15639 memory = get_attr_memory (insn);
15640
15641 /* Show the ability of the reorder buffer to hide the latency of a load
15642 by executing it in parallel with the previous instruction in case the
15643 previous instruction is not needed to compute the address. */
15644 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15645 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15646 {
15647 enum attr_unit unit = get_attr_unit (insn);
15648 int loadcost = 3;
15649
15650 /* Because of the difference between the lengths of the integer and
15651 floating point unit pipeline preparation stages, the memory operands
15652 for floating point are cheaper.
15653
15654 ??? For Athlon the difference is most probably 2. */
15655 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15656 loadcost = 3;
15657 else
15658 loadcost = TARGET_ATHLON ? 2 : 0;
15659
15660 if (cost >= loadcost)
15661 cost -= loadcost;
15662 else
15663 cost = 0;
15664 }
15665
15666 default:
15667 break;
15668 }
15669
15670 return cost;
15671 }
15672
15673 /* How many alternative schedules to try. This should be as wide as the
15674 scheduling freedom in the DFA, but no wider. Making this value too
15675 large results in extra work for the scheduler. */
15676
15677 static int
15678 ia32_multipass_dfa_lookahead (void)
15679 {
15680 if (ix86_tune == PROCESSOR_PENTIUM)
15681 return 2;
15682
15683 if (ix86_tune == PROCESSOR_PENTIUMPRO
15684 || ix86_tune == PROCESSOR_K6)
15685 return 1;
15686
15687 else
15688 return 0;
15689 }
15690
15691 \f
15692 /* Compute the alignment given to a constant that is being placed in memory.
15693 EXP is the constant and ALIGN is the alignment that the object would
15694 ordinarily have.
15695 The value of this function is used instead of that alignment to align
15696 the object. */
15697
15698 int
15699 ix86_constant_alignment (tree exp, int align)
15700 {
15701 if (TREE_CODE (exp) == REAL_CST)
15702 {
15703 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15704 return 64;
15705 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15706 return 128;
15707 }
15708 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15709 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15710 return BITS_PER_WORD;
15711
15712 return align;
15713 }
15714
15715 /* Compute the alignment for a static variable.
15716 TYPE is the data type, and ALIGN is the alignment that
15717 the object would ordinarily have. The value of this function is used
15718 instead of that alignment to align the object. */
15719
15720 int
15721 ix86_data_alignment (tree type, int align)
15722 {
15723 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15724
15725 if (AGGREGATE_TYPE_P (type)
15726 && TYPE_SIZE (type)
15727 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15728 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15729 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15730 && align < max_align)
15731 align = max_align;
15732
15733 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
15734 to a 16-byte boundary. */
15735 if (TARGET_64BIT)
15736 {
15737 if (AGGREGATE_TYPE_P (type)
15738 && TYPE_SIZE (type)
15739 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15740 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15741 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15742 return 128;
15743 }
15744
15745 if (TREE_CODE (type) == ARRAY_TYPE)
15746 {
15747 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15748 return 64;
15749 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15750 return 128;
15751 }
15752 else if (TREE_CODE (type) == COMPLEX_TYPE)
15753 {
15754
15755 if (TYPE_MODE (type) == DCmode && align < 64)
15756 return 64;
15757 if (TYPE_MODE (type) == XCmode && align < 128)
15758 return 128;
15759 }
15760 else if ((TREE_CODE (type) == RECORD_TYPE
15761 || TREE_CODE (type) == UNION_TYPE
15762 || TREE_CODE (type) == QUAL_UNION_TYPE)
15763 && TYPE_FIELDS (type))
15764 {
15765 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15766 return 64;
15767 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15768 return 128;
15769 }
15770 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15771 || TREE_CODE (type) == INTEGER_TYPE)
15772 {
15773 if (TYPE_MODE (type) == DFmode && align < 64)
15774 return 64;
15775 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15776 return 128;
15777 }
15778
15779 return align;
15780 }
15781
15782 /* Compute the alignment for a local variable.
15783 TYPE is the data type, and ALIGN is the alignment that
15784 the object would ordinarily have. The value of this macro is used
15785 instead of that alignment to align the object. */
15786
15787 int
15788 ix86_local_alignment (tree type, int align)
15789 {
15790 /* The x86-64 ABI requires arrays of 16 bytes or larger to be aligned
15791 to a 16-byte boundary. */
15792 if (TARGET_64BIT)
15793 {
15794 if (AGGREGATE_TYPE_P (type)
15795 && TYPE_SIZE (type)
15796 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15797 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15798 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15799 return 128;
15800 }
15801 if (TREE_CODE (type) == ARRAY_TYPE)
15802 {
15803 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15804 return 64;
15805 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15806 return 128;
15807 }
15808 else if (TREE_CODE (type) == COMPLEX_TYPE)
15809 {
15810 if (TYPE_MODE (type) == DCmode && align < 64)
15811 return 64;
15812 if (TYPE_MODE (type) == XCmode && align < 128)
15813 return 128;
15814 }
15815 else if ((TREE_CODE (type) == RECORD_TYPE
15816 || TREE_CODE (type) == UNION_TYPE
15817 || TREE_CODE (type) == QUAL_UNION_TYPE)
15818 && TYPE_FIELDS (type))
15819 {
15820 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15821 return 64;
15822 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15823 return 128;
15824 }
15825 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15826 || TREE_CODE (type) == INTEGER_TYPE)
15827 {
15828
15829 if (TYPE_MODE (type) == DFmode && align < 64)
15830 return 64;
15831 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15832 return 128;
15833 }
15834 return align;
15835 }
15836 \f
15837 /* Emit RTL insns to initialize the variable parts of a trampoline.
15838 FNADDR is an RTX for the address of the function's pure code.
15839 CXT is an RTX for the static chain value for the function. */
15840 void
15841 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15842 {
15843 if (!TARGET_64BIT)
15844 {
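/* The generated trampoline is
   b9 <cxt>   movl $CXT, %ecx
   e9 <disp>  jmp FNADDR
   where <disp> is relative to the end of the jmp instruction. */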
15845 /* Compute offset from the end of the jmp to the target function. */
15846 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15847 plus_constant (tramp, 10),
15848 NULL_RTX, 1, OPTAB_DIRECT);
15849 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15850 gen_int_mode (0xb9, QImode));
15851 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15852 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15853 gen_int_mode (0xe9, QImode));
15854 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15855 }
15856 else
15857 {
15858 int offset = 0;
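/* The generated trampoline is either
   41 bb <imm32>  movl $FNADDR, %r11d
   or
   49 bb <imm64>  movabs $FNADDR, %r11
   followed by
   49 ba <imm64>  movabs $CXT, %r10
   49 ff e3       jmp *%r11  */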
15859 /* Try to load address using shorter movl instead of movabs.
15860 We may want to support movq for kernel mode, but kernel does not use
15861 trampolines at the moment. */
15862 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15863 {
15864 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15865 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15866 gen_int_mode (0xbb41, HImode));
15867 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15868 gen_lowpart (SImode, fnaddr));
15869 offset += 6;
15870 }
15871 else
15872 {
15873 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15874 gen_int_mode (0xbb49, HImode));
15875 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15876 fnaddr);
15877 offset += 10;
15878 }
15879 /* Load static chain using movabs to r10. */
15880 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15881 gen_int_mode (0xba49, HImode));
15882 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15883 cxt);
15884 offset += 10;
15885 /* Jump to r11. */
15886 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15887 gen_int_mode (0xff49, HImode));
15888 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15889 gen_int_mode (0xe3, QImode));
15890 offset += 3;
15891 gcc_assert (offset <= TRAMPOLINE_SIZE);
15892 }
15893
15894 #ifdef ENABLE_EXECUTE_STACK
15895 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15896 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15897 #endif
15898 }
15899 \f
15900 /* Codes for all the SSE/MMX builtins. */
15901 enum ix86_builtins
15902 {
15903 IX86_BUILTIN_ADDPS,
15904 IX86_BUILTIN_ADDSS,
15905 IX86_BUILTIN_DIVPS,
15906 IX86_BUILTIN_DIVSS,
15907 IX86_BUILTIN_MULPS,
15908 IX86_BUILTIN_MULSS,
15909 IX86_BUILTIN_SUBPS,
15910 IX86_BUILTIN_SUBSS,
15911
15912 IX86_BUILTIN_CMPEQPS,
15913 IX86_BUILTIN_CMPLTPS,
15914 IX86_BUILTIN_CMPLEPS,
15915 IX86_BUILTIN_CMPGTPS,
15916 IX86_BUILTIN_CMPGEPS,
15917 IX86_BUILTIN_CMPNEQPS,
15918 IX86_BUILTIN_CMPNLTPS,
15919 IX86_BUILTIN_CMPNLEPS,
15920 IX86_BUILTIN_CMPNGTPS,
15921 IX86_BUILTIN_CMPNGEPS,
15922 IX86_BUILTIN_CMPORDPS,
15923 IX86_BUILTIN_CMPUNORDPS,
15924 IX86_BUILTIN_CMPEQSS,
15925 IX86_BUILTIN_CMPLTSS,
15926 IX86_BUILTIN_CMPLESS,
15927 IX86_BUILTIN_CMPNEQSS,
15928 IX86_BUILTIN_CMPNLTSS,
15929 IX86_BUILTIN_CMPNLESS,
15930 IX86_BUILTIN_CMPNGTSS,
15931 IX86_BUILTIN_CMPNGESS,
15932 IX86_BUILTIN_CMPORDSS,
15933 IX86_BUILTIN_CMPUNORDSS,
15934
15935 IX86_BUILTIN_COMIEQSS,
15936 IX86_BUILTIN_COMILTSS,
15937 IX86_BUILTIN_COMILESS,
15938 IX86_BUILTIN_COMIGTSS,
15939 IX86_BUILTIN_COMIGESS,
15940 IX86_BUILTIN_COMINEQSS,
15941 IX86_BUILTIN_UCOMIEQSS,
15942 IX86_BUILTIN_UCOMILTSS,
15943 IX86_BUILTIN_UCOMILESS,
15944 IX86_BUILTIN_UCOMIGTSS,
15945 IX86_BUILTIN_UCOMIGESS,
15946 IX86_BUILTIN_UCOMINEQSS,
15947
15948 IX86_BUILTIN_CVTPI2PS,
15949 IX86_BUILTIN_CVTPS2PI,
15950 IX86_BUILTIN_CVTSI2SS,
15951 IX86_BUILTIN_CVTSI642SS,
15952 IX86_BUILTIN_CVTSS2SI,
15953 IX86_BUILTIN_CVTSS2SI64,
15954 IX86_BUILTIN_CVTTPS2PI,
15955 IX86_BUILTIN_CVTTSS2SI,
15956 IX86_BUILTIN_CVTTSS2SI64,
15957
15958 IX86_BUILTIN_MAXPS,
15959 IX86_BUILTIN_MAXSS,
15960 IX86_BUILTIN_MINPS,
15961 IX86_BUILTIN_MINSS,
15962
15963 IX86_BUILTIN_LOADUPS,
15964 IX86_BUILTIN_STOREUPS,
15965 IX86_BUILTIN_MOVSS,
15966
15967 IX86_BUILTIN_MOVHLPS,
15968 IX86_BUILTIN_MOVLHPS,
15969 IX86_BUILTIN_LOADHPS,
15970 IX86_BUILTIN_LOADLPS,
15971 IX86_BUILTIN_STOREHPS,
15972 IX86_BUILTIN_STORELPS,
15973
15974 IX86_BUILTIN_MASKMOVQ,
15975 IX86_BUILTIN_MOVMSKPS,
15976 IX86_BUILTIN_PMOVMSKB,
15977
15978 IX86_BUILTIN_MOVNTPS,
15979 IX86_BUILTIN_MOVNTQ,
15980
15981 IX86_BUILTIN_LOADDQU,
15982 IX86_BUILTIN_STOREDQU,
15983
15984 IX86_BUILTIN_PACKSSWB,
15985 IX86_BUILTIN_PACKSSDW,
15986 IX86_BUILTIN_PACKUSWB,
15987
15988 IX86_BUILTIN_PADDB,
15989 IX86_BUILTIN_PADDW,
15990 IX86_BUILTIN_PADDD,
15991 IX86_BUILTIN_PADDQ,
15992 IX86_BUILTIN_PADDSB,
15993 IX86_BUILTIN_PADDSW,
15994 IX86_BUILTIN_PADDUSB,
15995 IX86_BUILTIN_PADDUSW,
15996 IX86_BUILTIN_PSUBB,
15997 IX86_BUILTIN_PSUBW,
15998 IX86_BUILTIN_PSUBD,
15999 IX86_BUILTIN_PSUBQ,
16000 IX86_BUILTIN_PSUBSB,
16001 IX86_BUILTIN_PSUBSW,
16002 IX86_BUILTIN_PSUBUSB,
16003 IX86_BUILTIN_PSUBUSW,
16004
16005 IX86_BUILTIN_PAND,
16006 IX86_BUILTIN_PANDN,
16007 IX86_BUILTIN_POR,
16008 IX86_BUILTIN_PXOR,
16009
16010 IX86_BUILTIN_PAVGB,
16011 IX86_BUILTIN_PAVGW,
16012
16013 IX86_BUILTIN_PCMPEQB,
16014 IX86_BUILTIN_PCMPEQW,
16015 IX86_BUILTIN_PCMPEQD,
16016 IX86_BUILTIN_PCMPGTB,
16017 IX86_BUILTIN_PCMPGTW,
16018 IX86_BUILTIN_PCMPGTD,
16019
16020 IX86_BUILTIN_PMADDWD,
16021
16022 IX86_BUILTIN_PMAXSW,
16023 IX86_BUILTIN_PMAXUB,
16024 IX86_BUILTIN_PMINSW,
16025 IX86_BUILTIN_PMINUB,
16026
16027 IX86_BUILTIN_PMULHUW,
16028 IX86_BUILTIN_PMULHW,
16029 IX86_BUILTIN_PMULLW,
16030
16031 IX86_BUILTIN_PSADBW,
16032 IX86_BUILTIN_PSHUFW,
16033
16034 IX86_BUILTIN_PSLLW,
16035 IX86_BUILTIN_PSLLD,
16036 IX86_BUILTIN_PSLLQ,
16037 IX86_BUILTIN_PSRAW,
16038 IX86_BUILTIN_PSRAD,
16039 IX86_BUILTIN_PSRLW,
16040 IX86_BUILTIN_PSRLD,
16041 IX86_BUILTIN_PSRLQ,
16042 IX86_BUILTIN_PSLLWI,
16043 IX86_BUILTIN_PSLLDI,
16044 IX86_BUILTIN_PSLLQI,
16045 IX86_BUILTIN_PSRAWI,
16046 IX86_BUILTIN_PSRADI,
16047 IX86_BUILTIN_PSRLWI,
16048 IX86_BUILTIN_PSRLDI,
16049 IX86_BUILTIN_PSRLQI,
16050
16051 IX86_BUILTIN_PUNPCKHBW,
16052 IX86_BUILTIN_PUNPCKHWD,
16053 IX86_BUILTIN_PUNPCKHDQ,
16054 IX86_BUILTIN_PUNPCKLBW,
16055 IX86_BUILTIN_PUNPCKLWD,
16056 IX86_BUILTIN_PUNPCKLDQ,
16057
16058 IX86_BUILTIN_SHUFPS,
16059
16060 IX86_BUILTIN_RCPPS,
16061 IX86_BUILTIN_RCPSS,
16062 IX86_BUILTIN_RSQRTPS,
16063 IX86_BUILTIN_RSQRTSS,
16064 IX86_BUILTIN_SQRTPS,
16065 IX86_BUILTIN_SQRTSS,
16066
16067 IX86_BUILTIN_UNPCKHPS,
16068 IX86_BUILTIN_UNPCKLPS,
16069
16070 IX86_BUILTIN_ANDPS,
16071 IX86_BUILTIN_ANDNPS,
16072 IX86_BUILTIN_ORPS,
16073 IX86_BUILTIN_XORPS,
16074
16075 IX86_BUILTIN_EMMS,
16076 IX86_BUILTIN_LDMXCSR,
16077 IX86_BUILTIN_STMXCSR,
16078 IX86_BUILTIN_SFENCE,
16079
16080 /* 3DNow! Original */
16081 IX86_BUILTIN_FEMMS,
16082 IX86_BUILTIN_PAVGUSB,
16083 IX86_BUILTIN_PF2ID,
16084 IX86_BUILTIN_PFACC,
16085 IX86_BUILTIN_PFADD,
16086 IX86_BUILTIN_PFCMPEQ,
16087 IX86_BUILTIN_PFCMPGE,
16088 IX86_BUILTIN_PFCMPGT,
16089 IX86_BUILTIN_PFMAX,
16090 IX86_BUILTIN_PFMIN,
16091 IX86_BUILTIN_PFMUL,
16092 IX86_BUILTIN_PFRCP,
16093 IX86_BUILTIN_PFRCPIT1,
16094 IX86_BUILTIN_PFRCPIT2,
16095 IX86_BUILTIN_PFRSQIT1,
16096 IX86_BUILTIN_PFRSQRT,
16097 IX86_BUILTIN_PFSUB,
16098 IX86_BUILTIN_PFSUBR,
16099 IX86_BUILTIN_PI2FD,
16100 IX86_BUILTIN_PMULHRW,
16101
16102 /* 3DNow! Athlon Extensions */
16103 IX86_BUILTIN_PF2IW,
16104 IX86_BUILTIN_PFNACC,
16105 IX86_BUILTIN_PFPNACC,
16106 IX86_BUILTIN_PI2FW,
16107 IX86_BUILTIN_PSWAPDSI,
16108 IX86_BUILTIN_PSWAPDSF,
16109
16110 /* SSE2 */
16111 IX86_BUILTIN_ADDPD,
16112 IX86_BUILTIN_ADDSD,
16113 IX86_BUILTIN_DIVPD,
16114 IX86_BUILTIN_DIVSD,
16115 IX86_BUILTIN_MULPD,
16116 IX86_BUILTIN_MULSD,
16117 IX86_BUILTIN_SUBPD,
16118 IX86_BUILTIN_SUBSD,
16119
16120 IX86_BUILTIN_CMPEQPD,
16121 IX86_BUILTIN_CMPLTPD,
16122 IX86_BUILTIN_CMPLEPD,
16123 IX86_BUILTIN_CMPGTPD,
16124 IX86_BUILTIN_CMPGEPD,
16125 IX86_BUILTIN_CMPNEQPD,
16126 IX86_BUILTIN_CMPNLTPD,
16127 IX86_BUILTIN_CMPNLEPD,
16128 IX86_BUILTIN_CMPNGTPD,
16129 IX86_BUILTIN_CMPNGEPD,
16130 IX86_BUILTIN_CMPORDPD,
16131 IX86_BUILTIN_CMPUNORDPD,
16132 IX86_BUILTIN_CMPEQSD,
16133 IX86_BUILTIN_CMPLTSD,
16134 IX86_BUILTIN_CMPLESD,
16135 IX86_BUILTIN_CMPNEQSD,
16136 IX86_BUILTIN_CMPNLTSD,
16137 IX86_BUILTIN_CMPNLESD,
16138 IX86_BUILTIN_CMPORDSD,
16139 IX86_BUILTIN_CMPUNORDSD,
16140
16141 IX86_BUILTIN_COMIEQSD,
16142 IX86_BUILTIN_COMILTSD,
16143 IX86_BUILTIN_COMILESD,
16144 IX86_BUILTIN_COMIGTSD,
16145 IX86_BUILTIN_COMIGESD,
16146 IX86_BUILTIN_COMINEQSD,
16147 IX86_BUILTIN_UCOMIEQSD,
16148 IX86_BUILTIN_UCOMILTSD,
16149 IX86_BUILTIN_UCOMILESD,
16150 IX86_BUILTIN_UCOMIGTSD,
16151 IX86_BUILTIN_UCOMIGESD,
16152 IX86_BUILTIN_UCOMINEQSD,
16153
16154 IX86_BUILTIN_MAXPD,
16155 IX86_BUILTIN_MAXSD,
16156 IX86_BUILTIN_MINPD,
16157 IX86_BUILTIN_MINSD,
16158
16159 IX86_BUILTIN_ANDPD,
16160 IX86_BUILTIN_ANDNPD,
16161 IX86_BUILTIN_ORPD,
16162 IX86_BUILTIN_XORPD,
16163
16164 IX86_BUILTIN_SQRTPD,
16165 IX86_BUILTIN_SQRTSD,
16166
16167 IX86_BUILTIN_UNPCKHPD,
16168 IX86_BUILTIN_UNPCKLPD,
16169
16170 IX86_BUILTIN_SHUFPD,
16171
16172 IX86_BUILTIN_LOADUPD,
16173 IX86_BUILTIN_STOREUPD,
16174 IX86_BUILTIN_MOVSD,
16175
16176 IX86_BUILTIN_LOADHPD,
16177 IX86_BUILTIN_LOADLPD,
16178
16179 IX86_BUILTIN_CVTDQ2PD,
16180 IX86_BUILTIN_CVTDQ2PS,
16181
16182 IX86_BUILTIN_CVTPD2DQ,
16183 IX86_BUILTIN_CVTPD2PI,
16184 IX86_BUILTIN_CVTPD2PS,
16185 IX86_BUILTIN_CVTTPD2DQ,
16186 IX86_BUILTIN_CVTTPD2PI,
16187
16188 IX86_BUILTIN_CVTPI2PD,
16189 IX86_BUILTIN_CVTSI2SD,
16190 IX86_BUILTIN_CVTSI642SD,
16191
16192 IX86_BUILTIN_CVTSD2SI,
16193 IX86_BUILTIN_CVTSD2SI64,
16194 IX86_BUILTIN_CVTSD2SS,
16195 IX86_BUILTIN_CVTSS2SD,
16196 IX86_BUILTIN_CVTTSD2SI,
16197 IX86_BUILTIN_CVTTSD2SI64,
16198
16199 IX86_BUILTIN_CVTPS2DQ,
16200 IX86_BUILTIN_CVTPS2PD,
16201 IX86_BUILTIN_CVTTPS2DQ,
16202
16203 IX86_BUILTIN_MOVNTI,
16204 IX86_BUILTIN_MOVNTPD,
16205 IX86_BUILTIN_MOVNTDQ,
16206
16207 /* SSE2 MMX */
16208 IX86_BUILTIN_MASKMOVDQU,
16209 IX86_BUILTIN_MOVMSKPD,
16210 IX86_BUILTIN_PMOVMSKB128,
16211
16212 IX86_BUILTIN_PACKSSWB128,
16213 IX86_BUILTIN_PACKSSDW128,
16214 IX86_BUILTIN_PACKUSWB128,
16215
16216 IX86_BUILTIN_PADDB128,
16217 IX86_BUILTIN_PADDW128,
16218 IX86_BUILTIN_PADDD128,
16219 IX86_BUILTIN_PADDQ128,
16220 IX86_BUILTIN_PADDSB128,
16221 IX86_BUILTIN_PADDSW128,
16222 IX86_BUILTIN_PADDUSB128,
16223 IX86_BUILTIN_PADDUSW128,
16224 IX86_BUILTIN_PSUBB128,
16225 IX86_BUILTIN_PSUBW128,
16226 IX86_BUILTIN_PSUBD128,
16227 IX86_BUILTIN_PSUBQ128,
16228 IX86_BUILTIN_PSUBSB128,
16229 IX86_BUILTIN_PSUBSW128,
16230 IX86_BUILTIN_PSUBUSB128,
16231 IX86_BUILTIN_PSUBUSW128,
16232
16233 IX86_BUILTIN_PAND128,
16234 IX86_BUILTIN_PANDN128,
16235 IX86_BUILTIN_POR128,
16236 IX86_BUILTIN_PXOR128,
16237
16238 IX86_BUILTIN_PAVGB128,
16239 IX86_BUILTIN_PAVGW128,
16240
16241 IX86_BUILTIN_PCMPEQB128,
16242 IX86_BUILTIN_PCMPEQW128,
16243 IX86_BUILTIN_PCMPEQD128,
16244 IX86_BUILTIN_PCMPGTB128,
16245 IX86_BUILTIN_PCMPGTW128,
16246 IX86_BUILTIN_PCMPGTD128,
16247
16248 IX86_BUILTIN_PMADDWD128,
16249
16250 IX86_BUILTIN_PMAXSW128,
16251 IX86_BUILTIN_PMAXUB128,
16252 IX86_BUILTIN_PMINSW128,
16253 IX86_BUILTIN_PMINUB128,
16254
16255 IX86_BUILTIN_PMULUDQ,
16256 IX86_BUILTIN_PMULUDQ128,
16257 IX86_BUILTIN_PMULHUW128,
16258 IX86_BUILTIN_PMULHW128,
16259 IX86_BUILTIN_PMULLW128,
16260
16261 IX86_BUILTIN_PSADBW128,
16262 IX86_BUILTIN_PSHUFHW,
16263 IX86_BUILTIN_PSHUFLW,
16264 IX86_BUILTIN_PSHUFD,
16265
16266 IX86_BUILTIN_PSLLDQI128,
16267 IX86_BUILTIN_PSLLWI128,
16268 IX86_BUILTIN_PSLLDI128,
16269 IX86_BUILTIN_PSLLQI128,
16270 IX86_BUILTIN_PSRAWI128,
16271 IX86_BUILTIN_PSRADI128,
16272 IX86_BUILTIN_PSRLDQI128,
16273 IX86_BUILTIN_PSRLWI128,
16274 IX86_BUILTIN_PSRLDI128,
16275 IX86_BUILTIN_PSRLQI128,
16276
16277 IX86_BUILTIN_PSLLDQ128,
16278 IX86_BUILTIN_PSLLW128,
16279 IX86_BUILTIN_PSLLD128,
16280 IX86_BUILTIN_PSLLQ128,
16281 IX86_BUILTIN_PSRAW128,
16282 IX86_BUILTIN_PSRAD128,
16283 IX86_BUILTIN_PSRLW128,
16284 IX86_BUILTIN_PSRLD128,
16285 IX86_BUILTIN_PSRLQ128,
16286
16287 IX86_BUILTIN_PUNPCKHBW128,
16288 IX86_BUILTIN_PUNPCKHWD128,
16289 IX86_BUILTIN_PUNPCKHDQ128,
16290 IX86_BUILTIN_PUNPCKHQDQ128,
16291 IX86_BUILTIN_PUNPCKLBW128,
16292 IX86_BUILTIN_PUNPCKLWD128,
16293 IX86_BUILTIN_PUNPCKLDQ128,
16294 IX86_BUILTIN_PUNPCKLQDQ128,
16295
16296 IX86_BUILTIN_CLFLUSH,
16297 IX86_BUILTIN_MFENCE,
16298 IX86_BUILTIN_LFENCE,
16299
16300 /* Prescott New Instructions. */
16301 IX86_BUILTIN_ADDSUBPS,
16302 IX86_BUILTIN_HADDPS,
16303 IX86_BUILTIN_HSUBPS,
16304 IX86_BUILTIN_MOVSHDUP,
16305 IX86_BUILTIN_MOVSLDUP,
16306 IX86_BUILTIN_ADDSUBPD,
16307 IX86_BUILTIN_HADDPD,
16308 IX86_BUILTIN_HSUBPD,
16309 IX86_BUILTIN_LDDQU,
16310
16311 IX86_BUILTIN_MONITOR,
16312 IX86_BUILTIN_MWAIT,
16313
16314 /* SSSE3. */
16315 IX86_BUILTIN_PHADDW,
16316 IX86_BUILTIN_PHADDD,
16317 IX86_BUILTIN_PHADDSW,
16318 IX86_BUILTIN_PHSUBW,
16319 IX86_BUILTIN_PHSUBD,
16320 IX86_BUILTIN_PHSUBSW,
16321 IX86_BUILTIN_PMADDUBSW,
16322 IX86_BUILTIN_PMULHRSW,
16323 IX86_BUILTIN_PSHUFB,
16324 IX86_BUILTIN_PSIGNB,
16325 IX86_BUILTIN_PSIGNW,
16326 IX86_BUILTIN_PSIGND,
16327 IX86_BUILTIN_PALIGNR,
16328 IX86_BUILTIN_PABSB,
16329 IX86_BUILTIN_PABSW,
16330 IX86_BUILTIN_PABSD,
16331
16332 IX86_BUILTIN_PHADDW128,
16333 IX86_BUILTIN_PHADDD128,
16334 IX86_BUILTIN_PHADDSW128,
16335 IX86_BUILTIN_PHSUBW128,
16336 IX86_BUILTIN_PHSUBD128,
16337 IX86_BUILTIN_PHSUBSW128,
16338 IX86_BUILTIN_PMADDUBSW128,
16339 IX86_BUILTIN_PMULHRSW128,
16340 IX86_BUILTIN_PSHUFB128,
16341 IX86_BUILTIN_PSIGNB128,
16342 IX86_BUILTIN_PSIGNW128,
16343 IX86_BUILTIN_PSIGND128,
16344 IX86_BUILTIN_PALIGNR128,
16345 IX86_BUILTIN_PABSB128,
16346 IX86_BUILTIN_PABSW128,
16347 IX86_BUILTIN_PABSD128,
16348
16349 /* AMDFAM10 - SSE4A New Instructions. */
16350 IX86_BUILTIN_MOVNTSD,
16351 IX86_BUILTIN_MOVNTSS,
16352 IX86_BUILTIN_EXTRQI,
16353 IX86_BUILTIN_EXTRQ,
16354 IX86_BUILTIN_INSERTQI,
16355 IX86_BUILTIN_INSERTQ,
16356
16357 IX86_BUILTIN_VEC_INIT_V2SI,
16358 IX86_BUILTIN_VEC_INIT_V4HI,
16359 IX86_BUILTIN_VEC_INIT_V8QI,
16360 IX86_BUILTIN_VEC_EXT_V2DF,
16361 IX86_BUILTIN_VEC_EXT_V2DI,
16362 IX86_BUILTIN_VEC_EXT_V4SF,
16363 IX86_BUILTIN_VEC_EXT_V4SI,
16364 IX86_BUILTIN_VEC_EXT_V8HI,
16365 IX86_BUILTIN_VEC_EXT_V2SI,
16366 IX86_BUILTIN_VEC_EXT_V4HI,
16367 IX86_BUILTIN_VEC_SET_V8HI,
16368 IX86_BUILTIN_VEC_SET_V4HI,
16369
16370 IX86_BUILTIN_MAX
16371 };
16372
16373 /* Table for the ix86 builtin decls. */
16374 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16375
16376 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16377 * only if target_flags includes one of the bits in MASK. Stores the function
16378 * decl in the ix86_builtins array.
16379 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16380
16381 static inline tree
16382 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16383 {
16384 tree decl = NULL_TREE;
16385
16386 if (mask & target_flags
16387 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16388 {
16389 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16390 NULL, NULL_TREE);
16391 ix86_builtins[(int) code] = decl;
16392 }
16393
16394 return decl;
16395 }
16396
16397 /* Like def_builtin, but also marks the function decl "const". */
16398
16399 static inline tree
16400 def_builtin_const (int mask, const char *name, tree type,
16401 enum ix86_builtins code)
16402 {
16403 tree decl = def_builtin (mask, name, type, code);
16404 if (decl)
16405 TREE_READONLY (decl) = 1;
16406 return decl;
16407 }
16408
16409 /* Bits for builtin_description.flag. */
16410
16411 /* Set when we don't support the comparison natively, and should
16412 swap_comparison in order to support it. */
16413 #define BUILTIN_DESC_SWAP_OPERANDS 1
16414
16415 struct builtin_description
16416 {
16417 const unsigned int mask;
16418 const enum insn_code icode;
16419 const char *const name;
16420 const enum ix86_builtins code;
16421 const enum rtx_code comparison;
16422 const unsigned int flag;
16423 };
16424
16425 static const struct builtin_description bdesc_comi[] =
16426 {
16427 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16428 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16429 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16430 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16431 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16432 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16433 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16434 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16435 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16436 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16437 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16438 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16439 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16440 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16441 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16442 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16443 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16444 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16445 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16446 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16448 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16449 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16450 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16451 };
16452
16453 static const struct builtin_description bdesc_2arg[] =
16454 {
16455 /* SSE */
16456 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16457 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16458 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16459 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16460 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16461 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16462 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16463 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16464
16465 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16466 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16467 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16468 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16469 BUILTIN_DESC_SWAP_OPERANDS },
16470 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16471 BUILTIN_DESC_SWAP_OPERANDS },
16472 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16473 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16474 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16475 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16476 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16477 BUILTIN_DESC_SWAP_OPERANDS },
16478 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16479 BUILTIN_DESC_SWAP_OPERANDS },
16480 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16481 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16482 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16483 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16484 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16485 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16486 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16487 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16488 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16489 BUILTIN_DESC_SWAP_OPERANDS },
16490 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16491 BUILTIN_DESC_SWAP_OPERANDS },
16492 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16493
16494 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16495 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16496 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16497 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16498
16499 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16500 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16501 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16502 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16503
16504 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16505 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16506 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16507 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16508 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16509
16510 /* MMX */
16511 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16512 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16513 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16514 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16515 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16516 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16517 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16518 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16519
16520 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16521 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16522 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16523 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16524 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16525 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16526 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16527 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16528
16529 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16530 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16531 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16532
16533 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16534 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16535 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16536 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16537
16538 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16539 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16540
16541 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16542 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16543 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16544 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16545 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16546 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16547
16548 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16549 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16550 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16551 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16552
16553 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16554 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16555 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16556 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16557 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16558 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16559
16560 /* Special. */
16561 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16562 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16563 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16564
16565 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16566 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16567 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16568
16569 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16570 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16571 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16572 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16573 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16574 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16575
16576 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16577 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16578 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16579 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16580 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16581 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16582
16583 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16584 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16585 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16586 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16587
16588 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16589 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16590
16591 /* SSE2 */
16592 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16594 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16595 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16596 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16600
16601 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16602 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16603 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16604 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16605 BUILTIN_DESC_SWAP_OPERANDS },
16606 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16607 BUILTIN_DESC_SWAP_OPERANDS },
16608 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16609 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16610 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16611 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16612 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16613 BUILTIN_DESC_SWAP_OPERANDS },
16614 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16615 BUILTIN_DESC_SWAP_OPERANDS },
16616 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16617 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16618 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16619 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16620 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16621 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16622 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16623 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16624 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16625
16626 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16627 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16628 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16630
16631 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16632 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16633 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16634 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16635
16636 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16637 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16638 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16639
16640 /* SSE2 MMX */
16641 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16642 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16643 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16644 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16645 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16646 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16647 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16648 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16649
16650 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16651 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16652 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16653 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16654 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16655 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16656 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16657 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16658
16659 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16660 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16661
16662 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16663 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16664 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16665 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16666
16667 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16668 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16669
16670 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16671 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16672 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16673 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16674 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16675 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16676
16677 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16678 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16679 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16680 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16681
16682 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16683 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16684 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16685 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16686 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16687 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16688 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16689 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16690
16691 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16692 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16693 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16694
16695 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16696 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16697
16698 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16699 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16700
16701 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16702 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16703 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16704
16705 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16706 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16707 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16708
16709 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16710 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16711
16712 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16713
16714 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16715 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16716 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16717 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16718
16719 /* SSE3 */
16720 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16721 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16722 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16723 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16724 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16725 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16726
16727 /* SSSE3 */
16728 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16729 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16730 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16731 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16732 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16733 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16734 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16735 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16736 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16737 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16738 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16739 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16740 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16741 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16742 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16743 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16744 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16745 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16746 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16747 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16748 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16749 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16750 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16751 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16752 };
16753
16754 static const struct builtin_description bdesc_1arg[] =
16755 {
16756 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16757 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16758
16759 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16760 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16761 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16762
16763 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16764 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16765 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16766 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16767 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16768 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16769
16770 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16771 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16772
16773 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16774
16775 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16776 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16777
16778 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16779 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16780 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16781 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16782 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16783
16784 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16785
16786 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16787 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16788 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16789 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16790
16791 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16792 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16793 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16794
16795 /* SSE3 */
16796 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16797 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16798
16799 /* SSSE3 */
16800 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16801 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16802 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16803 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16804 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16805 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16806 };
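/* The bdesc_comi, bdesc_2arg and bdesc_1arg tables above are walked by
   ix86_init_mmx_sse_builtins below, which derives each prototype from
   the operand modes of the insn pattern; builtins needing hand-written
   prototypes are registered individually after those loops.  */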
16807
16808 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16809 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16810 builtins. */
16811 static void
16812 ix86_init_mmx_sse_builtins (void)
16813 {
16814 const struct builtin_description * d;
16815 size_t i;
16816
16817 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16818 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16819 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16820 tree V2DI_type_node
16821 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16822 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16823 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16824 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16825 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16826 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16827 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16828
16829 tree pchar_type_node = build_pointer_type (char_type_node);
16830 tree pcchar_type_node = build_pointer_type (
16831 build_type_variant (char_type_node, 1, 0));
16832 tree pfloat_type_node = build_pointer_type (float_type_node);
16833 tree pcfloat_type_node = build_pointer_type (
16834 build_type_variant (float_type_node, 1, 0));
16835 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16836 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16837 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16838
16839 /* Comparisons. */
16840 tree int_ftype_v4sf_v4sf
16841 = build_function_type_list (integer_type_node,
16842 V4SF_type_node, V4SF_type_node, NULL_TREE);
16843 tree v4si_ftype_v4sf_v4sf
16844 = build_function_type_list (V4SI_type_node,
16845 V4SF_type_node, V4SF_type_node, NULL_TREE);
16846 /* MMX/SSE/integer conversions. */
16847 tree int_ftype_v4sf
16848 = build_function_type_list (integer_type_node,
16849 V4SF_type_node, NULL_TREE);
16850 tree int64_ftype_v4sf
16851 = build_function_type_list (long_long_integer_type_node,
16852 V4SF_type_node, NULL_TREE);
16853 tree int_ftype_v8qi
16854 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16855 tree v4sf_ftype_v4sf_int
16856 = build_function_type_list (V4SF_type_node,
16857 V4SF_type_node, integer_type_node, NULL_TREE);
16858 tree v4sf_ftype_v4sf_int64
16859 = build_function_type_list (V4SF_type_node,
16860 V4SF_type_node, long_long_integer_type_node,
16861 NULL_TREE);
16862 tree v4sf_ftype_v4sf_v2si
16863 = build_function_type_list (V4SF_type_node,
16864 V4SF_type_node, V2SI_type_node, NULL_TREE);
16865
16866 /* Miscellaneous. */
16867 tree v8qi_ftype_v4hi_v4hi
16868 = build_function_type_list (V8QI_type_node,
16869 V4HI_type_node, V4HI_type_node, NULL_TREE);
16870 tree v4hi_ftype_v2si_v2si
16871 = build_function_type_list (V4HI_type_node,
16872 V2SI_type_node, V2SI_type_node, NULL_TREE);
16873 tree v4sf_ftype_v4sf_v4sf_int
16874 = build_function_type_list (V4SF_type_node,
16875 V4SF_type_node, V4SF_type_node,
16876 integer_type_node, NULL_TREE);
16877 tree v2si_ftype_v4hi_v4hi
16878 = build_function_type_list (V2SI_type_node,
16879 V4HI_type_node, V4HI_type_node, NULL_TREE);
16880 tree v4hi_ftype_v4hi_int
16881 = build_function_type_list (V4HI_type_node,
16882 V4HI_type_node, integer_type_node, NULL_TREE);
16883 tree v4hi_ftype_v4hi_di
16884 = build_function_type_list (V4HI_type_node,
16885 V4HI_type_node, long_long_unsigned_type_node,
16886 NULL_TREE);
16887 tree v2si_ftype_v2si_di
16888 = build_function_type_list (V2SI_type_node,
16889 V2SI_type_node, long_long_unsigned_type_node,
16890 NULL_TREE);
16891 tree void_ftype_void
16892 = build_function_type (void_type_node, void_list_node);
16893 tree void_ftype_unsigned
16894 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16895 tree void_ftype_unsigned_unsigned
16896 = build_function_type_list (void_type_node, unsigned_type_node,
16897 unsigned_type_node, NULL_TREE);
16898 tree void_ftype_pcvoid_unsigned_unsigned
16899 = build_function_type_list (void_type_node, const_ptr_type_node,
16900 unsigned_type_node, unsigned_type_node,
16901 NULL_TREE);
16902 tree unsigned_ftype_void
16903 = build_function_type (unsigned_type_node, void_list_node);
16904 tree v2si_ftype_v4sf
16905 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16906 /* Loads/stores. */
16907 tree void_ftype_v8qi_v8qi_pchar
16908 = build_function_type_list (void_type_node,
16909 V8QI_type_node, V8QI_type_node,
16910 pchar_type_node, NULL_TREE);
16911 tree v4sf_ftype_pcfloat
16912 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16913 /* @@@ the type is bogus */
16914 tree v4sf_ftype_v4sf_pv2si
16915 = build_function_type_list (V4SF_type_node,
16916 V4SF_type_node, pv2si_type_node, NULL_TREE);
16917 tree void_ftype_pv2si_v4sf
16918 = build_function_type_list (void_type_node,
16919 pv2si_type_node, V4SF_type_node, NULL_TREE);
16920 tree void_ftype_pfloat_v4sf
16921 = build_function_type_list (void_type_node,
16922 pfloat_type_node, V4SF_type_node, NULL_TREE);
16923 tree void_ftype_pdi_di
16924 = build_function_type_list (void_type_node,
16925 pdi_type_node, long_long_unsigned_type_node,
16926 NULL_TREE);
16927 tree void_ftype_pv2di_v2di
16928 = build_function_type_list (void_type_node,
16929 pv2di_type_node, V2DI_type_node, NULL_TREE);
16930 /* Normal vector unops. */
16931 tree v4sf_ftype_v4sf
16932 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16933 tree v16qi_ftype_v16qi
16934 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16935 tree v8hi_ftype_v8hi
16936 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16937 tree v4si_ftype_v4si
16938 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16939 tree v8qi_ftype_v8qi
16940 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16941 tree v4hi_ftype_v4hi
16942 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16943
16944 /* Normal vector binops. */
16945 tree v4sf_ftype_v4sf_v4sf
16946 = build_function_type_list (V4SF_type_node,
16947 V4SF_type_node, V4SF_type_node, NULL_TREE);
16948 tree v8qi_ftype_v8qi_v8qi
16949 = build_function_type_list (V8QI_type_node,
16950 V8QI_type_node, V8QI_type_node, NULL_TREE);
16951 tree v4hi_ftype_v4hi_v4hi
16952 = build_function_type_list (V4HI_type_node,
16953 V4HI_type_node, V4HI_type_node, NULL_TREE);
16954 tree v2si_ftype_v2si_v2si
16955 = build_function_type_list (V2SI_type_node,
16956 V2SI_type_node, V2SI_type_node, NULL_TREE);
16957 tree di_ftype_di_di
16958 = build_function_type_list (long_long_unsigned_type_node,
16959 long_long_unsigned_type_node,
16960 long_long_unsigned_type_node, NULL_TREE);
16961
16962 tree di_ftype_di_di_int
16963 = build_function_type_list (long_long_unsigned_type_node,
16964 long_long_unsigned_type_node,
16965 long_long_unsigned_type_node,
16966 integer_type_node, NULL_TREE);
16967
16968 tree v2si_ftype_v2sf
16969 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16970 tree v2sf_ftype_v2si
16971 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16972 tree v2si_ftype_v2si
16973 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16974 tree v2sf_ftype_v2sf
16975 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16976 tree v2sf_ftype_v2sf_v2sf
16977 = build_function_type_list (V2SF_type_node,
16978 V2SF_type_node, V2SF_type_node, NULL_TREE);
16979 tree v2si_ftype_v2sf_v2sf
16980 = build_function_type_list (V2SI_type_node,
16981 V2SF_type_node, V2SF_type_node, NULL_TREE);
16982 tree pint_type_node = build_pointer_type (integer_type_node);
16983 tree pdouble_type_node = build_pointer_type (double_type_node);
16984 tree pcdouble_type_node = build_pointer_type (
16985 build_type_variant (double_type_node, 1, 0));
16986 tree int_ftype_v2df_v2df
16987 = build_function_type_list (integer_type_node,
16988 V2DF_type_node, V2DF_type_node, NULL_TREE);
16989
16990 tree void_ftype_pcvoid
16991 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16992 tree v4sf_ftype_v4si
16993 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16994 tree v4si_ftype_v4sf
16995 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16996 tree v2df_ftype_v4si
16997 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16998 tree v4si_ftype_v2df
16999 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17000 tree v2si_ftype_v2df
17001 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17002 tree v4sf_ftype_v2df
17003 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17004 tree v2df_ftype_v2si
17005 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17006 tree v2df_ftype_v4sf
17007 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17008 tree int_ftype_v2df
17009 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17010 tree int64_ftype_v2df
17011 = build_function_type_list (long_long_integer_type_node,
17012 V2DF_type_node, NULL_TREE);
17013 tree v2df_ftype_v2df_int
17014 = build_function_type_list (V2DF_type_node,
17015 V2DF_type_node, integer_type_node, NULL_TREE);
17016 tree v2df_ftype_v2df_int64
17017 = build_function_type_list (V2DF_type_node,
17018 V2DF_type_node, long_long_integer_type_node,
17019 NULL_TREE);
17020 tree v4sf_ftype_v4sf_v2df
17021 = build_function_type_list (V4SF_type_node,
17022 V4SF_type_node, V2DF_type_node, NULL_TREE);
17023 tree v2df_ftype_v2df_v4sf
17024 = build_function_type_list (V2DF_type_node,
17025 V2DF_type_node, V4SF_type_node, NULL_TREE);
17026 tree v2df_ftype_v2df_v2df_int
17027 = build_function_type_list (V2DF_type_node,
17028 V2DF_type_node, V2DF_type_node,
17029 integer_type_node,
17030 NULL_TREE);
17031 tree v2df_ftype_v2df_pcdouble
17032 = build_function_type_list (V2DF_type_node,
17033 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17034 tree void_ftype_pdouble_v2df
17035 = build_function_type_list (void_type_node,
17036 pdouble_type_node, V2DF_type_node, NULL_TREE);
17037 tree void_ftype_pint_int
17038 = build_function_type_list (void_type_node,
17039 pint_type_node, integer_type_node, NULL_TREE);
17040 tree void_ftype_v16qi_v16qi_pchar
17041 = build_function_type_list (void_type_node,
17042 V16QI_type_node, V16QI_type_node,
17043 pchar_type_node, NULL_TREE);
17044 tree v2df_ftype_pcdouble
17045 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17046 tree v2df_ftype_v2df_v2df
17047 = build_function_type_list (V2DF_type_node,
17048 V2DF_type_node, V2DF_type_node, NULL_TREE);
17049 tree v16qi_ftype_v16qi_v16qi
17050 = build_function_type_list (V16QI_type_node,
17051 V16QI_type_node, V16QI_type_node, NULL_TREE);
17052 tree v8hi_ftype_v8hi_v8hi
17053 = build_function_type_list (V8HI_type_node,
17054 V8HI_type_node, V8HI_type_node, NULL_TREE);
17055 tree v4si_ftype_v4si_v4si
17056 = build_function_type_list (V4SI_type_node,
17057 V4SI_type_node, V4SI_type_node, NULL_TREE);
17058 tree v2di_ftype_v2di_v2di
17059 = build_function_type_list (V2DI_type_node,
17060 V2DI_type_node, V2DI_type_node, NULL_TREE);
17061 tree v2di_ftype_v2df_v2df
17062 = build_function_type_list (V2DI_type_node,
17063 V2DF_type_node, V2DF_type_node, NULL_TREE);
17064 tree v2df_ftype_v2df
17065 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17066 tree v2di_ftype_v2di_int
17067 = build_function_type_list (V2DI_type_node,
17068 V2DI_type_node, integer_type_node, NULL_TREE);
17069 tree v2di_ftype_v2di_v2di_int
17070 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17071 V2DI_type_node, integer_type_node, NULL_TREE);
17072 tree v4si_ftype_v4si_int
17073 = build_function_type_list (V4SI_type_node,
17074 V4SI_type_node, integer_type_node, NULL_TREE);
17075 tree v8hi_ftype_v8hi_int
17076 = build_function_type_list (V8HI_type_node,
17077 V8HI_type_node, integer_type_node, NULL_TREE);
17078 tree v4si_ftype_v8hi_v8hi
17079 = build_function_type_list (V4SI_type_node,
17080 V8HI_type_node, V8HI_type_node, NULL_TREE);
17081 tree di_ftype_v8qi_v8qi
17082 = build_function_type_list (long_long_unsigned_type_node,
17083 V8QI_type_node, V8QI_type_node, NULL_TREE);
17084 tree di_ftype_v2si_v2si
17085 = build_function_type_list (long_long_unsigned_type_node,
17086 V2SI_type_node, V2SI_type_node, NULL_TREE);
17087 tree v2di_ftype_v16qi_v16qi
17088 = build_function_type_list (V2DI_type_node,
17089 V16QI_type_node, V16QI_type_node, NULL_TREE);
17090 tree v2di_ftype_v4si_v4si
17091 = build_function_type_list (V2DI_type_node,
17092 V4SI_type_node, V4SI_type_node, NULL_TREE);
17093 tree int_ftype_v16qi
17094 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17095 tree v16qi_ftype_pcchar
17096 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17097 tree void_ftype_pchar_v16qi
17098 = build_function_type_list (void_type_node,
17099 pchar_type_node, V16QI_type_node, NULL_TREE);
17100
17101 tree v2di_ftype_v2di_unsigned_unsigned
17102 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17103 unsigned_type_node, unsigned_type_node,
17104 NULL_TREE);
17105 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17106 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17107 unsigned_type_node, unsigned_type_node,
17108 NULL_TREE);
17109 tree v2di_ftype_v2di_v16qi
17110 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17111 NULL_TREE);
17112
17113 tree float80_type;
17114 tree float128_type;
17115 tree ftype;
17116
17117 /* The __float80 type. */
17118 if (TYPE_MODE (long_double_type_node) == XFmode)
17119 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17120 "__float80");
17121 else
17122 {
17123 /* long double is not XFmode, so make a separate 80-bit type. */
17124 float80_type = make_node (REAL_TYPE);
17125 TYPE_PRECISION (float80_type) = 80;
17126 layout_type (float80_type);
17127 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17128 }
17129
17130 if (TARGET_64BIT)
17131 {
17132 float128_type = make_node (REAL_TYPE);
17133 TYPE_PRECISION (float128_type) = 128;
17134 layout_type (float128_type);
17135 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17136 }
17137
17138 /* Add all builtins that are more or less simple operations on two
17139 operands. */
17140 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17141 {
17142 /* Use the mode of one of the input operands; the destination can
17143 have a different mode for the mask-generating compares. */
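/* For example, the mask-generating compares such as sse_maskcmpv4sf3
   take V4SF inputs but the corresponding builtins return an integer
   mask, so their prototypes are overridden after the switch below.  */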
17144 enum machine_mode mode;
17145 tree type;
17146
17147 if (d->name == 0)
17148 continue;
17149 mode = insn_data[d->icode].operand[1].mode;
17150
17151 switch (mode)
17152 {
17153 case V16QImode:
17154 type = v16qi_ftype_v16qi_v16qi;
17155 break;
17156 case V8HImode:
17157 type = v8hi_ftype_v8hi_v8hi;
17158 break;
17159 case V4SImode:
17160 type = v4si_ftype_v4si_v4si;
17161 break;
17162 case V2DImode:
17163 type = v2di_ftype_v2di_v2di;
17164 break;
17165 case V2DFmode:
17166 type = v2df_ftype_v2df_v2df;
17167 break;
17168 case V4SFmode:
17169 type = v4sf_ftype_v4sf_v4sf;
17170 break;
17171 case V8QImode:
17172 type = v8qi_ftype_v8qi_v8qi;
17173 break;
17174 case V4HImode:
17175 type = v4hi_ftype_v4hi_v4hi;
17176 break;
17177 case V2SImode:
17178 type = v2si_ftype_v2si_v2si;
17179 break;
17180 case DImode:
17181 type = di_ftype_di_di;
17182 break;
17183
17184 default:
17185 gcc_unreachable ();
17186 }
17187
17188 /* Override for comparisons. */
17189 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17190 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17191 type = v4si_ftype_v4sf_v4sf;
17192
17193 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17194 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17195 type = v2di_ftype_v2df_v2df;
17196
17197 def_builtin (d->mask, d->name, type, d->code);
17198 }
17199
17200 /* Add all builtins that are more or less simple operations on 1 operand. */
17201 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17202 {
17203 enum machine_mode mode;
17204 tree type;
17205
17206 if (d->name == 0)
17207 continue;
17208 mode = insn_data[d->icode].operand[1].mode;
17209
17210 switch (mode)
17211 {
17212 case V16QImode:
17213 type = v16qi_ftype_v16qi;
17214 break;
17215 case V8HImode:
17216 type = v8hi_ftype_v8hi;
17217 break;
17218 case V4SImode:
17219 type = v4si_ftype_v4si;
17220 break;
17221 case V2DFmode:
17222 type = v2df_ftype_v2df;
17223 break;
17224 case V4SFmode:
17225 type = v4sf_ftype_v4sf;
17226 break;
17227 case V8QImode:
17228 type = v8qi_ftype_v8qi;
17229 break;
17230 case V4HImode:
17231 type = v4hi_ftype_v4hi;
17232 break;
17233 case V2SImode:
17234 type = v2si_ftype_v2si;
17235 break;
17236
17237 default:
17238 gcc_unreachable ();
17239 }
17240
17241 def_builtin (d->mask, d->name, type, d->code);
17242 }
17243
17244 /* Add the remaining MMX insns with somewhat more complicated types. */
17245 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17246 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17247 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17248 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17249
17250 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17251 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17252 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17253
17254 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17255 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17256
17257 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17258 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17259
17260 /* comi/ucomi insns. */
17261 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17262 if (d->mask == MASK_SSE2)
17263 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17264 else
17265 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17266
17267 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17268 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17269 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17270
17271 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17272 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17273 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17274 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17275 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17276 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17277 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17278 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17279 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17280 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17281 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17282
17283 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17284
17285 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17286 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17287
17288 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17289 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17290 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17291 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17292
17293 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17294 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17295 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17296 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17297
17298 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17299
17300 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17301
17302 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17303 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17304 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17305 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17306 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17307 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17308
17309 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17310
17311 /* Original 3DNow! */
17312 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17313 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17314 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17315 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17316 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17317 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17318 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17319 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17320 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17321 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17322 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17323 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17324 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17325 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17326 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17327 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17328 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17329 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17330 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17331 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17332
17333 /* 3DNow! extension as used in the Athlon CPU. */
17334 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17335 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17336 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17337 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17338 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17339 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17340
17341 /* SSE2 */
17342 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17343
17344 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17345 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17346
17347 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17348 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17349
17350 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17351 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17352 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17353 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17354 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17355
17356 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17357 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17358 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17359 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17360
17361 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17362 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17363
17364 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17365
17366 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17367 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17368
17369 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17370 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17371 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17372 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17373 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17374
17375 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17376
17377 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17378 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17379 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17380 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17381
17382 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17383 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17384 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17385
17386 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17387 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17388 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17389 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17390
17391 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17392 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17393 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17394
17395 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17396 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17397
17398 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17399 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17400
17401 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17402 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17403 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17404 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17405 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17406 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17407 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17408
17409 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17410 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17411 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17412 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17413 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17414 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17415 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17416
17417 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17418 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17419 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17420 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17421
17422 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17423
17424 /* Prescott New Instructions. */
17425 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17426 void_ftype_pcvoid_unsigned_unsigned,
17427 IX86_BUILTIN_MONITOR);
17428 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17429 void_ftype_unsigned_unsigned,
17430 IX86_BUILTIN_MWAIT);
17431 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17432 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17433
17434 /* SSSE3. */
17435 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17436 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17437 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17438 IX86_BUILTIN_PALIGNR);
17439
17440 /* AMDFAM10 SSE4A new built-ins. */
17441 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17442 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17443 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17444 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17445 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17446 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17447 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17448 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17449 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17450 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17451 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17452 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17453
17454 /* Access to the vec_init patterns. */
17455 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17456 integer_type_node, NULL_TREE);
17457 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17458 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17459
17460 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17461 short_integer_type_node,
17462 short_integer_type_node,
17463 short_integer_type_node, NULL_TREE);
17464 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17465 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17466
17467 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17468 char_type_node, char_type_node,
17469 char_type_node, char_type_node,
17470 char_type_node, char_type_node,
17471 char_type_node, NULL_TREE);
17472 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17473 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17474
17475 /* Access to the vec_extract patterns. */
17476 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17477 integer_type_node, NULL_TREE);
17478 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17479 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17480
17481 ftype = build_function_type_list (long_long_integer_type_node,
17482 V2DI_type_node, integer_type_node,
17483 NULL_TREE);
17484 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17485 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17486
17487 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17488 integer_type_node, NULL_TREE);
17489 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17490 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17491
17492 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17493 integer_type_node, NULL_TREE);
17494 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17495 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17496
17497 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17498 integer_type_node, NULL_TREE);
17499 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17500 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17501
17502 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17503 integer_type_node, NULL_TREE);
17504 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17505 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17506
17507 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17508 integer_type_node, NULL_TREE);
17509 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17510 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17511
17512 /* Access to the vec_set patterns. */
17513 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17514 intHI_type_node,
17515 integer_type_node, NULL_TREE);
17516 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17517 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17518
17519 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17520 intHI_type_node,
17521 integer_type_node, NULL_TREE);
17522 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17523 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17524 }
17525
17526 static void
17527 ix86_init_builtins (void)
17528 {
17529 if (TARGET_MMX)
17530 ix86_init_mmx_sse_builtins ();
17531 }
17532
17533 /* Errors in the source file can cause expand_expr to return const0_rtx
17534 where we expect a vector. To avoid crashing, use one of the vector
17535 clear instructions. */
17536 static rtx
17537 safe_vector_operand (rtx x, enum machine_mode mode)
17538 {
17539 if (x == const0_rtx)
17540 x = CONST0_RTX (mode);
17541 return x;
17542 }
17543
17544 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17545
17546 static rtx
17547 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17548 {
17549 rtx pat, xops[3];
17550 tree arg0 = CALL_EXPR_ARG (exp, 0);
17551 tree arg1 = CALL_EXPR_ARG (exp, 1);
17552 rtx op0 = expand_normal (arg0);
17553 rtx op1 = expand_normal (arg1);
17554 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17555 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17556 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17557
17558 if (VECTOR_MODE_P (mode0))
17559 op0 = safe_vector_operand (op0, mode0);
17560 if (VECTOR_MODE_P (mode1))
17561 op1 = safe_vector_operand (op1, mode1);
17562
17563 if (optimize || !target
17564 || GET_MODE (target) != tmode
17565 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17566 target = gen_reg_rtx (tmode);
17567
17568 if (GET_MODE (op1) == SImode && mode1 == TImode)
17569 {
17570 rtx x = gen_reg_rtx (V4SImode);
17571 emit_insn (gen_sse2_loadd (x, op1));
17572 op1 = gen_lowpart (TImode, x);
17573 }
17574
17575 /* The insn must want input operands in the same modes as the
17576 result. */
17577 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17578 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17579
17580 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17581 op0 = copy_to_mode_reg (mode0, op0);
17582 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17583 op1 = copy_to_mode_reg (mode1, op1);
17584
17585 /* ??? Using ix86_fixup_binary_operands is problematic when
17586 we've got mismatched modes. Fake it. */
17587
17588 xops[0] = target;
17589 xops[1] = op0;
17590 xops[2] = op1;
17591
17592 if (tmode == mode0 && tmode == mode1)
17593 {
17594 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17595 op0 = xops[1];
17596 op1 = xops[2];
17597 }
17598 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17599 {
17600 op0 = force_reg (mode0, op0);
17601 op1 = force_reg (mode1, op1);
17602 target = gen_reg_rtx (tmode);
17603 }
17604
17605 pat = GEN_FCN (icode) (target, op0, op1);
17606 if (! pat)
17607 return 0;
17608 emit_insn (pat);
17609 return target;
17610 }
17611
17612 /* Subroutine of ix86_expand_builtin to take care of stores. */
17613
17614 static rtx
17615 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17616 {
17617 rtx pat;
17618 tree arg0 = CALL_EXPR_ARG (exp, 0);
17619 tree arg1 = CALL_EXPR_ARG (exp, 1);
17620 rtx op0 = expand_normal (arg0);
17621 rtx op1 = expand_normal (arg1);
17622 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17623 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17624
17625 if (VECTOR_MODE_P (mode1))
17626 op1 = safe_vector_operand (op1, mode1);
17627
17628 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17629 op1 = copy_to_mode_reg (mode1, op1);
17630
17631 pat = GEN_FCN (icode) (op0, op1);
17632 if (pat)
17633 emit_insn (pat);
17634 return 0;
17635 }
17636
17637 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17638
17639 static rtx
17640 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17641 rtx target, int do_load)
17642 {
17643 rtx pat;
17644 tree arg0 = CALL_EXPR_ARG (exp, 0);
17645 rtx op0 = expand_normal (arg0);
17646 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17647 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17648
17649 if (optimize || !target
17650 || GET_MODE (target) != tmode
17651 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17652 target = gen_reg_rtx (tmode);
17653 if (do_load)
17654 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17655 else
17656 {
17657 if (VECTOR_MODE_P (mode0))
17658 op0 = safe_vector_operand (op0, mode0);
17659
17660 if ((optimize && !register_operand (op0, mode0))
17661 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17662 op0 = copy_to_mode_reg (mode0, op0);
17663 }
17664
17665 pat = GEN_FCN (icode) (target, op0);
17666 if (! pat)
17667 return 0;
17668 emit_insn (pat);
17669 return target;
17670 }
17671
17672 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17673 sqrtss, rsqrtss, rcpss. */
17674
17675 static rtx
17676 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17677 {
17678 rtx pat;
17679 tree arg0 = CALL_EXPR_ARG (exp, 0);
17680 rtx op1, op0 = expand_normal (arg0);
17681 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17682 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17683
17684 if (optimize || !target
17685 || GET_MODE (target) != tmode
17686 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17687 target = gen_reg_rtx (tmode);
17688
17689 if (VECTOR_MODE_P (mode0))
17690 op0 = safe_vector_operand (op0, mode0);
17691
17692 if ((optimize && !register_operand (op0, mode0))
17693 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17694 op0 = copy_to_mode_reg (mode0, op0);
17695
17696 op1 = op0;
17697 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17698 op1 = copy_to_mode_reg (mode0, op1);
17699
17700 pat = GEN_FCN (icode) (target, op0, op1);
17701 if (! pat)
17702 return 0;
17703 emit_insn (pat);
17704 return target;
17705 }
17706
17707 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17708
17709 static rtx
17710 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17711 rtx target)
17712 {
17713 rtx pat;
17714 tree arg0 = CALL_EXPR_ARG (exp, 0);
17715 tree arg1 = CALL_EXPR_ARG (exp, 1);
17716 rtx op0 = expand_normal (arg0);
17717 rtx op1 = expand_normal (arg1);
17718 rtx op2;
17719 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17720 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17721 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17722 enum rtx_code comparison = d->comparison;
17723
17724 if (VECTOR_MODE_P (mode0))
17725 op0 = safe_vector_operand (op0, mode0);
17726 if (VECTOR_MODE_P (mode1))
17727 op1 = safe_vector_operand (op1, mode1);
17728
17729 /* Swap operands if we have a comparison that isn't available in
17730 hardware. */
17731 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17732 {
17733 rtx tmp = gen_reg_rtx (mode1);
17734 emit_move_insn (tmp, op1);
17735 op1 = op0;
17736 op0 = tmp;
17737 }
17738
17739 if (optimize || !target
17740 || GET_MODE (target) != tmode
17741 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17742 target = gen_reg_rtx (tmode);
17743
17744 if ((optimize && !register_operand (op0, mode0))
17745 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17746 op0 = copy_to_mode_reg (mode0, op0);
17747 if ((optimize && !register_operand (op1, mode1))
17748 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17749 op1 = copy_to_mode_reg (mode1, op1);
17750
17751 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17752 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17753 if (! pat)
17754 return 0;
17755 emit_insn (pat);
17756 return target;
17757 }
17758
17759 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17760
17761 static rtx
17762 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17763 rtx target)
17764 {
17765 rtx pat;
17766 tree arg0 = CALL_EXPR_ARG (exp, 0);
17767 tree arg1 = CALL_EXPR_ARG (exp, 1);
17768 rtx op0 = expand_normal (arg0);
17769 rtx op1 = expand_normal (arg1);
17770 rtx op2;
17771 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17772 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17773 enum rtx_code comparison = d->comparison;
17774
17775 if (VECTOR_MODE_P (mode0))
17776 op0 = safe_vector_operand (op0, mode0);
17777 if (VECTOR_MODE_P (mode1))
17778 op1 = safe_vector_operand (op1, mode1);
17779
17780 /* Swap operands if we have a comparison that isn't available in
17781 hardware. */
17782 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17783 {
17784 rtx tmp = op1;
17785 op1 = op0;
17786 op0 = tmp;
17787 }
17788
17789 target = gen_reg_rtx (SImode);
17790 emit_move_insn (target, const0_rtx);
17791 target = gen_rtx_SUBREG (QImode, target, 0);
17792
17793 if ((optimize && !register_operand (op0, mode0))
17794 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17795 op0 = copy_to_mode_reg (mode0, op0);
17796 if ((optimize && !register_operand (op1, mode1))
17797 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17798 op1 = copy_to_mode_reg (mode1, op1);
17799
17800 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17801 pat = GEN_FCN (d->icode) (op0, op1);
17802 if (! pat)
17803 return 0;
17804 emit_insn (pat);
17805 emit_insn (gen_rtx_SET (VOIDmode,
17806 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17807 gen_rtx_fmt_ee (comparison, QImode,
17808 SET_DEST (pat),
17809 const0_rtx)));
17810
17811 return SUBREG_REG (target);
17812 }
17813
17814 /* Return the integer constant in ARG. Constrain it to be in the range
17815 of the subparts of VEC_TYPE; issue an error if not. */
17816
17817 static int
17818 get_element_number (tree vec_type, tree arg)
17819 {
17820 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17821
17822 if (!host_integerp (arg, 1)
17823 || (elt = tree_low_cst (arg, 1), elt > max))
17824 {
17825 error ("selector must be an integer constant in the range 0..%wi", max);
17826 return 0;
17827 }
17828
17829 return elt;
17830 }
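/* A minimal usage sketch, assuming the __v4sf typedef from xmmintrin.h: for
   a V4SF operand TYPE_VECTOR_SUBPARTS is 4, so the selector must be an
   integer constant in 0..3.  Something like

       __v4sf x;
       float f = __builtin_ia32_vec_ext_v4sf (x, 7);

   is rejected by the check above ("selector must be an integer constant in
   the range 0..3"), and 0 is returned so that expansion can continue.  */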
17831
17832 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17833 ix86_expand_vector_init. We DO have language-level syntax for this, in
17834 the form of (type){ init-list }. Except that since we can't place emms
17835 instructions from inside the compiler, we can't allow the use of MMX
17836 registers unless the user explicitly asks for it. So we do *not* define
17837 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17838 we have builtins, invoked by mmintrin.h, that give us license to emit
17839 these sorts of instructions. */
17840
17841 static rtx
17842 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17843 {
17844 enum machine_mode tmode = TYPE_MODE (type);
17845 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17846 int i, n_elt = GET_MODE_NUNITS (tmode);
17847 rtvec v = rtvec_alloc (n_elt);
17848
17849 gcc_assert (VECTOR_MODE_P (tmode));
17850 gcc_assert (call_expr_nargs (exp) == n_elt);
17851
17852 for (i = 0; i < n_elt; ++i)
17853 {
17854 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17855 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17856 }
17857
17858 if (!target || !register_operand (target, tmode))
17859 target = gen_reg_rtx (tmode);
17860
17861 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17862 return target;
17863 }
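/* A minimal sketch of the mmintrin.h side of this arrangement, assuming the
   usual __m64 typedef; the wrapper in the real header may differ in detail.
   The intrinsic is an always-inline function whose body is nothing but a
   call to the builtin defined above, so MMX code is only emitted when the
   user includes the header and calls the intrinsic explicitly:

       static __inline __m64 __attribute__ ((__always_inline__))
       _mm_set_pi32 (int __i1, int __i0)
       {
         return (__m64) __builtin_ia32_vec_init_v2si (__i0, __i1);
       }
*/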
17864
17865 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17866 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17867 had a language-level syntax for referencing vector elements. */
17868
17869 static rtx
17870 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17871 {
17872 enum machine_mode tmode, mode0;
17873 tree arg0, arg1;
17874 int elt;
17875 rtx op0;
17876
17877 arg0 = CALL_EXPR_ARG (exp, 0);
17878 arg1 = CALL_EXPR_ARG (exp, 1);
17879
17880 op0 = expand_normal (arg0);
17881 elt = get_element_number (TREE_TYPE (arg0), arg1);
17882
17883 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17884 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17885 gcc_assert (VECTOR_MODE_P (mode0));
17886
17887 op0 = force_reg (mode0, op0);
17888
17889 if (optimize || !target || !register_operand (target, tmode))
17890 target = gen_reg_rtx (tmode);
17891
17892 ix86_expand_vector_extract (true, target, op0, elt);
17893
17894 return target;
17895 }
17896
17897 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17898 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17899 a language-level syntax for referencing vector elements. */
17900
17901 static rtx
17902 ix86_expand_vec_set_builtin (tree exp)
17903 {
17904 enum machine_mode tmode, mode1;
17905 tree arg0, arg1, arg2;
17906 int elt;
17907 rtx op0, op1, target;
17908
17909 arg0 = CALL_EXPR_ARG (exp, 0);
17910 arg1 = CALL_EXPR_ARG (exp, 1);
17911 arg2 = CALL_EXPR_ARG (exp, 2);
17912
17913 tmode = TYPE_MODE (TREE_TYPE (arg0));
17914 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17915 gcc_assert (VECTOR_MODE_P (tmode));
17916
17917 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17918 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17919 elt = get_element_number (TREE_TYPE (arg0), arg2);
17920
17921 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17922 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17923
17924 op0 = force_reg (tmode, op0);
17925 op1 = force_reg (mode1, op1);
17926
17927 /* OP0 is the source of these builtin functions and shouldn't be
17928 modified. Create a copy, use it and return it as target. */
17929 target = gen_reg_rtx (tmode);
17930 emit_move_insn (target, op0);
17931 ix86_expand_vector_set (true, target, op1, elt);
17932
17933 return target;
17934 }
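/* A minimal sketch of how emmintrin.h is expected to use the vec_set builtin
   defined earlier, assuming the __m128i and __v8hi typedefs from that header;
   the real wrapper may differ in detail.  The third argument is the element
   selector checked by get_element_number, so it must be an integer constant:

       static __inline __m128i __attribute__ ((__always_inline__))
       _mm_insert_epi16 (__m128i __A, int __D, int __N)
       {
         return (__m128i) __builtin_ia32_vec_set_v8hi ((__v8hi) __A, __D, __N);
       }
*/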
17935
17936 /* Expand an expression EXP that calls a built-in function,
17937 with result going to TARGET if that's convenient
17938 (and in mode MODE if that's convenient).
17939 SUBTARGET may be used as the target for computing one of EXP's operands.
17940 IGNORE is nonzero if the value is to be ignored. */
17941
17942 static rtx
17943 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17944 enum machine_mode mode ATTRIBUTE_UNUSED,
17945 int ignore ATTRIBUTE_UNUSED)
17946 {
17947 const struct builtin_description *d;
17948 size_t i;
17949 enum insn_code icode;
17950 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17951 tree arg0, arg1, arg2, arg3;
17952 rtx op0, op1, op2, op3, pat;
17953 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17954 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17955
17956 switch (fcode)
17957 {
17958 case IX86_BUILTIN_EMMS:
17959 emit_insn (gen_mmx_emms ());
17960 return 0;
17961
17962 case IX86_BUILTIN_SFENCE:
17963 emit_insn (gen_sse_sfence ());
17964 return 0;
17965
17966 case IX86_BUILTIN_MASKMOVQ:
17967 case IX86_BUILTIN_MASKMOVDQU:
17968 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17969 ? CODE_FOR_mmx_maskmovq
17970 : CODE_FOR_sse2_maskmovdqu);
17971 /* Note the arg order is different from the operand order. */
17972 arg1 = CALL_EXPR_ARG (exp, 0);
17973 arg2 = CALL_EXPR_ARG (exp, 1);
17974 arg0 = CALL_EXPR_ARG (exp, 2);
17975 op0 = expand_normal (arg0);
17976 op1 = expand_normal (arg1);
17977 op2 = expand_normal (arg2);
17978 mode0 = insn_data[icode].operand[0].mode;
17979 mode1 = insn_data[icode].operand[1].mode;
17980 mode2 = insn_data[icode].operand[2].mode;
17981
17982 op0 = force_reg (Pmode, op0);
17983 op0 = gen_rtx_MEM (mode1, op0);
17984
17985 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17986 op0 = copy_to_mode_reg (mode0, op0);
17987 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17988 op1 = copy_to_mode_reg (mode1, op1);
17989 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17990 op2 = copy_to_mode_reg (mode2, op2);
17991 pat = GEN_FCN (icode) (op0, op1, op2);
17992 if (! pat)
17993 return 0;
17994 emit_insn (pat);
17995 return 0;
17996
17997 case IX86_BUILTIN_SQRTSS:
17998 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17999 case IX86_BUILTIN_RSQRTSS:
18000 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
18001 case IX86_BUILTIN_RCPSS:
18002 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
18003
18004 case IX86_BUILTIN_LOADUPS:
18005 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
18006
18007 case IX86_BUILTIN_STOREUPS:
18008 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
18009
18010 case IX86_BUILTIN_LOADHPS:
18011 case IX86_BUILTIN_LOADLPS:
18012 case IX86_BUILTIN_LOADHPD:
18013 case IX86_BUILTIN_LOADLPD:
18014 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
18015 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
18016 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
18017 : CODE_FOR_sse2_loadlpd);
18018 arg0 = CALL_EXPR_ARG (exp, 0);
18019 arg1 = CALL_EXPR_ARG (exp, 1);
18020 op0 = expand_normal (arg0);
18021 op1 = expand_normal (arg1);
18022 tmode = insn_data[icode].operand[0].mode;
18023 mode0 = insn_data[icode].operand[1].mode;
18024 mode1 = insn_data[icode].operand[2].mode;
18025
18026 op0 = force_reg (mode0, op0);
18027 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
18028 if (optimize || target == 0
18029 || GET_MODE (target) != tmode
18030 || !register_operand (target, tmode))
18031 target = gen_reg_rtx (tmode);
18032 pat = GEN_FCN (icode) (target, op0, op1);
18033 if (! pat)
18034 return 0;
18035 emit_insn (pat);
18036 return target;
18037
18038 case IX86_BUILTIN_STOREHPS:
18039 case IX86_BUILTIN_STORELPS:
18040 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
18041 : CODE_FOR_sse_storelps);
18042 arg0 = CALL_EXPR_ARG (exp, 0);
18043 arg1 = CALL_EXPR_ARG (exp, 1);
18044 op0 = expand_normal (arg0);
18045 op1 = expand_normal (arg1);
18046 mode0 = insn_data[icode].operand[0].mode;
18047 mode1 = insn_data[icode].operand[1].mode;
18048
18049 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18050 op1 = force_reg (mode1, op1);
18051
18052 pat = GEN_FCN (icode) (op0, op1);
18053 if (! pat)
18054 return 0;
18055 emit_insn (pat);
18056 return const0_rtx;
18057
18058 case IX86_BUILTIN_MOVNTPS:
18059 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18060 case IX86_BUILTIN_MOVNTQ:
18061 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18062
18063 case IX86_BUILTIN_LDMXCSR:
18064 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18065 target = assign_386_stack_local (SImode, SLOT_TEMP);
18066 emit_move_insn (target, op0);
18067 emit_insn (gen_sse_ldmxcsr (target));
18068 return 0;
18069
18070 case IX86_BUILTIN_STMXCSR:
18071 target = assign_386_stack_local (SImode, SLOT_TEMP);
18072 emit_insn (gen_sse_stmxcsr (target));
18073 return copy_to_mode_reg (SImode, target);
18074
18075 case IX86_BUILTIN_SHUFPS:
18076 case IX86_BUILTIN_SHUFPD:
18077 icode = (fcode == IX86_BUILTIN_SHUFPS
18078 ? CODE_FOR_sse_shufps
18079 : CODE_FOR_sse2_shufpd);
18080 arg0 = CALL_EXPR_ARG (exp, 0);
18081 arg1 = CALL_EXPR_ARG (exp, 1);
18082 arg2 = CALL_EXPR_ARG (exp, 2);
18083 op0 = expand_normal (arg0);
18084 op1 = expand_normal (arg1);
18085 op2 = expand_normal (arg2);
18086 tmode = insn_data[icode].operand[0].mode;
18087 mode0 = insn_data[icode].operand[1].mode;
18088 mode1 = insn_data[icode].operand[2].mode;
18089 mode2 = insn_data[icode].operand[3].mode;
18090
18091 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18092 op0 = copy_to_mode_reg (mode0, op0);
18093 if ((optimize && !register_operand (op1, mode1))
18094 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18095 op1 = copy_to_mode_reg (mode1, op1);
18096 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18097 {
18098 /* @@@ better error message */
18099 error ("mask must be an immediate");
18100 return gen_reg_rtx (tmode);
18101 }
18102 if (optimize || target == 0
18103 || GET_MODE (target) != tmode
18104 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18105 target = gen_reg_rtx (tmode);
18106 pat = GEN_FCN (icode) (target, op0, op1, op2);
18107 if (! pat)
18108 return 0;
18109 emit_insn (pat);
18110 return target;
18111
18112 case IX86_BUILTIN_PSHUFW:
18113 case IX86_BUILTIN_PSHUFD:
18114 case IX86_BUILTIN_PSHUFHW:
18115 case IX86_BUILTIN_PSHUFLW:
18116 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18117 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18118 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18119 : CODE_FOR_mmx_pshufw);
18120 arg0 = CALL_EXPR_ARG (exp, 0);
18121 arg1 = CALL_EXPR_ARG (exp, 1);
18122 op0 = expand_normal (arg0);
18123 op1 = expand_normal (arg1);
18124 tmode = insn_data[icode].operand[0].mode;
18125 mode1 = insn_data[icode].operand[1].mode;
18126 mode2 = insn_data[icode].operand[2].mode;
18127
18128 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18129 op0 = copy_to_mode_reg (mode1, op0);
18130 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18131 {
18132 /* @@@ better error message */
18133 error ("mask must be an immediate");
18134 return const0_rtx;
18135 }
18136 if (target == 0
18137 || GET_MODE (target) != tmode
18138 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18139 target = gen_reg_rtx (tmode);
18140 pat = GEN_FCN (icode) (target, op0, op1);
18141 if (! pat)
18142 return 0;
18143 emit_insn (pat);
18144 return target;
18145
18146 case IX86_BUILTIN_PSLLWI128:
18147 icode = CODE_FOR_ashlv8hi3;
18148 goto do_pshifti;
18149 case IX86_BUILTIN_PSLLDI128:
18150 icode = CODE_FOR_ashlv4si3;
18151 goto do_pshifti;
18152 case IX86_BUILTIN_PSLLQI128:
18153 icode = CODE_FOR_ashlv2di3;
18154 goto do_pshifti;
18155 case IX86_BUILTIN_PSRAWI128:
18156 icode = CODE_FOR_ashrv8hi3;
18157 goto do_pshifti;
18158 case IX86_BUILTIN_PSRADI128:
18159 icode = CODE_FOR_ashrv4si3;
18160 goto do_pshifti;
18161 case IX86_BUILTIN_PSRLWI128:
18162 icode = CODE_FOR_lshrv8hi3;
18163 goto do_pshifti;
18164 case IX86_BUILTIN_PSRLDI128:
18165 icode = CODE_FOR_lshrv4si3;
18166 goto do_pshifti;
18167 case IX86_BUILTIN_PSRLQI128:
18168 icode = CODE_FOR_lshrv2di3;
18169 goto do_pshifti;
18170 do_pshifti:
18171 arg0 = CALL_EXPR_ARG (exp, 0);
18172 arg1 = CALL_EXPR_ARG (exp, 1);
18173 op0 = expand_normal (arg0);
18174 op1 = expand_normal (arg1);
18175
18176 if (!CONST_INT_P (op1))
18177 {
18178 error ("shift must be an immediate");
18179 return const0_rtx;
18180 }
18181 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
18182 op1 = GEN_INT (255);
18183
18184 tmode = insn_data[icode].operand[0].mode;
18185 mode1 = insn_data[icode].operand[1].mode;
18186 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18187 op0 = copy_to_reg (op0);
18188
18189 target = gen_reg_rtx (tmode);
18190 pat = GEN_FCN (icode) (target, op0, op1);
18191 if (!pat)
18192 return 0;
18193 emit_insn (pat);
18194 return target;
18195
18196 case IX86_BUILTIN_PSLLW128:
18197 icode = CODE_FOR_ashlv8hi3;
18198 goto do_pshift;
18199 case IX86_BUILTIN_PSLLD128:
18200 icode = CODE_FOR_ashlv4si3;
18201 goto do_pshift;
18202 case IX86_BUILTIN_PSLLQ128:
18203 icode = CODE_FOR_ashlv2di3;
18204 goto do_pshift;
18205 case IX86_BUILTIN_PSRAW128:
18206 icode = CODE_FOR_ashrv8hi3;
18207 goto do_pshift;
18208 case IX86_BUILTIN_PSRAD128:
18209 icode = CODE_FOR_ashrv4si3;
18210 goto do_pshift;
18211 case IX86_BUILTIN_PSRLW128:
18212 icode = CODE_FOR_lshrv8hi3;
18213 goto do_pshift;
18214 case IX86_BUILTIN_PSRLD128:
18215 icode = CODE_FOR_lshrv4si3;
18216 goto do_pshift;
18217 case IX86_BUILTIN_PSRLQ128:
18218 icode = CODE_FOR_lshrv2di3;
18219 goto do_pshift;
18220 do_pshift:
18221 arg0 = CALL_EXPR_ARG (exp, 0);
18222 arg1 = CALL_EXPR_ARG (exp, 1);
18223 op0 = expand_normal (arg0);
18224 op1 = expand_normal (arg1);
18225
18226 tmode = insn_data[icode].operand[0].mode;
18227 mode1 = insn_data[icode].operand[1].mode;
18228
18229 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18230 op0 = copy_to_reg (op0);
18231
18232 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
18233 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
18234 op1 = copy_to_reg (op1);
18235
18236 target = gen_reg_rtx (tmode);
18237 pat = GEN_FCN (icode) (target, op0, op1);
18238 if (!pat)
18239 return 0;
18240 emit_insn (pat);
18241 return target;
18242
18243 case IX86_BUILTIN_PSLLDQI128:
18244 case IX86_BUILTIN_PSRLDQI128:
18245 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18246 : CODE_FOR_sse2_lshrti3);
18247 arg0 = CALL_EXPR_ARG (exp, 0);
18248 arg1 = CALL_EXPR_ARG (exp, 1);
18249 op0 = expand_normal (arg0);
18250 op1 = expand_normal (arg1);
18251 tmode = insn_data[icode].operand[0].mode;
18252 mode1 = insn_data[icode].operand[1].mode;
18253 mode2 = insn_data[icode].operand[2].mode;
18254
18255 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18256 {
18257 op0 = copy_to_reg (op0);
18258 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18259 }
18260 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18261 {
18262 error ("shift must be an immediate");
18263 return const0_rtx;
18264 }
18265 target = gen_reg_rtx (V2DImode);
18266 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
18267 op0, op1);
18268 if (! pat)
18269 return 0;
18270 emit_insn (pat);
18271 return target;
18272
18273 case IX86_BUILTIN_FEMMS:
18274 emit_insn (gen_mmx_femms ());
18275 return NULL_RTX;
18276
18277 case IX86_BUILTIN_PAVGUSB:
18278 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18279
18280 case IX86_BUILTIN_PF2ID:
18281 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18282
18283 case IX86_BUILTIN_PFACC:
18284 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18285
18286 case IX86_BUILTIN_PFADD:
18287 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18288
18289 case IX86_BUILTIN_PFCMPEQ:
18290 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18291
18292 case IX86_BUILTIN_PFCMPGE:
18293 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18294
18295 case IX86_BUILTIN_PFCMPGT:
18296 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18297
18298 case IX86_BUILTIN_PFMAX:
18299 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18300
18301 case IX86_BUILTIN_PFMIN:
18302 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18303
18304 case IX86_BUILTIN_PFMUL:
18305 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18306
18307 case IX86_BUILTIN_PFRCP:
18308 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18309
18310 case IX86_BUILTIN_PFRCPIT1:
18311 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18312
18313 case IX86_BUILTIN_PFRCPIT2:
18314 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18315
18316 case IX86_BUILTIN_PFRSQIT1:
18317 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18318
18319 case IX86_BUILTIN_PFRSQRT:
18320 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18321
18322 case IX86_BUILTIN_PFSUB:
18323 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18324
18325 case IX86_BUILTIN_PFSUBR:
18326 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18327
18328 case IX86_BUILTIN_PI2FD:
18329 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18330
18331 case IX86_BUILTIN_PMULHRW:
18332 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18333
18334 case IX86_BUILTIN_PF2IW:
18335 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18336
18337 case IX86_BUILTIN_PFNACC:
18338 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18339
18340 case IX86_BUILTIN_PFPNACC:
18341 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18342
18343 case IX86_BUILTIN_PI2FW:
18344 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18345
18346 case IX86_BUILTIN_PSWAPDSI:
18347 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18348
18349 case IX86_BUILTIN_PSWAPDSF:
18350 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18351
18352 case IX86_BUILTIN_SQRTSD:
18353 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18354 case IX86_BUILTIN_LOADUPD:
18355 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18356 case IX86_BUILTIN_STOREUPD:
18357 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18358
18359 case IX86_BUILTIN_MFENCE:
18360 emit_insn (gen_sse2_mfence ());
18361 return 0;
18362 case IX86_BUILTIN_LFENCE:
18363 emit_insn (gen_sse2_lfence ());
18364 return 0;
18365
18366 case IX86_BUILTIN_CLFLUSH:
18367 arg0 = CALL_EXPR_ARG (exp, 0);
18368 op0 = expand_normal (arg0);
18369 icode = CODE_FOR_sse2_clflush;
18370 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18371 op0 = copy_to_mode_reg (Pmode, op0);
18372
18373 emit_insn (gen_sse2_clflush (op0));
18374 return 0;
18375
18376 case IX86_BUILTIN_MOVNTPD:
18377 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18378 case IX86_BUILTIN_MOVNTDQ:
18379 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18380 case IX86_BUILTIN_MOVNTI:
18381 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18382
18383 case IX86_BUILTIN_LOADDQU:
18384 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18385 case IX86_BUILTIN_STOREDQU:
18386 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18387
18388 case IX86_BUILTIN_MONITOR:
18389 arg0 = CALL_EXPR_ARG (exp, 0);
18390 arg1 = CALL_EXPR_ARG (exp, 1);
18391 arg2 = CALL_EXPR_ARG (exp, 2);
18392 op0 = expand_normal (arg0);
18393 op1 = expand_normal (arg1);
18394 op2 = expand_normal (arg2);
18395 if (!REG_P (op0))
18396 op0 = copy_to_mode_reg (Pmode, op0);
18397 if (!REG_P (op1))
18398 op1 = copy_to_mode_reg (SImode, op1);
18399 if (!REG_P (op2))
18400 op2 = copy_to_mode_reg (SImode, op2);
18401 if (!TARGET_64BIT)
18402 emit_insn (gen_sse3_monitor (op0, op1, op2));
18403 else
18404 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18405 return 0;
18406
18407 case IX86_BUILTIN_MWAIT:
18408 arg0 = CALL_EXPR_ARG (exp, 0);
18409 arg1 = CALL_EXPR_ARG (exp, 1);
18410 op0 = expand_normal (arg0);
18411 op1 = expand_normal (arg1);
18412 if (!REG_P (op0))
18413 op0 = copy_to_mode_reg (SImode, op0);
18414 if (!REG_P (op1))
18415 op1 = copy_to_mode_reg (SImode, op1);
18416 emit_insn (gen_sse3_mwait (op0, op1));
18417 return 0;
18418
18419 case IX86_BUILTIN_LDDQU:
18420 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18421 target, 1);
18422
18423 case IX86_BUILTIN_PALIGNR:
18424 case IX86_BUILTIN_PALIGNR128:
18425 if (fcode == IX86_BUILTIN_PALIGNR)
18426 {
18427 icode = CODE_FOR_ssse3_palignrdi;
18428 mode = DImode;
18429 }
18430 else
18431 {
18432 icode = CODE_FOR_ssse3_palignrti;
18433 mode = V2DImode;
18434 }
18435 arg0 = CALL_EXPR_ARG (exp, 0);
18436 arg1 = CALL_EXPR_ARG (exp, 1);
18437 arg2 = CALL_EXPR_ARG (exp, 2);
18438 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18439 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18440 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18441 tmode = insn_data[icode].operand[0].mode;
18442 mode1 = insn_data[icode].operand[1].mode;
18443 mode2 = insn_data[icode].operand[2].mode;
18444 mode3 = insn_data[icode].operand[3].mode;
18445
18446 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18447 {
18448 op0 = copy_to_reg (op0);
18449 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18450 }
18451 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18452 {
18453 op1 = copy_to_reg (op1);
18454 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18455 }
18456 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18457 {
18458 error ("shift must be an immediate");
18459 return const0_rtx;
18460 }
18461 target = gen_reg_rtx (mode);
18462 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18463 op0, op1, op2);
18464 if (! pat)
18465 return 0;
18466 emit_insn (pat);
18467 return target;
18468
18469 case IX86_BUILTIN_MOVNTSD:
18470 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18471
18472 case IX86_BUILTIN_MOVNTSS:
18473 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18474
18475 case IX86_BUILTIN_INSERTQ:
18476 case IX86_BUILTIN_EXTRQ:
18477 icode = (fcode == IX86_BUILTIN_EXTRQ
18478 ? CODE_FOR_sse4a_extrq
18479 : CODE_FOR_sse4a_insertq);
18480 arg0 = CALL_EXPR_ARG (exp, 0);
18481 arg1 = CALL_EXPR_ARG (exp, 1);
18482 op0 = expand_normal (arg0);
18483 op1 = expand_normal (arg1);
18484 tmode = insn_data[icode].operand[0].mode;
18485 mode1 = insn_data[icode].operand[1].mode;
18486 mode2 = insn_data[icode].operand[2].mode;
18487 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18488 op0 = copy_to_mode_reg (mode1, op0);
18489 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18490 op1 = copy_to_mode_reg (mode2, op1);
18491 if (optimize || target == 0
18492 || GET_MODE (target) != tmode
18493 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18494 target = gen_reg_rtx (tmode);
18495 pat = GEN_FCN (icode) (target, op0, op1);
18496 if (! pat)
18497 return NULL_RTX;
18498 emit_insn (pat);
18499 return target;
18500
18501 case IX86_BUILTIN_EXTRQI:
18502 icode = CODE_FOR_sse4a_extrqi;
18503 arg0 = CALL_EXPR_ARG (exp, 0);
18504 arg1 = CALL_EXPR_ARG (exp, 1);
18505 arg2 = CALL_EXPR_ARG (exp, 2);
18506 op0 = expand_normal (arg0);
18507 op1 = expand_normal (arg1);
18508 op2 = expand_normal (arg2);
18509 tmode = insn_data[icode].operand[0].mode;
18510 mode1 = insn_data[icode].operand[1].mode;
18511 mode2 = insn_data[icode].operand[2].mode;
18512 mode3 = insn_data[icode].operand[3].mode;
18513 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18514 op0 = copy_to_mode_reg (mode1, op0);
18515 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18516 {
18517 error ("index mask must be an immediate");
18518 return gen_reg_rtx (tmode);
18519 }
18520 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18521 {
18522 error ("length mask must be an immediate");
18523 return gen_reg_rtx (tmode);
18524 }
18525 if (optimize || target == 0
18526 || GET_MODE (target) != tmode
18527 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18528 target = gen_reg_rtx (tmode);
18529 pat = GEN_FCN (icode) (target, op0, op1, op2);
18530 if (! pat)
18531 return NULL_RTX;
18532 emit_insn (pat);
18533 return target;
18534
18535 case IX86_BUILTIN_INSERTQI:
18536 icode = CODE_FOR_sse4a_insertqi;
18537 arg0 = CALL_EXPR_ARG (exp, 0);
18538 arg1 = CALL_EXPR_ARG (exp, 1);
18539 arg2 = CALL_EXPR_ARG (exp, 2);
18540 arg3 = CALL_EXPR_ARG (exp, 3);
18541 op0 = expand_normal (arg0);
18542 op1 = expand_normal (arg1);
18543 op2 = expand_normal (arg2);
18544 op3 = expand_normal (arg3);
18545 tmode = insn_data[icode].operand[0].mode;
18546 mode1 = insn_data[icode].operand[1].mode;
18547 mode2 = insn_data[icode].operand[2].mode;
18548 mode3 = insn_data[icode].operand[3].mode;
18549 mode4 = insn_data[icode].operand[4].mode;
18550
18551 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18552 op0 = copy_to_mode_reg (mode1, op0);
18553
18554 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18555 op1 = copy_to_mode_reg (mode2, op1);
18556
18557 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18558 {
18559 error ("index mask must be an immediate");
18560 return gen_reg_rtx (tmode);
18561 }
18562 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18563 {
18564 error ("length mask must be an immediate");
18565 return gen_reg_rtx (tmode);
18566 }
18567 if (optimize || target == 0
18568 || GET_MODE (target) != tmode
18569 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18570 target = gen_reg_rtx (tmode);
18571 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18572 if (! pat)
18573 return NULL_RTX;
18574 emit_insn (pat);
18575 return target;
18576
18577 case IX86_BUILTIN_VEC_INIT_V2SI:
18578 case IX86_BUILTIN_VEC_INIT_V4HI:
18579 case IX86_BUILTIN_VEC_INIT_V8QI:
18580 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18581
18582 case IX86_BUILTIN_VEC_EXT_V2DF:
18583 case IX86_BUILTIN_VEC_EXT_V2DI:
18584 case IX86_BUILTIN_VEC_EXT_V4SF:
18585 case IX86_BUILTIN_VEC_EXT_V4SI:
18586 case IX86_BUILTIN_VEC_EXT_V8HI:
18587 case IX86_BUILTIN_VEC_EXT_V2SI:
18588 case IX86_BUILTIN_VEC_EXT_V4HI:
18589 return ix86_expand_vec_ext_builtin (exp, target);
18590
18591 case IX86_BUILTIN_VEC_SET_V8HI:
18592 case IX86_BUILTIN_VEC_SET_V4HI:
18593 return ix86_expand_vec_set_builtin (exp);
18594
18595 default:
18596 break;
18597 }
18598
18599 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18600 if (d->code == fcode)
18601 {
18602 /* Compares are treated specially. */
18603 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18604 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18605 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18606 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18607 return ix86_expand_sse_compare (d, exp, target);
18608
18609 return ix86_expand_binop_builtin (d->icode, exp, target);
18610 }
18611
18612 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18613 if (d->code == fcode)
18614 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18615
18616 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18617 if (d->code == fcode)
18618 return ix86_expand_sse_comi (d, exp, target);
18619
18620 gcc_unreachable ();
18621 }
18622
18623 /* Returns a function decl for a vectorized version of the builtin function
18624 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18625 if it is not available. */
18626
18627 static tree
18628 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18629 tree type_in)
18630 {
18631 enum machine_mode in_mode, out_mode;
18632 int in_n, out_n;
18633
18634 if (TREE_CODE (type_out) != VECTOR_TYPE
18635 || TREE_CODE (type_in) != VECTOR_TYPE)
18636 return NULL_TREE;
18637
18638 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18639 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18640 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18641 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18642
18643 switch (fn)
18644 {
18645 case BUILT_IN_SQRT:
18646 if (out_mode == DFmode && out_n == 2
18647 && in_mode == DFmode && in_n == 2)
18648 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18649 return NULL_TREE;
18650
18651 case BUILT_IN_SQRTF:
18652 if (out_mode == SFmode && out_n == 4
18653 && in_mode == SFmode && in_n == 4)
18654 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18655 return NULL_TREE;
18656
18657 case BUILT_IN_LRINTF:
18658 if (out_mode == SImode && out_n == 4
18659 && in_mode == SFmode && in_n == 4)
18660 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18661 return NULL_TREE;
18662
18663 default:
18664 ;
18665 }
18666
18667 return NULL_TREE;
18668 }
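/* A minimal sketch of the kind of loop this hook serves, assuming SSE2 is
   enabled and the vectorizer is active (typically -O3 or -ftree-vectorize,
   plus -fno-math-errno so the sqrt calls can be treated as pure):

       void f (double *a, const double *b, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           a[i] = __builtin_sqrt (b[i]);
       }

   When the vectorizer picks a V2DF vector type, it asks this hook for a
   vectorized sqrt and receives the IX86_BUILTIN_SQRTPD decl, so the loop
   body becomes one sqrtpd per two iterations.  */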
18669
18670 /* Returns a decl of a function that implements conversion of the
18671 input vector of type TYPE, or NULL_TREE if it is not available. */
18672
18673 static tree
18674 ix86_builtin_conversion (enum tree_code code, tree type)
18675 {
18676 if (TREE_CODE (type) != VECTOR_TYPE)
18677 return NULL_TREE;
18678
18679 switch (code)
18680 {
18681 case FLOAT_EXPR:
18682 switch (TYPE_MODE (type))
18683 {
18684 case V4SImode:
18685 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18686 default:
18687 return NULL_TREE;
18688 }
18689
18690 case FIX_TRUNC_EXPR:
18691 switch (TYPE_MODE (type))
18692 {
18693 case V4SFmode:
18694 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18695 default:
18696 return NULL_TREE;
18697 }
18698 default:
18699 return NULL_TREE;
18700
18701 }
18702 }
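/* A minimal sketch of a loop this conversion hook covers, again assuming
   SSE2 and an active vectorizer:

       void f (float *dst, const int *src, int n)
       {
         int i;
         for (i = 0; i < n; i++)
           dst[i] = (float) src[i];
       }

   The int-to-float conversion is a FLOAT_EXPR on a V4SImode vector, so the
   hook returns the IX86_BUILTIN_CVTDQ2PS decl; the float-to-int truncation
   in the other direction maps to IX86_BUILTIN_CVTTPS2DQ via FIX_TRUNC_EXPR.  */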
18703
18704 /* Store OPERAND to memory after reload is completed. This means
18705 that we can't easily use assign_stack_local. */
18706 rtx
18707 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18708 {
18709 rtx result;
18710
18711 gcc_assert (reload_completed);
18712 if (TARGET_RED_ZONE)
18713 {
18714 result = gen_rtx_MEM (mode,
18715 gen_rtx_PLUS (Pmode,
18716 stack_pointer_rtx,
18717 GEN_INT (-RED_ZONE_SIZE)));
18718 emit_move_insn (result, operand);
18719 }
18720 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18721 {
18722 switch (mode)
18723 {
18724 case HImode:
18725 case SImode:
18726 operand = gen_lowpart (DImode, operand);
18727 /* FALLTHRU */
18728 case DImode:
18729 emit_insn (
18730 gen_rtx_SET (VOIDmode,
18731 gen_rtx_MEM (DImode,
18732 gen_rtx_PRE_DEC (DImode,
18733 stack_pointer_rtx)),
18734 operand));
18735 break;
18736 default:
18737 gcc_unreachable ();
18738 }
18739 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18740 }
18741 else
18742 {
18743 switch (mode)
18744 {
18745 case DImode:
18746 {
18747 rtx operands[2];
18748 split_di (&operand, 1, operands, operands + 1);
18749 emit_insn (
18750 gen_rtx_SET (VOIDmode,
18751 gen_rtx_MEM (SImode,
18752 gen_rtx_PRE_DEC (Pmode,
18753 stack_pointer_rtx)),
18754 operands[1]));
18755 emit_insn (
18756 gen_rtx_SET (VOIDmode,
18757 gen_rtx_MEM (SImode,
18758 gen_rtx_PRE_DEC (Pmode,
18759 stack_pointer_rtx)),
18760 operands[0]));
18761 }
18762 break;
18763 case HImode:
18764 /* Store HImodes as SImodes. */
18765 operand = gen_lowpart (SImode, operand);
18766 /* FALLTHRU */
18767 case SImode:
18768 emit_insn (
18769 gen_rtx_SET (VOIDmode,
18770 gen_rtx_MEM (GET_MODE (operand),
18771 gen_rtx_PRE_DEC (SImode,
18772 stack_pointer_rtx)),
18773 operand));
18774 break;
18775 default:
18776 gcc_unreachable ();
18777 }
18778 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18779 }
18780 return result;
18781 }
18782
18783 /* Free the operand from memory. */
18784 void
18785 ix86_free_from_memory (enum machine_mode mode)
18786 {
18787 if (!TARGET_RED_ZONE)
18788 {
18789 int size;
18790
18791 if (mode == DImode || TARGET_64BIT)
18792 size = 8;
18793 else
18794 size = 4;
18795 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18796 to a pop or add instruction if registers are available. */
18797 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18798 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18799 GEN_INT (size))));
18800 }
18801 }
18802
18803 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18804 QImode must go into class Q_REGS.
18805 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
18806 movdf to do mem-to-mem moves through integer regs. */
18807 enum reg_class
18808 ix86_preferred_reload_class (rtx x, enum reg_class class)
18809 {
18810 enum machine_mode mode = GET_MODE (x);
18811
18812 /* We're only allowed to return a subclass of CLASS. Many of the
18813 following checks fail for NO_REGS, so eliminate that early. */
18814 if (class == NO_REGS)
18815 return NO_REGS;
18816
18817 /* All classes can load zeros. */
18818 if (x == CONST0_RTX (mode))
18819 return class;
18820
18821 /* Force constants into memory if we are loading a (nonzero) constant into
18822 an MMX or SSE register. This is because there are no MMX/SSE instructions
18823 to load from a constant. */
18824 if (CONSTANT_P (x)
18825 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18826 return NO_REGS;
18827
18828 /* Prefer SSE regs only if we can use them for math. */
18829 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18830 return SSE_CLASS_P (class) ? class : NO_REGS;
18831
18832 /* Floating-point constants need more complex checks. */
18833 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18834 {
18835 /* General regs can load everything. */
18836 if (reg_class_subset_p (class, GENERAL_REGS))
18837 return class;
18838
18839 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18840 zero above. We only want to wind up preferring 80387 registers if
18841 we plan on doing computation with them. */
18842 if (TARGET_80387
18843 && standard_80387_constant_p (x))
18844 {
18845 /* Limit class to non-sse. */
18846 if (class == FLOAT_SSE_REGS)
18847 return FLOAT_REGS;
18848 if (class == FP_TOP_SSE_REGS)
18849 return FP_TOP_REG;
18850 if (class == FP_SECOND_SSE_REGS)
18851 return FP_SECOND_REG;
18852 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18853 return class;
18854 }
18855
18856 return NO_REGS;
18857 }
18858
18859 /* Generally when we see PLUS here, it's the function invariant
18860 (plus soft-fp const_int), which can only be computed into general
18861 regs. */
18862 if (GET_CODE (x) == PLUS)
18863 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18864
18865 /* QImode constants are easy to load, but non-constant QImode data
18866 must go into Q_REGS. */
18867 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18868 {
18869 if (reg_class_subset_p (class, Q_REGS))
18870 return class;
18871 if (reg_class_subset_p (Q_REGS, class))
18872 return Q_REGS;
18873 return NO_REGS;
18874 }
18875
18876 return class;
18877 }
18878
18879 /* Discourage putting floating-point values in SSE registers unless
18880 SSE math is being used, and likewise for the 387 registers. */
18881 enum reg_class
18882 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18883 {
18884 enum machine_mode mode = GET_MODE (x);
18885
18886 /* Restrict the output reload class to the register bank that we are doing
18887 math on. If we would like not to return a subset of CLASS, reject this
18888 alternative: if reload cannot do this, it will still use its choice. */
18889 mode = GET_MODE (x);
18890 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18891 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18892
18893 if (X87_FLOAT_MODE_P (mode))
18894 {
18895 if (class == FP_TOP_SSE_REGS)
18896 return FP_TOP_REG;
18897 else if (class == FP_SECOND_SSE_REGS)
18898 return FP_SECOND_REG;
18899 else
18900 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18901 }
18902
18903 return class;
18904 }
18905
18906 /* If we are copying between general and FP registers, we need a memory
18907 location. The same is true for SSE and MMX registers.
18908
18909 The macro can't work reliably when one of the CLASSES is a class containing
18910 registers from multiple units (SSE, MMX, integer). We avoid this by never
18911 combining those units in a single alternative in the machine description.
18912 Ensure that this constraint holds to avoid unexpected surprises.
18913
18914 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18915 enforce these sanity checks. */
18916
18917 int
18918 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18919 enum machine_mode mode, int strict)
18920 {
18921 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18922 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18923 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18924 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18925 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18926 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18927 {
18928 gcc_assert (!strict);
18929 return true;
18930 }
18931
18932 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18933 return true;
18934
18935 /* ??? This is a lie. We do have moves between mmx/general, and between
18936 mmx/sse2. But by saying we need secondary memory we discourage the
18937 register allocator from using the mmx registers unless needed. */
18938 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18939 return true;
18940
18941 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18942 {
18943 /* SSE1 doesn't have any direct moves from other classes. */
18944 if (!TARGET_SSE2)
18945 return true;
18946
18947 /* If the target says that inter-unit moves are more expensive
18948 than moving through memory, then don't generate them. */
18949 if (!TARGET_INTER_UNIT_MOVES)
18950 return true;
18951
18952 /* Between SSE and general, we have moves no larger than word size. */
18953 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18954 return true;
18955 }
18956
18957 return false;
18958 }
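/* Two concrete consequences of the checks above, as a sketch: on a 32-bit
   target, moving a DFmode value between SSE_REGS and GENERAL_REGS always
   goes through memory, because GET_MODE_SIZE (DFmode) == 8 exceeds
   UNITS_PER_WORD == 4; and on an SSE1-only target (no -msse2) every
   SSE <-> general move needs memory, since SSE1 has no direct moves from
   other classes.  */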
18959
18960 /* Return true if the registers in CLASS cannot represent the change from
18961 modes FROM to TO. */
18962
18963 bool
18964 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18965 enum reg_class class)
18966 {
18967 if (from == to)
18968 return false;
18969
18970 /* x87 registers can't do subreg at all, as all values are reformatted
18971 to extended precision. */
18972 if (MAYBE_FLOAT_CLASS_P (class))
18973 return true;
18974
18975 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18976 {
18977 /* Vector registers do not support QI or HImode loads. If we don't
18978 disallow a change to these modes, reload will assume it's ok to
18979 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18980 the vec_dupv4hi pattern. */
18981 if (GET_MODE_SIZE (from) < 4)
18982 return true;
18983
18984 /* Vector registers do not support subreg with nonzero offsets, which
18985 are otherwise valid for integer registers. Since we can't see
18986 whether we have a nonzero offset from here, prohibit all
18987 nonparadoxical subregs changing size. */
18988 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18989 return true;
18990 }
18991
18992 return false;
18993 }
18994
18995 /* Return the cost of moving data from a register in class CLASS1 to
18996 one in class CLASS2.
18997
18998 It is not required that the cost always equal 2 when FROM is the same as TO;
18999 on some machines it is expensive to move between registers if they are not
19000 general registers. */
19001
19002 int
19003 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
19004 enum reg_class class2)
19005 {
19006 /* In case we require secondary memory, compute the cost of the store followed
19007 by the load. In order to avoid bad register allocation choices, we need
19008 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
19009
19010 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
19011 {
19012 int cost = 1;
19013
19014 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
19015 MEMORY_MOVE_COST (mode, class1, 1));
19016 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
19017 MEMORY_MOVE_COST (mode, class2, 1));
19018
19019 /* In the case of copying from a general purpose register we may emit multiple
19020 stores followed by a single load, causing a memory size mismatch stall.
19021 Count this as an arbitrarily high cost of 20. */
19022 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
19023 cost += 20;
19024
19025 /* In the case of FP/MMX moves, the registers actually overlap, and we
19026 have to switch modes in order to treat them differently. */
19027 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
19028 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
19029 cost += 20;
19030
19031 return cost;
19032 }
19033
19034 /* Moves between SSE/MMX and integer unit are expensive. */
19035 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
19036 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
19037 return ix86_cost->mmxsse_to_integer;
19038 if (MAYBE_FLOAT_CLASS_P (class1))
19039 return ix86_cost->fp_move;
19040 if (MAYBE_SSE_CLASS_P (class1))
19041 return ix86_cost->sse_move;
19042 if (MAYBE_MMX_CLASS_P (class1))
19043 return ix86_cost->mmx_move;
19044 return 2;
19045 }
19046
19047 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
19048
19049 bool
19050 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
19051 {
19052 /* The flags can hold only CCmode values, and only the flags can hold them. */
19053 if (CC_REGNO_P (regno))
19054 return GET_MODE_CLASS (mode) == MODE_CC;
19055 if (GET_MODE_CLASS (mode) == MODE_CC
19056 || GET_MODE_CLASS (mode) == MODE_RANDOM
19057 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
19058 return 0;
19059 if (FP_REGNO_P (regno))
19060 return VALID_FP_MODE_P (mode);
19061 if (SSE_REGNO_P (regno))
19062 {
19063 /* We implement the move patterns for all vector modes into and
19064 out of SSE registers, even when no operation instructions
19065 are available. */
19066 return (VALID_SSE_REG_MODE (mode)
19067 || VALID_SSE2_REG_MODE (mode)
19068 || VALID_MMX_REG_MODE (mode)
19069 || VALID_MMX_REG_MODE_3DNOW (mode));
19070 }
19071 if (MMX_REGNO_P (regno))
19072 {
19073 /* We implement the move patterns for 3DNOW modes even in MMX mode,
19074 so if the register is available at all, then we can move data of
19075 the given mode into or out of it. */
19076 return (VALID_MMX_REG_MODE (mode)
19077 || VALID_MMX_REG_MODE_3DNOW (mode));
19078 }
19079
19080 if (mode == QImode)
19081 {
19082 /* Take care with QImode values: they can live in non-QI regs,
19083 but then they cause partial register stalls. */
19084 if (regno < 4 || TARGET_64BIT)
19085 return 1;
19086 if (!TARGET_PARTIAL_REG_STALL)
19087 return 1;
19088 return reload_in_progress || reload_completed;
19089 }
19090 /* We handle both integers and floats in the general purpose registers. */
19091 else if (VALID_INT_MODE_P (mode))
19092 return 1;
19093 else if (VALID_FP_MODE_P (mode))
19094 return 1;
19095 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
19096 on to use that value in smaller contexts, this can easily force a
19097 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
19098 supporting DImode, allow it. */
19099 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
19100 return 1;
19101
19102 return 0;
19103 }
19104
19105 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
19106 tieable integer mode. */
19107
19108 static bool
19109 ix86_tieable_integer_mode_p (enum machine_mode mode)
19110 {
19111 switch (mode)
19112 {
19113 case HImode:
19114 case SImode:
19115 return true;
19116
19117 case QImode:
19118 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
19119
19120 case DImode:
19121 return TARGET_64BIT;
19122
19123 default:
19124 return false;
19125 }
19126 }
19127
19128 /* Return true if MODE1 is accessible in a register that can hold MODE2
19129 without copying. That is, all register classes that can hold MODE2
19130 can also hold MODE1. */
19131
19132 bool
19133 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
19134 {
19135 if (mode1 == mode2)
19136 return true;
19137
19138 if (ix86_tieable_integer_mode_p (mode1)
19139 && ix86_tieable_integer_mode_p (mode2))
19140 return true;
19141
19142 /* MODE2 being XFmode implies fp stack or general regs, which means we
19143 can tie any smaller floating point modes to it. Note that we do not
19144 tie this with TFmode. */
19145 if (mode2 == XFmode)
19146 return mode1 == SFmode || mode1 == DFmode;
19147
19148 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
19149 that we can tie it with SFmode. */
19150 if (mode2 == DFmode)
19151 return mode1 == SFmode;
19152
19153 /* If MODE2 is only appropriate for an SSE register, then tie with
19154 any other mode acceptable to SSE registers. */
19155 if (GET_MODE_SIZE (mode2) == 16
19156 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19157 return (GET_MODE_SIZE (mode1) == 16
19158 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19159
19160 /* If MODE2 is appropriate for an MMX register, then tie
19161 with any other mode acceptable to MMX registers. */
19162 if (GET_MODE_SIZE (mode2) == 8
19163 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19164 return (GET_MODE_SIZE (mode1) == 8
19165 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19166
19167 return false;
19168 }
19169
19170 /* Return the cost of moving data of mode M between a
19171 register and memory. A value of 2 is the default; this cost is
19172 relative to those in `REGISTER_MOVE_COST'.
19173
19174 If moving between registers and memory is more expensive than
19175 between two registers, you should define this macro to express the
19176 relative cost.
19177
19178 Also model the increased cost of moving QImode values in non-Q_REGS
19179 classes.
19180 */
19181 int
19182 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19183 {
19184 if (FLOAT_CLASS_P (class))
19185 {
19186 int index;
19187 switch (mode)
19188 {
19189 case SFmode:
19190 index = 0;
19191 break;
19192 case DFmode:
19193 index = 1;
19194 break;
19195 case XFmode:
19196 index = 2;
19197 break;
19198 default:
19199 return 100;
19200 }
19201 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19202 }
19203 if (SSE_CLASS_P (class))
19204 {
19205 int index;
19206 switch (GET_MODE_SIZE (mode))
19207 {
19208 case 4:
19209 index = 0;
19210 break;
19211 case 8:
19212 index = 1;
19213 break;
19214 case 16:
19215 index = 2;
19216 break;
19217 default:
19218 return 100;
19219 }
19220 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19221 }
19222 if (MMX_CLASS_P (class))
19223 {
19224 int index;
19225 switch (GET_MODE_SIZE (mode))
19226 {
19227 case 4:
19228 index = 0;
19229 break;
19230 case 8:
19231 index = 1;
19232 break;
19233 default:
19234 return 100;
19235 }
19236 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19237 }
19238 switch (GET_MODE_SIZE (mode))
19239 {
19240 case 1:
19241 if (in)
19242 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19243 : ix86_cost->movzbl_load);
19244 else
19245 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19246 : ix86_cost->int_store[0] + 4);
19247 break;
19248 case 2:
19249 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19250 default:
19251 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19252 if (mode == TFmode)
19253 mode = XFmode;
19254 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19255 * (((int) GET_MODE_SIZE (mode)
19256 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19257 }
19258 }
19259
19260 /* Compute a (partial) cost for rtx X. Return true if the complete
19261 cost has been computed, and false if subexpressions should be
19262 scanned. In either case, *TOTAL contains the cost result. */
19263
19264 static bool
19265 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19266 {
19267 enum machine_mode mode = GET_MODE (x);
19268
19269 switch (code)
19270 {
19271 case CONST_INT:
19272 case CONST:
19273 case LABEL_REF:
19274 case SYMBOL_REF:
19275 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19276 *total = 3;
19277 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19278 *total = 2;
19279 else if (flag_pic && SYMBOLIC_CONST (x)
19280 && (!TARGET_64BIT
19281 || (GET_CODE (x) != LABEL_REF
19282 && (GET_CODE (x) != SYMBOL_REF
19283 || !SYMBOL_REF_LOCAL_P (x)))))
19284 *total = 1;
19285 else
19286 *total = 0;
19287 return true;
19288
19289 case CONST_DOUBLE:
19290 if (mode == VOIDmode)
19291 *total = 0;
19292 else
19293 switch (standard_80387_constant_p (x))
19294 {
19295 case 1: /* 0.0 */
19296 *total = 1;
19297 break;
19298 default: /* Other constants */
19299 *total = 2;
19300 break;
19301 case 0:
19302 case -1:
19303 /* Start with (MEM (SYMBOL_REF)), since that's where
19304 it'll probably end up. Add a penalty for size. */
19305 *total = (COSTS_N_INSNS (1)
19306 + (flag_pic != 0 && !TARGET_64BIT)
19307 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19308 break;
19309 }
19310 return true;
19311
19312 case ZERO_EXTEND:
19313 /* The zero extension is often completely free on x86_64, so make
19314 it as cheap as possible. */
19315 if (TARGET_64BIT && mode == DImode
19316 && GET_MODE (XEXP (x, 0)) == SImode)
19317 *total = 1;
19318 else if (TARGET_ZERO_EXTEND_WITH_AND)
19319 *total = ix86_cost->add;
19320 else
19321 *total = ix86_cost->movzx;
19322 return false;
19323
19324 case SIGN_EXTEND:
19325 *total = ix86_cost->movsx;
19326 return false;
19327
19328 case ASHIFT:
19329 if (CONST_INT_P (XEXP (x, 1))
19330 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19331 {
19332 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19333 if (value == 1)
19334 {
19335 *total = ix86_cost->add;
19336 return false;
19337 }
19338 if ((value == 2 || value == 3)
19339 && ix86_cost->lea <= ix86_cost->shift_const)
19340 {
19341 *total = ix86_cost->lea;
19342 return false;
19343 }
19344 }
19345 /* FALLTHRU */
19346
19347 case ROTATE:
19348 case ASHIFTRT:
19349 case LSHIFTRT:
19350 case ROTATERT:
19351 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19352 {
19353 if (CONST_INT_P (XEXP (x, 1)))
19354 {
19355 if (INTVAL (XEXP (x, 1)) > 32)
19356 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19357 else
19358 *total = ix86_cost->shift_const * 2;
19359 }
19360 else
19361 {
19362 if (GET_CODE (XEXP (x, 1)) == AND)
19363 *total = ix86_cost->shift_var * 2;
19364 else
19365 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19366 }
19367 }
19368 else
19369 {
19370 if (CONST_INT_P (XEXP (x, 1)))
19371 *total = ix86_cost->shift_const;
19372 else
19373 *total = ix86_cost->shift_var;
19374 }
19375 return false;
19376
19377 case MULT:
19378 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19379 {
19380 /* ??? SSE scalar cost should be used here. */
19381 *total = ix86_cost->fmul;
19382 return false;
19383 }
19384 else if (X87_FLOAT_MODE_P (mode))
19385 {
19386 *total = ix86_cost->fmul;
19387 return false;
19388 }
19389 else if (FLOAT_MODE_P (mode))
19390 {
19391 /* ??? SSE vector cost should be used here. */
19392 *total = ix86_cost->fmul;
19393 return false;
19394 }
19395 else
19396 {
19397 rtx op0 = XEXP (x, 0);
19398 rtx op1 = XEXP (x, 1);
19399 int nbits;
19400 if (CONST_INT_P (XEXP (x, 1)))
19401 {
19402 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
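/* Count the bits set in the constant multiplier; each iteration of the
   loop clears the lowest set bit (Kernighan's trick).  The cost below
   charges mult_bit once per set bit, presumably because a multiply by a
   constant with few set bits can be synthesized from correspondingly
   few shift/add steps.  */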
19403 for (nbits = 0; value != 0; value &= value - 1)
19404 nbits++;
19405 }
19406 else
19407 /* This is arbitrary. */
19408 nbits = 7;
19409
19410 /* Compute costs correctly for widening multiplication. */
19411 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19412 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19413 == GET_MODE_SIZE (mode))
19414 {
19415 int is_mulwiden = 0;
19416 enum machine_mode inner_mode = GET_MODE (op0);
19417
19418 if (GET_CODE (op0) == GET_CODE (op1))
19419 is_mulwiden = 1, op1 = XEXP (op1, 0);
19420 else if (CONST_INT_P (op1))
19421 {
19422 if (GET_CODE (op0) == SIGN_EXTEND)
19423 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19424 == INTVAL (op1);
19425 else
19426 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19427 }
19428
19429 if (is_mulwiden)
19430 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19431 }
19432
19433 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19434 + nbits * ix86_cost->mult_bit
19435 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19436
19437 return true;
19438 }
19439
19440 case DIV:
19441 case UDIV:
19442 case MOD:
19443 case UMOD:
19444 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19445 /* ??? SSE cost should be used here. */
19446 *total = ix86_cost->fdiv;
19447 else if (X87_FLOAT_MODE_P (mode))
19448 *total = ix86_cost->fdiv;
19449 else if (FLOAT_MODE_P (mode))
19450 /* ??? SSE vector cost should be used here. */
19451 *total = ix86_cost->fdiv;
19452 else
19453 *total = ix86_cost->divide[MODE_INDEX (mode)];
19454 return false;
19455
19456 case PLUS:
19457 if (GET_MODE_CLASS (mode) == MODE_INT
19458 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19459 {
19460 if (GET_CODE (XEXP (x, 0)) == PLUS
19461 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19462 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19463 && CONSTANT_P (XEXP (x, 1)))
19464 {
19465 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19466 if (val == 2 || val == 4 || val == 8)
19467 {
19468 *total = ix86_cost->lea;
19469 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19470 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19471 outer_code);
19472 *total += rtx_cost (XEXP (x, 1), outer_code);
19473 return true;
19474 }
19475 }
19476 else if (GET_CODE (XEXP (x, 0)) == MULT
19477 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19478 {
19479 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19480 if (val == 2 || val == 4 || val == 8)
19481 {
19482 *total = ix86_cost->lea;
19483 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19484 *total += rtx_cost (XEXP (x, 1), outer_code);
19485 return true;
19486 }
19487 }
19488 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19489 {
19490 *total = ix86_cost->lea;
19491 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19492 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19493 *total += rtx_cost (XEXP (x, 1), outer_code);
19494 return true;
19495 }
19496 }
19497 /* FALLTHRU */
19498
19499 case MINUS:
19500 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19501 {
19502 /* ??? SSE cost should be used here. */
19503 *total = ix86_cost->fadd;
19504 return false;
19505 }
19506 else if (X87_FLOAT_MODE_P (mode))
19507 {
19508 *total = ix86_cost->fadd;
19509 return false;
19510 }
19511 else if (FLOAT_MODE_P (mode))
19512 {
19513 /* ??? SSE vector cost should be used here. */
19514 *total = ix86_cost->fadd;
19515 return false;
19516 }
19517 /* FALLTHRU */
19518
19519 case AND:
19520 case IOR:
19521 case XOR:
19522 if (!TARGET_64BIT && mode == DImode)
19523 {
19524 *total = (ix86_cost->add * 2
19525 + (rtx_cost (XEXP (x, 0), outer_code)
19526 << (GET_MODE (XEXP (x, 0)) != DImode))
19527 + (rtx_cost (XEXP (x, 1), outer_code)
19528 << (GET_MODE (XEXP (x, 1)) != DImode)));
19529 return true;
19530 }
19531 /* FALLTHRU */
19532
19533 case NEG:
19534 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19535 {
19536 /* ??? SSE cost should be used here. */
19537 *total = ix86_cost->fchs;
19538 return false;
19539 }
19540 else if (X87_FLOAT_MODE_P (mode))
19541 {
19542 *total = ix86_cost->fchs;
19543 return false;
19544 }
19545 else if (FLOAT_MODE_P (mode))
19546 {
19547 /* ??? SSE vector cost should be used here. */
19548 *total = ix86_cost->fchs;
19549 return false;
19550 }
19551 /* FALLTHRU */
19552
19553 case NOT:
19554 if (!TARGET_64BIT && mode == DImode)
19555 *total = ix86_cost->add * 2;
19556 else
19557 *total = ix86_cost->add;
19558 return false;
19559
19560 case COMPARE:
19561 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19562 && XEXP (XEXP (x, 0), 1) == const1_rtx
19563 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19564 && XEXP (x, 1) == const0_rtx)
19565 {
19566 /* This kind of construct is implemented using test[bwl].
19567 Treat it as if we had an AND. */
19568 *total = (ix86_cost->add
19569 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19570 + rtx_cost (const1_rtx, outer_code));
19571 return true;
19572 }
19573 return false;
19574
19575 case FLOAT_EXTEND:
19576 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
19577 *total = 0;
19578 return false;
19579
19580 case ABS:
19581 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19582 /* ??? SSE cost should be used here. */
19583 *total = ix86_cost->fabs;
19584 else if (X87_FLOAT_MODE_P (mode))
19585 *total = ix86_cost->fabs;
19586 else if (FLOAT_MODE_P (mode))
19587 /* ??? SSE vector cost should be used here. */
19588 *total = ix86_cost->fabs;
19589 return false;
19590
19591 case SQRT:
19592 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
19593 /* ??? SSE cost should be used here. */
19594 *total = ix86_cost->fsqrt;
19595 else if (X87_FLOAT_MODE_P (mode))
19596 *total = ix86_cost->fsqrt;
19597 else if (FLOAT_MODE_P (mode))
19598 /* ??? SSE vector cost should be used here. */
19599 *total = ix86_cost->fsqrt;
19600 return false;
19601
19602 case UNSPEC:
19603 if (XINT (x, 1) == UNSPEC_TP)
19604 *total = 0;
19605 return false;
19606
19607 default:
19608 return false;
19609 }
19610 }
19611
19612 #if TARGET_MACHO
19613
19614 static int current_machopic_label_num;
19615
19616 /* Given a symbol name and its associated stub, write out the
19617 definition of the stub. */
19618
19619 void
19620 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19621 {
19622 unsigned int length;
19623 char *binder_name, *symbol_name, lazy_ptr_name[32];
19624 int label = ++current_machopic_label_num;
19625
19626 /* For 64-bit we shouldn't get here. */
19627 gcc_assert (!TARGET_64BIT);
19628
19629 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19630 symb = (*targetm.strip_name_encoding) (symb);
19631
19632 length = strlen (stub);
19633 binder_name = alloca (length + 32);
19634 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19635
19636 length = strlen (symb);
19637 symbol_name = alloca (length + 32);
19638 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19639
19640 sprintf (lazy_ptr_name, "L%d$lz", label);
19641
19642 if (MACHOPIC_PURE)
19643 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19644 else
19645 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19646
19647 fprintf (file, "%s:\n", stub);
19648 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19649
19650 if (MACHOPIC_PURE)
19651 {
19652 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19653 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19654 fprintf (file, "\tjmp\t*%%edx\n");
19655 }
19656 else
19657 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19658
19659 fprintf (file, "%s:\n", binder_name);
19660
19661 if (MACHOPIC_PURE)
19662 {
19663 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19664 fprintf (file, "\tpushl\t%%eax\n");
19665 }
19666 else
19667 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19668
19669 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19670
19671 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19672 fprintf (file, "%s:\n", lazy_ptr_name);
19673 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19674 fprintf (file, "\t.long %s\n", binder_name);
19675 }
19676
19677 void
19678 darwin_x86_file_end (void)
19679 {
19680 darwin_file_end ();
19681 ix86_file_end ();
19682 }
19683 #endif /* TARGET_MACHO */
19684
19685 /* Order the registers for register allocator. */
19686
19687 void
19688 x86_order_regs_for_local_alloc (void)
19689 {
19690 int pos = 0;
19691 int i;
19692
19693 /* First allocate the local general purpose registers. */
19694 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19695 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19696 reg_alloc_order [pos++] = i;
19697
19698 /* Global general purpose registers. */
19699 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19700 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19701 reg_alloc_order [pos++] = i;
19702
19703 /* x87 registers come first in case we are doing FP math
19704 using them. */
19705 if (!TARGET_SSE_MATH)
19706 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19707 reg_alloc_order [pos++] = i;
19708
19709 /* SSE registers. */
19710 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19711 reg_alloc_order [pos++] = i;
19712 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19713 reg_alloc_order [pos++] = i;
19714
19715 /* x87 registers. */
19716 if (TARGET_SSE_MATH)
19717 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19718 reg_alloc_order [pos++] = i;
19719
19720 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19721 reg_alloc_order [pos++] = i;
19722
19723 /* Initialize the rest of the array, as we do not allocate some registers
19724 at all. */
19725 while (pos < FIRST_PSEUDO_REGISTER)
19726 reg_alloc_order [pos++] = 0;
19727 }
19728
19729 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19730 struct attribute_spec.handler. */
19731 static tree
19732 ix86_handle_struct_attribute (tree *node, tree name,
19733 tree args ATTRIBUTE_UNUSED,
19734 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19735 {
19736 tree *type = NULL;
19737 if (DECL_P (*node))
19738 {
19739 if (TREE_CODE (*node) == TYPE_DECL)
19740 type = &TREE_TYPE (*node);
19741 }
19742 else
19743 type = node;
19744
19745 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19746 || TREE_CODE (*type) == UNION_TYPE)))
19747 {
19748 warning (OPT_Wattributes, "%qs attribute ignored",
19749 IDENTIFIER_POINTER (name));
19750 *no_add_attrs = true;
19751 }
19752
19753 else if ((is_attribute_p ("ms_struct", name)
19754 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19755 || ((is_attribute_p ("gcc_struct", name)
19756 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19757 {
19758 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19759 IDENTIFIER_POINTER (name));
19760 *no_add_attrs = true;
19761 }
19762
19763 return NULL_TREE;
19764 }
19765
19766 static bool
19767 ix86_ms_bitfield_layout_p (tree record_type)
19768 {
19769 return ((TARGET_MS_BITFIELD_LAYOUT
19770 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19771 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
19772 }
19773
19774 /* Returns an expression indicating where the this parameter is
19775 located on entry to the FUNCTION. */
19776
19777 static rtx
19778 x86_this_parameter (tree function)
19779 {
19780 tree type = TREE_TYPE (function);
19781 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
19782
19783 if (TARGET_64BIT)
19784 {
19785 const int *parm_regs;
19786
19787 if (TARGET_64BIT_MS_ABI)
19788 parm_regs = x86_64_ms_abi_int_parameter_registers;
19789 else
19790 parm_regs = x86_64_int_parameter_registers;
19791 return gen_rtx_REG (DImode, parm_regs[aggr]);
19792 }
19793
19794 if (ix86_function_regparm (type, function) > 0
19795 && !type_has_variadic_args_p (type))
19796 {
19797 int regno = 0;
19798 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19799 regno = 2;
19800 return gen_rtx_REG (SImode, regno);
19801 }
19802
19803 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
19804 }
19805
19806 /* Determine whether x86_output_mi_thunk can succeed. */
19807
19808 static bool
19809 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19810 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19811 HOST_WIDE_INT vcall_offset, tree function)
19812 {
19813 /* 64-bit can handle anything. */
19814 if (TARGET_64BIT)
19815 return true;
19816
19817 /* For 32-bit, everything's fine if we have one free register. */
19818 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19819 return true;
19820
19821 /* Need a free register for vcall_offset. */
19822 if (vcall_offset)
19823 return false;
19824
19825 /* Need a free register for GOT references. */
19826 if (flag_pic && !(*targetm.binds_local_p) (function))
19827 return false;
19828
19829 /* Otherwise ok. */
19830 return true;
19831 }
19832
19833 /* Output the assembler code for a thunk function. THUNK_DECL is the
19834 declaration for the thunk function itself, FUNCTION is the decl for
19835 the target function. DELTA is an immediate constant offset to be
19836 added to THIS. If VCALL_OFFSET is nonzero, the word at
19837 *(*this + vcall_offset) should be added to THIS. */
19838
19839 static void
19840 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19841 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19842 HOST_WIDE_INT vcall_offset, tree function)
19843 {
19844 rtx xops[3];
19845 rtx this = x86_this_parameter (function);
19846 rtx this_reg, tmp;
19847
19848 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19849 pull it in now and let DELTA benefit. */
19850 if (REG_P (this))
19851 this_reg = this;
19852 else if (vcall_offset)
19853 {
19854 /* Put the this parameter into %eax. */
19855 xops[0] = this;
19856 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19857 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19858 }
19859 else
19860 this_reg = NULL_RTX;
19861
19862 /* Adjust the this parameter by a fixed constant. */
19863 if (delta)
19864 {
19865 xops[0] = GEN_INT (delta);
19866 xops[1] = this_reg ? this_reg : this;
19867 if (TARGET_64BIT)
19868 {
19869 if (!x86_64_general_operand (xops[0], DImode))
19870 {
19871 tmp = gen_rtx_REG (DImode, R10_REG);
19872 xops[1] = tmp;
19873 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19874 xops[0] = tmp;
19875 xops[1] = this;
19876 }
19877 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19878 }
19879 else
19880 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19881 }
19882
19883 /* Adjust the this parameter by a value stored in the vtable. */
19884 if (vcall_offset)
19885 {
19886 if (TARGET_64BIT)
19887 tmp = gen_rtx_REG (DImode, R10_REG);
19888 else
19889 {
19890 int tmp_regno = 2 /* ECX */;
19891 if (lookup_attribute ("fastcall",
19892 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19893 tmp_regno = 0 /* EAX */;
19894 tmp = gen_rtx_REG (SImode, tmp_regno);
19895 }
19896
19897 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19898 xops[1] = tmp;
19899 if (TARGET_64BIT)
19900 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19901 else
19902 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19903
19904 /* Adjust the this parameter. */
19905 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19906 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19907 {
19908 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19909 xops[0] = GEN_INT (vcall_offset);
19910 xops[1] = tmp2;
19911 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19912 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19913 }
19914 xops[1] = this_reg;
19915 if (TARGET_64BIT)
19916 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19917 else
19918 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19919 }
19920
19921 /* If necessary, drop THIS back to its stack slot. */
19922 if (this_reg && this_reg != this)
19923 {
19924 xops[0] = this_reg;
19925 xops[1] = this;
19926 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19927 }
19928
19929 xops[0] = XEXP (DECL_RTL (function), 0);
19930 if (TARGET_64BIT)
19931 {
19932 if (!flag_pic || (*targetm.binds_local_p) (function))
19933 output_asm_insn ("jmp\t%P0", xops);
19934 /* All thunks should be in the same object as their target,
19935 and thus binds_local_p should be true. */
19936 else if (TARGET_64BIT_MS_ABI)
19937 gcc_unreachable ();
19938 else
19939 {
19940 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19941 tmp = gen_rtx_CONST (Pmode, tmp);
19942 tmp = gen_rtx_MEM (QImode, tmp);
19943 xops[0] = tmp;
19944 output_asm_insn ("jmp\t%A0", xops);
19945 }
19946 }
19947 else
19948 {
19949 if (!flag_pic || (*targetm.binds_local_p) (function))
19950 output_asm_insn ("jmp\t%P0", xops);
19951 else
19952 #if TARGET_MACHO
19953 if (TARGET_MACHO)
19954 {
19955 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19956 tmp = (gen_rtx_SYMBOL_REF
19957 (Pmode,
19958 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19959 tmp = gen_rtx_MEM (QImode, tmp);
19960 xops[0] = tmp;
19961 output_asm_insn ("jmp\t%0", xops);
19962 }
19963 else
19964 #endif /* TARGET_MACHO */
19965 {
19966 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19967 output_set_got (tmp, NULL_RTX);
19968
19969 xops[1] = tmp;
19970 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19971 output_asm_insn ("jmp\t{*}%1", xops);
19972 }
19973 }
19974 }
19975
19976 static void
19977 x86_file_start (void)
19978 {
19979 default_file_start ();
19980 #if TARGET_MACHO
19981 darwin_file_start ();
19982 #endif
19983 if (X86_FILE_START_VERSION_DIRECTIVE)
19984 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19985 if (X86_FILE_START_FLTUSED)
19986 fputs ("\t.global\t__fltused\n", asm_out_file);
19987 if (ix86_asm_dialect == ASM_INTEL)
19988 fputs ("\t.intel_syntax\n", asm_out_file);
19989 }
19990
19991 int
19992 x86_field_alignment (tree field, int computed)
19993 {
19994 enum machine_mode mode;
19995 tree type = TREE_TYPE (field);
19996
19997 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19998 return computed;
19999 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
20000 ? get_inner_array_type (type) : type);
20001 if (mode == DFmode || mode == DCmode
20002 || GET_MODE_CLASS (mode) == MODE_INT
20003 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
20004 return MIN (32, computed);
20005 return computed;
20006 }
20007
20008 /* Output assembler code to FILE to increment profiler label # LABELNO
20009 for profiling a function entry. */
20010 void
20011 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
20012 {
20013 if (TARGET_64BIT)
20014 {
20015 #ifndef NO_PROFILE_COUNTERS
20016 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
20017 #endif
20018
20019 if (!TARGET_64BIT_MS_ABI && flag_pic)
20020 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
20021 else
20022 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20023 }
20024 else if (flag_pic)
20025 {
20026 #ifndef NO_PROFILE_COUNTERS
20027 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
20028 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
20029 #endif
20030 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
20031 }
20032 else
20033 {
20034 #ifndef NO_PROFILE_COUNTERS
20035 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
20036 PROFILE_COUNT_REGISTER);
20037 #endif
20038 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
20039 }
20040 }
20041
20042 /* We don't have exact information about the insn sizes, but we may assume
20043 quite safely that we are informed about all 1 byte insns and memory
20044 address sizes. This is enough to eliminate unnecessary padding in
20045 99% of cases. */
20046
20047 static int
20048 min_insn_size (rtx insn)
20049 {
20050 int l = 0;
20051
20052 if (!INSN_P (insn) || !active_insn_p (insn))
20053 return 0;
20054
20055 /* Discard alignments we've emitted ourselves, and jump tables. */
20056 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
20057 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
20058 return 0;
20059 if (JUMP_P (insn)
20060 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
20061 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
20062 return 0;
20063
20064 /* Important case: calls are always 5 bytes.
20065 It is common to have many calls in a row. */
20066 if (CALL_P (insn)
20067 && symbolic_reference_mentioned_p (PATTERN (insn))
20068 && !SIBLING_CALL_P (insn))
20069 return 5;
20070 if (get_attr_length (insn) <= 1)
20071 return 1;
20072
20073 /* For normal instructions we may rely on the sizes of addresses
20074 and the presence of symbol to require 4 bytes of encoding.
20075 This is not the case for jumps where references are PC relative. */
20076 if (!JUMP_P (insn))
20077 {
20078 l = get_attr_length_address (insn);
20079 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
20080 l = 4;
20081 }
20082 if (l)
20083 return 1+l;
20084 else
20085 return 2;
20086 }
20087
20088 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
20089 window. */
20090
20091 static void
20092 ix86_avoid_jump_misspredicts (void)
20093 {
20094 rtx insn, start = get_insns ();
20095 int nbytes = 0, njumps = 0;
20096 int isjump = 0;
20097
20098 /* Look for all minimal intervals of instructions containing 4 jumps.
20099 The intervals are bounded by START and INSN. NBYTES is the total
20100 size of the instructions in the interval, including INSN and not
20101 including START. When NBYTES is smaller than 16 bytes, it is possible
20102 that the end of START and the end of INSN end up in the same 16-byte window.
20103 
20104 The smallest offset in the window at which INSN can start is the case
20105 where START ends at offset 0; the offset of INSN is then
20106 NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
20107 maxskip 17 - NBYTES + sizeof (INSN). */
20108 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
20109 {
20110
20111 nbytes += min_insn_size (insn);
20112 if (dump_file)
20113 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
20114 INSN_UID (insn), min_insn_size (insn));
20115 if ((JUMP_P (insn)
20116 && GET_CODE (PATTERN (insn)) != ADDR_VEC
20117 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
20118 || CALL_P (insn))
20119 njumps++;
20120 else
20121 continue;
20122
20123 while (njumps > 3)
20124 {
20125 start = NEXT_INSN (start);
20126 if ((JUMP_P (start)
20127 && GET_CODE (PATTERN (start)) != ADDR_VEC
20128 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
20129 || CALL_P (start))
20130 njumps--, isjump = 1;
20131 else
20132 isjump = 0;
20133 nbytes -= min_insn_size (start);
20134 }
20135 gcc_assert (njumps >= 0);
20136 if (dump_file)
20137 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
20138 INSN_UID (start), INSN_UID (insn), nbytes);
20139
20140 if (njumps == 3 && isjump && nbytes < 16)
20141 {
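/* Pad before INSN so that it cannot end up in the same 16-byte window
   as the three preceding jumps; see the interval comment above for the
   worst-case layout this guards against.  */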
20142 int padsize = 15 - nbytes + min_insn_size (insn);
20143
20144 if (dump_file)
20145 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
20146 INSN_UID (insn), padsize);
20147 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
20148 }
20149 }
20150 }
20151
20152 /* The AMD Athlon works faster
20153 when RET is not the destination of a conditional jump or directly preceded
20154 by another jump instruction. We avoid the penalty by inserting a NOP just
20155 before the RET instructions in such cases. */
20156 static void
20157 ix86_pad_returns (void)
20158 {
20159 edge e;
20160 edge_iterator ei;
20161
20162 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
20163 {
20164 basic_block bb = e->src;
20165 rtx ret = BB_END (bb);
20166 rtx prev;
20167 bool replace = false;
20168
20169 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
20170 || !maybe_hot_bb_p (bb))
20171 continue;
20172 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
20173 if (active_insn_p (prev) || LABEL_P (prev))
20174 break;
20175 if (prev && LABEL_P (prev))
20176 {
20177 edge e;
20178 edge_iterator ei;
20179
20180 FOR_EACH_EDGE (e, ei, bb->preds)
20181 if (EDGE_FREQUENCY (e) && e->src->index >= 0
20182 && !(e->flags & EDGE_FALLTHRU))
20183 replace = true;
20184 }
20185 if (!replace)
20186 {
20187 prev = prev_active_insn (ret);
20188 if (prev
20189 && ((JUMP_P (prev) && any_condjump_p (prev))
20190 || CALL_P (prev)))
20191 replace = true;
20192 /* Empty functions get a branch mispredict even when the jump destination
20193 is not visible to us. */
20194 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20195 replace = true;
20196 }
20197 if (replace)
20198 {
20199 emit_insn_before (gen_return_internal_long (), ret);
20200 delete_insn (ret);
20201 }
20202 }
20203 }
20204
20205 /* Implement machine specific optimizations. We implement padding of returns
20206 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
20207 static void
20208 ix86_reorg (void)
20209 {
20210 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20211 ix86_pad_returns ();
20212 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20213 ix86_avoid_jump_misspredicts ();
20214 }
20215
20216 /* Return nonzero when a QImode register that must be represented via a REX
20217 prefix is used. */
20218 bool
20219 x86_extended_QIreg_mentioned_p (rtx insn)
20220 {
20221 int i;
20222 extract_insn_cached (insn);
20223 for (i = 0; i < recog_data.n_operands; i++)
20224 if (REG_P (recog_data.operand[i])
20225 && REGNO (recog_data.operand[i]) >= 4)
20226 return true;
20227 return false;
20228 }
20229
20230 /* Return nonzero when P points to a register encoded via a REX prefix.
20231 Called via for_each_rtx. */
20232 static int
20233 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20234 {
20235 unsigned int regno;
20236 if (!REG_P (*p))
20237 return 0;
20238 regno = REGNO (*p);
20239 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20240 }
20241
20242 /* Return true when INSN mentions register that must be encoded using REX
20243 prefix. */
20244 bool
20245 x86_extended_reg_mentioned_p (rtx insn)
20246 {
20247 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20248 }
20249
20250 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20251 optabs would emit if we didn't have TFmode patterns. */
20252
20253 void
20254 x86_emit_floatuns (rtx operands[2])
20255 {
20256 rtx neglab, donelab, i0, i1, f0, in, out;
20257 enum machine_mode mode, inmode;
20258
20259 inmode = GET_MODE (operands[1]);
20260 gcc_assert (inmode == SImode || inmode == DImode);
20261
20262 out = operands[0];
20263 in = force_reg (inmode, operands[1]);
20264 mode = GET_MODE (out);
20265 neglab = gen_label_rtx ();
20266 donelab = gen_label_rtx ();
20267 f0 = gen_reg_rtx (mode);
20268
20269 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20270
20271 expand_float (out, in, 0);
20272
20273 emit_jump_insn (gen_jump (donelab));
20274 emit_barrier ();
20275
20276 emit_label (neglab);
20277
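/* The input has its high bit set, so converting it as a signed value
   would yield a negative result.  Halve it instead, folding the
   discarded low bit back in (round to odd) so that the final doubling
   rounds to the correct value, then convert and double.  */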
20278 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20279 1, OPTAB_DIRECT);
20280 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20281 1, OPTAB_DIRECT);
20282 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20283
20284 expand_float (f0, i0, 0);
20285
20286 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20287
20288 emit_label (donelab);
20289 }
20290 \f
20291 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20292 with all elements equal to VAR. Return true if successful. */
20293
20294 static bool
20295 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20296 rtx target, rtx val)
20297 {
20298 enum machine_mode smode, wsmode, wvmode;
20299 rtx x;
20300
20301 switch (mode)
20302 {
20303 case V2SImode:
20304 case V2SFmode:
20305 if (!mmx_ok)
20306 return false;
20307 /* FALLTHRU */
20308
20309 case V2DFmode:
20310 case V2DImode:
20311 case V4SFmode:
20312 case V4SImode:
20313 val = force_reg (GET_MODE_INNER (mode), val);
20314 x = gen_rtx_VEC_DUPLICATE (mode, val);
20315 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20316 return true;
20317
20318 case V4HImode:
20319 if (!mmx_ok)
20320 return false;
20321 if (TARGET_SSE || TARGET_3DNOW_A)
20322 {
20323 val = gen_lowpart (SImode, val);
20324 x = gen_rtx_TRUNCATE (HImode, val);
20325 x = gen_rtx_VEC_DUPLICATE (mode, x);
20326 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20327 return true;
20328 }
20329 else
20330 {
20331 smode = HImode;
20332 wsmode = SImode;
20333 wvmode = V2SImode;
20334 goto widen;
20335 }
20336
20337 case V8QImode:
20338 if (!mmx_ok)
20339 return false;
20340 smode = QImode;
20341 wsmode = HImode;
20342 wvmode = V4HImode;
20343 goto widen;
20344 case V8HImode:
20345 if (TARGET_SSE2)
20346 {
20347 rtx tmp1, tmp2;
20348 /* Extend HImode to SImode using a paradoxical SUBREG. */
20349 tmp1 = gen_reg_rtx (SImode);
20350 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20351 /* Insert the SImode value as low element of V4SImode vector. */
20352 tmp2 = gen_reg_rtx (V4SImode);
20353 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20354 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20355 CONST0_RTX (V4SImode),
20356 const1_rtx);
20357 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20358 /* Cast the V4SImode vector back to a V8HImode vector. */
20359 tmp1 = gen_reg_rtx (V8HImode);
20360 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20361 /* Duplicate the low short through the whole low SImode word. */
20362 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20363 /* Cast the V8HImode vector back to a V4SImode vector. */
20364 tmp2 = gen_reg_rtx (V4SImode);
20365 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20366 /* Replicate the low element of the V4SImode vector. */
20367 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20368 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20369 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20370 return true;
20371 }
20372 smode = HImode;
20373 wsmode = SImode;
20374 wvmode = V4SImode;
20375 goto widen;
20376 case V16QImode:
20377 if (TARGET_SSE2)
20378 {
20379 rtx tmp1, tmp2;
20380 /* Extend QImode to SImode using a paradoxical SUBREG. */
20381 tmp1 = gen_reg_rtx (SImode);
20382 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20383 /* Insert the SImode value as low element of V4SImode vector. */
20384 tmp2 = gen_reg_rtx (V4SImode);
20385 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20386 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20387 CONST0_RTX (V4SImode),
20388 const1_rtx);
20389 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20390 /* Cast the V4SImode vector back to a V16QImode vector. */
20391 tmp1 = gen_reg_rtx (V16QImode);
20392 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20393 /* Duplicate the low byte through the whole low SImode word. */
20394 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20395 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20396 /* Cast the V16QImode vector back to a V4SImode vector. */
20397 tmp2 = gen_reg_rtx (V4SImode);
20398 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20399 /* Replicate the low element of the V4SImode vector. */
20400 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20401 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20402 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20403 return true;
20404 }
20405 smode = QImode;
20406 wsmode = HImode;
20407 wvmode = V8HImode;
20408 goto widen;
20409 widen:
20410 /* Replicate the value once into the next wider mode and recurse. */
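/* For example, for V8QImode a QImode value 0xAB becomes the HImode
   value 0xABAB, and the recursive V4HImode broadcast of that value
   then fills all eight QImode lanes.  */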
20411 val = convert_modes (wsmode, smode, val, true);
20412 x = expand_simple_binop (wsmode, ASHIFT, val,
20413 GEN_INT (GET_MODE_BITSIZE (smode)),
20414 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20415 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20416
20417 x = gen_reg_rtx (wvmode);
20418 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20419 gcc_unreachable ();
20420 emit_move_insn (target, gen_lowpart (mode, x));
20421 return true;
20422
20423 default:
20424 return false;
20425 }
20426 }
20427
20428 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20429 whose ONE_VAR element is VAR, and other elements are zero. Return true
20430 if successful. */
20431
20432 static bool
20433 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20434 rtx target, rtx var, int one_var)
20435 {
20436 enum machine_mode vsimode;
20437 rtx new_target;
20438 rtx x, tmp;
20439
20440 switch (mode)
20441 {
20442 case V2SFmode:
20443 case V2SImode:
20444 if (!mmx_ok)
20445 return false;
20446 /* FALLTHRU */
20447
20448 case V2DFmode:
20449 case V2DImode:
20450 if (one_var != 0)
20451 return false;
20452 var = force_reg (GET_MODE_INNER (mode), var);
20453 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20454 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20455 return true;
20456
20457 case V4SFmode:
20458 case V4SImode:
20459 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20460 new_target = gen_reg_rtx (mode);
20461 else
20462 new_target = target;
20463 var = force_reg (GET_MODE_INNER (mode), var);
20464 x = gen_rtx_VEC_DUPLICATE (mode, var);
20465 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20466 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20467 if (one_var != 0)
20468 {
20469 /* We need to shuffle the value to the correct position, so
20470 create a new pseudo to store the intermediate result. */
20471
20472 /* With SSE2, we can use the integer shuffle insns. */
20473 if (mode != V4SFmode && TARGET_SSE2)
20474 {
20475 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20476 GEN_INT (1),
20477 GEN_INT (one_var == 1 ? 0 : 1),
20478 GEN_INT (one_var == 2 ? 0 : 1),
20479 GEN_INT (one_var == 3 ? 0 : 1)));
20480 if (target != new_target)
20481 emit_move_insn (target, new_target);
20482 return true;
20483 }
20484
20485 /* Otherwise convert the intermediate result to V4SFmode and
20486 use the SSE1 shuffle instructions. */
20487 if (mode != V4SFmode)
20488 {
20489 tmp = gen_reg_rtx (V4SFmode);
20490 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20491 }
20492 else
20493 tmp = new_target;
20494
20495 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20496 GEN_INT (1),
20497 GEN_INT (one_var == 1 ? 0 : 1),
20498 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20499 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20500
20501 if (mode != V4SFmode)
20502 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20503 else if (tmp != target)
20504 emit_move_insn (target, tmp);
20505 }
20506 else if (target != new_target)
20507 emit_move_insn (target, new_target);
20508 return true;
20509
20510 case V8HImode:
20511 case V16QImode:
20512 vsimode = V4SImode;
20513 goto widen;
20514 case V4HImode:
20515 case V8QImode:
20516 if (!mmx_ok)
20517 return false;
20518 vsimode = V2SImode;
20519 goto widen;
20520 widen:
20521 if (one_var != 0)
20522 return false;
20523
20524 /* Zero extend the variable element to SImode and recurse. */
20525 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20526
20527 x = gen_reg_rtx (vsimode);
20528 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20529 var, one_var))
20530 gcc_unreachable ();
20531
20532 emit_move_insn (target, gen_lowpart (mode, x));
20533 return true;
20534
20535 default:
20536 return false;
20537 }
20538 }
20539
20540 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20541 consisting of the values in VALS. It is known that all elements
20542 except ONE_VAR are constants. Return true if successful. */
20543
20544 static bool
20545 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20546 rtx target, rtx vals, int one_var)
20547 {
20548 rtx var = XVECEXP (vals, 0, one_var);
20549 enum machine_mode wmode;
20550 rtx const_vec, x;
20551
20552 const_vec = copy_rtx (vals);
20553 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20554 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20555
20556 switch (mode)
20557 {
20558 case V2DFmode:
20559 case V2DImode:
20560 case V2SFmode:
20561 case V2SImode:
20562 /* For the two element vectors, it's just as easy to use
20563 the general case. */
20564 return false;
20565
20566 case V4SFmode:
20567 case V4SImode:
20568 case V8HImode:
20569 case V4HImode:
20570 break;
20571
20572 case V16QImode:
20573 wmode = V8HImode;
20574 goto widen;
20575 case V8QImode:
20576 wmode = V4HImode;
20577 goto widen;
20578 widen:
20579 /* There's no way to set one QImode entry easily. Combine
20580 the variable value with its adjacent constant value, and
20581 promote to an HImode set. */
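/* For example, to set byte 5 of a V16QImode vector we fold in the
   constant byte 4, build the combined HImode value, and let
   ix86_expand_vector_set store it as element 2 of the V8HImode view.  */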
20582 x = XVECEXP (vals, 0, one_var ^ 1);
20583 if (one_var & 1)
20584 {
20585 var = convert_modes (HImode, QImode, var, true);
20586 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20587 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20588 x = GEN_INT (INTVAL (x) & 0xff);
20589 }
20590 else
20591 {
20592 var = convert_modes (HImode, QImode, var, true);
20593 x = gen_int_mode (INTVAL (x) << 8, HImode);
20594 }
20595 if (x != const0_rtx)
20596 var = expand_simple_binop (HImode, IOR, var, x, var,
20597 1, OPTAB_LIB_WIDEN);
20598
20599 x = gen_reg_rtx (wmode);
20600 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20601 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20602
20603 emit_move_insn (target, gen_lowpart (mode, x));
20604 return true;
20605
20606 default:
20607 return false;
20608 }
20609
20610 emit_move_insn (target, const_vec);
20611 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20612 return true;
20613 }
20614
20615 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20616 all values variable, and none identical. */
20617
20618 static void
20619 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20620 rtx target, rtx vals)
20621 {
20622 enum machine_mode half_mode = GET_MODE_INNER (mode);
20623 rtx op0 = NULL, op1 = NULL;
20624 bool use_vec_concat = false;
20625
20626 switch (mode)
20627 {
20628 case V2SFmode:
20629 case V2SImode:
20630 if (!mmx_ok && !TARGET_SSE)
20631 break;
20632 /* FALLTHRU */
20633
20634 case V2DFmode:
20635 case V2DImode:
20636 /* For the two element vectors, we always implement VEC_CONCAT. */
20637 op0 = XVECEXP (vals, 0, 0);
20638 op1 = XVECEXP (vals, 0, 1);
20639 use_vec_concat = true;
20640 break;
20641
20642 case V4SFmode:
20643 half_mode = V2SFmode;
20644 goto half;
20645 case V4SImode:
20646 half_mode = V2SImode;
20647 goto half;
20648 half:
20649 {
20650 rtvec v;
20651
20652 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20653 Recurse to load the two halves. */
20654
20655 op0 = gen_reg_rtx (half_mode);
20656 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20657 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20658
20659 op1 = gen_reg_rtx (half_mode);
20660 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20661 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20662
20663 use_vec_concat = true;
20664 }
20665 break;
20666
20667 case V8HImode:
20668 case V16QImode:
20669 case V4HImode:
20670 case V8QImode:
20671 break;
20672
20673 default:
20674 gcc_unreachable ();
20675 }
20676
20677 if (use_vec_concat)
20678 {
20679 if (!register_operand (op0, half_mode))
20680 op0 = force_reg (half_mode, op0);
20681 if (!register_operand (op1, half_mode))
20682 op1 = force_reg (half_mode, op1);
20683
20684 emit_insn (gen_rtx_SET (VOIDmode, target,
20685 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20686 }
20687 else
20688 {
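/* No direct VEC_CONCAT form applies; build the vector one word at a
   time instead, shifting and ORing the elements into word_mode values,
   and then move the words into the vector through its integer-mode
   lowpart (and highpart, or a V4SImode build for wider vectors).  */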
20689 int i, j, n_elts, n_words, n_elt_per_word;
20690 enum machine_mode inner_mode;
20691 rtx words[4], shift;
20692
20693 inner_mode = GET_MODE_INNER (mode);
20694 n_elts = GET_MODE_NUNITS (mode);
20695 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20696 n_elt_per_word = n_elts / n_words;
20697 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20698
20699 for (i = 0; i < n_words; ++i)
20700 {
20701 rtx word = NULL_RTX;
20702
20703 for (j = 0; j < n_elt_per_word; ++j)
20704 {
20705 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20706 elt = convert_modes (word_mode, inner_mode, elt, true);
20707
20708 if (j == 0)
20709 word = elt;
20710 else
20711 {
20712 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20713 word, 1, OPTAB_LIB_WIDEN);
20714 word = expand_simple_binop (word_mode, IOR, word, elt,
20715 word, 1, OPTAB_LIB_WIDEN);
20716 }
20717 }
20718
20719 words[i] = word;
20720 }
20721
20722 if (n_words == 1)
20723 emit_move_insn (target, gen_lowpart (mode, words[0]));
20724 else if (n_words == 2)
20725 {
20726 rtx tmp = gen_reg_rtx (mode);
20727 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20728 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20729 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20730 emit_move_insn (target, tmp);
20731 }
20732 else if (n_words == 4)
20733 {
20734 rtx tmp = gen_reg_rtx (V4SImode);
20735 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20736 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20737 emit_move_insn (target, gen_lowpart (mode, tmp));
20738 }
20739 else
20740 gcc_unreachable ();
20741 }
20742 }
20743
20744 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20745 instructions unless MMX_OK is true. */
20746
20747 void
20748 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20749 {
20750 enum machine_mode mode = GET_MODE (target);
20751 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20752 int n_elts = GET_MODE_NUNITS (mode);
20753 int n_var = 0, one_var = -1;
20754 bool all_same = true, all_const_zero = true;
20755 int i;
20756 rtx x;
20757
20758 for (i = 0; i < n_elts; ++i)
20759 {
20760 x = XVECEXP (vals, 0, i);
20761 if (!CONSTANT_P (x))
20762 n_var++, one_var = i;
20763 else if (x != CONST0_RTX (inner_mode))
20764 all_const_zero = false;
20765 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20766 all_same = false;
20767 }
20768
20769 /* Constants are best loaded from the constant pool. */
20770 if (n_var == 0)
20771 {
20772 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20773 return;
20774 }
20775
20776 /* If all values are identical, broadcast the value. */
20777 if (all_same
20778 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20779 XVECEXP (vals, 0, 0)))
20780 return;
20781
20782 /* Values where only one field is non-constant are best loaded from
20783 the pool and overwritten via move later. */
20784 if (n_var == 1)
20785 {
20786 if (all_const_zero
20787 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20788 XVECEXP (vals, 0, one_var),
20789 one_var))
20790 return;
20791
20792 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20793 return;
20794 }
20795
20796 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20797 }
20798
20799 void
20800 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20801 {
20802 enum machine_mode mode = GET_MODE (target);
20803 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20804 bool use_vec_merge = false;
20805 rtx tmp;
20806
20807 switch (mode)
20808 {
20809 case V2SFmode:
20810 case V2SImode:
20811 if (mmx_ok)
20812 {
20813 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20814 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20815 if (elt == 0)
20816 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20817 else
20818 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20819 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20820 return;
20821 }
20822 break;
20823
20824 case V2DFmode:
20825 case V2DImode:
20826 {
20827 rtx op0, op1;
20828
20829 /* For the two element vectors, we implement a VEC_CONCAT with
20830 the extraction of the other element. */
20831
20832 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20833 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20834
20835 if (elt == 0)
20836 op0 = val, op1 = tmp;
20837 else
20838 op0 = tmp, op1 = val;
20839
20840 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20841 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20842 }
20843 return;
20844
20845 case V4SFmode:
20846 switch (elt)
20847 {
20848 case 0:
20849 use_vec_merge = true;
20850 break;
20851
20852 case 1:
20853 /* tmp = target = A B C D */
20854 tmp = copy_to_reg (target);
20855 /* target = A A B B */
20856 emit_insn (gen_sse_unpcklps (target, target, target));
20857 /* target = X A B B */
20858 ix86_expand_vector_set (false, target, val, 0);
20859 /* target = A X C D */
20860 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20861 GEN_INT (1), GEN_INT (0),
20862 GEN_INT (2+4), GEN_INT (3+4)));
20863 return;
20864
20865 case 2:
20866 /* tmp = target = A B C D */
20867 tmp = copy_to_reg (target);
20868 /* tmp = X B C D */
20869 ix86_expand_vector_set (false, tmp, val, 0);
20870 /* target = A B X D */
20871 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20872 GEN_INT (0), GEN_INT (1),
20873 GEN_INT (0+4), GEN_INT (3+4)));
20874 return;
20875
20876 case 3:
20877 /* tmp = target = A B C D */
20878 tmp = copy_to_reg (target);
20879 /* tmp = X B C D */
20880 ix86_expand_vector_set (false, tmp, val, 0);
20881 /* target = A B C X */
20882 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20883 GEN_INT (0), GEN_INT (1),
20884 GEN_INT (2+4), GEN_INT (0+4)));
20885 return;
20886
20887 default:
20888 gcc_unreachable ();
20889 }
20890 break;
20891
20892 case V4SImode:
20893 /* Element 0 handled by vec_merge below. */
20894 if (elt == 0)
20895 {
20896 use_vec_merge = true;
20897 break;
20898 }
20899
20900 if (TARGET_SSE2)
20901 {
20902 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20903 store into element 0, then shuffle them back. */
20904
20905 rtx order[4];
20906
20907 order[0] = GEN_INT (elt);
20908 order[1] = const1_rtx;
20909 order[2] = const2_rtx;
20910 order[3] = GEN_INT (3);
20911 order[elt] = const0_rtx;
20912
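/* E.g. for ELT == 2 the order is { 2, 1, 0, 3 }.  This permutation is
   its own inverse, so applying the same pshufd again after the element
   0 store restores the original lane order with the new value in lane
   ELT.  */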
20913 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20914 order[1], order[2], order[3]));
20915
20916 ix86_expand_vector_set (false, target, val, 0);
20917
20918 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20919 order[1], order[2], order[3]));
20920 }
20921 else
20922 {
20923 /* For SSE1, we have to reuse the V4SF code. */
20924 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20925 gen_lowpart (SFmode, val), elt);
20926 }
20927 return;
20928
20929 case V8HImode:
20930 use_vec_merge = TARGET_SSE2;
20931 break;
20932 case V4HImode:
20933 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20934 break;
20935
20936 case V16QImode:
20937 case V8QImode:
20938 default:
20939 break;
20940 }
20941
20942 if (use_vec_merge)
20943 {
20944 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20945 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20946 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20947 }
20948 else
20949 {
20950 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20951
20952 emit_move_insn (mem, target);
20953
20954 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20955 emit_move_insn (tmp, val);
20956
20957 emit_move_insn (target, mem);
20958 }
20959 }
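
/* Illustrative trace of the V4SImode SSE2 path above (a comment-only
   sketch, not generated code).  For ELT == 2 the permutation vector
   becomes { 2, 1, 0, 3 } after ORDER[ELT] = 0, so:

	target = { a, b, c, d }
	pshufd              -> { c, b, a, d }   swap elements 0 and ELT
	insert VAL at elt 0 -> { v, b, a, d }
	pshufd              -> { a, b, v, d }   same shuffle swaps back  */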
20960
20961 void
20962 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20963 {
20964 enum machine_mode mode = GET_MODE (vec);
20965 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20966 bool use_vec_extr = false;
20967 rtx tmp;
20968
20969 switch (mode)
20970 {
20971 case V2SImode:
20972 case V2SFmode:
20973 if (!mmx_ok)
20974 break;
20975 /* FALLTHRU */
20976
20977 case V2DFmode:
20978 case V2DImode:
20979 use_vec_extr = true;
20980 break;
20981
20982 case V4SFmode:
20983 switch (elt)
20984 {
20985 case 0:
20986 tmp = vec;
20987 break;
20988
20989 case 1:
20990 case 3:
20991 tmp = gen_reg_rtx (mode);
20992 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20993 GEN_INT (elt), GEN_INT (elt),
20994 GEN_INT (elt+4), GEN_INT (elt+4)));
20995 break;
20996
20997 case 2:
20998 tmp = gen_reg_rtx (mode);
20999 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
21000 break;
21001
21002 default:
21003 gcc_unreachable ();
21004 }
21005 vec = tmp;
21006 use_vec_extr = true;
21007 elt = 0;
21008 break;
21009
21010 case V4SImode:
21011 if (TARGET_SSE2)
21012 {
21013 switch (elt)
21014 {
21015 case 0:
21016 tmp = vec;
21017 break;
21018
21019 case 1:
21020 case 3:
21021 tmp = gen_reg_rtx (mode);
21022 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
21023 GEN_INT (elt), GEN_INT (elt),
21024 GEN_INT (elt), GEN_INT (elt)));
21025 break;
21026
21027 case 2:
21028 tmp = gen_reg_rtx (mode);
21029 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
21030 break;
21031
21032 default:
21033 gcc_unreachable ();
21034 }
21035 vec = tmp;
21036 use_vec_extr = true;
21037 elt = 0;
21038 }
21039 else
21040 {
21041 /* For SSE1, we have to reuse the V4SF code. */
21042 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
21043 gen_lowpart (V4SFmode, vec), elt);
21044 return;
21045 }
21046 break;
21047
21048 case V8HImode:
21049 use_vec_extr = TARGET_SSE2;
21050 break;
21051 case V4HImode:
21052 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
21053 break;
21054
21055 case V16QImode:
21056 case V8QImode:
21057 /* ??? Could extract the appropriate HImode element and shift. */
21058 default:
21059 break;
21060 }
21061
21062 if (use_vec_extr)
21063 {
21064 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
21065 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
21066
21067 /* Let the rtl optimizers know about the zero extension performed. */
21068 if (inner_mode == HImode)
21069 {
21070 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
21071 target = gen_lowpart (SImode, target);
21072 }
21073
21074 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
21075 }
21076 else
21077 {
21078 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
21079
21080 emit_move_insn (mem, vec);
21081
21082 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
21083 emit_move_insn (target, tmp);
21084 }
21085 }
21086
21087 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
21088 pattern to reduce; DEST is the destination; IN is the input vector. */
21089
21090 void
21091 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
21092 {
21093 rtx tmp1, tmp2, tmp3;
21094
21095 tmp1 = gen_reg_rtx (V4SFmode);
21096 tmp2 = gen_reg_rtx (V4SFmode);
21097 tmp3 = gen_reg_rtx (V4SFmode);
21098
21099 emit_insn (gen_sse_movhlps (tmp1, in, in));
21100 emit_insn (fn (tmp2, tmp1, in));
21101
21102 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
21103 GEN_INT (1), GEN_INT (1),
21104 GEN_INT (1+4), GEN_INT (1+4)));
21105 emit_insn (fn (dest, tmp2, tmp3));
21106 }
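
/* Illustrative data flow for the reduction above, assuming FN is plus
   and IN = { a, b, c, d } (comment only, not generated code):

	movhlps:  tmp1 = { c, d, c, d }
	fn:       tmp2 = { a+c, b+d, ... }
	shufps:   tmp3 = { b+d, b+d, b+d, b+d }
	fn:       dest = { a+b+c+d, ... }

   The fully reduced scalar ends up in element 0 of DEST.  */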
21107 \f
21108 /* Target hook for scalar_mode_supported_p. */
21109 static bool
21110 ix86_scalar_mode_supported_p (enum machine_mode mode)
21111 {
21112 if (DECIMAL_FLOAT_MODE_P (mode))
21113 return true;
21114 else
21115 return default_scalar_mode_supported_p (mode);
21116 }
21117
21118 /* Implements target hook vector_mode_supported_p. */
21119 static bool
21120 ix86_vector_mode_supported_p (enum machine_mode mode)
21121 {
21122 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
21123 return true;
21124 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
21125 return true;
21126 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
21127 return true;
21128 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
21129 return true;
21130 return false;
21131 }
21132
21133 /* Worker function for TARGET_MD_ASM_CLOBBERS.
21134
21135 We do this in the new i386 backend to maintain source compatibility
21136 with the old cc0-based compiler. */
21137
21138 static tree
21139 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
21140 tree inputs ATTRIBUTE_UNUSED,
21141 tree clobbers)
21142 {
21143 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
21144 clobbers);
21145 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
21146 clobbers);
21147 return clobbers;
21148 }
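
/* The user-visible effect (illustrative): every asm statement behaves
   as if the condition-code and x87 status-word clobbers were written
   explicitly.  For hypothetical ints x and y,

	asm ("cmpl %1, %0" : : "r" (x), "r" (y));

   need not list "cc"; the backend adds "flags" and "fpsr" itself.  */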
21149
21150 /* Implements the target hook targetm.asm.encode_section_info. This
21151 hook is not used by NetWare. */
21152
21153 static void ATTRIBUTE_UNUSED
21154 ix86_encode_section_info (tree decl, rtx rtl, int first)
21155 {
21156 default_encode_section_info (decl, rtl, first);
21157
21158 if (TREE_CODE (decl) == VAR_DECL
21159 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
21160 && ix86_in_large_data_p (decl))
21161 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
21162 }
21163
21164 /* Worker function for REVERSE_CONDITION. */
21165
21166 enum rtx_code
21167 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21168 {
21169 return (mode != CCFPmode && mode != CCFPUmode
21170 ? reverse_condition (code)
21171 : reverse_condition_maybe_unordered (code));
21172 }
21173
21174 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21175 to OPERANDS[0]. */
21176
21177 const char *
21178 output_387_reg_move (rtx insn, rtx *operands)
21179 {
21180 if (REG_P (operands[0]))
21181 {
21182 if (REG_P (operands[1])
21183 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21184 {
21185 if (REGNO (operands[0]) == FIRST_STACK_REG)
21186 return output_387_ffreep (operands, 0);
21187 return "fstp\t%y0";
21188 }
21189 if (STACK_TOP_P (operands[0]))
21190 return "fld%z1\t%y1";
21191 return "fst\t%y0";
21192 }
21193 else if (MEM_P (operands[0]))
21194 {
21195 gcc_assert (REG_P (operands[1]));
21196 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21197 return "fstp%z0\t%y0";
21198 else
21199 {
21200 /* There is no non-popping store to memory for XFmode.
21201 So if we need one, follow the store with a load. */
21202 if (GET_MODE (operands[0]) == XFmode)
21203 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
21204 else
21205 return "fst%z0\t%y0";
21206 }
21207 }
21208 else
21209 gcc_unreachable ();
21210 }
21211
21212 /* Output code to perform a conditional jump to LABEL, if C2 flag in
21213 FP status register is set. */
21214
21215 void
21216 ix86_emit_fp_unordered_jump (rtx label)
21217 {
21218 rtx reg = gen_reg_rtx (HImode);
21219 rtx temp;
21220
21221 emit_insn (gen_x86_fnstsw_1 (reg));
21222
21223 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21224 {
21225 emit_insn (gen_x86_sahf_1 (reg));
21226
21227 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21228 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21229 }
21230 else
21231 {
21232 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21233
21234 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21235 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21236 }
21237
21238 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21239 gen_rtx_LABEL_REF (VOIDmode, label),
21240 pc_rtx);
21241 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21242
21243 emit_jump_insn (temp);
21244 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21245 }
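
/* Background for the bit test above (illustrative).  After fnstsw the
   FPU status word is in %ax; C2 is bit 10 of the status word, i.e. bit
   2 (mask 0x04) of %ah.  With SAHF available, sahf copies %ah into
   EFLAGS so C2 lands in PF and the branch is a jp; otherwise the
   expansion is equivalent to

	testb	$0x04, %ah
	jne	label  */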
21246
21247 /* Output code to perform a log1p XFmode calculation. */
21248
21249 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
21250 {
21251 rtx label1 = gen_label_rtx ();
21252 rtx label2 = gen_label_rtx ();
21253
21254 rtx tmp = gen_reg_rtx (XFmode);
21255 rtx tmp2 = gen_reg_rtx (XFmode);
21256
21257 emit_insn (gen_absxf2 (tmp, op1));
21258 emit_insn (gen_cmpxf (tmp,
21259 CONST_DOUBLE_FROM_REAL_VALUE (
21260 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21261 XFmode)));
21262 emit_jump_insn (gen_bge (label1));
21263
21264 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21265 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21266 emit_jump (label2);
21267
21268 emit_label (label1);
21269 emit_move_insn (tmp, CONST1_RTX (XFmode));
21270 emit_insn (gen_addxf3 (tmp, op1, tmp));
21271 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21272 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21273
21274 emit_label (label2);
21275 }
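
/* Notes on the expansion above (an illustrative sketch).  The constant
   compared against is 1 - sqrt(2)/2, the documented operand range of
   fyl2xp1.  Roughly:

	if (fabs (x) < 1 - sqrt(2)/2)
	  op0 = log(2) * log2 (x + 1);     fyl2xp1: 1+x is never formed,
					   so no cancellation for tiny x
	else
	  op0 = log(2) * log2 (1.0 + x);   fyl2x on the computed 1+x  */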
21276
21277 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21278
21279 static void ATTRIBUTE_UNUSED
21280 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21281 tree decl)
21282 {
21283 /* With Binutils 2.15, the "@unwind" marker must be specified on
21284 every occurrence of the ".eh_frame" section, not just the first
21285 one. */
21286 if (TARGET_64BIT
21287 && strcmp (name, ".eh_frame") == 0)
21288 {
21289 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21290 flags & SECTION_WRITE ? "aw" : "a");
21291 return;
21292 }
21293 default_elf_asm_named_section (name, flags, decl);
21294 }
21295
21296 /* Return the mangling of TYPE if it is an extended fundamental type. */
21297
21298 static const char *
21299 ix86_mangle_fundamental_type (tree type)
21300 {
21301 switch (TYPE_MODE (type))
21302 {
21303 case TFmode:
21304 /* __float128 is "g". */
21305 return "g";
21306 case XFmode:
21307 /* "long double" or __float80 is "e". */
21308 return "e";
21309 default:
21310 return NULL;
21311 }
21312 }
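
/* Illustrative manglings under the Itanium C++ ABI using the codes
   returned above:

	void f (long double);   ->  _Z1fe
	void g (__float128);    ->  _Z1gg  */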
21313
21314 /* For 32-bit code we can save PIC register setup by using
21315 the hidden function __stack_chk_fail_local instead of calling
21316 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21317 register, so it is better to call __stack_chk_fail directly. */
21318
21319 static tree
21320 ix86_stack_protect_fail (void)
21321 {
21322 return TARGET_64BIT
21323 ? default_external_stack_protect_fail ()
21324 : default_hidden_stack_protect_fail ();
21325 }
21326
21327 /* Select a format to encode pointers in exception handling data. CODE
21328 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21329 true if the symbol may be affected by dynamic relocations.
21330
21331 ??? All x86 object file formats are capable of representing this.
21332 After all, the relocation needed is the same as for the call insn.
21333 Whether or not a particular assembler allows us to enter such, I
21334 guess we'll have to see. */
21335 int
21336 asm_preferred_eh_data_format (int code, int global)
21337 {
21338 if (flag_pic)
21339 {
21340 int type = DW_EH_PE_sdata8;
21341 if (!TARGET_64BIT
21342 || ix86_cmodel == CM_SMALL_PIC
21343 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21344 type = DW_EH_PE_sdata4;
21345 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21346 }
21347 if (ix86_cmodel == CM_SMALL
21348 || (ix86_cmodel == CM_MEDIUM && code))
21349 return DW_EH_PE_udata4;
21350 return DW_EH_PE_absptr;
21351 }
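
/* Worked example (illustrative; DW_EH_PE_* values as in dwarf2.h:
   absptr 0x00, udata4 0x03, sdata4 0x0b, pcrel 0x10, indirect 0x80).
   For 32-bit -fPIC code and a symbol that may be affected by dynamic
   relocations, the function returns

	DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4  ==  0x9b

   while non-PIC 32-bit code falls through to DW_EH_PE_absptr.  */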
21352 \f
21353 /* Expand copysign from SIGN to the positive value ABS_VALUE
21354 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21355 the sign-bit. */
21356 static void
21357 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21358 {
21359 enum machine_mode mode = GET_MODE (sign);
21360 rtx sgn = gen_reg_rtx (mode);
21361 if (mask == NULL_RTX)
21362 {
21363 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21364 if (!VECTOR_MODE_P (mode))
21365 {
21366 /* We need to generate a scalar mode mask in this case. */
21367 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21368 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21369 mask = gen_reg_rtx (mode);
21370 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21371 }
21372 }
21373 else
21374 mask = gen_rtx_NOT (mode, mask);
21375 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21376 gen_rtx_AND (mode, mask, sign)));
21377 emit_insn (gen_rtx_SET (VOIDmode, result,
21378 gen_rtx_IOR (mode, abs_value, sgn)));
21379 }
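
/* Bitwise sketch of the sequence above (illustrative), for scalar
   DFmode where the sign-bit mask M is 0x8000000000000000:

	sgn    = sign & M;		isolate the sign bit of SIGN
	result = abs_value | sgn;	e.g. copysign (2.0, -3.5) == -2.0

   When MASK is passed in it is the inverted (fabs) mask, so it is
   complemented first to recover M.  */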
21380
21381 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21382 mask for masking out the sign-bit is stored in *SMASK, if that is
21383 non-null. */
21384 static rtx
21385 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21386 {
21387 enum machine_mode mode = GET_MODE (op0);
21388 rtx xa, mask;
21389
21390 xa = gen_reg_rtx (mode);
21391 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21392 if (!VECTOR_MODE_P (mode))
21393 {
21394 /* We need to generate a scalar mode mask in this case. */
21395 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21396 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21397 mask = gen_reg_rtx (mode);
21398 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21399 }
21400 emit_insn (gen_rtx_SET (VOIDmode, xa,
21401 gen_rtx_AND (mode, op0, mask)));
21402
21403 if (smask)
21404 *smask = mask;
21405
21406 return xa;
21407 }
21408
21409 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21410 swapping the operands if SWAP_OPERANDS is true. The expanded
21411 code is a forward jump to a newly created label in case the
21412 comparison is true. The generated label rtx is returned. */
21413 static rtx
21414 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21415 bool swap_operands)
21416 {
21417 rtx label, tmp;
21418
21419 if (swap_operands)
21420 {
21421 tmp = op0;
21422 op0 = op1;
21423 op1 = tmp;
21424 }
21425
21426 label = gen_label_rtx ();
21427 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21428 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21429 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21430 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21431 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21432 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21433 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21434 JUMP_LABEL (tmp) = label;
21435
21436 return label;
21437 }
21438
21439 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21440 using comparison code CODE. Operands are swapped for the comparison if
21441 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21442 static rtx
21443 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21444 bool swap_operands)
21445 {
21446 enum machine_mode mode = GET_MODE (op0);
21447 rtx mask = gen_reg_rtx (mode);
21448
21449 if (swap_operands)
21450 {
21451 rtx tmp = op0;
21452 op0 = op1;
21453 op1 = tmp;
21454 }
21455
21456 if (mode == DFmode)
21457 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21458 gen_rtx_fmt_ee (code, mode, op0, op1)));
21459 else
21460 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21461 gen_rtx_fmt_ee (code, mode, op0, op1)));
21462
21463 return mask;
21464 }
21465
21466 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21467 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21468 static rtx
21469 ix86_gen_TWO52 (enum machine_mode mode)
21470 {
21471 REAL_VALUE_TYPE TWO52r;
21472 rtx TWO52;
21473
21474 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21475 TWO52 = const_double_from_real_value (TWO52r, mode);
21476 TWO52 = force_reg (mode, TWO52);
21477
21478 return TWO52;
21479 }
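
/* The rounding trick used by the expanders below (illustrative).  For
   DFmode, TWO52 == 2**52 == 4503599627370496.0, and doubles of that
   magnitude are spaced exactly 1.0 apart, so in round-to-nearest mode

	2.6 + TWO52 - TWO52 == 3.0
	2.5 + TWO52 - TWO52 == 2.0	(ties round to even, hence the
					 compensation steps below)

   This only works for 0 <= xa < 2**52, which is why each expander
   first branches away when !isless (xa, TWO52).  */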
21480
21481 /* Expand SSE sequence for computing lround from OP1 storing
21482 into OP0. */
21483 void
21484 ix86_expand_lround (rtx op0, rtx op1)
21485 {
21486 /* C code for the stuff we're doing below:
21487 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21488 return (long)tmp;
21489 */
21490 enum machine_mode mode = GET_MODE (op1);
21491 const struct real_format *fmt;
21492 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21493 rtx adj;
21494
21495 /* load nextafter (0.5, 0.0) */
21496 fmt = REAL_MODE_FORMAT (mode);
21497 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21498 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21499
21500 /* adj = copysign (0.5, op1) */
21501 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21502 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21503
21504 /* adj = op1 + adj */
21505 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21506
21507 /* op0 = (imode)adj */
21508 expand_fix (op0, adj, 0);
21509 }
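
/* Why nextafter (0.5, 0.0) rather than 0.5 (illustrative): for
   x == 0.49999999999999994 (the largest double below 0.5), x + 0.5
   rounds up to 1.0 and would truncate to 1, but x + nextafter (0.5, 0.0)
   stays below 1.0 and truncates to 0, the correct lround result.
   Exact halves still round away from zero: 0.5 + nextafter (0.5, 0.0)
   rounds to 1.0.  */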
21510
21511 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
21512 into OP0. */
21513 void
21514 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21515 {
21516 /* C code for the stuff we're doing below (for do_floor):
21517 xi = (long)op1;
21518 xi -= (double)xi > op1 ? 1 : 0;
21519 return xi;
21520 */
21521 enum machine_mode fmode = GET_MODE (op1);
21522 enum machine_mode imode = GET_MODE (op0);
21523 rtx ireg, freg, label, tmp;
21524
21525 /* reg = (long)op1 */
21526 ireg = gen_reg_rtx (imode);
21527 expand_fix (ireg, op1, 0);
21528
21529 /* freg = (double)reg */
21530 freg = gen_reg_rtx (fmode);
21531 expand_float (freg, ireg, 0);
21532
21533 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21534 label = ix86_expand_sse_compare_and_jump (UNLE,
21535 freg, op1, !do_floor);
21536 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21537 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21538 emit_move_insn (ireg, tmp);
21539
21540 emit_label (label);
21541 LABEL_NUSES (label) = 1;
21542
21543 emit_move_insn (op0, ireg);
21544 }
21545
21546 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21547 result in OPERAND0. */
21548 void
21549 ix86_expand_rint (rtx operand0, rtx operand1)
21550 {
21551 /* C code for the stuff we're doing below:
21552 xa = fabs (operand1);
21553 if (!isless (xa, 2**52))
21554 return operand1;
21555 xa = xa + 2**52 - 2**52;
21556 return copysign (xa, operand1);
21557 */
21558 enum machine_mode mode = GET_MODE (operand0);
21559 rtx res, xa, label, TWO52, mask;
21560
21561 res = gen_reg_rtx (mode);
21562 emit_move_insn (res, operand1);
21563
21564 /* xa = abs (operand1) */
21565 xa = ix86_expand_sse_fabs (res, &mask);
21566
21567 /* if (!isless (xa, TWO52)) goto label; */
21568 TWO52 = ix86_gen_TWO52 (mode);
21569 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21570
21571 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21572 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21573
21574 ix86_sse_copysign_to_positive (res, xa, res, mask);
21575
21576 emit_label (label);
21577 LABEL_NUSES (label) = 1;
21578
21579 emit_move_insn (operand0, res);
21580 }
21581
21582 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21583 into OPERAND0. */
21584 void
21585 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21586 {
21587 /* C code for the stuff we expand below.
21588 double xa = fabs (x), x2;
21589 if (!isless (xa, TWO52))
21590 return x;
21591 xa = xa + TWO52 - TWO52;
21592 x2 = copysign (xa, x);
21593 Compensate. Floor:
21594 if (x2 > x)
21595 x2 -= 1;
21596 Compensate. Ceil:
21597 if (x2 < x)
21598 x2 -= -1;
21599 return x2;
21600 */
21601 enum machine_mode mode = GET_MODE (operand0);
21602 rtx xa, TWO52, tmp, label, one, res, mask;
21603
21604 TWO52 = ix86_gen_TWO52 (mode);
21605
21606 /* Temporary for holding the result, initialized to the input
21607 operand to ease control flow. */
21608 res = gen_reg_rtx (mode);
21609 emit_move_insn (res, operand1);
21610
21611 /* xa = abs (operand1) */
21612 xa = ix86_expand_sse_fabs (res, &mask);
21613
21614 /* if (!isless (xa, TWO52)) goto label; */
21615 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21616
21617 /* xa = xa + TWO52 - TWO52; */
21618 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21619 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21620
21621 /* xa = copysign (xa, operand1) */
21622 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21623
21624 /* generate 1.0 or -1.0 */
21625 one = force_reg (mode,
21626 const_double_from_real_value (do_floor
21627 ? dconst1 : dconstm1, mode));
21628
21629 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21630 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21631 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21632 gen_rtx_AND (mode, one, tmp)));
21633 /* We always need to subtract here to preserve signed zero. */
21634 tmp = expand_simple_binop (mode, MINUS,
21635 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21636 emit_move_insn (res, tmp);
21637
21638 emit_label (label);
21639 LABEL_NUSES (label) = 1;
21640
21641 emit_move_insn (operand0, res);
21642 }
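
/* Worked floor example for the sequence above (illustrative):

	x  = 2.6:  xa = 2.6, xa + TWO52 - TWO52 = 3.0, copysign -> 3.0,
		   3.0 > 2.6 so subtract 1.0	-> floor == 2.0
	x = -2.6:  xa = 2.6 -> 3.0, copysign -> -3.0,
		   -3.0 > -2.6 is false, no compensation -> floor == -3.0  */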
21643
21644 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21645 into OPERAND0. */
21646 void
21647 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21648 {
21649 /* C code for the stuff we expand below.
21650 double xa = fabs (x), x2;
21651 if (!isless (xa, TWO52))
21652 return x;
21653 x2 = (double)(long)x;
21654 Compensate. Floor:
21655 if (x2 > x)
21656 x2 -= 1;
21657 Compensate. Ceil:
21658 if (x2 < x)
21659 x2 += 1;
21660 if (HONOR_SIGNED_ZEROS (mode))
21661 return copysign (x2, x);
21662 return x2;
21663 */
21664 enum machine_mode mode = GET_MODE (operand0);
21665 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21666
21667 TWO52 = ix86_gen_TWO52 (mode);
21668
21669 /* Temporary for holding the result, initialized to the input
21670 operand to ease control flow. */
21671 res = gen_reg_rtx (mode);
21672 emit_move_insn (res, operand1);
21673
21674 /* xa = abs (operand1) */
21675 xa = ix86_expand_sse_fabs (res, &mask);
21676
21677 /* if (!isless (xa, TWO52)) goto label; */
21678 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21679
21680 /* xa = (double)(long)x */
21681 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21682 expand_fix (xi, res, 0);
21683 expand_float (xa, xi, 0);
21684
21685 /* generate 1.0 */
21686 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21687
21688 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21689 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21690 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21691 gen_rtx_AND (mode, one, tmp)));
21692 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21693 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21694 emit_move_insn (res, tmp);
21695
21696 if (HONOR_SIGNED_ZEROS (mode))
21697 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21698
21699 emit_label (label);
21700 LABEL_NUSES (label) = 1;
21701
21702 emit_move_insn (operand0, res);
21703 }
21704
21705 /* Expand SSE sequence for computing round from OPERAND1 storing
21706 into OPERAND0. This sequence works without relying on DImode truncation
21707 via cvttsd2siq, which is only available on 64-bit targets. */
21708 void
21709 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21710 {
21711 /* C code for the stuff we expand below.
21712 double xa = fabs (x), xa2, x2;
21713 if (!isless (xa, TWO52))
21714 return x;
21715 Using the absolute value and copying back sign makes
21716 -0.0 -> -0.0 correct.
21717 xa2 = xa + TWO52 - TWO52;
21718 Compensate.
21719 dxa = xa2 - xa;
21720 if (dxa <= -0.5)
21721 xa2 += 1;
21722 else if (dxa > 0.5)
21723 xa2 -= 1;
21724 x2 = copysign (xa2, x);
21725 return x2;
21726 */
21727 enum machine_mode mode = GET_MODE (operand0);
21728 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21729
21730 TWO52 = ix86_gen_TWO52 (mode);
21731
21732 /* Temporary for holding the result, initialized to the input
21733 operand to ease control flow. */
21734 res = gen_reg_rtx (mode);
21735 emit_move_insn (res, operand1);
21736
21737 /* xa = abs (operand1) */
21738 xa = ix86_expand_sse_fabs (res, &mask);
21739
21740 /* if (!isless (xa, TWO52)) goto label; */
21741 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21742
21743 /* xa2 = xa + TWO52 - TWO52; */
21744 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21745 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21746
21747 /* dxa = xa2 - xa; */
21748 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21749
21750 /* generate 0.5, 1.0 and -0.5 */
21751 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21752 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21753 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21754 0, OPTAB_DIRECT);
21755
21756 /* Compensate. */
21758 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21759 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21760 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21761 gen_rtx_AND (mode, one, tmp)));
21762 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21763 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21764 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21765 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21766 gen_rtx_AND (mode, one, tmp)));
21767 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21768
21769 /* res = copysign (xa2, operand1) */
21770 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21771
21772 emit_label (label);
21773 LABEL_NUSES (label) = 1;
21774
21775 emit_move_insn (operand0, res);
21776 }
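
/* Worked example for the compensation above (illustrative):

	x = 2.5:  xa2 = 2.5 + TWO52 - TWO52 = 2.0 (tie rounds to even),
		  dxa = 2.0 - 2.5 = -0.5, dxa <= -0.5 so xa2 += 1 -> 3.0,
		  copysign -> round (2.5) == 3.0
	x = 2.6:  xa2 = 3.0, dxa ~= 0.4, no adjustment -> round (2.6) == 3.0  */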
21777
21778 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21779 into OPERAND0. */
21780 void
21781 ix86_expand_trunc (rtx operand0, rtx operand1)
21782 {
21783 /* C code for SSE variant we expand below.
21784 double xa = fabs (x), x2;
21785 if (!isless (xa, TWO52))
21786 return x;
21787 x2 = (double)(long)x;
21788 if (HONOR_SIGNED_ZEROS (mode))
21789 return copysign (x2, x);
21790 return x2;
21791 */
21792 enum machine_mode mode = GET_MODE (operand0);
21793 rtx xa, xi, TWO52, label, res, mask;
21794
21795 TWO52 = ix86_gen_TWO52 (mode);
21796
21797 /* Temporary for holding the result, initialized to the input
21798 operand to ease control flow. */
21799 res = gen_reg_rtx (mode);
21800 emit_move_insn (res, operand1);
21801
21802 /* xa = abs (operand1) */
21803 xa = ix86_expand_sse_fabs (res, &mask);
21804
21805 /* if (!isless (xa, TWO52)) goto label; */
21806 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21807
21808 /* x = (double)(long)x */
21809 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21810 expand_fix (xi, res, 0);
21811 expand_float (res, xi, 0);
21812
21813 if (HONOR_SIGNED_ZEROS (mode))
21814 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21815
21816 emit_label (label);
21817 LABEL_NUSES (label) = 1;
21818
21819 emit_move_insn (operand0, res);
21820 }
21821
21822 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21823 into OPERAND0. */
21824 void
21825 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21826 {
21827 enum machine_mode mode = GET_MODE (operand0);
21828 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21829
21830 /* C code for SSE variant we expand below.
21831 double xa = fabs (x), xa2, x2;
21832 if (!isless (xa, TWO52))
21833 return x;
21834 xa2 = xa + TWO52 - TWO52;
21835 Compensate:
21836 if (xa2 > xa)
21837 xa2 -= 1.0;
21838 x2 = copysign (xa2, x);
21839 return x2;
21840 */
21841
21842 TWO52 = ix86_gen_TWO52 (mode);
21843
21844 /* Temporary for holding the result, initialized to the input
21845 operand to ease control flow. */
21846 res = gen_reg_rtx (mode);
21847 emit_move_insn (res, operand1);
21848
21849 /* xa = abs (operand1) */
21850 xa = ix86_expand_sse_fabs (res, &smask);
21851
21852 /* if (!isless (xa, TWO52)) goto label; */
21853 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21854
21855 /* res = xa + TWO52 - TWO52; */
21856 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21857 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21858 emit_move_insn (res, tmp);
21859
21860 /* generate 1.0 */
21861 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21862
21863 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21864 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21865 emit_insn (gen_rtx_SET (VOIDmode, mask,
21866 gen_rtx_AND (mode, mask, one)));
21867 tmp = expand_simple_binop (mode, MINUS,
21868 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21869 emit_move_insn (res, tmp);
21870
21871 /* res = copysign (res, operand1) */
21872 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21873
21874 emit_label (label);
21875 LABEL_NUSES (label) = 1;
21876
21877 emit_move_insn (operand0, res);
21878 }
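
/* Worked example for the sequence above (illustrative):

	x = 2.6:  res = 2.6 + TWO52 - TWO52 = 3.0, 3.0 > 2.6 so subtract
		  1.0 -> 2.0, copysign -> trunc (2.6) == 2.0
	x = -2.6: works on xa = 2.6 as above, copysign restores the sign,
		  giving trunc (-2.6) == -2.0  */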
21879
21880 /* Expand SSE sequence for computing round from OPERAND1 storing
21881 into OPERAND0. */
21882 void
21883 ix86_expand_round (rtx operand0, rtx operand1)
21884 {
21885 /* C code for the stuff we're doing below:
21886 double xa = fabs (x);
21887 if (!isless (xa, TWO52))
21888 return x;
21889 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21890 return copysign (xa, x);
21891 */
21892 enum machine_mode mode = GET_MODE (operand0);
21893 rtx res, TWO52, xa, label, xi, half, mask;
21894 const struct real_format *fmt;
21895 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21896
21897 /* Temporary for holding the result, initialized to the input
21898 operand to ease control flow. */
21899 res = gen_reg_rtx (mode);
21900 emit_move_insn (res, operand1);
21901
21902 TWO52 = ix86_gen_TWO52 (mode);
21903 xa = ix86_expand_sse_fabs (res, &mask);
21904 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21905
21906 /* load nextafter (0.5, 0.0) */
21907 fmt = REAL_MODE_FORMAT (mode);
21908 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21909 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21910
21911 /* xa = xa + 0.5 */
21912 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21913 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21914
21915 /* xa = (double)(int64_t)xa */
21916 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21917 expand_fix (xi, xa, 0);
21918 expand_float (xa, xi, 0);
21919
21920 /* res = copysign (xa, operand1) */
21921 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21922
21923 emit_label (label);
21924 LABEL_NUSES (label) = 1;
21925
21926 emit_move_insn (operand0, res);
21927 }
21928
21929 \f
21930 /* Table of valid machine attributes. */
21931 static const struct attribute_spec ix86_attribute_table[] =
21932 {
21933 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
21934 /* Stdcall attribute says callee is responsible for popping arguments
21935 if they are not variable. */
21936 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21937 /* Fastcall attribute says callee is responsible for popping arguments
21938 if they are not variable. */
21939 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21940 /* Cdecl attribute says the callee is a normal C declaration */
21941 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21942 /* Regparm attribute specifies how many integer arguments are to be
21943 passed in registers. */
21944 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
21945 /* Sseregparm attribute says we are using x86_64 calling conventions
21946 for FP arguments. */
21947 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21948 /* force_align_arg_pointer says this function realigns the stack at entry. */
21949 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
21950 false, true, true, ix86_handle_cconv_attribute },
21951 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21952 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
21953 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
21954 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
21955 #endif
21956 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
21957 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
21958 #ifdef SUBTARGET_ATTRIBUTE_TABLE
21959 SUBTARGET_ATTRIBUTE_TABLE,
21960 #endif
21961 { NULL, 0, 0, false, false, false, NULL }
21962 };
21963
21964 /* Initialize the GCC target structure. */
21965 #undef TARGET_ATTRIBUTE_TABLE
21966 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
21967 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21968 # undef TARGET_MERGE_DECL_ATTRIBUTES
21969 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
21970 #endif
21971
21972 #undef TARGET_COMP_TYPE_ATTRIBUTES
21973 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
21974
21975 #undef TARGET_INIT_BUILTINS
21976 #define TARGET_INIT_BUILTINS ix86_init_builtins
21977 #undef TARGET_EXPAND_BUILTIN
21978 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
21979
21980 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
21981 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
21982 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
21983 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
21984
21985 #undef TARGET_ASM_FUNCTION_EPILOGUE
21986 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
21987
21988 #undef TARGET_ENCODE_SECTION_INFO
21989 #ifndef SUBTARGET_ENCODE_SECTION_INFO
21990 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
21991 #else
21992 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
21993 #endif
21994
21995 #undef TARGET_ASM_OPEN_PAREN
21996 #define TARGET_ASM_OPEN_PAREN ""
21997 #undef TARGET_ASM_CLOSE_PAREN
21998 #define TARGET_ASM_CLOSE_PAREN ""
21999
22000 #undef TARGET_ASM_ALIGNED_HI_OP
22001 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
22002 #undef TARGET_ASM_ALIGNED_SI_OP
22003 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
22004 #ifdef ASM_QUAD
22005 #undef TARGET_ASM_ALIGNED_DI_OP
22006 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
22007 #endif
22008
22009 #undef TARGET_ASM_UNALIGNED_HI_OP
22010 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
22011 #undef TARGET_ASM_UNALIGNED_SI_OP
22012 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
22013 #undef TARGET_ASM_UNALIGNED_DI_OP
22014 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
22015
22016 #undef TARGET_SCHED_ADJUST_COST
22017 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
22018 #undef TARGET_SCHED_ISSUE_RATE
22019 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
22020 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
22021 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
22022 ia32_multipass_dfa_lookahead
22023
22024 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
22025 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
22026
22027 #ifdef HAVE_AS_TLS
22028 #undef TARGET_HAVE_TLS
22029 #define TARGET_HAVE_TLS true
22030 #endif
22031 #undef TARGET_CANNOT_FORCE_CONST_MEM
22032 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
22033 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
22034 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
22035
22036 #undef TARGET_DELEGITIMIZE_ADDRESS
22037 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
22038
22039 #undef TARGET_MS_BITFIELD_LAYOUT_P
22040 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
22041
22042 #if TARGET_MACHO
22043 #undef TARGET_BINDS_LOCAL_P
22044 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
22045 #endif
22046 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
22047 #undef TARGET_BINDS_LOCAL_P
22048 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
22049 #endif
22050
22051 #undef TARGET_ASM_OUTPUT_MI_THUNK
22052 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
22053 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
22054 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
22055
22056 #undef TARGET_ASM_FILE_START
22057 #define TARGET_ASM_FILE_START x86_file_start
22058
22059 #undef TARGET_DEFAULT_TARGET_FLAGS
22060 #define TARGET_DEFAULT_TARGET_FLAGS \
22061 (TARGET_DEFAULT \
22062 | TARGET_64BIT_DEFAULT \
22063 | TARGET_SUBTARGET_DEFAULT \
22064 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
22065
22066 #undef TARGET_HANDLE_OPTION
22067 #define TARGET_HANDLE_OPTION ix86_handle_option
22068
22069 #undef TARGET_RTX_COSTS
22070 #define TARGET_RTX_COSTS ix86_rtx_costs
22071 #undef TARGET_ADDRESS_COST
22072 #define TARGET_ADDRESS_COST ix86_address_cost
22073
22074 #undef TARGET_FIXED_CONDITION_CODE_REGS
22075 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
22076 #undef TARGET_CC_MODES_COMPATIBLE
22077 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
22078
22079 #undef TARGET_MACHINE_DEPENDENT_REORG
22080 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
22081
22082 #undef TARGET_BUILD_BUILTIN_VA_LIST
22083 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
22084
22085 #undef TARGET_MD_ASM_CLOBBERS
22086 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
22087
22088 #undef TARGET_PROMOTE_PROTOTYPES
22089 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
22090 #undef TARGET_STRUCT_VALUE_RTX
22091 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
22092 #undef TARGET_SETUP_INCOMING_VARARGS
22093 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
22094 #undef TARGET_MUST_PASS_IN_STACK
22095 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
22096 #undef TARGET_PASS_BY_REFERENCE
22097 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
22098 #undef TARGET_INTERNAL_ARG_POINTER
22099 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
22100 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
22101 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
22102 #undef TARGET_STRICT_ARGUMENT_NAMING
22103 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
22104
22105 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
22106 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
22107
22108 #undef TARGET_SCALAR_MODE_SUPPORTED_P
22109 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
22110
22111 #undef TARGET_VECTOR_MODE_SUPPORTED_P
22112 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
22113
22114 #ifdef HAVE_AS_TLS
22115 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
22116 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
22117 #endif
22118
22119 #ifdef SUBTARGET_INSERT_ATTRIBUTES
22120 #undef TARGET_INSERT_ATTRIBUTES
22121 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
22122 #endif
22123
22124 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
22125 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
22126
22127 #undef TARGET_STACK_PROTECT_FAIL
22128 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
22129
22130 #undef TARGET_FUNCTION_VALUE
22131 #define TARGET_FUNCTION_VALUE ix86_function_value
22132
22133 struct gcc_target targetm = TARGET_INITIALIZER;
22134 \f
22135 #include "gt-i386.h"