gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return the index of the given mode in the mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
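/* Illustrative use of MODE_INDEX (a sketch, not code the compiler runs):
   the multiply and divide cost arrays in the tables below have five
   entries ordered QI, HI, SI, DI, other, so a lookup such as

     some_cost_table->divide[MODE_INDEX (SImode)]

   picks the third entry, while any mode that is none of QI/HI/SI/DImode
   (TImode, say) falls through to index 4, the "other" slot.  The field
   name here is a placeholder for illustration only.  */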
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
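/* Worked example of the assumption above (COSTS_N_INSNS (N) == (N) * 4):
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so when tuning for size a
   two-byte addition weighs the same as one average instruction, and each
   extra byte of encoding adds half an instruction's worth of cost.  */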
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
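/* How to read the stringop_algs initializers in the cost tables below
   (an informal sketch): the first member is the algorithm assumed when
   the block size is unknown at compile time; each following {max, alg}
   pair selects ALG for known sizes up to MAX bytes, and a MAX of -1
   covers everything larger.  For example, a descriptor such as

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: unknown size -> library call, sizes up to 256 bytes ->
   rep movsl, anything bigger -> library call.  DUMMY_STRINGOP_ALGS above
   stands in for the 64-bit variant on processors that never run in
   64-bit mode.  */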
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of l1 cache */
116 0, /* size of l2 cache */
117 0, /* size of prefetch block */
118 0, /* number of parallel prefetches */
119 2, /* Branch cost */
120 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
121 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
122 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
123 COSTS_N_BYTES (2), /* cost of FABS instruction. */
124 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
125 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 };
131
132 /* Processor costs (relative to an add) */
133 static const
134 struct processor_costs i386_cost = { /* 386 specific costs */
135 COSTS_N_INSNS (1), /* cost of an add instruction */
136 COSTS_N_INSNS (1), /* cost of a lea instruction */
137 COSTS_N_INSNS (3), /* variable shift costs */
138 COSTS_N_INSNS (2), /* constant shift costs */
139 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
140 COSTS_N_INSNS (6), /* HI */
141 COSTS_N_INSNS (6), /* SI */
142 COSTS_N_INSNS (6), /* DI */
143 COSTS_N_INSNS (6)}, /* other */
144 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
145 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
146 COSTS_N_INSNS (23), /* HI */
147 COSTS_N_INSNS (23), /* SI */
148 COSTS_N_INSNS (23), /* DI */
149 COSTS_N_INSNS (23)}, /* other */
150 COSTS_N_INSNS (3), /* cost of movsx */
151 COSTS_N_INSNS (2), /* cost of movzx */
152 15, /* "large" insn */
153 3, /* MOVE_RATIO */
154 4, /* cost for loading QImode using movzbl */
155 {2, 4, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 4, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {8, 8, 8}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {8, 8, 8}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 2, /* cost of moving MMX register */
165 {4, 8}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {4, 8}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {4, 8, 16}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {4, 8, 16}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
179 1, /* Branch cost */
180 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
181 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
182 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
183 COSTS_N_INSNS (22), /* cost of FABS instruction. */
184 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
185 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
186 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
187 DUMMY_STRINGOP_ALGS},
188 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
189 DUMMY_STRINGOP_ALGS},
190 };
191
192 static const
193 struct processor_costs i486_cost = { /* 486 specific costs */
194 COSTS_N_INSNS (1), /* cost of an add instruction */
195 COSTS_N_INSNS (1), /* cost of a lea instruction */
196 COSTS_N_INSNS (3), /* variable shift costs */
197 COSTS_N_INSNS (2), /* constant shift costs */
198 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
199 COSTS_N_INSNS (12), /* HI */
200 COSTS_N_INSNS (12), /* SI */
201 COSTS_N_INSNS (12), /* DI */
202 COSTS_N_INSNS (12)}, /* other */
203 1, /* cost of multiply per each bit set */
204 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
205 COSTS_N_INSNS (40), /* HI */
206 COSTS_N_INSNS (40), /* SI */
207 COSTS_N_INSNS (40), /* DI */
208 COSTS_N_INSNS (40)}, /* other */
209 COSTS_N_INSNS (3), /* cost of movsx */
210 COSTS_N_INSNS (2), /* cost of movzx */
211 15, /* "large" insn */
212 3, /* MOVE_RATIO */
213 4, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {8, 8, 8}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {8, 8, 8}, /* cost of storing fp registers
222 in SFmode, DFmode and XFmode */
223 2, /* cost of moving MMX register */
224 {4, 8}, /* cost of loading MMX registers
225 in SImode and DImode */
226 {4, 8}, /* cost of storing MMX registers
227 in SImode and DImode */
228 2, /* cost of moving SSE register */
229 {4, 8, 16}, /* cost of loading SSE registers
230 in SImode, DImode and TImode */
231 {4, 8, 16}, /* cost of storing SSE registers
232 in SImode, DImode and TImode */
233 3, /* MMX or SSE register to integer */
234 4, /* size of l1 cache. 486 has 8kB cache
235 shared for code and data, so 4kB is
236 not really precise. */
237 4, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (3), /* cost of FABS instruction. */
245 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
247 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
248 DUMMY_STRINGOP_ALGS},
249 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
250 DUMMY_STRINGOP_ALGS}
251 };
252
253 static const
254 struct processor_costs pentium_cost = {
255 COSTS_N_INSNS (1), /* cost of an add instruction */
256 COSTS_N_INSNS (1), /* cost of a lea instruction */
257 COSTS_N_INSNS (4), /* variable shift costs */
258 COSTS_N_INSNS (1), /* constant shift costs */
259 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
260 COSTS_N_INSNS (11), /* HI */
261 COSTS_N_INSNS (11), /* SI */
262 COSTS_N_INSNS (11), /* DI */
263 COSTS_N_INSNS (11)}, /* other */
264 0, /* cost of multiply per each bit set */
265 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
266 COSTS_N_INSNS (25), /* HI */
267 COSTS_N_INSNS (25), /* SI */
268 COSTS_N_INSNS (25), /* DI */
269 COSTS_N_INSNS (25)}, /* other */
270 COSTS_N_INSNS (3), /* cost of movsx */
271 COSTS_N_INSNS (2), /* cost of movzx */
272 8, /* "large" insn */
273 6, /* MOVE_RATIO */
274 6, /* cost for loading QImode using movzbl */
275 {2, 4, 2}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 4, 2}, /* cost of storing integer registers */
279 2, /* cost of reg,reg fld/fst */
280 {2, 2, 6}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 6}, /* cost of storing fp registers
283 in SFmode, DFmode and XFmode */
284 8, /* cost of moving MMX register */
285 {8, 8}, /* cost of loading MMX registers
286 in SImode and DImode */
287 {8, 8}, /* cost of storing MMX registers
288 in SImode and DImode */
289 2, /* cost of moving SSE register */
290 {4, 8, 16}, /* cost of loading SSE registers
291 in SImode, DImode and TImode */
292 {4, 8, 16}, /* cost of storing SSE registers
293 in SImode, DImode and TImode */
294 3, /* MMX or SSE register to integer */
295 8, /* size of l1 cache. */
296 8, /* size of l2 cache */
297 0, /* size of prefetch block */
298 0, /* number of parallel prefetches */
299 2, /* Branch cost */
300 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
301 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
302 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
303 COSTS_N_INSNS (1), /* cost of FABS instruction. */
304 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
305 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
306 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
307 DUMMY_STRINGOP_ALGS},
308 {{libcall, {{-1, rep_prefix_4_byte}}},
309 DUMMY_STRINGOP_ALGS}
310 };
311
312 static const
313 struct processor_costs pentiumpro_cost = {
314 COSTS_N_INSNS (1), /* cost of an add instruction */
315 COSTS_N_INSNS (1), /* cost of a lea instruction */
316 COSTS_N_INSNS (1), /* variable shift costs */
317 COSTS_N_INSNS (1), /* constant shift costs */
318 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
319 COSTS_N_INSNS (4), /* HI */
320 COSTS_N_INSNS (4), /* SI */
321 COSTS_N_INSNS (4), /* DI */
322 COSTS_N_INSNS (4)}, /* other */
323 0, /* cost of multiply per each bit set */
324 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
325 COSTS_N_INSNS (17), /* HI */
326 COSTS_N_INSNS (17), /* SI */
327 COSTS_N_INSNS (17), /* DI */
328 COSTS_N_INSNS (17)}, /* other */
329 COSTS_N_INSNS (1), /* cost of movsx */
330 COSTS_N_INSNS (1), /* cost of movzx */
331 8, /* "large" insn */
332 6, /* MOVE_RATIO */
333 2, /* cost for loading QImode using movzbl */
334 {4, 4, 4}, /* cost of loading integer registers
335 in QImode, HImode and SImode.
336 Relative to reg-reg move (2). */
337 {2, 2, 2}, /* cost of storing integer registers */
338 2, /* cost of reg,reg fld/fst */
339 {2, 2, 6}, /* cost of loading fp registers
340 in SFmode, DFmode and XFmode */
341 {4, 4, 6}, /* cost of storing fp registers
342 in SFmode, DFmode and XFmode */
343 2, /* cost of moving MMX register */
344 {2, 2}, /* cost of loading MMX registers
345 in SImode and DImode */
346 {2, 2}, /* cost of storing MMX registers
347 in SImode and DImode */
348 2, /* cost of moving SSE register */
349 {2, 2, 8}, /* cost of loading SSE registers
350 in SImode, DImode and TImode */
351 {2, 2, 8}, /* cost of storing SSE registers
352 in SImode, DImode and TImode */
353 3, /* MMX or SSE register to integer */
354 8, /* size of l1 cache. */
355 256, /* size of l2 cache */
356 32, /* size of prefetch block */
357 6, /* number of parallel prefetches */
358 2, /* Branch cost */
359 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
360 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
361 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
362 COSTS_N_INSNS (2), /* cost of FABS instruction. */
363 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
364 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
365 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
366 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
367 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
368 more expensive startup time in the CPU, but past 4K the difference is down in the noise.
369 */
370 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
371 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
372 DUMMY_STRINGOP_ALGS},
373 {{rep_prefix_4_byte, {{1024, unrolled_loop},
374 {8192, rep_prefix_4_byte}, {-1, libcall}}},
375 DUMMY_STRINGOP_ALGS}
376 };
377
378 static const
379 struct processor_costs geode_cost = {
380 COSTS_N_INSNS (1), /* cost of an add instruction */
381 COSTS_N_INSNS (1), /* cost of a lea instruction */
382 COSTS_N_INSNS (2), /* variable shift costs */
383 COSTS_N_INSNS (1), /* constant shift costs */
384 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
385 COSTS_N_INSNS (4), /* HI */
386 COSTS_N_INSNS (7), /* SI */
387 COSTS_N_INSNS (7), /* DI */
388 COSTS_N_INSNS (7)}, /* other */
389 0, /* cost of multiply per each bit set */
390 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
391 COSTS_N_INSNS (23), /* HI */
392 COSTS_N_INSNS (39), /* SI */
393 COSTS_N_INSNS (39), /* DI */
394 COSTS_N_INSNS (39)}, /* other */
395 COSTS_N_INSNS (1), /* cost of movsx */
396 COSTS_N_INSNS (1), /* cost of movzx */
397 8, /* "large" insn */
398 4, /* MOVE_RATIO */
399 1, /* cost for loading QImode using movzbl */
400 {1, 1, 1}, /* cost of loading integer registers
401 in QImode, HImode and SImode.
402 Relative to reg-reg move (2). */
403 {1, 1, 1}, /* cost of storing integer registers */
404 1, /* cost of reg,reg fld/fst */
405 {1, 1, 1}, /* cost of loading fp registers
406 in SFmode, DFmode and XFmode */
407 {4, 6, 6}, /* cost of storing fp registers
408 in SFmode, DFmode and XFmode */
409
410 1, /* cost of moving MMX register */
411 {1, 1}, /* cost of loading MMX registers
412 in SImode and DImode */
413 {1, 1}, /* cost of storing MMX registers
414 in SImode and DImode */
415 1, /* cost of moving SSE register */
416 {1, 1, 1}, /* cost of loading SSE registers
417 in SImode, DImode and TImode */
418 {1, 1, 1}, /* cost of storing SSE registers
419 in SImode, DImode and TImode */
420 1, /* MMX or SSE register to integer */
421 64, /* size of l1 cache. */
422 128, /* size of l2 cache. */
423 32, /* size of prefetch block */
424 1, /* number of parallel prefetches */
425 1, /* Branch cost */
426 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (1), /* cost of FABS instruction. */
430 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
432 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
433 DUMMY_STRINGOP_ALGS},
434 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
435 DUMMY_STRINGOP_ALGS}
436 };
437
438 static const
439 struct processor_costs k6_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (2), /* cost of a lea instruction */
442 COSTS_N_INSNS (1), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (3), /* HI */
446 COSTS_N_INSNS (3), /* SI */
447 COSTS_N_INSNS (3), /* DI */
448 COSTS_N_INSNS (3)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (18), /* HI */
452 COSTS_N_INSNS (18), /* SI */
453 COSTS_N_INSNS (18), /* DI */
454 COSTS_N_INSNS (18)}, /* other */
455 COSTS_N_INSNS (2), /* cost of movsx */
456 COSTS_N_INSNS (2), /* cost of movzx */
457 8, /* "large" insn */
458 4, /* MOVE_RATIO */
459 3, /* cost for loading QImode using movzbl */
460 {4, 5, 4}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {2, 3, 2}, /* cost of storing integer registers */
464 4, /* cost of reg,reg fld/fst */
465 {6, 6, 6}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 4, 4}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469 2, /* cost of moving MMX register */
470 {2, 2}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {2, 2}, /* cost of storing MMX registers
473 in SImode and DImode */
474 2, /* cost of moving SSE register */
475 {2, 2, 8}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {2, 2, 8}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 6, /* MMX or SSE register to integer */
480 32, /* size of l1 cache. */
481 32, /* size of l2 cache. Some models
482 have integrated l2 cache, but
483 optimizing for k6 is not important
484 enough to worry about that. */
485 32, /* size of prefetch block */
486 1, /* number of parallel prefetches */
487 1, /* Branch cost */
488 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
497 DUMMY_STRINGOP_ALGS}
498 };
499
500 static const
501 struct processor_costs athlon_cost = {
502 COSTS_N_INSNS (1), /* cost of an add instruction */
503 COSTS_N_INSNS (2), /* cost of a lea instruction */
504 COSTS_N_INSNS (1), /* variable shift costs */
505 COSTS_N_INSNS (1), /* constant shift costs */
506 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
507 COSTS_N_INSNS (5), /* HI */
508 COSTS_N_INSNS (5), /* SI */
509 COSTS_N_INSNS (5), /* DI */
510 COSTS_N_INSNS (5)}, /* other */
511 0, /* cost of multiply per each bit set */
512 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
513 COSTS_N_INSNS (26), /* HI */
514 COSTS_N_INSNS (42), /* SI */
515 COSTS_N_INSNS (74), /* DI */
516 COSTS_N_INSNS (74)}, /* other */
517 COSTS_N_INSNS (1), /* cost of movsx */
518 COSTS_N_INSNS (1), /* cost of movzx */
519 8, /* "large" insn */
520 9, /* MOVE_RATIO */
521 4, /* cost for loading QImode using movzbl */
522 {3, 4, 3}, /* cost of loading integer registers
523 in QImode, HImode and SImode.
524 Relative to reg-reg move (2). */
525 {3, 4, 3}, /* cost of storing integer registers */
526 4, /* cost of reg,reg fld/fst */
527 {4, 4, 12}, /* cost of loading fp registers
528 in SFmode, DFmode and XFmode */
529 {6, 6, 8}, /* cost of storing fp registers
530 in SFmode, DFmode and XFmode */
531 2, /* cost of moving MMX register */
532 {4, 4}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {4, 4}, /* cost of storing MMX registers
535 in SImode and DImode */
536 2, /* cost of moving SSE register */
537 {4, 4, 6}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {4, 4, 5}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 5, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 256, /* size of l2 cache. */
544 64, /* size of prefetch block */
545 6, /* number of parallel prefetches */
546 5, /* Branch cost */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
554 than K8 does. Alignment becomes important after 8 bytes for memcpy and
555 128 bytes for memset. */
556 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
557 DUMMY_STRINGOP_ALGS},
558 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
559 DUMMY_STRINGOP_ALGS}
560 };
561
562 static const
563 struct processor_costs k8_cost = {
564 COSTS_N_INSNS (1), /* cost of an add instruction */
565 COSTS_N_INSNS (2), /* cost of a lea instruction */
566 COSTS_N_INSNS (1), /* variable shift costs */
567 COSTS_N_INSNS (1), /* constant shift costs */
568 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
569 COSTS_N_INSNS (4), /* HI */
570 COSTS_N_INSNS (3), /* SI */
571 COSTS_N_INSNS (4), /* DI */
572 COSTS_N_INSNS (5)}, /* other */
573 0, /* cost of multiply per each bit set */
574 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
575 COSTS_N_INSNS (26), /* HI */
576 COSTS_N_INSNS (42), /* SI */
577 COSTS_N_INSNS (74), /* DI */
578 COSTS_N_INSNS (74)}, /* other */
579 COSTS_N_INSNS (1), /* cost of movsx */
580 COSTS_N_INSNS (1), /* cost of movzx */
581 8, /* "large" insn */
582 9, /* MOVE_RATIO */
583 4, /* cost for loading QImode using movzbl */
584 {3, 4, 3}, /* cost of loading integer registers
585 in QImode, HImode and SImode.
586 Relative to reg-reg move (2). */
587 {3, 4, 3}, /* cost of storing integer registers */
588 4, /* cost of reg,reg fld/fst */
589 {4, 4, 12}, /* cost of loading fp registers
590 in SFmode, DFmode and XFmode */
591 {6, 6, 8}, /* cost of storing fp registers
592 in SFmode, DFmode and XFmode */
593 2, /* cost of moving MMX register */
594 {3, 3}, /* cost of loading MMX registers
595 in SImode and DImode */
596 {4, 4}, /* cost of storing MMX registers
597 in SImode and DImode */
598 2, /* cost of moving SSE register */
599 {4, 3, 6}, /* cost of loading SSE registers
600 in SImode, DImode and TImode */
601 {4, 4, 5}, /* cost of storing SSE registers
602 in SImode, DImode and TImode */
603 5, /* MMX or SSE register to integer */
604 64, /* size of l1 cache. */
605 512, /* size of l2 cache. */
606 64, /* size of prefetch block */
607 /* New AMD processors never drop prefetches; if they cannot be performed
608 immediately, they are queued. We set the number of simultaneous prefetches
609 to a large constant to reflect this (it is probably not a good idea to leave
610 the number of prefetches entirely unlimited, as their execution also takes some
611 time). */
612 100, /* number of parallel prefetches */
613 5, /* Branch cost */
614 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (2), /* cost of FABS instruction. */
618 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
620 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
621 blocks it is better to use a loop. For large blocks, a libcall can do
622 nontemporal accesses and beat inline code considerably. */
623 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
624 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
625 {{libcall, {{8, loop}, {24, unrolled_loop},
626 {2048, rep_prefix_4_byte}, {-1, libcall}}},
627 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
628 };
629
630 struct processor_costs amdfam10_cost = {
631 COSTS_N_INSNS (1), /* cost of an add instruction */
632 COSTS_N_INSNS (2), /* cost of a lea instruction */
633 COSTS_N_INSNS (1), /* variable shift costs */
634 COSTS_N_INSNS (1), /* constant shift costs */
635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
636 COSTS_N_INSNS (4), /* HI */
637 COSTS_N_INSNS (3), /* SI */
638 COSTS_N_INSNS (4), /* DI */
639 COSTS_N_INSNS (5)}, /* other */
640 0, /* cost of multiply per each bit set */
641 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
642 COSTS_N_INSNS (35), /* HI */
643 COSTS_N_INSNS (51), /* SI */
644 COSTS_N_INSNS (83), /* DI */
645 COSTS_N_INSNS (83)}, /* other */
646 COSTS_N_INSNS (1), /* cost of movsx */
647 COSTS_N_INSNS (1), /* cost of movzx */
648 8, /* "large" insn */
649 9, /* MOVE_RATIO */
650 4, /* cost for loading QImode using movzbl */
651 {3, 4, 3}, /* cost of loading integer registers
652 in QImode, HImode and SImode.
653 Relative to reg-reg move (2). */
654 {3, 4, 3}, /* cost of storing integer registers */
655 4, /* cost of reg,reg fld/fst */
656 {4, 4, 12}, /* cost of loading fp registers
657 in SFmode, DFmode and XFmode */
658 {6, 6, 8}, /* cost of storing fp registers
659 in SFmode, DFmode and XFmode */
660 2, /* cost of moving MMX register */
661 {3, 3}, /* cost of loading MMX registers
662 in SImode and DImode */
663 {4, 4}, /* cost of storing MMX registers
664 in SImode and DImode */
665 2, /* cost of moving SSE register */
666 {4, 4, 3}, /* cost of loading SSE registers
667 in SImode, DImode and TImode */
668 {4, 4, 5}, /* cost of storing SSE registers
669 in SImode, DImode and TImode */
670 3, /* MMX or SSE register to integer */
671 /* On K8:
672 MOVD reg64, xmmreg   Double   FSTORE 4
673 MOVD reg32, xmmreg   Double   FSTORE 4
674 On AMDFAM10:
675 MOVD reg64, xmmreg   Double   FADD 3
676                               1/1  1/1
677 MOVD reg32, xmmreg   Double   FADD 3
678                               1/1  1/1 */
679 64, /* size of l1 cache. */
680 512, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 /* New AMD processors never drop prefetches; if they cannot be performed
683 immediately, they are queued. We set the number of simultaneous prefetches
684 to a large constant to reflect this (it is probably not a good idea to leave
685 the number of prefetches entirely unlimited, as their execution also takes some
686 time). */
687 100, /* number of parallel prefetches */
688 5, /* Branch cost */
689 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
690 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
691 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
692 COSTS_N_INSNS (2), /* cost of FABS instruction. */
693 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
694 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
695
696 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
697 very small blocks it is better to use a loop. For large blocks, a libcall can
698 do nontemporal accesses and beat inline code considerably. */
699 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
700 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
701 {{libcall, {{8, loop}, {24, unrolled_loop},
702 {2048, rep_prefix_4_byte}, {-1, libcall}}},
703 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
704 };
705
706 static const
707 struct processor_costs pentium4_cost = {
708 COSTS_N_INSNS (1), /* cost of an add instruction */
709 COSTS_N_INSNS (3), /* cost of a lea instruction */
710 COSTS_N_INSNS (4), /* variable shift costs */
711 COSTS_N_INSNS (4), /* constant shift costs */
712 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
713 COSTS_N_INSNS (15), /* HI */
714 COSTS_N_INSNS (15), /* SI */
715 COSTS_N_INSNS (15), /* DI */
716 COSTS_N_INSNS (15)}, /* other */
717 0, /* cost of multiply per each bit set */
718 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
719 COSTS_N_INSNS (56), /* HI */
720 COSTS_N_INSNS (56), /* SI */
721 COSTS_N_INSNS (56), /* DI */
722 COSTS_N_INSNS (56)}, /* other */
723 COSTS_N_INSNS (1), /* cost of movsx */
724 COSTS_N_INSNS (1), /* cost of movzx */
725 16, /* "large" insn */
726 6, /* MOVE_RATIO */
727 2, /* cost for loading QImode using movzbl */
728 {4, 5, 4}, /* cost of loading integer registers
729 in QImode, HImode and SImode.
730 Relative to reg-reg move (2). */
731 {2, 3, 2}, /* cost of storing integer registers */
732 2, /* cost of reg,reg fld/fst */
733 {2, 2, 6}, /* cost of loading fp registers
734 in SFmode, DFmode and XFmode */
735 {4, 4, 6}, /* cost of storing fp registers
736 in SFmode, DFmode and XFmode */
737 2, /* cost of moving MMX register */
738 {2, 2}, /* cost of loading MMX registers
739 in SImode and DImode */
740 {2, 2}, /* cost of storing MMX registers
741 in SImode and DImode */
742 12, /* cost of moving SSE register */
743 {12, 12, 12}, /* cost of loading SSE registers
744 in SImode, DImode and TImode */
745 {2, 2, 8}, /* cost of storing SSE registers
746 in SImode, DImode and TImode */
747 10, /* MMX or SSE register to integer */
748 8, /* size of l1 cache. */
749 256, /* size of l2 cache. */
750 64, /* size of prefetch block */
751 6, /* number of parallel prefetches */
752 2, /* Branch cost */
753 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
754 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
755 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
756 COSTS_N_INSNS (2), /* cost of FABS instruction. */
757 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
758 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
759 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
760 DUMMY_STRINGOP_ALGS},
761 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
762 {-1, libcall}}},
763 DUMMY_STRINGOP_ALGS},
764 };
765
766 static const
767 struct processor_costs nocona_cost = {
768 COSTS_N_INSNS (1), /* cost of an add instruction */
769 COSTS_N_INSNS (1), /* cost of a lea instruction */
770 COSTS_N_INSNS (1), /* variable shift costs */
771 COSTS_N_INSNS (1), /* constant shift costs */
772 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
773 COSTS_N_INSNS (10), /* HI */
774 COSTS_N_INSNS (10), /* SI */
775 COSTS_N_INSNS (10), /* DI */
776 COSTS_N_INSNS (10)}, /* other */
777 0, /* cost of multiply per each bit set */
778 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
779 COSTS_N_INSNS (66), /* HI */
780 COSTS_N_INSNS (66), /* SI */
781 COSTS_N_INSNS (66), /* DI */
782 COSTS_N_INSNS (66)}, /* other */
783 COSTS_N_INSNS (1), /* cost of movsx */
784 COSTS_N_INSNS (1), /* cost of movzx */
785 16, /* "large" insn */
786 17, /* MOVE_RATIO */
787 4, /* cost for loading QImode using movzbl */
788 {4, 4, 4}, /* cost of loading integer registers
789 in QImode, HImode and SImode.
790 Relative to reg-reg move (2). */
791 {4, 4, 4}, /* cost of storing integer registers */
792 3, /* cost of reg,reg fld/fst */
793 {12, 12, 12}, /* cost of loading fp registers
794 in SFmode, DFmode and XFmode */
795 {4, 4, 4}, /* cost of storing fp registers
796 in SFmode, DFmode and XFmode */
797 6, /* cost of moving MMX register */
798 {12, 12}, /* cost of loading MMX registers
799 in SImode and DImode */
800 {12, 12}, /* cost of storing MMX registers
801 in SImode and DImode */
802 6, /* cost of moving SSE register */
803 {12, 12, 12}, /* cost of loading SSE registers
804 in SImode, DImode and TImode */
805 {12, 12, 12}, /* cost of storing SSE registers
806 in SImode, DImode and TImode */
807 8, /* MMX or SSE register to integer */
808 8, /* size of l1 cache. */
809 1024, /* size of l2 cache. */
810 128, /* size of prefetch block */
811 8, /* number of parallel prefetches */
812 1, /* Branch cost */
813 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
814 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
815 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
816 COSTS_N_INSNS (3), /* cost of FABS instruction. */
817 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
818 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
819 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
820 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
821 {100000, unrolled_loop}, {-1, libcall}}}},
822 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
823 {-1, libcall}}},
824 {libcall, {{24, loop}, {64, unrolled_loop},
825 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
826 };
827
828 static const
829 struct processor_costs core2_cost = {
830 COSTS_N_INSNS (1), /* cost of an add instruction */
831 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
832 COSTS_N_INSNS (1), /* variable shift costs */
833 COSTS_N_INSNS (1), /* constant shift costs */
834 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
835 COSTS_N_INSNS (3), /* HI */
836 COSTS_N_INSNS (3), /* SI */
837 COSTS_N_INSNS (3), /* DI */
838 COSTS_N_INSNS (3)}, /* other */
839 0, /* cost of multiply per each bit set */
840 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
841 COSTS_N_INSNS (22), /* HI */
842 COSTS_N_INSNS (22), /* SI */
843 COSTS_N_INSNS (22), /* DI */
844 COSTS_N_INSNS (22)}, /* other */
845 COSTS_N_INSNS (1), /* cost of movsx */
846 COSTS_N_INSNS (1), /* cost of movzx */
847 8, /* "large" insn */
848 16, /* MOVE_RATIO */
849 2, /* cost for loading QImode using movzbl */
850 {6, 6, 6}, /* cost of loading integer registers
851 in QImode, HImode and SImode.
852 Relative to reg-reg move (2). */
853 {4, 4, 4}, /* cost of storing integer registers */
854 2, /* cost of reg,reg fld/fst */
855 {6, 6, 6}, /* cost of loading fp registers
856 in SFmode, DFmode and XFmode */
857 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
858 2, /* cost of moving MMX register */
859 {6, 6}, /* cost of loading MMX registers
860 in SImode and DImode */
861 {4, 4}, /* cost of storing MMX registers
862 in SImode and DImode */
863 2, /* cost of moving SSE register */
864 {6, 6, 6}, /* cost of loading SSE registers
865 in SImode, DImode and TImode */
866 {4, 4, 4}, /* cost of storing SSE registers
867 in SImode, DImode and TImode */
868 2, /* MMX or SSE register to integer */
869 32, /* size of l1 cache. */
870 2048, /* size of l2 cache. */
871 128, /* size of prefetch block */
872 8, /* number of parallel prefetches */
873 3, /* Branch cost */
874 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
875 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
876 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
877 COSTS_N_INSNS (1), /* cost of FABS instruction. */
878 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
879 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
880 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
881 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
882 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
883 {{libcall, {{8, loop}, {15, unrolled_loop},
884 {2048, rep_prefix_4_byte}, {-1, libcall}}},
885 {libcall, {{24, loop}, {32, unrolled_loop},
886 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
887 };
888
889 /* Generic64 should produce code tuned for Nocona and K8. */
890 static const
891 struct processor_costs generic64_cost = {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 /* On all chips taken into consideration, lea is 2 cycles or more. With
894 that cost, however, our current implementation of synth_mult results in
895 the use of unnecessary temporary registers, causing a regression on several
896 SPECfp benchmarks. */
897 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
898 COSTS_N_INSNS (1), /* variable shift costs */
899 COSTS_N_INSNS (1), /* constant shift costs */
900 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
901 COSTS_N_INSNS (4), /* HI */
902 COSTS_N_INSNS (3), /* SI */
903 COSTS_N_INSNS (4), /* DI */
904 COSTS_N_INSNS (2)}, /* other */
905 0, /* cost of multiply per each bit set */
906 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
907 COSTS_N_INSNS (26), /* HI */
908 COSTS_N_INSNS (42), /* SI */
909 COSTS_N_INSNS (74), /* DI */
910 COSTS_N_INSNS (74)}, /* other */
911 COSTS_N_INSNS (1), /* cost of movsx */
912 COSTS_N_INSNS (1), /* cost of movzx */
913 8, /* "large" insn */
914 17, /* MOVE_RATIO */
915 4, /* cost for loading QImode using movzbl */
916 {4, 4, 4}, /* cost of loading integer registers
917 in QImode, HImode and SImode.
918 Relative to reg-reg move (2). */
919 {4, 4, 4}, /* cost of storing integer registers */
920 4, /* cost of reg,reg fld/fst */
921 {12, 12, 12}, /* cost of loading fp registers
922 in SFmode, DFmode and XFmode */
923 {6, 6, 8}, /* cost of storing fp registers
924 in SFmode, DFmode and XFmode */
925 2, /* cost of moving MMX register */
926 {8, 8}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {8, 8}, /* cost of storing MMX registers
929 in SImode and DImode */
930 2, /* cost of moving SSE register */
931 {8, 8, 8}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {8, 8, 8}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 5, /* MMX or SSE register to integer */
936 32, /* size of l1 cache. */
937 512, /* size of l2 cache. */
938 64, /* size of prefetch block */
939 6, /* number of parallel prefetches */
940 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
941 is increased to the perhaps more appropriate value of 5. */
942 3, /* Branch cost */
943 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
944 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
945 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
946 COSTS_N_INSNS (8), /* cost of FABS instruction. */
947 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
948 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
949 {DUMMY_STRINGOP_ALGS,
950 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
951 {DUMMY_STRINGOP_ALGS,
952 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
953 };
954
955 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
956 static const
957 struct processor_costs generic32_cost = {
958 COSTS_N_INSNS (1), /* cost of an add instruction */
959 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
960 COSTS_N_INSNS (1), /* variable shift costs */
961 COSTS_N_INSNS (1), /* constant shift costs */
962 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
963 COSTS_N_INSNS (4), /* HI */
964 COSTS_N_INSNS (3), /* SI */
965 COSTS_N_INSNS (4), /* DI */
966 COSTS_N_INSNS (2)}, /* other */
967 0, /* cost of multiply per each bit set */
968 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
969 COSTS_N_INSNS (26), /* HI */
970 COSTS_N_INSNS (42), /* SI */
971 COSTS_N_INSNS (74), /* DI */
972 COSTS_N_INSNS (74)}, /* other */
973 COSTS_N_INSNS (1), /* cost of movsx */
974 COSTS_N_INSNS (1), /* cost of movzx */
975 8, /* "large" insn */
976 17, /* MOVE_RATIO */
977 4, /* cost for loading QImode using movzbl */
978 {4, 4, 4}, /* cost of loading integer registers
979 in QImode, HImode and SImode.
980 Relative to reg-reg move (2). */
981 {4, 4, 4}, /* cost of storing integer registers */
982 4, /* cost of reg,reg fld/fst */
983 {12, 12, 12}, /* cost of loading fp registers
984 in SFmode, DFmode and XFmode */
985 {6, 6, 8}, /* cost of storing fp registers
986 in SFmode, DFmode and XFmode */
987 2, /* cost of moving MMX register */
988 {8, 8}, /* cost of loading MMX registers
989 in SImode and DImode */
990 {8, 8}, /* cost of storing MMX registers
991 in SImode and DImode */
992 2, /* cost of moving SSE register */
993 {8, 8, 8}, /* cost of loading SSE registers
994 in SImode, DImode and TImode */
995 {8, 8, 8}, /* cost of storing SSE registers
996 in SImode, DImode and TImode */
997 5, /* MMX or SSE register to integer */
998 32, /* size of l1 cache. */
999 256, /* size of l2 cache. */
1000 64, /* size of prefetch block */
1001 6, /* number of parallel prefetches */
1002 3, /* Branch cost */
1003 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1004 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1005 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1006 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1007 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1008 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1009 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1010 DUMMY_STRINGOP_ALGS},
1011 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1012 DUMMY_STRINGOP_ALGS},
1013 };
1014
1015 const struct processor_costs *ix86_cost = &pentium_cost;
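/* Illustrative sketch of how the selected table is consumed (field names
   are placeholders, not verified against the struct declaration): the
   option-handling code repoints ix86_cost, e.g. to &size_cost when
   optimizing for size and to the table matching -mtune otherwise, and
   consumers then read entries such as ix86_cost->add or
   ix86_cost->mult_init[MODE_INDEX (SImode)], all expressed relative to
   the cost of a register-register add.  */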
1016
1017 /* Processor feature/optimization bitmasks. */
1018 #define m_386 (1<<PROCESSOR_I386)
1019 #define m_486 (1<<PROCESSOR_I486)
1020 #define m_PENT (1<<PROCESSOR_PENTIUM)
1021 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1022 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1023 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1024 #define m_CORE2 (1<<PROCESSOR_CORE2)
1025
1026 #define m_GEODE (1<<PROCESSOR_GEODE)
1027 #define m_K6 (1<<PROCESSOR_K6)
1028 #define m_K6_GEODE (m_K6 | m_GEODE)
1029 #define m_K8 (1<<PROCESSOR_K8)
1030 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1031 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1032 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1033 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1034
1035 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1036 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1037
1038 /* Generic instruction choice should be a common subset of the supported CPUs
1039 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1040 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1041
1042 /* Feature tests against the various tunings. */
1043 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1044 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1045 negatively, so enabling it for Generic64 seems like a good code-size
1046 tradeoff. We can't enable it for 32bit generic because it does not
1047 work well with PPro based chips. */
1048 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1049
1050 /* X86_TUNE_PUSH_MEMORY */
1051 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1052 | m_NOCONA | m_CORE2 | m_GENERIC,
1053
1054 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1055 m_486 | m_PENT,
1056
1057 /* X86_TUNE_USE_BIT_TEST */
1058 m_386,
1059
1060 /* X86_TUNE_UNROLL_STRLEN */
1061 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1064 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1065
1066 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1067 on simulation results, but after the P4 was made no performance benefit
1068 was observed with branch hints; they also increase code size.
1069 As a result, icc never generates branch hints. */
1070 0,
1071
1072 /* X86_TUNE_DOUBLE_WITH_ADD */
1073 ~m_386,
1074
1075 /* X86_TUNE_USE_SAHF */
1076 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1077 | m_NOCONA | m_CORE2 | m_GENERIC,
1078
1079 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1080 partial dependencies. */
1081 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1082 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1083
1084 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1085 register stalls on the Generic32 compilation setting as well. However,
1086 in the current implementation partial register stalls are not eliminated
1087 very well - they can be introduced via subregs synthesized by combine
1088 and can happen in caller/callee saving sequences. Because this option
1089 pays back little on PPro based chips and conflicts with the partial-reg
1090 dependencies used by Athlon/P4 based chips, it is better to leave it off
1091 for generic32 for now. */
1092 m_PPRO,
1093
1094 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1095 m_CORE2 | m_GENERIC,
1096
1097 /* X86_TUNE_USE_HIMODE_FIOP */
1098 m_386 | m_486 | m_K6_GEODE,
1099
1100 /* X86_TUNE_USE_SIMODE_FIOP */
1101 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1102
1103 /* X86_TUNE_USE_MOV0 */
1104 m_K6,
1105
1106 /* X86_TUNE_USE_CLTD */
1107 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1108
1109 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1110 m_PENT4,
1111
1112 /* X86_TUNE_SPLIT_LONG_MOVES */
1113 m_PPRO,
1114
1115 /* X86_TUNE_READ_MODIFY_WRITE */
1116 ~m_PENT,
1117
1118 /* X86_TUNE_READ_MODIFY */
1119 ~(m_PENT | m_PPRO),
1120
1121 /* X86_TUNE_PROMOTE_QIMODE */
1122 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1123 | m_GENERIC /* | m_PENT4 ? */,
1124
1125 /* X86_TUNE_FAST_PREFIX */
1126 ~(m_PENT | m_486 | m_386),
1127
1128 /* X86_TUNE_SINGLE_STRINGOP */
1129 m_386 | m_PENT4 | m_NOCONA,
1130
1131 /* X86_TUNE_QIMODE_MATH */
1132 ~0,
1133
1134 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1135 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1136 might be considered for Generic32 if our scheme for avoiding partial
1137 stalls were more effective. */
1138 ~m_PPRO,
1139
1140 /* X86_TUNE_PROMOTE_QI_REGS */
1141 0,
1142
1143 /* X86_TUNE_PROMOTE_HI_REGS */
1144 m_PPRO,
1145
1146 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1147 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1148
1149 /* X86_TUNE_ADD_ESP_8 */
1150 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1151 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1152
1153 /* X86_TUNE_SUB_ESP_4 */
1154 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1155
1156 /* X86_TUNE_SUB_ESP_8 */
1157 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1158 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1159
1160 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1161 for DFmode copies */
1162 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1163 | m_GENERIC | m_GEODE),
1164
1165 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1166 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1169 conflict here between PPro/Pentium4 based chips that treat 128bit
1170 SSE registers as single units and K8 based chips that divide SSE
1171 registers into two 64bit halves. This knob promotes all store destinations
1172 to 128bit to allow register renaming on 128bit SSE units, but usually
1173 results in one extra microop on 64bit SSE units. Experimental results
1174 show that disabling this option on the P4 brings over a 20% SPECfp regression,
1175 while enabling it on K8 brings a roughly 2.4% regression that can be partly
1176 masked by careful scheduling of moves. */
1177 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1178
1179 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1180 m_AMDFAM10,
1181
1182 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1183 are resolved on SSE register parts instead of whole registers, so we may
1184 maintain just the lower part of scalar values in the proper format, leaving
1185 the upper part undefined. */
1186 m_ATHLON_K8,
1187
1188 /* X86_TUNE_SSE_TYPELESS_STORES */
1189 m_ATHLON_K8_AMDFAM10,
1190
1191 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1192 m_PPRO | m_PENT4 | m_NOCONA,
1193
1194 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1195 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_PROLOGUE_USING_MOVE */
1198 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1199
1200 /* X86_TUNE_EPILOGUE_USING_MOVE */
1201 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1202
1203 /* X86_TUNE_SHIFT1 */
1204 ~m_486,
1205
1206 /* X86_TUNE_USE_FFREEP */
1207 m_ATHLON_K8_AMDFAM10,
1208
1209 /* X86_TUNE_INTER_UNIT_MOVES */
1210 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1211
1212 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1213 than 4 branch instructions in the 16 byte window. */
1214 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1215
1216 /* X86_TUNE_SCHEDULE */
1217 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1218
1219 /* X86_TUNE_USE_BT */
1220 m_ATHLON_K8_AMDFAM10,
1221
1222 /* X86_TUNE_USE_INCDEC */
1223 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1224
1225 /* X86_TUNE_PAD_RETURNS */
1226 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1227
1228 /* X86_TUNE_EXT_80387_CONSTANTS */
1229 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1230
1231 /* X86_TUNE_SHORTEN_X87_SSE */
1232 ~m_K8,
1233
1234 /* X86_TUNE_AVOID_VECTOR_DECODE */
1235 m_K8 | m_GENERIC64,
1236
1237 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1238 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1239 ~(m_386 | m_486),
1240
1241 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
1242 a vector-path instruction on AMD machines. */
1243 m_K8 | m_GENERIC64 | m_AMDFAM10,
1244
1245 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector-path
1246 instruction on AMD machines. */
1247 m_K8 | m_GENERIC64 | m_AMDFAM10,
1248
1249 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1250 than a MOV. */
1251 m_PENT,
1252
1253 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1254 but one byte longer. */
1255 m_PENT,
1256
1257 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1258 operand that cannot be represented using a modRM byte. The XOR
1259 replacement is long decoded, so this split helps here as well. */
1260 m_K6,
1261
1262 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1263 from integer to FP. */
1264 m_AMDFAM10,
1265 };
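/* A minimal sketch of how a tuning flag can be tested against the m_*
   masks above (illustrative only and kept out of the build; the accessor
   the rest of the compiler uses may differ).  */
#if 0
static bool
example_tune_applies_p (int feature_index, enum processor_type tune)
{
  /* Nonzero exactly when the table entry lists the processor TUNE.  */
  return (ix86_tune_features[feature_index] & (1u << tune)) != 0;
}
#endif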
1266
1267 /* Feature tests against the various architecture variations. */
1268 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1269 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1270 ~(m_386 | m_486 | m_PENT | m_K6),
1271
1272 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1273 ~m_386,
1274
1275 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1276 ~(m_386 | m_486),
1277
1278 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1279 ~m_386,
1280
1281 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1282 ~m_386,
1283 };
1284
1285 static const unsigned int x86_accumulate_outgoing_args
1286 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1287
1288 static const unsigned int x86_arch_always_fancy_math_387
1289 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1290 | m_NOCONA | m_CORE2 | m_GENERIC;
1291
1292 static enum stringop_alg stringop_alg = no_stringop;
1293
1294 /* If the average insn count for a single function invocation is
1295 lower than this constant, emit fast (but longer) prologue and
1296 epilogue code. */
1297 #define FAST_PROLOGUE_INSN_COUNT 20
1298
1299 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1300 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1301 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1302 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1303
1304 /* Array of the smallest class containing reg number REGNO, indexed by
1305 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1306
1307 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1308 {
1309 /* ax, dx, cx, bx */
1310 AREG, DREG, CREG, BREG,
1311 /* si, di, bp, sp */
1312 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1313 /* FP registers */
1314 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1315 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1316 /* arg pointer */
1317 NON_Q_REGS,
1318 /* flags, fpsr, fpcr, frame */
1319 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1320 /* SSE registers */
1321 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1322 SSE_REGS, SSE_REGS,
1323 /* MMX registers */
1324 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1325 MMX_REGS, MMX_REGS,
1326 /* REX registers */
1327 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1328 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1329 /* SSE REX registers */
1330 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1331 SSE_REGS, SSE_REGS,
1332 };
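/* Illustrative lookups into the table above (a sketch): regno 0 (%eax)
   maps to AREG, while regno 7 (%esp) maps to NON_Q_REGS because the
   stack pointer has no addressable low byte; REGNO_REG_CLASS in i386.h
   simply indexes this array.  */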
1333
1334 /* The "default" register map used in 32bit mode. */
1335
1336 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1337 {
1338 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1339 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1340 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1341 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1342 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1343 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1344 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1345 };
1346
1347 static int const x86_64_int_parameter_registers[6] =
1348 {
1349 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1350 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1351 };
1352
1353 static int const x86_64_ms_abi_int_parameter_registers[4] =
1354 {
1355 2 /*RCX*/, 1 /*RDX*/,
1356 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1357 };
1358
1359 static int const x86_64_int_return_registers[4] =
1360 {
1361 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1362 };
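/* Illustrative reading of the arrays above (a sketch): for an integer
   call f (a, b, c) the SysV 64-bit ABI passes the first three arguments
   in %rdi, %rsi and %rdx (gcc regnos 5, 4 and 1), whereas the ms_abi
   convention uses %rcx, %rdx and %r8; a 128-bit integer return value
   comes back in %rax:%rdx, the first two entries of
   x86_64_int_return_registers.  */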
1363
1364 /* The "default" register map used in 64bit mode. */
1365 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1366 {
1367 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1368 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1369 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1370 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1371 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1372 8,9,10,11,12,13,14,15, /* extended integer registers */
1373 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1374 };
1375
1376 /* Define the register numbers to be used in Dwarf debugging information.
1377 The SVR4 reference port C compiler uses the following register numbers
1378 in its Dwarf output code:
1379 0 for %eax (gcc regno = 0)
1380 1 for %ecx (gcc regno = 2)
1381 2 for %edx (gcc regno = 1)
1382 3 for %ebx (gcc regno = 3)
1383 4 for %esp (gcc regno = 7)
1384 5 for %ebp (gcc regno = 6)
1385 6 for %esi (gcc regno = 4)
1386 7 for %edi (gcc regno = 5)
1387 The following three DWARF register numbers are never generated by
1388 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1389 believes these numbers have these meanings.
1390 8 for %eip (no gcc equivalent)
1391 9 for %eflags (gcc regno = 17)
1392 10 for %trapno (no gcc equivalent)
1393 It is not at all clear how we should number the FP stack registers
1394 for the x86 architecture. If the version of SDB on x86/svr4 were
1395 a bit less brain dead with respect to floating-point then we would
1396 have a precedent to follow with respect to DWARF register numbers
1397 for x86 FP registers, but the SDB on x86/svr4 is so completely
1398 broken with respect to FP registers that it is hardly worth thinking
1399 of it as something to strive for compatibility with.
1400 The version of x86/svr4 SDB I have at the moment does (partially)
1401 seem to believe that DWARF register number 11 is associated with
1402 the x86 register %st(0), but that's about all. Higher DWARF
1403 register numbers don't seem to be associated with anything in
1404 particular, and even for DWARF regno 11, SDB only seems to under-
1405 stand that it should say that a variable lives in %st(0) (when
1406 asked via an `=' command) if we said it was in DWARF regno 11,
1407 but SDB still prints garbage when asked for the value of the
1408 variable in question (via a `/' command).
1409 (Also note that the labels SDB prints for various FP stack regs
1410 when doing an `x' command are all wrong.)
1411 Note that these problems generally don't affect the native SVR4
1412 C compiler because it doesn't allow the use of -O with -g and
1413 because when it is *not* optimizing, it allocates a memory
1414 location for each floating-point variable, and the memory
1415 location is what gets described in the DWARF AT_location
1416 attribute for the variable in question.
1417 Regardless of the severe mental illness of the x86/svr4 SDB, we
1418 do something sensible here and we use the following DWARF
1419 register numbers. Note that these are all stack-top-relative
1420 numbers.
1421 11 for %st(0) (gcc regno = 8)
1422 12 for %st(1) (gcc regno = 9)
1423 13 for %st(2) (gcc regno = 10)
1424 14 for %st(3) (gcc regno = 11)
1425 15 for %st(4) (gcc regno = 12)
1426 16 for %st(5) (gcc regno = 13)
1427 17 for %st(6) (gcc regno = 14)
1428 18 for %st(7) (gcc regno = 15)
1429 */
1430 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1431 {
1432 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1433 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1434 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1435 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1436 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1437 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1438 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1439 };
1440
1441 /* Test and compare insns in i386.md store the information needed to
1442 generate branch and scc insns here. */
1443
1444 rtx ix86_compare_op0 = NULL_RTX;
1445 rtx ix86_compare_op1 = NULL_RTX;
1446 rtx ix86_compare_emitted = NULL_RTX;
1447
1448 /* Size of the register save area. */
1449 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
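/* For the usual 64-bit values (REGPARM_MAX == 6, UNITS_PER_WORD == 8,
   SSE_REGPARM_MAX == 8 -- stated here only for illustration) this works
   out to 6*8 + 8*16 = 176 bytes of register save area.  */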
1450
1451 /* Define the structure for the machine field in struct function. */
1452
1453 struct stack_local_entry GTY(())
1454 {
1455 unsigned short mode;
1456 unsigned short n;
1457 rtx rtl;
1458 struct stack_local_entry *next;
1459 };
1460
1461 /* Structure describing stack frame layout.
1462 Stack grows downward:
1463
1464 [arguments]
1465 <- ARG_POINTER
1466 saved pc
1467
1468 saved frame pointer if frame_pointer_needed
1469 <- HARD_FRAME_POINTER
1470 [saved regs]
1471
1472 [padding1] \
1473 )
1474 [va_arg registers] (
1475 > to_allocate <- FRAME_POINTER
1476 [frame] (
1477 )
1478 [padding2] /
1479 */
1480 struct ix86_frame
1481 {
1482 int nregs;
1483 int padding1;
1484 int va_arg_size;
1485 HOST_WIDE_INT frame;
1486 int padding2;
1487 int outgoing_arguments_size;
1488 int red_zone_size;
1489
1490 HOST_WIDE_INT to_allocate;
1491 /* The offsets relative to ARG_POINTER. */
1492 HOST_WIDE_INT frame_pointer_offset;
1493 HOST_WIDE_INT hard_frame_pointer_offset;
1494 HOST_WIDE_INT stack_pointer_offset;
1495
1496 /* When save_regs_using_mov is set, emit prologue using
1497 move instead of push instructions. */
1498 bool save_regs_using_mov;
1499 };
1500
1501 /* Code model option. */
1502 enum cmodel ix86_cmodel;
1503 /* Asm dialect. */
1504 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1505 /* TLS dialects. */
1506 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1507
1508 /* Which unit we are generating floating point math for. */
1509 enum fpmath_unit ix86_fpmath;
1510
1511 /* Which cpu are we scheduling for. */
1512 enum processor_type ix86_tune;
1513
1514 /* Which instruction set architecture to use. */
1515 enum processor_type ix86_arch;
1516
1517 /* true if sse prefetch instruction is not NOOP. */
1518 int x86_prefetch_sse;
1519
1520 /* ix86_regparm_string as a number */
1521 static int ix86_regparm;
1522
1523 /* -mstackrealign option */
1524 extern int ix86_force_align_arg_pointer;
1525 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1526
1527 /* Preferred alignment for stack boundary in bits. */
1528 unsigned int ix86_preferred_stack_boundary;
1529
1530 /* Values 1-5: see jump.c */
1531 int ix86_branch_cost;
1532
1533 /* Variables which are this size or smaller are put in the data/bss
1534 or ldata/lbss sections. */
1535
1536 int ix86_section_threshold = 65536;
1537
1538 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1539 char internal_label_prefix[16];
1540 int internal_label_prefix_len;
1541
1542 /* Fence to use after loop using movnt. */
1543 tree x86_mfence;
1544
1545 /* Register class used for passing a given 64bit part of the argument.
1546 These represent classes as documented by the PS ABI, with the exception
1547 of the SSESF and SSEDF classes, which are basically the SSE class, except that
1548 gcc will use SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1549
1550 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1551 whenever possible (the upper half contains only padding). */
1552 enum x86_64_reg_class
1553 {
1554 X86_64_NO_CLASS,
1555 X86_64_INTEGER_CLASS,
1556 X86_64_INTEGERSI_CLASS,
1557 X86_64_SSE_CLASS,
1558 X86_64_SSESF_CLASS,
1559 X86_64_SSEDF_CLASS,
1560 X86_64_SSEUP_CLASS,
1561 X86_64_X87_CLASS,
1562 X86_64_X87UP_CLASS,
1563 X86_64_COMPLEX_X87_CLASS,
1564 X86_64_MEMORY_CLASS
1565 };
1566 static const char * const x86_64_reg_class_name[] =
1567 {
1568 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1569 "sseup", "x87", "x87up", "cplx87", "no"
1570 };
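/* A hedged example of how these classes are meant to be read (the actual
   classification is done by classify_argument later in this file): a plain
   32-bit int argument is INTEGERSI, so only a cheap SImode move is needed,
   while a double is SSEDF, so a DFmode move through an SSE register is used
   instead of a full DImode one.  */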
1571
1572 #define MAX_CLASSES 4
1573
1574 /* Table of constants used by fldpi, fldln2, etc.... */
1575 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1576 static bool ext_80387_constants_init = 0;
1577
1578 \f
1579 static struct machine_function * ix86_init_machine_status (void);
1580 static rtx ix86_function_value (const_tree, const_tree, bool);
1581 static int ix86_function_regparm (const_tree, const_tree);
1582 static void ix86_compute_frame_layout (struct ix86_frame *);
1583 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1584 rtx, rtx, int);
1585
1586 \f
1587 /* The svr4 ABI for the i386 says that records and unions are returned
1588 in memory. */
1589 #ifndef DEFAULT_PCC_STRUCT_RETURN
1590 #define DEFAULT_PCC_STRUCT_RETURN 1
1591 #endif
1592
1593 /* Bit flags that specify the ISA we are compiling for. */
1594 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1595
1596 /* A mask of ix86_isa_flags that includes bit X if X
1597 was set or cleared on the command line. */
1598 static int ix86_isa_flags_explicit;
1599
1600 /* Define a set of ISAs which aren't available for a given ISA. MMX
1601 and SSE ISAs are handled separately. */
1602
1603 #define OPTION_MASK_ISA_MMX_UNSET \
1604 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1605 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1606
1607 #define OPTION_MASK_ISA_SSE_UNSET \
1608 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1609 #define OPTION_MASK_ISA_SSE2_UNSET \
1610 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1611 #define OPTION_MASK_ISA_SSE3_UNSET \
1612 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1613 #define OPTION_MASK_ISA_SSSE3_UNSET \
1614 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1615 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1616 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1617 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1618
1619 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1620 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1621 #define OPTION_MASK_ISA_SSE4 \
1622 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1623 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1624
1625 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
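/* Because the *_UNSET masks nest, disabling one ISA implicitly disables
   everything that depends on it.  For example (a sketch of the intended
   effect, not an exhaustive description of the option machinery): -mno-sse2
   expands through OPTION_MASK_ISA_SSE2_UNSET in ix86_handle_option below,
   clearing the SSE3, SSSE3, SSE4.1, SSE4.2 and SSE4A bits as well.  */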
1626
1627 /* Vectorization library interface and handlers. */
1628 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1629 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1630
1631 /* Implement TARGET_HANDLE_OPTION. */
1632
1633 static bool
1634 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1635 {
1636 switch (code)
1637 {
1638 case OPT_mmmx:
1639 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1640 if (!value)
1641 {
1642 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1643 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1644 }
1645 return true;
1646
1647 case OPT_m3dnow:
1648 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1649 if (!value)
1650 {
1651 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1652 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1653 }
1654 return true;
1655
1656 case OPT_m3dnowa:
1657 return false;
1658
1659 case OPT_msse:
1660 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1661 if (!value)
1662 {
1663 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1664 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1665 }
1666 return true;
1667
1668 case OPT_msse2:
1669 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1670 if (!value)
1671 {
1672 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1673 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1674 }
1675 return true;
1676
1677 case OPT_msse3:
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1679 if (!value)
1680 {
1681 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1682 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1683 }
1684 return true;
1685
1686 case OPT_mssse3:
1687 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1688 if (!value)
1689 {
1690 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1691 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1692 }
1693 return true;
1694
1695 case OPT_msse4_1:
1696 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1697 if (!value)
1698 {
1699 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1700 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1701 }
1702 return true;
1703
1704 case OPT_msse4_2:
1705 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1706 if (!value)
1707 {
1708 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1709 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1710 }
1711 return true;
1712
1713 case OPT_msse4:
1714 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1715 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1716 return true;
1717
1718 case OPT_mno_sse4:
1719 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1720 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1721 return true;
1722
1723 case OPT_msse4a:
1724 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1725 if (!value)
1726 {
1727 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1728 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1729 }
1730 return true;
1731
1732 default:
1733 return true;
1734 }
1735 }
1736
1737 /* Sometimes certain combinations of command options do not make
1738 sense on a particular target machine. You can define a macro
1739 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1740 defined, is executed once just after all the command options have
1741 been parsed.
1742
1743 Don't use this macro to turn on various extra optimizations for
1744 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1745
1746 void
1747 override_options (void)
1748 {
1749 int i;
1750 int ix86_tune_defaulted = 0;
1751 int ix86_arch_specified = 0;
1752 unsigned int ix86_arch_mask, ix86_tune_mask;
1753
1754 /* Comes from final.c -- no real reason to change it. */
1755 #define MAX_CODE_ALIGN 16
1756
1757 static struct ptt
1758 {
1759 const struct processor_costs *cost; /* Processor costs */
1760 const int align_loop; /* Default alignments. */
1761 const int align_loop_max_skip;
1762 const int align_jump;
1763 const int align_jump_max_skip;
1764 const int align_func;
1765 }
1766 const processor_target_table[PROCESSOR_max] =
1767 {
1768 {&i386_cost, 4, 3, 4, 3, 4},
1769 {&i486_cost, 16, 15, 16, 15, 16},
1770 {&pentium_cost, 16, 7, 16, 7, 16},
1771 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1772 {&geode_cost, 0, 0, 0, 0, 0},
1773 {&k6_cost, 32, 7, 32, 7, 32},
1774 {&athlon_cost, 16, 7, 16, 7, 16},
1775 {&pentium4_cost, 0, 0, 0, 0, 0},
1776 {&k8_cost, 16, 7, 16, 7, 16},
1777 {&nocona_cost, 0, 0, 0, 0, 0},
1778 {&core2_cost, 16, 10, 16, 10, 16},
1779 {&generic32_cost, 16, 7, 16, 7, 16},
1780 {&generic64_cost, 16, 10, 16, 10, 16},
1781 {&amdfam10_cost, 32, 24, 32, 7, 32}
1782 };
1783
1784 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1785 enum pta_flags
1786 {
1787 PTA_SSE = 1 << 0,
1788 PTA_SSE2 = 1 << 1,
1789 PTA_SSE3 = 1 << 2,
1790 PTA_MMX = 1 << 3,
1791 PTA_PREFETCH_SSE = 1 << 4,
1792 PTA_3DNOW = 1 << 5,
1793 PTA_3DNOW_A = 1 << 6,
1794 PTA_64BIT = 1 << 7,
1795 PTA_SSSE3 = 1 << 8,
1796 PTA_CX16 = 1 << 9,
1797 PTA_POPCNT = 1 << 10,
1798 PTA_ABM = 1 << 11,
1799 PTA_SSE4A = 1 << 12,
1800 PTA_NO_SAHF = 1 << 13,
1801 PTA_SSE4_1 = 1 << 14,
1802 PTA_SSE4_2 = 1 << 15
1803 };
1804
1805 static struct pta
1806 {
1807 const char *const name; /* processor name or nickname. */
1808 const enum processor_type processor;
1809 const unsigned /*enum pta_flags*/ flags;
1810 }
1811 const processor_alias_table[] =
1812 {
1813 {"i386", PROCESSOR_I386, 0},
1814 {"i486", PROCESSOR_I486, 0},
1815 {"i586", PROCESSOR_PENTIUM, 0},
1816 {"pentium", PROCESSOR_PENTIUM, 0},
1817 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1818 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1819 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1820 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1821 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1822 {"i686", PROCESSOR_PENTIUMPRO, 0},
1823 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1824 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1825 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1826 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1827 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1828 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1829 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1830 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1831 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1832 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1833 | PTA_CX16 | PTA_NO_SAHF)},
1834 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1835 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1836 | PTA_SSSE3
1837 | PTA_CX16)},
1838 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1839 | PTA_PREFETCH_SSE)},
1840 {"k6", PROCESSOR_K6, PTA_MMX},
1841 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1842 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1843 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_PREFETCH_SSE)},
1845 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1846 | PTA_PREFETCH_SSE)},
1847 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1848 | PTA_SSE)},
1849 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1850 | PTA_SSE)},
1851 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1852 | PTA_SSE)},
1853 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1854 | PTA_MMX | PTA_SSE | PTA_SSE2
1855 | PTA_NO_SAHF)},
1856 {"k8", PROCESSOR_K8, (PTA_64BIT
1857 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1858 | PTA_SSE | PTA_SSE2
1859 | PTA_NO_SAHF)},
1860 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1861 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1862 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1863 | PTA_NO_SAHF)},
1864 {"opteron", PROCESSOR_K8, (PTA_64BIT
1865 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1866 | PTA_SSE | PTA_SSE2
1867 | PTA_NO_SAHF)},
1868 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1869 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1870 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1871 | PTA_NO_SAHF)},
1872 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1873 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1874 | PTA_SSE | PTA_SSE2
1875 | PTA_NO_SAHF)},
1876 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1877 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1878 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1879 | PTA_NO_SAHF)},
1880 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1881 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1882 | PTA_SSE | PTA_SSE2
1883 | PTA_NO_SAHF)},
1884 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1885 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1886 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1887 | PTA_SSE4A
1888 | PTA_CX16 | PTA_ABM)},
1889 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1890 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1891 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1892 | PTA_SSE4A
1893 | PTA_CX16 | PTA_ABM)},
1894 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1895 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1896 };
1897
1898 int const pta_size = ARRAY_SIZE (processor_alias_table);
1899
1900 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1901 SUBTARGET_OVERRIDE_OPTIONS;
1902 #endif
1903
1904 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1905 SUBSUBTARGET_OVERRIDE_OPTIONS;
1906 #endif
1907
1908 /* -fPIC is the default for x86_64. */
1909 if (TARGET_MACHO && TARGET_64BIT)
1910 flag_pic = 2;
1911
1912 /* Set the default values for switches whose default depends on TARGET_64BIT
1913 in case they weren't overwritten by command line options. */
1914 if (TARGET_64BIT)
1915 {
1916 /* Mach-O doesn't support omitting the frame pointer for now. */
1917 if (flag_omit_frame_pointer == 2)
1918 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1919 if (flag_asynchronous_unwind_tables == 2)
1920 flag_asynchronous_unwind_tables = 1;
1921 if (flag_pcc_struct_return == 2)
1922 flag_pcc_struct_return = 0;
1923 }
1924 else
1925 {
1926 if (flag_omit_frame_pointer == 2)
1927 flag_omit_frame_pointer = 0;
1928 if (flag_asynchronous_unwind_tables == 2)
1929 flag_asynchronous_unwind_tables = 0;
1930 if (flag_pcc_struct_return == 2)
1931 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1932 }
1933
1934 /* Need to check -mtune=generic first. */
1935 if (ix86_tune_string)
1936 {
1937 if (!strcmp (ix86_tune_string, "generic")
1938 || !strcmp (ix86_tune_string, "i686")
1939 /* As special support for cross compilers we read -mtune=native
1940 as -mtune=generic. With native compilers we won't see the
1941 -mtune=native, as it was changed by the driver. */
1942 || !strcmp (ix86_tune_string, "native"))
1943 {
1944 if (TARGET_64BIT)
1945 ix86_tune_string = "generic64";
1946 else
1947 ix86_tune_string = "generic32";
1948 }
1949 else if (!strncmp (ix86_tune_string, "generic", 7))
1950 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1951 }
1952 else
1953 {
1954 if (ix86_arch_string)
1955 ix86_tune_string = ix86_arch_string;
1956 if (!ix86_tune_string)
1957 {
1958 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1959 ix86_tune_defaulted = 1;
1960 }
1961
1962 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1963 need to use a sensible tune option. */
1964 if (!strcmp (ix86_tune_string, "generic")
1965 || !strcmp (ix86_tune_string, "x86-64")
1966 || !strcmp (ix86_tune_string, "i686"))
1967 {
1968 if (TARGET_64BIT)
1969 ix86_tune_string = "generic64";
1970 else
1971 ix86_tune_string = "generic32";
1972 }
1973 }
1974 if (ix86_stringop_string)
1975 {
1976 if (!strcmp (ix86_stringop_string, "rep_byte"))
1977 stringop_alg = rep_prefix_1_byte;
1978 else if (!strcmp (ix86_stringop_string, "libcall"))
1979 stringop_alg = libcall;
1980 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1981 stringop_alg = rep_prefix_4_byte;
1982 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1983 stringop_alg = rep_prefix_8_byte;
1984 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1985 stringop_alg = loop_1_byte;
1986 else if (!strcmp (ix86_stringop_string, "loop"))
1987 stringop_alg = loop;
1988 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1989 stringop_alg = unrolled_loop;
1990 else
1991 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1992 }
1993 if (!strcmp (ix86_tune_string, "x86-64"))
1994 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1995 "-mtune=generic instead as appropriate.");
1996
1997 if (!ix86_arch_string)
1998 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1999 else
2000 ix86_arch_specified = 1;
2001
2002 if (!strcmp (ix86_arch_string, "generic"))
2003 error ("generic CPU can be used only for -mtune= switch");
2004 if (!strncmp (ix86_arch_string, "generic", 7))
2005 error ("bad value (%s) for -march= switch", ix86_arch_string);
2006
2007 if (ix86_cmodel_string != 0)
2008 {
2009 if (!strcmp (ix86_cmodel_string, "small"))
2010 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2011 else if (!strcmp (ix86_cmodel_string, "medium"))
2012 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2013 else if (!strcmp (ix86_cmodel_string, "large"))
2014 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2015 else if (flag_pic)
2016 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2017 else if (!strcmp (ix86_cmodel_string, "32"))
2018 ix86_cmodel = CM_32;
2019 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2020 ix86_cmodel = CM_KERNEL;
2021 else
2022 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2023 }
2024 else
2025 {
2026 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2027 use of rip-relative addressing. This eliminates fixups that
2028 would otherwise be needed if this object is to be placed in a
2029 DLL, and is essentially just as efficient as direct addressing. */
2030 if (TARGET_64BIT_MS_ABI)
2031 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2032 else if (TARGET_64BIT)
2033 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2034 else
2035 ix86_cmodel = CM_32;
2036 }
2037 if (ix86_asm_string != 0)
2038 {
2039 if (! TARGET_MACHO
2040 && !strcmp (ix86_asm_string, "intel"))
2041 ix86_asm_dialect = ASM_INTEL;
2042 else if (!strcmp (ix86_asm_string, "att"))
2043 ix86_asm_dialect = ASM_ATT;
2044 else
2045 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2046 }
2047 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2048 error ("code model %qs not supported in the %s bit mode",
2049 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2050 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2051 sorry ("%i-bit mode not compiled in",
2052 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2053
2054 for (i = 0; i < pta_size; i++)
2055 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2056 {
2057 ix86_arch = processor_alias_table[i].processor;
2058 /* Default cpu tuning to the architecture. */
2059 ix86_tune = ix86_arch;
2060
2061 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2062 error ("CPU you selected does not support x86-64 "
2063 "instruction set");
2064
2065 if (processor_alias_table[i].flags & PTA_MMX
2066 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2067 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2068 if (processor_alias_table[i].flags & PTA_3DNOW
2069 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2070 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2071 if (processor_alias_table[i].flags & PTA_3DNOW_A
2072 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2073 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2074 if (processor_alias_table[i].flags & PTA_SSE
2075 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2076 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2077 if (processor_alias_table[i].flags & PTA_SSE2
2078 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2079 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2080 if (processor_alias_table[i].flags & PTA_SSE3
2081 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2082 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2083 if (processor_alias_table[i].flags & PTA_SSSE3
2084 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2085 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2086 if (processor_alias_table[i].flags & PTA_SSE4_1
2087 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2088 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2089 if (processor_alias_table[i].flags & PTA_SSE4_2
2090 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2091 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2092 if (processor_alias_table[i].flags & PTA_SSE4A
2093 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2094 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2095
2096 if (processor_alias_table[i].flags & PTA_ABM)
2097 x86_abm = true;
2098 if (processor_alias_table[i].flags & PTA_CX16)
2099 x86_cmpxchg16b = true;
2100 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2101 x86_popcnt = true;
2102 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2103 x86_prefetch_sse = true;
2104 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2105 x86_sahf = true;
2106
2107 break;
2108 }
2109
2110 if (i == pta_size)
2111 error ("bad value (%s) for -march= switch", ix86_arch_string);
2112
2113 ix86_arch_mask = 1u << ix86_arch;
2114 for (i = 0; i < X86_ARCH_LAST; ++i)
2115 ix86_arch_features[i] &= ix86_arch_mask;
2116
2117 for (i = 0; i < pta_size; i++)
2118 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2119 {
2120 ix86_tune = processor_alias_table[i].processor;
2121 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2122 {
2123 if (ix86_tune_defaulted)
2124 {
2125 ix86_tune_string = "x86-64";
2126 for (i = 0; i < pta_size; i++)
2127 if (! strcmp (ix86_tune_string,
2128 processor_alias_table[i].name))
2129 break;
2130 ix86_tune = processor_alias_table[i].processor;
2131 }
2132 else
2133 error ("CPU you selected does not support x86-64 "
2134 "instruction set");
2135 }
2136 /* Intel CPUs have always interpreted SSE prefetch instructions as
2137 NOPs; so, we can enable SSE prefetch instructions even when
2138 -mtune (rather than -march) points us to a processor that has them.
2139 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2140 higher processors. */
2141 if (TARGET_CMOVE
2142 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2143 x86_prefetch_sse = true;
2144 break;
2145 }
2146 if (i == pta_size)
2147 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2148
2149 ix86_tune_mask = 1u << ix86_tune;
2150 for (i = 0; i < X86_TUNE_LAST; ++i)
2151 ix86_tune_features[i] &= ix86_tune_mask;
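/* The two masking loops above rely on ix86_arch_features[] and
   ix86_tune_features[] being bit masks of processors (one bit per
   PROCESSOR_* value, as set up in the feature tables earlier in this file).
   Sketch of the effect: if a tuning entry was initialized to, say,
   (m_K8 | m_CORE2) and -mtune=core2 was selected, the entry keeps the CORE2
   bit and the corresponding TARGET_* tuning test stays nonzero; for any
   other -mtune it becomes 0.  */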
2152
2153 if (optimize_size)
2154 ix86_cost = &size_cost;
2155 else
2156 ix86_cost = processor_target_table[ix86_tune].cost;
2157
2158 /* Arrange to set up i386_stack_locals for all functions. */
2159 init_machine_status = ix86_init_machine_status;
2160
2161 /* Validate -mregparm= value. */
2162 if (ix86_regparm_string)
2163 {
2164 if (TARGET_64BIT)
2165 warning (0, "-mregparm is ignored in 64-bit mode");
2166 i = atoi (ix86_regparm_string);
2167 if (i < 0 || i > REGPARM_MAX)
2168 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2169 else
2170 ix86_regparm = i;
2171 }
2172 if (TARGET_64BIT)
2173 ix86_regparm = REGPARM_MAX;
2174
2175 /* If the user has provided any of the -malign-* options,
2176 warn and use that value only if -falign-* is not set.
2177 Remove this code in GCC 3.2 or later. */
2178 if (ix86_align_loops_string)
2179 {
2180 warning (0, "-malign-loops is obsolete, use -falign-loops");
2181 if (align_loops == 0)
2182 {
2183 i = atoi (ix86_align_loops_string);
2184 if (i < 0 || i > MAX_CODE_ALIGN)
2185 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2186 else
2187 align_loops = 1 << i;
2188 }
2189 }
2190
2191 if (ix86_align_jumps_string)
2192 {
2193 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2194 if (align_jumps == 0)
2195 {
2196 i = atoi (ix86_align_jumps_string);
2197 if (i < 0 || i > MAX_CODE_ALIGN)
2198 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2199 else
2200 align_jumps = 1 << i;
2201 }
2202 }
2203
2204 if (ix86_align_funcs_string)
2205 {
2206 warning (0, "-malign-functions is obsolete, use -falign-functions");
2207 if (align_functions == 0)
2208 {
2209 i = atoi (ix86_align_funcs_string);
2210 if (i < 0 || i > MAX_CODE_ALIGN)
2211 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2212 else
2213 align_functions = 1 << i;
2214 }
2215 }
2216
2217 /* Default align_* from the processor table. */
2218 if (align_loops == 0)
2219 {
2220 align_loops = processor_target_table[ix86_tune].align_loop;
2221 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2222 }
2223 if (align_jumps == 0)
2224 {
2225 align_jumps = processor_target_table[ix86_tune].align_jump;
2226 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2227 }
2228 if (align_functions == 0)
2229 {
2230 align_functions = processor_target_table[ix86_tune].align_func;
2231 }
2232
2233 /* Validate -mbranch-cost= value, or provide default. */
2234 ix86_branch_cost = ix86_cost->branch_cost;
2235 if (ix86_branch_cost_string)
2236 {
2237 i = atoi (ix86_branch_cost_string);
2238 if (i < 0 || i > 5)
2239 error ("-mbranch-cost=%d is not between 0 and 5", i);
2240 else
2241 ix86_branch_cost = i;
2242 }
2243 if (ix86_section_threshold_string)
2244 {
2245 i = atoi (ix86_section_threshold_string);
2246 if (i < 0)
2247 error ("-mlarge-data-threshold=%d is negative", i);
2248 else
2249 ix86_section_threshold = i;
2250 }
2251
2252 if (ix86_tls_dialect_string)
2253 {
2254 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2255 ix86_tls_dialect = TLS_DIALECT_GNU;
2256 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2257 ix86_tls_dialect = TLS_DIALECT_GNU2;
2258 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2259 ix86_tls_dialect = TLS_DIALECT_SUN;
2260 else
2261 error ("bad value (%s) for -mtls-dialect= switch",
2262 ix86_tls_dialect_string);
2263 }
2264
2265 if (ix87_precision_string)
2266 {
2267 i = atoi (ix87_precision_string);
2268 if (i != 32 && i != 64 && i != 80)
2269 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2270 }
2271
2272 if (TARGET_64BIT)
2273 {
2274 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2275
2276 /* Enable by default the SSE and MMX builtins. Do allow the user to
2277 explicitly disable any of these. In particular, disabling SSE and
2278 MMX for kernel code is extremely useful. */
2279 if (!ix86_arch_specified)
2280 ix86_isa_flags
2281 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2282 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2283
2284 if (TARGET_RTD)
2285 warning (0, "-mrtd is ignored in 64bit mode");
2286 }
2287 else
2288 {
2289 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2290
2291 if (!ix86_arch_specified)
2292 ix86_isa_flags
2293 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2294
2295 /* The i386 ABI does not specify a red zone. It still makes sense to use
2296 it when the programmer takes care to keep the stack from being destroyed. */
2297 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2298 target_flags |= MASK_NO_RED_ZONE;
2299 }
2300
2301 /* Keep nonleaf frame pointers. */
2302 if (flag_omit_frame_pointer)
2303 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2304 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2305 flag_omit_frame_pointer = 1;
2306
2307 /* If we're doing fast math, we don't care about comparison order
2308 wrt NaNs. This lets us use a shorter comparison sequence. */
2309 if (flag_finite_math_only)
2310 target_flags &= ~MASK_IEEE_FP;
2311
2312 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2313 since the insns won't need emulation. */
2314 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2315 target_flags &= ~MASK_NO_FANCY_MATH_387;
2316
2317 /* Likewise, if the target doesn't have a 387, or we've specified
2318 software floating point, don't use 387 inline intrinsics. */
2319 if (!TARGET_80387)
2320 target_flags |= MASK_NO_FANCY_MATH_387;
2321
2322 /* Turn on SSE4.1 builtins for -msse4.2. */
2323 if (TARGET_SSE4_2)
2324 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2325
2326 /* Turn on SSSE3 builtins for -msse4.1. */
2327 if (TARGET_SSE4_1)
2328 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2329
2330 /* Turn on SSE3 builtins for -mssse3. */
2331 if (TARGET_SSSE3)
2332 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2333
2334 /* Turn on SSE3 builtins for -msse4a. */
2335 if (TARGET_SSE4A)
2336 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2337
2338 /* Turn on SSE2 builtins for -msse3. */
2339 if (TARGET_SSE3)
2340 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2341
2342 /* Turn on SSE builtins for -msse2. */
2343 if (TARGET_SSE2)
2344 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2345
2346 /* Turn on MMX builtins for -msse. */
2347 if (TARGET_SSE)
2348 {
2349 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2350 x86_prefetch_sse = true;
2351 }
2352
2353 /* Turn on MMX builtins for 3Dnow. */
2354 if (TARGET_3DNOW)
2355 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2356
2357 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2358 if (TARGET_SSE4_2 || TARGET_ABM)
2359 x86_popcnt = true;
2360
2361 /* Validate -mpreferred-stack-boundary= value, or provide default.
2362 The default of 128 bits is for Pentium III's SSE __m128. We can't
2363 lower it for optimize_size, since otherwise object files compiled
2364 with -Os and -On could not be mixed. */
2365 ix86_preferred_stack_boundary = 128;
2366 if (ix86_preferred_stack_boundary_string)
2367 {
2368 i = atoi (ix86_preferred_stack_boundary_string);
2369 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2370 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2371 TARGET_64BIT ? 4 : 2);
2372 else
2373 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2374 }
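/* Worked example of the computation above (values chosen for illustration):
   -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT = 16 * 8 = 128
   bits, i.e. the 16-byte alignment the default already provides.  */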
2375
2376 /* Accept -msseregparm only if at least SSE support is enabled. */
2377 if (TARGET_SSEREGPARM
2378 && ! TARGET_SSE)
2379 error ("-msseregparm used without SSE enabled");
2380
2381 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2382 if (ix86_fpmath_string != 0)
2383 {
2384 if (! strcmp (ix86_fpmath_string, "387"))
2385 ix86_fpmath = FPMATH_387;
2386 else if (! strcmp (ix86_fpmath_string, "sse"))
2387 {
2388 if (!TARGET_SSE)
2389 {
2390 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2391 ix86_fpmath = FPMATH_387;
2392 }
2393 else
2394 ix86_fpmath = FPMATH_SSE;
2395 }
2396 else if (! strcmp (ix86_fpmath_string, "387,sse")
2397 || ! strcmp (ix86_fpmath_string, "sse,387"))
2398 {
2399 if (!TARGET_SSE)
2400 {
2401 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2402 ix86_fpmath = FPMATH_387;
2403 }
2404 else if (!TARGET_80387)
2405 {
2406 warning (0, "387 instruction set disabled, using SSE arithmetics");
2407 ix86_fpmath = FPMATH_SSE;
2408 }
2409 else
2410 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2411 }
2412 else
2413 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2414 }
2415
2416 /* If the i387 is disabled, then do not return values in it. */
2417 if (!TARGET_80387)
2418 target_flags &= ~MASK_FLOAT_RETURNS;
2419
2420 /* Use external vectorized library in vectorizing intrinsics. */
2421 if (ix86_veclibabi_string)
2422 {
2423 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2424 ix86_veclib_handler = ix86_veclibabi_acml;
2425 else
2426 error ("unknown vectorization library ABI type (%s) for "
2427 "-mveclibabi= switch", ix86_veclibabi_string);
2428 }
2429
2430 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2431 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2432 && !optimize_size)
2433 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2434
2435 /* ??? Unwind info is not correct around the CFG unless either a frame
2436 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2437 unwind info generation to be aware of the CFG and propagating states
2438 around edges. */
2439 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2440 || flag_exceptions || flag_non_call_exceptions)
2441 && flag_omit_frame_pointer
2442 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2443 {
2444 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2445 warning (0, "unwind tables currently require either a frame pointer "
2446 "or -maccumulate-outgoing-args for correctness");
2447 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2448 }
2449
2450 /* For sane SSE instruction set generation we need the fcomi instruction.
2451 It is safe to enable all CMOVE instructions. */
2452 if (TARGET_SSE)
2453 TARGET_CMOVE = 1;
2454
2455 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2456 {
2457 char *p;
2458 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2459 p = strchr (internal_label_prefix, 'X');
2460 internal_label_prefix_len = p - internal_label_prefix;
2461 *p = '\0';
2462 }
2463
2464 /* When the scheduling description is not available, disable the scheduler
2465 pass so it won't slow down the compilation and make x87 code slower. */
2466 if (!TARGET_SCHEDULE)
2467 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2468
2469 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2470 set_param_value ("simultaneous-prefetches",
2471 ix86_cost->simultaneous_prefetches);
2472 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2473 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2474 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2475 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2476 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2477 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2478 }
2479 \f
2480 /* Return true if this goes in large data/bss. */
2481
2482 static bool
2483 ix86_in_large_data_p (tree exp)
2484 {
2485 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2486 return false;
2487
2488 /* Functions are never large data. */
2489 if (TREE_CODE (exp) == FUNCTION_DECL)
2490 return false;
2491
2492 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2493 {
2494 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2495 if (strcmp (section, ".ldata") == 0
2496 || strcmp (section, ".lbss") == 0)
2497 return true;
2498 return false;
2499 }
2500 else
2501 {
2502 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2503
2504 /* If this is an incomplete type with size 0, then we can't put it
2505 in data because it might be too big when completed. */
2506 if (!size || size > ix86_section_threshold)
2507 return true;
2508 }
2509
2510 return false;
2511 }
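/* Example of the intent of the check above (a sketch under the default
   ix86_section_threshold of 65536): with -mcmodel=medium, a 100000-byte
   array ends up in the large data sections (.ldata/.lbss, handled below),
   while an ordinary int stays in the normal .data/.bss.  */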
2512
2513 /* Switch to the appropriate section for output of DECL.
2514 DECL is either a `VAR_DECL' node or a constant of some sort.
2515 RELOC indicates whether forming the initial value of DECL requires
2516 link-time relocations. */
2517
2518 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2519 ATTRIBUTE_UNUSED;
2520
2521 static section *
2522 x86_64_elf_select_section (tree decl, int reloc,
2523 unsigned HOST_WIDE_INT align)
2524 {
2525 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2526 && ix86_in_large_data_p (decl))
2527 {
2528 const char *sname = NULL;
2529 unsigned int flags = SECTION_WRITE;
2530 switch (categorize_decl_for_section (decl, reloc))
2531 {
2532 case SECCAT_DATA:
2533 sname = ".ldata";
2534 break;
2535 case SECCAT_DATA_REL:
2536 sname = ".ldata.rel";
2537 break;
2538 case SECCAT_DATA_REL_LOCAL:
2539 sname = ".ldata.rel.local";
2540 break;
2541 case SECCAT_DATA_REL_RO:
2542 sname = ".ldata.rel.ro";
2543 break;
2544 case SECCAT_DATA_REL_RO_LOCAL:
2545 sname = ".ldata.rel.ro.local";
2546 break;
2547 case SECCAT_BSS:
2548 sname = ".lbss";
2549 flags |= SECTION_BSS;
2550 break;
2551 case SECCAT_RODATA:
2552 case SECCAT_RODATA_MERGE_STR:
2553 case SECCAT_RODATA_MERGE_STR_INIT:
2554 case SECCAT_RODATA_MERGE_CONST:
2555 sname = ".lrodata";
2556 flags = 0;
2557 break;
2558 case SECCAT_SRODATA:
2559 case SECCAT_SDATA:
2560 case SECCAT_SBSS:
2561 gcc_unreachable ();
2562 case SECCAT_TEXT:
2563 case SECCAT_TDATA:
2564 case SECCAT_TBSS:
2565 /* We don't split these for the medium model. Place them into
2566 the default sections and hope for the best. */
2567 break;
2568 }
2569 if (sname)
2570 {
2571 /* We might get called with string constants, but get_named_section
2572 doesn't like them as they are not DECLs. Also, we need to set
2573 flags in that case. */
2574 if (!DECL_P (decl))
2575 return get_section (sname, flags, NULL);
2576 return get_named_section (decl, sname, reloc);
2577 }
2578 }
2579 return default_elf_select_section (decl, reloc, align);
2580 }
2581
2582 /* Build up a unique section name, expressed as a
2583 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2584 RELOC indicates whether the initial value of EXP requires
2585 link-time relocations. */
2586
2587 static void ATTRIBUTE_UNUSED
2588 x86_64_elf_unique_section (tree decl, int reloc)
2589 {
2590 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2591 && ix86_in_large_data_p (decl))
2592 {
2593 const char *prefix = NULL;
2594 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2595 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2596
2597 switch (categorize_decl_for_section (decl, reloc))
2598 {
2599 case SECCAT_DATA:
2600 case SECCAT_DATA_REL:
2601 case SECCAT_DATA_REL_LOCAL:
2602 case SECCAT_DATA_REL_RO:
2603 case SECCAT_DATA_REL_RO_LOCAL:
2604 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2605 break;
2606 case SECCAT_BSS:
2607 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2608 break;
2609 case SECCAT_RODATA:
2610 case SECCAT_RODATA_MERGE_STR:
2611 case SECCAT_RODATA_MERGE_STR_INIT:
2612 case SECCAT_RODATA_MERGE_CONST:
2613 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2614 break;
2615 case SECCAT_SRODATA:
2616 case SECCAT_SDATA:
2617 case SECCAT_SBSS:
2618 gcc_unreachable ();
2619 case SECCAT_TEXT:
2620 case SECCAT_TDATA:
2621 case SECCAT_TBSS:
2622 /* We don't split these for the medium model. Place them into
2623 the default sections and hope for the best. */
2624 break;
2625 }
2626 if (prefix)
2627 {
2628 const char *name;
2629 size_t nlen, plen;
2630 char *string;
2631 plen = strlen (prefix);
2632
2633 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2634 name = targetm.strip_name_encoding (name);
2635 nlen = strlen (name);
2636
2637 string = (char *) alloca (nlen + plen + 1);
2638 memcpy (string, prefix, plen);
2639 memcpy (string + plen, name, nlen + 1);
2640
2641 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2642 return;
2643 }
2644 }
2645 default_unique_section (decl, reloc);
2646 }
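/* Illustration of the names built above (the decl name "buf" is hypothetical,
   used only as an example): a writable initialized large-model variable gets
   a section such as ".ldata.buf" (or ".gnu.linkonce.ld.buf" when one_only),
   and an uninitialized one gets ".lbss.buf" (or ".gnu.linkonce.lb.buf").  */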
2647
2648 #ifdef COMMON_ASM_OP
2649 /* This says how to output assembler code to declare an
2650 uninitialized external linkage data object.
2651
2652 For medium model x86-64 we need to use .largecomm opcode for
2653 large objects. */
2654 void
2655 x86_elf_aligned_common (FILE *file,
2656 const char *name, unsigned HOST_WIDE_INT size,
2657 int align)
2658 {
2659 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2660 && size > (unsigned int)ix86_section_threshold)
2661 fprintf (file, ".largecomm\t");
2662 else
2663 fprintf (file, "%s", COMMON_ASM_OP);
2664 assemble_name (file, name);
2665 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2666 size, align / BITS_PER_UNIT);
2667 }
2668 #endif
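/* Example of the directives emitted above (a hypothetical symbol foo, with
   size and alignment picked only for illustration): a 200000-byte common
   object under -mcmodel=medium exceeds the default threshold and is
   announced as
       .largecomm foo,200000,32
   (for 256-bit alignment), while smaller objects go through the ordinary
   COMMON_ASM_OP, typically ".comm".  */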
2669
2670 /* Utility function for targets to use in implementing
2671 ASM_OUTPUT_ALIGNED_BSS. */
2672
2673 void
2674 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2675 const char *name, unsigned HOST_WIDE_INT size,
2676 int align)
2677 {
2678 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2679 && size > (unsigned int)ix86_section_threshold)
2680 switch_to_section (get_named_section (decl, ".lbss", 0));
2681 else
2682 switch_to_section (bss_section);
2683 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2684 #ifdef ASM_DECLARE_OBJECT_NAME
2685 last_assemble_variable_decl = decl;
2686 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2687 #else
2688 /* Standard thing is just output label for the object. */
2689 ASM_OUTPUT_LABEL (file, name);
2690 #endif /* ASM_DECLARE_OBJECT_NAME */
2691 ASM_OUTPUT_SKIP (file, size ? size : 1);
2692 }
2693 \f
2694 void
2695 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2696 {
2697 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2698 make the problem with not enough registers even worse. */
2699 #ifdef INSN_SCHEDULING
2700 if (level > 1)
2701 flag_schedule_insns = 0;
2702 #endif
2703
2704 if (TARGET_MACHO)
2705 /* The Darwin libraries never set errno, so we might as well
2706 avoid calling them when that's the only reason we would. */
2707 flag_errno_math = 0;
2708
2709 /* The default values of these switches depend on TARGET_64BIT,
2710 which is not known at this point. Mark these values with 2 and
2711 let the user override them. If there is no command line option
2712 specifying them, we will set the defaults in override_options. */
2713 if (optimize >= 1)
2714 flag_omit_frame_pointer = 2;
2715 flag_pcc_struct_return = 2;
2716 flag_asynchronous_unwind_tables = 2;
2717 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2718 SUBTARGET_OPTIMIZATION_OPTIONS;
2719 #endif
2720 }
2721 \f
2722 /* Decide whether we can make a sibling call to a function. DECL is the
2723 declaration of the function being targeted by the call and EXP is the
2724 CALL_EXPR representing the call. */
2725
2726 static bool
2727 ix86_function_ok_for_sibcall (tree decl, tree exp)
2728 {
2729 tree func;
2730 rtx a, b;
2731
2732 /* If we are generating position-independent code, we cannot sibcall
2733 optimize any indirect call, or a direct call to a global function,
2734 as the PLT requires %ebx be live. */
2735 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2736 return false;
2737
2738 if (decl)
2739 func = decl;
2740 else
2741 {
2742 func = TREE_TYPE (CALL_EXPR_FN (exp));
2743 if (POINTER_TYPE_P (func))
2744 func = TREE_TYPE (func);
2745 }
2746
2747 /* Check that the return value locations are the same. For example,
2748 if we are returning floats on the 80387 register stack, we cannot
2749 make a sibcall from a function that doesn't return a float to a
2750 function that does or, conversely, from a function that does return
2751 a float to a function that doesn't; the necessary stack adjustment
2752 would not be executed. This is also the place we notice
2753 differences in the return value ABI. Note that it is ok for one
2754 of the functions to have void return type as long as the return
2755 value of the other is passed in a register. */
2756 a = ix86_function_value (TREE_TYPE (exp), func, false);
2757 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2758 cfun->decl, false);
2759 if (STACK_REG_P (a) || STACK_REG_P (b))
2760 {
2761 if (!rtx_equal_p (a, b))
2762 return false;
2763 }
2764 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2765 ;
2766 else if (!rtx_equal_p (a, b))
2767 return false;
2768
2769 /* If this call is indirect, we'll need to be able to use a call-clobbered
2770 register for the address of the target function. Make sure that all
2771 such registers are not used for passing parameters. */
2772 if (!decl && !TARGET_64BIT)
2773 {
2774 tree type;
2775
2776 /* We're looking at the CALL_EXPR, we need the type of the function. */
2777 type = CALL_EXPR_FN (exp); /* pointer expression */
2778 type = TREE_TYPE (type); /* pointer type */
2779 type = TREE_TYPE (type); /* function type */
2780
2781 if (ix86_function_regparm (type, NULL) >= 3)
2782 {
2783 /* ??? Need to count the actual number of registers to be used,
2784 not the possible number of registers. Fix later. */
2785 return false;
2786 }
2787 }
2788
2789 /* Dllimport'd functions are also called indirectly. */
2790 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2791 && decl && DECL_DLLIMPORT_P (decl)
2792 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2793 return false;
2794
2795 /* If we force-aligned the stack, then sibcalling would unalign the
2796 stack, which may break the called function. */
2797 if (cfun->machine->force_align_arg_pointer)
2798 return false;
2799
2800 /* Otherwise okay. That also includes certain types of indirect calls. */
2801 return true;
2802 }
2803
2804 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2805 calling convention attributes;
2806 arguments as in struct attribute_spec.handler. */
2807
2808 static tree
2809 ix86_handle_cconv_attribute (tree *node, tree name,
2810 tree args,
2811 int flags ATTRIBUTE_UNUSED,
2812 bool *no_add_attrs)
2813 {
2814 if (TREE_CODE (*node) != FUNCTION_TYPE
2815 && TREE_CODE (*node) != METHOD_TYPE
2816 && TREE_CODE (*node) != FIELD_DECL
2817 && TREE_CODE (*node) != TYPE_DECL)
2818 {
2819 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2820 IDENTIFIER_POINTER (name));
2821 *no_add_attrs = true;
2822 return NULL_TREE;
2823 }
2824
2825 /* Can combine regparm with all attributes but fastcall. */
2826 if (is_attribute_p ("regparm", name))
2827 {
2828 tree cst;
2829
2830 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2831 {
2832 error ("fastcall and regparm attributes are not compatible");
2833 }
2834
2835 cst = TREE_VALUE (args);
2836 if (TREE_CODE (cst) != INTEGER_CST)
2837 {
2838 warning (OPT_Wattributes,
2839 "%qs attribute requires an integer constant argument",
2840 IDENTIFIER_POINTER (name));
2841 *no_add_attrs = true;
2842 }
2843 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2844 {
2845 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2846 IDENTIFIER_POINTER (name), REGPARM_MAX);
2847 *no_add_attrs = true;
2848 }
2849
2850 if (!TARGET_64BIT
2851 && lookup_attribute (ix86_force_align_arg_pointer_string,
2852 TYPE_ATTRIBUTES (*node))
2853 && compare_tree_int (cst, REGPARM_MAX-1))
2854 {
2855 error ("%s functions limited to %d register parameters",
2856 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2857 }
2858
2859 return NULL_TREE;
2860 }
2861
2862 if (TARGET_64BIT)
2863 {
2864 /* Do not warn when emulating the MS ABI. */
2865 if (!TARGET_64BIT_MS_ABI)
2866 warning (OPT_Wattributes, "%qs attribute ignored",
2867 IDENTIFIER_POINTER (name));
2868 *no_add_attrs = true;
2869 return NULL_TREE;
2870 }
2871
2872 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2873 if (is_attribute_p ("fastcall", name))
2874 {
2875 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2876 {
2877 error ("fastcall and cdecl attributes are not compatible");
2878 }
2879 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2880 {
2881 error ("fastcall and stdcall attributes are not compatible");
2882 }
2883 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2884 {
2885 error ("fastcall and regparm attributes are not compatible");
2886 }
2887 }
2888
2889 /* Can combine stdcall with fastcall (redundant), regparm and
2890 sseregparm. */
2891 else if (is_attribute_p ("stdcall", name))
2892 {
2893 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2894 {
2895 error ("stdcall and cdecl attributes are not compatible");
2896 }
2897 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2898 {
2899 error ("stdcall and fastcall attributes are not compatible");
2900 }
2901 }
2902
2903 /* Can combine cdecl with regparm and sseregparm. */
2904 else if (is_attribute_p ("cdecl", name))
2905 {
2906 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2907 {
2908 error ("stdcall and cdecl attributes are not compatible");
2909 }
2910 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2911 {
2912 error ("fastcall and cdecl attributes are not compatible");
2913 }
2914 }
2915
2916 /* Can combine sseregparm with all attributes. */
2917
2918 return NULL_TREE;
2919 }
2920
2921 /* Return 0 if the attributes for two types are incompatible, 1 if they
2922 are compatible, and 2 if they are nearly compatible (which causes a
2923 warning to be generated). */
2924
2925 static int
2926 ix86_comp_type_attributes (const_tree type1, const_tree type2)
2927 {
2928 /* Check for mismatch of non-default calling convention. */
2929 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2930
2931 if (TREE_CODE (type1) != FUNCTION_TYPE)
2932 return 1;
2933
2934 /* Check for mismatched fastcall/regparm types. */
2935 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2936 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2937 || (ix86_function_regparm (type1, NULL)
2938 != ix86_function_regparm (type2, NULL)))
2939 return 0;
2940
2941 /* Check for mismatched sseregparm types. */
2942 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2943 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2944 return 0;
2945
2946 /* Check for mismatched return types (cdecl vs stdcall). */
2947 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2948 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2949 return 0;
2950
2951 return 1;
2952 }
2953 \f
2954 /* Return the regparm value for a function with the indicated TYPE and DECL.
2955 DECL may be NULL when calling function indirectly
2956 or considering a libcall. */
2957
2958 static int
2959 ix86_function_regparm (const_tree type, const_tree decl)
2960 {
2961 tree attr;
2962 int regparm = ix86_regparm;
2963
2964 if (TARGET_64BIT)
2965 return regparm;
2966
2967 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2968 if (attr)
2969 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2970
2971 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2972 return 2;
2973
2974 /* Use register calling convention for local functions when possible. */
2975 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2976 && flag_unit_at_a_time && !profile_flag)
2977 {
2978 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
2979 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
2980 if (i && i->local)
2981 {
2982 int local_regparm, globals = 0, regno;
2983 struct function *f;
2984
2985 /* Make sure no regparm register is taken by a
2986 global register variable. */
2987 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2988 if (global_regs[local_regparm])
2989 break;
2990
2991 /* We can't use regparm(3) for nested functions as these use
2992 static chain pointer in third argument. */
2993 if (local_regparm == 3
2994 && (decl_function_context (decl)
2995 || ix86_force_align_arg_pointer)
2996 && !DECL_NO_STATIC_CHAIN (decl))
2997 local_regparm = 2;
2998
2999 /* If the function realigns its stack pointer, the prologue will
3000 clobber %ecx. If we've already generated code for the callee,
3001 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
3002 scanning the attributes for the self-realigning property. */
3003 f = DECL_STRUCT_FUNCTION (decl);
3004 if (local_regparm == 3
3005 && (f ? !!f->machine->force_align_arg_pointer
3006 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3007 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3008 local_regparm = 2;
3009
3010 /* Each global register variable increases register pressure,
3011 so the more global reg vars there are, the less useful the
3012 regparm optimization becomes, unless requested by the user explicitly. */
3013 for (regno = 0; regno < 6; regno++)
3014 if (global_regs[regno])
3015 globals++;
3016 local_regparm
3017 = globals < local_regparm ? local_regparm - globals : 0;
3018
3019 if (local_regparm > regparm)
3020 regparm = local_regparm;
3021 }
3022 }
3023
3024 return regparm;
3025 }
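/* A few hedged examples of the result (assuming 32-bit code and no
   -mregparm): a function declared __attribute__((regparm(3))) yields 3, a
   fastcall function yields 2, and a static function local to the unit may be
   promoted up to 3 by the cgraph-based logic above, unless a global register
   variable, a static chain, or stack realignment gets in the way.  */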
3026
3027 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3028 DFmode (2) arguments in SSE registers for a function with the
3029 indicated TYPE and DECL. DECL may be NULL when calling function
3030 indirectly or considering a libcall. Otherwise return 0. */
3031
3032 static int
3033 ix86_function_sseregparm (const_tree type, const_tree decl)
3034 {
3035 gcc_assert (!TARGET_64BIT);
3036
3037 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3038 by the sseregparm attribute. */
3039 if (TARGET_SSEREGPARM
3040 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3041 {
3042 if (!TARGET_SSE)
3043 {
3044 if (decl)
3045 error ("Calling %qD with attribute sseregparm without "
3046 "SSE/SSE2 enabled", decl);
3047 else
3048 error ("Calling %qT with attribute sseregparm without "
3049 "SSE/SSE2 enabled", type);
3050 return 0;
3051 }
3052
3053 return 2;
3054 }
3055
3056 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3057 (and DFmode for SSE2) arguments in SSE registers. */
3058 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3059 {
3060 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3061 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3062 if (i && i->local)
3063 return TARGET_SSE2 ? 2 : 1;
3064 }
3065
3066 return 0;
3067 }
3068
3069 /* Return true if EAX is live at the start of the function. Used by
3070 ix86_expand_prologue to determine if we need special help before
3071 calling allocate_stack_worker. */
3072
3073 static bool
3074 ix86_eax_live_at_start_p (void)
3075 {
3076 /* Cheat. Don't bother working forward from ix86_function_regparm
3077 to the function type to whether an actual argument is located in
3078 eax. Instead just look at cfg info, which is still close enough
3079 to correct at this point. This gives false positives for broken
3080 functions that might use uninitialized data that happens to be
3081 allocated in eax, but who cares? */
3082 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3083 }
3084
3085 /* Return true if TYPE has a variable argument list. */
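/* For example (illustrative): "int f (int, ...)" has an argument list that
   does not end in void_type_node, so this returns true; for "int f (int)"
   the list ends with void_type_node and this returns false; an unprototyped
   "int f ()" has no TYPE_ARG_TYPES at all and also returns false.  */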
3086
3087 static bool
3088 type_has_variadic_args_p (tree type)
3089 {
3090 tree n, t = TYPE_ARG_TYPES (type);
3091
3092 if (t == NULL)
3093 return false;
3094
3095 while ((n = TREE_CHAIN (t)) != NULL)
3096 t = n;
3097
3098 return TREE_VALUE (t) != void_type_node;
3099 }
3100
3101 /* Value is the number of bytes of arguments automatically
3102 popped when returning from a subroutine call.
3103 FUNDECL is the declaration node of the function (as a tree),
3104 FUNTYPE is the data type of the function (as a tree),
3105 or for a library call it is an identifier node for the subroutine name.
3106 SIZE is the number of bytes of arguments passed on the stack.
3107
3108 On the 80386, the RTD insn may be used to pop them if the number
3109 of args is fixed, but if the number is variable then the caller
3110 must pop them all. RTD can't be used for library calls now
3111 because the library is compiled with the Unix compiler.
3112 Use of RTD is a selectable option, since it is incompatible with
3113 standard Unix calling sequences. If the option is not selected,
3114 the caller must always pop the args.
3115
3116 The attribute stdcall is equivalent to RTD on a per module basis. */
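/* Illustrative example (hypothetical declaration): for a 32-bit
     void __attribute__((stdcall)) f (int a, int b);
   the callee pops its own 8 bytes of arguments (RET 8), so this function
   returns 8; for a cdecl or variadic function it returns 0 and the caller
   pops the arguments.  */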
3117
3118 int
3119 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3120 {
3121 int rtd;
3122
3123 /* None of the 64-bit ABIs pop arguments. */
3124 if (TARGET_64BIT)
3125 return 0;
3126
3127 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3128
3129 /* Cdecl functions override -mrtd, and never pop the stack. */
3130 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3131 {
3132 /* Stdcall and fastcall functions will pop the stack if not
3133 variable args. */
3134 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3135 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3136 rtd = 1;
3137
3138 if (rtd && ! type_has_variadic_args_p (funtype))
3139 return size;
3140 }
3141
3142 /* Lose any fake structure return argument if it is passed on the stack. */
3143 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3144 && !KEEP_AGGREGATE_RETURN_POINTER)
3145 {
3146 int nregs = ix86_function_regparm (funtype, fundecl);
3147 if (nregs == 0)
3148 return GET_MODE_SIZE (Pmode);
3149 }
3150
3151 return 0;
3152 }
3153 \f
3154 /* Argument support functions. */
3155
3156 /* Return true when register may be used to pass function parameters. */
3157 bool
3158 ix86_function_arg_regno_p (int regno)
3159 {
3160 int i;
3161 const int *parm_regs;
3162
3163 if (!TARGET_64BIT)
3164 {
3165 if (TARGET_MACHO)
3166 return (regno < REGPARM_MAX
3167 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3168 else
3169 return (regno < REGPARM_MAX
3170 || (TARGET_MMX && MMX_REGNO_P (regno)
3171 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3172 || (TARGET_SSE && SSE_REGNO_P (regno)
3173 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3174 }
3175
3176 if (TARGET_MACHO)
3177 {
3178 if (SSE_REGNO_P (regno) && TARGET_SSE)
3179 return true;
3180 }
3181 else
3182 {
3183 if (TARGET_SSE && SSE_REGNO_P (regno)
3184 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3185 return true;
3186 }
3187
3188 /* RAX is used as hidden argument to va_arg functions. */
3189 if (!TARGET_64BIT_MS_ABI && regno == 0)
3190 return true;
3191
3192 if (TARGET_64BIT_MS_ABI)
3193 parm_regs = x86_64_ms_abi_int_parameter_registers;
3194 else
3195 parm_regs = x86_64_int_parameter_registers;
3196 for (i = 0; i < REGPARM_MAX; i++)
3197 if (regno == parm_regs[i])
3198 return true;
3199 return false;
3200 }
3201
3202 /* Return true if we do not know how to pass TYPE solely in registers.  */
3203
3204 static bool
3205 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3206 {
3207 if (must_pass_in_stack_var_size_or_pad (mode, type))
3208 return true;
3209
3210 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3211 The layout_type routine is crafty and tries to trick us into passing
3212 currently unsupported vector types on the stack by using TImode. */
3213 return (!TARGET_64BIT && mode == TImode
3214 && type && TREE_CODE (type) != VECTOR_TYPE);
3215 }
3216
3217 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3218 for a call to a function whose data type is FNTYPE.
3219 For a library call, FNTYPE is 0. */
3220
3221 void
3222 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3223 tree fntype, /* tree ptr for function decl */
3224 rtx libname, /* SYMBOL_REF of library name or 0 */
3225 tree fndecl)
3226 {
3227 memset (cum, 0, sizeof (*cum));
3228
3229 /* Set up the number of registers to use for passing arguments. */
3230 cum->nregs = ix86_regparm;
3231 if (TARGET_SSE)
3232 cum->sse_nregs = SSE_REGPARM_MAX;
3233 if (TARGET_MMX)
3234 cum->mmx_nregs = MMX_REGPARM_MAX;
3235 cum->warn_sse = true;
3236 cum->warn_mmx = true;
3237 cum->maybe_vaarg = (fntype
3238 ? (!TYPE_ARG_TYPES (fntype)
3239 || type_has_variadic_args_p (fntype))
3240 : !libname);
3241
3242 if (!TARGET_64BIT)
3243 {
3244 /* If there are variable arguments, then we won't pass anything
3245 in registers in 32-bit mode. */
3246 if (cum->maybe_vaarg)
3247 {
3248 cum->nregs = 0;
3249 cum->sse_nregs = 0;
3250 cum->mmx_nregs = 0;
3251 cum->warn_sse = 0;
3252 cum->warn_mmx = 0;
3253 return;
3254 }
3255
3256 /* Use ecx and edx registers if function has fastcall attribute,
3257 else look for regparm information. */
3258 if (fntype)
3259 {
3260 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3261 {
3262 cum->nregs = 2;
3263 cum->fastcall = 1;
3264 }
3265 else
3266 cum->nregs = ix86_function_regparm (fntype, fndecl);
3267 }
3268
3269 /* Set up the number of SSE registers used for passing SFmode
3270 and DFmode arguments. Warn for mismatching ABI. */
3271 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3272 }
3273 }
3274
3275 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3276 But in the case of vector types, it is some vector mode.
3277
3278 When we have only some of our vector isa extensions enabled, then there
3279 are some modes for which vector_mode_supported_p is false. For these
3280 modes, the generic vector support in gcc will choose some non-vector mode
3281 in order to implement the type. By computing the natural mode, we'll
3282 select the proper ABI location for the operand and not depend on whatever
3283 the middle-end decides to do with these vector types. */
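/* For example (a sketch): a type declared as
     typedef int v4si __attribute__ ((vector_size (16)));
   is given the natural mode V4SImode here, even when SSE is disabled and
   TYPE_MODE would be some non-vector mode chosen by the middle-end.  */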
3284
3285 static enum machine_mode
3286 type_natural_mode (const_tree type)
3287 {
3288 enum machine_mode mode = TYPE_MODE (type);
3289
3290 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3291 {
3292 HOST_WIDE_INT size = int_size_in_bytes (type);
3293 if ((size == 8 || size == 16)
3294 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3295 && TYPE_VECTOR_SUBPARTS (type) > 1)
3296 {
3297 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3298
3299 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3300 mode = MIN_MODE_VECTOR_FLOAT;
3301 else
3302 mode = MIN_MODE_VECTOR_INT;
3303
3304 /* Get the mode which has this inner mode and number of units. */
3305 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3306 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3307 && GET_MODE_INNER (mode) == innermode)
3308 return mode;
3309
3310 gcc_unreachable ();
3311 }
3312 }
3313
3314 return mode;
3315 }
3316
3317 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3318 this may not agree with the mode that the type system has chosen for the
3319 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3320 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3321
3322 static rtx
3323 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3324 unsigned int regno)
3325 {
3326 rtx tmp;
3327
3328 if (orig_mode != BLKmode)
3329 tmp = gen_rtx_REG (orig_mode, regno);
3330 else
3331 {
3332 tmp = gen_rtx_REG (mode, regno);
3333 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3334 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3335 }
3336
3337 return tmp;
3338 }
3339
3340 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
3341    The goal of this code is to classify each eightbyte of the incoming
3342    argument by register class and assign registers accordingly.  */
3343
3344 /* Return the union class of CLASS1 and CLASS2.
3345 See the x86-64 PS ABI for details. */
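/* For instance (informal restatement of the rules below): merging
   X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS yields
   X86_64_INTEGERSI_CLASS (rule #4), and merging any SSE class with an x87
   class yields X86_64_MEMORY_CLASS (rule #5).  */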
3346
3347 static enum x86_64_reg_class
3348 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3349 {
3350 /* Rule #1: If both classes are equal, this is the resulting class. */
3351 if (class1 == class2)
3352 return class1;
3353
3354 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3355 the other class. */
3356 if (class1 == X86_64_NO_CLASS)
3357 return class2;
3358 if (class2 == X86_64_NO_CLASS)
3359 return class1;
3360
3361 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3362 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3363 return X86_64_MEMORY_CLASS;
3364
3365 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3366 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3367 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3368 return X86_64_INTEGERSI_CLASS;
3369 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3370 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3371 return X86_64_INTEGER_CLASS;
3372
3373 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3374 MEMORY is used. */
3375 if (class1 == X86_64_X87_CLASS
3376 || class1 == X86_64_X87UP_CLASS
3377 || class1 == X86_64_COMPLEX_X87_CLASS
3378 || class2 == X86_64_X87_CLASS
3379 || class2 == X86_64_X87UP_CLASS
3380 || class2 == X86_64_COMPLEX_X87_CLASS)
3381 return X86_64_MEMORY_CLASS;
3382
3383 /* Rule #6: Otherwise class SSE is used. */
3384 return X86_64_SSE_CLASS;
3385 }
3386
3387 /* Classify the argument of type TYPE and mode MODE.
3388 CLASSES will be filled by the register class used to pass each word
3389 of the operand. The number of words is returned. In case the parameter
3390 should be passed in memory, 0 is returned. As a special case for zero
3391 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3392
3393    BIT_OFFSET is used internally for handling records and specifies the
3394    offset in bits modulo 256 to avoid overflow cases.
3395
3396 See the x86-64 PS ABI for details.
3397 */
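/* Worked example (a sketch, following the x86-64 ABI): for
     struct s { double d; int a; int b; };
   the first eightbyte is classified as an SSE class and the second as
   INTEGER, so 2 is returned; a structure larger than 16 bytes is classified
   as MEMORY and 0 is returned.  */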
3398
3399 static int
3400 classify_argument (enum machine_mode mode, const_tree type,
3401 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3402 {
3403 HOST_WIDE_INT bytes =
3404 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3405 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3406
3407 /* Variable sized entities are always passed/returned in memory. */
3408 if (bytes < 0)
3409 return 0;
3410
3411 if (mode != VOIDmode
3412 && targetm.calls.must_pass_in_stack (mode, type))
3413 return 0;
3414
3415 if (type && AGGREGATE_TYPE_P (type))
3416 {
3417 int i;
3418 tree field;
3419 enum x86_64_reg_class subclasses[MAX_CLASSES];
3420
3421 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3422 if (bytes > 16)
3423 return 0;
3424
3425 for (i = 0; i < words; i++)
3426 classes[i] = X86_64_NO_CLASS;
3427
3428       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3429 	 signal the memory class, so handle them as a special case.  */
3430 if (!words)
3431 {
3432 classes[0] = X86_64_NO_CLASS;
3433 return 1;
3434 }
3435
3436 /* Classify each field of record and merge classes. */
3437 switch (TREE_CODE (type))
3438 {
3439 case RECORD_TYPE:
3440 /* And now merge the fields of structure. */
3441 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3442 {
3443 if (TREE_CODE (field) == FIELD_DECL)
3444 {
3445 int num;
3446
3447 if (TREE_TYPE (field) == error_mark_node)
3448 continue;
3449
3450 /* Bitfields are always classified as integer. Handle them
3451 early, since later code would consider them to be
3452 misaligned integers. */
3453 if (DECL_BIT_FIELD (field))
3454 {
3455 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3456 i < ((int_bit_position (field) + (bit_offset % 64))
3457 + tree_low_cst (DECL_SIZE (field), 0)
3458 + 63) / 8 / 8; i++)
3459 classes[i] =
3460 merge_classes (X86_64_INTEGER_CLASS,
3461 classes[i]);
3462 }
3463 else
3464 {
3465 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3466 TREE_TYPE (field), subclasses,
3467 (int_bit_position (field)
3468 + bit_offset) % 256);
3469 if (!num)
3470 return 0;
3471 for (i = 0; i < num; i++)
3472 {
3473 int pos =
3474 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3475 classes[i + pos] =
3476 merge_classes (subclasses[i], classes[i + pos]);
3477 }
3478 }
3479 }
3480 }
3481 break;
3482
3483 case ARRAY_TYPE:
3484 /* Arrays are handled as small records. */
3485 {
3486 int num;
3487 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3488 TREE_TYPE (type), subclasses, bit_offset);
3489 if (!num)
3490 return 0;
3491
3492 /* The partial classes are now full classes. */
3493 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3494 subclasses[0] = X86_64_SSE_CLASS;
3495 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3496 subclasses[0] = X86_64_INTEGER_CLASS;
3497
3498 for (i = 0; i < words; i++)
3499 classes[i] = subclasses[i % num];
3500
3501 break;
3502 }
3503 case UNION_TYPE:
3504 case QUAL_UNION_TYPE:
3505 	/* Unions are similar to RECORD_TYPE but the offset
3506 	   is always 0.  */
3507 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3508 {
3509 if (TREE_CODE (field) == FIELD_DECL)
3510 {
3511 int num;
3512
3513 if (TREE_TYPE (field) == error_mark_node)
3514 continue;
3515
3516 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3517 TREE_TYPE (field), subclasses,
3518 bit_offset);
3519 if (!num)
3520 return 0;
3521 for (i = 0; i < num; i++)
3522 classes[i] = merge_classes (subclasses[i], classes[i]);
3523 }
3524 }
3525 break;
3526
3527 default:
3528 gcc_unreachable ();
3529 }
3530
3531 /* Final merger cleanup. */
3532 for (i = 0; i < words; i++)
3533 {
3534 /* If one class is MEMORY, everything should be passed in
3535 memory. */
3536 if (classes[i] == X86_64_MEMORY_CLASS)
3537 return 0;
3538
3539 	  /* The X86_64_SSEUP_CLASS should always be preceded by
3540 	     X86_64_SSE_CLASS.  */
3541 if (classes[i] == X86_64_SSEUP_CLASS
3542 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3543 classes[i] = X86_64_SSE_CLASS;
3544
3545 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3546 if (classes[i] == X86_64_X87UP_CLASS
3547 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3548 classes[i] = X86_64_SSE_CLASS;
3549 }
3550 return words;
3551 }
3552
3553   /* Compute the alignment needed.  We align all types to their natural
3554      boundaries, with the exception of XFmode, which is treated below as 128-bit aligned.  */
3555 if (mode != VOIDmode && mode != BLKmode)
3556 {
3557 int mode_alignment = GET_MODE_BITSIZE (mode);
3558
3559 if (mode == XFmode)
3560 mode_alignment = 128;
3561 else if (mode == XCmode)
3562 mode_alignment = 256;
3563 if (COMPLEX_MODE_P (mode))
3564 mode_alignment /= 2;
3565 /* Misaligned fields are always returned in memory. */
3566 if (bit_offset % mode_alignment)
3567 return 0;
3568 }
3569
3570 /* for V1xx modes, just use the base mode */
3571 if (VECTOR_MODE_P (mode)
3572 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3573 mode = GET_MODE_INNER (mode);
3574
3575 /* Classification of atomic types. */
3576 switch (mode)
3577 {
3578 case SDmode:
3579 case DDmode:
3580 classes[0] = X86_64_SSE_CLASS;
3581 return 1;
3582 case TDmode:
3583 classes[0] = X86_64_SSE_CLASS;
3584 classes[1] = X86_64_SSEUP_CLASS;
3585 return 2;
3586 case DImode:
3587 case SImode:
3588 case HImode:
3589 case QImode:
3590 case CSImode:
3591 case CHImode:
3592 case CQImode:
3593 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3594 classes[0] = X86_64_INTEGERSI_CLASS;
3595 else
3596 classes[0] = X86_64_INTEGER_CLASS;
3597 return 1;
3598 case CDImode:
3599 case TImode:
3600 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3601 return 2;
3602 case CTImode:
3603 return 0;
3604 case SFmode:
3605 if (!(bit_offset % 64))
3606 classes[0] = X86_64_SSESF_CLASS;
3607 else
3608 classes[0] = X86_64_SSE_CLASS;
3609 return 1;
3610 case DFmode:
3611 classes[0] = X86_64_SSEDF_CLASS;
3612 return 1;
3613 case XFmode:
3614 classes[0] = X86_64_X87_CLASS;
3615 classes[1] = X86_64_X87UP_CLASS;
3616 return 2;
3617 case TFmode:
3618 classes[0] = X86_64_SSE_CLASS;
3619 classes[1] = X86_64_SSEUP_CLASS;
3620 return 2;
3621 case SCmode:
3622 classes[0] = X86_64_SSE_CLASS;
3623 return 1;
3624 case DCmode:
3625 classes[0] = X86_64_SSEDF_CLASS;
3626 classes[1] = X86_64_SSEDF_CLASS;
3627 return 2;
3628 case XCmode:
3629 classes[0] = X86_64_COMPLEX_X87_CLASS;
3630 return 1;
3631 case TCmode:
3632     /* This mode is larger than 16 bytes.  */
3633 return 0;
3634 case V4SFmode:
3635 case V4SImode:
3636 case V16QImode:
3637 case V8HImode:
3638 case V2DFmode:
3639 case V2DImode:
3640 classes[0] = X86_64_SSE_CLASS;
3641 classes[1] = X86_64_SSEUP_CLASS;
3642 return 2;
3643 case V2SFmode:
3644 case V2SImode:
3645 case V4HImode:
3646 case V8QImode:
3647 classes[0] = X86_64_SSE_CLASS;
3648 return 1;
3649 case BLKmode:
3650 case VOIDmode:
3651 return 0;
3652 default:
3653 gcc_assert (VECTOR_MODE_P (mode));
3654
3655 if (bytes > 16)
3656 return 0;
3657
3658 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3659
3660 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3661 classes[0] = X86_64_INTEGERSI_CLASS;
3662 else
3663 classes[0] = X86_64_INTEGER_CLASS;
3664 classes[1] = X86_64_INTEGER_CLASS;
3665 return 1 + (bytes > 8);
3666 }
3667 }
3668
3669 /* Examine the argument and return the number of registers required in each
3670    class.  Return 0 iff the parameter should be passed in memory.  */
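/* E.g. (informal): for struct { double d; long l; } this sets
   *sse_nregs = 1 and *int_nregs = 1 and returns nonzero; for a 32-byte
   structure classify_argument returns 0 and so does this function, meaning
   the argument is passed in memory.  */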
3671 static int
3672 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3673 int *int_nregs, int *sse_nregs)
3674 {
3675 enum x86_64_reg_class regclass[MAX_CLASSES];
3676 int n = classify_argument (mode, type, regclass, 0);
3677
3678 *int_nregs = 0;
3679 *sse_nregs = 0;
3680 if (!n)
3681 return 0;
3682 for (n--; n >= 0; n--)
3683 switch (regclass[n])
3684 {
3685 case X86_64_INTEGER_CLASS:
3686 case X86_64_INTEGERSI_CLASS:
3687 (*int_nregs)++;
3688 break;
3689 case X86_64_SSE_CLASS:
3690 case X86_64_SSESF_CLASS:
3691 case X86_64_SSEDF_CLASS:
3692 (*sse_nregs)++;
3693 break;
3694 case X86_64_NO_CLASS:
3695 case X86_64_SSEUP_CLASS:
3696 break;
3697 case X86_64_X87_CLASS:
3698 case X86_64_X87UP_CLASS:
3699 if (!in_return)
3700 return 0;
3701 break;
3702 case X86_64_COMPLEX_X87_CLASS:
3703 return in_return ? 2 : 0;
3704 case X86_64_MEMORY_CLASS:
3705 gcc_unreachable ();
3706 }
3707 return 1;
3708 }
3709
3710 /* Construct container for the argument used by GCC interface. See
3711 FUNCTION_ARG for the detailed description. */
3712
3713 static rtx
3714 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3715 const_tree type, int in_return, int nintregs, int nsseregs,
3716 const int *intreg, int sse_regno)
3717 {
3718 /* The following variables hold the static issued_error state. */
3719 static bool issued_sse_arg_error;
3720 static bool issued_sse_ret_error;
3721 static bool issued_x87_ret_error;
3722
3723 enum machine_mode tmpmode;
3724 int bytes =
3725 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3726 enum x86_64_reg_class regclass[MAX_CLASSES];
3727 int n;
3728 int i;
3729 int nexps = 0;
3730 int needed_sseregs, needed_intregs;
3731 rtx exp[MAX_CLASSES];
3732 rtx ret;
3733
3734 n = classify_argument (mode, type, regclass, 0);
3735 if (!n)
3736 return NULL;
3737 if (!examine_argument (mode, type, in_return, &needed_intregs,
3738 &needed_sseregs))
3739 return NULL;
3740 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3741 return NULL;
3742
3743 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3744 some less clueful developer tries to use floating-point anyway. */
3745 if (needed_sseregs && !TARGET_SSE)
3746 {
3747 if (in_return)
3748 {
3749 if (!issued_sse_ret_error)
3750 {
3751 error ("SSE register return with SSE disabled");
3752 issued_sse_ret_error = true;
3753 }
3754 }
3755 else if (!issued_sse_arg_error)
3756 {
3757 error ("SSE register argument with SSE disabled");
3758 issued_sse_arg_error = true;
3759 }
3760 return NULL;
3761 }
3762
3763 /* Likewise, error if the ABI requires us to return values in the
3764 x87 registers and the user specified -mno-80387. */
3765 if (!TARGET_80387 && in_return)
3766 for (i = 0; i < n; i++)
3767 if (regclass[i] == X86_64_X87_CLASS
3768 || regclass[i] == X86_64_X87UP_CLASS
3769 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3770 {
3771 if (!issued_x87_ret_error)
3772 {
3773 error ("x87 register return with x87 disabled");
3774 issued_x87_ret_error = true;
3775 }
3776 return NULL;
3777 }
3778
3779   /* First construct simple cases.  Avoid SCmode, since we want to use
3780      a single register to pass this type.  */
3781 if (n == 1 && mode != SCmode)
3782 switch (regclass[0])
3783 {
3784 case X86_64_INTEGER_CLASS:
3785 case X86_64_INTEGERSI_CLASS:
3786 return gen_rtx_REG (mode, intreg[0]);
3787 case X86_64_SSE_CLASS:
3788 case X86_64_SSESF_CLASS:
3789 case X86_64_SSEDF_CLASS:
3790 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3791 case X86_64_X87_CLASS:
3792 case X86_64_COMPLEX_X87_CLASS:
3793 return gen_rtx_REG (mode, FIRST_STACK_REG);
3794 case X86_64_NO_CLASS:
3795 /* Zero sized array, struct or class. */
3796 return NULL;
3797 default:
3798 gcc_unreachable ();
3799 }
3800 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3801 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3802 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3803
3804 if (n == 2
3805 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3806 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3807 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3808 && regclass[1] == X86_64_INTEGER_CLASS
3809 && (mode == CDImode || mode == TImode || mode == TFmode)
3810 && intreg[0] + 1 == intreg[1])
3811 return gen_rtx_REG (mode, intreg[0]);
3812
3813 /* Otherwise figure out the entries of the PARALLEL. */
3814 for (i = 0; i < n; i++)
3815 {
3816 switch (regclass[i])
3817 {
3818 case X86_64_NO_CLASS:
3819 break;
3820 case X86_64_INTEGER_CLASS:
3821 case X86_64_INTEGERSI_CLASS:
3822 /* Merge TImodes on aligned occasions here too. */
3823 if (i * 8 + 8 > bytes)
3824 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3825 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3826 tmpmode = SImode;
3827 else
3828 tmpmode = DImode;
3829 	  /* We've requested 24 bytes we don't have a mode for.  Use DImode.  */
3830 if (tmpmode == BLKmode)
3831 tmpmode = DImode;
3832 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3833 gen_rtx_REG (tmpmode, *intreg),
3834 GEN_INT (i*8));
3835 intreg++;
3836 break;
3837 case X86_64_SSESF_CLASS:
3838 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3839 gen_rtx_REG (SFmode,
3840 SSE_REGNO (sse_regno)),
3841 GEN_INT (i*8));
3842 sse_regno++;
3843 break;
3844 case X86_64_SSEDF_CLASS:
3845 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3846 gen_rtx_REG (DFmode,
3847 SSE_REGNO (sse_regno)),
3848 GEN_INT (i*8));
3849 sse_regno++;
3850 break;
3851 case X86_64_SSE_CLASS:
3852 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3853 tmpmode = TImode;
3854 else
3855 tmpmode = DImode;
3856 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3857 gen_rtx_REG (tmpmode,
3858 SSE_REGNO (sse_regno)),
3859 GEN_INT (i*8));
3860 if (tmpmode == TImode)
3861 i++;
3862 sse_regno++;
3863 break;
3864 default:
3865 gcc_unreachable ();
3866 }
3867 }
3868
3869 /* Empty aligned struct, union or class. */
3870 if (nexps == 0)
3871 return NULL;
3872
3873 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3874 for (i = 0; i < nexps; i++)
3875 XVECEXP (ret, 0, i) = exp [i];
3876 return ret;
3877 }
3878
3879 /* Update the data in CUM to advance over an argument of mode MODE
3880 and data type TYPE. (TYPE is null for libcalls where that information
3881 may not be available.) */
3882
3883 static void
3884 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3885 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3886 {
3887 switch (mode)
3888 {
3889 default:
3890 break;
3891
3892 case BLKmode:
3893 if (bytes < 0)
3894 break;
3895 /* FALLTHRU */
3896
3897 case DImode:
3898 case SImode:
3899 case HImode:
3900 case QImode:
3901 cum->words += words;
3902 cum->nregs -= words;
3903 cum->regno += words;
3904
3905 if (cum->nregs <= 0)
3906 {
3907 cum->nregs = 0;
3908 cum->regno = 0;
3909 }
3910 break;
3911
3912 case DFmode:
3913 if (cum->float_in_sse < 2)
3914 break;
3915 case SFmode:
3916 if (cum->float_in_sse < 1)
3917 break;
3918 /* FALLTHRU */
3919
3920 case TImode:
3921 case V16QImode:
3922 case V8HImode:
3923 case V4SImode:
3924 case V2DImode:
3925 case V4SFmode:
3926 case V2DFmode:
3927 if (!type || !AGGREGATE_TYPE_P (type))
3928 {
3929 cum->sse_words += words;
3930 cum->sse_nregs -= 1;
3931 cum->sse_regno += 1;
3932 if (cum->sse_nregs <= 0)
3933 {
3934 cum->sse_nregs = 0;
3935 cum->sse_regno = 0;
3936 }
3937 }
3938 break;
3939
3940 case V8QImode:
3941 case V4HImode:
3942 case V2SImode:
3943 case V2SFmode:
3944 if (!type || !AGGREGATE_TYPE_P (type))
3945 {
3946 cum->mmx_words += words;
3947 cum->mmx_nregs -= 1;
3948 cum->mmx_regno += 1;
3949 if (cum->mmx_nregs <= 0)
3950 {
3951 cum->mmx_nregs = 0;
3952 cum->mmx_regno = 0;
3953 }
3954 }
3955 break;
3956 }
3957 }
3958
3959 static void
3960 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3961 tree type, HOST_WIDE_INT words)
3962 {
3963 int int_nregs, sse_nregs;
3964
3965 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3966 cum->words += words;
3967 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3968 {
3969 cum->nregs -= int_nregs;
3970 cum->sse_nregs -= sse_nregs;
3971 cum->regno += int_nregs;
3972 cum->sse_regno += sse_nregs;
3973 }
3974 else
3975 cum->words += words;
3976 }
3977
3978 static void
3979 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3980 HOST_WIDE_INT words)
3981 {
3982   /* Arguments of any other size should have been passed indirectly.  */
3983 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3984
3985 cum->words += words;
3986 if (cum->nregs > 0)
3987 {
3988 cum->nregs -= 1;
3989 cum->regno += 1;
3990 }
3991 }
3992
3993 void
3994 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3995 tree type, int named ATTRIBUTE_UNUSED)
3996 {
3997 HOST_WIDE_INT bytes, words;
3998
3999 if (mode == BLKmode)
4000 bytes = int_size_in_bytes (type);
4001 else
4002 bytes = GET_MODE_SIZE (mode);
4003 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4004
4005 if (type)
4006 mode = type_natural_mode (type);
4007
4008 if (TARGET_64BIT_MS_ABI)
4009 function_arg_advance_ms_64 (cum, bytes, words);
4010 else if (TARGET_64BIT)
4011 function_arg_advance_64 (cum, mode, type, words);
4012 else
4013 function_arg_advance_32 (cum, mode, type, bytes, words);
4014 }
4015
4016 /* Define where to put the arguments to a function.
4017 Value is zero to push the argument on the stack,
4018 or a hard register in which to store the argument.
4019
4020 MODE is the argument's machine mode.
4021 TYPE is the data type of the argument (as a tree).
4022 This is null for libcalls where that information may
4023 not be available.
4024 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4025 the preceding args and about the function being called.
4026 NAMED is nonzero if this argument is a named parameter
4027 (otherwise it is an extra parameter matching an ellipsis). */
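/* For example (hypothetical declaration), with the 32-bit fastcall
   convention handled below,
     void __attribute__((fastcall)) f (int a, int b, int c);
   passes A in %ecx, B in %edx, and C on the stack.  */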
4028
4029 static rtx
4030 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4031 enum machine_mode orig_mode, tree type,
4032 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4033 {
4034 static bool warnedsse, warnedmmx;
4035
4036 /* Avoid the AL settings for the Unix64 ABI. */
4037 if (mode == VOIDmode)
4038 return constm1_rtx;
4039
4040 switch (mode)
4041 {
4042 default:
4043 break;
4044
4045 case BLKmode:
4046 if (bytes < 0)
4047 break;
4048 /* FALLTHRU */
4049 case DImode:
4050 case SImode:
4051 case HImode:
4052 case QImode:
4053 if (words <= cum->nregs)
4054 {
4055 int regno = cum->regno;
4056
4057 /* Fastcall allocates the first two DWORD (SImode) or
4058 smaller arguments to ECX and EDX. */
4059 if (cum->fastcall)
4060 {
4061 if (mode == BLKmode || mode == DImode)
4062 break;
4063
4064 /* ECX not EAX is the first allocated register. */
4065 if (regno == 0)
4066 regno = 2;
4067 }
4068 return gen_rtx_REG (mode, regno);
4069 }
4070 break;
4071
4072 case DFmode:
4073 if (cum->float_in_sse < 2)
4074 break;
4075 case SFmode:
4076 if (cum->float_in_sse < 1)
4077 break;
4078 /* FALLTHRU */
4079 case TImode:
4080 case V16QImode:
4081 case V8HImode:
4082 case V4SImode:
4083 case V2DImode:
4084 case V4SFmode:
4085 case V2DFmode:
4086 if (!type || !AGGREGATE_TYPE_P (type))
4087 {
4088 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4089 {
4090 warnedsse = true;
4091 warning (0, "SSE vector argument without SSE enabled "
4092 "changes the ABI");
4093 }
4094 if (cum->sse_nregs)
4095 return gen_reg_or_parallel (mode, orig_mode,
4096 cum->sse_regno + FIRST_SSE_REG);
4097 }
4098 break;
4099
4100 case V8QImode:
4101 case V4HImode:
4102 case V2SImode:
4103 case V2SFmode:
4104 if (!type || !AGGREGATE_TYPE_P (type))
4105 {
4106 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4107 {
4108 warnedmmx = true;
4109 warning (0, "MMX vector argument without MMX enabled "
4110 "changes the ABI");
4111 }
4112 if (cum->mmx_nregs)
4113 return gen_reg_or_parallel (mode, orig_mode,
4114 cum->mmx_regno + FIRST_MMX_REG);
4115 }
4116 break;
4117 }
4118
4119 return NULL_RTX;
4120 }
4121
4122 static rtx
4123 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4124 enum machine_mode orig_mode, tree type)
4125 {
4126 /* Handle a hidden AL argument containing number of registers
4127 for varargs x86-64 functions. */
4128 if (mode == VOIDmode)
4129 return GEN_INT (cum->maybe_vaarg
4130 ? (cum->sse_nregs < 0
4131 ? SSE_REGPARM_MAX
4132 : cum->sse_regno)
4133 : -1);
4134
4135 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4136 cum->sse_nregs,
4137 &x86_64_int_parameter_registers [cum->regno],
4138 cum->sse_regno);
4139 }
4140
4141 static rtx
4142 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4143 enum machine_mode orig_mode, int named)
4144 {
4145 unsigned int regno;
4146
4147 /* Avoid the AL settings for the Unix64 ABI. */
4148 if (mode == VOIDmode)
4149 return constm1_rtx;
4150
4151 /* If we've run out of registers, it goes on the stack. */
4152 if (cum->nregs == 0)
4153 return NULL_RTX;
4154
4155 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4156
4157 /* Only floating point modes are passed in anything but integer regs. */
4158 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4159 {
4160 if (named)
4161 regno = cum->regno + FIRST_SSE_REG;
4162 else
4163 {
4164 rtx t1, t2;
4165
4166 /* Unnamed floating parameters are passed in both the
4167 SSE and integer registers. */
4168 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4169 t2 = gen_rtx_REG (mode, regno);
4170 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4171 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4172 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4173 }
4174 }
4175
4176 return gen_reg_or_parallel (mode, orig_mode, regno);
4177 }
4178
4179 rtx
4180 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4181 tree type, int named)
4182 {
4183 enum machine_mode mode = omode;
4184 HOST_WIDE_INT bytes, words;
4185
4186 if (mode == BLKmode)
4187 bytes = int_size_in_bytes (type);
4188 else
4189 bytes = GET_MODE_SIZE (mode);
4190 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4191
4192 /* To simplify the code below, represent vector types with a vector mode
4193 even if MMX/SSE are not active. */
4194 if (type && TREE_CODE (type) == VECTOR_TYPE)
4195 mode = type_natural_mode (type);
4196
4197 if (TARGET_64BIT_MS_ABI)
4198 return function_arg_ms_64 (cum, mode, omode, named);
4199 else if (TARGET_64BIT)
4200 return function_arg_64 (cum, mode, omode, type);
4201 else
4202 return function_arg_32 (cum, mode, omode, type, bytes, words);
4203 }
4204
4205 /* A C expression that indicates when an argument must be passed by
4206 reference. If nonzero for an argument, a copy of that argument is
4207 made in memory and a pointer to the argument is passed instead of
4208 the argument itself. The pointer is passed in whatever way is
4209 appropriate for passing a pointer to that type. */
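/* Examples under the 64-bit MS ABI (a sketch): a struct of 1, 2, 4 or 8
   bytes is passed by value in a register, while a 12-byte struct, an array,
   or a 16-byte __m128 value is passed by reference.  */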
4210
4211 static bool
4212 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4213 enum machine_mode mode ATTRIBUTE_UNUSED,
4214 const_tree type, bool named ATTRIBUTE_UNUSED)
4215 {
4216 if (TARGET_64BIT_MS_ABI)
4217 {
4218 if (type)
4219 {
4220 /* Arrays are passed by reference. */
4221 if (TREE_CODE (type) == ARRAY_TYPE)
4222 return true;
4223
4224 if (AGGREGATE_TYPE_P (type))
4225 {
4226 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4227 are passed by reference. */
4228 int el2 = exact_log2 (int_size_in_bytes (type));
4229 return !(el2 >= 0 && el2 <= 3);
4230 }
4231 }
4232
4233 /* __m128 is passed by reference. */
4234 /* ??? How to handle complex? For now treat them as structs,
4235 and pass them by reference if they're too large. */
4236 if (GET_MODE_SIZE (mode) > 8)
4237 return true;
4238 }
4239 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4240 return 1;
4241
4242 return 0;
4243 }
4244
4245 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4246 ABI. Only called if TARGET_SSE. */
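/* E.g. (illustrative): struct s { __m128 v; int i; } contains a 128-bit
   aligned vector member, so it is considered 128-bit aligned for argument
   passing purposes.  */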
4247 static bool
4248 contains_128bit_aligned_vector_p (tree type)
4249 {
4250 enum machine_mode mode = TYPE_MODE (type);
4251 if (SSE_REG_MODE_P (mode)
4252 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4253 return true;
4254 if (TYPE_ALIGN (type) < 128)
4255 return false;
4256
4257 if (AGGREGATE_TYPE_P (type))
4258 {
4259 /* Walk the aggregates recursively. */
4260 switch (TREE_CODE (type))
4261 {
4262 case RECORD_TYPE:
4263 case UNION_TYPE:
4264 case QUAL_UNION_TYPE:
4265 {
4266 tree field;
4267
4268 /* Walk all the structure fields. */
4269 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4270 {
4271 if (TREE_CODE (field) == FIELD_DECL
4272 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4273 return true;
4274 }
4275 break;
4276 }
4277
4278 case ARRAY_TYPE:
4279 	/* Just in case some language passes arrays by value.  */
4280 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4281 return true;
4282 break;
4283
4284 default:
4285 gcc_unreachable ();
4286 }
4287 }
4288 return false;
4289 }
4290
4291 /* Gives the alignment boundary, in bits, of an argument with the
4292 specified mode and type. */
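/* For example (illustrative): on 32-bit targets a plain int argument is
   aligned to PARM_BOUNDARY (32 bits), while a 16-byte SSE vector argument
   is aligned to 128 bits; the result is always capped at 128.  */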
4293
4294 int
4295 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4296 {
4297 int align;
4298 if (type)
4299 align = TYPE_ALIGN (type);
4300 else
4301 align = GET_MODE_ALIGNMENT (mode);
4302 if (align < PARM_BOUNDARY)
4303 align = PARM_BOUNDARY;
4304 if (!TARGET_64BIT)
4305 {
4306 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4307 make an exception for SSE modes since these require 128bit
4308 alignment.
4309
4310 The handling here differs from field_alignment. ICC aligns MMX
4311 arguments to 4 byte boundaries, while structure fields are aligned
4312 to 8 byte boundaries. */
4313 if (!TARGET_SSE)
4314 align = PARM_BOUNDARY;
4315 else if (!type)
4316 {
4317 if (!SSE_REG_MODE_P (mode))
4318 align = PARM_BOUNDARY;
4319 }
4320 else
4321 {
4322 if (!contains_128bit_aligned_vector_p (type))
4323 align = PARM_BOUNDARY;
4324 }
4325 }
4326 if (align > 128)
4327 align = 128;
4328 return align;
4329 }
4330
4331 /* Return true if REGNO is a possible register number for a function value.  */
4332
4333 bool
4334 ix86_function_value_regno_p (int regno)
4335 {
4336 switch (regno)
4337 {
4338 case 0:
4339 return true;
4340
4341 case FIRST_FLOAT_REG:
4342 if (TARGET_64BIT_MS_ABI)
4343 return false;
4344 return TARGET_FLOAT_RETURNS_IN_80387;
4345
4346 case FIRST_SSE_REG:
4347 return TARGET_SSE;
4348
4349 case FIRST_MMX_REG:
4350 if (TARGET_MACHO || TARGET_64BIT)
4351 return false;
4352 return TARGET_MMX;
4353 }
4354
4355 return false;
4356 }
4357
4358 /* Define how to find the value returned by a function.
4359 VALTYPE is the data type of the value (as a tree).
4360 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4361 otherwise, FUNC is 0. */
4362
4363 static rtx
4364 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4365 const_tree fntype, const_tree fn)
4366 {
4367 unsigned int regno;
4368
4369 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4370 we normally prevent this case when mmx is not available. However
4371 some ABIs may require the result to be returned like DImode. */
4372 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4373 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4374
4375 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4376 we prevent this case when sse is not available. However some ABIs
4377 may require the result to be returned like integer TImode. */
4378 else if (mode == TImode
4379 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4380 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4381
4382 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4383 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4384 regno = FIRST_FLOAT_REG;
4385 else
4386 /* Most things go in %eax. */
4387 regno = 0;
4388
4389 /* Override FP return register with %xmm0 for local functions when
4390 SSE math is enabled or for functions with sseregparm attribute. */
4391 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4392 {
4393 int sse_level = ix86_function_sseregparm (fntype, fn);
4394 if ((sse_level >= 1 && mode == SFmode)
4395 || (sse_level == 2 && mode == DFmode))
4396 regno = FIRST_SSE_REG;
4397 }
4398
4399 return gen_rtx_REG (orig_mode, regno);
4400 }
4401
4402 static rtx
4403 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4404 const_tree valtype)
4405 {
4406 rtx ret;
4407
4408 /* Handle libcalls, which don't provide a type node. */
4409 if (valtype == NULL)
4410 {
4411 switch (mode)
4412 {
4413 case SFmode:
4414 case SCmode:
4415 case DFmode:
4416 case DCmode:
4417 case TFmode:
4418 case SDmode:
4419 case DDmode:
4420 case TDmode:
4421 return gen_rtx_REG (mode, FIRST_SSE_REG);
4422 case XFmode:
4423 case XCmode:
4424 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4425 case TCmode:
4426 return NULL;
4427 default:
4428 return gen_rtx_REG (mode, 0);
4429 }
4430 }
4431
4432 ret = construct_container (mode, orig_mode, valtype, 1,
4433 REGPARM_MAX, SSE_REGPARM_MAX,
4434 x86_64_int_return_registers, 0);
4435
4436   /* For zero sized structures, construct_container returns NULL, but we
4437      need to keep the rest of the compiler happy by returning a meaningful value.  */
4438 if (!ret)
4439 ret = gen_rtx_REG (orig_mode, 0);
4440
4441 return ret;
4442 }
4443
4444 static rtx
4445 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4446 {
4447 unsigned int regno = 0;
4448
4449 if (TARGET_SSE)
4450 {
4451 if (mode == SFmode || mode == DFmode)
4452 regno = FIRST_SSE_REG;
4453 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4454 regno = FIRST_SSE_REG;
4455 }
4456
4457 return gen_rtx_REG (orig_mode, regno);
4458 }
4459
4460 static rtx
4461 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4462 enum machine_mode orig_mode, enum machine_mode mode)
4463 {
4464 const_tree fn, fntype;
4465
4466 fn = NULL_TREE;
4467 if (fntype_or_decl && DECL_P (fntype_or_decl))
4468 fn = fntype_or_decl;
4469 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4470
4471 if (TARGET_64BIT_MS_ABI)
4472 return function_value_ms_64 (orig_mode, mode);
4473 else if (TARGET_64BIT)
4474 return function_value_64 (orig_mode, mode, valtype);
4475 else
4476 return function_value_32 (orig_mode, mode, fntype, fn);
4477 }
4478
4479 static rtx
4480 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4481 bool outgoing ATTRIBUTE_UNUSED)
4482 {
4483 enum machine_mode mode, orig_mode;
4484
4485 orig_mode = TYPE_MODE (valtype);
4486 mode = type_natural_mode (valtype);
4487 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4488 }
4489
4490 rtx
4491 ix86_libcall_value (enum machine_mode mode)
4492 {
4493 return ix86_function_value_1 (NULL, NULL, mode, mode);
4494 }
4495
4496 /* Return true iff type is returned in memory. */
4497
4498 static int
4499 return_in_memory_32 (const_tree type, enum machine_mode mode)
4500 {
4501 HOST_WIDE_INT size;
4502
4503 if (mode == BLKmode)
4504 return 1;
4505
4506 size = int_size_in_bytes (type);
4507
4508 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4509 return 0;
4510
4511 if (VECTOR_MODE_P (mode) || mode == TImode)
4512 {
4513 /* User-created vectors small enough to fit in EAX. */
4514 if (size < 8)
4515 return 0;
4516
4517       /* MMX/3dNow values are returned in MM0,
4518 	 except when it doesn't exist.  */
4519 if (size == 8)
4520 return (TARGET_MMX ? 0 : 1);
4521
4522 /* SSE values are returned in XMM0, except when it doesn't exist. */
4523 if (size == 16)
4524 return (TARGET_SSE ? 0 : 1);
4525 }
4526
4527 if (mode == XFmode)
4528 return 0;
4529
4530 if (mode == TDmode)
4531 return 1;
4532
4533 if (size > 12)
4534 return 1;
4535 return 0;
4536 }
4537
4538 static int
4539 return_in_memory_64 (const_tree type, enum machine_mode mode)
4540 {
4541 int needed_intregs, needed_sseregs;
4542 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4543 }
4544
4545 static int
4546 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4547 {
4548 HOST_WIDE_INT size = int_size_in_bytes (type);
4549
4550 /* __m128 and friends are returned in xmm0. */
4551 if (size == 16 && VECTOR_MODE_P (mode))
4552 return 0;
4553
4554   /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
4555 return (size != 1 && size != 2 && size != 4 && size != 8);
4556 }
4557
4558 int
4559 ix86_return_in_memory (const_tree type)
4560 {
4561 const enum machine_mode mode = type_natural_mode (type);
4562
4563 if (TARGET_64BIT_MS_ABI)
4564 return return_in_memory_ms_64 (type, mode);
4565 else if (TARGET_64BIT)
4566 return return_in_memory_64 (type, mode);
4567 else
4568 return return_in_memory_32 (type, mode);
4569 }
4570
4571 /* Return true iff TYPE is returned in memory.  This version is used
4572 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4573 but differs notably in that when MMX is available, 8-byte vectors
4574 are returned in memory, rather than in MMX registers. */
4575
4576 int
4577 ix86_sol10_return_in_memory (const_tree type)
4578 {
4579 int size;
4580 enum machine_mode mode = type_natural_mode (type);
4581
4582 if (TARGET_64BIT)
4583 return return_in_memory_64 (type, mode);
4584
4585 if (mode == BLKmode)
4586 return 1;
4587
4588 size = int_size_in_bytes (type);
4589
4590 if (VECTOR_MODE_P (mode))
4591 {
4592 /* Return in memory only if MMX registers *are* available. This
4593 seems backwards, but it is consistent with the existing
4594 Solaris x86 ABI. */
4595 if (size == 8)
4596 return TARGET_MMX;
4597 if (size == 16)
4598 return !TARGET_SSE;
4599 }
4600 else if (mode == TImode)
4601 return !TARGET_SSE;
4602 else if (mode == XFmode)
4603 return 0;
4604
4605 return size > 12;
4606 }
4607
4608 /* When returning SSE vector types, we have a choice of either
4609 (1) being abi incompatible with a -march switch, or
4610 (2) generating an error.
4611 Given no good solution, I think the safest thing is one warning.
4612 The user won't be able to use -Werror, but....
4613
4614 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4615 called in response to actually generating a caller or callee that
4616 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4617 via aggregate_value_p for general type probing from tree-ssa. */
4618
4619 static rtx
4620 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4621 {
4622 static bool warnedsse, warnedmmx;
4623
4624 if (!TARGET_64BIT && type)
4625 {
4626 /* Look at the return type of the function, not the function type. */
4627 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4628
4629 if (!TARGET_SSE && !warnedsse)
4630 {
4631 if (mode == TImode
4632 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4633 {
4634 warnedsse = true;
4635 warning (0, "SSE vector return without SSE enabled "
4636 "changes the ABI");
4637 }
4638 }
4639
4640 if (!TARGET_MMX && !warnedmmx)
4641 {
4642 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4643 {
4644 warnedmmx = true;
4645 warning (0, "MMX vector return without MMX enabled "
4646 "changes the ABI");
4647 }
4648 }
4649 }
4650
4651 return NULL;
4652 }
4653
4654 \f
4655 /* Create the va_list data type. */
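/* On 64-bit (non-MS) targets the record built below corresponds roughly to
   this sketch:
     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __builtin_va_list[1];
   32-bit targets and the 64-bit MS ABI use a plain char pointer instead.  */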
4656
4657 static tree
4658 ix86_build_builtin_va_list (void)
4659 {
4660 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4661
4662   /* For i386 we use a plain pointer to the argument area.  */
4663 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4664 return build_pointer_type (char_type_node);
4665
4666 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4667 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4668
4669 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4670 unsigned_type_node);
4671 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4672 unsigned_type_node);
4673 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4674 ptr_type_node);
4675 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4676 ptr_type_node);
4677
4678 va_list_gpr_counter_field = f_gpr;
4679 va_list_fpr_counter_field = f_fpr;
4680
4681 DECL_FIELD_CONTEXT (f_gpr) = record;
4682 DECL_FIELD_CONTEXT (f_fpr) = record;
4683 DECL_FIELD_CONTEXT (f_ovf) = record;
4684 DECL_FIELD_CONTEXT (f_sav) = record;
4685
4686 TREE_CHAIN (record) = type_decl;
4687 TYPE_NAME (record) = type_decl;
4688 TYPE_FIELDS (record) = f_gpr;
4689 TREE_CHAIN (f_gpr) = f_fpr;
4690 TREE_CHAIN (f_fpr) = f_ovf;
4691 TREE_CHAIN (f_ovf) = f_sav;
4692
4693 layout_type (record);
4694
4695 /* The correct type is an array type of one element. */
4696 return build_array_type (record, build_index_type (size_zero_node));
4697 }
4698
4699 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4700
4701 static void
4702 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4703 {
4704 rtx save_area, mem;
4705 rtx label;
4706 rtx label_ref;
4707 rtx tmp_reg;
4708 rtx nsse_reg;
4709 alias_set_type set;
4710 int i;
4711
4712 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4713 return;
4714
4715 /* Indicate to allocate space on the stack for varargs save area. */
4716 ix86_save_varrargs_registers = 1;
4717   /* We need 16-byte stack alignment to save SSE registers.  If the user
4718      asked for a lower preferred_stack_boundary, let's just hope they know
4719      what they are doing and won't pass SSE values through varargs.
4720
4721      We may also end up assuming that only 64bit values are stored in SSE
4722      registers, so that at least some floating point programs keep working.  */
4723 if (ix86_preferred_stack_boundary >= 128)
4724 cfun->stack_alignment_needed = 128;
4725
4726 save_area = frame_pointer_rtx;
4727 set = get_varargs_alias_set ();
4728
4729 for (i = cum->regno;
4730 i < ix86_regparm
4731 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4732 i++)
4733 {
4734 mem = gen_rtx_MEM (Pmode,
4735 plus_constant (save_area, i * UNITS_PER_WORD));
4736 MEM_NOTRAP_P (mem) = 1;
4737 set_mem_alias_set (mem, set);
4738 emit_move_insn (mem, gen_rtx_REG (Pmode,
4739 x86_64_int_parameter_registers[i]));
4740 }
4741
4742 if (cum->sse_nregs && cfun->va_list_fpr_size)
4743 {
4744       /* Now emit code to save SSE registers.  The AX parameter contains the
4745 	 number of SSE parameter registers used to call this function.  We use
4746 	 the sse_prologue_save insn template, which produces a computed jump
4747 	 across the SSE saves.  We need some preparation work to get this working.  */
4748
4749 label = gen_label_rtx ();
4750 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4751
4752       /* Compute the address to jump to:
4753 	   label - eax*4 + nnamed_sse_arguments*4  */
4754 tmp_reg = gen_reg_rtx (Pmode);
4755 nsse_reg = gen_reg_rtx (Pmode);
4756 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4757 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4758 gen_rtx_MULT (Pmode, nsse_reg,
4759 GEN_INT (4))));
4760 if (cum->sse_regno)
4761 emit_move_insn
4762 (nsse_reg,
4763 gen_rtx_CONST (DImode,
4764 gen_rtx_PLUS (DImode,
4765 label_ref,
4766 GEN_INT (cum->sse_regno * 4))));
4767 else
4768 emit_move_insn (nsse_reg, label_ref);
4769 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4770
4771       /* Compute the address of the memory block we save into.  We always use
4772 	 a pointer pointing 127 bytes after the first byte to store to - this is
4773 	 needed to keep the instruction size limited to 4 bytes.  */
4774 tmp_reg = gen_reg_rtx (Pmode);
4775 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4776 plus_constant (save_area,
4777 8 * REGPARM_MAX + 127)));
4778 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4779 MEM_NOTRAP_P (mem) = 1;
4780 set_mem_alias_set (mem, set);
4781 set_mem_align (mem, BITS_PER_WORD);
4782
4783 /* And finally do the dirty job! */
4784 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4785 GEN_INT (cum->sse_regno), label));
4786 }
4787 }
4788
4789 static void
4790 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4791 {
4792 alias_set_type set = get_varargs_alias_set ();
4793 int i;
4794
4795 for (i = cum->regno; i < REGPARM_MAX; i++)
4796 {
4797 rtx reg, mem;
4798
4799 mem = gen_rtx_MEM (Pmode,
4800 plus_constant (virtual_incoming_args_rtx,
4801 i * UNITS_PER_WORD));
4802 MEM_NOTRAP_P (mem) = 1;
4803 set_mem_alias_set (mem, set);
4804
4805 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4806 emit_move_insn (mem, reg);
4807 }
4808 }
4809
4810 static void
4811 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4812 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4813 int no_rtl)
4814 {
4815 CUMULATIVE_ARGS next_cum;
4816 tree fntype;
4817 int stdarg_p;
4818
4819 /* This argument doesn't appear to be used anymore. Which is good,
4820 because the old code here didn't suppress rtl generation. */
4821 gcc_assert (!no_rtl);
4822
4823 if (!TARGET_64BIT)
4824 return;
4825
4826 fntype = TREE_TYPE (current_function_decl);
4827 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4828 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4829 != void_type_node));
4830
4831 /* For varargs, we do not want to skip the dummy va_dcl argument.
4832 For stdargs, we do want to skip the last named argument. */
4833 next_cum = *cum;
4834 if (stdarg_p)
4835 function_arg_advance (&next_cum, mode, type, 1);
4836
4837 if (TARGET_64BIT_MS_ABI)
4838 setup_incoming_varargs_ms_64 (&next_cum);
4839 else
4840 setup_incoming_varargs_64 (&next_cum);
4841 }
4842
4843 /* Implement va_start. */
4844
4845 void
4846 ix86_va_start (tree valist, rtx nextarg)
4847 {
4848 HOST_WIDE_INT words, n_gpr, n_fpr;
4849 tree f_gpr, f_fpr, f_ovf, f_sav;
4850 tree gpr, fpr, ovf, sav, t;
4851 tree type;
4852
4853 /* Only 64bit target needs something special. */
4854 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4855 {
4856 std_expand_builtin_va_start (valist, nextarg);
4857 return;
4858 }
4859
4860 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4861 f_fpr = TREE_CHAIN (f_gpr);
4862 f_ovf = TREE_CHAIN (f_fpr);
4863 f_sav = TREE_CHAIN (f_ovf);
4864
4865 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4866 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4867 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4868 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4869 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4870
4871 /* Count number of gp and fp argument registers used. */
4872 words = current_function_args_info.words;
4873 n_gpr = current_function_args_info.regno;
4874 n_fpr = current_function_args_info.sse_regno;
4875
4876 if (cfun->va_list_gpr_size)
4877 {
4878 type = TREE_TYPE (gpr);
4879 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4880 build_int_cst (type, n_gpr * 8));
4881 TREE_SIDE_EFFECTS (t) = 1;
4882 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4883 }
4884
4885 if (cfun->va_list_fpr_size)
4886 {
4887 type = TREE_TYPE (fpr);
4888 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4889 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4890 TREE_SIDE_EFFECTS (t) = 1;
4891 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4892 }
4893
4894 /* Find the overflow area. */
4895 type = TREE_TYPE (ovf);
4896 t = make_tree (type, virtual_incoming_args_rtx);
4897 if (words != 0)
4898 t = build2 (POINTER_PLUS_EXPR, type, t,
4899 size_int (words * UNITS_PER_WORD));
4900 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4901 TREE_SIDE_EFFECTS (t) = 1;
4902 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4903
4904 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4905 {
4906 /* Find the register save area.
4907 	 The prologue of the function saves it right above the stack frame.  */
4908 type = TREE_TYPE (sav);
4909 t = make_tree (type, frame_pointer_rtx);
4910 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4911 TREE_SIDE_EFFECTS (t) = 1;
4912 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4913 }
4914 }
4915
4916 /* Implement va_arg. */
4917
4918 static tree
4919 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4920 {
4921 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4922 tree f_gpr, f_fpr, f_ovf, f_sav;
4923 tree gpr, fpr, ovf, sav, t;
4924 int size, rsize;
4925 tree lab_false, lab_over = NULL_TREE;
4926 tree addr, t2;
4927 rtx container;
4928 int indirect_p = 0;
4929 tree ptrtype;
4930 enum machine_mode nat_mode;
4931
4932 /* Only 64bit target needs something special. */
4933 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4934 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4935
4936 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4937 f_fpr = TREE_CHAIN (f_gpr);
4938 f_ovf = TREE_CHAIN (f_fpr);
4939 f_sav = TREE_CHAIN (f_ovf);
4940
4941 valist = build_va_arg_indirect_ref (valist);
4942 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4943 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4944 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4945 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4946
4947 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4948 if (indirect_p)
4949 type = build_pointer_type (type);
4950 size = int_size_in_bytes (type);
4951 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4952
4953 nat_mode = type_natural_mode (type);
4954 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4955 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4956
4957 /* Pull the value out of the saved registers. */
4958
4959 addr = create_tmp_var (ptr_type_node, "addr");
4960 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4961
4962 if (container)
4963 {
4964 int needed_intregs, needed_sseregs;
4965 bool need_temp;
4966 tree int_addr, sse_addr;
4967
4968 lab_false = create_artificial_label ();
4969 lab_over = create_artificial_label ();
4970
4971 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4972
4973 need_temp = (!REG_P (container)
4974 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4975 || TYPE_ALIGN (type) > 128));
4976
4977       /* In case we are passing a structure, verify that it is a consecutive
4978 	 block in the register save area.  If not, we need to do moves.  */
4979 if (!need_temp && !REG_P (container))
4980 {
4981 /* Verify that all registers are strictly consecutive */
4982 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4983 {
4984 int i;
4985
4986 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4987 {
4988 rtx slot = XVECEXP (container, 0, i);
4989 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4990 || INTVAL (XEXP (slot, 1)) != i * 16)
4991 need_temp = 1;
4992 }
4993 }
4994 else
4995 {
4996 int i;
4997
4998 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4999 {
5000 rtx slot = XVECEXP (container, 0, i);
5001 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
5002 || INTVAL (XEXP (slot, 1)) != i * 8)
5003 need_temp = 1;
5004 }
5005 }
5006 }
5007 if (!need_temp)
5008 {
5009 int_addr = addr;
5010 sse_addr = addr;
5011 }
5012 else
5013 {
5014 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5015 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5016 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5017 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5018 }
5019
5020 /* First ensure that we fit completely in registers. */
5021 if (needed_intregs)
5022 {
5023 t = build_int_cst (TREE_TYPE (gpr),
5024 (REGPARM_MAX - needed_intregs + 1) * 8);
5025 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5026 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5027 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5028 gimplify_and_add (t, pre_p);
5029 }
5030 if (needed_sseregs)
5031 {
5032 t = build_int_cst (TREE_TYPE (fpr),
5033 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5034 + REGPARM_MAX * 8);
5035 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5036 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5037 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5038 gimplify_and_add (t, pre_p);
5039 }
5040
5041 /* Compute index to start of area used for integer regs. */
5042 if (needed_intregs)
5043 {
5044 /* int_addr = gpr + sav; */
5045 t = fold_convert (sizetype, gpr);
5046 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5047 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5048 gimplify_and_add (t, pre_p);
5049 }
5050 if (needed_sseregs)
5051 {
5052 /* sse_addr = fpr + sav; */
5053 t = fold_convert (sizetype, fpr);
5054 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5055 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5056 gimplify_and_add (t, pre_p);
5057 }
5058 if (need_temp)
5059 {
5060 int i;
5061 tree temp = create_tmp_var (type, "va_arg_tmp");
5062
5063 /* addr = &temp; */
5064 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5065 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5066 gimplify_and_add (t, pre_p);
5067
5068 for (i = 0; i < XVECLEN (container, 0); i++)
5069 {
5070 rtx slot = XVECEXP (container, 0, i);
5071 rtx reg = XEXP (slot, 0);
5072 enum machine_mode mode = GET_MODE (reg);
5073 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5074 tree addr_type = build_pointer_type (piece_type);
5075 tree src_addr, src;
5076 int src_offset;
5077 tree dest_addr, dest;
5078
5079 if (SSE_REGNO_P (REGNO (reg)))
5080 {
5081 src_addr = sse_addr;
5082 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5083 }
5084 else
5085 {
5086 src_addr = int_addr;
5087 src_offset = REGNO (reg) * 8;
5088 }
5089 src_addr = fold_convert (addr_type, src_addr);
5090 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5091 size_int (src_offset));
5092 src = build_va_arg_indirect_ref (src_addr);
5093
5094 dest_addr = fold_convert (addr_type, addr);
5095 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5096 size_int (INTVAL (XEXP (slot, 1))));
5097 dest = build_va_arg_indirect_ref (dest_addr);
5098
5099 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5100 gimplify_and_add (t, pre_p);
5101 }
5102 }
5103
5104 if (needed_intregs)
5105 {
5106 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5107 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5108 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5109 gimplify_and_add (t, pre_p);
5110 }
5111 if (needed_sseregs)
5112 {
5113 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5114 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5115 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5116 gimplify_and_add (t, pre_p);
5117 }
5118
5119 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5120 gimplify_and_add (t, pre_p);
5121
5122 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5123 append_to_statement_list (t, pre_p);
5124 }
5125
5126 /* ... otherwise out of the overflow area. */
5127
5128 /* Care for on-stack alignment if needed. */
5129 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5130 || integer_zerop (TYPE_SIZE (type)))
5131 t = ovf;
5132 else
5133 {
5134 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5135 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5136 size_int (align - 1));
5137 t = fold_convert (sizetype, t);
5138 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5139 size_int (-align));
5140 t = fold_convert (TREE_TYPE (ovf), t);
5141 }
5142 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5143
5144 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5145 gimplify_and_add (t2, pre_p);
5146
5147 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5148 size_int (rsize * UNITS_PER_WORD));
5149 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5150 gimplify_and_add (t, pre_p);
5151
5152 if (container)
5153 {
5154 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5155 append_to_statement_list (t, pre_p);
5156 }
5157
5158 ptrtype = build_pointer_type (type);
5159 addr = fold_convert (ptrtype, addr);
5160
5161 if (indirect_p)
5162 addr = build_va_arg_indirect_ref (addr);
5163 return build_va_arg_indirect_ref (addr);
5164 }
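
/* A rough C-level sketch (illustration only, using the hypothetical
   sketch_va_list layout above) of the sequence the gimplifier above emits
   for a value that needs a single integer register:  */
#if 0
static void *
sketch_va_arg_gpr (sketch_va_list *ap, size_t size)
{
  void *addr;
  if (ap->gp_offset >= 8 * REGPARM_MAX)
    {
      /* Out of integer registers: take the argument from the stack.  */
      addr = ap->overflow_arg_area;
      ap->overflow_arg_area = (char *) addr + ((size + 7) & -8);
    }
  else
    {
      /* Fetch it from the register save area and consume one register.  */
      addr = (char *) ap->reg_save_area + ap->gp_offset;
      ap->gp_offset += 8;
    }
  return addr;
}
#endif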
5165 \f
5166 /* Return nonzero if OPNUM's MEM should be matched
5167 in movabs* patterns. */
5168
5169 int
5170 ix86_check_movabs (rtx insn, int opnum)
5171 {
5172 rtx set, mem;
5173
5174 set = PATTERN (insn);
5175 if (GET_CODE (set) == PARALLEL)
5176 set = XVECEXP (set, 0, 0);
5177 gcc_assert (GET_CODE (set) == SET);
5178 mem = XEXP (set, opnum);
5179 while (GET_CODE (mem) == SUBREG)
5180 mem = SUBREG_REG (mem);
5181 gcc_assert (MEM_P (mem));
5182 return (volatile_ok || !MEM_VOLATILE_P (mem));
5183 }
5184 \f
5185 /* Initialize the table of extra 80387 mathematical constants. */
5186
5187 static void
5188 init_ext_80387_constants (void)
5189 {
5190 static const char * cst[5] =
5191 {
5192 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5193 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5194 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5195 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5196 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5197 };
5198 int i;
5199
5200 for (i = 0; i < 5; i++)
5201 {
5202 real_from_string (&ext_80387_constants_table[i], cst[i]);
5203 /* Ensure each constant is rounded to XFmode precision. */
5204 real_convert (&ext_80387_constants_table[i],
5205 XFmode, &ext_80387_constants_table[i]);
5206 }
5207
5208 ext_80387_constants_init = 1;
5209 }
5210
5211 /* Return true if the constant is something that can be loaded with
5212 a special instruction. */
5213
5214 int
5215 standard_80387_constant_p (rtx x)
5216 {
5217 enum machine_mode mode = GET_MODE (x);
5218
5219 REAL_VALUE_TYPE r;
5220
5221 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5222 return -1;
5223
5224 if (x == CONST0_RTX (mode))
5225 return 1;
5226 if (x == CONST1_RTX (mode))
5227 return 2;
5228
5229 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5230
5231 /* For XFmode constants, try to find a special 80387 instruction when
5232 optimizing for size or on those CPUs that benefit from them. */
5233 if (mode == XFmode
5234 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5235 {
5236 int i;
5237
5238 if (! ext_80387_constants_init)
5239 init_ext_80387_constants ();
5240
5241 for (i = 0; i < 5; i++)
5242 if (real_identical (&r, &ext_80387_constants_table[i]))
5243 return i + 3;
5244 }
5245
5246 /* Load of the constant -0.0 or -1.0 will be split as
5247 fldz;fchs or fld1;fchs sequence. */
5248 if (real_isnegzero (&r))
5249 return 8;
5250 if (real_identical (&r, &dconstm1))
5251 return 9;
5252
5253 return 0;
5254 }
5255
5256 /* Return the opcode of the special instruction to be used to load
5257 the constant X. */
5258
5259 const char *
5260 standard_80387_constant_opcode (rtx x)
5261 {
5262 switch (standard_80387_constant_p (x))
5263 {
5264 case 1:
5265 return "fldz";
5266 case 2:
5267 return "fld1";
5268 case 3:
5269 return "fldlg2";
5270 case 4:
5271 return "fldln2";
5272 case 5:
5273 return "fldl2e";
5274 case 6:
5275 return "fldl2t";
5276 case 7:
5277 return "fldpi";
5278 case 8:
5279 case 9:
5280 return "#";
5281 default:
5282 gcc_unreachable ();
5283 }
5284 }
5285
5286 /* Return the CONST_DOUBLE representing the 80387 constant that is
5287 loaded by the specified special instruction. The argument IDX
5288 matches the return value from standard_80387_constant_p. */
5289
5290 rtx
5291 standard_80387_constant_rtx (int idx)
5292 {
5293 int i;
5294
5295 if (! ext_80387_constants_init)
5296 init_ext_80387_constants ();
5297
5298 switch (idx)
5299 {
5300 case 3:
5301 case 4:
5302 case 5:
5303 case 6:
5304 case 7:
5305 i = idx - 3;
5306 break;
5307
5308 default:
5309 gcc_unreachable ();
5310 }
5311
5312 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5313 XFmode);
5314 }
5315
5316 /* Return 1 if MODE is a valid mode for SSE. */
5317 static int
5318 standard_sse_mode_p (enum machine_mode mode)
5319 {
5320 switch (mode)
5321 {
5322 case V16QImode:
5323 case V8HImode:
5324 case V4SImode:
5325 case V2DImode:
5326 case V4SFmode:
5327 case V2DFmode:
5328 return 1;
5329
5330 default:
5331 return 0;
5332 }
5333 }
5334
5335 /* Return 1 if X is an FP constant we can load into an SSE register
5336 without using memory. */
5337 int
5338 standard_sse_constant_p (rtx x)
5339 {
5340 enum machine_mode mode = GET_MODE (x);
5341
5342 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5343 return 1;
5344 if (vector_all_ones_operand (x, mode)
5345 && standard_sse_mode_p (mode))
5346 return TARGET_SSE2 ? 2 : -1;
5347
5348 return 0;
5349 }
5350
5351 /* Return the opcode of the special instruction to be used to load
5352 the constant X. */
5353
5354 const char *
5355 standard_sse_constant_opcode (rtx insn, rtx x)
5356 {
5357 switch (standard_sse_constant_p (x))
5358 {
5359 case 1:
5360 if (get_attr_mode (insn) == MODE_V4SF)
5361 return "xorps\t%0, %0";
5362 else if (get_attr_mode (insn) == MODE_V2DF)
5363 return "xorpd\t%0, %0";
5364 else
5365 return "pxor\t%0, %0";
5366 case 2:
5367 return "pcmpeqd\t%0, %0";
5368 }
5369 gcc_unreachable ();
5370 }
5371
5372 /* Return 1 if OP contains a symbol reference. */
5373
5374 int
5375 symbolic_reference_mentioned_p (rtx op)
5376 {
5377 const char *fmt;
5378 int i;
5379
5380 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5381 return 1;
5382
5383 fmt = GET_RTX_FORMAT (GET_CODE (op));
5384 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5385 {
5386 if (fmt[i] == 'E')
5387 {
5388 int j;
5389
5390 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5391 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5392 return 1;
5393 }
5394
5395 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5396 return 1;
5397 }
5398
5399 return 0;
5400 }
5401
5402 /* Return 1 if it is appropriate to emit `ret' instructions in the
5403 body of a function. Do this only if the epilogue is simple, needing a
5404 couple of insns. Prior to reloading, we can't tell how many registers
5405 must be saved, so return 0 then. Return 0 if there is no frame
5406 marker to de-allocate. */
5407
5408 int
5409 ix86_can_use_return_insn_p (void)
5410 {
5411 struct ix86_frame frame;
5412
5413 if (! reload_completed || frame_pointer_needed)
5414 return 0;
5415
5416 /* Don't allow more than 32k bytes of arguments to be popped, since
5417 that's all we can do with one instruction. */
5418 if (current_function_pops_args
5419 && current_function_args_size >= 32768)
5420 return 0;
5421
5422 ix86_compute_frame_layout (&frame);
5423 return frame.to_allocate == 0 && frame.nregs == 0;
5424 }
5425 \f
5426 /* Value should be nonzero if functions must have frame pointers.
5427 Zero means the frame pointer need not be set up (and parms may
5428 be accessed via the stack pointer) in functions that seem suitable. */
5429
5430 int
5431 ix86_frame_pointer_required (void)
5432 {
5433 /* If we accessed previous frames, then the generated code expects
5434 to be able to access the saved ebp value in our frame. */
5435 if (cfun->machine->accesses_prev_frame)
5436 return 1;
5437
5438 /* Several x86 OSes need a frame pointer for other reasons,
5439 usually pertaining to setjmp. */
5440 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5441 return 1;
5442
5443 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5444 the frame pointer by default. Turn it back on now if we've not
5445 got a leaf function. */
5446 if (TARGET_OMIT_LEAF_FRAME_POINTER
5447 && (!current_function_is_leaf
5448 || ix86_current_function_calls_tls_descriptor))
5449 return 1;
5450
5451 if (current_function_profile)
5452 return 1;
5453
5454 return 0;
5455 }
5456
5457 /* Record that the current function accesses previous call frames. */
5458
5459 void
5460 ix86_setup_frame_addresses (void)
5461 {
5462 cfun->machine->accesses_prev_frame = 1;
5463 }
5464 \f
5465 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5466 # define USE_HIDDEN_LINKONCE 1
5467 #else
5468 # define USE_HIDDEN_LINKONCE 0
5469 #endif
5470
5471 static int pic_labels_used;
5472
5473 /* Fills in the label name that should be used for a pc thunk for
5474 the given register. */
5475
5476 static void
5477 get_pc_thunk_name (char name[32], unsigned int regno)
5478 {
5479 gcc_assert (!TARGET_64BIT);
5480
5481 if (USE_HIDDEN_LINKONCE)
5482 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5483 else
5484 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5485 }
5486
5487
5488 /* Output, at the end of the file, the pc thunks needed for -fpic: each
5489 thunk loads its register with the return address of the caller and then returns. */
5490
5491 void
5492 ix86_file_end (void)
5493 {
5494 rtx xops[2];
5495 int regno;
5496
5497 for (regno = 0; regno < 8; ++regno)
5498 {
5499 char name[32];
5500
5501 if (! ((pic_labels_used >> regno) & 1))
5502 continue;
5503
5504 get_pc_thunk_name (name, regno);
5505
5506 #if TARGET_MACHO
5507 if (TARGET_MACHO)
5508 {
5509 switch_to_section (darwin_sections[text_coal_section]);
5510 fputs ("\t.weak_definition\t", asm_out_file);
5511 assemble_name (asm_out_file, name);
5512 fputs ("\n\t.private_extern\t", asm_out_file);
5513 assemble_name (asm_out_file, name);
5514 fputs ("\n", asm_out_file);
5515 ASM_OUTPUT_LABEL (asm_out_file, name);
5516 }
5517 else
5518 #endif
5519 if (USE_HIDDEN_LINKONCE)
5520 {
5521 tree decl;
5522
5523 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5524 error_mark_node);
5525 TREE_PUBLIC (decl) = 1;
5526 TREE_STATIC (decl) = 1;
5527 DECL_ONE_ONLY (decl) = 1;
5528
5529 (*targetm.asm_out.unique_section) (decl, 0);
5530 switch_to_section (get_named_section (decl, NULL, 0));
5531
5532 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5533 fputs ("\t.hidden\t", asm_out_file);
5534 assemble_name (asm_out_file, name);
5535 fputc ('\n', asm_out_file);
5536 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5537 }
5538 else
5539 {
5540 switch_to_section (text_section);
5541 ASM_OUTPUT_LABEL (asm_out_file, name);
5542 }
5543
5544 xops[0] = gen_rtx_REG (SImode, regno);
5545 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5546 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5547 output_asm_insn ("ret", xops);
5548 }
5549
5550 if (NEED_INDICATE_EXEC_STACK)
5551 file_end_indicate_exec_stack ();
5552 }
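
/* For reference (illustrative, AT&T syntax), each thunk emitted above is just

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. it copies its own return address -- the address of the instruction
   following the call -- into the chosen register.  */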
5553
5554 /* Emit code for the SET_GOT patterns. */
5555
5556 const char *
5557 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5558 {
5559 rtx xops[3];
5560
5561 xops[0] = dest;
5562
5563 if (TARGET_VXWORKS_RTP && flag_pic)
5564 {
5565 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5566 xops[2] = gen_rtx_MEM (Pmode,
5567 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5568 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5569
5570 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5571 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5572 an unadorned address. */
5573 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5574 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5575 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5576 return "";
5577 }
5578
5579 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5580
5581 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5582 {
5583 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5584
5585 if (!flag_pic)
5586 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5587 else
5588 output_asm_insn ("call\t%a2", xops);
5589
5590 #if TARGET_MACHO
5591 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5592 is what will be referenced by the Mach-O PIC subsystem. */
5593 if (!label)
5594 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5595 #endif
5596
5597 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5598 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5599
5600 if (flag_pic)
5601 output_asm_insn ("pop{l}\t%0", xops);
5602 }
5603 else
5604 {
5605 char name[32];
5606 get_pc_thunk_name (name, REGNO (dest));
5607 pic_labels_used |= 1 << REGNO (dest);
5608
5609 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5610 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5611 output_asm_insn ("call\t%X2", xops);
5612 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5613 is what will be referenced by the Mach-O PIC subsystem. */
5614 #if TARGET_MACHO
5615 if (!label)
5616 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5617 else
5618 targetm.asm_out.internal_label (asm_out_file, "L",
5619 CODE_LABEL_NUMBER (label));
5620 #endif
5621 }
5622
5623 if (TARGET_MACHO)
5624 return "";
5625
5626 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5627 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5628 else
5629 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5630
5631 return "";
5632 }
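
/* Illustrative examples (assuming 32-bit ELF, AT&T syntax) of the sequences
   output_set_got produces.  Without deep branch prediction:

       call    .L2
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   and with deep branch prediction, via a pc thunk:

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx  */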
5633
5634 /* Generate a "push" pattern for input ARG. */
5635
5636 static rtx
5637 gen_push (rtx arg)
5638 {
5639 return gen_rtx_SET (VOIDmode,
5640 gen_rtx_MEM (Pmode,
5641 gen_rtx_PRE_DEC (Pmode,
5642 stack_pointer_rtx)),
5643 arg);
5644 }
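
/* For example, on 32-bit targets gen_push (gen_rtx_REG (SImode, 0)) builds
   RTL equivalent to (illustration only)

       (set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI 0))

   which the backend's push patterns recognize.  */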
5645
5646 /* Return >= 0 if there is an unused call-clobbered register available
5647 for the entire function. */
5648
5649 static unsigned int
5650 ix86_select_alt_pic_regnum (void)
5651 {
5652 if (current_function_is_leaf && !current_function_profile
5653 && !ix86_current_function_calls_tls_descriptor)
5654 {
5655 int i;
5656 for (i = 2; i >= 0; --i)
5657 if (!df_regs_ever_live_p (i))
5658 return i;
5659 }
5660
5661 return INVALID_REGNUM;
5662 }
5663
5664 /* Return 1 if we need to save REGNO. */
5665 static int
5666 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5667 {
5668 if (pic_offset_table_rtx
5669 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5670 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5671 || current_function_profile
5672 || current_function_calls_eh_return
5673 || current_function_uses_const_pool))
5674 {
5675 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5676 return 0;
5677 return 1;
5678 }
5679
5680 if (current_function_calls_eh_return && maybe_eh_return)
5681 {
5682 unsigned i;
5683 for (i = 0; ; i++)
5684 {
5685 unsigned test = EH_RETURN_DATA_REGNO (i);
5686 if (test == INVALID_REGNUM)
5687 break;
5688 if (test == regno)
5689 return 1;
5690 }
5691 }
5692
5693 if (cfun->machine->force_align_arg_pointer
5694 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5695 return 1;
5696
5697 return (df_regs_ever_live_p (regno)
5698 && !call_used_regs[regno]
5699 && !fixed_regs[regno]
5700 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5701 }
5702
5703 /* Return number of registers to be saved on the stack. */
5704
5705 static int
5706 ix86_nsaved_regs (void)
5707 {
5708 int nregs = 0;
5709 int regno;
5710
5711 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5712 if (ix86_save_reg (regno, true))
5713 nregs++;
5714 return nregs;
5715 }
5716
5717 /* Return the offset between two registers, one to be eliminated, and the other
5718 its replacement, at the start of a routine. */
5719
5720 HOST_WIDE_INT
5721 ix86_initial_elimination_offset (int from, int to)
5722 {
5723 struct ix86_frame frame;
5724 ix86_compute_frame_layout (&frame);
5725
5726 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5727 return frame.hard_frame_pointer_offset;
5728 else if (from == FRAME_POINTER_REGNUM
5729 && to == HARD_FRAME_POINTER_REGNUM)
5730 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5731 else
5732 {
5733 gcc_assert (to == STACK_POINTER_REGNUM);
5734
5735 if (from == ARG_POINTER_REGNUM)
5736 return frame.stack_pointer_offset;
5737
5738 gcc_assert (from == FRAME_POINTER_REGNUM);
5739 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5740 }
5741 }
5742
5743 /* Fill structure ix86_frame about frame of currently computed function. */
5744
5745 static void
5746 ix86_compute_frame_layout (struct ix86_frame *frame)
5747 {
5748 HOST_WIDE_INT total_size;
5749 unsigned int stack_alignment_needed;
5750 HOST_WIDE_INT offset;
5751 unsigned int preferred_alignment;
5752 HOST_WIDE_INT size = get_frame_size ();
5753
5754 frame->nregs = ix86_nsaved_regs ();
5755 total_size = size;
5756
5757 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5758 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5759
5760 /* During reload iteration the number of registers saved can change.
5761 Recompute the value as needed. Do not recompute when the number of registers
5762 didn't change, as reload does multiple calls to the function and does not
5763 expect the decision to change within a single iteration. */
5764 if (!optimize_size
5765 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5766 {
5767 int count = frame->nregs;
5768
5769 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5770 /* The fast prologue uses move instead of push to save registers. This
5771 is significantly longer, but also executes faster as modern hardware
5772 can execute the moves in parallel, but can't do that for push/pop.
5773
5774 Be careful about choosing which prologue to emit: when the function takes
5775 many instructions to execute, we may use the slow version, as we also do
5776 when the function is known to be outside a hot spot (known with profile
5777 feedback only). Weight the size of the function by the number of registers
5778 to save, as it is cheap to use one or two push instructions but very
5779 slow to use many of them. */
5780 if (count)
5781 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5782 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5783 || (flag_branch_probabilities
5784 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5785 cfun->machine->use_fast_prologue_epilogue = false;
5786 else
5787 cfun->machine->use_fast_prologue_epilogue
5788 = !expensive_function_p (count);
5789 }
5790 if (TARGET_PROLOGUE_USING_MOVE
5791 && cfun->machine->use_fast_prologue_epilogue)
5792 frame->save_regs_using_mov = true;
5793 else
5794 frame->save_regs_using_mov = false;
5795
5796
5797 /* Skip return address and saved base pointer. */
5798 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5799
5800 frame->hard_frame_pointer_offset = offset;
5801
5802 /* Do some sanity checking of stack_alignment_needed and
5803 preferred_alignment, since the i386 port is the only one using those
5804 features, and they may break easily. */
5805
5806 gcc_assert (!size || stack_alignment_needed);
5807 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5808 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5809 gcc_assert (stack_alignment_needed
5810 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5811
5812 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5813 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5814
5815 /* Register save area */
5816 offset += frame->nregs * UNITS_PER_WORD;
5817
5818 /* Va-arg area */
5819 if (ix86_save_varrargs_registers)
5820 {
5821 offset += X86_64_VARARGS_SIZE;
5822 frame->va_arg_size = X86_64_VARARGS_SIZE;
5823 }
5824 else
5825 frame->va_arg_size = 0;
5826
5827 /* Align start of frame for local function. */
5828 frame->padding1 = ((offset + stack_alignment_needed - 1)
5829 & -stack_alignment_needed) - offset;
5830
5831 offset += frame->padding1;
5832
5833 /* Frame pointer points here. */
5834 frame->frame_pointer_offset = offset;
5835
5836 offset += size;
5837
5838 /* Add the outgoing arguments area. It can be skipped if we eliminated
5839 all the function calls as dead code.
5840 Skipping is, however, impossible when the function calls alloca: the alloca
5841 expander assumes that the last current_function_outgoing_args_size bytes
5842 of the stack frame are unused. */
5843 if (ACCUMULATE_OUTGOING_ARGS
5844 && (!current_function_is_leaf || current_function_calls_alloca
5845 || ix86_current_function_calls_tls_descriptor))
5846 {
5847 offset += current_function_outgoing_args_size;
5848 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5849 }
5850 else
5851 frame->outgoing_arguments_size = 0;
5852
5853 /* Align stack boundary. Only needed if we're calling another function
5854 or using alloca. */
5855 if (!current_function_is_leaf || current_function_calls_alloca
5856 || ix86_current_function_calls_tls_descriptor)
5857 frame->padding2 = ((offset + preferred_alignment - 1)
5858 & -preferred_alignment) - offset;
5859 else
5860 frame->padding2 = 0;
5861
5862 offset += frame->padding2;
5863
5864 /* We've reached end of stack frame. */
5865 frame->stack_pointer_offset = offset;
5866
5867 /* Size prologue needs to allocate. */
5868 frame->to_allocate =
5869 (size + frame->padding1 + frame->padding2
5870 + frame->outgoing_arguments_size + frame->va_arg_size);
5871
5872 if ((!frame->to_allocate && frame->nregs <= 1)
5873 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5874 frame->save_regs_using_mov = false;
5875
5876 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5877 && current_function_is_leaf
5878 && !ix86_current_function_calls_tls_descriptor)
5879 {
5880 frame->red_zone_size = frame->to_allocate;
5881 if (frame->save_regs_using_mov)
5882 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5883 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5884 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5885 }
5886 else
5887 frame->red_zone_size = 0;
5888 frame->to_allocate -= frame->red_zone_size;
5889 frame->stack_pointer_offset -= frame->red_zone_size;
5890 #if 0
5891 fprintf (stderr, "\n");
5892 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5893 fprintf (stderr, "size: %ld\n", (long)size);
5894 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5895 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5896 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5897 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5898 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5899 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5900 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5901 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5902 (long)frame->hard_frame_pointer_offset);
5903 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5904 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5905 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5906 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5907 #endif
5908 }
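
/* Rough picture (illustration only) of the layout computed above, from the
   incoming argument pointer towards lower addresses; each *_offset field is
   the distance from the argument pointer down to the named point:

       return address, saved %ebp       <- hard_frame_pointer_offset
       saved registers (nregs words)
       va-arg register save area
       padding1
       local variables                  <- frame_pointer_offset
       outgoing argument area
       padding2                         <- stack_pointer_offset
       red zone (64-bit leaf functions; subtracted from to_allocate)  */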
5909
5910 /* Emit code to save registers in the prologue. */
5911
5912 static void
5913 ix86_emit_save_regs (void)
5914 {
5915 unsigned int regno;
5916 rtx insn;
5917
5918 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5919 if (ix86_save_reg (regno, true))
5920 {
5921 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5922 RTX_FRAME_RELATED_P (insn) = 1;
5923 }
5924 }
5925
5926 /* Emit code to save registers using MOV insns. The first register
5927 is saved at POINTER + OFFSET. */
5928 static void
5929 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5930 {
5931 unsigned int regno;
5932 rtx insn;
5933
5934 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5935 if (ix86_save_reg (regno, true))
5936 {
5937 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5938 Pmode, offset),
5939 gen_rtx_REG (Pmode, regno));
5940 RTX_FRAME_RELATED_P (insn) = 1;
5941 offset += UNITS_PER_WORD;
5942 }
5943 }
5944
5945 /* Expand prologue or epilogue stack adjustment.
5946 The pattern exists to put a dependency on all ebp-based memory accesses.
5947 STYLE should be negative if instructions should be marked as frame related,
5948 zero if the %r11 register is live and cannot be freely used, and positive
5949 otherwise. */
5950
5951 static void
5952 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5953 {
5954 rtx insn;
5955
5956 if (! TARGET_64BIT)
5957 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5958 else if (x86_64_immediate_operand (offset, DImode))
5959 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5960 else
5961 {
5962 rtx r11;
5963 /* r11 is used by indirect sibcall return as well, set before the
5964 epilogue and used after the epilogue. ATM indirect sibcall
5965 shouldn't be used together with huge frame sizes in one
5966 function because of the frame_size check in sibcall.c. */
5967 gcc_assert (style);
5968 r11 = gen_rtx_REG (DImode, R11_REG);
5969 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5970 if (style < 0)
5971 RTX_FRAME_RELATED_P (insn) = 1;
5972 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5973 offset));
5974 }
5975 if (style < 0)
5976 RTX_FRAME_RELATED_P (insn) = 1;
5977 }
5978
5979 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5980
5981 static rtx
5982 ix86_internal_arg_pointer (void)
5983 {
5984 bool has_force_align_arg_pointer =
5985 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5986 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5987 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5988 && DECL_NAME (current_function_decl)
5989 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5990 && DECL_FILE_SCOPE_P (current_function_decl))
5991 || ix86_force_align_arg_pointer
5992 || has_force_align_arg_pointer)
5993 {
5994 /* Nested functions can't realign the stack due to a register
5995 conflict. */
5996 if (DECL_CONTEXT (current_function_decl)
5997 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5998 {
5999 if (ix86_force_align_arg_pointer)
6000 warning (0, "-mstackrealign ignored for nested functions");
6001 if (has_force_align_arg_pointer)
6002 error ("%s not supported for nested functions",
6003 ix86_force_align_arg_pointer_string);
6004 return virtual_incoming_args_rtx;
6005 }
6006 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
6007 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6008 }
6009 else
6010 return virtual_incoming_args_rtx;
6011 }
6012
6013 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6014 This is called from dwarf2out.c to emit call frame instructions
6015 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6016 static void
6017 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6018 {
6019 rtx unspec = SET_SRC (pattern);
6020 gcc_assert (GET_CODE (unspec) == UNSPEC);
6021
6022 switch (index)
6023 {
6024 case UNSPEC_REG_SAVE:
6025 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6026 SET_DEST (pattern));
6027 break;
6028 case UNSPEC_DEF_CFA:
6029 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6030 INTVAL (XVECEXP (unspec, 0, 0)));
6031 break;
6032 default:
6033 gcc_unreachable ();
6034 }
6035 }
6036
6037 /* Expand the prologue into a bunch of separate insns. */
6038
6039 void
6040 ix86_expand_prologue (void)
6041 {
6042 rtx insn;
6043 bool pic_reg_used;
6044 struct ix86_frame frame;
6045 HOST_WIDE_INT allocate;
6046
6047 ix86_compute_frame_layout (&frame);
6048
6049 if (cfun->machine->force_align_arg_pointer)
6050 {
6051 rtx x, y;
6052
6053 /* Grab the argument pointer. */
6054 x = plus_constant (stack_pointer_rtx, 4);
6055 y = cfun->machine->force_align_arg_pointer;
6056 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6057 RTX_FRAME_RELATED_P (insn) = 1;
6058
6059 /* The unwind info consists of two parts: install the fafp as the cfa,
6060 and record the fafp as the "save register" of the stack pointer.
6061 The latter is there so that the unwinder can see where it should
6062 restore the stack pointer across the `and' insn that aligns the stack. */
6063 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6064 x = gen_rtx_SET (VOIDmode, y, x);
6065 RTX_FRAME_RELATED_P (x) = 1;
6066 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6067 UNSPEC_REG_SAVE);
6068 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6069 RTX_FRAME_RELATED_P (y) = 1;
6070 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6071 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6072 REG_NOTES (insn) = x;
6073
6074 /* Align the stack. */
6075 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6076 GEN_INT (-16)));
6077
6078 /* And here we cheat like madmen with the unwind info. We force the
6079 cfa register back to sp+4, which is exactly what it was at the
6080 start of the function. Re-pushing the return address results in
6081 the return at the same spot relative to the cfa, and thus is
6082 correct wrt the unwind info. */
6083 x = cfun->machine->force_align_arg_pointer;
6084 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6085 insn = emit_insn (gen_push (x));
6086 RTX_FRAME_RELATED_P (insn) = 1;
6087
6088 x = GEN_INT (4);
6089 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6090 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6091 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6092 REG_NOTES (insn) = x;
6093 }
6094
6095 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6096 slower on all targets. Also sdb doesn't like it. */
6097
6098 if (frame_pointer_needed)
6099 {
6100 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6101 RTX_FRAME_RELATED_P (insn) = 1;
6102
6103 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6104 RTX_FRAME_RELATED_P (insn) = 1;
6105 }
6106
6107 allocate = frame.to_allocate;
6108
6109 if (!frame.save_regs_using_mov)
6110 ix86_emit_save_regs ();
6111 else
6112 allocate += frame.nregs * UNITS_PER_WORD;
6113
6114 /* When using the red zone we may start register saving before allocating
6115 the stack frame, saving one cycle of the prologue. */
6116 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6117 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6118 : stack_pointer_rtx,
6119 -frame.nregs * UNITS_PER_WORD);
6120
6121 if (allocate == 0)
6122 ;
6123 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6124 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6125 GEN_INT (-allocate), -1);
6126 else
6127 {
6128 /* Only valid for Win32 and the 64-bit MS ABI. */
6129 rtx eax = gen_rtx_REG (Pmode, 0);
6130 bool eax_live;
6131 rtx t;
6132
6133 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6134
6135 if (TARGET_64BIT_MS_ABI)
6136 eax_live = false;
6137 else
6138 eax_live = ix86_eax_live_at_start_p ();
6139
6140 if (eax_live)
6141 {
6142 emit_insn (gen_push (eax));
6143 allocate -= UNITS_PER_WORD;
6144 }
6145
6146 emit_move_insn (eax, GEN_INT (allocate));
6147
6148 if (TARGET_64BIT)
6149 insn = gen_allocate_stack_worker_64 (eax);
6150 else
6151 insn = gen_allocate_stack_worker_32 (eax);
6152 insn = emit_insn (insn);
6153 RTX_FRAME_RELATED_P (insn) = 1;
6154 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6155 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6156 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6157 t, REG_NOTES (insn));
6158
6159 if (eax_live)
6160 {
6161 if (frame_pointer_needed)
6162 t = plus_constant (hard_frame_pointer_rtx,
6163 allocate
6164 - frame.to_allocate
6165 - frame.nregs * UNITS_PER_WORD);
6166 else
6167 t = plus_constant (stack_pointer_rtx, allocate);
6168 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6169 }
6170 }
6171
6172 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6173 {
6174 if (!frame_pointer_needed || !frame.to_allocate)
6175 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6176 else
6177 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6178 -frame.nregs * UNITS_PER_WORD);
6179 }
6180
6181 pic_reg_used = false;
6182 if (pic_offset_table_rtx
6183 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6184 || current_function_profile))
6185 {
6186 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6187
6188 if (alt_pic_reg_used != INVALID_REGNUM)
6189 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6190
6191 pic_reg_used = true;
6192 }
6193
6194 if (pic_reg_used)
6195 {
6196 if (TARGET_64BIT)
6197 {
6198 if (ix86_cmodel == CM_LARGE_PIC)
6199 {
6200 rtx tmp_reg = gen_rtx_REG (DImode,
6201 FIRST_REX_INT_REG + 3 /* R11 */);
6202 rtx label = gen_label_rtx ();
6203 emit_label (label);
6204 LABEL_PRESERVE_P (label) = 1;
6205 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6206 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6207 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6208 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6209 pic_offset_table_rtx, tmp_reg));
6210 }
6211 else
6212 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6213 }
6214 else
6215 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6216 }
6217
6218 /* Prevent function calls from being scheduled before the call to mcount.
6219 In the pic_reg_used case, make sure that the GOT load isn't deleted. */
6220 if (current_function_profile)
6221 {
6222 if (pic_reg_used)
6223 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6224 emit_insn (gen_blockage ());
6225 }
6226 }
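
/* For orientation (illustrative only), a typical 32-bit prologue produced by
   the expander above, with a frame pointer and push-based register saves:

       pushl   %ebp
       movl    %esp, %ebp
       pushl   %ebx                    # ix86_emit_save_regs
       subl    $N, %esp                # pro_epilogue_adjust_stack
       call    __i686.get_pc_thunk.bx  # only if the PIC register is needed
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx  */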
6227
6228 /* Emit code to restore saved registers using MOV insns. First register
6229 is restored from POINTER + OFFSET. */
6230 static void
6231 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6232 int maybe_eh_return)
6233 {
6234 int regno;
6235 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6236
6237 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6238 if (ix86_save_reg (regno, maybe_eh_return))
6239 {
6240 /* Ensure that adjust_address won't be forced to produce a pointer
6241 outside the range allowed by the x86-64 instruction set. */
6242 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6243 {
6244 rtx r11;
6245
6246 r11 = gen_rtx_REG (DImode, R11_REG);
6247 emit_move_insn (r11, GEN_INT (offset));
6248 emit_insn (gen_adddi3 (r11, r11, pointer));
6249 base_address = gen_rtx_MEM (Pmode, r11);
6250 offset = 0;
6251 }
6252 emit_move_insn (gen_rtx_REG (Pmode, regno),
6253 adjust_address (base_address, Pmode, offset));
6254 offset += UNITS_PER_WORD;
6255 }
6256 }
6257
6258 /* Restore function stack, frame, and registers. */
6259
6260 void
6261 ix86_expand_epilogue (int style)
6262 {
6263 int regno;
6264 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6265 struct ix86_frame frame;
6266 HOST_WIDE_INT offset;
6267
6268 ix86_compute_frame_layout (&frame);
6269
6270 /* Calculate start of saved registers relative to ebp. Special care
6271 must be taken for the normal return case of a function using
6272 eh_return: the eax and edx registers are marked as saved, but not
6273 restored along this path. */
6274 offset = frame.nregs;
6275 if (current_function_calls_eh_return && style != 2)
6276 offset -= 2;
6277 offset *= -UNITS_PER_WORD;
6278
6279 /* If we're only restoring one register and sp is not valid, then
6280 use a move instruction to restore the register, since it's
6281 less work than reloading sp and popping the register.
6282
6283 The default code results in a stack adjustment using an add/lea instruction,
6284 while this code results in a LEAVE instruction (or its discrete equivalent),
6285 so it is profitable in some other cases as well, especially when there
6286 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6287 and there is exactly one register to pop. This heuristic may need some
6288 tuning in the future. */
6289 if ((!sp_valid && frame.nregs <= 1)
6290 || (TARGET_EPILOGUE_USING_MOVE
6291 && cfun->machine->use_fast_prologue_epilogue
6292 && (frame.nregs > 1 || frame.to_allocate))
6293 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6294 || (frame_pointer_needed && TARGET_USE_LEAVE
6295 && cfun->machine->use_fast_prologue_epilogue
6296 && frame.nregs == 1)
6297 || current_function_calls_eh_return)
6298 {
6299 /* Restore registers. We can use ebp or esp to address the memory
6300 locations. If both are available, default to ebp, since offsets
6301 are known to be small. The only exception is esp pointing directly to the
6302 end of the block of saved registers, where we may simplify the addressing
6303 mode. */
6304
6305 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6306 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6307 frame.to_allocate, style == 2);
6308 else
6309 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6310 offset, style == 2);
6311
6312 /* eh_return epilogues need %ecx added to the stack pointer. */
6313 if (style == 2)
6314 {
6315 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6316
6317 if (frame_pointer_needed)
6318 {
6319 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6320 tmp = plus_constant (tmp, UNITS_PER_WORD);
6321 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6322
6323 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6324 emit_move_insn (hard_frame_pointer_rtx, tmp);
6325
6326 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6327 const0_rtx, style);
6328 }
6329 else
6330 {
6331 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6332 tmp = plus_constant (tmp, (frame.to_allocate
6333 + frame.nregs * UNITS_PER_WORD));
6334 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6335 }
6336 }
6337 else if (!frame_pointer_needed)
6338 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6339 GEN_INT (frame.to_allocate
6340 + frame.nregs * UNITS_PER_WORD),
6341 style);
6342 /* If not an i386, mov & pop is faster than "leave". */
6343 else if (TARGET_USE_LEAVE || optimize_size
6344 || !cfun->machine->use_fast_prologue_epilogue)
6345 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6346 else
6347 {
6348 pro_epilogue_adjust_stack (stack_pointer_rtx,
6349 hard_frame_pointer_rtx,
6350 const0_rtx, style);
6351 if (TARGET_64BIT)
6352 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6353 else
6354 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6355 }
6356 }
6357 else
6358 {
6359 /* First step is to deallocate the stack frame so that we can
6360 pop the registers. */
6361 if (!sp_valid)
6362 {
6363 gcc_assert (frame_pointer_needed);
6364 pro_epilogue_adjust_stack (stack_pointer_rtx,
6365 hard_frame_pointer_rtx,
6366 GEN_INT (offset), style);
6367 }
6368 else if (frame.to_allocate)
6369 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6370 GEN_INT (frame.to_allocate), style);
6371
6372 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6373 if (ix86_save_reg (regno, false))
6374 {
6375 if (TARGET_64BIT)
6376 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6377 else
6378 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6379 }
6380 if (frame_pointer_needed)
6381 {
6382 /* Leave results in shorter dependency chains on CPUs that are
6383 able to grok it fast. */
6384 if (TARGET_USE_LEAVE)
6385 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6386 else if (TARGET_64BIT)
6387 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6388 else
6389 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6390 }
6391 }
6392
6393 if (cfun->machine->force_align_arg_pointer)
6394 {
6395 emit_insn (gen_addsi3 (stack_pointer_rtx,
6396 cfun->machine->force_align_arg_pointer,
6397 GEN_INT (-4)));
6398 }
6399
6400 /* Sibcall epilogues don't want a return instruction. */
6401 if (style == 0)
6402 return;
6403
6404 if (current_function_pops_args && current_function_args_size)
6405 {
6406 rtx popc = GEN_INT (current_function_pops_args);
6407
6408 /* i386 can only pop 64K bytes. If asked to pop more, pop
6409 return address, do explicit add, and jump indirectly to the
6410 caller. */
6411
6412 if (current_function_pops_args >= 65536)
6413 {
6414 rtx ecx = gen_rtx_REG (SImode, 2);
6415
6416 /* There is no "pascal" calling convention in any 64bit ABI. */
6417 gcc_assert (!TARGET_64BIT);
6418
6419 emit_insn (gen_popsi1 (ecx));
6420 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6421 emit_jump_insn (gen_return_indirect_internal (ecx));
6422 }
6423 else
6424 emit_jump_insn (gen_return_pop_internal (popc));
6425 }
6426 else
6427 emit_jump_insn (gen_return_internal ());
6428 }
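
/* Correspondingly (illustrative only), the common epilogue shapes emitted
   above are either the move/leave form

       movl    -4(%ebp), %ebx
       leave
       ret

   or the add/pop form

       addl    $N, %esp
       popl    %ebx
       popl    %ebp
       ret                             # or `ret $N' when popping arguments  */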
6429
6430 /* Reset from the function's potential modifications. */
6431
6432 static void
6433 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6434 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6435 {
6436 if (pic_offset_table_rtx)
6437 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6438 #if TARGET_MACHO
6439 /* Mach-O doesn't support labels at the end of objects, so if
6440 it looks like we might want one, insert a NOP. */
6441 {
6442 rtx insn = get_last_insn ();
6443 while (insn
6444 && NOTE_P (insn)
6445 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6446 insn = PREV_INSN (insn);
6447 if (insn
6448 && (LABEL_P (insn)
6449 || (NOTE_P (insn)
6450 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6451 fputs ("\tnop\n", file);
6452 }
6453 #endif
6454
6455 }
6456 \f
6457 /* Extract the parts of an RTL expression that is a valid memory address
6458 for an instruction. Return 0 if the structure of the address is
6459 grossly off. Return -1 if the address contains ASHIFT, so it is not
6460 strictly valid, but is still used for computing the length of a lea instruction. */
6461
6462 int
6463 ix86_decompose_address (rtx addr, struct ix86_address *out)
6464 {
6465 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6466 rtx base_reg, index_reg;
6467 HOST_WIDE_INT scale = 1;
6468 rtx scale_rtx = NULL_RTX;
6469 int retval = 1;
6470 enum ix86_address_seg seg = SEG_DEFAULT;
6471
6472 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6473 base = addr;
6474 else if (GET_CODE (addr) == PLUS)
6475 {
6476 rtx addends[4], op;
6477 int n = 0, i;
6478
6479 op = addr;
6480 do
6481 {
6482 if (n >= 4)
6483 return 0;
6484 addends[n++] = XEXP (op, 1);
6485 op = XEXP (op, 0);
6486 }
6487 while (GET_CODE (op) == PLUS);
6488 if (n >= 4)
6489 return 0;
6490 addends[n] = op;
6491
6492 for (i = n; i >= 0; --i)
6493 {
6494 op = addends[i];
6495 switch (GET_CODE (op))
6496 {
6497 case MULT:
6498 if (index)
6499 return 0;
6500 index = XEXP (op, 0);
6501 scale_rtx = XEXP (op, 1);
6502 break;
6503
6504 case UNSPEC:
6505 if (XINT (op, 1) == UNSPEC_TP
6506 && TARGET_TLS_DIRECT_SEG_REFS
6507 && seg == SEG_DEFAULT)
6508 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6509 else
6510 return 0;
6511 break;
6512
6513 case REG:
6514 case SUBREG:
6515 if (!base)
6516 base = op;
6517 else if (!index)
6518 index = op;
6519 else
6520 return 0;
6521 break;
6522
6523 case CONST:
6524 case CONST_INT:
6525 case SYMBOL_REF:
6526 case LABEL_REF:
6527 if (disp)
6528 return 0;
6529 disp = op;
6530 break;
6531
6532 default:
6533 return 0;
6534 }
6535 }
6536 }
6537 else if (GET_CODE (addr) == MULT)
6538 {
6539 index = XEXP (addr, 0); /* index*scale */
6540 scale_rtx = XEXP (addr, 1);
6541 }
6542 else if (GET_CODE (addr) == ASHIFT)
6543 {
6544 rtx tmp;
6545
6546 /* We're called for lea too, which implements ashift on occasion. */
6547 index = XEXP (addr, 0);
6548 tmp = XEXP (addr, 1);
6549 if (!CONST_INT_P (tmp))
6550 return 0;
6551 scale = INTVAL (tmp);
6552 if ((unsigned HOST_WIDE_INT) scale > 3)
6553 return 0;
6554 scale = 1 << scale;
6555 retval = -1;
6556 }
6557 else
6558 disp = addr; /* displacement */
6559
6560 /* Extract the integral value of scale. */
6561 if (scale_rtx)
6562 {
6563 if (!CONST_INT_P (scale_rtx))
6564 return 0;
6565 scale = INTVAL (scale_rtx);
6566 }
6567
6568 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6569 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6570
6571 /* Allow the arg pointer and the stack pointer as index if there is no scaling. */
6572 if (base_reg && index_reg && scale == 1
6573 && (index_reg == arg_pointer_rtx
6574 || index_reg == frame_pointer_rtx
6575 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6576 {
6577 rtx tmp;
6578 tmp = base, base = index, index = tmp;
6579 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6580 }
6581
6582 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6583 if ((base_reg == hard_frame_pointer_rtx
6584 || base_reg == frame_pointer_rtx
6585 || base_reg == arg_pointer_rtx) && !disp)
6586 disp = const0_rtx;
6587
6588 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6589 Avoid this by transforming it to [%esi+0]. */
6590 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6591 && base_reg && !index_reg && !disp
6592 && REG_P (base_reg)
6593 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6594 disp = const0_rtx;
6595
6596 /* Special case: encode reg+reg instead of reg*2. */
6597 if (!base && index && scale && scale == 2)
6598 base = index, base_reg = index_reg, scale = 1;
6599
6600 /* Special case: scaling cannot be encoded without base or displacement. */
6601 if (!base && !disp && index && scale != 1)
6602 disp = const0_rtx;
6603
6604 out->base = base;
6605 out->index = index;
6606 out->disp = disp;
6607 out->scale = scale;
6608 out->seg = seg;
6609
6610 return retval;
6611 }
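
/* Example (illustration only): the 32-bit address 12(%ebp,%ebx,4), i.e.

       (plus:SI (plus:SI (mult:SI (reg:SI bx) (const_int 4)) (reg:SI bp))
                (const_int 12))

   decomposes into base = %ebp, index = %ebx, scale = 4, disp = (const_int 12),
   seg = SEG_DEFAULT, with a return value of 1.  */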
6612 \f
6613 /* Return cost of the memory address x.
6614 For i386, it is better to use a complex address than let gcc copy
6615 the address into a reg and make a new pseudo. But not if the address
6616 requires two regs - that would mean more pseudos with longer
6617 lifetimes. */
6618 static int
6619 ix86_address_cost (rtx x)
6620 {
6621 struct ix86_address parts;
6622 int cost = 1;
6623 int ok = ix86_decompose_address (x, &parts);
6624
6625 gcc_assert (ok);
6626
6627 if (parts.base && GET_CODE (parts.base) == SUBREG)
6628 parts.base = SUBREG_REG (parts.base);
6629 if (parts.index && GET_CODE (parts.index) == SUBREG)
6630 parts.index = SUBREG_REG (parts.index);
6631
6632 /* Attempt to minimize number of registers in the address. */
6633 if ((parts.base
6634 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6635 || (parts.index
6636 && (!REG_P (parts.index)
6637 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6638 cost++;
6639
6640 if (parts.base
6641 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6642 && parts.index
6643 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6644 && parts.base != parts.index)
6645 cost++;
6646
6647 /* The AMD K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6648 since its predecode logic can't detect the length of such instructions
6649 and decoding degenerates to the vector decoder. Increase the cost of such
6650 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6651 to split such addresses or even refuse them altogether.
6652
6653 Following addressing modes are affected:
6654 [base+scale*index]
6655 [scale*index+disp]
6656 [base+index]
6657
6658 The first and last cases may be avoidable by explicitly coding a zero
6659 displacement in the memory address, but I don't have an AMD K6 machine
6660 handy to check this theory. */
6661
6662 if (TARGET_K6
6663 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6664 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6665 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6666 cost += 10;
6667
6668 return cost;
6669 }
6670 \f
6671 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6672 this is used to form addresses of local data when -fPIC is in
6673 use. */
6674
6675 static bool
6676 darwin_local_data_pic (rtx disp)
6677 {
6678 if (GET_CODE (disp) == MINUS)
6679 {
6680 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6681 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6682 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6683 {
6684 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6685 if (! strcmp (sym_name, "<pic base>"))
6686 return true;
6687 }
6688 }
6689
6690 return false;
6691 }
6692
6693 /* Determine if a given RTX is a valid constant. We already know this
6694 satisfies CONSTANT_P. */
6695
6696 bool
6697 legitimate_constant_p (rtx x)
6698 {
6699 switch (GET_CODE (x))
6700 {
6701 case CONST:
6702 x = XEXP (x, 0);
6703
6704 if (GET_CODE (x) == PLUS)
6705 {
6706 if (!CONST_INT_P (XEXP (x, 1)))
6707 return false;
6708 x = XEXP (x, 0);
6709 }
6710
6711 if (TARGET_MACHO && darwin_local_data_pic (x))
6712 return true;
6713
6714 /* Only some unspecs are valid as "constants". */
6715 if (GET_CODE (x) == UNSPEC)
6716 switch (XINT (x, 1))
6717 {
6718 case UNSPEC_GOT:
6719 case UNSPEC_GOTOFF:
6720 case UNSPEC_PLTOFF:
6721 return TARGET_64BIT;
6722 case UNSPEC_TPOFF:
6723 case UNSPEC_NTPOFF:
6724 x = XVECEXP (x, 0, 0);
6725 return (GET_CODE (x) == SYMBOL_REF
6726 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6727 case UNSPEC_DTPOFF:
6728 x = XVECEXP (x, 0, 0);
6729 return (GET_CODE (x) == SYMBOL_REF
6730 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6731 default:
6732 return false;
6733 }
6734
6735 /* We must have drilled down to a symbol. */
6736 if (GET_CODE (x) == LABEL_REF)
6737 return true;
6738 if (GET_CODE (x) != SYMBOL_REF)
6739 return false;
6740 /* FALLTHRU */
6741
6742 case SYMBOL_REF:
6743 /* TLS symbols are never valid. */
6744 if (SYMBOL_REF_TLS_MODEL (x))
6745 return false;
6746
6747 /* DLLIMPORT symbols are never valid. */
6748 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6749 && SYMBOL_REF_DLLIMPORT_P (x))
6750 return false;
6751 break;
6752
6753 case CONST_DOUBLE:
6754 if (GET_MODE (x) == TImode
6755 && x != CONST0_RTX (TImode)
6756 && !TARGET_64BIT)
6757 return false;
6758 break;
6759
6760 case CONST_VECTOR:
6761 if (x == CONST0_RTX (GET_MODE (x)))
6762 return true;
6763 return false;
6764
6765 default:
6766 break;
6767 }
6768
6769 /* Otherwise we handle everything else in the move patterns. */
6770 return true;
6771 }
6772
6773 /* Determine if it's legal to put X into the constant pool. This
6774 is not possible for the address of thread-local symbols, which
6775 is checked above. */
6776
6777 static bool
6778 ix86_cannot_force_const_mem (rtx x)
6779 {
6780 /* We can always put integral constants and vectors in memory. */
6781 switch (GET_CODE (x))
6782 {
6783 case CONST_INT:
6784 case CONST_DOUBLE:
6785 case CONST_VECTOR:
6786 return false;
6787
6788 default:
6789 break;
6790 }
6791 return !legitimate_constant_p (x);
6792 }
6793
6794 /* Determine if a given RTX is a valid constant address. */
6795
6796 bool
6797 constant_address_p (rtx x)
6798 {
6799 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6800 }
6801
6802 /* Nonzero if the constant value X is a legitimate general operand
6803 when generating PIC code. It is given that flag_pic is on and
6804 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6805
6806 bool
6807 legitimate_pic_operand_p (rtx x)
6808 {
6809 rtx inner;
6810
6811 switch (GET_CODE (x))
6812 {
6813 case CONST:
6814 inner = XEXP (x, 0);
6815 if (GET_CODE (inner) == PLUS
6816 && CONST_INT_P (XEXP (inner, 1)))
6817 inner = XEXP (inner, 0);
6818
6819 /* Only some unspecs are valid as "constants". */
6820 if (GET_CODE (inner) == UNSPEC)
6821 switch (XINT (inner, 1))
6822 {
6823 case UNSPEC_GOT:
6824 case UNSPEC_GOTOFF:
6825 case UNSPEC_PLTOFF:
6826 return TARGET_64BIT;
6827 case UNSPEC_TPOFF:
6828 x = XVECEXP (inner, 0, 0);
6829 return (GET_CODE (x) == SYMBOL_REF
6830 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6831 default:
6832 return false;
6833 }
6834 /* FALLTHRU */
6835
6836 case SYMBOL_REF:
6837 case LABEL_REF:
6838 return legitimate_pic_address_disp_p (x);
6839
6840 default:
6841 return true;
6842 }
6843 }
6844
6845 /* Determine if a given CONST RTX is a valid memory displacement
6846 in PIC mode. */
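/* An illustrative sketch (not an exhaustive grammar; the symbol name is a
   placeholder): in 32bit PIC code a typical accepted displacement is

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   optionally wrapped in a PLUS with a CONST_INT offset, while in 64bit mode,
   besides direct references to local symbols, only the GOTPCREL, GOTOFF and
   PLTOFF unspecs wrapping a SYMBOL_REF or LABEL_REF are accepted, as the
   cases below show.  */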
6847
6848 int
6849 legitimate_pic_address_disp_p (rtx disp)
6850 {
6851 bool saw_plus;
6852
6853 /* In 64bit mode we can allow direct addresses of symbols and labels
6854 when they are not dynamic symbols. */
6855 if (TARGET_64BIT)
6856 {
6857 rtx op0 = disp, op1;
6858
6859 switch (GET_CODE (disp))
6860 {
6861 case LABEL_REF:
6862 return true;
6863
6864 case CONST:
6865 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6866 break;
6867 op0 = XEXP (XEXP (disp, 0), 0);
6868 op1 = XEXP (XEXP (disp, 0), 1);
6869 if (!CONST_INT_P (op1)
6870 || INTVAL (op1) >= 16*1024*1024
6871 || INTVAL (op1) < -16*1024*1024)
6872 break;
6873 if (GET_CODE (op0) == LABEL_REF)
6874 return true;
6875 if (GET_CODE (op0) != SYMBOL_REF)
6876 break;
6877 /* FALLTHRU */
6878
6879 case SYMBOL_REF:
6880 /* TLS references should always be enclosed in UNSPEC. */
6881 if (SYMBOL_REF_TLS_MODEL (op0))
6882 return false;
6883 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6884 && ix86_cmodel != CM_LARGE_PIC)
6885 return true;
6886 break;
6887
6888 default:
6889 break;
6890 }
6891 }
6892 if (GET_CODE (disp) != CONST)
6893 return 0;
6894 disp = XEXP (disp, 0);
6895
6896 if (TARGET_64BIT)
6897 {
6898 /* It is not safe to allow PLUS expressions here; the allowed distance
6899 of GOT references is limited. We should not need these anyway. */
6900 if (GET_CODE (disp) != UNSPEC
6901 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6902 && XINT (disp, 1) != UNSPEC_GOTOFF
6903 && XINT (disp, 1) != UNSPEC_PLTOFF))
6904 return 0;
6905
6906 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6907 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6908 return 0;
6909 return 1;
6910 }
6911
6912 saw_plus = false;
6913 if (GET_CODE (disp) == PLUS)
6914 {
6915 if (!CONST_INT_P (XEXP (disp, 1)))
6916 return 0;
6917 disp = XEXP (disp, 0);
6918 saw_plus = true;
6919 }
6920
6921 if (TARGET_MACHO && darwin_local_data_pic (disp))
6922 return 1;
6923
6924 if (GET_CODE (disp) != UNSPEC)
6925 return 0;
6926
6927 switch (XINT (disp, 1))
6928 {
6929 case UNSPEC_GOT:
6930 if (saw_plus)
6931 return false;
6932 /* We need to check for both symbols and labels because VxWorks loads
6933 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6934 details. */
6935 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6936 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6937 case UNSPEC_GOTOFF:
6938 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6939 The ABI also specifies a 32bit relocation, but we don't produce it in
6940 the small PIC model at all. */
6941 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6942 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6943 && !TARGET_64BIT)
6944 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6945 return false;
6946 case UNSPEC_GOTTPOFF:
6947 case UNSPEC_GOTNTPOFF:
6948 case UNSPEC_INDNTPOFF:
6949 if (saw_plus)
6950 return false;
6951 disp = XVECEXP (disp, 0, 0);
6952 return (GET_CODE (disp) == SYMBOL_REF
6953 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6954 case UNSPEC_NTPOFF:
6955 disp = XVECEXP (disp, 0, 0);
6956 return (GET_CODE (disp) == SYMBOL_REF
6957 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6958 case UNSPEC_DTPOFF:
6959 disp = XVECEXP (disp, 0, 0);
6960 return (GET_CODE (disp) == SYMBOL_REF
6961 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6962 }
6963
6964 return 0;
6965 }
6966
6967 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6968 memory address for an instruction. The MODE argument is the machine mode
6969 for the MEM expression that wants to use this address.
6970
6971 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6972 convert common non-canonical forms to canonical form so that they will
6973 be recognized. */
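/* Schematic example (a sketch, not the full grammar; register and constant
   values are placeholders): a general x86 address is base + index*scale +
   disp with scale 1, 2, 4 or 8, whose canonical RTL looks like

     (plus (plus (mult (reg index) (const_int 4)) (reg base))
           (const_int 12))

   i.e. 12(%base,%index,4) in AT&T syntax.  The code below validates the
   base, index, scale and displacement components in turn.  */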
6974
6975 int
6976 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6977 rtx addr, int strict)
6978 {
6979 struct ix86_address parts;
6980 rtx base, index, disp;
6981 HOST_WIDE_INT scale;
6982 const char *reason = NULL;
6983 rtx reason_rtx = NULL_RTX;
6984
6985 if (ix86_decompose_address (addr, &parts) <= 0)
6986 {
6987 reason = "decomposition failed";
6988 goto report_error;
6989 }
6990
6991 base = parts.base;
6992 index = parts.index;
6993 disp = parts.disp;
6994 scale = parts.scale;
6995
6996 /* Validate base register.
6997
6998 Don't allow SUBREGs that span more than a word here; doing so can lead to spill
6999 failures when the base is one word out of a two-word structure, which is
7000 represented internally as a DImode int. */
7001
7002 if (base)
7003 {
7004 rtx reg;
7005 reason_rtx = base;
7006
7007 if (REG_P (base))
7008 reg = base;
7009 else if (GET_CODE (base) == SUBREG
7010 && REG_P (SUBREG_REG (base))
7011 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7012 <= UNITS_PER_WORD)
7013 reg = SUBREG_REG (base);
7014 else
7015 {
7016 reason = "base is not a register";
7017 goto report_error;
7018 }
7019
7020 if (GET_MODE (base) != Pmode)
7021 {
7022 reason = "base is not in Pmode";
7023 goto report_error;
7024 }
7025
7026 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7027 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7028 {
7029 reason = "base is not valid";
7030 goto report_error;
7031 }
7032 }
7033
7034 /* Validate index register.
7035
7036 Don't allow SUBREGs that span more than a word here -- same as above. */
7037
7038 if (index)
7039 {
7040 rtx reg;
7041 reason_rtx = index;
7042
7043 if (REG_P (index))
7044 reg = index;
7045 else if (GET_CODE (index) == SUBREG
7046 && REG_P (SUBREG_REG (index))
7047 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7048 <= UNITS_PER_WORD)
7049 reg = SUBREG_REG (index);
7050 else
7051 {
7052 reason = "index is not a register";
7053 goto report_error;
7054 }
7055
7056 if (GET_MODE (index) != Pmode)
7057 {
7058 reason = "index is not in Pmode";
7059 goto report_error;
7060 }
7061
7062 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7063 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7064 {
7065 reason = "index is not valid";
7066 goto report_error;
7067 }
7068 }
7069
7070 /* Validate scale factor. */
7071 if (scale != 1)
7072 {
7073 reason_rtx = GEN_INT (scale);
7074 if (!index)
7075 {
7076 reason = "scale without index";
7077 goto report_error;
7078 }
7079
7080 if (scale != 2 && scale != 4 && scale != 8)
7081 {
7082 reason = "scale is not a valid multiplier";
7083 goto report_error;
7084 }
7085 }
7086
7087 /* Validate displacement. */
7088 if (disp)
7089 {
7090 reason_rtx = disp;
7091
7092 if (GET_CODE (disp) == CONST
7093 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7094 switch (XINT (XEXP (disp, 0), 1))
7095 {
7096 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
7097 used. The ABI also specifies 32bit relocations, but we don't produce
7098 them at all and use IP-relative addressing instead. */
7099 case UNSPEC_GOT:
7100 case UNSPEC_GOTOFF:
7101 gcc_assert (flag_pic);
7102 if (!TARGET_64BIT)
7103 goto is_legitimate_pic;
7104 reason = "64bit address unspec";
7105 goto report_error;
7106
7107 case UNSPEC_GOTPCREL:
7108 gcc_assert (flag_pic);
7109 goto is_legitimate_pic;
7110
7111 case UNSPEC_GOTTPOFF:
7112 case UNSPEC_GOTNTPOFF:
7113 case UNSPEC_INDNTPOFF:
7114 case UNSPEC_NTPOFF:
7115 case UNSPEC_DTPOFF:
7116 break;
7117
7118 default:
7119 reason = "invalid address unspec";
7120 goto report_error;
7121 }
7122
7123 else if (SYMBOLIC_CONST (disp)
7124 && (flag_pic
7125 || (TARGET_MACHO
7126 #if TARGET_MACHO
7127 && MACHOPIC_INDIRECT
7128 && !machopic_operand_p (disp)
7129 #endif
7130 )))
7131 {
7132
7133 is_legitimate_pic:
7134 if (TARGET_64BIT && (index || base))
7135 {
7136 /* foo@dtpoff(%rX) is ok. */
7137 if (GET_CODE (disp) != CONST
7138 || GET_CODE (XEXP (disp, 0)) != PLUS
7139 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7140 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7141 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7142 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7143 {
7144 reason = "non-constant pic memory reference";
7145 goto report_error;
7146 }
7147 }
7148 else if (! legitimate_pic_address_disp_p (disp))
7149 {
7150 reason = "displacement is an invalid pic construct";
7151 goto report_error;
7152 }
7153
7154 /* This code used to verify that a symbolic pic displacement
7155 includes the pic_offset_table_rtx register.
7156 
7157 While this is a good idea, unfortunately these constructs may
7158 be created by the "adds using lea" optimization for incorrect
7159 code like:
7160 
7161 int a;
7162 int foo(int i)
7163 {
7164 return *(&a+i);
7165 }
7166 
7167 This code is nonsensical, but it results in addressing the
7168 GOT table with a pic_offset_table_rtx base. We can't
7169 just refuse it easily, since it gets matched by the
7170 "addsi3" pattern, which later gets split to an lea when
7171 the output register differs from the input. While this
7172 could be handled by a separate addsi pattern for this case
7173 that never results in an lea, disabling this test seems
7174 to be the easier and correct fix for the crash. */
7175 }
7176 else if (GET_CODE (disp) != LABEL_REF
7177 && !CONST_INT_P (disp)
7178 && (GET_CODE (disp) != CONST
7179 || !legitimate_constant_p (disp))
7180 && (GET_CODE (disp) != SYMBOL_REF
7181 || !legitimate_constant_p (disp)))
7182 {
7183 reason = "displacement is not constant";
7184 goto report_error;
7185 }
7186 else if (TARGET_64BIT
7187 && !x86_64_immediate_operand (disp, VOIDmode))
7188 {
7189 reason = "displacement is out of range";
7190 goto report_error;
7191 }
7192 }
7193
7194 /* Everything looks valid. */
7195 return TRUE;
7196
7197 report_error:
7198 return FALSE;
7199 }
7200 \f
7201 /* Return a unique alias set for the GOT. */
7202
7203 static alias_set_type
7204 ix86_GOT_alias_set (void)
7205 {
7206 static alias_set_type set = -1;
7207 if (set == -1)
7208 set = new_alias_set ();
7209 return set;
7210 }
7211
7212 /* Return a legitimate reference for ORIG (an address) using the
7213 register REG. If REG is 0, a new pseudo is generated.
7214
7215 There are two types of references that must be handled:
7216
7217 1. Global data references must load the address from the GOT, via
7218 the PIC reg. An insn is emitted to do this load, and the reg is
7219 returned.
7220
7221 2. Static data references, constant pool addresses, and code labels
7222 compute the address as an offset from the GOT, whose base is in
7223 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7224 differentiate them from global data objects. The returned
7225 address is the PIC reg + an unspec constant.
7226
7227 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7228 reg also appears in the address. */
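/* Schematic examples (assuming 32bit ELF; symbol names are placeholders):
   a global symbol is reached by loading its address from the GOT, roughly
   "movl foo@GOT(%ebx), %reg", corresponding to the UNSPEC_GOT construct
   built below, while a local symbol is addressed as an offset from the GOT
   base, roughly "leal bar@GOTOFF(%ebx), %reg", corresponding to
   UNSPEC_GOTOFF.  In 64bit small PIC code globals instead use a
   RIP-relative GOT slot, foo@GOTPCREL(%rip).  */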
7229
7230 static rtx
7231 legitimize_pic_address (rtx orig, rtx reg)
7232 {
7233 rtx addr = orig;
7234 rtx new_rtx = orig;
7235 rtx base;
7236
7237 #if TARGET_MACHO
7238 if (TARGET_MACHO && !TARGET_64BIT)
7239 {
7240 if (reg == 0)
7241 reg = gen_reg_rtx (Pmode);
7242 /* Use the generic Mach-O PIC machinery. */
7243 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7244 }
7245 #endif
7246
7247 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7248 new_rtx = addr;
7249 else if (TARGET_64BIT
7250 && ix86_cmodel != CM_SMALL_PIC
7251 && gotoff_operand (addr, Pmode))
7252 {
7253 rtx tmpreg;
7254 /* This symbol may be referenced via a displacement from the PIC
7255 base address (@GOTOFF). */
7256
7257 if (reload_in_progress)
7258 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7259 if (GET_CODE (addr) == CONST)
7260 addr = XEXP (addr, 0);
7261 if (GET_CODE (addr) == PLUS)
7262 {
7263 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7264 UNSPEC_GOTOFF);
7265 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7266 }
7267 else
7268 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7269 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7270 if (!reg)
7271 tmpreg = gen_reg_rtx (Pmode);
7272 else
7273 tmpreg = reg;
7274 emit_move_insn (tmpreg, new_rtx);
7275
7276 if (reg != 0)
7277 {
7278 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7279 tmpreg, 1, OPTAB_DIRECT);
7280 new_rtx = reg;
7281 }
7282 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7283 }
7284 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7285 {
7286 /* This symbol may be referenced via a displacement from the PIC
7287 base address (@GOTOFF). */
7288
7289 if (reload_in_progress)
7290 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7291 if (GET_CODE (addr) == CONST)
7292 addr = XEXP (addr, 0);
7293 if (GET_CODE (addr) == PLUS)
7294 {
7295 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7296 UNSPEC_GOTOFF);
7297 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7298 }
7299 else
7300 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7301 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7302 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7303
7304 if (reg != 0)
7305 {
7306 emit_move_insn (reg, new_rtx);
7307 new_rtx = reg;
7308 }
7309 }
7310 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7311 /* We can't use @GOTOFF for text labels on VxWorks;
7312 see gotoff_operand. */
7313 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7314 {
7315 /* Given that we've already handled dllimport variables separately
7316 in legitimize_address, and all other variables should satisfy
7317 legitimate_pic_address_disp_p, we should never arrive here. */
7318 gcc_assert (!TARGET_64BIT_MS_ABI);
7319
7320 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7321 {
7322 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7323 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7324 new_rtx = gen_const_mem (Pmode, new_rtx);
7325 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7326
7327 if (reg == 0)
7328 reg = gen_reg_rtx (Pmode);
7329 /* Use gen_movsi directly; otherwise the address is loaded
7330 into a register for CSE. We don't want to CSE these addresses;
7331 instead we CSE the addresses loaded from the GOT table, so skip this. */
7332 emit_insn (gen_movsi (reg, new_rtx));
7333 new_rtx = reg;
7334 }
7335 else
7336 {
7337 /* This symbol must be referenced via a load from the
7338 Global Offset Table (@GOT). */
7339
7340 if (reload_in_progress)
7341 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7342 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7343 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7344 if (TARGET_64BIT)
7345 new_rtx = force_reg (Pmode, new_rtx);
7346 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7347 new_rtx = gen_const_mem (Pmode, new_rtx);
7348 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7349
7350 if (reg == 0)
7351 reg = gen_reg_rtx (Pmode);
7352 emit_move_insn (reg, new_rtx);
7353 new_rtx = reg;
7354 }
7355 }
7356 else
7357 {
7358 if (CONST_INT_P (addr)
7359 && !x86_64_immediate_operand (addr, VOIDmode))
7360 {
7361 if (reg)
7362 {
7363 emit_move_insn (reg, addr);
7364 new_rtx = reg;
7365 }
7366 else
7367 new_rtx = force_reg (Pmode, addr);
7368 }
7369 else if (GET_CODE (addr) == CONST)
7370 {
7371 addr = XEXP (addr, 0);
7372
7373 /* We must match the constructs we generated before. Assume the only
7374 unspecs that can get here are ours -- not that we could do
7375 anything with them anyway.... */
7376 if (GET_CODE (addr) == UNSPEC
7377 || (GET_CODE (addr) == PLUS
7378 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7379 return orig;
7380 gcc_assert (GET_CODE (addr) == PLUS);
7381 }
7382 if (GET_CODE (addr) == PLUS)
7383 {
7384 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7385
7386 /* Check first to see if this is a constant offset from a @GOTOFF
7387 symbol reference. */
7388 if (gotoff_operand (op0, Pmode)
7389 && CONST_INT_P (op1))
7390 {
7391 if (!TARGET_64BIT)
7392 {
7393 if (reload_in_progress)
7394 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7395 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7396 UNSPEC_GOTOFF);
7397 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7398 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7399 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7400
7401 if (reg != 0)
7402 {
7403 emit_move_insn (reg, new_rtx);
7404 new_rtx = reg;
7405 }
7406 }
7407 else
7408 {
7409 if (INTVAL (op1) < -16*1024*1024
7410 || INTVAL (op1) >= 16*1024*1024)
7411 {
7412 if (!x86_64_immediate_operand (op1, Pmode))
7413 op1 = force_reg (Pmode, op1);
7414 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7415 }
7416 }
7417 }
7418 else
7419 {
7420 base = legitimize_pic_address (XEXP (addr, 0), reg);
7421 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7422 base == reg ? NULL_RTX : reg);
7423
7424 if (CONST_INT_P (new_rtx))
7425 new_rtx = plus_constant (base, INTVAL (new_rtx));
7426 else
7427 {
7428 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7429 {
7430 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7431 new_rtx = XEXP (new_rtx, 1);
7432 }
7433 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7434 }
7435 }
7436 }
7437 }
7438 return new_rtx;
7439 }
7440 \f
7441 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7442
7443 static rtx
7444 get_thread_pointer (int to_reg)
7445 {
7446 rtx tp, reg, insn;
7447
7448 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7449 if (!to_reg)
7450 return tp;
7451
7452 reg = gen_reg_rtx (Pmode);
7453 insn = gen_rtx_SET (VOIDmode, reg, tp);
7454 insn = emit_insn (insn);
7455
7456 return reg;
7457 }
7458
7459 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7460 false if we expect this to be used for a memory address and true if
7461 we expect to load the address into a register. */
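/* Rough sketch of the models handled below (illustrative only): under
   TLS_MODEL_LOCAL_EXEC the address is thread-pointer + x@TPOFF/@NTPOFF;
   under TLS_MODEL_INITIAL_EXEC the offset is first loaded from a GOT slot
   (x@GOTTPOFF and friends); the GLOBAL_DYNAMIC and LOCAL_DYNAMIC models go
   through the tls_*_dynamic_* patterns (traditionally a __tls_get_addr
   call), the latter adding x@DTPOFF to the returned base.  */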
7462
7463 static rtx
7464 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7465 {
7466 rtx dest, base, off, pic, tp;
7467 int type;
7468
7469 switch (model)
7470 {
7471 case TLS_MODEL_GLOBAL_DYNAMIC:
7472 dest = gen_reg_rtx (Pmode);
7473 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7474
7475 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7476 {
7477 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7478
7479 start_sequence ();
7480 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7481 insns = get_insns ();
7482 end_sequence ();
7483
7484 CONST_OR_PURE_CALL_P (insns) = 1;
7485 emit_libcall_block (insns, dest, rax, x);
7486 }
7487 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7488 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7489 else
7490 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7491
7492 if (TARGET_GNU2_TLS)
7493 {
7494 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7495
7496 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7497 }
7498 break;
7499
7500 case TLS_MODEL_LOCAL_DYNAMIC:
7501 base = gen_reg_rtx (Pmode);
7502 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7503
7504 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7505 {
7506 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7507
7508 start_sequence ();
7509 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7510 insns = get_insns ();
7511 end_sequence ();
7512
7513 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7514 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7515 CONST_OR_PURE_CALL_P (insns) = 1;
7516 emit_libcall_block (insns, base, rax, note);
7517 }
7518 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7519 emit_insn (gen_tls_local_dynamic_base_64 (base));
7520 else
7521 emit_insn (gen_tls_local_dynamic_base_32 (base));
7522
7523 if (TARGET_GNU2_TLS)
7524 {
7525 rtx x = ix86_tls_module_base ();
7526
7527 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7528 gen_rtx_MINUS (Pmode, x, tp));
7529 }
7530
7531 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7532 off = gen_rtx_CONST (Pmode, off);
7533
7534 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7535
7536 if (TARGET_GNU2_TLS)
7537 {
7538 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7539
7540 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7541 }
7542
7543 break;
7544
7545 case TLS_MODEL_INITIAL_EXEC:
7546 if (TARGET_64BIT)
7547 {
7548 pic = NULL;
7549 type = UNSPEC_GOTNTPOFF;
7550 }
7551 else if (flag_pic)
7552 {
7553 if (reload_in_progress)
7554 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7555 pic = pic_offset_table_rtx;
7556 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7557 }
7558 else if (!TARGET_ANY_GNU_TLS)
7559 {
7560 pic = gen_reg_rtx (Pmode);
7561 emit_insn (gen_set_got (pic));
7562 type = UNSPEC_GOTTPOFF;
7563 }
7564 else
7565 {
7566 pic = NULL;
7567 type = UNSPEC_INDNTPOFF;
7568 }
7569
7570 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7571 off = gen_rtx_CONST (Pmode, off);
7572 if (pic)
7573 off = gen_rtx_PLUS (Pmode, pic, off);
7574 off = gen_const_mem (Pmode, off);
7575 set_mem_alias_set (off, ix86_GOT_alias_set ());
7576
7577 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7578 {
7579 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7580 off = force_reg (Pmode, off);
7581 return gen_rtx_PLUS (Pmode, base, off);
7582 }
7583 else
7584 {
7585 base = get_thread_pointer (true);
7586 dest = gen_reg_rtx (Pmode);
7587 emit_insn (gen_subsi3 (dest, base, off));
7588 }
7589 break;
7590
7591 case TLS_MODEL_LOCAL_EXEC:
7592 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7593 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7594 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7595 off = gen_rtx_CONST (Pmode, off);
7596
7597 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7598 {
7599 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7600 return gen_rtx_PLUS (Pmode, base, off);
7601 }
7602 else
7603 {
7604 base = get_thread_pointer (true);
7605 dest = gen_reg_rtx (Pmode);
7606 emit_insn (gen_subsi3 (dest, base, off));
7607 }
7608 break;
7609
7610 default:
7611 gcc_unreachable ();
7612 }
7613
7614 return dest;
7615 }
7616
7617 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7618 to symbol DECL. */
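/* For example (illustrative; the name is a placeholder): for a decl whose
   assembler name is "foo" this builds an artificial VAR_DECL whose RTL is a
   memory reference through the import-table symbol "*__imp__foo" (or
   "*__imp_foo" when the name carries the fastcall prefix), as constructed
   below.  */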
7619
7620 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7621 htab_t dllimport_map;
7622
7623 static tree
7624 get_dllimport_decl (tree decl)
7625 {
7626 struct tree_map *h, in;
7627 void **loc;
7628 const char *name;
7629 const char *prefix;
7630 size_t namelen, prefixlen;
7631 char *imp_name;
7632 tree to;
7633 rtx rtl;
7634
7635 if (!dllimport_map)
7636 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7637
7638 in.hash = htab_hash_pointer (decl);
7639 in.base.from = decl;
7640 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7641 h = (struct tree_map *) *loc;
7642 if (h)
7643 return h->to;
7644
7645 *loc = h = GGC_NEW (struct tree_map);
7646 h->hash = in.hash;
7647 h->base.from = decl;
7648 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7649 DECL_ARTIFICIAL (to) = 1;
7650 DECL_IGNORED_P (to) = 1;
7651 DECL_EXTERNAL (to) = 1;
7652 TREE_READONLY (to) = 1;
7653
7654 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7655 name = targetm.strip_name_encoding (name);
7656 if (name[0] == FASTCALL_PREFIX)
7657 {
7658 name++;
7659 prefix = "*__imp_";
7660 }
7661 else
7662 prefix = "*__imp__";
7663
7664 namelen = strlen (name);
7665 prefixlen = strlen (prefix);
7666 imp_name = (char *) alloca (namelen + prefixlen + 1);
7667 memcpy (imp_name, prefix, prefixlen);
7668 memcpy (imp_name + prefixlen, name, namelen + 1);
7669
7670 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7671 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7672 SET_SYMBOL_REF_DECL (rtl, to);
7673 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7674
7675 rtl = gen_const_mem (Pmode, rtl);
7676 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7677
7678 SET_DECL_RTL (to, rtl);
7679
7680 return to;
7681 }
7682
7683 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7684 true if we require the result be a register. */
7685
7686 static rtx
7687 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7688 {
7689 tree imp_decl;
7690 rtx x;
7691
7692 gcc_assert (SYMBOL_REF_DECL (symbol));
7693 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7694
7695 x = DECL_RTL (imp_decl);
7696 if (want_reg)
7697 x = force_reg (Pmode, x);
7698 return x;
7699 }
7700
7701 /* Try machine-dependent ways of modifying an illegitimate address
7702 to be legitimate. If we find one, return the new, valid address.
7703 This macro is used in only one place: `memory_address' in explow.c.
7704
7705 OLDX is the address as it was before break_out_memory_refs was called.
7706 In some cases it is useful to look at this to decide what needs to be done.
7707
7708 MODE and WIN are passed so that this macro can use
7709 GO_IF_LEGITIMATE_ADDRESS.
7710
7711 It is always safe for this macro to do nothing. It exists to recognize
7712 opportunities to optimize the output.
7713
7714 For the 80386, we handle X+REG by loading X into a register R and
7715 using R+REG. R will go in a general reg and indexing will be used.
7716 However, if REG is a broken-out memory address or multiplication,
7717 nothing needs to be done because REG can certainly go in a general reg.
7718
7719 When -fpic is used, special handling is needed for symbolic references.
7720 See comments by legitimize_pic_address in i386.c for details. */
7721
7722 rtx
7723 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7724 {
7725 int changed = 0;
7726 unsigned log;
7727
7728 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7729 if (log)
7730 return legitimize_tls_address (x, (enum tls_model) log, false);
7731 if (GET_CODE (x) == CONST
7732 && GET_CODE (XEXP (x, 0)) == PLUS
7733 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7734 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7735 {
7736 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7737 (enum tls_model) log, false);
7738 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7739 }
7740
7741 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7742 {
7743 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7744 return legitimize_dllimport_symbol (x, true);
7745 if (GET_CODE (x) == CONST
7746 && GET_CODE (XEXP (x, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7748 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7749 {
7750 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7751 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7752 }
7753 }
7754
7755 if (flag_pic && SYMBOLIC_CONST (x))
7756 return legitimize_pic_address (x, 0);
7757
7758 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7759 if (GET_CODE (x) == ASHIFT
7760 && CONST_INT_P (XEXP (x, 1))
7761 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7762 {
7763 changed = 1;
7764 log = INTVAL (XEXP (x, 1));
7765 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7766 GEN_INT (1 << log));
7767 }
7768
7769 if (GET_CODE (x) == PLUS)
7770 {
7771 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7772
7773 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7774 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7775 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7776 {
7777 changed = 1;
7778 log = INTVAL (XEXP (XEXP (x, 0), 1));
7779 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7780 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7781 GEN_INT (1 << log));
7782 }
7783
7784 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7785 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7786 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7787 {
7788 changed = 1;
7789 log = INTVAL (XEXP (XEXP (x, 1), 1));
7790 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7791 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7792 GEN_INT (1 << log));
7793 }
7794
7795 /* Put multiply first if it isn't already. */
7796 if (GET_CODE (XEXP (x, 1)) == MULT)
7797 {
7798 rtx tmp = XEXP (x, 0);
7799 XEXP (x, 0) = XEXP (x, 1);
7800 XEXP (x, 1) = tmp;
7801 changed = 1;
7802 }
7803
7804 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7805 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7806 created by virtual register instantiation, register elimination, and
7807 similar optimizations. */
7808 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7809 {
7810 changed = 1;
7811 x = gen_rtx_PLUS (Pmode,
7812 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7813 XEXP (XEXP (x, 1), 0)),
7814 XEXP (XEXP (x, 1), 1));
7815 }
7816
7817 /* Canonicalize
7818 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7819 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7820 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7821 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7822 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7823 && CONSTANT_P (XEXP (x, 1)))
7824 {
7825 rtx constant;
7826 rtx other = NULL_RTX;
7827
7828 if (CONST_INT_P (XEXP (x, 1)))
7829 {
7830 constant = XEXP (x, 1);
7831 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7832 }
7833 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7834 {
7835 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7836 other = XEXP (x, 1);
7837 }
7838 else
7839 constant = 0;
7840
7841 if (constant)
7842 {
7843 changed = 1;
7844 x = gen_rtx_PLUS (Pmode,
7845 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7846 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7847 plus_constant (other, INTVAL (constant)));
7848 }
7849 }
7850
7851 if (changed && legitimate_address_p (mode, x, FALSE))
7852 return x;
7853
7854 if (GET_CODE (XEXP (x, 0)) == MULT)
7855 {
7856 changed = 1;
7857 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7858 }
7859
7860 if (GET_CODE (XEXP (x, 1)) == MULT)
7861 {
7862 changed = 1;
7863 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7864 }
7865
7866 if (changed
7867 && REG_P (XEXP (x, 1))
7868 && REG_P (XEXP (x, 0)))
7869 return x;
7870
7871 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7872 {
7873 changed = 1;
7874 x = legitimize_pic_address (x, 0);
7875 }
7876
7877 if (changed && legitimate_address_p (mode, x, FALSE))
7878 return x;
7879
7880 if (REG_P (XEXP (x, 0)))
7881 {
7882 rtx temp = gen_reg_rtx (Pmode);
7883 rtx val = force_operand (XEXP (x, 1), temp);
7884 if (val != temp)
7885 emit_move_insn (temp, val);
7886
7887 XEXP (x, 1) = temp;
7888 return x;
7889 }
7890
7891 else if (REG_P (XEXP (x, 1)))
7892 {
7893 rtx temp = gen_reg_rtx (Pmode);
7894 rtx val = force_operand (XEXP (x, 0), temp);
7895 if (val != temp)
7896 emit_move_insn (temp, val);
7897
7898 XEXP (x, 0) = temp;
7899 return x;
7900 }
7901 }
7902
7903 return x;
7904 }
7905 \f
7906 /* Print an integer constant expression in assembler syntax. Addition
7907 and subtraction are the only arithmetic that may appear in these
7908 expressions. FILE is the stdio stream to write to, X is the rtx, and
7909 CODE is the operand print code from the output string. */
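/* For instance (illustrative; "foo" is a placeholder): the displacement
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF", and with operand code 'P' a non-local SYMBOL_REF gets an
   "@PLT" suffix appended (on non-Mach-O, non-MS-ABI targets), per the
   cases below.  */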
7910
7911 static void
7912 output_pic_addr_const (FILE *file, rtx x, int code)
7913 {
7914 char buf[256];
7915
7916 switch (GET_CODE (x))
7917 {
7918 case PC:
7919 gcc_assert (flag_pic);
7920 putc ('.', file);
7921 break;
7922
7923 case SYMBOL_REF:
7924 if (! TARGET_MACHO || TARGET_64BIT)
7925 output_addr_const (file, x);
7926 else
7927 {
7928 const char *name = XSTR (x, 0);
7929
7930 /* Mark the decl as referenced so that cgraph will
7931 output the function. */
7932 if (SYMBOL_REF_DECL (x))
7933 mark_decl_referenced (SYMBOL_REF_DECL (x));
7934
7935 #if TARGET_MACHO
7936 if (MACHOPIC_INDIRECT
7937 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7938 name = machopic_indirection_name (x, /*stub_p=*/true);
7939 #endif
7940 assemble_name (file, name);
7941 }
7942 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7943 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7944 fputs ("@PLT", file);
7945 break;
7946
7947 case LABEL_REF:
7948 x = XEXP (x, 0);
7949 /* FALLTHRU */
7950 case CODE_LABEL:
7951 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7952 assemble_name (asm_out_file, buf);
7953 break;
7954
7955 case CONST_INT:
7956 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7957 break;
7958
7959 case CONST:
7960 /* This used to output parentheses around the expression,
7961 but that does not work on the 386 (either ATT or BSD assembler). */
7962 output_pic_addr_const (file, XEXP (x, 0), code);
7963 break;
7964
7965 case CONST_DOUBLE:
7966 if (GET_MODE (x) == VOIDmode)
7967 {
7968 /* We can use %d if the number is <32 bits and positive. */
7969 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7970 fprintf (file, "0x%lx%08lx",
7971 (unsigned long) CONST_DOUBLE_HIGH (x),
7972 (unsigned long) CONST_DOUBLE_LOW (x));
7973 else
7974 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7975 }
7976 else
7977 /* We can't handle floating point constants;
7978 PRINT_OPERAND must handle them. */
7979 output_operand_lossage ("floating constant misused");
7980 break;
7981
7982 case PLUS:
7983 /* Some assemblers need integer constants to appear first. */
7984 if (CONST_INT_P (XEXP (x, 0)))
7985 {
7986 output_pic_addr_const (file, XEXP (x, 0), code);
7987 putc ('+', file);
7988 output_pic_addr_const (file, XEXP (x, 1), code);
7989 }
7990 else
7991 {
7992 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7993 output_pic_addr_const (file, XEXP (x, 1), code);
7994 putc ('+', file);
7995 output_pic_addr_const (file, XEXP (x, 0), code);
7996 }
7997 break;
7998
7999 case MINUS:
8000 if (!TARGET_MACHO)
8001 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
8002 output_pic_addr_const (file, XEXP (x, 0), code);
8003 putc ('-', file);
8004 output_pic_addr_const (file, XEXP (x, 1), code);
8005 if (!TARGET_MACHO)
8006 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8007 break;
8008
8009 case UNSPEC:
8010 gcc_assert (XVECLEN (x, 0) == 1);
8011 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8012 switch (XINT (x, 1))
8013 {
8014 case UNSPEC_GOT:
8015 fputs ("@GOT", file);
8016 break;
8017 case UNSPEC_GOTOFF:
8018 fputs ("@GOTOFF", file);
8019 break;
8020 case UNSPEC_PLTOFF:
8021 fputs ("@PLTOFF", file);
8022 break;
8023 case UNSPEC_GOTPCREL:
8024 fputs ("@GOTPCREL(%rip)", file);
8025 break;
8026 case UNSPEC_GOTTPOFF:
8027 /* FIXME: This might be @TPOFF in Sun ld too. */
8028 fputs ("@GOTTPOFF", file);
8029 break;
8030 case UNSPEC_TPOFF:
8031 fputs ("@TPOFF", file);
8032 break;
8033 case UNSPEC_NTPOFF:
8034 if (TARGET_64BIT)
8035 fputs ("@TPOFF", file);
8036 else
8037 fputs ("@NTPOFF", file);
8038 break;
8039 case UNSPEC_DTPOFF:
8040 fputs ("@DTPOFF", file);
8041 break;
8042 case UNSPEC_GOTNTPOFF:
8043 if (TARGET_64BIT)
8044 fputs ("@GOTTPOFF(%rip)", file);
8045 else
8046 fputs ("@GOTNTPOFF", file);
8047 break;
8048 case UNSPEC_INDNTPOFF:
8049 fputs ("@INDNTPOFF", file);
8050 break;
8051 default:
8052 output_operand_lossage ("invalid UNSPEC as operand");
8053 break;
8054 }
8055 break;
8056
8057 default:
8058 output_operand_lossage ("invalid expression as operand");
8059 }
8060 }
8061
8062 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8063 We need to emit DTP-relative relocations. */
8064
8065 static void ATTRIBUTE_UNUSED
8066 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8067 {
8068 fputs (ASM_LONG, file);
8069 output_addr_const (file, x);
8070 fputs ("@DTPOFF", file);
8071 switch (size)
8072 {
8073 case 4:
8074 break;
8075 case 8:
8076 fputs (", 0", file);
8077 break;
8078 default:
8079 gcc_unreachable ();
8080 }
8081 }
8082
8083 /* In the name of slightly smaller debug output, and to cater to
8084 general assembler lossage, recognize PIC+GOTOFF and turn it back
8085 into a direct symbol reference.
8086
8087 On Darwin, this is necessary to avoid a crash, because Darwin
8088 has a different PIC label for each routine but the DWARF debugging
8089 information is not associated with any particular routine, so it's
8090 necessary to remove references to the PIC label from RTL stored by
8091 the DWARF output code. */
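/* Illustrative examples of the cases handled below ("foo" is a
   placeholder): the 32bit form PIC-reg + (const (unspec [(symbol_ref
   "foo")] UNSPEC_GOTOFF)) is folded back to (symbol_ref "foo") plus any
   constant or register addend, and in 64bit mode a memory reference
   through (const (unspec [(symbol_ref "foo")] UNSPEC_GOTPCREL)) is reduced
   to the bare (symbol_ref "foo").  */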
8092
8093 static rtx
8094 ix86_delegitimize_address (rtx orig_x)
8095 {
8096 rtx x = orig_x;
8097 /* reg_addend is NULL or a multiple of some register. */
8098 rtx reg_addend = NULL_RTX;
8099 /* const_addend is NULL or a const_int. */
8100 rtx const_addend = NULL_RTX;
8101 /* This is the result, or NULL. */
8102 rtx result = NULL_RTX;
8103
8104 if (MEM_P (x))
8105 x = XEXP (x, 0);
8106
8107 if (TARGET_64BIT)
8108 {
8109 if (GET_CODE (x) != CONST
8110 || GET_CODE (XEXP (x, 0)) != UNSPEC
8111 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8112 || !MEM_P (orig_x))
8113 return orig_x;
8114 return XVECEXP (XEXP (x, 0), 0, 0);
8115 }
8116
8117 if (GET_CODE (x) != PLUS
8118 || GET_CODE (XEXP (x, 1)) != CONST)
8119 return orig_x;
8120
8121 if (REG_P (XEXP (x, 0))
8122 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8123 /* %ebx + GOT/GOTOFF */
8124 ;
8125 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8126 {
8127 /* %ebx + %reg * scale + GOT/GOTOFF */
8128 reg_addend = XEXP (x, 0);
8129 if (REG_P (XEXP (reg_addend, 0))
8130 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8131 reg_addend = XEXP (reg_addend, 1);
8132 else if (REG_P (XEXP (reg_addend, 1))
8133 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8134 reg_addend = XEXP (reg_addend, 0);
8135 else
8136 return orig_x;
8137 if (!REG_P (reg_addend)
8138 && GET_CODE (reg_addend) != MULT
8139 && GET_CODE (reg_addend) != ASHIFT)
8140 return orig_x;
8141 }
8142 else
8143 return orig_x;
8144
8145 x = XEXP (XEXP (x, 1), 0);
8146 if (GET_CODE (x) == PLUS
8147 && CONST_INT_P (XEXP (x, 1)))
8148 {
8149 const_addend = XEXP (x, 1);
8150 x = XEXP (x, 0);
8151 }
8152
8153 if (GET_CODE (x) == UNSPEC
8154 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8155 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8156 result = XVECEXP (x, 0, 0);
8157
8158 if (TARGET_MACHO && darwin_local_data_pic (x)
8159 && !MEM_P (orig_x))
8160 result = XEXP (x, 0);
8161
8162 if (! result)
8163 return orig_x;
8164
8165 if (const_addend)
8166 result = gen_rtx_PLUS (Pmode, result, const_addend);
8167 if (reg_addend)
8168 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8169 return result;
8170 }
8171
8172 /* If X is a machine specific address (i.e. a symbol or label being
8173 referenced as a displacement from the GOT implemented using an
8174 UNSPEC), then return the base term. Otherwise return X. */
8175
8176 rtx
8177 ix86_find_base_term (rtx x)
8178 {
8179 rtx term;
8180
8181 if (TARGET_64BIT)
8182 {
8183 if (GET_CODE (x) != CONST)
8184 return x;
8185 term = XEXP (x, 0);
8186 if (GET_CODE (term) == PLUS
8187 && (CONST_INT_P (XEXP (term, 1))
8188 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8189 term = XEXP (term, 0);
8190 if (GET_CODE (term) != UNSPEC
8191 || XINT (term, 1) != UNSPEC_GOTPCREL)
8192 return x;
8193
8194 term = XVECEXP (term, 0, 0);
8195
8196 if (GET_CODE (term) != SYMBOL_REF
8197 && GET_CODE (term) != LABEL_REF)
8198 return x;
8199
8200 return term;
8201 }
8202
8203 term = ix86_delegitimize_address (x);
8204
8205 if (GET_CODE (term) != SYMBOL_REF
8206 && GET_CODE (term) != LABEL_REF)
8207 return x;
8208
8209 return term;
8210 }
8211 \f
8212 static void
8213 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8214 int fp, FILE *file)
8215 {
8216 const char *suffix;
8217
8218 if (mode == CCFPmode || mode == CCFPUmode)
8219 {
8220 enum rtx_code second_code, bypass_code;
8221 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8222 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8223 code = ix86_fp_compare_code_to_integer (code);
8224 mode = CCmode;
8225 }
8226 if (reverse)
8227 code = reverse_condition (code);
8228
8229 switch (code)
8230 {
8231 case EQ:
8232 switch (mode)
8233 {
8234 case CCAmode:
8235 suffix = "a";
8236 break;
8237
8238 case CCCmode:
8239 suffix = "c";
8240 break;
8241
8242 case CCOmode:
8243 suffix = "o";
8244 break;
8245
8246 case CCSmode:
8247 suffix = "s";
8248 break;
8249
8250 default:
8251 suffix = "e";
8252 }
8253 break;
8254 case NE:
8255 switch (mode)
8256 {
8257 case CCAmode:
8258 suffix = "na";
8259 break;
8260
8261 case CCCmode:
8262 suffix = "nc";
8263 break;
8264
8265 case CCOmode:
8266 suffix = "no";
8267 break;
8268
8269 case CCSmode:
8270 suffix = "ns";
8271 break;
8272
8273 default:
8274 suffix = "ne";
8275 }
8276 break;
8277 case GT:
8278 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8279 suffix = "g";
8280 break;
8281 case GTU:
8282 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8283 Those same assemblers have the same but opposite lossage on cmov. */
8284 if (mode == CCmode)
8285 suffix = fp ? "nbe" : "a";
8286 else if (mode == CCCmode)
8287 suffix = "b";
8288 else
8289 gcc_unreachable ();
8290 break;
8291 case LT:
8292 switch (mode)
8293 {
8294 case CCNOmode:
8295 case CCGOCmode:
8296 suffix = "s";
8297 break;
8298
8299 case CCmode:
8300 case CCGCmode:
8301 suffix = "l";
8302 break;
8303
8304 default:
8305 gcc_unreachable ();
8306 }
8307 break;
8308 case LTU:
8309 gcc_assert (mode == CCmode || mode == CCCmode);
8310 suffix = "b";
8311 break;
8312 case GE:
8313 switch (mode)
8314 {
8315 case CCNOmode:
8316 case CCGOCmode:
8317 suffix = "ns";
8318 break;
8319
8320 case CCmode:
8321 case CCGCmode:
8322 suffix = "ge";
8323 break;
8324
8325 default:
8326 gcc_unreachable ();
8327 }
8328 break;
8329 case GEU:
8330 /* ??? As above. */
8331 gcc_assert (mode == CCmode || mode == CCCmode);
8332 suffix = fp ? "nb" : "ae";
8333 break;
8334 case LE:
8335 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8336 suffix = "le";
8337 break;
8338 case LEU:
8339 /* ??? As above. */
8340 if (mode == CCmode)
8341 suffix = "be";
8342 else if (mode == CCCmode)
8343 suffix = fp ? "nb" : "ae";
8344 else
8345 gcc_unreachable ();
8346 break;
8347 case UNORDERED:
8348 suffix = fp ? "u" : "p";
8349 break;
8350 case ORDERED:
8351 suffix = fp ? "nu" : "np";
8352 break;
8353 default:
8354 gcc_unreachable ();
8355 }
8356 fputs (suffix, file);
8357 }
8358
8359 /* Print the name of register X to FILE based on its machine mode and number.
8360 If CODE is 'w', pretend the mode is HImode.
8361 If CODE is 'b', pretend the mode is QImode.
8362 If CODE is 'k', pretend the mode is SImode.
8363 If CODE is 'q', pretend the mode is DImode.
8364 If CODE is 'h', pretend the reg is the 'high' byte register.
8365 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8366
8367 void
8368 print_reg (rtx x, int code, FILE *file)
8369 {
8370 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8371 && REGNO (x) != FRAME_POINTER_REGNUM
8372 && REGNO (x) != FLAGS_REG
8373 && REGNO (x) != FPSR_REG
8374 && REGNO (x) != FPCR_REG);
8375
8376 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8377 putc ('%', file);
8378
8379 if (code == 'w' || MMX_REG_P (x))
8380 code = 2;
8381 else if (code == 'b')
8382 code = 1;
8383 else if (code == 'k')
8384 code = 4;
8385 else if (code == 'q')
8386 code = 8;
8387 else if (code == 'y')
8388 code = 3;
8389 else if (code == 'h')
8390 code = 0;
8391 else
8392 code = GET_MODE_SIZE (GET_MODE (x));
8393
8394 /* Irritatingly, AMD extended registers use a different naming convention
8395 from the normal registers. */
8396 if (REX_INT_REG_P (x))
8397 {
8398 gcc_assert (TARGET_64BIT);
8399 switch (code)
8400 {
8401 case 0:
8402 error ("extended registers have no high halves");
8403 break;
8404 case 1:
8405 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8406 break;
8407 case 2:
8408 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8409 break;
8410 case 4:
8411 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8412 break;
8413 case 8:
8414 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8415 break;
8416 default:
8417 error ("unsupported operand size for extended register");
8418 break;
8419 }
8420 return;
8421 }
8422 switch (code)
8423 {
8424 case 3:
8425 if (STACK_TOP_P (x))
8426 {
8427 fputs ("st(0)", file);
8428 break;
8429 }
8430 /* FALLTHRU */
8431 case 8:
8432 case 4:
8433 case 12:
8434 if (! ANY_FP_REG_P (x))
8435 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8436 /* FALLTHRU */
8437 case 16:
8438 case 2:
8439 normal:
8440 fputs (hi_reg_name[REGNO (x)], file);
8441 break;
8442 case 1:
8443 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8444 goto normal;
8445 fputs (qi_reg_name[REGNO (x)], file);
8446 break;
8447 case 0:
8448 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8449 goto normal;
8450 fputs (qi_high_reg_name[REGNO (x)], file);
8451 break;
8452 default:
8453 gcc_unreachable ();
8454 }
8455 }
8456
8457 /* Locate some local-dynamic symbol still in use by this function
8458 so that we can print its name in some tls_local_dynamic_base
8459 pattern. */
8460
8461 static int
8462 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8463 {
8464 rtx x = *px;
8465
8466 if (GET_CODE (x) == SYMBOL_REF
8467 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8468 {
8469 cfun->machine->some_ld_name = XSTR (x, 0);
8470 return 1;
8471 }
8472
8473 return 0;
8474 }
8475
8476 static const char *
8477 get_some_local_dynamic_name (void)
8478 {
8479 rtx insn;
8480
8481 if (cfun->machine->some_ld_name)
8482 return cfun->machine->some_ld_name;
8483
8484 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8485 if (INSN_P (insn)
8486 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8487 return cfun->machine->some_ld_name;
8488
8489 gcc_unreachable ();
8490 }
8491
8492 /* Meaning of CODE:
8493 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8494 C -- print opcode suffix for set/cmov insn.
8495 c -- like C, but print reversed condition
8496 F,f -- likewise, but for floating-point.
8497 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8498 otherwise nothing
8499 R -- print the prefix for register names.
8500 z -- print the opcode suffix for the size of the current operand.
8501 * -- print a star (in certain assembler syntax)
8502 A -- print an absolute memory reference.
8503 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8504 s -- print a shift double count, followed by the assembler's argument
8505 delimiter.
8506 b -- print the QImode name of the register for the indicated operand.
8507 %b0 would print %al if operands[0] is reg 0.
8508 w -- likewise, print the HImode name of the register.
8509 k -- likewise, print the SImode name of the register.
8510 q -- likewise, print the DImode name of the register.
8511 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8512 y -- print "st(0)" instead of "st" as a register.
8513 D -- print condition for SSE cmp instruction.
8514 P -- if PIC, print an @PLT suffix.
8515 X -- don't print any sort of PIC '@' suffix for a symbol.
8516 & -- print some in-use local-dynamic symbol name.
8517 H -- print a memory address offset by 8; used for sse high-parts
8518 + -- print a branch hint as 'cs' or 'ds' prefix
8519 ; -- print a semicolon (after prefixes due to bug in older gas).
8520 */
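/* For instance (illustrative, assuming operands[0] is the ax register in
   SImode): "%0" prints "%eax", "%w0" prints "%ax", "%b0" prints "%al",
   "%h0" prints "%ah", "%k0" prints "%eax" and "%q0" prints "%rax" (the
   last only in 64bit mode), as implemented by print_reg above.  */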
8521
8522 void
8523 print_operand (FILE *file, rtx x, int code)
8524 {
8525 if (code)
8526 {
8527 switch (code)
8528 {
8529 case '*':
8530 if (ASSEMBLER_DIALECT == ASM_ATT)
8531 putc ('*', file);
8532 return;
8533
8534 case '&':
8535 assemble_name (file, get_some_local_dynamic_name ());
8536 return;
8537
8538 case 'A':
8539 switch (ASSEMBLER_DIALECT)
8540 {
8541 case ASM_ATT:
8542 putc ('*', file);
8543 break;
8544
8545 case ASM_INTEL:
8546 /* Intel syntax. For absolute addresses, registers should not
8547 be surrounded by brackets. */
8548 if (!REG_P (x))
8549 {
8550 putc ('[', file);
8551 PRINT_OPERAND (file, x, 0);
8552 putc (']', file);
8553 return;
8554 }
8555 break;
8556
8557 default:
8558 gcc_unreachable ();
8559 }
8560
8561 PRINT_OPERAND (file, x, 0);
8562 return;
8563
8564
8565 case 'L':
8566 if (ASSEMBLER_DIALECT == ASM_ATT)
8567 putc ('l', file);
8568 return;
8569
8570 case 'W':
8571 if (ASSEMBLER_DIALECT == ASM_ATT)
8572 putc ('w', file);
8573 return;
8574
8575 case 'B':
8576 if (ASSEMBLER_DIALECT == ASM_ATT)
8577 putc ('b', file);
8578 return;
8579
8580 case 'Q':
8581 if (ASSEMBLER_DIALECT == ASM_ATT)
8582 putc ('l', file);
8583 return;
8584
8585 case 'S':
8586 if (ASSEMBLER_DIALECT == ASM_ATT)
8587 putc ('s', file);
8588 return;
8589
8590 case 'T':
8591 if (ASSEMBLER_DIALECT == ASM_ATT)
8592 putc ('t', file);
8593 return;
8594
8595 case 'z':
8596 /* 387 opcodes don't get size suffixes if the operands are
8597 registers. */
8598 if (STACK_REG_P (x))
8599 return;
8600
8601 /* Likewise if using Intel opcodes. */
8602 if (ASSEMBLER_DIALECT == ASM_INTEL)
8603 return;
8604
8605 /* This is the size of op from size of operand. */
8606 switch (GET_MODE_SIZE (GET_MODE (x)))
8607 {
8608 case 1:
8609 putc ('b', file);
8610 return;
8611
8612 case 2:
8613 if (MEM_P (x))
8614 {
8615 #ifdef HAVE_GAS_FILDS_FISTS
8616 putc ('s', file);
8617 #endif
8618 return;
8619 }
8620 else
8621 putc ('w', file);
8622 return;
8623
8624 case 4:
8625 if (GET_MODE (x) == SFmode)
8626 {
8627 putc ('s', file);
8628 return;
8629 }
8630 else
8631 putc ('l', file);
8632 return;
8633
8634 case 12:
8635 case 16:
8636 putc ('t', file);
8637 return;
8638
8639 case 8:
8640 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8641 {
8642 #ifdef GAS_MNEMONICS
8643 putc ('q', file);
8644 #else
8645 putc ('l', file);
8646 putc ('l', file);
8647 #endif
8648 }
8649 else
8650 putc ('l', file);
8651 return;
8652
8653 default:
8654 gcc_unreachable ();
8655 }
8656
8657 case 'b':
8658 case 'w':
8659 case 'k':
8660 case 'q':
8661 case 'h':
8662 case 'y':
8663 case 'X':
8664 case 'P':
8665 break;
8666
8667 case 's':
8668 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8669 {
8670 PRINT_OPERAND (file, x, 0);
8671 putc (',', file);
8672 }
8673 return;
8674
8675 case 'D':
8676 /* A little bit of braindamage here. The SSE compare instructions
8677 use completely different names for the comparisons than the
8678 fp conditional moves do. */
8679 switch (GET_CODE (x))
8680 {
8681 case EQ:
8682 case UNEQ:
8683 fputs ("eq", file);
8684 break;
8685 case LT:
8686 case UNLT:
8687 fputs ("lt", file);
8688 break;
8689 case LE:
8690 case UNLE:
8691 fputs ("le", file);
8692 break;
8693 case UNORDERED:
8694 fputs ("unord", file);
8695 break;
8696 case NE:
8697 case LTGT:
8698 fputs ("neq", file);
8699 break;
8700 case UNGE:
8701 case GE:
8702 fputs ("nlt", file);
8703 break;
8704 case UNGT:
8705 case GT:
8706 fputs ("nle", file);
8707 break;
8708 case ORDERED:
8709 fputs ("ord", file);
8710 break;
8711 default:
8712 gcc_unreachable ();
8713 }
8714 return;
8715 case 'O':
8716 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8717 if (ASSEMBLER_DIALECT == ASM_ATT)
8718 {
8719 switch (GET_MODE (x))
8720 {
8721 case HImode: putc ('w', file); break;
8722 case SImode:
8723 case SFmode: putc ('l', file); break;
8724 case DImode:
8725 case DFmode: putc ('q', file); break;
8726 default: gcc_unreachable ();
8727 }
8728 putc ('.', file);
8729 }
8730 #endif
8731 return;
8732 case 'C':
8733 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8734 return;
8735 case 'F':
8736 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8737 if (ASSEMBLER_DIALECT == ASM_ATT)
8738 putc ('.', file);
8739 #endif
8740 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8741 return;
8742
8743 /* Like above, but reverse condition */
8744 case 'c':
8745 /* Check to see if argument to %c is really a constant
8746 and not a condition code which needs to be reversed. */
8747 if (!COMPARISON_P (x))
8748 {
8749 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8750 return;
8751 }
8752 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8753 return;
8754 case 'f':
8755 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8756 if (ASSEMBLER_DIALECT == ASM_ATT)
8757 putc ('.', file);
8758 #endif
8759 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8760 return;
8761
8762 case 'H':
8763 /* It doesn't actually matter what mode we use here, as we're
8764 only going to use this for printing. */
8765 x = adjust_address_nv (x, DImode, 8);
8766 break;
8767
8768 case '+':
8769 {
8770 rtx x;
8771
8772 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8773 return;
8774
8775 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8776 if (x)
8777 {
8778 int pred_val = INTVAL (XEXP (x, 0));
8779
8780 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8781 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8782 {
8783 int taken = pred_val > REG_BR_PROB_BASE / 2;
8784 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8785
8786 /* Emit hints only when the default branch prediction
8787 heuristics would fail. */
8788 if (taken != cputaken)
8789 {
8790 /* We use 3e (DS) prefix for taken branches and
8791 2e (CS) prefix for not taken branches. */
8792 if (taken)
8793 fputs ("ds ; ", file);
8794 else
8795 fputs ("cs ; ", file);
8796 }
8797 }
8798 }
8799 return;
8800 }
8801
8802 case ';':
8803 #if TARGET_MACHO
8804 fputs (" ; ", file);
8805 #else
8806 fputc (' ', file);
8807 #endif
8808 return;
8809
8810 default:
8811 output_operand_lossage ("invalid operand code '%c'", code);
8812 }
8813 }
8814
8815 if (REG_P (x))
8816 print_reg (x, code, file);
8817
8818 else if (MEM_P (x))
8819 {
8820 /* No `byte ptr' prefix for call instructions. */
8821 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8822 {
8823 const char * size;
8824 switch (GET_MODE_SIZE (GET_MODE (x)))
8825 {
8826 case 1: size = "BYTE"; break;
8827 case 2: size = "WORD"; break;
8828 case 4: size = "DWORD"; break;
8829 case 8: size = "QWORD"; break;
8830 case 12: size = "XWORD"; break;
8831 case 16: size = "XMMWORD"; break;
8832 default:
8833 gcc_unreachable ();
8834 }
8835
8836 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8837 if (code == 'b')
8838 size = "BYTE";
8839 else if (code == 'w')
8840 size = "WORD";
8841 else if (code == 'k')
8842 size = "DWORD";
8843
8844 fputs (size, file);
8845 fputs (" PTR ", file);
8846 }
8847
8848 x = XEXP (x, 0);
8849 /* Avoid (%rip) for call operands. */
8850 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8851 && !CONST_INT_P (x))
8852 output_addr_const (file, x);
8853 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8854 output_operand_lossage ("invalid constraints for operand");
8855 else
8856 output_address (x);
8857 }
8858
8859 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8860 {
8861 REAL_VALUE_TYPE r;
8862 long l;
8863
8864 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8865 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8866
8867 if (ASSEMBLER_DIALECT == ASM_ATT)
8868 putc ('$', file);
8869 fprintf (file, "0x%08lx", l);
8870 }
8871
8872 /* These float cases don't actually occur as immediate operands. */
8873 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8874 {
8875 char dstr[30];
8876
8877 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8878 fprintf (file, "%s", dstr);
8879 }
8880
8881 else if (GET_CODE (x) == CONST_DOUBLE
8882 && GET_MODE (x) == XFmode)
8883 {
8884 char dstr[30];
8885
8886 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8887 fprintf (file, "%s", dstr);
8888 }
8889
8890 else
8891 {
8892 /* We have patterns that allow zero sets of memory, for instance.
8893 In 64-bit mode, we should probably support all 8-byte vectors,
8894 since we can in fact encode that into an immediate. */
8895 if (GET_CODE (x) == CONST_VECTOR)
8896 {
8897 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8898 x = const0_rtx;
8899 }
8900
8901 if (code != 'P')
8902 {
8903 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8904 {
8905 if (ASSEMBLER_DIALECT == ASM_ATT)
8906 putc ('$', file);
8907 }
8908 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8909 || GET_CODE (x) == LABEL_REF)
8910 {
8911 if (ASSEMBLER_DIALECT == ASM_ATT)
8912 putc ('$', file);
8913 else
8914 fputs ("OFFSET FLAT:", file);
8915 }
8916 }
8917 if (CONST_INT_P (x))
8918 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8919 else if (flag_pic)
8920 output_pic_addr_const (file, x, code);
8921 else
8922 output_addr_const (file, x);
8923 }
8924 }
8925 \f
8926 /* Print a memory operand whose address is ADDR. */
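/* For example (illustrative; the registers are placeholders): an address
   with base %ebx, index %esi, scale 4 and displacement 8 comes out as
   "8(%ebx,%esi,4)" in AT&T syntax and roughly "[ebx+esi*4+8]" in Intel
   syntax, following the two branches below.  */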
8927
8928 void
8929 print_operand_address (FILE *file, rtx addr)
8930 {
8931 struct ix86_address parts;
8932 rtx base, index, disp;
8933 int scale;
8934 int ok = ix86_decompose_address (addr, &parts);
8935
8936 gcc_assert (ok);
8937
8938 base = parts.base;
8939 index = parts.index;
8940 disp = parts.disp;
8941 scale = parts.scale;
8942
8943 switch (parts.seg)
8944 {
8945 case SEG_DEFAULT:
8946 break;
8947 case SEG_FS:
8948 case SEG_GS:
8949 if (USER_LABEL_PREFIX[0] == 0)
8950 putc ('%', file);
8951 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8952 break;
8953 default:
8954 gcc_unreachable ();
8955 }
8956
8957 if (!base && !index)
8958 {
8959 /* A displacement-only address requires special attention. */
8960
8961 if (CONST_INT_P (disp))
8962 {
8963 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8964 {
8965 if (USER_LABEL_PREFIX[0] == 0)
8966 putc ('%', file);
8967 fputs ("ds:", file);
8968 }
8969 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8970 }
8971 else if (flag_pic)
8972 output_pic_addr_const (file, disp, 0);
8973 else
8974 output_addr_const (file, disp);
8975
8976 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
8977 if (TARGET_64BIT)
8978 {
8979 if (GET_CODE (disp) == CONST
8980 && GET_CODE (XEXP (disp, 0)) == PLUS
8981 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8982 disp = XEXP (XEXP (disp, 0), 0);
8983 if (GET_CODE (disp) == LABEL_REF
8984 || (GET_CODE (disp) == SYMBOL_REF
8985 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8986 fputs ("(%rip)", file);
8987 }
8988 }
8989 else
8990 {
8991 if (ASSEMBLER_DIALECT == ASM_ATT)
8992 {
8993 if (disp)
8994 {
8995 if (flag_pic)
8996 output_pic_addr_const (file, disp, 0);
8997 else if (GET_CODE (disp) == LABEL_REF)
8998 output_asm_label (disp);
8999 else
9000 output_addr_const (file, disp);
9001 }
9002
9003 putc ('(', file);
9004 if (base)
9005 print_reg (base, 0, file);
9006 if (index)
9007 {
9008 putc (',', file);
9009 print_reg (index, 0, file);
9010 if (scale != 1)
9011 fprintf (file, ",%d", scale);
9012 }
9013 putc (')', file);
9014 }
9015 else
9016 {
9017 rtx offset = NULL_RTX;
9018
9019 if (disp)
9020 {
9021 /* Pull out the offset of a symbol; print any symbol itself. */
9022 if (GET_CODE (disp) == CONST
9023 && GET_CODE (XEXP (disp, 0)) == PLUS
9024 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9025 {
9026 offset = XEXP (XEXP (disp, 0), 1);
9027 disp = gen_rtx_CONST (VOIDmode,
9028 XEXP (XEXP (disp, 0), 0));
9029 }
9030
9031 if (flag_pic)
9032 output_pic_addr_const (file, disp, 0);
9033 else if (GET_CODE (disp) == LABEL_REF)
9034 output_asm_label (disp);
9035 else if (CONST_INT_P (disp))
9036 offset = disp;
9037 else
9038 output_addr_const (file, disp);
9039 }
9040
9041 putc ('[', file);
9042 if (base)
9043 {
9044 print_reg (base, 0, file);
9045 if (offset)
9046 {
9047 if (INTVAL (offset) >= 0)
9048 putc ('+', file);
9049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9050 }
9051 }
9052 else if (offset)
9053 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9054 else
9055 putc ('0', file);
9056
9057 if (index)
9058 {
9059 putc ('+', file);
9060 print_reg (index, 0, file);
9061 if (scale != 1)
9062 fprintf (file, "*%d", scale);
9063 }
9064 putc (']', file);
9065 }
9066 }
9067 }
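/* Illustrative example (not in the original sources): for base %eax,
   index %ebx, scale 2 and displacement sym+4, the AT&T branch above
   prints roughly "sym+4(%eax,%ebx,2)", while the Intel branch prints
   roughly "sym[eax+4+ebx*2]".  */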
9068
9069 bool
9070 output_addr_const_extra (FILE *file, rtx x)
9071 {
9072 rtx op;
9073
9074 if (GET_CODE (x) != UNSPEC)
9075 return false;
9076
9077 op = XVECEXP (x, 0, 0);
9078 switch (XINT (x, 1))
9079 {
9080 case UNSPEC_GOTTPOFF:
9081 output_addr_const (file, op);
9082 /* FIXME: This might be @TPOFF in Sun ld. */
9083 fputs ("@GOTTPOFF", file);
9084 break;
9085 case UNSPEC_TPOFF:
9086 output_addr_const (file, op);
9087 fputs ("@TPOFF", file);
9088 break;
9089 case UNSPEC_NTPOFF:
9090 output_addr_const (file, op);
9091 if (TARGET_64BIT)
9092 fputs ("@TPOFF", file);
9093 else
9094 fputs ("@NTPOFF", file);
9095 break;
9096 case UNSPEC_DTPOFF:
9097 output_addr_const (file, op);
9098 fputs ("@DTPOFF", file);
9099 break;
9100 case UNSPEC_GOTNTPOFF:
9101 output_addr_const (file, op);
9102 if (TARGET_64BIT)
9103 fputs ("@GOTTPOFF(%rip)", file);
9104 else
9105 fputs ("@GOTNTPOFF", file);
9106 break;
9107 case UNSPEC_INDNTPOFF:
9108 output_addr_const (file, op);
9109 fputs ("@INDNTPOFF", file);
9110 break;
9111
9112 default:
9113 return false;
9114 }
9115
9116 return true;
9117 }
9118 \f
9119 /* Split one or more DImode RTL references into pairs of SImode
9120 references. The RTL can be REG, offsettable MEM, integer constant, or
9121 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9122 split and "num" is its length. lo_half and hi_half are output arrays
9123 that parallel "operands". */
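/* For instance (illustrative): an offsettable (mem:DI addr) is split into
   (mem:SI addr) and (mem:SI addr+4); any other operand goes through
   simplify_gen_subreg at byte offsets 0 and 4.  */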
9124
9125 void
9126 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9127 {
9128 while (num--)
9129 {
9130 rtx op = operands[num];
9131
9132 /* simplify_subreg refuses to split volatile memory addresses,
9133 but we still have to handle them. */
9134 if (MEM_P (op))
9135 {
9136 lo_half[num] = adjust_address (op, SImode, 0);
9137 hi_half[num] = adjust_address (op, SImode, 4);
9138 }
9139 else
9140 {
9141 lo_half[num] = simplify_gen_subreg (SImode, op,
9142 GET_MODE (op) == VOIDmode
9143 ? DImode : GET_MODE (op), 0);
9144 hi_half[num] = simplify_gen_subreg (SImode, op,
9145 GET_MODE (op) == VOIDmode
9146 ? DImode : GET_MODE (op), 4);
9147 }
9148 }
9149 }
9150 /* Split one or more TImode RTL references into pairs of DImode
9151 references. The RTL can be REG, offsettable MEM, integer constant, or
9152 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9153 split and "num" is its length. lo_half and hi_half are output arrays
9154 that parallel "operands". */
9155
9156 void
9157 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9158 {
9159 while (num--)
9160 {
9161 rtx op = operands[num];
9162
9163 /* simplify_subreg refuses to split volatile memory addresses, but we
9164 still have to handle them. */
9165 if (MEM_P (op))
9166 {
9167 lo_half[num] = adjust_address (op, DImode, 0);
9168 hi_half[num] = adjust_address (op, DImode, 8);
9169 }
9170 else
9171 {
9172 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9173 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9174 }
9175 }
9176 }
9177 \f
9178 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9179 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9180 is the expression of the binary operation. The output may either be
9181 emitted here, or returned to the caller, like all output_* functions.
9182
9183 There is no guarantee that the operands are the same mode, as they
9184 might be within FLOAT or FLOAT_EXTEND expressions. */
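/* For example (illustrative): an SFmode SSE add returns
   "addss\t{%2, %0|%0, %2}", while an x87 add whose register operands[2]
   dies returns a popping form such as "faddp\t{%2, %0|%0, %2}".  */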
9185
9186 #ifndef SYSV386_COMPAT
9187 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9188 wants to fix the assemblers because that causes incompatibility
9189 with gcc. No-one wants to fix gcc because that causes
9190 incompatibility with assemblers... You can use the option of
9191 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9192 #define SYSV386_COMPAT 1
9193 #endif
9194
9195 const char *
9196 output_387_binary_op (rtx insn, rtx *operands)
9197 {
9198 static char buf[30];
9199 const char *p;
9200 const char *ssep;
9201 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9202
9203 #ifdef ENABLE_CHECKING
9204 /* Even if we do not want to check the inputs, this documents the input
9205 constraints, which helps in understanding the following code. */
9206 if (STACK_REG_P (operands[0])
9207 && ((REG_P (operands[1])
9208 && REGNO (operands[0]) == REGNO (operands[1])
9209 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9210 || (REG_P (operands[2])
9211 && REGNO (operands[0]) == REGNO (operands[2])
9212 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9213 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9214 ; /* ok */
9215 else
9216 gcc_assert (is_sse);
9217 #endif
9218
9219 switch (GET_CODE (operands[3]))
9220 {
9221 case PLUS:
9222 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9223 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9224 p = "fiadd";
9225 else
9226 p = "fadd";
9227 ssep = "add";
9228 break;
9229
9230 case MINUS:
9231 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9232 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9233 p = "fisub";
9234 else
9235 p = "fsub";
9236 ssep = "sub";
9237 break;
9238
9239 case MULT:
9240 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9241 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9242 p = "fimul";
9243 else
9244 p = "fmul";
9245 ssep = "mul";
9246 break;
9247
9248 case DIV:
9249 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9250 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9251 p = "fidiv";
9252 else
9253 p = "fdiv";
9254 ssep = "div";
9255 break;
9256
9257 default:
9258 gcc_unreachable ();
9259 }
9260
9261 if (is_sse)
9262 {
9263 strcpy (buf, ssep);
9264 if (GET_MODE (operands[0]) == SFmode)
9265 strcat (buf, "ss\t{%2, %0|%0, %2}");
9266 else
9267 strcat (buf, "sd\t{%2, %0|%0, %2}");
9268 return buf;
9269 }
9270 strcpy (buf, p);
9271
9272 switch (GET_CODE (operands[3]))
9273 {
9274 case MULT:
9275 case PLUS:
9276 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9277 {
9278 rtx temp = operands[2];
9279 operands[2] = operands[1];
9280 operands[1] = temp;
9281 }
9282
9283 /* We know operands[0] == operands[1]. */
9284
9285 if (MEM_P (operands[2]))
9286 {
9287 p = "%z2\t%2";
9288 break;
9289 }
9290
9291 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9292 {
9293 if (STACK_TOP_P (operands[0]))
9294 /* How is it that we are storing to a dead operand[2]?
9295 Well, presumably operands[1] is dead too. We can't
9296 store the result to st(0) as st(0) gets popped on this
9297 instruction. Instead store to operands[2] (which I
9298 think has to be st(1)). st(1) will be popped later.
9299 gcc <= 2.8.1 didn't have this check and generated
9300 assembly code that the Unixware assembler rejected. */
9301 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9302 else
9303 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9304 break;
9305 }
9306
9307 if (STACK_TOP_P (operands[0]))
9308 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9309 else
9310 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9311 break;
9312
9313 case MINUS:
9314 case DIV:
9315 if (MEM_P (operands[1]))
9316 {
9317 p = "r%z1\t%1";
9318 break;
9319 }
9320
9321 if (MEM_P (operands[2]))
9322 {
9323 p = "%z2\t%2";
9324 break;
9325 }
9326
9327 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9328 {
9329 #if SYSV386_COMPAT
9330 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9331 derived assemblers, confusingly reverse the direction of
9332 the operation for fsub{r} and fdiv{r} when the
9333 destination register is not st(0). The Intel assembler
9334 doesn't have this brain damage. Read !SYSV386_COMPAT to
9335 figure out what the hardware really does. */
9336 if (STACK_TOP_P (operands[0]))
9337 p = "{p\t%0, %2|rp\t%2, %0}";
9338 else
9339 p = "{rp\t%2, %0|p\t%0, %2}";
9340 #else
9341 if (STACK_TOP_P (operands[0]))
9342 /* As above for fmul/fadd, we can't store to st(0). */
9343 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9344 else
9345 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9346 #endif
9347 break;
9348 }
9349
9350 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9351 {
9352 #if SYSV386_COMPAT
9353 if (STACK_TOP_P (operands[0]))
9354 p = "{rp\t%0, %1|p\t%1, %0}";
9355 else
9356 p = "{p\t%1, %0|rp\t%0, %1}";
9357 #else
9358 if (STACK_TOP_P (operands[0]))
9359 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9360 else
9361 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9362 #endif
9363 break;
9364 }
9365
9366 if (STACK_TOP_P (operands[0]))
9367 {
9368 if (STACK_TOP_P (operands[1]))
9369 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9370 else
9371 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9372 break;
9373 }
9374 else if (STACK_TOP_P (operands[1]))
9375 {
9376 #if SYSV386_COMPAT
9377 p = "{\t%1, %0|r\t%0, %1}";
9378 #else
9379 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9380 #endif
9381 }
9382 else
9383 {
9384 #if SYSV386_COMPAT
9385 p = "{r\t%2, %0|\t%0, %2}";
9386 #else
9387 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9388 #endif
9389 }
9390 break;
9391
9392 default:
9393 gcc_unreachable ();
9394 }
9395
9396 strcat (buf, p);
9397 return buf;
9398 }
9399
9400 /* Return needed mode for entity in optimize_mode_switching pass. */
9401
9402 int
9403 ix86_mode_needed (int entity, rtx insn)
9404 {
9405 enum attr_i387_cw mode;
9406
9407 /* The mode UNINITIALIZED is used to store the control word after a
9408 function call or ASM pattern. The mode ANY specifies that the function
9409 has no requirements on the control word and makes no changes to the
9410 bits we are interested in. */
9411
9412 if (CALL_P (insn)
9413 || (NONJUMP_INSN_P (insn)
9414 && (asm_noperands (PATTERN (insn)) >= 0
9415 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9416 return I387_CW_UNINITIALIZED;
9417
9418 if (recog_memoized (insn) < 0)
9419 return I387_CW_ANY;
9420
9421 mode = get_attr_i387_cw (insn);
9422
9423 switch (entity)
9424 {
9425 case I387_TRUNC:
9426 if (mode == I387_CW_TRUNC)
9427 return mode;
9428 break;
9429
9430 case I387_FLOOR:
9431 if (mode == I387_CW_FLOOR)
9432 return mode;
9433 break;
9434
9435 case I387_CEIL:
9436 if (mode == I387_CW_CEIL)
9437 return mode;
9438 break;
9439
9440 case I387_MASK_PM:
9441 if (mode == I387_CW_MASK_PM)
9442 return mode;
9443 break;
9444
9445 default:
9446 gcc_unreachable ();
9447 }
9448
9449 return I387_CW_ANY;
9450 }
9451
9452 /* Output code to initialize control word copies used by trunc?f?i and
9453 rounding patterns. CURRENT_MODE is set to the current control word,
9454 while NEW_MODE is set to the new control word. */
9455
9456 void
9457 emit_i387_cw_initialization (int mode)
9458 {
9459 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9460 rtx new_mode;
9461
9462 enum ix86_stack_slot slot;
9463
9464 rtx reg = gen_reg_rtx (HImode);
9465
9466 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9467 emit_move_insn (reg, copy_rtx (stored_mode));
9468
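/* Descriptive note (not in the original sources): bits 10-11 of the x87
   control word select rounding (00 nearest, 01 down, 10 up, 11 toward
   zero) and bit 5 (0x20) masks the precision exception, hence the
   0x0c00, 0x0400, 0x0800 and 0x0020 constants below.  */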
9469 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9470 {
9471 switch (mode)
9472 {
9473 case I387_CW_TRUNC:
9474 /* round toward zero (truncate) */
9475 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9476 slot = SLOT_CW_TRUNC;
9477 break;
9478
9479 case I387_CW_FLOOR:
9480 /* round down toward -oo */
9481 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9482 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9483 slot = SLOT_CW_FLOOR;
9484 break;
9485
9486 case I387_CW_CEIL:
9487 /* round up toward +oo */
9488 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9489 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9490 slot = SLOT_CW_CEIL;
9491 break;
9492
9493 case I387_CW_MASK_PM:
9494 /* mask precision exception for nearbyint() */
9495 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9496 slot = SLOT_CW_MASK_PM;
9497 break;
9498
9499 default:
9500 gcc_unreachable ();
9501 }
9502 }
9503 else
9504 {
9505 switch (mode)
9506 {
9507 case I387_CW_TRUNC:
9508 /* round toward zero (truncate) */
9509 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9510 slot = SLOT_CW_TRUNC;
9511 break;
9512
9513 case I387_CW_FLOOR:
9514 /* round down toward -oo */
9515 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9516 slot = SLOT_CW_FLOOR;
9517 break;
9518
9519 case I387_CW_CEIL:
9520 /* round up toward +oo */
9521 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9522 slot = SLOT_CW_CEIL;
9523 break;
9524
9525 case I387_CW_MASK_PM:
9526 /* mask precision exception for nearbyint() */
9527 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9528 slot = SLOT_CW_MASK_PM;
9529 break;
9530
9531 default:
9532 gcc_unreachable ();
9533 }
9534 }
9535
9536 gcc_assert (slot < MAX_386_STACK_LOCALS);
9537
9538 new_mode = assign_386_stack_local (HImode, slot);
9539 emit_move_insn (new_mode, reg);
9540 }
9541
9542 /* Output code for INSN to convert a float to a signed int. OPERANDS
9543 are the insn operands. The output may be [HSD]Imode and the input
9544 operand may be [SDX]Fmode. */
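/* Roughly (illustrative), when a non-default rounding mode is needed and
   FISTTP is not used, the sequence emitted below has the shape
   "fldcw <new cw>; fist(p) <dest>; fldcw <saved cw>".  */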
9545
9546 const char *
9547 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9548 {
9549 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9550 int dimode_p = GET_MODE (operands[0]) == DImode;
9551 int round_mode = get_attr_i387_cw (insn);
9552
9553 /* Jump through a hoop or two for DImode, since the hardware has no
9554 non-popping instruction. We used to do this a different way, but
9555 that was somewhat fragile and broke with post-reload splitters. */
9556 if ((dimode_p || fisttp) && !stack_top_dies)
9557 output_asm_insn ("fld\t%y1", operands);
9558
9559 gcc_assert (STACK_TOP_P (operands[1]));
9560 gcc_assert (MEM_P (operands[0]));
9561 gcc_assert (GET_MODE (operands[1]) != TFmode);
9562
9563 if (fisttp)
9564 output_asm_insn ("fisttp%z0\t%0", operands);
9565 else
9566 {
9567 if (round_mode != I387_CW_ANY)
9568 output_asm_insn ("fldcw\t%3", operands);
9569 if (stack_top_dies || dimode_p)
9570 output_asm_insn ("fistp%z0\t%0", operands);
9571 else
9572 output_asm_insn ("fist%z0\t%0", operands);
9573 if (round_mode != I387_CW_ANY)
9574 output_asm_insn ("fldcw\t%2", operands);
9575 }
9576
9577 return "";
9578 }
9579
9580 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9581 have the values zero or one, indicates the ffreep insn's operand
9582 from the OPERANDS array. */
9583
9584 static const char *
9585 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9586 {
9587 if (TARGET_USE_FFREEP)
9588 #if HAVE_AS_IX86_FFREEP
9589 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9590 #else
9591 {
9592 static char retval[] = ".word\t0xc_df";
9593 int regno = REGNO (operands[opno]);
9594
9595 gcc_assert (FP_REGNO_P (regno));
9596
9597 retval[9] = '0' + (regno - FIRST_STACK_REG);
9598 return retval;
9599 }
9600 #endif
9601
9602 return opno ? "fstp\t%y1" : "fstp\t%y0";
9603 }
9604
9605
9606 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9607 should be used. UNORDERED_P is true when fucom should be used. */
9608
9609 const char *
9610 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9611 {
9612 int stack_top_dies;
9613 rtx cmp_op0, cmp_op1;
9614 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9615
9616 if (eflags_p)
9617 {
9618 cmp_op0 = operands[0];
9619 cmp_op1 = operands[1];
9620 }
9621 else
9622 {
9623 cmp_op0 = operands[1];
9624 cmp_op1 = operands[2];
9625 }
9626
9627 if (is_sse)
9628 {
9629 if (GET_MODE (operands[0]) == SFmode)
9630 if (unordered_p)
9631 return "ucomiss\t{%1, %0|%0, %1}";
9632 else
9633 return "comiss\t{%1, %0|%0, %1}";
9634 else
9635 if (unordered_p)
9636 return "ucomisd\t{%1, %0|%0, %1}";
9637 else
9638 return "comisd\t{%1, %0|%0, %1}";
9639 }
9640
9641 gcc_assert (STACK_TOP_P (cmp_op0));
9642
9643 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9644
9645 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9646 {
9647 if (stack_top_dies)
9648 {
9649 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9650 return output_387_ffreep (operands, 1);
9651 }
9652 else
9653 return "ftst\n\tfnstsw\t%0";
9654 }
9655
9656 if (STACK_REG_P (cmp_op1)
9657 && stack_top_dies
9658 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9659 && REGNO (cmp_op1) != FIRST_STACK_REG)
9660 {
9661 /* If the top of the 387 stack dies, and the other operand
9662 is also a stack register that dies, then this must be a
9663 `fcompp' float compare. */
9664
9665 if (eflags_p)
9666 {
9667 /* There is no double popping fcomi variant. Fortunately,
9668 eflags is immune from the fstp's cc clobbering. */
9669 if (unordered_p)
9670 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9671 else
9672 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9673 return output_387_ffreep (operands, 0);
9674 }
9675 else
9676 {
9677 if (unordered_p)
9678 return "fucompp\n\tfnstsw\t%0";
9679 else
9680 return "fcompp\n\tfnstsw\t%0";
9681 }
9682 }
9683 else
9684 {
9685 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
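/* For instance (illustrative): eflags_p = 1, a floating-point operand,
   unordered_p = 1 and stack_top_dies = 1 give mask 11 and select
   "fucomip" from the table below.  */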
9686
9687 static const char * const alt[16] =
9688 {
9689 "fcom%z2\t%y2\n\tfnstsw\t%0",
9690 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9691 "fucom%z2\t%y2\n\tfnstsw\t%0",
9692 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9693
9694 "ficom%z2\t%y2\n\tfnstsw\t%0",
9695 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9696 NULL,
9697 NULL,
9698
9699 "fcomi\t{%y1, %0|%0, %y1}",
9700 "fcomip\t{%y1, %0|%0, %y1}",
9701 "fucomi\t{%y1, %0|%0, %y1}",
9702 "fucomip\t{%y1, %0|%0, %y1}",
9703
9704 NULL,
9705 NULL,
9706 NULL,
9707 NULL
9708 };
9709
9710 int mask;
9711 const char *ret;
9712
9713 mask = eflags_p << 3;
9714 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9715 mask |= unordered_p << 1;
9716 mask |= stack_top_dies;
9717
9718 gcc_assert (mask < 16);
9719 ret = alt[mask];
9720 gcc_assert (ret);
9721
9722 return ret;
9723 }
9724 }
9725
9726 void
9727 ix86_output_addr_vec_elt (FILE *file, int value)
9728 {
9729 const char *directive = ASM_LONG;
9730
9731 #ifdef ASM_QUAD
9732 if (TARGET_64BIT)
9733 directive = ASM_QUAD;
9734 #else
9735 gcc_assert (!TARGET_64BIT);
9736 #endif
9737
9738 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9739 }
9740
9741 void
9742 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9743 {
9744 const char *directive = ASM_LONG;
9745
9746 #ifdef ASM_QUAD
9747 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9748 directive = ASM_QUAD;
9749 #else
9750 gcc_assert (!TARGET_64BIT);
9751 #endif
9752 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9753 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9754 fprintf (file, "%s%s%d-%s%d\n",
9755 directive, LPREFIX, value, LPREFIX, rel);
9756 else if (HAVE_AS_GOTOFF_IN_DATA)
9757 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9758 #if TARGET_MACHO
9759 else if (TARGET_MACHO)
9760 {
9761 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9762 machopic_output_function_base_name (file);
9763 fprintf(file, "\n");
9764 }
9765 #endif
9766 else
9767 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9768 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9769 }
9770 \f
9771 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9772 for the target. */
9773
9774 void
9775 ix86_expand_clear (rtx dest)
9776 {
9777 rtx tmp;
9778
9779 /* We play register width games, which are only valid after reload. */
9780 gcc_assert (reload_completed);
9781
9782 /* Avoid HImode and its attendant prefix byte. */
9783 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9784 dest = gen_rtx_REG (SImode, REGNO (dest));
9785 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9786
9787 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9788 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9789 {
9790 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9791 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9792 }
9793
9794 emit_insn (tmp);
9795 }
9796
9797 /* X is an unchanging MEM. If it is a constant pool reference, return
9798 the constant pool rtx, else NULL. */
9799
9800 rtx
9801 maybe_get_pool_constant (rtx x)
9802 {
9803 x = ix86_delegitimize_address (XEXP (x, 0));
9804
9805 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9806 return get_pool_constant (x);
9807
9808 return NULL_RTX;
9809 }
9810
9811 void
9812 ix86_expand_move (enum machine_mode mode, rtx operands[])
9813 {
9814 int strict = (reload_in_progress || reload_completed);
9815 rtx op0, op1;
9816 enum tls_model model;
9817
9818 op0 = operands[0];
9819 op1 = operands[1];
9820
9821 if (GET_CODE (op1) == SYMBOL_REF)
9822 {
9823 model = SYMBOL_REF_TLS_MODEL (op1);
9824 if (model)
9825 {
9826 op1 = legitimize_tls_address (op1, model, true);
9827 op1 = force_operand (op1, op0);
9828 if (op1 == op0)
9829 return;
9830 }
9831 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9832 && SYMBOL_REF_DLLIMPORT_P (op1))
9833 op1 = legitimize_dllimport_symbol (op1, false);
9834 }
9835 else if (GET_CODE (op1) == CONST
9836 && GET_CODE (XEXP (op1, 0)) == PLUS
9837 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9838 {
9839 rtx addend = XEXP (XEXP (op1, 0), 1);
9840 rtx symbol = XEXP (XEXP (op1, 0), 0);
9841 rtx tmp = NULL;
9842
9843 model = SYMBOL_REF_TLS_MODEL (symbol);
9844 if (model)
9845 tmp = legitimize_tls_address (symbol, model, true);
9846 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9847 && SYMBOL_REF_DLLIMPORT_P (symbol))
9848 tmp = legitimize_dllimport_symbol (symbol, true);
9849
9850 if (tmp)
9851 {
9852 tmp = force_operand (tmp, NULL);
9853 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9854 op0, 1, OPTAB_DIRECT);
9855 if (tmp == op0)
9856 return;
9857 }
9858 }
9859
9860 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9861 {
9862 if (TARGET_MACHO && !TARGET_64BIT)
9863 {
9864 #if TARGET_MACHO
9865 if (MACHOPIC_PURE)
9866 {
9867 rtx temp = ((reload_in_progress
9868 || ((op0 && REG_P (op0))
9869 && mode == Pmode))
9870 ? op0 : gen_reg_rtx (Pmode));
9871 op1 = machopic_indirect_data_reference (op1, temp);
9872 op1 = machopic_legitimize_pic_address (op1, mode,
9873 temp == op1 ? 0 : temp);
9874 }
9875 else if (MACHOPIC_INDIRECT)
9876 op1 = machopic_indirect_data_reference (op1, 0);
9877 if (op0 == op1)
9878 return;
9879 #endif
9880 }
9881 else
9882 {
9883 if (MEM_P (op0))
9884 op1 = force_reg (Pmode, op1);
9885 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9886 {
9887 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
9888 op1 = legitimize_pic_address (op1, reg);
9889 if (op0 == op1)
9890 return;
9891 }
9892 }
9893 }
9894 else
9895 {
9896 if (MEM_P (op0)
9897 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9898 || !push_operand (op0, mode))
9899 && MEM_P (op1))
9900 op1 = force_reg (mode, op1);
9901
9902 if (push_operand (op0, mode)
9903 && ! general_no_elim_operand (op1, mode))
9904 op1 = copy_to_mode_reg (mode, op1);
9905
9906 /* Force large constants in 64-bit compilation into a register
9907 to get them CSEd. */
9908 if (TARGET_64BIT && mode == DImode
9909 && immediate_operand (op1, mode)
9910 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9911 && !register_operand (op0, mode)
9912 && optimize && !reload_completed && !reload_in_progress)
9913 op1 = copy_to_mode_reg (mode, op1);
9914
9915 if (FLOAT_MODE_P (mode))
9916 {
9917 /* If we are loading a floating point constant to a register,
9918 force the value to memory now, since we'll get better code
9919 out the back end. */
9920
9921 if (strict)
9922 ;
9923 else if (GET_CODE (op1) == CONST_DOUBLE)
9924 {
9925 op1 = validize_mem (force_const_mem (mode, op1));
9926 if (!register_operand (op0, mode))
9927 {
9928 rtx temp = gen_reg_rtx (mode);
9929 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9930 emit_move_insn (op0, temp);
9931 return;
9932 }
9933 }
9934 }
9935 }
9936
9937 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9938 }
9939
9940 void
9941 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9942 {
9943 rtx op0 = operands[0], op1 = operands[1];
9944 unsigned int align = GET_MODE_ALIGNMENT (mode);
9945
9946 /* Force constants other than zero into memory. We do not know how
9947 the instructions used to build constants modify the upper 64 bits
9948 of the register; once we have that information we may be able
9949 to handle some of them more efficiently. */
9950 if ((reload_in_progress | reload_completed) == 0
9951 && register_operand (op0, mode)
9952 && (CONSTANT_P (op1)
9953 || (GET_CODE (op1) == SUBREG
9954 && CONSTANT_P (SUBREG_REG (op1))))
9955 && standard_sse_constant_p (op1) <= 0)
9956 op1 = validize_mem (force_const_mem (mode, op1));
9957
9958 /* TDmode values are passed as TImode on the stack. TImode values
9959 are moved via xmm registers, and moving them to the stack can result in
9960 unaligned memory access. Use ix86_expand_vector_move_misalign()
9961 if the memory operand is not aligned correctly. */
9962 if (can_create_pseudo_p ()
9963 && (mode == TImode) && !TARGET_64BIT
9964 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9965 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9966 {
9967 rtx tmp[2];
9968
9969 /* ix86_expand_vector_move_misalign() does not like constants ... */
9970 if (CONSTANT_P (op1)
9971 || (GET_CODE (op1) == SUBREG
9972 && CONSTANT_P (SUBREG_REG (op1))))
9973 op1 = validize_mem (force_const_mem (mode, op1));
9974
9975 /* ... nor both arguments in memory. */
9976 if (!register_operand (op0, mode)
9977 && !register_operand (op1, mode))
9978 op1 = force_reg (mode, op1);
9979
9980 tmp[0] = op0; tmp[1] = op1;
9981 ix86_expand_vector_move_misalign (mode, tmp);
9982 return;
9983 }
9984
9985 /* Make operand1 a register if it isn't already. */
9986 if (can_create_pseudo_p ()
9987 && !register_operand (op0, mode)
9988 && !register_operand (op1, mode))
9989 {
9990 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9991 return;
9992 }
9993
9994 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9995 }
9996
9997 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9998 straight to ix86_expand_vector_move. */
9999 /* Code generation for scalar reg-reg moves of single and double precision data:
10000 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
10001 movaps reg, reg
10002 else
10003 movss reg, reg
10004 if (x86_sse_partial_reg_dependency == true)
10005 movapd reg, reg
10006 else
10007 movsd reg, reg
10008
10009 Code generation for scalar loads of double precision data:
10010 if (x86_sse_split_regs == true)
10011 movlpd mem, reg (gas syntax)
10012 else
10013 movsd mem, reg
10014
10015 Code generation for unaligned packed loads of single precision data
10016 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10017 if (x86_sse_unaligned_move_optimal)
10018 movups mem, reg
10019
10020 if (x86_sse_partial_reg_dependency == true)
10021 {
10022 xorps reg, reg
10023 movlps mem, reg
10024 movhps mem+8, reg
10025 }
10026 else
10027 {
10028 movlps mem, reg
10029 movhps mem+8, reg
10030 }
10031
10032 Code generation for unaligned packed loads of double precision data
10033 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10034 if (x86_sse_unaligned_move_optimal)
10035 movupd mem, reg
10036
10037 if (x86_sse_split_regs == true)
10038 {
10039 movlpd mem, reg
10040 movhpd mem+8, reg
10041 }
10042 else
10043 {
10044 movsd mem, reg
10045 movhpd mem+8, reg
10046 }
10047 */
10048
10049 void
10050 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10051 {
10052 rtx op0, op1, m;
10053
10054 op0 = operands[0];
10055 op1 = operands[1];
10056
10057 if (MEM_P (op1))
10058 {
10059 /* If we're optimizing for size, movups is the smallest. */
10060 if (optimize_size)
10061 {
10062 op0 = gen_lowpart (V4SFmode, op0);
10063 op1 = gen_lowpart (V4SFmode, op1);
10064 emit_insn (gen_sse_movups (op0, op1));
10065 return;
10066 }
10067
10068 /* ??? If we have typed data, then it would appear that using
10069 movdqu is the only way to get unaligned data loaded with
10070 integer type. */
10071 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10072 {
10073 op0 = gen_lowpart (V16QImode, op0);
10074 op1 = gen_lowpart (V16QImode, op1);
10075 emit_insn (gen_sse2_movdqu (op0, op1));
10076 return;
10077 }
10078
10079 if (TARGET_SSE2 && mode == V2DFmode)
10080 {
10081 rtx zero;
10082
10083 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10084 {
10085 op0 = gen_lowpart (V2DFmode, op0);
10086 op1 = gen_lowpart (V2DFmode, op1);
10087 emit_insn (gen_sse2_movupd (op0, op1));
10088 return;
10089 }
10090
10091 /* When SSE registers are split into halves, we can avoid
10092 writing to the top half twice. */
10093 if (TARGET_SSE_SPLIT_REGS)
10094 {
10095 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10096 zero = op0;
10097 }
10098 else
10099 {
10100 /* ??? Not sure about the best option for the Intel chips.
10101 The following would seem to satisfy; the register is
10102 entirely cleared, breaking the dependency chain. We
10103 then store to the upper half, with a dependency depth
10104 of one. A rumor has it that Intel recommends two movsd
10105 followed by an unpacklpd, but this is unconfirmed. And
10106 given that the dependency depth of the unpacklpd would
10107 still be one, I'm not sure why this would be better. */
10108 zero = CONST0_RTX (V2DFmode);
10109 }
10110
10111 m = adjust_address (op1, DFmode, 0);
10112 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10113 m = adjust_address (op1, DFmode, 8);
10114 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10115 }
10116 else
10117 {
10118 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10119 {
10120 op0 = gen_lowpart (V4SFmode, op0);
10121 op1 = gen_lowpart (V4SFmode, op1);
10122 emit_insn (gen_sse_movups (op0, op1));
10123 return;
10124 }
10125
10126 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10127 emit_move_insn (op0, CONST0_RTX (mode));
10128 else
10129 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10130
10131 if (mode != V4SFmode)
10132 op0 = gen_lowpart (V4SFmode, op0);
10133 m = adjust_address (op1, V2SFmode, 0);
10134 emit_insn (gen_sse_loadlps (op0, op0, m));
10135 m = adjust_address (op1, V2SFmode, 8);
10136 emit_insn (gen_sse_loadhps (op0, op0, m));
10137 }
10138 }
10139 else if (MEM_P (op0))
10140 {
10141 /* If we're optimizing for size, movups is the smallest. */
10142 if (optimize_size)
10143 {
10144 op0 = gen_lowpart (V4SFmode, op0);
10145 op1 = gen_lowpart (V4SFmode, op1);
10146 emit_insn (gen_sse_movups (op0, op1));
10147 return;
10148 }
10149
10150 /* ??? Similar to above, only less clear because of quote
10151 typeless stores unquote. */
10152 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10153 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10154 {
10155 op0 = gen_lowpart (V16QImode, op0);
10156 op1 = gen_lowpart (V16QImode, op1);
10157 emit_insn (gen_sse2_movdqu (op0, op1));
10158 return;
10159 }
10160
10161 if (TARGET_SSE2 && mode == V2DFmode)
10162 {
10163 m = adjust_address (op0, DFmode, 0);
10164 emit_insn (gen_sse2_storelpd (m, op1));
10165 m = adjust_address (op0, DFmode, 8);
10166 emit_insn (gen_sse2_storehpd (m, op1));
10167 }
10168 else
10169 {
10170 if (mode != V4SFmode)
10171 op1 = gen_lowpart (V4SFmode, op1);
10172 m = adjust_address (op0, V2SFmode, 0);
10173 emit_insn (gen_sse_storelps (m, op1));
10174 m = adjust_address (op0, V2SFmode, 8);
10175 emit_insn (gen_sse_storehps (m, op1));
10176 }
10177 }
10178 else
10179 gcc_unreachable ();
10180 }
10181
10182 /* Expand a push in MODE. This is some mode for which we do not support
10183 proper push instructions, at least from the registers that we expect
10184 the value to live in. */
10185
10186 void
10187 ix86_expand_push (enum machine_mode mode, rtx x)
10188 {
10189 rtx tmp;
10190
10191 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10192 GEN_INT (-GET_MODE_SIZE (mode)),
10193 stack_pointer_rtx, 1, OPTAB_DIRECT);
10194 if (tmp != stack_pointer_rtx)
10195 emit_move_insn (stack_pointer_rtx, tmp);
10196
10197 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10198 emit_move_insn (tmp, x);
10199 }
10200
10201 /* Helper function of ix86_fixup_binary_operands to canonicalize
10202 operand order. Returns true if the operands should be swapped. */
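/* E.g. (illustrative): for dst == reg A, src1 a constant and src2 == reg A,
   this returns true, so the register operand matching the destination ends
   up first and the immediate second.  */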
10203
10204 static bool
10205 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10206 rtx operands[])
10207 {
10208 rtx dst = operands[0];
10209 rtx src1 = operands[1];
10210 rtx src2 = operands[2];
10211
10212 /* If the operation is not commutative, we can't do anything. */
10213 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10214 return false;
10215
10216 /* Highest priority is that src1 should match dst. */
10217 if (rtx_equal_p (dst, src1))
10218 return false;
10219 if (rtx_equal_p (dst, src2))
10220 return true;
10221
10222 /* Next highest priority is that immediate constants come second. */
10223 if (immediate_operand (src2, mode))
10224 return false;
10225 if (immediate_operand (src1, mode))
10226 return true;
10227
10228 /* Lowest priority is that memory references should come second. */
10229 if (MEM_P (src2))
10230 return false;
10231 if (MEM_P (src1))
10232 return true;
10233
10234 return false;
10235 }
10236
10237
10238 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10239 destination to use for the operation. If different from the true
10240 destination in operands[0], a copy operation will be required. */
10241
10242 rtx
10243 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10244 rtx operands[])
10245 {
10246 rtx dst = operands[0];
10247 rtx src1 = operands[1];
10248 rtx src2 = operands[2];
10249
10250 /* Canonicalize operand order. */
10251 if (ix86_swap_binary_operands_p (code, mode, operands))
10252 {
10253 rtx temp = src1;
10254 src1 = src2;
10255 src2 = temp;
10256 }
10257
10258 /* Both source operands cannot be in memory. */
10259 if (MEM_P (src1) && MEM_P (src2))
10260 {
10261 /* Optimization: Only read from memory once. */
10262 if (rtx_equal_p (src1, src2))
10263 {
10264 src2 = force_reg (mode, src2);
10265 src1 = src2;
10266 }
10267 else
10268 src2 = force_reg (mode, src2);
10269 }
10270
10271 /* If the destination is memory, and we do not have matching source
10272 operands, do things in registers. */
10273 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10274 dst = gen_reg_rtx (mode);
10275
10276 /* Source 1 cannot be a constant. */
10277 if (CONSTANT_P (src1))
10278 src1 = force_reg (mode, src1);
10279
10280 /* Source 1 cannot be a non-matching memory. */
10281 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10282 src1 = force_reg (mode, src1);
10283
10284 operands[1] = src1;
10285 operands[2] = src2;
10286 return dst;
10287 }
10288
10289 /* Similarly, but assume that the destination has already been
10290 set up properly. */
10291
10292 void
10293 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10294 enum machine_mode mode, rtx operands[])
10295 {
10296 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10297 gcc_assert (dst == operands[0]);
10298 }
10299
10300 /* Attempt to expand a binary operator. Make the expansion closer to the
10301 actual machine than just general_operand, which would allow 3 separate
10302 memory references (one output, two inputs) in a single insn. */
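/* Illustrative sketch (not in the original sources): outside of reload
   this emits a PARALLEL of
     (set (reg:SI dst) (plus:SI (reg:SI src1) (reg:SI src2)))
     (clobber (reg:CC FLAGS_REG))
   for an SImode addition; during reload only the bare SET is emitted.  */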
10303
10304 void
10305 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10306 rtx operands[])
10307 {
10308 rtx src1, src2, dst, op, clob;
10309
10310 dst = ix86_fixup_binary_operands (code, mode, operands);
10311 src1 = operands[1];
10312 src2 = operands[2];
10313
10314 /* Emit the instruction. */
10315
10316 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10317 if (reload_in_progress)
10318 {
10319 /* Reload doesn't know about the flags register, and doesn't know that
10320 it doesn't want to clobber it. We can only do this with PLUS. */
10321 gcc_assert (code == PLUS);
10322 emit_insn (op);
10323 }
10324 else
10325 {
10326 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10327 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10328 }
10329
10330 /* Fix up the destination if needed. */
10331 if (dst != operands[0])
10332 emit_move_insn (operands[0], dst);
10333 }
10334
10335 /* Return TRUE or FALSE depending on whether the binary operator meets the
10336 appropriate constraints. */
10337
10338 int
10339 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10340 rtx operands[3])
10341 {
10342 rtx dst = operands[0];
10343 rtx src1 = operands[1];
10344 rtx src2 = operands[2];
10345
10346 /* Both source operands cannot be in memory. */
10347 if (MEM_P (src1) && MEM_P (src2))
10348 return 0;
10349
10350 /* Canonicalize operand order for commutative operators. */
10351 if (ix86_swap_binary_operands_p (code, mode, operands))
10352 {
10353 rtx temp = src1;
10354 src1 = src2;
10355 src2 = temp;
10356 }
10357
10358 /* If the destination is memory, we must have a matching source operand. */
10359 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10360 return 0;
10361
10362 /* Source 1 cannot be a constant. */
10363 if (CONSTANT_P (src1))
10364 return 0;
10365
10366 /* Source 1 cannot be a non-matching memory. */
10367 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10368 return 0;
10369
10370 return 1;
10371 }
10372
10373 /* Attempt to expand a unary operator. Make the expansion closer to the
10374 actual machine than just general_operand, which would allow 2 separate
10375 memory references (one output, one input) in a single insn. */
10376
10377 void
10378 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10379 rtx operands[])
10380 {
10381 int matching_memory;
10382 rtx src, dst, op, clob;
10383
10384 dst = operands[0];
10385 src = operands[1];
10386
10387 /* If the destination is memory, and we do not have matching source
10388 operands, do things in registers. */
10389 matching_memory = 0;
10390 if (MEM_P (dst))
10391 {
10392 if (rtx_equal_p (dst, src))
10393 matching_memory = 1;
10394 else
10395 dst = gen_reg_rtx (mode);
10396 }
10397
10398 /* When source operand is memory, destination must match. */
10399 if (MEM_P (src) && !matching_memory)
10400 src = force_reg (mode, src);
10401
10402 /* Emit the instruction. */
10403
10404 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10405 if (reload_in_progress || code == NOT)
10406 {
10407 /* Reload doesn't know about the flags register, and doesn't know that
10408 it doesn't want to clobber it. */
10409 gcc_assert (code == NOT);
10410 emit_insn (op);
10411 }
10412 else
10413 {
10414 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10415 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10416 }
10417
10418 /* Fix up the destination if needed. */
10419 if (dst != operands[0])
10420 emit_move_insn (operands[0], dst);
10421 }
10422
10423 /* Return TRUE or FALSE depending on whether the unary operator meets the
10424 appropriate constraints. */
10425
10426 int
10427 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10428 enum machine_mode mode ATTRIBUTE_UNUSED,
10429 rtx operands[2] ATTRIBUTE_UNUSED)
10430 {
10431 /* If one of the operands is memory, source and destination must match. */
10432 if ((MEM_P (operands[0])
10433 || MEM_P (operands[1]))
10434 && ! rtx_equal_p (operands[0], operands[1]))
10435 return FALSE;
10436 return TRUE;
10437 }
10438
10439 /* Post-reload splitter for converting an SF or DFmode value in an
10440 SSE register into an unsigned SImode. */
10441
10442 void
10443 ix86_split_convert_uns_si_sse (rtx operands[])
10444 {
10445 enum machine_mode vecmode;
10446 rtx value, large, zero_or_two31, input, two31, x;
10447
10448 large = operands[1];
10449 zero_or_two31 = operands[2];
10450 input = operands[3];
10451 two31 = operands[4];
10452 vecmode = GET_MODE (large);
10453 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10454
10455 /* Load up the value into the low element. We must ensure that the other
10456 elements are valid floats -- zero is the easiest such value. */
10457 if (MEM_P (input))
10458 {
10459 if (vecmode == V4SFmode)
10460 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10461 else
10462 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10463 }
10464 else
10465 {
10466 input = gen_rtx_REG (vecmode, REGNO (input));
10467 emit_move_insn (value, CONST0_RTX (vecmode));
10468 if (vecmode == V4SFmode)
10469 emit_insn (gen_sse_movss (value, value, input));
10470 else
10471 emit_insn (gen_sse2_movsd (value, value, input));
10472 }
10473
10474 emit_move_insn (large, two31);
10475 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10476
10477 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10478 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10479
10480 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10481 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10482
10483 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10484 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10485
10486 large = gen_rtx_REG (V4SImode, REGNO (large));
10487 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10488
10489 x = gen_rtx_REG (V4SImode, REGNO (value));
10490 if (vecmode == V4SFmode)
10491 emit_insn (gen_sse2_cvttps2dq (x, value));
10492 else
10493 emit_insn (gen_sse2_cvttpd2dq (x, value));
10494 value = x;
10495
10496 emit_insn (gen_xorv4si3 (value, value, large));
10497 }
10498
10499 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10500 Expects the 64-bit DImode to be supplied in a pair of integral
10501 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10502 -mfpmath=sse, !optimize_size only. */
10503
10504 void
10505 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10506 {
10507 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10508 rtx int_xmm, fp_xmm;
10509 rtx biases, exponents;
10510 rtx x;
10511
10512 int_xmm = gen_reg_rtx (V4SImode);
10513 if (TARGET_INTER_UNIT_MOVES)
10514 emit_insn (gen_movdi_to_sse (int_xmm, input));
10515 else if (TARGET_SSE_SPLIT_REGS)
10516 {
10517 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10518 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10519 }
10520 else
10521 {
10522 x = gen_reg_rtx (V2DImode);
10523 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10524 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10525 }
10526
10527 x = gen_rtx_CONST_VECTOR (V4SImode,
10528 gen_rtvec (4, GEN_INT (0x43300000UL),
10529 GEN_INT (0x45300000UL),
10530 const0_rtx, const0_rtx));
10531 exponents = validize_mem (force_const_mem (V4SImode, x));
10532
10533 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10534 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10535
10536 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10537 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10538 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10539 (0x1.0p84 + double(fp_value_hi_xmm)).
10540 Note these exponents differ by 32. */
10541
10542 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10543
10544 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10545 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10546 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10547 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10548 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10549 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10550 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10551 biases = validize_mem (force_const_mem (V2DFmode, biases));
10552 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10553
10554 /* Add the upper and lower DFmode values together. */
10555 if (TARGET_SSE3)
10556 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10557 else
10558 {
10559 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10560 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10561 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10562 }
10563
10564 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10565 }
10566
10567 /* Convert an unsigned SImode value into a DFmode. Only currently used
10568 for SSE, but applicable anywhere. */
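/* Descriptive note (not in the original sources): the input is biased by
   -2^31 so it fits in signed SImode, converted with floatsidf, and 2^31.0
   is added back; the result is exact because DFmode represents every
   32-bit integer.  */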
10569
10570 void
10571 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10572 {
10573 REAL_VALUE_TYPE TWO31r;
10574 rtx x, fp;
10575
10576 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10577 NULL, 1, OPTAB_DIRECT);
10578
10579 fp = gen_reg_rtx (DFmode);
10580 emit_insn (gen_floatsidf2 (fp, x));
10581
10582 real_ldexp (&TWO31r, &dconst1, 31);
10583 x = const_double_from_real_value (TWO31r, DFmode);
10584
10585 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10586 if (x != target)
10587 emit_move_insn (target, x);
10588 }
10589
10590 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10591 32-bit mode; otherwise we have a direct convert instruction. */
10592
10593 void
10594 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10595 {
10596 REAL_VALUE_TYPE TWO32r;
10597 rtx fp_lo, fp_hi, x;
10598
10599 fp_lo = gen_reg_rtx (DFmode);
10600 fp_hi = gen_reg_rtx (DFmode);
10601
10602 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10603
10604 real_ldexp (&TWO32r, &dconst1, 32);
10605 x = const_double_from_real_value (TWO32r, DFmode);
10606 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10607
10608 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10609
10610 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10611 0, OPTAB_DIRECT);
10612 if (x != target)
10613 emit_move_insn (target, x);
10614 }
10615
10616 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10617 For x86_32, -mfpmath=sse, !optimize_size only. */
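/* Descriptive note (not in the original sources): the input is split into
   its low and high 16-bit halves, each half is converted exactly to
   SFmode, and the result is computed as fp_hi * 2^16 + fp_lo.  */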
10618 void
10619 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10620 {
10621 REAL_VALUE_TYPE ONE16r;
10622 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10623
10624 real_ldexp (&ONE16r, &dconst1, 16);
10625 x = const_double_from_real_value (ONE16r, SFmode);
10626 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10627 NULL, 0, OPTAB_DIRECT);
10628 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10629 NULL, 0, OPTAB_DIRECT);
10630 fp_hi = gen_reg_rtx (SFmode);
10631 fp_lo = gen_reg_rtx (SFmode);
10632 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10633 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10634 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10635 0, OPTAB_DIRECT);
10636 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10637 0, OPTAB_DIRECT);
10638 if (!rtx_equal_p (target, fp_hi))
10639 emit_move_insn (target, fp_hi);
10640 }
10641
10642 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10643 then replicate the value for all elements of the vector
10644 register. */
10645
10646 rtx
10647 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10648 {
10649 rtvec v;
10650 switch (mode)
10651 {
10652 case SImode:
10653 gcc_assert (vect);
10654 v = gen_rtvec (4, value, value, value, value);
10655 return gen_rtx_CONST_VECTOR (V4SImode, v);
10656
10657 case DImode:
10658 gcc_assert (vect);
10659 v = gen_rtvec (2, value, value);
10660 return gen_rtx_CONST_VECTOR (V2DImode, v);
10661
10662 case SFmode:
10663 if (vect)
10664 v = gen_rtvec (4, value, value, value, value);
10665 else
10666 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10667 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10668 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10669
10670 case DFmode:
10671 if (vect)
10672 v = gen_rtvec (2, value, value);
10673 else
10674 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10675 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10676
10677 default:
10678 gcc_unreachable ();
10679 }
10680 }
10681
10682 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10683 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10684 for an SSE register. If VECT is true, then replicate the mask for
10685 all elements of the vector register. If INVERT is true, then create
10686 a mask excluding the sign bit. */
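/* For example (illustrative): DFmode with VECT and INVERT false yields a
   V2DF constant whose low element has only bit 63 set, i.e. the IEEE
   double sign bit; with INVERT true all bits except the sign bit are set.  */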
10687
10688 rtx
10689 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10690 {
10691 enum machine_mode vec_mode, imode;
10692 HOST_WIDE_INT hi, lo;
10693 int shift = 63;
10694 rtx v;
10695 rtx mask;
10696
10697 /* Find the sign bit, sign extended to 2*HWI. */
10698 switch (mode)
10699 {
10700 case SImode:
10701 case SFmode:
10702 imode = SImode;
10703 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10704 lo = 0x80000000, hi = lo < 0;
10705 break;
10706
10707 case DImode:
10708 case DFmode:
10709 imode = DImode;
10710 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10711 if (HOST_BITS_PER_WIDE_INT >= 64)
10712 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10713 else
10714 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10715 break;
10716
10717 case TImode:
10718 case TFmode:
10719 imode = TImode;
10720 vec_mode = VOIDmode;
10721 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10722 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10723 break;
10724
10725 default:
10726 gcc_unreachable ();
10727 }
10728
10729 if (invert)
10730 lo = ~lo, hi = ~hi;
10731
10732 /* Force this value into the low part of a fp vector constant. */
10733 mask = immed_double_const (lo, hi, imode);
10734 mask = gen_lowpart (mode, mask);
10735
10736 if (vec_mode == VOIDmode)
10737 return force_reg (mode, mask);
10738
10739 v = ix86_build_const_vector (mode, vect, mask);
10740 return force_reg (vec_mode, v);
10741 }
10742
10743 /* Generate code for floating point ABS or NEG. */
10744
10745 void
10746 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10747 rtx operands[])
10748 {
10749 rtx mask, set, use, clob, dst, src;
10750 bool matching_memory;
10751 bool use_sse = false;
10752 bool vector_mode = VECTOR_MODE_P (mode);
10753 enum machine_mode elt_mode = mode;
10754
10755 if (vector_mode)
10756 {
10757 elt_mode = GET_MODE_INNER (mode);
10758 use_sse = true;
10759 }
10760 else if (mode == TFmode)
10761 use_sse = true;
10762 else if (TARGET_SSE_MATH)
10763 use_sse = SSE_FLOAT_MODE_P (mode);
10764
10765 /* NEG and ABS performed with SSE use bitwise mask operations.
10766 Create the appropriate mask now. */
10767 if (use_sse)
10768 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10769 else
10770 mask = NULL_RTX;
10771
10772 dst = operands[0];
10773 src = operands[1];
10774
10775 /* If the destination is memory, and we don't have matching source
10776 operands or we're using the x87, do things in registers. */
10777 matching_memory = false;
10778 if (MEM_P (dst))
10779 {
10780 if (use_sse && rtx_equal_p (dst, src))
10781 matching_memory = true;
10782 else
10783 dst = gen_reg_rtx (mode);
10784 }
10785 if (MEM_P (src) && !matching_memory)
10786 src = force_reg (mode, src);
10787
10788 if (vector_mode)
10789 {
10790 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10791 set = gen_rtx_SET (VOIDmode, dst, set);
10792 emit_insn (set);
10793 }
10794 else
10795 {
10796 set = gen_rtx_fmt_e (code, mode, src);
10797 set = gen_rtx_SET (VOIDmode, dst, set);
10798 if (mask)
10799 {
10800 use = gen_rtx_USE (VOIDmode, mask);
10801 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10802 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10803 gen_rtvec (3, set, use, clob)));
10804 }
10805 else
10806 emit_insn (set);
10807 }
10808
10809 if (dst != operands[0])
10810 emit_move_insn (operands[0], dst);
10811 }
10812
10813 /* Expand a copysign operation. Special case operand 0 being a constant. */
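/* Descriptive note (not in the original sources): copysign is computed
   with bit masks, combining the magnitude bits of the first source
   operand with the sign bit of the second via the AND/IOR sequences in
   the splitters below.  */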
10814
10815 void
10816 ix86_expand_copysign (rtx operands[])
10817 {
10818 enum machine_mode mode, vmode;
10819 rtx dest, op0, op1, mask, nmask;
10820
10821 dest = operands[0];
10822 op0 = operands[1];
10823 op1 = operands[2];
10824
10825 mode = GET_MODE (dest);
10826 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10827
10828 if (GET_CODE (op0) == CONST_DOUBLE)
10829 {
10830 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10831
10832 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10833 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10834
10835 if (mode == SFmode || mode == DFmode)
10836 {
10837 if (op0 == CONST0_RTX (mode))
10838 op0 = CONST0_RTX (vmode);
10839 else
10840 {
10841 rtvec v;
10842
10843 if (mode == SFmode)
10844 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10845 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10846 else
10847 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10848 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10849 }
10850 }
10851
10852 mask = ix86_build_signbit_mask (mode, 0, 0);
10853
10854 if (mode == SFmode)
10855 copysign_insn = gen_copysignsf3_const;
10856 else if (mode == DFmode)
10857 copysign_insn = gen_copysigndf3_const;
10858 else
10859 copysign_insn = gen_copysigntf3_const;
10860
10861 emit_insn (copysign_insn (dest, op0, op1, mask));
10862 }
10863 else
10864 {
10865 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10866
10867 nmask = ix86_build_signbit_mask (mode, 0, 1);
10868 mask = ix86_build_signbit_mask (mode, 0, 0);
10869
10870 if (mode == SFmode)
10871 copysign_insn = gen_copysignsf3_var;
10872 else if (mode == DFmode)
10873 copysign_insn = gen_copysigndf3_var;
10874 else
10875 copysign_insn = gen_copysigntf3_var;
10876
10877 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10878 }
10879 }
10880
10881 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10882 be a constant, and so has already been expanded into a vector constant. */
10883
10884 void
10885 ix86_split_copysign_const (rtx operands[])
10886 {
10887 enum machine_mode mode, vmode;
10888 rtx dest, op0, op1, mask, x;
10889
10890 dest = operands[0];
10891 op0 = operands[1];
10892 op1 = operands[2];
10893 mask = operands[3];
10894
10895 mode = GET_MODE (dest);
10896 vmode = GET_MODE (mask);
10897
10898 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10899 x = gen_rtx_AND (vmode, dest, mask);
10900 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10901
10902 if (op0 != CONST0_RTX (vmode))
10903 {
10904 x = gen_rtx_IOR (vmode, dest, op0);
10905 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10906 }
10907 }
10908
10909 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10910 so we have to do two masks. */
10911
10912 void
10913 ix86_split_copysign_var (rtx operands[])
10914 {
10915 enum machine_mode mode, vmode;
10916 rtx dest, scratch, op0, op1, mask, nmask, x;
10917
10918 dest = operands[0];
10919 scratch = operands[1];
10920 op0 = operands[2];
10921 op1 = operands[3];
10922 nmask = operands[4];
10923 mask = operands[5];
10924
10925 mode = GET_MODE (dest);
10926 vmode = GET_MODE (mask);
10927
10928 if (rtx_equal_p (op0, op1))
10929 {
10930 /* Shouldn't happen often (it's useless, obviously), but when it does
10931 we'd generate incorrect code if we continue below. */
10932 emit_move_insn (dest, op0);
10933 return;
10934 }
10935
10936 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10937 {
10938 gcc_assert (REGNO (op1) == REGNO (scratch));
10939
10940 x = gen_rtx_AND (vmode, scratch, mask);
10941 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10942
10943 dest = mask;
10944 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10945 x = gen_rtx_NOT (vmode, dest);
10946 x = gen_rtx_AND (vmode, x, op0);
10947 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10948 }
10949 else
10950 {
10951 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10952 {
10953 x = gen_rtx_AND (vmode, scratch, mask);
10954 }
10955 else /* alternative 2,4 */
10956 {
10957 gcc_assert (REGNO (mask) == REGNO (scratch));
10958 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10959 x = gen_rtx_AND (vmode, scratch, op1);
10960 }
10961 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10962
10963 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10964 {
10965 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10966 x = gen_rtx_AND (vmode, dest, nmask);
10967 }
10968 else /* alternative 3,4 */
10969 {
10970 gcc_assert (REGNO (nmask) == REGNO (dest));
10971 dest = nmask;
10972 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10973 x = gen_rtx_AND (vmode, dest, op0);
10974 }
10975 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10976 }
10977
10978 x = gen_rtx_IOR (vmode, dest, scratch);
10979 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10980 }
10981
10982 /* Return TRUE or FALSE depending on whether the first SET in INSN
10983 has source and destination with matching CC modes and whether the
10984 CC mode is at least as constrained as REQ_MODE. */
10985
10986 int
10987 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10988 {
10989 rtx set;
10990 enum machine_mode set_mode;
10991
10992 set = PATTERN (insn);
10993 if (GET_CODE (set) == PARALLEL)
10994 set = XVECEXP (set, 0, 0);
10995 gcc_assert (GET_CODE (set) == SET);
10996 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10997
10998 set_mode = GET_MODE (SET_DEST (set));
10999 switch (set_mode)
11000 {
11001 case CCNOmode:
11002 if (req_mode != CCNOmode
11003 && (req_mode != CCmode
11004 || XEXP (SET_SRC (set), 1) != const0_rtx))
11005 return 0;
11006 break;
11007 case CCmode:
11008 if (req_mode == CCGCmode)
11009 return 0;
11010 /* FALLTHRU */
11011 case CCGCmode:
11012 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11013 return 0;
11014 /* FALLTHRU */
11015 case CCGOCmode:
11016 if (req_mode == CCZmode)
11017 return 0;
11018 /* FALLTHRU */
11019 case CCZmode:
11020 break;
11021
11022 default:
11023 gcc_unreachable ();
11024 }
11025
11026 return (GET_MODE (SET_SRC (set)) == set_mode);
11027 }
11028
11029 /* Generate insn patterns to do an integer compare of OPERANDS. */
11030
11031 static rtx
11032 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11033 {
11034 enum machine_mode cmpmode;
11035 rtx tmp, flags;
11036
11037 cmpmode = SELECT_CC_MODE (code, op0, op1);
11038 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11039
11040 /* This is very simple, but making the interface the same as in the
11041 FP case makes the rest of the code easier. */
11042 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11043 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11044
11045 /* Return the test that should be put into the flags user, i.e.
11046 the bcc, scc, or cmov instruction. */
11047 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11048 }
11049
11050 /* Figure out whether to use ordered or unordered fp comparisons.
11051 Return the appropriate mode to use. */
11052
11053 enum machine_mode
11054 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11055 {
11056 /* ??? In order to make all comparisons reversible, we do all comparisons
11057 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11058 all forms of trapping and nontrapping comparisons, we can make inequality
11059 comparisons trapping again, since that results in better code when using
11060 FCOM based compares. */
11061 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11062 }
11063
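/* Return the CC mode to be used for a comparison CODE of OP0 with OP1.
   Floating point operands use the mode chosen by ix86_fp_compare_mode;
   for integers we pick the least constrained mode that still provides
   the flags the code needs, e.g. CCZmode when only the zero flag
   matters (EQ/NE) and CCCmode for overflow checks of the form
   (a + b) < a that need only the carry flag.  */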
11064 enum machine_mode
11065 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11066 {
11067 enum machine_mode mode = GET_MODE (op0);
11068
11069 if (SCALAR_FLOAT_MODE_P (mode))
11070 {
11071 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11072 return ix86_fp_compare_mode (code);
11073 }
11074
11075 switch (code)
11076 {
11077 /* Only zero flag is needed. */
11078 case EQ: /* ZF=0 */
11079 case NE: /* ZF!=0 */
11080 return CCZmode;
11081 /* Codes needing carry flag. */
11082 case GEU: /* CF=0 */
11083 case LTU: /* CF=1 */
11084 /* Detect overflow checks. They need just the carry flag. */
11085 if (GET_CODE (op0) == PLUS
11086 && rtx_equal_p (op1, XEXP (op0, 0)))
11087 return CCCmode;
11088 else
11089 return CCmode;
11090 case GTU: /* CF=0 & ZF=0 */
11091 case LEU: /* CF=1 | ZF=1 */
11092 /* Detect overflow checks. They need just the carry flag. */
11093 if (GET_CODE (op0) == MINUS
11094 && rtx_equal_p (op1, XEXP (op0, 0)))
11095 return CCCmode;
11096 else
11097 return CCmode;
11098 /* Codes possibly doable only with sign flag when
11099 comparing against zero. */
11100 case GE: /* SF=OF or SF=0 */
11101 case LT: /* SF<>OF or SF=1 */
11102 if (op1 == const0_rtx)
11103 return CCGOCmode;
11104 else
11105 /* For other cases Carry flag is not required. */
11106 return CCGCmode;
11107 /* Codes doable only with the sign flag when comparing
11108 against zero, but we lack a jump instruction for that,
11109 so we need to use relational tests against the overflow
11110 flag, which thus needs to be zero. */
11111 case GT: /* ZF=0 & SF=OF */
11112 case LE: /* ZF=1 | SF<>OF */
11113 if (op1 == const0_rtx)
11114 return CCNOmode;
11115 else
11116 return CCGCmode;
11117 /* The strcmp pattern does a (use flags), and combine may ask us for the
11118 proper mode. */
11119 case USE:
11120 return CCmode;
11121 default:
11122 gcc_unreachable ();
11123 }
11124 }
11125
11126 /* Return the fixed registers used for condition codes. */
11127
11128 static bool
11129 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11130 {
11131 *p1 = FLAGS_REG;
11132 *p2 = FPSR_REG;
11133 return true;
11134 }
11135
11136 /* If two condition code modes are compatible, return a condition code
11137 mode which is compatible with both. Otherwise, return
11138 VOIDmode. */
11139
11140 static enum machine_mode
11141 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11142 {
11143 if (m1 == m2)
11144 return m1;
11145
11146 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11147 return VOIDmode;
11148
11149 if ((m1 == CCGCmode && m2 == CCGOCmode)
11150 || (m1 == CCGOCmode && m2 == CCGCmode))
11151 return CCGCmode;
11152
11153 switch (m1)
11154 {
11155 default:
11156 gcc_unreachable ();
11157
11158 case CCmode:
11159 case CCGCmode:
11160 case CCGOCmode:
11161 case CCNOmode:
11162 case CCAmode:
11163 case CCCmode:
11164 case CCOmode:
11165 case CCSmode:
11166 case CCZmode:
11167 switch (m2)
11168 {
11169 default:
11170 return VOIDmode;
11171
11172 case CCmode:
11173 case CCGCmode:
11174 case CCGOCmode:
11175 case CCNOmode:
11176 case CCAmode:
11177 case CCCmode:
11178 case CCOmode:
11179 case CCSmode:
11180 case CCZmode:
11181 return CCmode;
11182 }
11183
11184 case CCFPmode:
11185 case CCFPUmode:
11186 /* These are only compatible with themselves, which we already
11187 checked above. */
11188 return VOIDmode;
11189 }
11190 }
11191
11192 /* Split comparison code CODE into comparisons we can do using branch
11193 instructions. BYPASS_CODE is the comparison code for a branch that will
11194 branch around FIRST_CODE and SECOND_CODE. If one of the branches
11195 is not required, its value is set to UNKNOWN.
11196 We never require more than two branches. */
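/* For example, under TARGET_IEEE_FP a LT comparison cannot be tested with
   the carry flag alone, since CF is also set for unordered operands; it is
   therefore split into FIRST_CODE = UNLT guarded by BYPASS_CODE = UNORDERED,
   while NE needs an extra SECOND_CODE = UNORDERED branch to the same target
   in addition to FIRST_CODE = LTGT.  */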
11197
11198 void
11199 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11200 enum rtx_code *first_code,
11201 enum rtx_code *second_code)
11202 {
11203 *first_code = code;
11204 *bypass_code = UNKNOWN;
11205 *second_code = UNKNOWN;
11206
11207 /* The fcomi comparison sets flags as follows:
11208
11209 cmp ZF PF CF
11210 > 0 0 0
11211 < 0 0 1
11212 = 1 0 0
11213 un 1 1 1 */
11214
11215 switch (code)
11216 {
11217 case GT: /* GTU - CF=0 & ZF=0 */
11218 case GE: /* GEU - CF=0 */
11219 case ORDERED: /* PF=0 */
11220 case UNORDERED: /* PF=1 */
11221 case UNEQ: /* EQ - ZF=1 */
11222 case UNLT: /* LTU - CF=1 */
11223 case UNLE: /* LEU - CF=1 | ZF=1 */
11224 case LTGT: /* EQ - ZF=0 */
11225 break;
11226 case LT: /* LTU - CF=1 - fails on unordered */
11227 *first_code = UNLT;
11228 *bypass_code = UNORDERED;
11229 break;
11230 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11231 *first_code = UNLE;
11232 *bypass_code = UNORDERED;
11233 break;
11234 case EQ: /* EQ - ZF=1 - fails on unordered */
11235 *first_code = UNEQ;
11236 *bypass_code = UNORDERED;
11237 break;
11238 case NE: /* NE - ZF=0 - fails on unordered */
11239 *first_code = LTGT;
11240 *second_code = UNORDERED;
11241 break;
11242 case UNGE: /* GEU - CF=0 - fails on unordered */
11243 *first_code = GE;
11244 *second_code = UNORDERED;
11245 break;
11246 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11247 *first_code = GT;
11248 *second_code = UNORDERED;
11249 break;
11250 default:
11251 gcc_unreachable ();
11252 }
11253 if (!TARGET_IEEE_FP)
11254 {
11255 *second_code = UNKNOWN;
11256 *bypass_code = UNKNOWN;
11257 }
11258 }
11259
11260 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
11261 All following functions use the number of instructions as the cost metric.
11262 In the future this should be tweaked to compute bytes for optimize_size and
11263 take into account the performance of various instructions on various CPUs. */
11264 static int
11265 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11266 {
11267 if (!TARGET_IEEE_FP)
11268 return 4;
11269 /* The cost of code output by ix86_expand_fp_compare. */
11270 switch (code)
11271 {
11272 case UNLE:
11273 case UNLT:
11274 case LTGT:
11275 case GT:
11276 case GE:
11277 case UNORDERED:
11278 case ORDERED:
11279 case UNEQ:
11280 return 4;
11281 break;
11282 case LT:
11283 case NE:
11284 case EQ:
11285 case UNGE:
11286 return 5;
11287 break;
11288 case LE:
11289 case UNGT:
11290 return 6;
11291 break;
11292 default:
11293 gcc_unreachable ();
11294 }
11295 }
11296
11297 /* Return cost of comparison done using fcomi operation.
11298 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11299 static int
11300 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11301 {
11302 enum rtx_code bypass_code, first_code, second_code;
11303 /* Return arbitrarily high cost when instruction is not supported - this
11304 prevents gcc from using it. */
11305 if (!TARGET_CMOVE)
11306 return 1024;
11307 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11308 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11309 }
11310
11311 /* Return cost of comparison done using sahf operation.
11312 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11313 static int
11314 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11315 {
11316 enum rtx_code bypass_code, first_code, second_code;
11317 /* Return arbitrarily high cost when instruction is not preferred - this
11318 prevents gcc from using it. */
11319 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11320 return 1024;
11321 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11322 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11323 }
11324
11325 /* Compute cost of the comparison done using any method.
11326 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11327 static int
11328 ix86_fp_comparison_cost (enum rtx_code code)
11329 {
11330 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11331 int min;
11332
11333 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11334 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11335
11336 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11337 if (min > sahf_cost)
11338 min = sahf_cost;
11339 if (min > fcomi_cost)
11340 min = fcomi_cost;
11341 return min;
11342 }
11343
11344 /* Return true if we should use an FCOMI instruction for this
11345 fp comparison. */
11346
11347 int
11348 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11349 {
11350 enum rtx_code swapped_code = swap_condition (code);
11351
11352 return ((ix86_fp_comparison_cost (code)
11353 == ix86_fp_comparison_fcomi_cost (code))
11354 || (ix86_fp_comparison_cost (swapped_code)
11355 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11356 }
11357
11358 /* Swap, force into registers, or otherwise massage the two operands
11359 to a fp comparison. The operands are updated in place; the new
11360 comparison code is returned. */
11361
11362 static enum rtx_code
11363 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11364 {
11365 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11366 rtx op0 = *pop0, op1 = *pop1;
11367 enum machine_mode op_mode = GET_MODE (op0);
11368 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11369
11370 /* All of the unordered compare instructions only work on registers.
11371 The same is true of the fcomi compare instructions. The XFmode
11372 compare instructions require registers except when comparing
11373 against zero or when converting operand 1 from fixed point to
11374 floating point. */
11375
11376 if (!is_sse
11377 && (fpcmp_mode == CCFPUmode
11378 || (op_mode == XFmode
11379 && ! (standard_80387_constant_p (op0) == 1
11380 || standard_80387_constant_p (op1) == 1)
11381 && GET_CODE (op1) != FLOAT)
11382 || ix86_use_fcomi_compare (code)))
11383 {
11384 op0 = force_reg (op_mode, op0);
11385 op1 = force_reg (op_mode, op1);
11386 }
11387 else
11388 {
11389 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11390 things around if they appear profitable, otherwise force op0
11391 into a register. */
11392
11393 if (standard_80387_constant_p (op0) == 0
11394 || (MEM_P (op0)
11395 && ! (standard_80387_constant_p (op1) == 0
11396 || MEM_P (op1))))
11397 {
11398 rtx tmp;
11399 tmp = op0, op0 = op1, op1 = tmp;
11400 code = swap_condition (code);
11401 }
11402
11403 if (!REG_P (op0))
11404 op0 = force_reg (op_mode, op0);
11405
11406 if (CONSTANT_P (op1))
11407 {
11408 int tmp = standard_80387_constant_p (op1);
11409 if (tmp == 0)
11410 op1 = validize_mem (force_const_mem (op_mode, op1));
11411 else if (tmp == 1)
11412 {
11413 if (TARGET_CMOVE)
11414 op1 = force_reg (op_mode, op1);
11415 }
11416 else
11417 op1 = force_reg (op_mode, op1);
11418 }
11419 }
11420
11421 /* Try to rearrange the comparison to make it cheaper. */
11422 if (ix86_fp_comparison_cost (code)
11423 > ix86_fp_comparison_cost (swap_condition (code))
11424 && (REG_P (op1) || can_create_pseudo_p ()))
11425 {
11426 rtx tmp;
11427 tmp = op0, op0 = op1, op1 = tmp;
11428 code = swap_condition (code);
11429 if (!REG_P (op0))
11430 op0 = force_reg (op_mode, op0);
11431 }
11432
11433 *pop0 = op0;
11434 *pop1 = op1;
11435 return code;
11436 }
11437
11438 /* Convert the comparison codes we use to represent an FP comparison into
11439 the integer code that will result in a proper branch. Return UNKNOWN if
11440 no such code is available. */
11441
11442 enum rtx_code
11443 ix86_fp_compare_code_to_integer (enum rtx_code code)
11444 {
11445 switch (code)
11446 {
11447 case GT:
11448 return GTU;
11449 case GE:
11450 return GEU;
11451 case ORDERED:
11452 case UNORDERED:
11453 return code;
11454 break;
11455 case UNEQ:
11456 return EQ;
11457 break;
11458 case UNLT:
11459 return LTU;
11460 break;
11461 case UNLE:
11462 return LEU;
11463 break;
11464 case LTGT:
11465 return NE;
11466 break;
11467 default:
11468 return UNKNOWN;
11469 }
11470 }
11471
11472 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11473
11474 static rtx
11475 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11476 rtx *second_test, rtx *bypass_test)
11477 {
11478 enum machine_mode fpcmp_mode, intcmp_mode;
11479 rtx tmp, tmp2;
11480 int cost = ix86_fp_comparison_cost (code);
11481 enum rtx_code bypass_code, first_code, second_code;
11482
11483 fpcmp_mode = ix86_fp_compare_mode (code);
11484 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11485
11486 if (second_test)
11487 *second_test = NULL_RTX;
11488 if (bypass_test)
11489 *bypass_test = NULL_RTX;
11490
11491 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11492
11493 /* Do fcomi/sahf based test when profitable. */
11494 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11495 && (bypass_code == UNKNOWN || bypass_test)
11496 && (second_code == UNKNOWN || second_test))
11497 {
11498 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11499 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11500 tmp);
11501 if (TARGET_CMOVE)
11502 emit_insn (tmp);
11503 else
11504 {
11505 gcc_assert (TARGET_SAHF);
11506
11507 if (!scratch)
11508 scratch = gen_reg_rtx (HImode);
11509 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11510
11511 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11512 }
11513
11514 /* The FP codes work out to act like unsigned. */
11515 intcmp_mode = fpcmp_mode;
11516 code = first_code;
11517 if (bypass_code != UNKNOWN)
11518 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11519 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11520 const0_rtx);
11521 if (second_code != UNKNOWN)
11522 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11523 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11524 const0_rtx);
11525 }
11526 else
11527 {
11528 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11529 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11530 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11531 if (!scratch)
11532 scratch = gen_reg_rtx (HImode);
11533 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11534
11535 /* In the unordered case, we have to check C2 for NaN's, which
11536 doesn't happen to work out to anything nice combination-wise.
11537 So do some bit twiddling on the value we've got in AH to come
11538 up with an appropriate set of condition codes. */
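/* After the fnstsw above, AH holds the x87 condition code bits:
   C0 in bit 0 (0x01), C2 in bit 2 (0x04) and C3 in bit 6 (0x40),
   so 0x45 masks C0|C2|C3.  fcom sets them as in the flag table in
   ix86_fp_comparison_codes, with C2 marking an unordered result.  */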
11539
11540 intcmp_mode = CCNOmode;
11541 switch (code)
11542 {
11543 case GT:
11544 case UNGT:
11545 if (code == GT || !TARGET_IEEE_FP)
11546 {
11547 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11548 code = EQ;
11549 }
11550 else
11551 {
11552 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11553 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11554 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11555 intcmp_mode = CCmode;
11556 code = GEU;
11557 }
11558 break;
11559 case LT:
11560 case UNLT:
11561 if (code == LT && TARGET_IEEE_FP)
11562 {
11563 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11564 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11565 intcmp_mode = CCmode;
11566 code = EQ;
11567 }
11568 else
11569 {
11570 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11571 code = NE;
11572 }
11573 break;
11574 case GE:
11575 case UNGE:
11576 if (code == GE || !TARGET_IEEE_FP)
11577 {
11578 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11579 code = EQ;
11580 }
11581 else
11582 {
11583 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11584 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11585 GEN_INT (0x01)));
11586 code = NE;
11587 }
11588 break;
11589 case LE:
11590 case UNLE:
11591 if (code == LE && TARGET_IEEE_FP)
11592 {
11593 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11594 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11595 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11596 intcmp_mode = CCmode;
11597 code = LTU;
11598 }
11599 else
11600 {
11601 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11602 code = NE;
11603 }
11604 break;
11605 case EQ:
11606 case UNEQ:
11607 if (code == EQ && TARGET_IEEE_FP)
11608 {
11609 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11610 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11611 intcmp_mode = CCmode;
11612 code = EQ;
11613 }
11614 else
11615 {
11616 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11617 code = NE;
11618 break;
11619 }
11620 break;
11621 case NE:
11622 case LTGT:
11623 if (code == NE && TARGET_IEEE_FP)
11624 {
11625 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11626 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11627 GEN_INT (0x40)));
11628 code = NE;
11629 }
11630 else
11631 {
11632 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11633 code = EQ;
11634 }
11635 break;
11636
11637 case UNORDERED:
11638 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11639 code = NE;
11640 break;
11641 case ORDERED:
11642 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11643 code = EQ;
11644 break;
11645
11646 default:
11647 gcc_unreachable ();
11648 }
11649 }
11650
11651 /* Return the test that should be put into the flags user, i.e.
11652 the bcc, scc, or cmov instruction. */
11653 return gen_rtx_fmt_ee (code, VOIDmode,
11654 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11655 const0_rtx);
11656 }
11657
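/* Emit a comparison of ix86_compare_op0 with ix86_compare_op1 (unless a
   compare has already been emitted and recorded in ix86_compare_emitted)
   and return the rtx that the flags consumer -- bcc, setcc or cmov --
   should test.  SECOND_TEST and BYPASS_TEST, when non-NULL, receive any
   extra tests an IEEE floating point comparison may require.  */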
11658 rtx
11659 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11660 {
11661 rtx op0, op1, ret;
11662 op0 = ix86_compare_op0;
11663 op1 = ix86_compare_op1;
11664
11665 if (second_test)
11666 *second_test = NULL_RTX;
11667 if (bypass_test)
11668 *bypass_test = NULL_RTX;
11669
11670 if (ix86_compare_emitted)
11671 {
11672 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11673 ix86_compare_emitted = NULL_RTX;
11674 }
11675 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11676 {
11677 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11678 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11679 second_test, bypass_test);
11680 }
11681 else
11682 ret = ix86_expand_int_compare (code, op0, op1);
11683
11684 return ret;
11685 }
11686
11687 /* Return true if CODE will result in a nontrivial jump sequence. */
11688 bool
11689 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11690 {
11691 enum rtx_code bypass_code, first_code, second_code;
11692 if (!TARGET_CMOVE)
11693 return true;
11694 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11695 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11696 }
11697
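/* Expand a conditional branch to LABEL on comparison code CODE of the
   current ix86_compare_op0/op1.  Integer modes emit a single compare and
   jump; floating point modes may need a bypass or second jump (see
   ix86_fp_comparison_codes); double-word integer modes are split into
   separate compares of the high and low words.  */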
11698 void
11699 ix86_expand_branch (enum rtx_code code, rtx label)
11700 {
11701 rtx tmp;
11702
11703 /* If we have emitted a compare insn, go straight to simple.
11704 ix86_expand_compare won't emit anything if ix86_compare_emitted
11705 is non-NULL. */
11706 if (ix86_compare_emitted)
11707 goto simple;
11708
11709 switch (GET_MODE (ix86_compare_op0))
11710 {
11711 case QImode:
11712 case HImode:
11713 case SImode:
11714 simple:
11715 tmp = ix86_expand_compare (code, NULL, NULL);
11716 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11717 gen_rtx_LABEL_REF (VOIDmode, label),
11718 pc_rtx);
11719 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11720 return;
11721
11722 case SFmode:
11723 case DFmode:
11724 case XFmode:
11725 {
11726 rtvec vec;
11727 int use_fcomi;
11728 enum rtx_code bypass_code, first_code, second_code;
11729
11730 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11731 &ix86_compare_op1);
11732
11733 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11734
11735 /* Check whether we will use the natural sequence with one jump. If
11736 so, we can expand the jump early. Otherwise delay expansion by
11737 creating a compound insn so as not to confuse the optimizers. */
11738 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
11739 {
11740 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11741 gen_rtx_LABEL_REF (VOIDmode, label),
11742 pc_rtx, NULL_RTX, NULL_RTX);
11743 }
11744 else
11745 {
11746 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11747 ix86_compare_op0, ix86_compare_op1);
11748 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11749 gen_rtx_LABEL_REF (VOIDmode, label),
11750 pc_rtx);
11751 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11752
11753 use_fcomi = ix86_use_fcomi_compare (code);
11754 vec = rtvec_alloc (3 + !use_fcomi);
11755 RTVEC_ELT (vec, 0) = tmp;
11756 RTVEC_ELT (vec, 1)
11757 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
11758 RTVEC_ELT (vec, 2)
11759 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
11760 if (! use_fcomi)
11761 RTVEC_ELT (vec, 3)
11762 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11763
11764 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11765 }
11766 return;
11767 }
11768
11769 case DImode:
11770 if (TARGET_64BIT)
11771 goto simple;
11772 case TImode:
11773 /* Expand DImode branch into multiple compare+branch. */
11774 {
11775 rtx lo[2], hi[2], label2;
11776 enum rtx_code code1, code2, code3;
11777 enum machine_mode submode;
11778
11779 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11780 {
11781 tmp = ix86_compare_op0;
11782 ix86_compare_op0 = ix86_compare_op1;
11783 ix86_compare_op1 = tmp;
11784 code = swap_condition (code);
11785 }
11786 if (GET_MODE (ix86_compare_op0) == DImode)
11787 {
11788 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11789 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11790 submode = SImode;
11791 }
11792 else
11793 {
11794 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11795 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11796 submode = DImode;
11797 }
11798
11799 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11800 avoid two branches. This costs one extra insn, so disable when
11801 optimizing for size. */
11802
11803 if ((code == EQ || code == NE)
11804 && (!optimize_size
11805 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11806 {
11807 rtx xor0, xor1;
11808
11809 xor1 = hi[0];
11810 if (hi[1] != const0_rtx)
11811 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11812 NULL_RTX, 0, OPTAB_WIDEN);
11813
11814 xor0 = lo[0];
11815 if (lo[1] != const0_rtx)
11816 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11817 NULL_RTX, 0, OPTAB_WIDEN);
11818
11819 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11820 NULL_RTX, 0, OPTAB_WIDEN);
11821
11822 ix86_compare_op0 = tmp;
11823 ix86_compare_op1 = const0_rtx;
11824 ix86_expand_branch (code, label);
11825 return;
11826 }
11827
11828 /* Otherwise, if we are doing a less-than or greater-or-equal comparison,
11829 op1 is a constant, and the low word is zero, then we can just
11830 examine the high word. */
11831
11832 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11833 switch (code)
11834 {
11835 case LT: case LTU: case GE: case GEU:
11836 ix86_compare_op0 = hi[0];
11837 ix86_compare_op1 = hi[1];
11838 ix86_expand_branch (code, label);
11839 return;
11840 default:
11841 break;
11842 }
11843
11844 /* Otherwise, we need two or three jumps. */
11845
11846 label2 = gen_label_rtx ();
11847
11848 code1 = code;
11849 code2 = swap_condition (code);
11850 code3 = unsigned_condition (code);
11851
11852 switch (code)
11853 {
11854 case LT: case GT: case LTU: case GTU:
11855 break;
11856
11857 case LE: code1 = LT; code2 = GT; break;
11858 case GE: code1 = GT; code2 = LT; break;
11859 case LEU: code1 = LTU; code2 = GTU; break;
11860 case GEU: code1 = GTU; code2 = LTU; break;
11861
11862 case EQ: code1 = UNKNOWN; code2 = NE; break;
11863 case NE: code2 = UNKNOWN; break;
11864
11865 default:
11866 gcc_unreachable ();
11867 }
11868
11869 /*
11870 * a < b =>
11871 * if (hi(a) < hi(b)) goto true;
11872 * if (hi(a) > hi(b)) goto false;
11873 * if (lo(a) < lo(b)) goto true;
11874 * false:
11875 */
11876
11877 ix86_compare_op0 = hi[0];
11878 ix86_compare_op1 = hi[1];
11879
11880 if (code1 != UNKNOWN)
11881 ix86_expand_branch (code1, label);
11882 if (code2 != UNKNOWN)
11883 ix86_expand_branch (code2, label2);
11884
11885 ix86_compare_op0 = lo[0];
11886 ix86_compare_op1 = lo[1];
11887 ix86_expand_branch (code3, label);
11888
11889 if (code2 != UNKNOWN)
11890 emit_label (label2);
11891 return;
11892 }
11893
11894 default:
11895 gcc_unreachable ();
11896 }
11897 }
11898
11899 /* Split branch based on floating point condition. */
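/* Up to three jumps may be emitted: an optional jump around the others on
   the BYPASS condition (used for unordered operands), the main conditional
   jump, and an optional jump on the SECOND condition to the same target.
   REG_BR_PROB notes distribute split_branch_probability across them.  */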
11900 void
11901 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11902 rtx target1, rtx target2, rtx tmp, rtx pushed)
11903 {
11904 rtx second, bypass;
11905 rtx label = NULL_RTX;
11906 rtx condition;
11907 int bypass_probability = -1, second_probability = -1, probability = -1;
11908 rtx i;
11909
11910 if (target2 != pc_rtx)
11911 {
11912 rtx tmp = target2;
11913 code = reverse_condition_maybe_unordered (code);
11914 target2 = target1;
11915 target1 = tmp;
11916 }
11917
11918 condition = ix86_expand_fp_compare (code, op1, op2,
11919 tmp, &second, &bypass);
11920
11921 /* Remove pushed operand from stack. */
11922 if (pushed)
11923 ix86_free_from_memory (GET_MODE (pushed));
11924
11925 if (split_branch_probability >= 0)
11926 {
11927 /* Distribute the probabilities across the jumps.
11928 Assume that the BYPASS and SECOND branches always test
11929 for UNORDERED. */
11930 probability = split_branch_probability;
11931
11932 /* A value of 1 is low enough that the probability does not need
11933 to be updated. Later we may run some experiments and see
11934 if unordered values are more frequent in practice. */
11935 if (bypass)
11936 bypass_probability = 1;
11937 if (second)
11938 second_probability = 1;
11939 }
11940 if (bypass != NULL_RTX)
11941 {
11942 label = gen_label_rtx ();
11943 i = emit_jump_insn (gen_rtx_SET
11944 (VOIDmode, pc_rtx,
11945 gen_rtx_IF_THEN_ELSE (VOIDmode,
11946 bypass,
11947 gen_rtx_LABEL_REF (VOIDmode,
11948 label),
11949 pc_rtx)));
11950 if (bypass_probability >= 0)
11951 REG_NOTES (i)
11952 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11953 GEN_INT (bypass_probability),
11954 REG_NOTES (i));
11955 }
11956 i = emit_jump_insn (gen_rtx_SET
11957 (VOIDmode, pc_rtx,
11958 gen_rtx_IF_THEN_ELSE (VOIDmode,
11959 condition, target1, target2)));
11960 if (probability >= 0)
11961 REG_NOTES (i)
11962 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11963 GEN_INT (probability),
11964 REG_NOTES (i));
11965 if (second != NULL_RTX)
11966 {
11967 i = emit_jump_insn (gen_rtx_SET
11968 (VOIDmode, pc_rtx,
11969 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11970 target2)));
11971 if (second_probability >= 0)
11972 REG_NOTES (i)
11973 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11974 GEN_INT (second_probability),
11975 REG_NOTES (i));
11976 }
11977 if (label != NULL_RTX)
11978 emit_label (label);
11979 }
11980
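/* Expand a setcc of comparison CODE into the QImode register DEST.  When
   the floating point comparison requires a second or bypass test, the two
   QImode results are combined with ior/and.  Returns 1 on success and 0
   for double-word integer comparisons, which must go through branches.  */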
11981 int
11982 ix86_expand_setcc (enum rtx_code code, rtx dest)
11983 {
11984 rtx ret, tmp, tmpreg, equiv;
11985 rtx second_test, bypass_test;
11986
11987 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11988 return 0; /* FAIL */
11989
11990 gcc_assert (GET_MODE (dest) == QImode);
11991
11992 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11993 PUT_MODE (ret, QImode);
11994
11995 tmp = dest;
11996 tmpreg = dest;
11997
11998 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11999 if (bypass_test || second_test)
12000 {
12001 rtx test = second_test;
12002 int bypass = 0;
12003 rtx tmp2 = gen_reg_rtx (QImode);
12004 if (bypass_test)
12005 {
12006 gcc_assert (!second_test);
12007 test = bypass_test;
12008 bypass = 1;
12009 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12010 }
12011 PUT_MODE (test, QImode);
12012 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12013
12014 if (bypass)
12015 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12016 else
12017 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12018 }
12019
12020 /* Attach a REG_EQUAL note describing the comparison result. */
12021 if (ix86_compare_op0 && ix86_compare_op1)
12022 {
12023 equiv = simplify_gen_relational (code, QImode,
12024 GET_MODE (ix86_compare_op0),
12025 ix86_compare_op0, ix86_compare_op1);
12026 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12027 }
12028
12029 return 1; /* DONE */
12030 }
12031
12032 /* Expand a comparison that sets or clears the carry flag. Return true when
12033 successful, and set *POP to the comparison to be used by the flags consumer. */
12034 static bool
12035 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12036 {
12037 enum machine_mode mode =
12038 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12039
12040 /* Do not handle DImode compares that go through special path. */
12041 if (mode == (TARGET_64BIT ? TImode : DImode))
12042 return false;
12043
12044 if (SCALAR_FLOAT_MODE_P (mode))
12045 {
12046 rtx second_test = NULL, bypass_test = NULL;
12047 rtx compare_op, compare_seq;
12048
12049 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12050
12051 /* Shortcut: the following common codes never translate
12052 into carry flag compares. */
12053 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12054 || code == ORDERED || code == UNORDERED)
12055 return false;
12056
12057 /* These comparisons require the zero flag; swap the operands so they no longer do. */
12058 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12059 && !TARGET_IEEE_FP)
12060 {
12061 rtx tmp = op0;
12062 op0 = op1;
12063 op1 = tmp;
12064 code = swap_condition (code);
12065 }
12066
12067 /* Try to expand the comparison and verify that we end up with a
12068 carry flag based comparison. This fails only when we decide to
12069 expand the comparison using arithmetic, which is not a common
12070 scenario. */
12071 start_sequence ();
12072 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12073 &second_test, &bypass_test);
12074 compare_seq = get_insns ();
12075 end_sequence ();
12076
12077 if (second_test || bypass_test)
12078 return false;
12079
12080 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12081 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12082 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12083 else
12084 code = GET_CODE (compare_op);
12085
12086 if (code != LTU && code != GEU)
12087 return false;
12088
12089 emit_insn (compare_seq);
12090 *pop = compare_op;
12091 return true;
12092 }
12093
12094 if (!INTEGRAL_MODE_P (mode))
12095 return false;
12096
12097 switch (code)
12098 {
12099 case LTU:
12100 case GEU:
12101 break;
12102
12103 /* Convert a==0 into (unsigned)a<1. */
12104 case EQ:
12105 case NE:
12106 if (op1 != const0_rtx)
12107 return false;
12108 op1 = const1_rtx;
12109 code = (code == EQ ? LTU : GEU);
12110 break;
12111
12112 /* Convert a>b into b<a or a>=b-1. */
12113 case GTU:
12114 case LEU:
12115 if (CONST_INT_P (op1))
12116 {
12117 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12118 /* Bail out on overflow. We could still swap the operands, but that
12119 would force loading the constant into a register. */
12120 if (op1 == const0_rtx
12121 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12122 return false;
12123 code = (code == GTU ? GEU : LTU);
12124 }
12125 else
12126 {
12127 rtx tmp = op1;
12128 op1 = op0;
12129 op0 = tmp;
12130 code = (code == GTU ? LTU : GEU);
12131 }
12132 break;
12133
12134 /* Convert a>=0 into (unsigned)a<0x80000000. */
12135 case LT:
12136 case GE:
12137 if (mode == DImode || op1 != const0_rtx)
12138 return false;
12139 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12140 code = (code == LT ? GEU : LTU);
12141 break;
12142 case LE:
12143 case GT:
12144 if (mode == DImode || op1 != constm1_rtx)
12145 return false;
12146 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12147 code = (code == LE ? GEU : LTU);
12148 break;
12149
12150 default:
12151 return false;
12152 }
12153 /* Swapping operands may cause a constant to appear as the first operand. */
12154 if (!nonimmediate_operand (op0, VOIDmode))
12155 {
12156 if (!can_create_pseudo_p ())
12157 return false;
12158 op0 = force_reg (mode, op0);
12159 }
12160 ix86_compare_op0 = op0;
12161 ix86_compare_op1 = op1;
12162 *pop = ix86_expand_compare (code, NULL, NULL);
12163 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12164 return true;
12165 }
12166
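/* Expand an integer conditional move operands[0] = operands[1]
   ? operands[2] : operands[3].  When both arms are constants a number of
   branchless sbb/setcc/lea based sequences are tried before falling back
   to cmov.  Returns 1 for DONE and 0 for FAIL, following the md expander
   convention.  */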
12167 int
12168 ix86_expand_int_movcc (rtx operands[])
12169 {
12170 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12171 rtx compare_seq, compare_op;
12172 rtx second_test, bypass_test;
12173 enum machine_mode mode = GET_MODE (operands[0]);
12174 bool sign_bit_compare_p = false;
12175
12176 start_sequence ();
12177 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12178 compare_seq = get_insns ();
12179 end_sequence ();
12180
12181 compare_code = GET_CODE (compare_op);
12182
12183 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12184 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12185 sign_bit_compare_p = true;
12186
12187 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12188 HImode insns, we'd be swallowed in word prefix ops. */
12189
12190 if ((mode != HImode || TARGET_FAST_PREFIX)
12191 && (mode != (TARGET_64BIT ? TImode : DImode))
12192 && CONST_INT_P (operands[2])
12193 && CONST_INT_P (operands[3]))
12194 {
12195 rtx out = operands[0];
12196 HOST_WIDE_INT ct = INTVAL (operands[2]);
12197 HOST_WIDE_INT cf = INTVAL (operands[3]);
12198 HOST_WIDE_INT diff;
12199
12200 diff = ct - cf;
12201 /* Sign bit compares are better done using shifts than by using
12202 sbb. */
12203 if (sign_bit_compare_p
12204 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12205 ix86_compare_op1, &compare_op))
12206 {
12207 /* Detect overlap between destination and compare sources. */
12208 rtx tmp = out;
12209
12210 if (!sign_bit_compare_p)
12211 {
12212 bool fpcmp = false;
12213
12214 compare_code = GET_CODE (compare_op);
12215
12216 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12217 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12218 {
12219 fpcmp = true;
12220 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12221 }
12222
12223 /* To simplify the rest of the code, restrict to the GEU case. */
12224 if (compare_code == LTU)
12225 {
12226 HOST_WIDE_INT tmp = ct;
12227 ct = cf;
12228 cf = tmp;
12229 compare_code = reverse_condition (compare_code);
12230 code = reverse_condition (code);
12231 }
12232 else
12233 {
12234 if (fpcmp)
12235 PUT_CODE (compare_op,
12236 reverse_condition_maybe_unordered
12237 (GET_CODE (compare_op)));
12238 else
12239 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12240 }
12241 diff = ct - cf;
12242
12243 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12244 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12245 tmp = gen_reg_rtx (mode);
12246
12247 if (mode == DImode)
12248 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12249 else
12250 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12251 }
12252 else
12253 {
12254 if (code == GT || code == GE)
12255 code = reverse_condition (code);
12256 else
12257 {
12258 HOST_WIDE_INT tmp = ct;
12259 ct = cf;
12260 cf = tmp;
12261 diff = ct - cf;
12262 }
12263 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12264 ix86_compare_op1, VOIDmode, 0, -1);
12265 }
12266
12267 if (diff == 1)
12268 {
12269 /*
12270 * cmpl op0,op1
12271 * sbbl dest,dest
12272 * [addl dest, ct]
12273 *
12274 * Size 5 - 8.
12275 */
12276 if (ct)
12277 tmp = expand_simple_binop (mode, PLUS,
12278 tmp, GEN_INT (ct),
12279 copy_rtx (tmp), 1, OPTAB_DIRECT);
12280 }
12281 else if (cf == -1)
12282 {
12283 /*
12284 * cmpl op0,op1
12285 * sbbl dest,dest
12286 * orl $ct, dest
12287 *
12288 * Size 8.
12289 */
12290 tmp = expand_simple_binop (mode, IOR,
12291 tmp, GEN_INT (ct),
12292 copy_rtx (tmp), 1, OPTAB_DIRECT);
12293 }
12294 else if (diff == -1 && ct)
12295 {
12296 /*
12297 * cmpl op0,op1
12298 * sbbl dest,dest
12299 * notl dest
12300 * [addl dest, cf]
12301 *
12302 * Size 8 - 11.
12303 */
12304 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12305 if (cf)
12306 tmp = expand_simple_binop (mode, PLUS,
12307 copy_rtx (tmp), GEN_INT (cf),
12308 copy_rtx (tmp), 1, OPTAB_DIRECT);
12309 }
12310 else
12311 {
12312 /*
12313 * cmpl op0,op1
12314 * sbbl dest,dest
12315 * [notl dest]
12316 * andl cf - ct, dest
12317 * [addl dest, ct]
12318 *
12319 * Size 8 - 11.
12320 */
12321
12322 if (cf == 0)
12323 {
12324 cf = ct;
12325 ct = 0;
12326 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12327 }
12328
12329 tmp = expand_simple_binop (mode, AND,
12330 copy_rtx (tmp),
12331 gen_int_mode (cf - ct, mode),
12332 copy_rtx (tmp), 1, OPTAB_DIRECT);
12333 if (ct)
12334 tmp = expand_simple_binop (mode, PLUS,
12335 copy_rtx (tmp), GEN_INT (ct),
12336 copy_rtx (tmp), 1, OPTAB_DIRECT);
12337 }
12338
12339 if (!rtx_equal_p (tmp, out))
12340 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12341
12342 return 1; /* DONE */
12343 }
12344
12345 if (diff < 0)
12346 {
12347 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12348
12349 HOST_WIDE_INT tmp;
12350 tmp = ct, ct = cf, cf = tmp;
12351 diff = -diff;
12352
12353 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12354 {
12355 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12356
12357 /* We may be reversing an unordered compare to a normal compare, which
12358 is not valid in general (we may convert a non-trapping condition
12359 into a trapping one); however, on i386 we currently emit all
12360 comparisons unordered. */
12361 compare_code = reverse_condition_maybe_unordered (compare_code);
12362 code = reverse_condition_maybe_unordered (code);
12363 }
12364 else
12365 {
12366 compare_code = reverse_condition (compare_code);
12367 code = reverse_condition (code);
12368 }
12369 }
12370
12371 compare_code = UNKNOWN;
12372 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12373 && CONST_INT_P (ix86_compare_op1))
12374 {
12375 if (ix86_compare_op1 == const0_rtx
12376 && (code == LT || code == GE))
12377 compare_code = code;
12378 else if (ix86_compare_op1 == constm1_rtx)
12379 {
12380 if (code == LE)
12381 compare_code = LT;
12382 else if (code == GT)
12383 compare_code = GE;
12384 }
12385 }
12386
12387 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12388 if (compare_code != UNKNOWN
12389 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12390 && (cf == -1 || ct == -1))
12391 {
12392 /* If lea code below could be used, only optimize
12393 if it results in a 2 insn sequence. */
12394
12395 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12396 || diff == 3 || diff == 5 || diff == 9)
12397 || (compare_code == LT && ct == -1)
12398 || (compare_code == GE && cf == -1))
12399 {
12400 /*
12401 * notl op1 (if necessary)
12402 * sarl $31, op1
12403 * orl cf, op1
12404 */
12405 if (ct != -1)
12406 {
12407 cf = ct;
12408 ct = -1;
12409 code = reverse_condition (code);
12410 }
12411
12412 out = emit_store_flag (out, code, ix86_compare_op0,
12413 ix86_compare_op1, VOIDmode, 0, -1);
12414
12415 out = expand_simple_binop (mode, IOR,
12416 out, GEN_INT (cf),
12417 out, 1, OPTAB_DIRECT);
12418 if (out != operands[0])
12419 emit_move_insn (operands[0], out);
12420
12421 return 1; /* DONE */
12422 }
12423 }
12424
12425
12426 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12427 || diff == 3 || diff == 5 || diff == 9)
12428 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12429 && (mode != DImode
12430 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12431 {
12432 /*
12433 * xorl dest,dest
12434 * cmpl op1,op2
12435 * setcc dest
12436 * lea cf(dest*(ct-cf)),dest
12437 *
12438 * Size 14.
12439 *
12440 * This also catches the degenerate setcc-only case.
12441 */
12442
12443 rtx tmp;
12444 int nops;
12445
12446 out = emit_store_flag (out, code, ix86_compare_op0,
12447 ix86_compare_op1, VOIDmode, 0, 1);
12448
12449 nops = 0;
12450 /* On x86_64 the lea instruction operates on Pmode, so we need
12451 to get the arithmetic done in the proper mode to match. */
12452 if (diff == 1)
12453 tmp = copy_rtx (out);
12454 else
12455 {
12456 rtx out1;
12457 out1 = copy_rtx (out);
12458 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12459 nops++;
12460 if (diff & 1)
12461 {
12462 tmp = gen_rtx_PLUS (mode, tmp, out1);
12463 nops++;
12464 }
12465 }
12466 if (cf != 0)
12467 {
12468 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12469 nops++;
12470 }
12471 if (!rtx_equal_p (tmp, out))
12472 {
12473 if (nops == 1)
12474 out = force_operand (tmp, copy_rtx (out));
12475 else
12476 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12477 }
12478 if (!rtx_equal_p (out, operands[0]))
12479 emit_move_insn (operands[0], copy_rtx (out));
12480
12481 return 1; /* DONE */
12482 }
12483
12484 /*
12485 * General case: Jumpful:
12486 * xorl dest,dest cmpl op1, op2
12487 * cmpl op1, op2 movl ct, dest
12488 * setcc dest jcc 1f
12489 * decl dest movl cf, dest
12490 * andl (cf-ct),dest 1:
12491 * addl ct,dest
12492 *
12493 * Size 20. Size 14.
12494 *
12495 * This is reasonably steep, but branch mispredict costs are
12496 * high on modern cpus, so consider failing only if optimizing
12497 * for space.
12498 */
12499
12500 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12501 && BRANCH_COST >= 2)
12502 {
12503 if (cf == 0)
12504 {
12505 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12506
12507 cf = ct;
12508 ct = 0;
12509
12510 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12511 {
12512 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12513
12514 /* We may be reversing an unordered compare to a normal compare,
12515 which is not valid in general (we may convert a non-trapping
12516 condition into a trapping one); however, on i386 we currently
12517 emit all comparisons unordered. */
12518 code = reverse_condition_maybe_unordered (code);
12519 }
12520 else
12521 {
12522 code = reverse_condition (code);
12523 if (compare_code != UNKNOWN)
12524 compare_code = reverse_condition (compare_code);
12525 }
12526 }
12527
12528 if (compare_code != UNKNOWN)
12529 {
12530 /* notl op1 (if needed)
12531 sarl $31, op1
12532 andl (cf-ct), op1
12533 addl ct, op1
12534
12535 For x < 0 (resp. x <= -1) there will be no notl,
12536 so if possible swap the constants to get rid of the
12537 complement.
12538 True/false will be -1/0 while code below (store flag
12539 followed by decrement) is 0/-1, so the constants need
12540 to be exchanged once more. */
12541
12542 if (compare_code == GE || !cf)
12543 {
12544 code = reverse_condition (code);
12545 compare_code = LT;
12546 }
12547 else
12548 {
12549 HOST_WIDE_INT tmp = cf;
12550 cf = ct;
12551 ct = tmp;
12552 }
12553
12554 out = emit_store_flag (out, code, ix86_compare_op0,
12555 ix86_compare_op1, VOIDmode, 0, -1);
12556 }
12557 else
12558 {
12559 out = emit_store_flag (out, code, ix86_compare_op0,
12560 ix86_compare_op1, VOIDmode, 0, 1);
12561
12562 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12563 copy_rtx (out), 1, OPTAB_DIRECT);
12564 }
12565
12566 out = expand_simple_binop (mode, AND, copy_rtx (out),
12567 gen_int_mode (cf - ct, mode),
12568 copy_rtx (out), 1, OPTAB_DIRECT);
12569 if (ct)
12570 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12571 copy_rtx (out), 1, OPTAB_DIRECT);
12572 if (!rtx_equal_p (out, operands[0]))
12573 emit_move_insn (operands[0], copy_rtx (out));
12574
12575 return 1; /* DONE */
12576 }
12577 }
12578
12579 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12580 {
12581 /* Try a few more things with specific constants and a variable. */
12582
12583 optab op;
12584 rtx var, orig_out, out, tmp;
12585
12586 if (BRANCH_COST <= 2)
12587 return 0; /* FAIL */
12588
12589 /* If one of the two arms is an interesting constant (0 or -1), first
12590 produce a conditional 0/-1 value and then mask the variable arm in with a logical operation. */
12591
12592 if (CONST_INT_P (operands[2]))
12593 {
12594 var = operands[3];
12595 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12596 operands[3] = constm1_rtx, op = and_optab;
12597 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12598 operands[3] = const0_rtx, op = ior_optab;
12599 else
12600 return 0; /* FAIL */
12601 }
12602 else if (CONST_INT_P (operands[3]))
12603 {
12604 var = operands[2];
12605 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12606 operands[2] = constm1_rtx, op = and_optab;
12607 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12608 operands[2] = const0_rtx, op = ior_optab;
12609 else
12610 return 0; /* FAIL */
12611 }
12612 else
12613 return 0; /* FAIL */
12614
12615 orig_out = operands[0];
12616 tmp = gen_reg_rtx (mode);
12617 operands[0] = tmp;
12618
12619 /* Recurse to get the constant loaded. */
12620 if (ix86_expand_int_movcc (operands) == 0)
12621 return 0; /* FAIL */
12622
12623 /* Mask in the interesting variable. */
12624 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12625 OPTAB_WIDEN);
12626 if (!rtx_equal_p (out, orig_out))
12627 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12628
12629 return 1; /* DONE */
12630 }
12631
12632 /*
12633 * For comparison with above,
12634 *
12635 * movl cf,dest
12636 * movl ct,tmp
12637 * cmpl op1,op2
12638 * cmovcc tmp,dest
12639 *
12640 * Size 15.
12641 */
12642
12643 if (! nonimmediate_operand (operands[2], mode))
12644 operands[2] = force_reg (mode, operands[2]);
12645 if (! nonimmediate_operand (operands[3], mode))
12646 operands[3] = force_reg (mode, operands[3]);
12647
12648 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12649 {
12650 rtx tmp = gen_reg_rtx (mode);
12651 emit_move_insn (tmp, operands[3]);
12652 operands[3] = tmp;
12653 }
12654 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12655 {
12656 rtx tmp = gen_reg_rtx (mode);
12657 emit_move_insn (tmp, operands[2]);
12658 operands[2] = tmp;
12659 }
12660
12661 if (! register_operand (operands[2], VOIDmode)
12662 && (mode == QImode
12663 || ! register_operand (operands[3], VOIDmode)))
12664 operands[2] = force_reg (mode, operands[2]);
12665
12666 if (mode == QImode
12667 && ! register_operand (operands[3], VOIDmode))
12668 operands[3] = force_reg (mode, operands[3]);
12669
12670 emit_insn (compare_seq);
12671 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12672 gen_rtx_IF_THEN_ELSE (mode,
12673 compare_op, operands[2],
12674 operands[3])));
12675 if (bypass_test)
12676 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12677 gen_rtx_IF_THEN_ELSE (mode,
12678 bypass_test,
12679 copy_rtx (operands[3]),
12680 copy_rtx (operands[0]))));
12681 if (second_test)
12682 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12683 gen_rtx_IF_THEN_ELSE (mode,
12684 second_test,
12685 copy_rtx (operands[2]),
12686 copy_rtx (operands[0]))));
12687
12688 return 1; /* DONE */
12689 }
12690
12691 /* Swap, force into registers, or otherwise massage the two operands
12692 to an sse comparison with a mask result. Thus we differ a bit from
12693 ix86_prepare_fp_compare_args which expects to produce a flags result.
12694
12695 The DEST operand exists to help determine whether to commute commutative
12696 operators. The POP0/POP1 operands are updated in place. The new
12697 comparison code is returned, or UNKNOWN if not implementable. */
12698
12699 static enum rtx_code
12700 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12701 rtx *pop0, rtx *pop1)
12702 {
12703 rtx tmp;
12704
12705 switch (code)
12706 {
12707 case LTGT:
12708 case UNEQ:
12709 /* We have no LTGT as an operator. We could implement it with
12710 NE & ORDERED, but this requires an extra temporary. It's
12711 not clear that it's worth it. */
12712 return UNKNOWN;
12713
12714 case LT:
12715 case LE:
12716 case UNGT:
12717 case UNGE:
12718 /* These are supported directly. */
12719 break;
12720
12721 case EQ:
12722 case NE:
12723 case UNORDERED:
12724 case ORDERED:
12725 /* For commutative operators, try to canonicalize the destination
12726 operand to be first in the comparison - this helps reload to
12727 avoid extra moves. */
12728 if (!dest || !rtx_equal_p (dest, *pop1))
12729 break;
12730 /* FALLTHRU */
12731
12732 case GE:
12733 case GT:
12734 case UNLE:
12735 case UNLT:
12736 /* These are not supported directly. Swap the comparison operands
12737 to transform into something that is supported. */
12738 tmp = *pop0;
12739 *pop0 = *pop1;
12740 *pop1 = tmp;
12741 code = swap_condition (code);
12742 break;
12743
12744 default:
12745 gcc_unreachable ();
12746 }
12747
12748 return code;
12749 }
12750
12751 /* Detect conditional moves that exactly match min/max operational
12752 semantics. Note that this is IEEE safe, as long as we don't
12753 interchange the operands.
12754
12755 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12756 and TRUE if the operation is successful and instructions are emitted. */
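/* For example, a < b ? a : b maps to SMIN and a < b ? b : a to SMAX; when
   NaNs or signed zeros may matter, the UNSPEC_IEEE_MIN/MAX forms are used
   instead so that the operand order is preserved.  */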
12757
12758 static bool
12759 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12760 rtx cmp_op1, rtx if_true, rtx if_false)
12761 {
12762 enum machine_mode mode;
12763 bool is_min;
12764 rtx tmp;
12765
12766 if (code == LT)
12767 ;
12768 else if (code == UNGE)
12769 {
12770 tmp = if_true;
12771 if_true = if_false;
12772 if_false = tmp;
12773 }
12774 else
12775 return false;
12776
12777 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12778 is_min = true;
12779 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12780 is_min = false;
12781 else
12782 return false;
12783
12784 mode = GET_MODE (dest);
12785
12786 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12787 but MODE may be a vector mode and thus not appropriate. */
12788 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12789 {
12790 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12791 rtvec v;
12792
12793 if_true = force_reg (mode, if_true);
12794 v = gen_rtvec (2, if_true, if_false);
12795 tmp = gen_rtx_UNSPEC (mode, v, u);
12796 }
12797 else
12798 {
12799 code = is_min ? SMIN : SMAX;
12800 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12801 }
12802
12803 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12804 return true;
12805 }
12806
12807 /* Expand an sse vector comparison. Return the register with the result. */
12808
12809 static rtx
12810 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12811 rtx op_true, rtx op_false)
12812 {
12813 enum machine_mode mode = GET_MODE (dest);
12814 rtx x;
12815
12816 cmp_op0 = force_reg (mode, cmp_op0);
12817 if (!nonimmediate_operand (cmp_op1, mode))
12818 cmp_op1 = force_reg (mode, cmp_op1);
12819
12820 if (optimize
12821 || reg_overlap_mentioned_p (dest, op_true)
12822 || reg_overlap_mentioned_p (dest, op_false))
12823 dest = gen_reg_rtx (mode);
12824
12825 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12826 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12827
12828 return dest;
12829 }
12830
12831 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12832 operations. This is used for both scalar and vector conditional moves. */
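/* The general form computes DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE),
   with the obvious shortcuts when either arm is the zero constant.  */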
12833
12834 static void
12835 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12836 {
12837 enum machine_mode mode = GET_MODE (dest);
12838 rtx t2, t3, x;
12839
12840 if (op_false == CONST0_RTX (mode))
12841 {
12842 op_true = force_reg (mode, op_true);
12843 x = gen_rtx_AND (mode, cmp, op_true);
12844 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12845 }
12846 else if (op_true == CONST0_RTX (mode))
12847 {
12848 op_false = force_reg (mode, op_false);
12849 x = gen_rtx_NOT (mode, cmp);
12850 x = gen_rtx_AND (mode, x, op_false);
12851 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12852 }
12853 else
12854 {
12855 op_true = force_reg (mode, op_true);
12856 op_false = force_reg (mode, op_false);
12857
12858 t2 = gen_reg_rtx (mode);
12859 if (optimize)
12860 t3 = gen_reg_rtx (mode);
12861 else
12862 t3 = dest;
12863
12864 x = gen_rtx_AND (mode, op_true, cmp);
12865 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12866
12867 x = gen_rtx_NOT (mode, cmp);
12868 x = gen_rtx_AND (mode, x, op_false);
12869 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12870
12871 x = gen_rtx_IOR (mode, t3, t2);
12872 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12873 }
12874 }
12875
12876 /* Expand a floating-point conditional move. Return true if successful. */
12877
12878 int
12879 ix86_expand_fp_movcc (rtx operands[])
12880 {
12881 enum machine_mode mode = GET_MODE (operands[0]);
12882 enum rtx_code code = GET_CODE (operands[1]);
12883 rtx tmp, compare_op, second_test, bypass_test;
12884
12885 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12886 {
12887 enum machine_mode cmode;
12888
12889 /* Since we've no cmove for sse registers, don't force bad register
12890 allocation just to gain access to it. Deny movcc when the
12891 comparison mode doesn't match the move mode. */
12892 cmode = GET_MODE (ix86_compare_op0);
12893 if (cmode == VOIDmode)
12894 cmode = GET_MODE (ix86_compare_op1);
12895 if (cmode != mode)
12896 return 0;
12897
12898 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12899 &ix86_compare_op0,
12900 &ix86_compare_op1);
12901 if (code == UNKNOWN)
12902 return 0;
12903
12904 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12905 ix86_compare_op1, operands[2],
12906 operands[3]))
12907 return 1;
12908
12909 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12910 ix86_compare_op1, operands[2], operands[3]);
12911 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12912 return 1;
12913 }
12914
12915 /* The floating point conditional move instructions don't directly
12916 support conditions resulting from a signed integer comparison. */
12917
12918 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12919
12923 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12924 {
12925 gcc_assert (!second_test && !bypass_test);
12926 tmp = gen_reg_rtx (QImode);
12927 ix86_expand_setcc (code, tmp);
12928 code = NE;
12929 ix86_compare_op0 = tmp;
12930 ix86_compare_op1 = const0_rtx;
12931 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12932 }
12933 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12934 {
12935 tmp = gen_reg_rtx (mode);
12936 emit_move_insn (tmp, operands[3]);
12937 operands[3] = tmp;
12938 }
12939 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12940 {
12941 tmp = gen_reg_rtx (mode);
12942 emit_move_insn (tmp, operands[2]);
12943 operands[2] = tmp;
12944 }
12945
12946 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12947 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12948 operands[2], operands[3])));
12949 if (bypass_test)
12950 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12951 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12952 operands[3], operands[0])));
12953 if (second_test)
12954 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12955 gen_rtx_IF_THEN_ELSE (mode, second_test,
12956 operands[2], operands[0])));
12957
12958 return 1;
12959 }
12960
12961 /* Expand a floating-point vector conditional move; a vcond operation
12962 rather than a movcc operation. */
12963
12964 bool
12965 ix86_expand_fp_vcond (rtx operands[])
12966 {
12967 enum rtx_code code = GET_CODE (operands[3]);
12968 rtx cmp;
12969
12970 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12971 &operands[4], &operands[5]);
12972 if (code == UNKNOWN)
12973 return false;
12974
12975 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12976 operands[5], operands[1], operands[2]))
12977 return true;
12978
12979 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12980 operands[1], operands[2]);
12981 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12982 return true;
12983 }
12984
12985 /* Expand a signed/unsigned integral vector conditional move. */
12986
12987 bool
12988 ix86_expand_int_vcond (rtx operands[])
12989 {
12990 enum machine_mode mode = GET_MODE (operands[0]);
12991 enum rtx_code code = GET_CODE (operands[3]);
12992 bool negate = false;
12993 rtx x, cop0, cop1;
12994
12995 cop0 = operands[4];
12996 cop1 = operands[5];
12997
12998 /* Canonicalize the comparison to EQ, GT, GTU. */
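/* Informally: NE, LE and LEU are reversed to EQ, GT and GTU with the select
   arms swapped (NEGATE); GE and GEU are reversed to LT/LTU with the arms
   swapped and then fall through to the LT/LTU case, which swaps the
   comparison operands to yield GT/GTU.  */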
12999 switch (code)
13000 {
13001 case EQ:
13002 case GT:
13003 case GTU:
13004 break;
13005
13006 case NE:
13007 case LE:
13008 case LEU:
13009 code = reverse_condition (code);
13010 negate = true;
13011 break;
13012
13013 case GE:
13014 case GEU:
13015 code = reverse_condition (code);
13016 negate = true;
13017 /* FALLTHRU */
13018
13019 case LT:
13020 case LTU:
13021 code = swap_condition (code);
13022 x = cop0, cop0 = cop1, cop1 = x;
13023 break;
13024
13025 default:
13026 gcc_unreachable ();
13027 }
13028
13029 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13030 if (mode == V2DImode)
13031 {
13032 switch (code)
13033 {
13034 case EQ:
13035 /* SSE4.1 supports EQ. */
13036 if (!TARGET_SSE4_1)
13037 return false;
13038 break;
13039
13040 case GT:
13041 case GTU:
13042 /* SSE4.2 supports GT/GTU. */
13043 if (!TARGET_SSE4_2)
13044 return false;
13045 break;
13046
13047 default:
13048 gcc_unreachable ();
13049 }
13050 }
13051
13052 /* Unsigned parallel compare is not supported by the hardware. Play some
13053 tricks to turn this into a signed comparison against 0. */
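/* For the V16QImode/V8HImode case below, note that the unsigned saturating
   subtraction (cop0 -us cop1) is zero exactly when cop0 <= cop1 as unsigned
   values, so testing it for equality with zero and flipping NEGATE recovers
   the GTU result.  The V4SImode/V2DImode case instead forms a sign-adjusted
   subtraction that is then tested with a signed GT against zero.  */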
13054 if (code == GTU)
13055 {
13056 cop0 = force_reg (mode, cop0);
13057
13058 switch (mode)
13059 {
13060 case V4SImode:
13061 case V2DImode:
13062 {
13063 rtx t1, t2, mask;
13064
13065 /* Perform a parallel modulo subtraction. */
13066 t1 = gen_reg_rtx (mode);
13067 emit_insn ((mode == V4SImode
13068 ? gen_subv4si3
13069 : gen_subv2di3) (t1, cop0, cop1));
13070
13071 /* Extract the original sign bit of op0. */
13072 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13073 true, false);
13074 t2 = gen_reg_rtx (mode);
13075 emit_insn ((mode == V4SImode
13076 ? gen_andv4si3
13077 : gen_andv2di3) (t2, cop0, mask));
13078
13079 /* XOR it back into the result of the subtraction. This results
13080 in the sign bit set iff we saw unsigned underflow. */
13081 x = gen_reg_rtx (mode);
13082 emit_insn ((mode == V4SImode
13083 ? gen_xorv4si3
13084 : gen_xorv2di3) (x, t1, t2));
13085
13086 code = GT;
13087 }
13088 break;
13089
13090 case V16QImode:
13091 case V8HImode:
13092 /* Perform a parallel unsigned saturating subtraction. */
13093 x = gen_reg_rtx (mode);
13094 emit_insn (gen_rtx_SET (VOIDmode, x,
13095 gen_rtx_US_MINUS (mode, cop0, cop1)));
13096
13097 code = EQ;
13098 negate = !negate;
13099 break;
13100
13101 default:
13102 gcc_unreachable ();
13103 }
13104
13105 cop0 = x;
13106 cop1 = CONST0_RTX (mode);
13107 }
13108
13109 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13110 operands[1+negate], operands[2-negate]);
13111
13112 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13113 operands[2-negate]);
13114 return true;
13115 }
13116
13117 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13118 true if we should do zero extension, else sign extension. HIGH_P is
13119 true if we want the N/2 high elements, else the low elements. */
13120
13121 void
13122 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13123 {
13124 enum machine_mode imode = GET_MODE (operands[1]);
13125 rtx (*unpack)(rtx, rtx, rtx);
13126 rtx se, dest;
13127
13128 switch (imode)
13129 {
13130 case V16QImode:
13131 if (high_p)
13132 unpack = gen_vec_interleave_highv16qi;
13133 else
13134 unpack = gen_vec_interleave_lowv16qi;
13135 break;
13136 case V8HImode:
13137 if (high_p)
13138 unpack = gen_vec_interleave_highv8hi;
13139 else
13140 unpack = gen_vec_interleave_lowv8hi;
13141 break;
13142 case V4SImode:
13143 if (high_p)
13144 unpack = gen_vec_interleave_highv4si;
13145 else
13146 unpack = gen_vec_interleave_lowv4si;
13147 break;
13148 default:
13149 gcc_unreachable ();
13150 }
13151
13152 dest = gen_lowpart (imode, operands[0]);
13153
13154 if (unsigned_p)
13155 se = force_reg (imode, CONST0_RTX (imode));
13156 else
13157 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13158 operands[1], pc_rtx, pc_rtx);
13159
13160 emit_insn (unpack (dest, operands[1], se));
13161 }
13162
13163 /* This function performs the same task as ix86_expand_sse_unpack,
13164 but with SSE4.1 instructions. */
13165
13166 void
13167 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13168 {
13169 enum machine_mode imode = GET_MODE (operands[1]);
13170 rtx (*unpack)(rtx, rtx);
13171 rtx src, dest;
13172
13173 switch (imode)
13174 {
13175 case V16QImode:
13176 if (unsigned_p)
13177 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13178 else
13179 unpack = gen_sse4_1_extendv8qiv8hi2;
13180 break;
13181 case V8HImode:
13182 if (unsigned_p)
13183 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13184 else
13185 unpack = gen_sse4_1_extendv4hiv4si2;
13186 break;
13187 case V4SImode:
13188 if (unsigned_p)
13189 unpack = gen_sse4_1_zero_extendv2siv2di2;
13190 else
13191 unpack = gen_sse4_1_extendv2siv2di2;
13192 break;
13193 default:
13194 gcc_unreachable ();
13195 }
13196
13197 dest = operands[0];
13198 if (high_p)
13199 {
13200 /* Shift higher 8 bytes to lower 8 bytes. */
13201 src = gen_reg_rtx (imode);
13202 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13203 gen_lowpart (TImode, operands[1]),
13204 GEN_INT (64)));
13205 }
13206 else
13207 src = operands[1];
13208
13209 emit_insn (unpack (dest, src));
13210 }
13211
13212 /* Expand conditional increment or decrement using adc/sbb instructions.
13213 The default case using setcc followed by a conditional move can be
13214 done by generic code. */
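/* Sketch of the idea: put the condition into the carry flag and use a
   single adc/sbb with a 0 or -1 constant, so the destination becomes
   operand 2 plus or minus one exactly when the condition holds, and
   operand 2 unchanged otherwise, avoiding a setcc/cmov pair.  */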
13215 int
13216 ix86_expand_int_addcc (rtx operands[])
13217 {
13218 enum rtx_code code = GET_CODE (operands[1]);
13219 rtx compare_op;
13220 rtx val = const0_rtx;
13221 bool fpcmp = false;
13222 enum machine_mode mode = GET_MODE (operands[0]);
13223
13224 if (operands[3] != const1_rtx
13225 && operands[3] != constm1_rtx)
13226 return 0;
13227 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13228 ix86_compare_op1, &compare_op))
13229 return 0;
13230 code = GET_CODE (compare_op);
13231
13232 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13233 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13234 {
13235 fpcmp = true;
13236 code = ix86_fp_compare_code_to_integer (code);
13237 }
13238
13239 if (code != LTU)
13240 {
13241 val = constm1_rtx;
13242 if (fpcmp)
13243 PUT_CODE (compare_op,
13244 reverse_condition_maybe_unordered
13245 (GET_CODE (compare_op)));
13246 else
13247 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13248 }
13249 PUT_MODE (compare_op, mode);
13250
13251 /* Construct either adc or sbb insn. */
13252 if ((code == LTU) == (operands[3] == constm1_rtx))
13253 {
13254 switch (GET_MODE (operands[0]))
13255 {
13256 case QImode:
13257 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13258 break;
13259 case HImode:
13260 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13261 break;
13262 case SImode:
13263 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13264 break;
13265 case DImode:
13266 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13267 break;
13268 default:
13269 gcc_unreachable ();
13270 }
13271 }
13272 else
13273 {
13274 switch (GET_MODE (operands[0]))
13275 {
13276 case QImode:
13277 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13278 break;
13279 case HImode:
13280 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13281 break;
13282 case SImode:
13283 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13284 break;
13285 case DImode:
13286 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13287 break;
13288 default:
13289 gcc_unreachable ();
13290 }
13291 }
13292 return 1; /* DONE */
13293 }
13294
13295
13296 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13297 works for floating point operands and non-offsettable memories.
13298 For pushes, it returns just stack offsets; the values will be saved
13299 in the right order. At most three parts are generated. */
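/* For instance, on a 32-bit target a DFmode operand is split into two
   SImode parts and an XFmode operand into three; a 64-bit target always
   produces exactly two parts here.  */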
13300
13301 static int
13302 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13303 {
13304 int size;
13305
13306 if (!TARGET_64BIT)
13307 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13308 else
13309 size = (GET_MODE_SIZE (mode) + 4) / 8;
13310
13311 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13312 gcc_assert (size >= 2 && size <= 3);
13313
13314 /* Optimize constant pool reference to immediates. This is used by fp
13315 moves, which force all constants to memory to allow combining. */
13316 if (MEM_P (operand) && MEM_READONLY_P (operand))
13317 {
13318 rtx tmp = maybe_get_pool_constant (operand);
13319 if (tmp)
13320 operand = tmp;
13321 }
13322
13323 if (MEM_P (operand) && !offsettable_memref_p (operand))
13324 {
13325 /* The only non-offsettable memories we handle are pushes. */
13326 int ok = push_operand (operand, VOIDmode);
13327
13328 gcc_assert (ok);
13329
13330 operand = copy_rtx (operand);
13331 PUT_MODE (operand, Pmode);
13332 parts[0] = parts[1] = parts[2] = operand;
13333 return size;
13334 }
13335
13336 if (GET_CODE (operand) == CONST_VECTOR)
13337 {
13338 enum machine_mode imode = int_mode_for_mode (mode);
13339 /* Caution: if we looked through a constant pool memory above,
13340 the operand may actually have a different mode now. That's
13341 ok, since we want to pun this all the way back to an integer. */
13342 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13343 gcc_assert (operand != NULL);
13344 mode = imode;
13345 }
13346
13347 if (!TARGET_64BIT)
13348 {
13349 if (mode == DImode)
13350 split_di (&operand, 1, &parts[0], &parts[1]);
13351 else
13352 {
13353 if (REG_P (operand))
13354 {
13355 gcc_assert (reload_completed);
13356 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13357 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13358 if (size == 3)
13359 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13360 }
13361 else if (offsettable_memref_p (operand))
13362 {
13363 operand = adjust_address (operand, SImode, 0);
13364 parts[0] = operand;
13365 parts[1] = adjust_address (operand, SImode, 4);
13366 if (size == 3)
13367 parts[2] = adjust_address (operand, SImode, 8);
13368 }
13369 else if (GET_CODE (operand) == CONST_DOUBLE)
13370 {
13371 REAL_VALUE_TYPE r;
13372 long l[4];
13373
13374 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13375 switch (mode)
13376 {
13377 case XFmode:
13378 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13379 parts[2] = gen_int_mode (l[2], SImode);
13380 break;
13381 case DFmode:
13382 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13383 break;
13384 default:
13385 gcc_unreachable ();
13386 }
13387 parts[1] = gen_int_mode (l[1], SImode);
13388 parts[0] = gen_int_mode (l[0], SImode);
13389 }
13390 else
13391 gcc_unreachable ();
13392 }
13393 }
13394 else
13395 {
13396 if (mode == TImode)
13397 split_ti (&operand, 1, &parts[0], &parts[1]);
13398 if (mode == XFmode || mode == TFmode)
13399 {
13400 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13401 if (REG_P (operand))
13402 {
13403 gcc_assert (reload_completed);
13404 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13405 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13406 }
13407 else if (offsettable_memref_p (operand))
13408 {
13409 operand = adjust_address (operand, DImode, 0);
13410 parts[0] = operand;
13411 parts[1] = adjust_address (operand, upper_mode, 8);
13412 }
13413 else if (GET_CODE (operand) == CONST_DOUBLE)
13414 {
13415 REAL_VALUE_TYPE r;
13416 long l[4];
13417
13418 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13419 real_to_target (l, &r, mode);
13420
13421 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13422 if (HOST_BITS_PER_WIDE_INT >= 64)
13423 parts[0]
13424 = gen_int_mode
13425 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13426 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13427 DImode);
13428 else
13429 parts[0] = immed_double_const (l[0], l[1], DImode);
13430
13431 if (upper_mode == SImode)
13432 parts[1] = gen_int_mode (l[2], SImode);
13433 else if (HOST_BITS_PER_WIDE_INT >= 64)
13434 parts[1]
13435 = gen_int_mode
13436 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13437 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13438 DImode);
13439 else
13440 parts[1] = immed_double_const (l[2], l[3], DImode);
13441 }
13442 else
13443 gcc_unreachable ();
13444 }
13445 }
13446
13447 return size;
13448 }
13449
13450 /* Emit insns to perform a move or push of DI, DF, and XF values.
13451 All required insns are emitted directly. Operands 2-4 are filled
13452 with the destination parts in the correct order and operands 5-7
13453 with the corresponding source parts. */
13454
13455 void
13456 ix86_split_long_move (rtx operands[])
13457 {
13458 rtx part[2][3];
13459 int nparts;
13460 int push = 0;
13461 int collisions = 0;
13462 enum machine_mode mode = GET_MODE (operands[0]);
13463
13464 /* The DFmode expanders may ask us to move a double.
13465 For a 64-bit target this is a single move. By hiding that fact
13466 here we simplify the i386.md splitters. */
13467 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13468 {
13469 /* Optimize constant pool reference to immediates. This is used by
13470 fp moves, which force all constants to memory to allow combining. */
13471
13472 if (MEM_P (operands[1])
13473 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13474 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13475 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13476 if (push_operand (operands[0], VOIDmode))
13477 {
13478 operands[0] = copy_rtx (operands[0]);
13479 PUT_MODE (operands[0], Pmode);
13480 }
13481 else
13482 operands[0] = gen_lowpart (DImode, operands[0]);
13483 operands[1] = gen_lowpart (DImode, operands[1]);
13484 emit_move_insn (operands[0], operands[1]);
13485 return;
13486 }
13487
13488 /* The only non-offsettable memory we handle is push. */
13489 if (push_operand (operands[0], VOIDmode))
13490 push = 1;
13491 else
13492 gcc_assert (!MEM_P (operands[0])
13493 || offsettable_memref_p (operands[0]));
13494
13495 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13496 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13497
13498 /* When emitting a push, be careful with source operands on the stack. */
13499 if (push && MEM_P (operands[1])
13500 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13501 {
13502 if (nparts == 3)
13503 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13504 XEXP (part[1][2], 0));
13505 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13506 XEXP (part[1][1], 0));
13507 }
13508
13509 /* We need to do copy in the right order in case an address register
13510 of the source overlaps the destination. */
13511 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13512 {
13513 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13514 collisions++;
13515 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13516 collisions++;
13517 if (nparts == 3
13518 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13519 collisions++;
13520
13521 /* Collision in the middle part can be handled by reordering. */
13522 if (collisions == 1 && nparts == 3
13523 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13524 {
13525 rtx tmp;
13526 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13527 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13528 }
13529
13530 /* If there are more collisions, we can't handle it by reordering.
13531 Do an lea to the last part and use only one colliding move. */
13532 else if (collisions > 1)
13533 {
13534 rtx base;
13535
13536 collisions = 1;
13537
13538 base = part[0][nparts - 1];
13539
13540 /* Handle the case when the last part isn't valid for lea.
13541 Happens in 64-bit mode storing the 12-byte XFmode. */
13542 if (GET_MODE (base) != Pmode)
13543 base = gen_rtx_REG (Pmode, REGNO (base));
13544
13545 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13546 part[1][0] = replace_equiv_address (part[1][0], base);
13547 part[1][1] = replace_equiv_address (part[1][1],
13548 plus_constant (base, UNITS_PER_WORD));
13549 if (nparts == 3)
13550 part[1][2] = replace_equiv_address (part[1][2],
13551 plus_constant (base, 8));
13552 }
13553 }
13554
13555 if (push)
13556 {
13557 if (!TARGET_64BIT)
13558 {
13559 if (nparts == 3)
13560 {
13561 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13562 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13563 emit_move_insn (part[0][2], part[1][2]);
13564 }
13565 }
13566 else
13567 {
13568 /* In 64-bit mode we don't have a 32-bit push available. If this is
13569 a register, that is OK; we just use the larger counterpart. We also
13570 retype memory; this comes from an attempt to avoid the REX prefix
13571 when moving the second half of a TFmode value. */
13572 if (GET_MODE (part[1][1]) == SImode)
13573 {
13574 switch (GET_CODE (part[1][1]))
13575 {
13576 case MEM:
13577 part[1][1] = adjust_address (part[1][1], DImode, 0);
13578 break;
13579
13580 case REG:
13581 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13582 break;
13583
13584 default:
13585 gcc_unreachable ();
13586 }
13587
13588 if (GET_MODE (part[1][0]) == SImode)
13589 part[1][0] = part[1][1];
13590 }
13591 }
13592 emit_move_insn (part[0][1], part[1][1]);
13593 emit_move_insn (part[0][0], part[1][0]);
13594 return;
13595 }
13596
13597 /* Choose the correct order so we do not overwrite the source before it is copied. */
13598 if ((REG_P (part[0][0])
13599 && REG_P (part[1][1])
13600 && (REGNO (part[0][0]) == REGNO (part[1][1])
13601 || (nparts == 3
13602 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13603 || (collisions > 0
13604 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13605 {
13606 if (nparts == 3)
13607 {
13608 operands[2] = part[0][2];
13609 operands[3] = part[0][1];
13610 operands[4] = part[0][0];
13611 operands[5] = part[1][2];
13612 operands[6] = part[1][1];
13613 operands[7] = part[1][0];
13614 }
13615 else
13616 {
13617 operands[2] = part[0][1];
13618 operands[3] = part[0][0];
13619 operands[5] = part[1][1];
13620 operands[6] = part[1][0];
13621 }
13622 }
13623 else
13624 {
13625 if (nparts == 3)
13626 {
13627 operands[2] = part[0][0];
13628 operands[3] = part[0][1];
13629 operands[4] = part[0][2];
13630 operands[5] = part[1][0];
13631 operands[6] = part[1][1];
13632 operands[7] = part[1][2];
13633 }
13634 else
13635 {
13636 operands[2] = part[0][0];
13637 operands[3] = part[0][1];
13638 operands[5] = part[1][0];
13639 operands[6] = part[1][1];
13640 }
13641 }
13642
13643 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13644 if (optimize_size)
13645 {
13646 if (CONST_INT_P (operands[5])
13647 && operands[5] != const0_rtx
13648 && REG_P (operands[2]))
13649 {
13650 if (CONST_INT_P (operands[6])
13651 && INTVAL (operands[6]) == INTVAL (operands[5]))
13652 operands[6] = operands[2];
13653
13654 if (nparts == 3
13655 && CONST_INT_P (operands[7])
13656 && INTVAL (operands[7]) == INTVAL (operands[5]))
13657 operands[7] = operands[2];
13658 }
13659
13660 if (nparts == 3
13661 && CONST_INT_P (operands[6])
13662 && operands[6] != const0_rtx
13663 && REG_P (operands[3])
13664 && CONST_INT_P (operands[7])
13665 && INTVAL (operands[7]) == INTVAL (operands[6]))
13666 operands[7] = operands[3];
13667 }
13668
13669 emit_move_insn (operands[2], operands[5]);
13670 emit_move_insn (operands[3], operands[6]);
13671 if (nparts == 3)
13672 emit_move_insn (operands[4], operands[7]);
13673
13674 return;
13675 }
13676
13677 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13678 left shift by a constant, either using a single shift or
13679 a sequence of add instructions. */
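/* E.g. a left shift by 2 may be emitted as two self-additions of the
   operand when 2 * ix86_cost->add does not exceed ix86_cost->shift_const
   and we are not optimizing for size.  */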
13680
13681 static void
13682 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13683 {
13684 if (count == 1)
13685 {
13686 emit_insn ((mode == DImode
13687 ? gen_addsi3
13688 : gen_adddi3) (operand, operand, operand));
13689 }
13690 else if (!optimize_size
13691 && count * ix86_cost->add <= ix86_cost->shift_const)
13692 {
13693 int i;
13694 for (i=0; i<count; i++)
13695 {
13696 emit_insn ((mode == DImode
13697 ? gen_addsi3
13698 : gen_adddi3) (operand, operand, operand));
13699 }
13700 }
13701 else
13702 emit_insn ((mode == DImode
13703 ? gen_ashlsi3
13704 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13705 }
13706
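/* Split a double-word left shift (DImode on 32-bit targets, TImode on
   64-bit ones) into word-sized operations.  Informally, for a constant
   count C and word width W:
     C >= W: high = low << (C - W), low = 0;
     C <  W: shld high,low by C, then shift low left by C.
   For a variable count, shld/shl are emitted and the result is then
   fixed up for counts >= W, using cmov when it is available and a
   scratch register was provided.  */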
13707 void
13708 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13709 {
13710 rtx low[2], high[2];
13711 int count;
13712 const int single_width = mode == DImode ? 32 : 64;
13713
13714 if (CONST_INT_P (operands[2]))
13715 {
13716 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13717 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13718
13719 if (count >= single_width)
13720 {
13721 emit_move_insn (high[0], low[1]);
13722 emit_move_insn (low[0], const0_rtx);
13723
13724 if (count > single_width)
13725 ix86_expand_ashl_const (high[0], count - single_width, mode);
13726 }
13727 else
13728 {
13729 if (!rtx_equal_p (operands[0], operands[1]))
13730 emit_move_insn (operands[0], operands[1]);
13731 emit_insn ((mode == DImode
13732 ? gen_x86_shld_1
13733 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13734 ix86_expand_ashl_const (low[0], count, mode);
13735 }
13736 return;
13737 }
13738
13739 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13740
13741 if (operands[1] == const1_rtx)
13742 {
13743 /* Assuming we've chosen QImode-capable registers, 1 << N
13744 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13745 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13746 {
13747 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13748
13749 ix86_expand_clear (low[0]);
13750 ix86_expand_clear (high[0]);
13751 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13752
13753 d = gen_lowpart (QImode, low[0]);
13754 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13755 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13756 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13757
13758 d = gen_lowpart (QImode, high[0]);
13759 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13760 s = gen_rtx_NE (QImode, flags, const0_rtx);
13761 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13762 }
13763
13764 /* Otherwise, we can get the same results by manually performing
13765 a bit extract operation on bit 5/6, and then performing the two
13766 shifts. The two methods of getting 0/1 into low/high are exactly
13767 the same size. Avoiding the shift in the bit extract case helps
13768 pentium4 a bit; no one else seems to care much either way. */
13769 else
13770 {
13771 rtx x;
13772
13773 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13774 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13775 else
13776 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13777 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13778
13779 emit_insn ((mode == DImode
13780 ? gen_lshrsi3
13781 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13782 emit_insn ((mode == DImode
13783 ? gen_andsi3
13784 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13785 emit_move_insn (low[0], high[0]);
13786 emit_insn ((mode == DImode
13787 ? gen_xorsi3
13788 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13789 }
13790
13791 emit_insn ((mode == DImode
13792 ? gen_ashlsi3
13793 : gen_ashldi3) (low[0], low[0], operands[2]));
13794 emit_insn ((mode == DImode
13795 ? gen_ashlsi3
13796 : gen_ashldi3) (high[0], high[0], operands[2]));
13797 return;
13798 }
13799
13800 if (operands[1] == constm1_rtx)
13801 {
13802 /* For -1 << N, we can avoid the shld instruction, because we
13803 know that we're shifting 0...31/63 ones into a -1. */
13804 emit_move_insn (low[0], constm1_rtx);
13805 if (optimize_size)
13806 emit_move_insn (high[0], low[0]);
13807 else
13808 emit_move_insn (high[0], constm1_rtx);
13809 }
13810 else
13811 {
13812 if (!rtx_equal_p (operands[0], operands[1]))
13813 emit_move_insn (operands[0], operands[1]);
13814
13815 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13816 emit_insn ((mode == DImode
13817 ? gen_x86_shld_1
13818 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13819 }
13820
13821 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13822
13823 if (TARGET_CMOVE && scratch)
13824 {
13825 ix86_expand_clear (scratch);
13826 emit_insn ((mode == DImode
13827 ? gen_x86_shift_adj_1
13828 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13829 }
13830 else
13831 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13832 }
13833
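/* Split a double-word arithmetic right shift into word-sized operations.
   Informally, for a constant count C and word width W:
     C == 2W-1: both words become copies of the sign bit;
     C >= W:    low = high >> (C - W), high = high >> (W - 1);
     C <  W:    shrd low,high by C, then arithmetic-shift high by C.  */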
13834 void
13835 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13836 {
13837 rtx low[2], high[2];
13838 int count;
13839 const int single_width = mode == DImode ? 32 : 64;
13840
13841 if (CONST_INT_P (operands[2]))
13842 {
13843 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13844 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13845
13846 if (count == single_width * 2 - 1)
13847 {
13848 emit_move_insn (high[0], high[1]);
13849 emit_insn ((mode == DImode
13850 ? gen_ashrsi3
13851 : gen_ashrdi3) (high[0], high[0],
13852 GEN_INT (single_width - 1)));
13853 emit_move_insn (low[0], high[0]);
13854
13855 }
13856 else if (count >= single_width)
13857 {
13858 emit_move_insn (low[0], high[1]);
13859 emit_move_insn (high[0], low[0]);
13860 emit_insn ((mode == DImode
13861 ? gen_ashrsi3
13862 : gen_ashrdi3) (high[0], high[0],
13863 GEN_INT (single_width - 1)));
13864 if (count > single_width)
13865 emit_insn ((mode == DImode
13866 ? gen_ashrsi3
13867 : gen_ashrdi3) (low[0], low[0],
13868 GEN_INT (count - single_width)));
13869 }
13870 else
13871 {
13872 if (!rtx_equal_p (operands[0], operands[1]))
13873 emit_move_insn (operands[0], operands[1]);
13874 emit_insn ((mode == DImode
13875 ? gen_x86_shrd_1
13876 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13877 emit_insn ((mode == DImode
13878 ? gen_ashrsi3
13879 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13880 }
13881 }
13882 else
13883 {
13884 if (!rtx_equal_p (operands[0], operands[1]))
13885 emit_move_insn (operands[0], operands[1]);
13886
13887 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13888
13889 emit_insn ((mode == DImode
13890 ? gen_x86_shrd_1
13891 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13892 emit_insn ((mode == DImode
13893 ? gen_ashrsi3
13894 : gen_ashrdi3) (high[0], high[0], operands[2]));
13895
13896 if (TARGET_CMOVE && scratch)
13897 {
13898 emit_move_insn (scratch, high[0]);
13899 emit_insn ((mode == DImode
13900 ? gen_ashrsi3
13901 : gen_ashrdi3) (scratch, scratch,
13902 GEN_INT (single_width - 1)));
13903 emit_insn ((mode == DImode
13904 ? gen_x86_shift_adj_1
13905 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13906 scratch));
13907 }
13908 else
13909 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13910 }
13911 }
13912
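/* Split a double-word logical right shift into word-sized operations.
   Informally, for a constant count C and word width W:
     C >= W: low = high >> (C - W), high = 0;
     C <  W: shrd low,high by C, then logical-shift high by C.  */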
13913 void
13914 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13915 {
13916 rtx low[2], high[2];
13917 int count;
13918 const int single_width = mode == DImode ? 32 : 64;
13919
13920 if (CONST_INT_P (operands[2]))
13921 {
13922 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13923 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13924
13925 if (count >= single_width)
13926 {
13927 emit_move_insn (low[0], high[1]);
13928 ix86_expand_clear (high[0]);
13929
13930 if (count > single_width)
13931 emit_insn ((mode == DImode
13932 ? gen_lshrsi3
13933 : gen_lshrdi3) (low[0], low[0],
13934 GEN_INT (count - single_width)));
13935 }
13936 else
13937 {
13938 if (!rtx_equal_p (operands[0], operands[1]))
13939 emit_move_insn (operands[0], operands[1]);
13940 emit_insn ((mode == DImode
13941 ? gen_x86_shrd_1
13942 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13943 emit_insn ((mode == DImode
13944 ? gen_lshrsi3
13945 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13946 }
13947 }
13948 else
13949 {
13950 if (!rtx_equal_p (operands[0], operands[1]))
13951 emit_move_insn (operands[0], operands[1]);
13952
13953 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13954
13955 emit_insn ((mode == DImode
13956 ? gen_x86_shrd_1
13957 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13958 emit_insn ((mode == DImode
13959 ? gen_lshrsi3
13960 : gen_lshrdi3) (high[0], high[0], operands[2]));
13961
13962 /* Heh. By reversing the arguments, we can reuse this pattern. */
13963 if (TARGET_CMOVE && scratch)
13964 {
13965 ix86_expand_clear (scratch);
13966 emit_insn ((mode == DImode
13967 ? gen_x86_shift_adj_1
13968 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13969 scratch));
13970 }
13971 else
13972 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13973 }
13974 }
13975
13976 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13977 static void
13978 predict_jump (int prob)
13979 {
13980 rtx insn = get_last_insn ();
13981 gcc_assert (JUMP_P (insn));
13982 REG_NOTES (insn)
13983 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13984 GEN_INT (prob),
13985 REG_NOTES (insn));
13986 }
13987
13988 /* Helper function for the string operations below. Test whether VARIABLE
13989 is aligned to VALUE bytes; if so, jump to the returned label. */
13990 static rtx
13991 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13992 {
13993 rtx label = gen_label_rtx ();
13994 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13995 if (GET_MODE (variable) == DImode)
13996 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13997 else
13998 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13999 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
14000 1, label);
14001 if (epilogue)
14002 predict_jump (REG_BR_PROB_BASE * 50 / 100);
14003 else
14004 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14005 return label;
14006 }
14007
14008 /* Decrease COUNTREG by VALUE. */
14009 static void
14010 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14011 {
14012 if (GET_MODE (countreg) == DImode)
14013 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14014 else
14015 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14016 }
14017
14018 /* Zero extend possibly SImode EXP to Pmode register. */
14019 rtx
14020 ix86_zero_extend_to_Pmode (rtx exp)
14021 {
14022 rtx r;
14023 if (GET_MODE (exp) == VOIDmode)
14024 return force_reg (Pmode, exp);
14025 if (GET_MODE (exp) == Pmode)
14026 return copy_to_mode_reg (Pmode, exp);
14027 r = gen_reg_rtx (Pmode);
14028 emit_insn (gen_zero_extendsidi2 (r, exp));
14029 return r;
14030 }
14031
14032 /* Divide COUNTREG by SCALE. */
14033 static rtx
14034 scale_counter (rtx countreg, int scale)
14035 {
14036 rtx sc;
14037 rtx piece_size_mask;
14038
14039 if (scale == 1)
14040 return countreg;
14041 if (CONST_INT_P (countreg))
14042 return GEN_INT (INTVAL (countreg) / scale);
14043 gcc_assert (REG_P (countreg));
14044
14045 piece_size_mask = GEN_INT (scale - 1);
14046 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14047 GEN_INT (exact_log2 (scale)),
14048 NULL, 1, OPTAB_DIRECT);
14049 return sc;
14050 }
14051
14052 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14053 DImode for constant loop counts. */
14054
14055 static enum machine_mode
14056 counter_mode (rtx count_exp)
14057 {
14058 if (GET_MODE (count_exp) != VOIDmode)
14059 return GET_MODE (count_exp);
14060 if (GET_CODE (count_exp) != CONST_INT)
14061 return Pmode;
14062 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14063 return DImode;
14064 return SImode;
14065 }
14066
14067 /* When SRCPTR is non-NULL, output a simple loop that moves memory
14068 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
14069 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
14070 an equivalent loop that sets memory to VALUE (expected to be in MODE).
14071
14072 The size is rounded down to a whole number of chunks moved at once.
14073 SRCMEM and DESTMEM provide MEM rtxes to feed proper aliasing info. */
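/* Rough shape of the emitted code for the move case (the set case simply
   has no source side):

       size = count & -(chunk * unroll);
       iter = 0;
     top:
       copy chunk * unroll bytes from src + iter to dest + iter;
       iter += chunk * unroll;
       if (iter < size) goto top;
       dest += iter;  src += iter;
     out:
*/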
14074
14075
14076 static void
14077 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14078 rtx destptr, rtx srcptr, rtx value,
14079 rtx count, enum machine_mode mode, int unroll,
14080 int expected_size)
14081 {
14082 rtx out_label, top_label, iter, tmp;
14083 enum machine_mode iter_mode = counter_mode (count);
14084 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14085 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14086 rtx size;
14087 rtx x_addr;
14088 rtx y_addr;
14089 int i;
14090
14091 top_label = gen_label_rtx ();
14092 out_label = gen_label_rtx ();
14093 iter = gen_reg_rtx (iter_mode);
14094
14095 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14096 NULL, 1, OPTAB_DIRECT);
14097 /* Those two should combine. */
14098 if (piece_size == const1_rtx)
14099 {
14100 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14101 true, out_label);
14102 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14103 }
14104 emit_move_insn (iter, const0_rtx);
14105
14106 emit_label (top_label);
14107
14108 tmp = convert_modes (Pmode, iter_mode, iter, true);
14109 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14110 destmem = change_address (destmem, mode, x_addr);
14111
14112 if (srcmem)
14113 {
14114 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14115 srcmem = change_address (srcmem, mode, y_addr);
14116
14117 /* When unrolling for chips that reorder memory reads and writes,
14118 we can save registers by using a single temporary.
14119 Also, using 4 temporaries is overkill in 32-bit mode. */
14120 if (!TARGET_64BIT && 0)
14121 {
14122 for (i = 0; i < unroll; i++)
14123 {
14124 if (i)
14125 {
14126 destmem =
14127 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14128 srcmem =
14129 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14130 }
14131 emit_move_insn (destmem, srcmem);
14132 }
14133 }
14134 else
14135 {
14136 rtx tmpreg[4];
14137 gcc_assert (unroll <= 4);
14138 for (i = 0; i < unroll; i++)
14139 {
14140 tmpreg[i] = gen_reg_rtx (mode);
14141 if (i)
14142 {
14143 srcmem =
14144 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14145 }
14146 emit_move_insn (tmpreg[i], srcmem);
14147 }
14148 for (i = 0; i < unroll; i++)
14149 {
14150 if (i)
14151 {
14152 destmem =
14153 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14154 }
14155 emit_move_insn (destmem, tmpreg[i]);
14156 }
14157 }
14158 }
14159 else
14160 for (i = 0; i < unroll; i++)
14161 {
14162 if (i)
14163 destmem =
14164 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14165 emit_move_insn (destmem, value);
14166 }
14167
14168 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14169 true, OPTAB_LIB_WIDEN);
14170 if (tmp != iter)
14171 emit_move_insn (iter, tmp);
14172
14173 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14174 true, top_label);
14175 if (expected_size != -1)
14176 {
14177 expected_size /= GET_MODE_SIZE (mode) * unroll;
14178 if (expected_size == 0)
14179 predict_jump (0);
14180 else if (expected_size > REG_BR_PROB_BASE)
14181 predict_jump (REG_BR_PROB_BASE - 1);
14182 else
14183 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14184 }
14185 else
14186 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14187 iter = ix86_zero_extend_to_Pmode (iter);
14188 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14189 true, OPTAB_LIB_WIDEN);
14190 if (tmp != destptr)
14191 emit_move_insn (destptr, tmp);
14192 if (srcptr)
14193 {
14194 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14195 true, OPTAB_LIB_WIDEN);
14196 if (tmp != srcptr)
14197 emit_move_insn (srcptr, tmp);
14198 }
14199 emit_label (out_label);
14200 }
14201
14202 /* Output a "rep; mov" instruction.
14203 Arguments have the same meaning as for the previous function. */
14204 static void
14205 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14206 rtx destptr, rtx srcptr,
14207 rtx count,
14208 enum machine_mode mode)
14209 {
14210 rtx destexp;
14211 rtx srcexp;
14212 rtx countreg;
14213
14214 /* If the size is known and a multiple of 4, it is shorter to use SImode rep movs. */
14215 if (mode == QImode && CONST_INT_P (count)
14216 && !(INTVAL (count) & 3))
14217 mode = SImode;
14218
14219 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14220 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14221 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14222 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14223 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14224 if (mode != QImode)
14225 {
14226 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14227 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14228 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14229 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14230 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14231 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14232 }
14233 else
14234 {
14235 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14236 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14237 }
14238 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14239 destexp, srcexp));
14240 }
14241
14242 /* Output a "rep; stos" instruction.
14243 Arguments have the same meaning as for the previous function. */
14244 static void
14245 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14246 rtx count,
14247 enum machine_mode mode)
14248 {
14249 rtx destexp;
14250 rtx countreg;
14251
14252 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14253 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14254 value = force_reg (mode, gen_lowpart (mode, value));
14255 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14256 if (mode != QImode)
14257 {
14258 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14259 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14260 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14261 }
14262 else
14263 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14264 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14265 }
14266
14267 static void
14268 emit_strmov (rtx destmem, rtx srcmem,
14269 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14270 {
14271 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14272 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14273 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14274 }
14275
14276 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
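/* For a compile-time constant count only the low bits matter here: e.g. a
   residual of 13 bytes (8 + 4 + 1) is emitted as an 8-byte move (or two
   4-byte moves on 32-bit targets), a 4-byte move and a 1-byte move.  */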
14277 static void
14278 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14279 rtx destptr, rtx srcptr, rtx count, int max_size)
14280 {
14281 rtx src, dest;
14282 if (CONST_INT_P (count))
14283 {
14284 HOST_WIDE_INT countval = INTVAL (count);
14285 int offset = 0;
14286
14287 if ((countval & 0x10) && max_size > 16)
14288 {
14289 if (TARGET_64BIT)
14290 {
14291 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14292 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14293 }
14294 else
14295 gcc_unreachable ();
14296 offset += 16;
14297 }
14298 if ((countval & 0x08) && max_size > 8)
14299 {
14300 if (TARGET_64BIT)
14301 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14302 else
14303 {
14304 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14305 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14306 }
14307 offset += 8;
14308 }
14309 if ((countval & 0x04) && max_size > 4)
14310 {
14311 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14312 offset += 4;
14313 }
14314 if ((countval & 0x02) && max_size > 2)
14315 {
14316 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14317 offset += 2;
14318 }
14319 if ((countval & 0x01) && max_size > 1)
14320 {
14321 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14322 offset += 1;
14323 }
14324 return;
14325 }
14326 if (max_size > 8)
14327 {
14328 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14329 count, 1, OPTAB_DIRECT);
14330 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14331 count, QImode, 1, 4);
14332 return;
14333 }
14334
14335 /* When single string operations are cheap (TARGET_SINGLE_STRINGOP), movs
14336 advances both pointers for us. Otherwise we save code size by maintaining
14337 an offset register (zero is readily available from the preceding rep
14338 operation) and using x86 addressing modes. */
14339 if (TARGET_SINGLE_STRINGOP)
14340 {
14341 if (max_size > 4)
14342 {
14343 rtx label = ix86_expand_aligntest (count, 4, true);
14344 src = change_address (srcmem, SImode, srcptr);
14345 dest = change_address (destmem, SImode, destptr);
14346 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14347 emit_label (label);
14348 LABEL_NUSES (label) = 1;
14349 }
14350 if (max_size > 2)
14351 {
14352 rtx label = ix86_expand_aligntest (count, 2, true);
14353 src = change_address (srcmem, HImode, srcptr);
14354 dest = change_address (destmem, HImode, destptr);
14355 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14356 emit_label (label);
14357 LABEL_NUSES (label) = 1;
14358 }
14359 if (max_size > 1)
14360 {
14361 rtx label = ix86_expand_aligntest (count, 1, true);
14362 src = change_address (srcmem, QImode, srcptr);
14363 dest = change_address (destmem, QImode, destptr);
14364 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14365 emit_label (label);
14366 LABEL_NUSES (label) = 1;
14367 }
14368 }
14369 else
14370 {
14371 rtx offset = force_reg (Pmode, const0_rtx);
14372 rtx tmp;
14373
14374 if (max_size > 4)
14375 {
14376 rtx label = ix86_expand_aligntest (count, 4, true);
14377 src = change_address (srcmem, SImode, srcptr);
14378 dest = change_address (destmem, SImode, destptr);
14379 emit_move_insn (dest, src);
14380 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14381 true, OPTAB_LIB_WIDEN);
14382 if (tmp != offset)
14383 emit_move_insn (offset, tmp);
14384 emit_label (label);
14385 LABEL_NUSES (label) = 1;
14386 }
14387 if (max_size > 2)
14388 {
14389 rtx label = ix86_expand_aligntest (count, 2, true);
14390 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14391 src = change_address (srcmem, HImode, tmp);
14392 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14393 dest = change_address (destmem, HImode, tmp);
14394 emit_move_insn (dest, src);
14395 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14396 true, OPTAB_LIB_WIDEN);
14397 if (tmp != offset)
14398 emit_move_insn (offset, tmp);
14399 emit_label (label);
14400 LABEL_NUSES (label) = 1;
14401 }
14402 if (max_size > 1)
14403 {
14404 rtx label = ix86_expand_aligntest (count, 1, true);
14405 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14406 src = change_address (srcmem, QImode, tmp);
14407 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14408 dest = change_address (destmem, QImode, tmp);
14409 emit_move_insn (dest, src);
14410 emit_label (label);
14411 LABEL_NUSES (label) = 1;
14412 }
14413 }
14414 }
14415
14416 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14417 static void
14418 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14419 rtx count, int max_size)
14420 {
14421 count =
14422 expand_simple_binop (counter_mode (count), AND, count,
14423 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14424 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14425 gen_lowpart (QImode, value), count, QImode,
14426 1, max_size / 2);
14427 }
14428
14429 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */
14430 static void
14431 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14432 {
14433 rtx dest;
14434
14435 if (CONST_INT_P (count))
14436 {
14437 HOST_WIDE_INT countval = INTVAL (count);
14438 int offset = 0;
14439
14440 if ((countval & 0x10) && max_size > 16)
14441 {
14442 if (TARGET_64BIT)
14443 {
14444 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14445 emit_insn (gen_strset (destptr, dest, value));
14446 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14447 emit_insn (gen_strset (destptr, dest, value));
14448 }
14449 else
14450 gcc_unreachable ();
14451 offset += 16;
14452 }
14453 if ((countval & 0x08) && max_size > 8)
14454 {
14455 if (TARGET_64BIT)
14456 {
14457 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14458 emit_insn (gen_strset (destptr, dest, value));
14459 }
14460 else
14461 {
14462 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14463 emit_insn (gen_strset (destptr, dest, value));
14464 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14465 emit_insn (gen_strset (destptr, dest, value));
14466 }
14467 offset += 8;
14468 }
14469 if ((countval & 0x04) && max_size > 4)
14470 {
14471 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14472 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14473 offset += 4;
14474 }
14475 if ((countval & 0x02) && max_size > 2)
14476 {
14477 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14478 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14479 offset += 2;
14480 }
14481 if ((countval & 0x01) && max_size > 1)
14482 {
14483 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14484 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14485 offset += 1;
14486 }
14487 return;
14488 }
14489 if (max_size > 32)
14490 {
14491 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14492 return;
14493 }
14494 if (max_size > 16)
14495 {
14496 rtx label = ix86_expand_aligntest (count, 16, true);
14497 if (TARGET_64BIT)
14498 {
14499 dest = change_address (destmem, DImode, destptr);
14500 emit_insn (gen_strset (destptr, dest, value));
14501 emit_insn (gen_strset (destptr, dest, value));
14502 }
14503 else
14504 {
14505 dest = change_address (destmem, SImode, destptr);
14506 emit_insn (gen_strset (destptr, dest, value));
14507 emit_insn (gen_strset (destptr, dest, value));
14508 emit_insn (gen_strset (destptr, dest, value));
14509 emit_insn (gen_strset (destptr, dest, value));
14510 }
14511 emit_label (label);
14512 LABEL_NUSES (label) = 1;
14513 }
14514 if (max_size > 8)
14515 {
14516 rtx label = ix86_expand_aligntest (count, 8, true);
14517 if (TARGET_64BIT)
14518 {
14519 dest = change_address (destmem, DImode, destptr);
14520 emit_insn (gen_strset (destptr, dest, value));
14521 }
14522 else
14523 {
14524 dest = change_address (destmem, SImode, destptr);
14525 emit_insn (gen_strset (destptr, dest, value));
14526 emit_insn (gen_strset (destptr, dest, value));
14527 }
14528 emit_label (label);
14529 LABEL_NUSES (label) = 1;
14530 }
14531 if (max_size > 4)
14532 {
14533 rtx label = ix86_expand_aligntest (count, 4, true);
14534 dest = change_address (destmem, SImode, destptr);
14535 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14536 emit_label (label);
14537 LABEL_NUSES (label) = 1;
14538 }
14539 if (max_size > 2)
14540 {
14541 rtx label = ix86_expand_aligntest (count, 2, true);
14542 dest = change_address (destmem, HImode, destptr);
14543 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14544 emit_label (label);
14545 LABEL_NUSES (label) = 1;
14546 }
14547 if (max_size > 1)
14548 {
14549 rtx label = ix86_expand_aligntest (count, 1, true);
14550 dest = change_address (destmem, QImode, destptr);
14551 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14552 emit_label (label);
14553 LABEL_NUSES (label) = 1;
14554 }
14555 }
14556
14557 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
14558 to DESIRED_ALIGNMENT. */
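/* For example, with ALIGN == 1 and DESIRED_ALIGNMENT == 8 this emits up to
   a 1-byte, a 2-byte and a 4-byte copy, each guarded by a runtime test of
   the corresponding low bit of DESTPTR, and adjusts COUNT accordingly.  */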
14559 static void
14560 expand_movmem_prologue (rtx destmem, rtx srcmem,
14561 rtx destptr, rtx srcptr, rtx count,
14562 int align, int desired_alignment)
14563 {
14564 if (align <= 1 && desired_alignment > 1)
14565 {
14566 rtx label = ix86_expand_aligntest (destptr, 1, false);
14567 srcmem = change_address (srcmem, QImode, srcptr);
14568 destmem = change_address (destmem, QImode, destptr);
14569 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14570 ix86_adjust_counter (count, 1);
14571 emit_label (label);
14572 LABEL_NUSES (label) = 1;
14573 }
14574 if (align <= 2 && desired_alignment > 2)
14575 {
14576 rtx label = ix86_expand_aligntest (destptr, 2, false);
14577 srcmem = change_address (srcmem, HImode, srcptr);
14578 destmem = change_address (destmem, HImode, destptr);
14579 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14580 ix86_adjust_counter (count, 2);
14581 emit_label (label);
14582 LABEL_NUSES (label) = 1;
14583 }
14584 if (align <= 4 && desired_alignment > 4)
14585 {
14586 rtx label = ix86_expand_aligntest (destptr, 4, false);
14587 srcmem = change_address (srcmem, SImode, srcptr);
14588 destmem = change_address (destmem, SImode, destptr);
14589 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14590 ix86_adjust_counter (count, 4);
14591 emit_label (label);
14592 LABEL_NUSES (label) = 1;
14593 }
14594 gcc_assert (desired_alignment <= 8);
14595 }
14596
14597 /* Set enough bytes of DEST to align it, known to be aligned by ALIGN,
14598 to DESIRED_ALIGNMENT. */
14599 static void
14600 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14601 int align, int desired_alignment)
14602 {
14603 if (align <= 1 && desired_alignment > 1)
14604 {
14605 rtx label = ix86_expand_aligntest (destptr, 1, false);
14606 destmem = change_address (destmem, QImode, destptr);
14607 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14608 ix86_adjust_counter (count, 1);
14609 emit_label (label);
14610 LABEL_NUSES (label) = 1;
14611 }
14612 if (align <= 2 && desired_alignment > 2)
14613 {
14614 rtx label = ix86_expand_aligntest (destptr, 2, false);
14615 destmem = change_address (destmem, HImode, destptr);
14616 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14617 ix86_adjust_counter (count, 2);
14618 emit_label (label);
14619 LABEL_NUSES (label) = 1;
14620 }
14621 if (align <= 4 && desired_alignment > 4)
14622 {
14623 rtx label = ix86_expand_aligntest (destptr, 4, false);
14624 destmem = change_address (destmem, SImode, destptr);
14625 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14626 ix86_adjust_counter (count, 4);
14627 emit_label (label);
14628 LABEL_NUSES (label) = 1;
14629 }
14630 gcc_assert (desired_alignment <= 8);
14631 }
14632
14633 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
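/* The decision is driven by the per-CPU stringop_algs tables
   (ix86_cost->memcpy / ix86_cost->memset): each entry pairs a maximum block
   size with the algorithm preferred up to that size, and unknown_size covers
   variable counts.  As a purely illustrative example, a table such as
   {{256, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}} would mean:
   loop up to 256 bytes, rep movsl up to 8K, libcall beyond.  */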
14634 static enum stringop_alg
14635 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14636 int *dynamic_check)
14637 {
14638 const struct stringop_algs * algs;
14639
14640 *dynamic_check = -1;
14641 if (memset)
14642 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14643 else
14644 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14645 if (stringop_alg != no_stringop)
14646 return stringop_alg;
14647 /* rep; movq or rep; movl is the smallest variant. */
14648 else if (optimize_size)
14649 {
14650 if (!count || (count & 3))
14651 return rep_prefix_1_byte;
14652 else
14653 return rep_prefix_4_byte;
14654 }
14655 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14656 */
14657 else if (expected_size != -1 && expected_size < 4)
14658 return loop_1_byte;
14659 else if (expected_size != -1)
14660 {
14661 unsigned int i;
14662 enum stringop_alg alg = libcall;
14663 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14664 {
14665 gcc_assert (algs->size[i].max);
14666 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14667 {
14668 if (algs->size[i].alg != libcall)
14669 alg = algs->size[i].alg;
14670 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the
14671 last non-libcall inline algorithm. */
14672 if (TARGET_INLINE_ALL_STRINGOPS)
14673 {
14674 /* When the current size is best copied by a libcall,
14675 but we are still forced to inline, run the heuristic below
14676 that will pick code for medium-sized blocks. */
14677 if (alg != libcall)
14678 return alg;
14679 break;
14680 }
14681 else
14682 return algs->size[i].alg;
14683 }
14684 }
14685 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14686 }
14687 /* When asked to inline the call anyway, try to pick a meaningful choice.
14688 We look for the maximal size of a block that is faster to copy by hand and
14689 take blocks of at most that size, guessing that the average size will
14690 be roughly half of the block.
14691
14692 If this turns out to be bad, we might simply specify the preferred
14693 choice in ix86_costs. */
14694 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14695 && algs->unknown_size == libcall)
14696 {
14697 int max = -1;
14698 enum stringop_alg alg;
14699 int i;
14700
14701 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14702 if (algs->size[i].alg != libcall && algs->size[i].alg)
14703 max = algs->size[i].max;
14704 if (max == -1)
14705 max = 4096;
14706 alg = decide_alg (count, max / 2, memset, dynamic_check);
14707 gcc_assert (*dynamic_check == -1);
14708 gcc_assert (alg != libcall);
14709 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14710 *dynamic_check = max;
14711 return alg;
14712 }
14713 return algs->unknown_size;
14714 }
14715
14716 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14717 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
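/* For example (illustrative only): a rep_prefix_4_byte copy tuned for
   PentiumPro asks for 8-byte alignment to hit its fast cache-line path,
   while with -Os no extra alignment beyond what is already known is
   requested.  */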
14718 static int
14719 decide_alignment (int align,
14720 enum stringop_alg alg,
14721 int expected_size)
14722 {
14723 int desired_align = 0;
14724 switch (alg)
14725 {
14726 case no_stringop:
14727 gcc_unreachable ();
14728 case loop:
14729 case unrolled_loop:
14730 desired_align = GET_MODE_SIZE (Pmode);
14731 break;
14732 case rep_prefix_8_byte:
14733 desired_align = 8;
14734 break;
14735 case rep_prefix_4_byte:
14736 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14737 copying a whole cache line at once. */
14738 if (TARGET_PENTIUMPRO)
14739 desired_align = 8;
14740 else
14741 desired_align = 4;
14742 break;
14743 case rep_prefix_1_byte:
14744 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
14745 copying a whole cache line at once. */
14746 if (TARGET_PENTIUMPRO)
14747 desired_align = 8;
14748 else
14749 desired_align = 1;
14750 break;
14751 case loop_1_byte:
14752 desired_align = 1;
14753 break;
14754 case libcall:
14755 return 0;
14756 }
14757
14758 if (optimize_size)
14759 desired_align = 1;
14760 if (desired_align < align)
14761 desired_align = align;
14762 if (expected_size != -1 && expected_size < 4)
14763 desired_align = align;
14764 return desired_align;
14765 }
14766
14767 /* Return the smallest power of 2 greater than VAL. */
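/* For example, smallest_pow2_greater_than (3) == 4 and
   smallest_pow2_greater_than (4) == 8; the result is strictly greater than
   VAL even when VAL is itself a power of 2.  */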
14768 static int
14769 smallest_pow2_greater_than (int val)
14770 {
14771 int ret = 1;
14772 while (ret <= val)
14773 ret <<= 1;
14774 return ret;
14775 }
14776
14777 /* Expand string move (memcpy) operation. Use i386 string operations when
14778 profitable. ix86_expand_setmem contains similar code. The code depends upon
14779 architecture, block size and alignment, but always has the same
14780 overall structure:
14781
14782 1) Prologue guard: Conditional that jumps up to epilogues for small
14783 blocks that can be handled by epilogue alone. This is faster but
14784 also needed for correctness, since the prologue assumes the block is larger
14785 than the desired alignment.
14786
14787 Optional dynamic check for size and libcall for large
14788 blocks is emitted here too, with -minline-stringops-dynamically.
14789
14790 2) Prologue: copy first few bytes in order to get destination aligned
14791 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14792 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14793 We emit either a jump tree on power of two sized blocks, or a byte loop.
14794
14795 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14796 with specified algorithm.
14797
14798 4) Epilogue: code copying tail of the block that is too small to be
14799 handled by main body (or up to size guarded by prologue guard). */
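/* A rough, illustrative sketch (not the literal emitted code) of the shape
   produced for the rep_prefix_4_byte algorithm with a run-time count and a
   desired alignment of 4:

       cmp    $4, count        ; 1) prologue guard
       jb     epilogue
       ...byte copies until dst is 4-byte aligned...   ; 2) prologue
       mov    count, %ecx
       shr    $2, %ecx
       rep movsl               ; 3) main body
     epilogue:
       ...copy the remaining count & 3 bytes...        ; 4) epilogue  */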
14800
14801 int
14802 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14803 rtx expected_align_exp, rtx expected_size_exp)
14804 {
14805 rtx destreg;
14806 rtx srcreg;
14807 rtx label = NULL;
14808 rtx tmp;
14809 rtx jump_around_label = NULL;
14810 HOST_WIDE_INT align = 1;
14811 unsigned HOST_WIDE_INT count = 0;
14812 HOST_WIDE_INT expected_size = -1;
14813 int size_needed = 0, epilogue_size_needed;
14814 int desired_align = 0;
14815 enum stringop_alg alg;
14816 int dynamic_check;
14817
14818 if (CONST_INT_P (align_exp))
14819 align = INTVAL (align_exp);
14820 /* i386 can do misaligned access at a reasonably increased cost. */
14821 if (CONST_INT_P (expected_align_exp)
14822 && INTVAL (expected_align_exp) > align)
14823 align = INTVAL (expected_align_exp);
14824 if (CONST_INT_P (count_exp))
14825 count = expected_size = INTVAL (count_exp);
14826 if (CONST_INT_P (expected_size_exp) && count == 0)
14827 expected_size = INTVAL (expected_size_exp);
14828
14829 /* Step 0: Decide on preferred algorithm, desired alignment and
14830 size of chunks to be copied by main loop. */
14831
14832 alg = decide_alg (count, expected_size, false, &dynamic_check);
14833 desired_align = decide_alignment (align, alg, expected_size);
14834
14835 if (!TARGET_ALIGN_STRINGOPS)
14836 align = desired_align;
14837
14838 if (alg == libcall)
14839 return 0;
14840 gcc_assert (alg != no_stringop);
14841 if (!count)
14842 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14843 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14844 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14845 switch (alg)
14846 {
14847 case libcall:
14848 case no_stringop:
14849 gcc_unreachable ();
14850 case loop:
14851 size_needed = GET_MODE_SIZE (Pmode);
14852 break;
14853 case unrolled_loop:
14854 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14855 break;
14856 case rep_prefix_8_byte:
14857 size_needed = 8;
14858 break;
14859 case rep_prefix_4_byte:
14860 size_needed = 4;
14861 break;
14862 case rep_prefix_1_byte:
14863 case loop_1_byte:
14864 size_needed = 1;
14865 break;
14866 }
14867
14868 epilogue_size_needed = size_needed;
14869
14870 /* Step 1: Prologue guard. */
14871
14872 /* Alignment code needs count to be in a register. */
14873 if (CONST_INT_P (count_exp) && desired_align > align)
14874 {
14875 enum machine_mode mode = SImode;
14876 if (TARGET_64BIT && (count & ~0xffffffff))
14877 mode = DImode;
14878 count_exp = force_reg (mode, count_exp);
14879 }
14880 gcc_assert (desired_align >= 1 && align >= 1);
14881
14882 /* Ensure that alignment prologue won't copy past end of block. */
14883 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14884 {
14885 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14886 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
14887 Make sure it is a power of 2. */
14888 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
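/* Illustrative example: for a rep_prefix_4_byte copy with SIZE_NEEDED == 4,
   DESIRED_ALIGN == 4 and ALIGN == 1 this gives MAX (3, 3) == 3, rounded up
   to the power of two 4, so the guard below branches to the epilogue for
   blocks smaller than 4 bytes.  */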
14889
14890 label = gen_label_rtx ();
14891 emit_cmp_and_jump_insns (count_exp,
14892 GEN_INT (epilogue_size_needed),
14893 LTU, 0, counter_mode (count_exp), 1, label);
14894 if (GET_CODE (count_exp) == CONST_INT)
14895 ;
14896 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14897 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14898 else
14899 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14900 }
14901 /* Emit code to decide at runtime whether a library call or inline code
14902 should be used. */
14903 if (dynamic_check != -1)
14904 {
14905 rtx hot_label = gen_label_rtx ();
14906 jump_around_label = gen_label_rtx ();
14907 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14908 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14909 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14910 emit_block_move_via_libcall (dst, src, count_exp, false);
14911 emit_jump (jump_around_label);
14912 emit_label (hot_label);
14913 }
14914
14915 /* Step 2: Alignment prologue. */
14916
14917 if (desired_align > align)
14918 {
14919 /* Except for the first move in the epilogue, we no longer know
14920 the constant offset in aliasing info. It does not seem worth
14921 the pain to maintain it for the first move, so throw away
14922 the info early. */
14923 src = change_address (src, BLKmode, srcreg);
14924 dst = change_address (dst, BLKmode, destreg);
14925 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14926 desired_align);
14927 }
14928 if (label && size_needed == 1)
14929 {
14930 emit_label (label);
14931 LABEL_NUSES (label) = 1;
14932 label = NULL;
14933 }
14934
14935 /* Step 3: Main loop. */
14936
14937 switch (alg)
14938 {
14939 case libcall:
14940 case no_stringop:
14941 gcc_unreachable ();
14942 case loop_1_byte:
14943 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14944 count_exp, QImode, 1, expected_size);
14945 break;
14946 case loop:
14947 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14948 count_exp, Pmode, 1, expected_size);
14949 break;
14950 case unrolled_loop:
14951 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14952 registers for 4 temporaries anyway. */
14953 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14954 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14955 expected_size);
14956 break;
14957 case rep_prefix_8_byte:
14958 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14959 DImode);
14960 break;
14961 case rep_prefix_4_byte:
14962 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14963 SImode);
14964 break;
14965 case rep_prefix_1_byte:
14966 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14967 QImode);
14968 break;
14969 }
14970 /* Properly adjust the offsets of the src and dest memory for aliasing. */
14971 if (CONST_INT_P (count_exp))
14972 {
14973 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14974 (count / size_needed) * size_needed);
14975 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14976 (count / size_needed) * size_needed);
14977 }
14978 else
14979 {
14980 src = change_address (src, BLKmode, srcreg);
14981 dst = change_address (dst, BLKmode, destreg);
14982 }
14983
14984 /* Step 4: Epilogue to copy the remaining bytes. */
14985
14986 if (label)
14987 {
14988 /* When the main loop is done, COUNT_EXP might hold the original count,
14989 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14990 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14991 bytes. Compensate if needed. */
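/* For instance (illustration only), if the main loop copies 4-byte chunks
   (SIZE_NEEDED == 4) but EPILOGUE_SIZE_NEEDED was rounded up to 8 because of
   the alignment prologue, COUNT_EXP is masked with 3 here so the epilogue
   does not re-copy bytes already handled by the main loop.  */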
14992
14993 if (size_needed < epilogue_size_needed)
14994 {
14995 tmp =
14996 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14997 GEN_INT (size_needed - 1), count_exp, 1,
14998 OPTAB_DIRECT);
14999 if (tmp != count_exp)
15000 emit_move_insn (count_exp, tmp);
15001 }
15002 emit_label (label);
15003 LABEL_NUSES (label) = 1;
15004 }
15005
15006 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15007 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15008 epilogue_size_needed);
15009 if (jump_around_label)
15010 emit_label (jump_around_label);
15011 return 1;
15012 }
15013
15014 /* Helper function for memset. For QImode value 0xXY produce
15015 0xXYXYXYXY of the width specified by MODE. This is essentially
15016 a * 0x10101010, but we can do slightly better than
15017 synth_mult by unwinding the sequence by hand on CPUs with
15018 slow multiply. */
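/* Worked example (illustrative): for VAL == 0x12 and MODE == DImode the
   constant path below produces 0x1212121212121212; for a non-constant VAL
   the register is widened and then combined with shifted copies of itself
   (reg |= reg << 8; reg |= reg << 16; and, for DImode, reg |= reg << 32),
   or multiplied by a promoted 0x01...01 constant when that is cheaper.  */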
15019 static rtx
15020 promote_duplicated_reg (enum machine_mode mode, rtx val)
15021 {
15022 enum machine_mode valmode = GET_MODE (val);
15023 rtx tmp;
15024 int nops = mode == DImode ? 3 : 2;
15025
15026 gcc_assert (mode == SImode || mode == DImode);
15027 if (val == const0_rtx)
15028 return copy_to_mode_reg (mode, const0_rtx);
15029 if (CONST_INT_P (val))
15030 {
15031 HOST_WIDE_INT v = INTVAL (val) & 255;
15032
15033 v |= v << 8;
15034 v |= v << 16;
15035 if (mode == DImode)
15036 v |= (v << 16) << 16;
15037 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15038 }
15039
15040 if (valmode == VOIDmode)
15041 valmode = QImode;
15042 if (valmode != QImode)
15043 val = gen_lowpart (QImode, val);
15044 if (mode == QImode)
15045 return val;
15046 if (!TARGET_PARTIAL_REG_STALL)
15047 nops--;
15048 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15049 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15050 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15051 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15052 {
15053 rtx reg = convert_modes (mode, QImode, val, true);
15054 tmp = promote_duplicated_reg (mode, const1_rtx);
15055 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15056 OPTAB_DIRECT);
15057 }
15058 else
15059 {
15060 rtx reg = convert_modes (mode, QImode, val, true);
15061
15062 if (!TARGET_PARTIAL_REG_STALL)
15063 if (mode == SImode)
15064 emit_insn (gen_movsi_insv_1 (reg, reg));
15065 else
15066 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15067 else
15068 {
15069 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15070 NULL, 1, OPTAB_DIRECT);
15071 reg =
15072 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15073 }
15074 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15075 NULL, 1, OPTAB_DIRECT);
15076 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15077 if (mode == SImode)
15078 return reg;
15079 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15080 NULL, 1, OPTAB_DIRECT);
15081 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15082 return reg;
15083 }
15084 }
15085
15086 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
15087 will be needed by the main loop copying SIZE_NEEDED chunks and by the
15088 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
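/* For example (illustration only): a 64-bit rep_prefix_8_byte memset
   (SIZE_NEEDED == 8) promotes the value to DImode, a rep_prefix_4_byte one
   to SImode, and a plain byte loop with no alignment prologue leaves the
   value unpromoted.  */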
15089 static rtx
15090 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15091 {
15092 rtx promoted_val;
15093
15094 if (TARGET_64BIT
15095 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15096 promoted_val = promote_duplicated_reg (DImode, val);
15097 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15098 promoted_val = promote_duplicated_reg (SImode, val);
15099 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15100 promoted_val = promote_duplicated_reg (HImode, val);
15101 else
15102 promoted_val = val;
15103
15104 return promoted_val;
15105 }
15106
15107 /* Expand string set operation (memset). Use i386 string operations when
15108 profitable. See the ix86_expand_movmem comment for an explanation of the
15109 individual steps performed. */
15110 int
15111 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15112 rtx expected_align_exp, rtx expected_size_exp)
15113 {
15114 rtx destreg;
15115 rtx label = NULL;
15116 rtx tmp;
15117 rtx jump_around_label = NULL;
15118 HOST_WIDE_INT align = 1;
15119 unsigned HOST_WIDE_INT count = 0;
15120 HOST_WIDE_INT expected_size = -1;
15121 int size_needed = 0, epilogue_size_needed;
15122 int desired_align = 0;
15123 enum stringop_alg alg;
15124 rtx promoted_val = NULL;
15125 bool force_loopy_epilogue = false;
15126 int dynamic_check;
15127
15128 if (CONST_INT_P (align_exp))
15129 align = INTVAL (align_exp);
15130 /* i386 can do misaligned access at a reasonably increased cost. */
15131 if (CONST_INT_P (expected_align_exp)
15132 && INTVAL (expected_align_exp) > align)
15133 align = INTVAL (expected_align_exp);
15134 if (CONST_INT_P (count_exp))
15135 count = expected_size = INTVAL (count_exp);
15136 if (CONST_INT_P (expected_size_exp) && count == 0)
15137 expected_size = INTVAL (expected_size_exp);
15138
15139 /* Step 0: Decide on preferred algorithm, desired alignment and
15140 size of chunks to be copied by main loop. */
15141
15142 alg = decide_alg (count, expected_size, true, &dynamic_check);
15143 desired_align = decide_alignment (align, alg, expected_size);
15144
15145 if (!TARGET_ALIGN_STRINGOPS)
15146 align = desired_align;
15147
15148 if (alg == libcall)
15149 return 0;
15150 gcc_assert (alg != no_stringop);
15151 if (!count)
15152 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15153 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15154 switch (alg)
15155 {
15156 case libcall:
15157 case no_stringop:
15158 gcc_unreachable ();
15159 case loop:
15160 size_needed = GET_MODE_SIZE (Pmode);
15161 break;
15162 case unrolled_loop:
15163 size_needed = GET_MODE_SIZE (Pmode) * 4;
15164 break;
15165 case rep_prefix_8_byte:
15166 size_needed = 8;
15167 break;
15168 case rep_prefix_4_byte:
15169 size_needed = 4;
15170 break;
15171 case rep_prefix_1_byte:
15172 case loop_1_byte:
15173 size_needed = 1;
15174 break;
15175 }
15176 epilogue_size_needed = size_needed;
15177
15178 /* Step 1: Prologue guard. */
15179
15180 /* Alignment code needs count to be in a register. */
15181 if (CONST_INT_P (count_exp) && desired_align > align)
15182 {
15183 enum machine_mode mode = SImode;
15184 if (TARGET_64BIT && (count & ~0xffffffff))
15185 mode = DImode;
15186 count_exp = force_reg (mode, count_exp);
15187 }
15188 /* Do the cheap promotion to allow better CSE across the
15189 main loop and epilogue (i.e. one load of the big constant in
15190 front of all the code). */
15191 if (CONST_INT_P (val_exp))
15192 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15193 desired_align, align);
15194 /* Ensure that alignment prologue won't copy past end of block. */
15195 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15196 {
15197 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15198 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
15199 Make sure it is a power of 2. */
15200 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15201
15202 /* To improve performance of small blocks, we jump around the VAL
15203 promoting code. This means that if the promoted VAL is not constant,
15204 we might not use it in the epilogue and have to use the byte
15205 loop variant. */
15206 if (epilogue_size_needed > 2 && !promoted_val)
15207 force_loopy_epilogue = true;
15208 label = gen_label_rtx ();
15209 emit_cmp_and_jump_insns (count_exp,
15210 GEN_INT (epilogue_size_needed),
15211 LTU, 0, counter_mode (count_exp), 1, label);
15212 if (GET_CODE (count_exp) == CONST_INT)
15213 ;
15214 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15215 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15216 else
15217 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15218 }
15219 if (dynamic_check != -1)
15220 {
15221 rtx hot_label = gen_label_rtx ();
15222 jump_around_label = gen_label_rtx ();
15223 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15224 LEU, 0, counter_mode (count_exp), 1, hot_label);
15225 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15226 set_storage_via_libcall (dst, count_exp, val_exp, false);
15227 emit_jump (jump_around_label);
15228 emit_label (hot_label);
15229 }
15230
15231 /* Step 2: Alignment prologue. */
15232
15233 /* Do the expensive promotion once we have branched off the small blocks. */
15234 if (!promoted_val)
15235 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15236 desired_align, align);
15237 gcc_assert (desired_align >= 1 && align >= 1);
15238
15239 if (desired_align > align)
15240 {
15241 /* Except for the first move in the epilogue, we no longer know
15242 the constant offset in aliasing info. It does not seem worth
15243 the pain to maintain it for the first move, so throw away
15244 the info early. */
15245 dst = change_address (dst, BLKmode, destreg);
15246 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15247 desired_align);
15248 }
15249 if (label && size_needed == 1)
15250 {
15251 emit_label (label);
15252 LABEL_NUSES (label) = 1;
15253 label = NULL;
15254 }
15255
15256 /* Step 3: Main loop. */
15257
15258 switch (alg)
15259 {
15260 case libcall:
15261 case no_stringop:
15262 gcc_unreachable ();
15263 case loop_1_byte:
15264 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15265 count_exp, QImode, 1, expected_size);
15266 break;
15267 case loop:
15268 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15269 count_exp, Pmode, 1, expected_size);
15270 break;
15271 case unrolled_loop:
15272 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15273 count_exp, Pmode, 4, expected_size);
15274 break;
15275 case rep_prefix_8_byte:
15276 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15277 DImode);
15278 break;
15279 case rep_prefix_4_byte:
15280 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15281 SImode);
15282 break;
15283 case rep_prefix_1_byte:
15284 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15285 QImode);
15286 break;
15287 }
15288 /* Properly adjust the offset of the destination memory for aliasing. */
15289 if (CONST_INT_P (count_exp))
15290 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15291 (count / size_needed) * size_needed);
15292 else
15293 dst = change_address (dst, BLKmode, destreg);
15294
15295 /* Step 4: Epilogue to copy the remaining bytes. */
15296
15297 if (label)
15298 {
15299 /* When the main loop is done, COUNT_EXP might hold the original count,
15300 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
15301 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
15302 bytes. Compensate if needed. */
15303
15304 if (size_needed < desired_align - align)
15305 {
15306 tmp =
15307 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15308 GEN_INT (size_needed - 1), count_exp, 1,
15309 OPTAB_DIRECT);
15310 size_needed = desired_align - align + 1;
15311 if (tmp != count_exp)
15312 emit_move_insn (count_exp, tmp);
15313 }
15314 emit_label (label);
15315 LABEL_NUSES (label) = 1;
15316 }
15317 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15318 {
15319 if (force_loopy_epilogue)
15320 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15321 size_needed);
15322 else
15323 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15324 size_needed);
15325 }
15326 if (jump_around_label)
15327 emit_label (jump_around_label);
15328 return 1;
15329 }
15330
15331 /* Expand the appropriate insns for doing strlen if not just doing
15332 repnz; scasb
15333
15334 out = result, initialized with the start address
15335 align_rtx = alignment of the address.
15336 scratch = scratch register, initialized with the start address when
15337 not aligned, otherwise undefined
15338
15339 This is just the body. It needs the initializations mentioned above and
15340 some address computing at the end. These things are done in i386.md. */
15341
15342 static void
15343 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15344 {
15345 int align;
15346 rtx tmp;
15347 rtx align_2_label = NULL_RTX;
15348 rtx align_3_label = NULL_RTX;
15349 rtx align_4_label = gen_label_rtx ();
15350 rtx end_0_label = gen_label_rtx ();
15351 rtx mem;
15352 rtx tmpreg = gen_reg_rtx (SImode);
15353 rtx scratch = gen_reg_rtx (SImode);
15354 rtx cmp;
15355
15356 align = 0;
15357 if (CONST_INT_P (align_rtx))
15358 align = INTVAL (align_rtx);
15359
15360 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15361
15362 /* Is there a known alignment and is it less than 4? */
15363 if (align < 4)
15364 {
15365 rtx scratch1 = gen_reg_rtx (Pmode);
15366 emit_move_insn (scratch1, out);
15367 /* Is there a known alignment and is it not 2? */
15368 if (align != 2)
15369 {
15370 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15371 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15372
15373 /* Leave just the 3 lower bits. */
15374 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15375 NULL_RTX, 0, OPTAB_WIDEN);
15376
15377 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15378 Pmode, 1, align_4_label);
15379 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15380 Pmode, 1, align_2_label);
15381 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15382 Pmode, 1, align_3_label);
15383 }
15384 else
15385 {
15386 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15387 check if it is aligned to a 4-byte boundary. */
15388
15389 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15390 NULL_RTX, 0, OPTAB_WIDEN);
15391
15392 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15393 Pmode, 1, align_4_label);
15394 }
15395
15396 mem = change_address (src, QImode, out);
15397
15398 /* Now compare the bytes. */
15399
15400 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15401 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15402 QImode, 1, end_0_label);
15403
15404 /* Increment the address. */
15405 if (TARGET_64BIT)
15406 emit_insn (gen_adddi3 (out, out, const1_rtx));
15407 else
15408 emit_insn (gen_addsi3 (out, out, const1_rtx));
15409
15410 /* Not needed with an alignment of 2 */
15411 if (align != 2)
15412 {
15413 emit_label (align_2_label);
15414
15415 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15416 end_0_label);
15417
15418 if (TARGET_64BIT)
15419 emit_insn (gen_adddi3 (out, out, const1_rtx));
15420 else
15421 emit_insn (gen_addsi3 (out, out, const1_rtx));
15422
15423 emit_label (align_3_label);
15424 }
15425
15426 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15427 end_0_label);
15428
15429 if (TARGET_64BIT)
15430 emit_insn (gen_adddi3 (out, out, const1_rtx));
15431 else
15432 emit_insn (gen_addsi3 (out, out, const1_rtx));
15433 }
15434
15435 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
15436 align this loop; it only makes the program bigger and does not help
15437 to speed it up. */
15438 emit_label (align_4_label);
15439
15440 mem = change_address (src, SImode, out);
15441 emit_move_insn (scratch, mem);
15442 if (TARGET_64BIT)
15443 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15444 else
15445 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15446
15447 /* This formula yields a nonzero result iff one of the bytes is zero.
15448 This saves three branches inside the loop and many cycles. */
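/* In C terms the sequence below computes
      tmpreg = ((scratch - 0x01010101) & ~scratch) & 0x80808080;
   which is nonzero exactly when some byte of SCRATCH is zero.  For example
   (illustration only), scratch == 0x12003456 yields 0x00800000, while
   scratch == 0x12345678 yields 0.  */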
15449
15450 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15451 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15452 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15453 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15454 gen_int_mode (0x80808080, SImode)));
15455 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15456 align_4_label);
15457
15458 if (TARGET_CMOVE)
15459 {
15460 rtx reg = gen_reg_rtx (SImode);
15461 rtx reg2 = gen_reg_rtx (Pmode);
15462 emit_move_insn (reg, tmpreg);
15463 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15464
15465 /* If zero is not in the first two bytes, move two bytes forward. */
15466 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15467 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15468 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15469 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15470 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15471 reg,
15472 tmpreg)));
15473 /* Emit lea manually to avoid clobbering the flags. */
15474 emit_insn (gen_rtx_SET (SImode, reg2,
15475 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15476
15477 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15478 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15479 emit_insn (gen_rtx_SET (VOIDmode, out,
15480 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15481 reg2,
15482 out)));
15483
15484 }
15485 else
15486 {
15487 rtx end_2_label = gen_label_rtx ();
15488 /* Is zero in the first two bytes? */
15489
15490 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15491 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15492 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15493 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15494 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15495 pc_rtx);
15496 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15497 JUMP_LABEL (tmp) = end_2_label;
15498
15499 /* Not in the first two. Move two bytes forward. */
15500 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15501 if (TARGET_64BIT)
15502 emit_insn (gen_adddi3 (out, out, const2_rtx));
15503 else
15504 emit_insn (gen_addsi3 (out, out, const2_rtx));
15505
15506 emit_label (end_2_label);
15507
15508 }
15509
15510 /* Avoid branch in fixing the byte. */
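/* A sketch of what happens here (descriptive only): at this point OUT is
   either 3 or 4 bytes past the terminating zero, and the low byte of TMPREG
   holds the 0x80 marker iff the zero is the farther (lower-addressed) of
   the two candidate bytes.  Adding that byte to itself moves the marker
   into the carry flag, so the subtract-with-borrow of 3 below adjusts OUT
   by 3 or 4 and leaves it pointing exactly at the zero byte.  */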
15511 tmpreg = gen_lowpart (QImode, tmpreg);
15512 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15513 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15514 if (TARGET_64BIT)
15515 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15516 else
15517 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15518
15519 emit_label (end_0_label);
15520 }
15521
15522 /* Expand strlen. */
15523
15524 int
15525 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15526 {
15527 rtx addr, scratch1, scratch2, scratch3, scratch4;
15528
15529 /* The generic case of the strlen expander is long. Avoid its
15530 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
15531
15532 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15533 && !TARGET_INLINE_ALL_STRINGOPS
15534 && !optimize_size
15535 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15536 return 0;
15537
15538 addr = force_reg (Pmode, XEXP (src, 0));
15539 scratch1 = gen_reg_rtx (Pmode);
15540
15541 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15542 && !optimize_size)
15543 {
15544 /* Well it seems that some optimizer does not combine a call like
15545 foo(strlen(bar), strlen(bar));
15546 when the move and the subtraction are done here. It does calculate
15547 the length just once when these instructions are done inside of
15548 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15549 often used and I use one fewer register for the lifetime of
15550 output_strlen_unroll() this is better. */
15551
15552 emit_move_insn (out, addr);
15553
15554 ix86_expand_strlensi_unroll_1 (out, src, align);
15555
15556 /* strlensi_unroll_1 returns the address of the zero at the end of
15557 the string, like memchr(), so compute the length by subtracting
15558 the start address. */
15559 if (TARGET_64BIT)
15560 emit_insn (gen_subdi3 (out, out, addr));
15561 else
15562 emit_insn (gen_subsi3 (out, out, addr));
15563 }
15564 else
15565 {
15566 rtx unspec;
15567 scratch2 = gen_reg_rtx (Pmode);
15568 scratch3 = gen_reg_rtx (Pmode);
15569 scratch4 = force_reg (Pmode, constm1_rtx);
15570
15571 emit_move_insn (scratch3, addr);
15572 eoschar = force_reg (QImode, eoschar);
15573
15574 src = replace_equiv_address_nv (src, scratch3);
15575
15576 /* If .md starts supporting :P, this can be done in .md. */
15577 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15578 scratch4), UNSPEC_SCAS);
15579 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
15580 if (TARGET_64BIT)
15581 {
15582 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15583 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15584 }
15585 else
15586 {
15587 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15588 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15589 }
15590 }
15591 return 1;
15592 }
15593
15594 /* For a given symbol (function), construct code to compute the address of
15595 its PLT entry in the large x86-64 PIC model. */
15596 rtx
15597 construct_plt_address (rtx symbol)
15598 {
15599 rtx tmp = gen_reg_rtx (Pmode);
15600 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15601
15602 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15603 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15604
15605 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15606 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15607 return tmp;
15608 }
15609
15610 void
15611 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15612 rtx callarg2 ATTRIBUTE_UNUSED,
15613 rtx pop, int sibcall)
15614 {
15615 rtx use = NULL, call;
15616
15617 if (pop == const0_rtx)
15618 pop = NULL;
15619 gcc_assert (!TARGET_64BIT || !pop);
15620
15621 if (TARGET_MACHO && !TARGET_64BIT)
15622 {
15623 #if TARGET_MACHO
15624 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15625 fnaddr = machopic_indirect_call_target (fnaddr);
15626 #endif
15627 }
15628 else
15629 {
15630 /* Static functions and indirect calls don't need the pic register. */
15631 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15632 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15633 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15634 use_reg (&use, pic_offset_table_rtx);
15635 }
15636
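/* Descriptive note (not from the original comments): for 64-bit calls to
   functions that may take a variable number of arguments, %al is used as a
   hidden argument giving an upper bound on the number of vector registers
   used; a non-negative CALLARG2 carries that count, and it is negative when
   no such hint is needed.  */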
15637 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15638 {
15639 rtx al = gen_rtx_REG (QImode, 0);
15640 emit_move_insn (al, callarg2);
15641 use_reg (&use, al);
15642 }
15643
15644 if (ix86_cmodel == CM_LARGE_PIC
15645 && GET_CODE (fnaddr) == MEM
15646 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15647 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15648 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15649 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15650 {
15651 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15652 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15653 }
15654 if (sibcall && TARGET_64BIT
15655 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15656 {
15657 rtx addr;
15658 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15659 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15660 emit_move_insn (fnaddr, addr);
15661 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15662 }
15663
15664 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15665 if (retval)
15666 call = gen_rtx_SET (VOIDmode, retval, call);
15667 if (pop)
15668 {
15669 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15670 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15671 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15672 }
15673
15674 call = emit_call_insn (call);
15675 if (use)
15676 CALL_INSN_FUNCTION_USAGE (call) = use;
15677 }
15678
15679 \f
15680 /* Clear stack slot assignments remembered from previous functions.
15681 This is called from INIT_EXPANDERS once before RTL is emitted for each
15682 function. */
15683
15684 static struct machine_function *
15685 ix86_init_machine_status (void)
15686 {
15687 struct machine_function *f;
15688
15689 f = GGC_CNEW (struct machine_function);
15690 f->use_fast_prologue_epilogue_nregs = -1;
15691 f->tls_descriptor_call_expanded_p = 0;
15692
15693 return f;
15694 }
15695
15696 /* Return a MEM corresponding to a stack slot with mode MODE.
15697 Allocate a new slot if necessary.
15698
15699 The RTL for a function can have several slots available: N is
15700 which slot to use. */
15701
15702 rtx
15703 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15704 {
15705 struct stack_local_entry *s;
15706
15707 gcc_assert (n < MAX_386_STACK_LOCALS);
15708
15709 /* Virtual slot is valid only before vregs are instantiated. */
15710 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
15711
15712 for (s = ix86_stack_locals; s; s = s->next)
15713 if (s->mode == mode && s->n == n)
15714 return copy_rtx (s->rtl);
15715
15716 s = (struct stack_local_entry *)
15717 ggc_alloc (sizeof (struct stack_local_entry));
15718 s->n = n;
15719 s->mode = mode;
15720 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15721
15722 s->next = ix86_stack_locals;
15723 ix86_stack_locals = s;
15724 return s->rtl;
15725 }
15726
15727 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15728
15729 static GTY(()) rtx ix86_tls_symbol;
15730 rtx
15731 ix86_tls_get_addr (void)
15732 {
15733
15734 if (!ix86_tls_symbol)
15735 {
15736 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15737 (TARGET_ANY_GNU_TLS
15738 && !TARGET_64BIT)
15739 ? "___tls_get_addr"
15740 : "__tls_get_addr");
15741 }
15742
15743 return ix86_tls_symbol;
15744 }
15745
15746 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15747
15748 static GTY(()) rtx ix86_tls_module_base_symbol;
15749 rtx
15750 ix86_tls_module_base (void)
15751 {
15752
15753 if (!ix86_tls_module_base_symbol)
15754 {
15755 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15756 "_TLS_MODULE_BASE_");
15757 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15758 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15759 }
15760
15761 return ix86_tls_module_base_symbol;
15762 }
15763 \f
15764 /* Calculate the length of the memory address in the instruction
15765 encoding. Does not include the one-byte modrm, opcode, or prefix. */
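/* A few illustrative results under this approximation: (%eax) -> 0,
   (%esp) -> 1, 8(%ebp) -> 1 (8-bit displacement), a bare symbol or constant
   address -> 4, and (%eax,%ebx,4) -> 1 for the SIB byte.  */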
15766
15767 int
15768 memory_address_length (rtx addr)
15769 {
15770 struct ix86_address parts;
15771 rtx base, index, disp;
15772 int len;
15773 int ok;
15774
15775 if (GET_CODE (addr) == PRE_DEC
15776 || GET_CODE (addr) == POST_INC
15777 || GET_CODE (addr) == PRE_MODIFY
15778 || GET_CODE (addr) == POST_MODIFY)
15779 return 0;
15780
15781 ok = ix86_decompose_address (addr, &parts);
15782 gcc_assert (ok);
15783
15784 if (parts.base && GET_CODE (parts.base) == SUBREG)
15785 parts.base = SUBREG_REG (parts.base);
15786 if (parts.index && GET_CODE (parts.index) == SUBREG)
15787 parts.index = SUBREG_REG (parts.index);
15788
15789 base = parts.base;
15790 index = parts.index;
15791 disp = parts.disp;
15792 len = 0;
15793
15794 /* Rule of thumb:
15795 - esp as the base always wants an index,
15796 - ebp as the base always wants a displacement. */
15797
15798 /* Register Indirect. */
15799 if (base && !index && !disp)
15800 {
15801 /* esp (for its index) and ebp (for its displacement) need
15802 the two-byte modrm form. */
15803 if (addr == stack_pointer_rtx
15804 || addr == arg_pointer_rtx
15805 || addr == frame_pointer_rtx
15806 || addr == hard_frame_pointer_rtx)
15807 len = 1;
15808 }
15809
15810 /* Direct Addressing. */
15811 else if (disp && !base && !index)
15812 len = 4;
15813
15814 else
15815 {
15816 /* Find the length of the displacement constant. */
15817 if (disp)
15818 {
15819 if (base && satisfies_constraint_K (disp))
15820 len = 1;
15821 else
15822 len = 4;
15823 }
15824 /* ebp always wants a displacement. */
15825 else if (base == hard_frame_pointer_rtx)
15826 len = 1;
15827
15828 /* An index requires the two-byte modrm form.... */
15829 if (index
15830 /* ...like esp, which always wants an index. */
15831 || base == stack_pointer_rtx
15832 || base == arg_pointer_rtx
15833 || base == frame_pointer_rtx)
15834 len += 1;
15835 }
15836
15837 return len;
15838 }
15839
15840 /* Compute the default value for the "length_immediate" attribute. When
15841 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15842 int
15843 ix86_attr_length_immediate_default (rtx insn, int shortform)
15844 {
15845 int len = 0;
15846 int i;
15847 extract_insn_cached (insn);
15848 for (i = recog_data.n_operands - 1; i >= 0; --i)
15849 if (CONSTANT_P (recog_data.operand[i]))
15850 {
15851 gcc_assert (!len);
15852 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15853 len = 1;
15854 else
15855 {
15856 switch (get_attr_mode (insn))
15857 {
15858 case MODE_QI:
15859 len += 1;
15860 break;
15861 case MODE_HI:
15862 len += 2;
15863 break;
15864 case MODE_SI:
15865 len += 4;
15866 break;
15867 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15868 case MODE_DI:
15869 len += 4;
15870 break;
15871 default:
15872 fatal_insn ("unknown insn mode", insn);
15873 }
15874 }
15875 }
15876 return len;
15877 }
15878 /* Compute default value for "length_address" attribute. */
15879 int
15880 ix86_attr_length_address_default (rtx insn)
15881 {
15882 int i;
15883
15884 if (get_attr_type (insn) == TYPE_LEA)
15885 {
15886 rtx set = PATTERN (insn);
15887
15888 if (GET_CODE (set) == PARALLEL)
15889 set = XVECEXP (set, 0, 0);
15890
15891 gcc_assert (GET_CODE (set) == SET);
15892
15893 return memory_address_length (SET_SRC (set));
15894 }
15895
15896 extract_insn_cached (insn);
15897 for (i = recog_data.n_operands - 1; i >= 0; --i)
15898 if (MEM_P (recog_data.operand[i]))
15899 {
15900 return memory_address_length (XEXP (recog_data.operand[i], 0));
15902 }
15903 return 0;
15904 }
15905 \f
15906 /* Return the maximum number of instructions a CPU can issue. */
15907
15908 static int
15909 ix86_issue_rate (void)
15910 {
15911 switch (ix86_tune)
15912 {
15913 case PROCESSOR_PENTIUM:
15914 case PROCESSOR_K6:
15915 return 2;
15916
15917 case PROCESSOR_PENTIUMPRO:
15918 case PROCESSOR_PENTIUM4:
15919 case PROCESSOR_ATHLON:
15920 case PROCESSOR_K8:
15921 case PROCESSOR_AMDFAM10:
15922 case PROCESSOR_NOCONA:
15923 case PROCESSOR_GENERIC32:
15924 case PROCESSOR_GENERIC64:
15925 return 3;
15926
15927 case PROCESSOR_CORE2:
15928 return 4;
15929
15930 default:
15931 return 1;
15932 }
15933 }
15934
15935 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15936 by DEP_INSN and nothing else set by DEP_INSN. */
15937
15938 static int
15939 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15940 {
15941 rtx set, set2;
15942
15943 /* Simplify the test for uninteresting insns. */
15944 if (insn_type != TYPE_SETCC
15945 && insn_type != TYPE_ICMOV
15946 && insn_type != TYPE_FCMOV
15947 && insn_type != TYPE_IBR)
15948 return 0;
15949
15950 if ((set = single_set (dep_insn)) != 0)
15951 {
15952 set = SET_DEST (set);
15953 set2 = NULL_RTX;
15954 }
15955 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15956 && XVECLEN (PATTERN (dep_insn), 0) == 2
15957 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15958 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15959 {
15960 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15961 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15962 }
15963 else
15964 return 0;
15965
15966 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15967 return 0;
15968
15969 /* This test is true if the dependent insn reads the flags but
15970 not any other potentially set register. */
15971 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15972 return 0;
15973
15974 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15975 return 0;
15976
15977 return 1;
15978 }
15979
15980 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15981 address with operands set by DEP_INSN. */
15982
15983 static int
15984 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15985 {
15986 rtx addr;
15987
15988 if (insn_type == TYPE_LEA
15989 && TARGET_PENTIUM)
15990 {
15991 addr = PATTERN (insn);
15992
15993 if (GET_CODE (addr) == PARALLEL)
15994 addr = XVECEXP (addr, 0, 0);
15995
15996 gcc_assert (GET_CODE (addr) == SET);
15997
15998 addr = SET_SRC (addr);
15999 }
16000 else
16001 {
16002 int i;
16003 extract_insn_cached (insn);
16004 for (i = recog_data.n_operands - 1; i >= 0; --i)
16005 if (MEM_P (recog_data.operand[i]))
16006 {
16007 addr = XEXP (recog_data.operand[i], 0);
16008 goto found;
16009 }
16010 return 0;
16011 found:;
16012 }
16013
16014 return modified_in_p (addr, dep_insn);
16015 }
16016
16017 static int
16018 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16019 {
16020 enum attr_type insn_type, dep_insn_type;
16021 enum attr_memory memory;
16022 rtx set, set2;
16023 int dep_insn_code_number;
16024
16025 /* Anti and output dependencies have zero cost on all CPUs. */
16026 if (REG_NOTE_KIND (link) != 0)
16027 return 0;
16028
16029 dep_insn_code_number = recog_memoized (dep_insn);
16030
16031 /* If we can't recognize the insns, we can't really do anything. */
16032 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16033 return cost;
16034
16035 insn_type = get_attr_type (insn);
16036 dep_insn_type = get_attr_type (dep_insn);
16037
16038 switch (ix86_tune)
16039 {
16040 case PROCESSOR_PENTIUM:
16041 /* Address Generation Interlock adds a cycle of latency. */
16042 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16043 cost += 1;
16044
16045 /* ??? Compares pair with jump/setcc. */
16046 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16047 cost = 0;
16048
16049 /* Floating point stores require value to be ready one cycle earlier. */
16050 if (insn_type == TYPE_FMOV
16051 && get_attr_memory (insn) == MEMORY_STORE
16052 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16053 cost += 1;
16054 break;
16055
16056 case PROCESSOR_PENTIUMPRO:
16057 memory = get_attr_memory (insn);
16058
16059 /* INT->FP conversion is expensive. */
16060 if (get_attr_fp_int_src (dep_insn))
16061 cost += 5;
16062
16063 /* There is one cycle extra latency between an FP op and a store. */
16064 if (insn_type == TYPE_FMOV
16065 && (set = single_set (dep_insn)) != NULL_RTX
16066 && (set2 = single_set (insn)) != NULL_RTX
16067 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16068 && MEM_P (SET_DEST (set2)))
16069 cost += 1;
16070
16071 /* Show the ability of the reorder buffer to hide the latency of a load
16072 by executing it in parallel with the previous instruction when the
16073 previous instruction is not needed to compute the address. */
16074 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16075 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16076 {
16077 /* Claim moves to take one cycle, as the core can issue one load
16078 at a time and the next load can start a cycle later. */
16079 if (dep_insn_type == TYPE_IMOV
16080 || dep_insn_type == TYPE_FMOV)
16081 cost = 1;
16082 else if (cost > 1)
16083 cost--;
16084 }
16085 break;
16086
16087 case PROCESSOR_K6:
16088 memory = get_attr_memory (insn);
16089
16090 /* The esp dependency is resolved before the instruction is really
16091 finished. */
16092 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16093 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16094 return 1;
16095
16096 /* INT->FP conversion is expensive. */
16097 if (get_attr_fp_int_src (dep_insn))
16098 cost += 5;
16099
16100 /* Show the ability of the reorder buffer to hide the latency of a load
16101 by executing it in parallel with the previous instruction when the
16102 previous instruction is not needed to compute the address. */
16103 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16104 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16105 {
16106 /* Claim moves to take one cycle, as the core can issue one load
16107 at a time and the next load can start a cycle later. */
16108 if (dep_insn_type == TYPE_IMOV
16109 || dep_insn_type == TYPE_FMOV)
16110 cost = 1;
16111 else if (cost > 2)
16112 cost -= 2;
16113 else
16114 cost = 1;
16115 }
16116 break;
16117
16118 case PROCESSOR_ATHLON:
16119 case PROCESSOR_K8:
16120 case PROCESSOR_AMDFAM10:
16121 case PROCESSOR_GENERIC32:
16122 case PROCESSOR_GENERIC64:
16123 memory = get_attr_memory (insn);
16124
16125 /* Show the ability of the reorder buffer to hide the latency of a load
16126 by executing it in parallel with the previous instruction when the
16127 previous instruction is not needed to compute the address. */
16128 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16129 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16130 {
16131 enum attr_unit unit = get_attr_unit (insn);
16132 int loadcost = 3;
16133
16134 /* Because of the difference between the length of integer and
16135 floating unit pipeline preparation stages, the memory operands
16136 for floating point are cheaper.
16137
16138 ??? For Athlon the difference is most probably 2. */
16139 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16140 loadcost = 3;
16141 else
16142 loadcost = TARGET_ATHLON ? 2 : 0;
16143
16144 if (cost >= loadcost)
16145 cost -= loadcost;
16146 else
16147 cost = 0;
16148 }
16149
16150 default:
16151 break;
16152 }
16153
16154 return cost;
16155 }
16156
16157 /* How many alternative schedules to try. This should be as wide as the
16158 scheduling freedom in the DFA, but no wider. Making this value too
16159 large results in extra work for the scheduler. */
16160
16161 static int
16162 ia32_multipass_dfa_lookahead (void)
16163 {
16164 if (ix86_tune == PROCESSOR_PENTIUM)
16165 return 2;
16166
16167 if (ix86_tune == PROCESSOR_PENTIUMPRO
16168 || ix86_tune == PROCESSOR_K6)
16169 return 1;
16170
16171 else
16172 return 0;
16173 }
16174
16175 \f
16176 /* Compute the alignment given to a constant that is being placed in memory.
16177 EXP is the constant and ALIGN is the alignment that the object would
16178 ordinarily have.
16179 The value of this function is used instead of that alignment to align
16180 the object. */
16181
16182 int
16183 ix86_constant_alignment (tree exp, int align)
16184 {
16185 if (TREE_CODE (exp) == REAL_CST)
16186 {
16187 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16188 return 64;
16189 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16190 return 128;
16191 }
16192 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16193 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16194 return BITS_PER_WORD;
16195
16196 return align;
16197 }
16198
16199 /* Compute the alignment for a static variable.
16200 TYPE is the data type, and ALIGN is the alignment that
16201 the object would ordinarily have. The value of this function is used
16202 instead of that alignment to align the object. */
16203
16204 int
16205 ix86_data_alignment (tree type, int align)
16206 {
16207 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16208
16209 if (AGGREGATE_TYPE_P (type)
16210 && TYPE_SIZE (type)
16211 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16212 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16213 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16214 && align < max_align)
16215 align = max_align;
16216
16217 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16218 to a 16-byte boundary. */
16219 if (TARGET_64BIT)
16220 {
16221 if (AGGREGATE_TYPE_P (type)
16222 && TYPE_SIZE (type)
16223 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16224 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16225 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16226 return 128;
16227 }
16228
16229 if (TREE_CODE (type) == ARRAY_TYPE)
16230 {
16231 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16232 return 64;
16233 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16234 return 128;
16235 }
16236 else if (TREE_CODE (type) == COMPLEX_TYPE)
16237 {
16238
16239 if (TYPE_MODE (type) == DCmode && align < 64)
16240 return 64;
16241 if (TYPE_MODE (type) == XCmode && align < 128)
16242 return 128;
16243 }
16244 else if ((TREE_CODE (type) == RECORD_TYPE
16245 || TREE_CODE (type) == UNION_TYPE
16246 || TREE_CODE (type) == QUAL_UNION_TYPE)
16247 && TYPE_FIELDS (type))
16248 {
16249 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16250 return 64;
16251 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16252 return 128;
16253 }
16254 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16255 || TREE_CODE (type) == INTEGER_TYPE)
16256 {
16257 if (TYPE_MODE (type) == DFmode && align < 64)
16258 return 64;
16259 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16260 return 128;
16261 }
16262
16263 return align;
16264 }
16265
16266 /* Compute the alignment for a local variable.
16267 TYPE is the data type, and ALIGN is the alignment that
16268 the object would ordinarily have. The value of this macro is used
16269 instead of that alignment to align the object. */
16270
16271 int
16272 ix86_local_alignment (tree type, int align)
16273 {
16274 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16275 to a 16-byte boundary. */
16276 if (TARGET_64BIT)
16277 {
16278 if (AGGREGATE_TYPE_P (type)
16279 && TYPE_SIZE (type)
16280 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16281 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16282 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16283 return 128;
16284 }
16285 if (TREE_CODE (type) == ARRAY_TYPE)
16286 {
16287 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16288 return 64;
16289 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16290 return 128;
16291 }
16292 else if (TREE_CODE (type) == COMPLEX_TYPE)
16293 {
16294 if (TYPE_MODE (type) == DCmode && align < 64)
16295 return 64;
16296 if (TYPE_MODE (type) == XCmode && align < 128)
16297 return 128;
16298 }
16299 else if ((TREE_CODE (type) == RECORD_TYPE
16300 || TREE_CODE (type) == UNION_TYPE
16301 || TREE_CODE (type) == QUAL_UNION_TYPE)
16302 && TYPE_FIELDS (type))
16303 {
16304 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16305 return 64;
16306 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16307 return 128;
16308 }
16309 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16310 || TREE_CODE (type) == INTEGER_TYPE)
16311 {
16312
16313 if (TYPE_MODE (type) == DFmode && align < 64)
16314 return 64;
16315 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16316 return 128;
16317 }
16318 return align;
16319 }
16320 \f
16321 /* Emit RTL insns to initialize the variable parts of a trampoline.
16322 FNADDR is an RTX for the address of the function's pure code.
16323 CXT is an RTX for the static chain value for the function. */
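/* Byte layout emitted below, roughly (illustrative only, little-endian):
     32-bit:  b9 <cxt:4> e9 <rel32:4>            mov $cxt, %ecx; jmp fnaddr
     64-bit:  41 bb <imm32>  or  49 bb <imm64>   mov/movabs $fnaddr, %r11
              49 ba <imm64>                      movabs $cxt, %r10
              49 ff e3                           jmp *%r11  */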
16324 void
16325 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16326 {
16327 if (!TARGET_64BIT)
16328 {
16329 /* Compute offset from the end of the jmp to the target function. */
16330 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16331 plus_constant (tramp, 10),
16332 NULL_RTX, 1, OPTAB_DIRECT);
16333 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16334 gen_int_mode (0xb9, QImode));
16335 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16336 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16337 gen_int_mode (0xe9, QImode));
16338 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16339 }
16340 else
16341 {
16342 int offset = 0;
16343 /* Try to load the address using the shorter movl instead of movabs.
16344 We may want to support movq for kernel mode, but the kernel does not use
16345 trampolines at the moment. */
16346 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16347 {
16348 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16349 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16350 gen_int_mode (0xbb41, HImode));
16351 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16352 gen_lowpart (SImode, fnaddr));
16353 offset += 6;
16354 }
16355 else
16356 {
16357 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16358 gen_int_mode (0xbb49, HImode));
16359 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16360 fnaddr);
16361 offset += 10;
16362 }
16363 /* Load static chain using movabs to r10. */
16364 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16365 gen_int_mode (0xba49, HImode));
16366 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16367 cxt);
16368 offset += 10;
16369 /* Jump to r11. */
16370 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16371 gen_int_mode (0xff49, HImode));
16372 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16373 gen_int_mode (0xe3, QImode));
16374 offset += 3;
16375 gcc_assert (offset <= TRAMPOLINE_SIZE);
16376 }
16377
16378 #ifdef ENABLE_EXECUTE_STACK
16379 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16380 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16381 #endif
16382 }
16383 \f
16384 /* Codes for all the SSE/MMX builtins. */
16385 enum ix86_builtins
16386 {
16387 IX86_BUILTIN_ADDPS,
16388 IX86_BUILTIN_ADDSS,
16389 IX86_BUILTIN_DIVPS,
16390 IX86_BUILTIN_DIVSS,
16391 IX86_BUILTIN_MULPS,
16392 IX86_BUILTIN_MULSS,
16393 IX86_BUILTIN_SUBPS,
16394 IX86_BUILTIN_SUBSS,
16395
16396 IX86_BUILTIN_CMPEQPS,
16397 IX86_BUILTIN_CMPLTPS,
16398 IX86_BUILTIN_CMPLEPS,
16399 IX86_BUILTIN_CMPGTPS,
16400 IX86_BUILTIN_CMPGEPS,
16401 IX86_BUILTIN_CMPNEQPS,
16402 IX86_BUILTIN_CMPNLTPS,
16403 IX86_BUILTIN_CMPNLEPS,
16404 IX86_BUILTIN_CMPNGTPS,
16405 IX86_BUILTIN_CMPNGEPS,
16406 IX86_BUILTIN_CMPORDPS,
16407 IX86_BUILTIN_CMPUNORDPS,
16408 IX86_BUILTIN_CMPEQSS,
16409 IX86_BUILTIN_CMPLTSS,
16410 IX86_BUILTIN_CMPLESS,
16411 IX86_BUILTIN_CMPNEQSS,
16412 IX86_BUILTIN_CMPNLTSS,
16413 IX86_BUILTIN_CMPNLESS,
16414 IX86_BUILTIN_CMPNGTSS,
16415 IX86_BUILTIN_CMPNGESS,
16416 IX86_BUILTIN_CMPORDSS,
16417 IX86_BUILTIN_CMPUNORDSS,
16418
16419 IX86_BUILTIN_COMIEQSS,
16420 IX86_BUILTIN_COMILTSS,
16421 IX86_BUILTIN_COMILESS,
16422 IX86_BUILTIN_COMIGTSS,
16423 IX86_BUILTIN_COMIGESS,
16424 IX86_BUILTIN_COMINEQSS,
16425 IX86_BUILTIN_UCOMIEQSS,
16426 IX86_BUILTIN_UCOMILTSS,
16427 IX86_BUILTIN_UCOMILESS,
16428 IX86_BUILTIN_UCOMIGTSS,
16429 IX86_BUILTIN_UCOMIGESS,
16430 IX86_BUILTIN_UCOMINEQSS,
16431
16432 IX86_BUILTIN_CVTPI2PS,
16433 IX86_BUILTIN_CVTPS2PI,
16434 IX86_BUILTIN_CVTSI2SS,
16435 IX86_BUILTIN_CVTSI642SS,
16436 IX86_BUILTIN_CVTSS2SI,
16437 IX86_BUILTIN_CVTSS2SI64,
16438 IX86_BUILTIN_CVTTPS2PI,
16439 IX86_BUILTIN_CVTTSS2SI,
16440 IX86_BUILTIN_CVTTSS2SI64,
16441
16442 IX86_BUILTIN_MAXPS,
16443 IX86_BUILTIN_MAXSS,
16444 IX86_BUILTIN_MINPS,
16445 IX86_BUILTIN_MINSS,
16446
16447 IX86_BUILTIN_LOADUPS,
16448 IX86_BUILTIN_STOREUPS,
16449 IX86_BUILTIN_MOVSS,
16450
16451 IX86_BUILTIN_MOVHLPS,
16452 IX86_BUILTIN_MOVLHPS,
16453 IX86_BUILTIN_LOADHPS,
16454 IX86_BUILTIN_LOADLPS,
16455 IX86_BUILTIN_STOREHPS,
16456 IX86_BUILTIN_STORELPS,
16457
16458 IX86_BUILTIN_MASKMOVQ,
16459 IX86_BUILTIN_MOVMSKPS,
16460 IX86_BUILTIN_PMOVMSKB,
16461
16462 IX86_BUILTIN_MOVNTPS,
16463 IX86_BUILTIN_MOVNTQ,
16464
16465 IX86_BUILTIN_LOADDQU,
16466 IX86_BUILTIN_STOREDQU,
16467
16468 IX86_BUILTIN_PACKSSWB,
16469 IX86_BUILTIN_PACKSSDW,
16470 IX86_BUILTIN_PACKUSWB,
16471
16472 IX86_BUILTIN_PADDB,
16473 IX86_BUILTIN_PADDW,
16474 IX86_BUILTIN_PADDD,
16475 IX86_BUILTIN_PADDQ,
16476 IX86_BUILTIN_PADDSB,
16477 IX86_BUILTIN_PADDSW,
16478 IX86_BUILTIN_PADDUSB,
16479 IX86_BUILTIN_PADDUSW,
16480 IX86_BUILTIN_PSUBB,
16481 IX86_BUILTIN_PSUBW,
16482 IX86_BUILTIN_PSUBD,
16483 IX86_BUILTIN_PSUBQ,
16484 IX86_BUILTIN_PSUBSB,
16485 IX86_BUILTIN_PSUBSW,
16486 IX86_BUILTIN_PSUBUSB,
16487 IX86_BUILTIN_PSUBUSW,
16488
16489 IX86_BUILTIN_PAND,
16490 IX86_BUILTIN_PANDN,
16491 IX86_BUILTIN_POR,
16492 IX86_BUILTIN_PXOR,
16493
16494 IX86_BUILTIN_PAVGB,
16495 IX86_BUILTIN_PAVGW,
16496
16497 IX86_BUILTIN_PCMPEQB,
16498 IX86_BUILTIN_PCMPEQW,
16499 IX86_BUILTIN_PCMPEQD,
16500 IX86_BUILTIN_PCMPGTB,
16501 IX86_BUILTIN_PCMPGTW,
16502 IX86_BUILTIN_PCMPGTD,
16503
16504 IX86_BUILTIN_PMADDWD,
16505
16506 IX86_BUILTIN_PMAXSW,
16507 IX86_BUILTIN_PMAXUB,
16508 IX86_BUILTIN_PMINSW,
16509 IX86_BUILTIN_PMINUB,
16510
16511 IX86_BUILTIN_PMULHUW,
16512 IX86_BUILTIN_PMULHW,
16513 IX86_BUILTIN_PMULLW,
16514
16515 IX86_BUILTIN_PSADBW,
16516 IX86_BUILTIN_PSHUFW,
16517
16518 IX86_BUILTIN_PSLLW,
16519 IX86_BUILTIN_PSLLD,
16520 IX86_BUILTIN_PSLLQ,
16521 IX86_BUILTIN_PSRAW,
16522 IX86_BUILTIN_PSRAD,
16523 IX86_BUILTIN_PSRLW,
16524 IX86_BUILTIN_PSRLD,
16525 IX86_BUILTIN_PSRLQ,
16526 IX86_BUILTIN_PSLLWI,
16527 IX86_BUILTIN_PSLLDI,
16528 IX86_BUILTIN_PSLLQI,
16529 IX86_BUILTIN_PSRAWI,
16530 IX86_BUILTIN_PSRADI,
16531 IX86_BUILTIN_PSRLWI,
16532 IX86_BUILTIN_PSRLDI,
16533 IX86_BUILTIN_PSRLQI,
16534
16535 IX86_BUILTIN_PUNPCKHBW,
16536 IX86_BUILTIN_PUNPCKHWD,
16537 IX86_BUILTIN_PUNPCKHDQ,
16538 IX86_BUILTIN_PUNPCKLBW,
16539 IX86_BUILTIN_PUNPCKLWD,
16540 IX86_BUILTIN_PUNPCKLDQ,
16541
16542 IX86_BUILTIN_SHUFPS,
16543
16544 IX86_BUILTIN_RCPPS,
16545 IX86_BUILTIN_RCPSS,
16546 IX86_BUILTIN_RSQRTPS,
16547 IX86_BUILTIN_RSQRTSS,
16548 IX86_BUILTIN_RSQRTF,
16549 IX86_BUILTIN_SQRTPS,
16550 IX86_BUILTIN_SQRTSS,
16551
16552 IX86_BUILTIN_UNPCKHPS,
16553 IX86_BUILTIN_UNPCKLPS,
16554
16555 IX86_BUILTIN_ANDPS,
16556 IX86_BUILTIN_ANDNPS,
16557 IX86_BUILTIN_ORPS,
16558 IX86_BUILTIN_XORPS,
16559
16560 IX86_BUILTIN_EMMS,
16561 IX86_BUILTIN_LDMXCSR,
16562 IX86_BUILTIN_STMXCSR,
16563 IX86_BUILTIN_SFENCE,
16564
16565 /* 3DNow! Original */
16566 IX86_BUILTIN_FEMMS,
16567 IX86_BUILTIN_PAVGUSB,
16568 IX86_BUILTIN_PF2ID,
16569 IX86_BUILTIN_PFACC,
16570 IX86_BUILTIN_PFADD,
16571 IX86_BUILTIN_PFCMPEQ,
16572 IX86_BUILTIN_PFCMPGE,
16573 IX86_BUILTIN_PFCMPGT,
16574 IX86_BUILTIN_PFMAX,
16575 IX86_BUILTIN_PFMIN,
16576 IX86_BUILTIN_PFMUL,
16577 IX86_BUILTIN_PFRCP,
16578 IX86_BUILTIN_PFRCPIT1,
16579 IX86_BUILTIN_PFRCPIT2,
16580 IX86_BUILTIN_PFRSQIT1,
16581 IX86_BUILTIN_PFRSQRT,
16582 IX86_BUILTIN_PFSUB,
16583 IX86_BUILTIN_PFSUBR,
16584 IX86_BUILTIN_PI2FD,
16585 IX86_BUILTIN_PMULHRW,
16586
16587 /* 3DNow! Athlon Extensions */
16588 IX86_BUILTIN_PF2IW,
16589 IX86_BUILTIN_PFNACC,
16590 IX86_BUILTIN_PFPNACC,
16591 IX86_BUILTIN_PI2FW,
16592 IX86_BUILTIN_PSWAPDSI,
16593 IX86_BUILTIN_PSWAPDSF,
16594
16595 /* SSE2 */
16596 IX86_BUILTIN_ADDPD,
16597 IX86_BUILTIN_ADDSD,
16598 IX86_BUILTIN_DIVPD,
16599 IX86_BUILTIN_DIVSD,
16600 IX86_BUILTIN_MULPD,
16601 IX86_BUILTIN_MULSD,
16602 IX86_BUILTIN_SUBPD,
16603 IX86_BUILTIN_SUBSD,
16604
16605 IX86_BUILTIN_CMPEQPD,
16606 IX86_BUILTIN_CMPLTPD,
16607 IX86_BUILTIN_CMPLEPD,
16608 IX86_BUILTIN_CMPGTPD,
16609 IX86_BUILTIN_CMPGEPD,
16610 IX86_BUILTIN_CMPNEQPD,
16611 IX86_BUILTIN_CMPNLTPD,
16612 IX86_BUILTIN_CMPNLEPD,
16613 IX86_BUILTIN_CMPNGTPD,
16614 IX86_BUILTIN_CMPNGEPD,
16615 IX86_BUILTIN_CMPORDPD,
16616 IX86_BUILTIN_CMPUNORDPD,
16617 IX86_BUILTIN_CMPEQSD,
16618 IX86_BUILTIN_CMPLTSD,
16619 IX86_BUILTIN_CMPLESD,
16620 IX86_BUILTIN_CMPNEQSD,
16621 IX86_BUILTIN_CMPNLTSD,
16622 IX86_BUILTIN_CMPNLESD,
16623 IX86_BUILTIN_CMPORDSD,
16624 IX86_BUILTIN_CMPUNORDSD,
16625
16626 IX86_BUILTIN_COMIEQSD,
16627 IX86_BUILTIN_COMILTSD,
16628 IX86_BUILTIN_COMILESD,
16629 IX86_BUILTIN_COMIGTSD,
16630 IX86_BUILTIN_COMIGESD,
16631 IX86_BUILTIN_COMINEQSD,
16632 IX86_BUILTIN_UCOMIEQSD,
16633 IX86_BUILTIN_UCOMILTSD,
16634 IX86_BUILTIN_UCOMILESD,
16635 IX86_BUILTIN_UCOMIGTSD,
16636 IX86_BUILTIN_UCOMIGESD,
16637 IX86_BUILTIN_UCOMINEQSD,
16638
16639 IX86_BUILTIN_MAXPD,
16640 IX86_BUILTIN_MAXSD,
16641 IX86_BUILTIN_MINPD,
16642 IX86_BUILTIN_MINSD,
16643
16644 IX86_BUILTIN_ANDPD,
16645 IX86_BUILTIN_ANDNPD,
16646 IX86_BUILTIN_ORPD,
16647 IX86_BUILTIN_XORPD,
16648
16649 IX86_BUILTIN_SQRTPD,
16650 IX86_BUILTIN_SQRTSD,
16651
16652 IX86_BUILTIN_UNPCKHPD,
16653 IX86_BUILTIN_UNPCKLPD,
16654
16655 IX86_BUILTIN_SHUFPD,
16656
16657 IX86_BUILTIN_LOADUPD,
16658 IX86_BUILTIN_STOREUPD,
16659 IX86_BUILTIN_MOVSD,
16660
16661 IX86_BUILTIN_LOADHPD,
16662 IX86_BUILTIN_LOADLPD,
16663
16664 IX86_BUILTIN_CVTDQ2PD,
16665 IX86_BUILTIN_CVTDQ2PS,
16666
16667 IX86_BUILTIN_CVTPD2DQ,
16668 IX86_BUILTIN_CVTPD2PI,
16669 IX86_BUILTIN_CVTPD2PS,
16670 IX86_BUILTIN_CVTTPD2DQ,
16671 IX86_BUILTIN_CVTTPD2PI,
16672
16673 IX86_BUILTIN_CVTPI2PD,
16674 IX86_BUILTIN_CVTSI2SD,
16675 IX86_BUILTIN_CVTSI642SD,
16676
16677 IX86_BUILTIN_CVTSD2SI,
16678 IX86_BUILTIN_CVTSD2SI64,
16679 IX86_BUILTIN_CVTSD2SS,
16680 IX86_BUILTIN_CVTSS2SD,
16681 IX86_BUILTIN_CVTTSD2SI,
16682 IX86_BUILTIN_CVTTSD2SI64,
16683
16684 IX86_BUILTIN_CVTPS2DQ,
16685 IX86_BUILTIN_CVTPS2PD,
16686 IX86_BUILTIN_CVTTPS2DQ,
16687
16688 IX86_BUILTIN_MOVNTI,
16689 IX86_BUILTIN_MOVNTPD,
16690 IX86_BUILTIN_MOVNTDQ,
16691
16692 /* SSE2 MMX */
16693 IX86_BUILTIN_MASKMOVDQU,
16694 IX86_BUILTIN_MOVMSKPD,
16695 IX86_BUILTIN_PMOVMSKB128,
16696
16697 IX86_BUILTIN_PACKSSWB128,
16698 IX86_BUILTIN_PACKSSDW128,
16699 IX86_BUILTIN_PACKUSWB128,
16700
16701 IX86_BUILTIN_PADDB128,
16702 IX86_BUILTIN_PADDW128,
16703 IX86_BUILTIN_PADDD128,
16704 IX86_BUILTIN_PADDQ128,
16705 IX86_BUILTIN_PADDSB128,
16706 IX86_BUILTIN_PADDSW128,
16707 IX86_BUILTIN_PADDUSB128,
16708 IX86_BUILTIN_PADDUSW128,
16709 IX86_BUILTIN_PSUBB128,
16710 IX86_BUILTIN_PSUBW128,
16711 IX86_BUILTIN_PSUBD128,
16712 IX86_BUILTIN_PSUBQ128,
16713 IX86_BUILTIN_PSUBSB128,
16714 IX86_BUILTIN_PSUBSW128,
16715 IX86_BUILTIN_PSUBUSB128,
16716 IX86_BUILTIN_PSUBUSW128,
16717
16718 IX86_BUILTIN_PAND128,
16719 IX86_BUILTIN_PANDN128,
16720 IX86_BUILTIN_POR128,
16721 IX86_BUILTIN_PXOR128,
16722
16723 IX86_BUILTIN_PAVGB128,
16724 IX86_BUILTIN_PAVGW128,
16725
16726 IX86_BUILTIN_PCMPEQB128,
16727 IX86_BUILTIN_PCMPEQW128,
16728 IX86_BUILTIN_PCMPEQD128,
16729 IX86_BUILTIN_PCMPGTB128,
16730 IX86_BUILTIN_PCMPGTW128,
16731 IX86_BUILTIN_PCMPGTD128,
16732
16733 IX86_BUILTIN_PMADDWD128,
16734
16735 IX86_BUILTIN_PMAXSW128,
16736 IX86_BUILTIN_PMAXUB128,
16737 IX86_BUILTIN_PMINSW128,
16738 IX86_BUILTIN_PMINUB128,
16739
16740 IX86_BUILTIN_PMULUDQ,
16741 IX86_BUILTIN_PMULUDQ128,
16742 IX86_BUILTIN_PMULHUW128,
16743 IX86_BUILTIN_PMULHW128,
16744 IX86_BUILTIN_PMULLW128,
16745
16746 IX86_BUILTIN_PSADBW128,
16747 IX86_BUILTIN_PSHUFHW,
16748 IX86_BUILTIN_PSHUFLW,
16749 IX86_BUILTIN_PSHUFD,
16750
16751 IX86_BUILTIN_PSLLDQI128,
16752 IX86_BUILTIN_PSLLWI128,
16753 IX86_BUILTIN_PSLLDI128,
16754 IX86_BUILTIN_PSLLQI128,
16755 IX86_BUILTIN_PSRAWI128,
16756 IX86_BUILTIN_PSRADI128,
16757 IX86_BUILTIN_PSRLDQI128,
16758 IX86_BUILTIN_PSRLWI128,
16759 IX86_BUILTIN_PSRLDI128,
16760 IX86_BUILTIN_PSRLQI128,
16761
16762 IX86_BUILTIN_PSLLDQ128,
16763 IX86_BUILTIN_PSLLW128,
16764 IX86_BUILTIN_PSLLD128,
16765 IX86_BUILTIN_PSLLQ128,
16766 IX86_BUILTIN_PSRAW128,
16767 IX86_BUILTIN_PSRAD128,
16768 IX86_BUILTIN_PSRLW128,
16769 IX86_BUILTIN_PSRLD128,
16770 IX86_BUILTIN_PSRLQ128,
16771
16772 IX86_BUILTIN_PUNPCKHBW128,
16773 IX86_BUILTIN_PUNPCKHWD128,
16774 IX86_BUILTIN_PUNPCKHDQ128,
16775 IX86_BUILTIN_PUNPCKHQDQ128,
16776 IX86_BUILTIN_PUNPCKLBW128,
16777 IX86_BUILTIN_PUNPCKLWD128,
16778 IX86_BUILTIN_PUNPCKLDQ128,
16779 IX86_BUILTIN_PUNPCKLQDQ128,
16780
16781 IX86_BUILTIN_CLFLUSH,
16782 IX86_BUILTIN_MFENCE,
16783 IX86_BUILTIN_LFENCE,
16784
16785 /* Prescott New Instructions. */
16786 IX86_BUILTIN_ADDSUBPS,
16787 IX86_BUILTIN_HADDPS,
16788 IX86_BUILTIN_HSUBPS,
16789 IX86_BUILTIN_MOVSHDUP,
16790 IX86_BUILTIN_MOVSLDUP,
16791 IX86_BUILTIN_ADDSUBPD,
16792 IX86_BUILTIN_HADDPD,
16793 IX86_BUILTIN_HSUBPD,
16794 IX86_BUILTIN_LDDQU,
16795
16796 IX86_BUILTIN_MONITOR,
16797 IX86_BUILTIN_MWAIT,
16798
16799 /* SSSE3. */
16800 IX86_BUILTIN_PHADDW,
16801 IX86_BUILTIN_PHADDD,
16802 IX86_BUILTIN_PHADDSW,
16803 IX86_BUILTIN_PHSUBW,
16804 IX86_BUILTIN_PHSUBD,
16805 IX86_BUILTIN_PHSUBSW,
16806 IX86_BUILTIN_PMADDUBSW,
16807 IX86_BUILTIN_PMULHRSW,
16808 IX86_BUILTIN_PSHUFB,
16809 IX86_BUILTIN_PSIGNB,
16810 IX86_BUILTIN_PSIGNW,
16811 IX86_BUILTIN_PSIGND,
16812 IX86_BUILTIN_PALIGNR,
16813 IX86_BUILTIN_PABSB,
16814 IX86_BUILTIN_PABSW,
16815 IX86_BUILTIN_PABSD,
16816
16817 IX86_BUILTIN_PHADDW128,
16818 IX86_BUILTIN_PHADDD128,
16819 IX86_BUILTIN_PHADDSW128,
16820 IX86_BUILTIN_PHSUBW128,
16821 IX86_BUILTIN_PHSUBD128,
16822 IX86_BUILTIN_PHSUBSW128,
16823 IX86_BUILTIN_PMADDUBSW128,
16824 IX86_BUILTIN_PMULHRSW128,
16825 IX86_BUILTIN_PSHUFB128,
16826 IX86_BUILTIN_PSIGNB128,
16827 IX86_BUILTIN_PSIGNW128,
16828 IX86_BUILTIN_PSIGND128,
16829 IX86_BUILTIN_PALIGNR128,
16830 IX86_BUILTIN_PABSB128,
16831 IX86_BUILTIN_PABSW128,
16832 IX86_BUILTIN_PABSD128,
16833
16834 /* AMDFAM10 - SSE4A New Instructions. */
16835 IX86_BUILTIN_MOVNTSD,
16836 IX86_BUILTIN_MOVNTSS,
16837 IX86_BUILTIN_EXTRQI,
16838 IX86_BUILTIN_EXTRQ,
16839 IX86_BUILTIN_INSERTQI,
16840 IX86_BUILTIN_INSERTQ,
16841
16842 /* SSE4.1. */
16843 IX86_BUILTIN_BLENDPD,
16844 IX86_BUILTIN_BLENDPS,
16845 IX86_BUILTIN_BLENDVPD,
16846 IX86_BUILTIN_BLENDVPS,
16847 IX86_BUILTIN_PBLENDVB128,
16848 IX86_BUILTIN_PBLENDW128,
16849
16850 IX86_BUILTIN_DPPD,
16851 IX86_BUILTIN_DPPS,
16852
16853 IX86_BUILTIN_INSERTPS128,
16854
16855 IX86_BUILTIN_MOVNTDQA,
16856 IX86_BUILTIN_MPSADBW128,
16857 IX86_BUILTIN_PACKUSDW128,
16858 IX86_BUILTIN_PCMPEQQ,
16859 IX86_BUILTIN_PHMINPOSUW128,
16860
16861 IX86_BUILTIN_PMAXSB128,
16862 IX86_BUILTIN_PMAXSD128,
16863 IX86_BUILTIN_PMAXUD128,
16864 IX86_BUILTIN_PMAXUW128,
16865
16866 IX86_BUILTIN_PMINSB128,
16867 IX86_BUILTIN_PMINSD128,
16868 IX86_BUILTIN_PMINUD128,
16869 IX86_BUILTIN_PMINUW128,
16870
16871 IX86_BUILTIN_PMOVSXBW128,
16872 IX86_BUILTIN_PMOVSXBD128,
16873 IX86_BUILTIN_PMOVSXBQ128,
16874 IX86_BUILTIN_PMOVSXWD128,
16875 IX86_BUILTIN_PMOVSXWQ128,
16876 IX86_BUILTIN_PMOVSXDQ128,
16877
16878 IX86_BUILTIN_PMOVZXBW128,
16879 IX86_BUILTIN_PMOVZXBD128,
16880 IX86_BUILTIN_PMOVZXBQ128,
16881 IX86_BUILTIN_PMOVZXWD128,
16882 IX86_BUILTIN_PMOVZXWQ128,
16883 IX86_BUILTIN_PMOVZXDQ128,
16884
16885 IX86_BUILTIN_PMULDQ128,
16886 IX86_BUILTIN_PMULLD128,
16887
16888 IX86_BUILTIN_ROUNDPD,
16889 IX86_BUILTIN_ROUNDPS,
16890 IX86_BUILTIN_ROUNDSD,
16891 IX86_BUILTIN_ROUNDSS,
16892
16893 IX86_BUILTIN_PTESTZ,
16894 IX86_BUILTIN_PTESTC,
16895 IX86_BUILTIN_PTESTNZC,
16896
16897 IX86_BUILTIN_VEC_INIT_V2SI,
16898 IX86_BUILTIN_VEC_INIT_V4HI,
16899 IX86_BUILTIN_VEC_INIT_V8QI,
16900 IX86_BUILTIN_VEC_EXT_V2DF,
16901 IX86_BUILTIN_VEC_EXT_V2DI,
16902 IX86_BUILTIN_VEC_EXT_V4SF,
16903 IX86_BUILTIN_VEC_EXT_V4SI,
16904 IX86_BUILTIN_VEC_EXT_V8HI,
16905 IX86_BUILTIN_VEC_EXT_V2SI,
16906 IX86_BUILTIN_VEC_EXT_V4HI,
16907 IX86_BUILTIN_VEC_EXT_V16QI,
16908 IX86_BUILTIN_VEC_SET_V2DI,
16909 IX86_BUILTIN_VEC_SET_V4SF,
16910 IX86_BUILTIN_VEC_SET_V4SI,
16911 IX86_BUILTIN_VEC_SET_V8HI,
16912 IX86_BUILTIN_VEC_SET_V4HI,
16913 IX86_BUILTIN_VEC_SET_V16QI,
16914
16915 IX86_BUILTIN_VEC_PACK_SFIX,
16916
16917 /* SSE4.2. */
16918 IX86_BUILTIN_CRC32QI,
16919 IX86_BUILTIN_CRC32HI,
16920 IX86_BUILTIN_CRC32SI,
16921 IX86_BUILTIN_CRC32DI,
16922
16923 IX86_BUILTIN_PCMPESTRI128,
16924 IX86_BUILTIN_PCMPESTRM128,
16925 IX86_BUILTIN_PCMPESTRA128,
16926 IX86_BUILTIN_PCMPESTRC128,
16927 IX86_BUILTIN_PCMPESTRO128,
16928 IX86_BUILTIN_PCMPESTRS128,
16929 IX86_BUILTIN_PCMPESTRZ128,
16930 IX86_BUILTIN_PCMPISTRI128,
16931 IX86_BUILTIN_PCMPISTRM128,
16932 IX86_BUILTIN_PCMPISTRA128,
16933 IX86_BUILTIN_PCMPISTRC128,
16934 IX86_BUILTIN_PCMPISTRO128,
16935 IX86_BUILTIN_PCMPISTRS128,
16936 IX86_BUILTIN_PCMPISTRZ128,
16937
16938 IX86_BUILTIN_PCMPGTQ,
16939
16940 /* TFmode support builtins. */
16941 IX86_BUILTIN_INFQ,
16942 IX86_BUILTIN_FABSQ,
16943 IX86_BUILTIN_COPYSIGNQ,
16944
16945 IX86_BUILTIN_MAX
16946 };
16947
16948 /* Table for the ix86 builtin decls. */
16949 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16950
16951 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16952 only if MASK includes one of the ISA bits enabled in ix86_isa_flags.
16953 Stores the function decl in the ix86_builtins array.
16954 Returns the function decl, or NULL_TREE if the builtin was not added. */
16955
16956 static inline tree
16957 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16958 {
16959 tree decl = NULL_TREE;
16960
16961 if (mask & ix86_isa_flags
16962 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16963 {
16964 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16965 NULL, NULL_TREE);
16966 ix86_builtins[(int) code] = decl;
16967 }
16968
16969 return decl;
16970 }
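/* An illustrative use (the type node name v4sf_ftype_pcfloat is hypothetical
   here; the actual registrations are made in ix86_init_mmx_sse_builtins
   below):

     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups",
                  v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);

   registers the builtin only when SSE is enabled and records the decl in
   ix86_builtins[IX86_BUILTIN_LOADUPS]. */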
16971
16972 /* Like def_builtin, but also marks the function decl "const". */
16973
16974 static inline tree
16975 def_builtin_const (int mask, const char *name, tree type,
16976 enum ix86_builtins code)
16977 {
16978 tree decl = def_builtin (mask, name, type, code);
16979 if (decl)
16980 TREE_READONLY (decl) = 1;
16981 return decl;
16982 }
16983
16984 /* Bits for builtin_description.flag. */
16985
16986 /* Set when we don't support the comparison natively, and should
16987 swap the comparison operands in order to support it. */
16988 #define BUILTIN_DESC_SWAP_OPERANDS 1
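/* For example, in the comparison tables below cmpgtps has no native pattern
   of its own: its entry uses rtx code LT together with
   BUILTIN_DESC_SWAP_OPERANDS, so a > b is emitted as the natively supported
   b < a. */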
16989
16990 struct builtin_description
16991 {
16992 const unsigned int mask;
16993 const enum insn_code icode;
16994 const char *const name;
16995 const enum ix86_builtins code;
16996 const enum rtx_code comparison;
16997 const int flag;
16998 };
16999
17000 static const struct builtin_description bdesc_comi[] =
17001 {
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17010 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17011 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17012 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17013 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17022 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17023 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17024 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17025 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17026 };
17027
17028 static const struct builtin_description bdesc_ptest[] =
17029 {
17030 /* SSE4.1 */
17031 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17032 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17033 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17034 };
17035
17036 static const struct builtin_description bdesc_pcmpestr[] =
17037 {
17038 /* SSE4.2 */
17039 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17041 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17042 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17043 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17044 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17045 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17046 };
17047
17048 static const struct builtin_description bdesc_pcmpistr[] =
17049 {
17050 /* SSE4.2 */
17051 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17052 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17053 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17054 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17055 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17056 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17057 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17058 };
17059
17060 static const struct builtin_description bdesc_crc32[] =
17061 {
17062 /* SSE4.2 */
17063 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17064 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17065 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17066 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17067 };
17068
17069 /* SSE builtins with 3 arguments, where the last argument must be an immediate or xmm0. */
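/* At the source level these look like, e.g. (illustrative only):

     __m128d r = __builtin_ia32_blendpd (a, b, 0x1);    // last arg: imm8
     __m128  m = __builtin_ia32_blendvps (a, b, mask);  // last arg: via xmm0  */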
17070 static const struct builtin_description bdesc_sse_3arg[] =
17071 {
17072 /* SSE4.1 */
17073 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17074 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17076 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17077 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17081 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17082 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17083 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17084 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17085 };
17086
17087 static const struct builtin_description bdesc_2arg[] =
17088 {
17089 /* SSE */
17090 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17094 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17098
17099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17117 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17118 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17119 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17121
17122 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17123 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17126
17127 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17129 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17130 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17131
17132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17133 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17134 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17135 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17136 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17137
17138 /* MMX */
17139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17146 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17147
17148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17156
17157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17159 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17160
17161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17165
17166 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17167 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17168
17169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17172 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17175
17176 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17177 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17180
17181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17187
17188 /* Special. */
17189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17192
17193 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17194 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17195 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17196
17197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17199 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17203
17204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17206 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17210
17211 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17212 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17214 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17215
17216 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17218
17219 /* SSE2 */
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17222 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17224 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17228
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17245 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17247 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17249
17250 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17251 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17254
17255 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17259
17260 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17261 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17262 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17263
17264 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17265
17266 /* SSE2 MMX */
17267 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17271 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17272 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17273 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17274 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17275
17276 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17277 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17278 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17279 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17280 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17281 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17283 { OPTION_MASK_ISA_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17284
17285 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17287
17288 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17290 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17291 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17292
17293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17295
17296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17298 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17299 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17300 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17301 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17302
17303 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17304 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17307
17308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17312 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17316
17317 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17319 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17320
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17323
17324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17326
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17328 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17329 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17330
17331 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17333 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17334
17335 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17336 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17337
17338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17339
17340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17341 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17342 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17343 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17344
17345 /* SSE3 MMX */
17346 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17348 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17349 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17350 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17351 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17352
17353 /* SSSE3 */
17354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17359 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17360 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17362 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17363 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17364 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17366 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17367 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17368 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17370 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17371 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17372 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17373 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17374 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17375 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17376 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17377 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17378
17379 /* SSE4.1 */
17380 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17381 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17382 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17388 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17389 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17390 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17391 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17392
17393 /* SSE4.2 */
17394 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17395 };
17396
17397 static const struct builtin_description bdesc_1arg[] =
17398 {
17399 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17401
17402 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17404 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17405
17406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17407 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17408 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17409 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17410 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17411 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17412
17413 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17415
17416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17417
17418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17420
17421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17424 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17426
17427 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17428
17429 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17431 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17432 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17433
17434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17437
17438 /* SSE3 */
17439 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17440 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17441
17442 /* SSSE3 */
17443 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17444 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17445 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17446 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17447 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17448 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17449
17450 /* SSE4.1 */
17451 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17452 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17453 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17454 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17455 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17456 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17457 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17458 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17459 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17460 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17461 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17462 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17463 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17464
17465 /* Fake 1-arg builtins that take a constant smaller than 8 bits as the 2nd arg (see the note after this table). */
17466 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17467 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17468 };
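/* Note on the "fake 1 arg" entries above: roundpd and roundps are listed as
   one-operand builtins, but their user-visible form takes a small constant
   rounding-mode selector as a second argument, e.g. (illustrative only)
   __builtin_ia32_roundpd (x, 1); that constant is checked and emitted
   specially when the builtin is expanded. */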
17469
17470 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
17471 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
17472 builtins. */
17473 static void
17474 ix86_init_mmx_sse_builtins (void)
17475 {
17476 const struct builtin_description * d;
17477 size_t i;
17478
17479 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17480 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17481 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17482 tree V2DI_type_node
17483 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17484 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17485 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17486 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17487 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17488 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17489 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17490
17491 tree pchar_type_node = build_pointer_type (char_type_node);
17492 tree pcchar_type_node = build_pointer_type (
17493 build_type_variant (char_type_node, 1, 0));
17494 tree pfloat_type_node = build_pointer_type (float_type_node);
17495 tree pcfloat_type_node = build_pointer_type (
17496 build_type_variant (float_type_node, 1, 0));
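  /* build_type_variant (T, 1, 0) yields the const-qualified variant of T,
     so pcchar_type_node is "const char *" and pcfloat_type_node is
     "const float *"; the same idiom builds pcdouble_type_node below.  */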
17497 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17498 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17499 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17500
17501 /* Comparisons. */
17502 tree int_ftype_v4sf_v4sf
17503 = build_function_type_list (integer_type_node,
17504 V4SF_type_node, V4SF_type_node, NULL_TREE);
17505 tree v4si_ftype_v4sf_v4sf
17506 = build_function_type_list (V4SI_type_node,
17507 V4SF_type_node, V4SF_type_node, NULL_TREE);
17508 /* MMX/SSE/integer conversions. */
17509 tree int_ftype_v4sf
17510 = build_function_type_list (integer_type_node,
17511 V4SF_type_node, NULL_TREE);
17512 tree int64_ftype_v4sf
17513 = build_function_type_list (long_long_integer_type_node,
17514 V4SF_type_node, NULL_TREE);
17515 tree int_ftype_v8qi
17516 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17517 tree v4sf_ftype_v4sf_int
17518 = build_function_type_list (V4SF_type_node,
17519 V4SF_type_node, integer_type_node, NULL_TREE);
17520 tree v4sf_ftype_v4sf_int64
17521 = build_function_type_list (V4SF_type_node,
17522 V4SF_type_node, long_long_integer_type_node,
17523 NULL_TREE);
17524 tree v4sf_ftype_v4sf_v2si
17525 = build_function_type_list (V4SF_type_node,
17526 V4SF_type_node, V2SI_type_node, NULL_TREE);
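  /* The _ftype_ names encode "<return>_ftype_<arg1>_<arg2>...".  For
     illustration, v4sf_ftype_v4sf_int corresponds roughly to the C prototype

       __v4sf f (__v4sf, int);

     and is the type used below for builtins such as __builtin_ia32_cvtsi2ss.  */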
17527
17528 /* Miscellaneous. */
17529 tree v8qi_ftype_v4hi_v4hi
17530 = build_function_type_list (V8QI_type_node,
17531 V4HI_type_node, V4HI_type_node, NULL_TREE);
17532 tree v4hi_ftype_v2si_v2si
17533 = build_function_type_list (V4HI_type_node,
17534 V2SI_type_node, V2SI_type_node, NULL_TREE);
17535 tree v4sf_ftype_v4sf_v4sf_int
17536 = build_function_type_list (V4SF_type_node,
17537 V4SF_type_node, V4SF_type_node,
17538 integer_type_node, NULL_TREE);
17539 tree v2si_ftype_v4hi_v4hi
17540 = build_function_type_list (V2SI_type_node,
17541 V4HI_type_node, V4HI_type_node, NULL_TREE);
17542 tree v4hi_ftype_v4hi_int
17543 = build_function_type_list (V4HI_type_node,
17544 V4HI_type_node, integer_type_node, NULL_TREE);
17545 tree v4hi_ftype_v4hi_di
17546 = build_function_type_list (V4HI_type_node,
17547 V4HI_type_node, long_long_unsigned_type_node,
17548 NULL_TREE);
17549 tree v2si_ftype_v2si_di
17550 = build_function_type_list (V2SI_type_node,
17551 V2SI_type_node, long_long_unsigned_type_node,
17552 NULL_TREE);
17553 tree void_ftype_void
17554 = build_function_type (void_type_node, void_list_node);
17555 tree void_ftype_unsigned
17556 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17557 tree void_ftype_unsigned_unsigned
17558 = build_function_type_list (void_type_node, unsigned_type_node,
17559 unsigned_type_node, NULL_TREE);
17560 tree void_ftype_pcvoid_unsigned_unsigned
17561 = build_function_type_list (void_type_node, const_ptr_type_node,
17562 unsigned_type_node, unsigned_type_node,
17563 NULL_TREE);
17564 tree unsigned_ftype_void
17565 = build_function_type (unsigned_type_node, void_list_node);
17566 tree v2si_ftype_v4sf
17567 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17568 /* Loads/stores. */
17569 tree void_ftype_v8qi_v8qi_pchar
17570 = build_function_type_list (void_type_node,
17571 V8QI_type_node, V8QI_type_node,
17572 pchar_type_node, NULL_TREE);
17573 tree v4sf_ftype_pcfloat
17574 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17575 /* @@@ the type is bogus */
17576 tree v4sf_ftype_v4sf_pv2si
17577 = build_function_type_list (V4SF_type_node,
17578 V4SF_type_node, pv2si_type_node, NULL_TREE);
17579 tree void_ftype_pv2si_v4sf
17580 = build_function_type_list (void_type_node,
17581 pv2si_type_node, V4SF_type_node, NULL_TREE);
17582 tree void_ftype_pfloat_v4sf
17583 = build_function_type_list (void_type_node,
17584 pfloat_type_node, V4SF_type_node, NULL_TREE);
17585 tree void_ftype_pdi_di
17586 = build_function_type_list (void_type_node,
17587 pdi_type_node, long_long_unsigned_type_node,
17588 NULL_TREE);
17589 tree void_ftype_pv2di_v2di
17590 = build_function_type_list (void_type_node,
17591 pv2di_type_node, V2DI_type_node, NULL_TREE);
17592 /* Normal vector unops. */
17593 tree v4sf_ftype_v4sf
17594 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17595 tree v16qi_ftype_v16qi
17596 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17597 tree v8hi_ftype_v8hi
17598 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17599 tree v4si_ftype_v4si
17600 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17601 tree v8qi_ftype_v8qi
17602 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17603 tree v4hi_ftype_v4hi
17604 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17605
17606 /* Normal vector binops. */
17607 tree v4sf_ftype_v4sf_v4sf
17608 = build_function_type_list (V4SF_type_node,
17609 V4SF_type_node, V4SF_type_node, NULL_TREE);
17610 tree v8qi_ftype_v8qi_v8qi
17611 = build_function_type_list (V8QI_type_node,
17612 V8QI_type_node, V8QI_type_node, NULL_TREE);
17613 tree v4hi_ftype_v4hi_v4hi
17614 = build_function_type_list (V4HI_type_node,
17615 V4HI_type_node, V4HI_type_node, NULL_TREE);
17616 tree v2si_ftype_v2si_v2si
17617 = build_function_type_list (V2SI_type_node,
17618 V2SI_type_node, V2SI_type_node, NULL_TREE);
17619 tree di_ftype_di_di
17620 = build_function_type_list (long_long_unsigned_type_node,
17621 long_long_unsigned_type_node,
17622 long_long_unsigned_type_node, NULL_TREE);
17623
17624 tree di_ftype_di_di_int
17625 = build_function_type_list (long_long_unsigned_type_node,
17626 long_long_unsigned_type_node,
17627 long_long_unsigned_type_node,
17628 integer_type_node, NULL_TREE);
17629
17630 tree v2si_ftype_v2sf
17631 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17632 tree v2sf_ftype_v2si
17633 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17634 tree v2si_ftype_v2si
17635 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17636 tree v2sf_ftype_v2sf
17637 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17638 tree v2sf_ftype_v2sf_v2sf
17639 = build_function_type_list (V2SF_type_node,
17640 V2SF_type_node, V2SF_type_node, NULL_TREE);
17641 tree v2si_ftype_v2sf_v2sf
17642 = build_function_type_list (V2SI_type_node,
17643 V2SF_type_node, V2SF_type_node, NULL_TREE);
17644 tree pint_type_node = build_pointer_type (integer_type_node);
17645 tree pdouble_type_node = build_pointer_type (double_type_node);
17646 tree pcdouble_type_node = build_pointer_type (
17647 build_type_variant (double_type_node, 1, 0));
17648 tree int_ftype_v2df_v2df
17649 = build_function_type_list (integer_type_node,
17650 V2DF_type_node, V2DF_type_node, NULL_TREE);
17651
17652 tree void_ftype_pcvoid
17653 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17654 tree v4sf_ftype_v4si
17655 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17656 tree v4si_ftype_v4sf
17657 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17658 tree v2df_ftype_v4si
17659 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17660 tree v4si_ftype_v2df
17661 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17662 tree v4si_ftype_v2df_v2df
17663 = build_function_type_list (V4SI_type_node,
17664 V2DF_type_node, V2DF_type_node, NULL_TREE);
17665 tree v2si_ftype_v2df
17666 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17667 tree v4sf_ftype_v2df
17668 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17669 tree v2df_ftype_v2si
17670 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17671 tree v2df_ftype_v4sf
17672 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17673 tree int_ftype_v2df
17674 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17675 tree int64_ftype_v2df
17676 = build_function_type_list (long_long_integer_type_node,
17677 V2DF_type_node, NULL_TREE);
17678 tree v2df_ftype_v2df_int
17679 = build_function_type_list (V2DF_type_node,
17680 V2DF_type_node, integer_type_node, NULL_TREE);
17681 tree v2df_ftype_v2df_int64
17682 = build_function_type_list (V2DF_type_node,
17683 V2DF_type_node, long_long_integer_type_node,
17684 NULL_TREE);
17685 tree v4sf_ftype_v4sf_v2df
17686 = build_function_type_list (V4SF_type_node,
17687 V4SF_type_node, V2DF_type_node, NULL_TREE);
17688 tree v2df_ftype_v2df_v4sf
17689 = build_function_type_list (V2DF_type_node,
17690 V2DF_type_node, V4SF_type_node, NULL_TREE);
17691 tree v2df_ftype_v2df_v2df_int
17692 = build_function_type_list (V2DF_type_node,
17693 V2DF_type_node, V2DF_type_node,
17694 integer_type_node,
17695 NULL_TREE);
17696 tree v2df_ftype_v2df_pcdouble
17697 = build_function_type_list (V2DF_type_node,
17698 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17699 tree void_ftype_pdouble_v2df
17700 = build_function_type_list (void_type_node,
17701 pdouble_type_node, V2DF_type_node, NULL_TREE);
17702 tree void_ftype_pint_int
17703 = build_function_type_list (void_type_node,
17704 pint_type_node, integer_type_node, NULL_TREE);
17705 tree void_ftype_v16qi_v16qi_pchar
17706 = build_function_type_list (void_type_node,
17707 V16QI_type_node, V16QI_type_node,
17708 pchar_type_node, NULL_TREE);
17709 tree v2df_ftype_pcdouble
17710 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17711 tree v2df_ftype_v2df_v2df
17712 = build_function_type_list (V2DF_type_node,
17713 V2DF_type_node, V2DF_type_node, NULL_TREE);
17714 tree v16qi_ftype_v16qi_v16qi
17715 = build_function_type_list (V16QI_type_node,
17716 V16QI_type_node, V16QI_type_node, NULL_TREE);
17717 tree v8hi_ftype_v8hi_v8hi
17718 = build_function_type_list (V8HI_type_node,
17719 V8HI_type_node, V8HI_type_node, NULL_TREE);
17720 tree v4si_ftype_v4si_v4si
17721 = build_function_type_list (V4SI_type_node,
17722 V4SI_type_node, V4SI_type_node, NULL_TREE);
17723 tree v2di_ftype_v2di_v2di
17724 = build_function_type_list (V2DI_type_node,
17725 V2DI_type_node, V2DI_type_node, NULL_TREE);
17726 tree v2di_ftype_v2df_v2df
17727 = build_function_type_list (V2DI_type_node,
17728 V2DF_type_node, V2DF_type_node, NULL_TREE);
17729 tree v2df_ftype_v2df
17730 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17731 tree v2di_ftype_v2di_int
17732 = build_function_type_list (V2DI_type_node,
17733 V2DI_type_node, integer_type_node, NULL_TREE);
17734 tree v2di_ftype_v2di_v2di_int
17735 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17736 V2DI_type_node, integer_type_node, NULL_TREE);
17737 tree v4si_ftype_v4si_int
17738 = build_function_type_list (V4SI_type_node,
17739 V4SI_type_node, integer_type_node, NULL_TREE);
17740 tree v8hi_ftype_v8hi_int
17741 = build_function_type_list (V8HI_type_node,
17742 V8HI_type_node, integer_type_node, NULL_TREE);
17743 tree v4si_ftype_v8hi_v8hi
17744 = build_function_type_list (V4SI_type_node,
17745 V8HI_type_node, V8HI_type_node, NULL_TREE);
17746 tree di_ftype_v8qi_v8qi
17747 = build_function_type_list (long_long_unsigned_type_node,
17748 V8QI_type_node, V8QI_type_node, NULL_TREE);
17749 tree di_ftype_v2si_v2si
17750 = build_function_type_list (long_long_unsigned_type_node,
17751 V2SI_type_node, V2SI_type_node, NULL_TREE);
17752 tree v2di_ftype_v16qi_v16qi
17753 = build_function_type_list (V2DI_type_node,
17754 V16QI_type_node, V16QI_type_node, NULL_TREE);
17755 tree v2di_ftype_v4si_v4si
17756 = build_function_type_list (V2DI_type_node,
17757 V4SI_type_node, V4SI_type_node, NULL_TREE);
17758 tree int_ftype_v16qi
17759 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17760 tree v16qi_ftype_pcchar
17761 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17762 tree void_ftype_pchar_v16qi
17763 = build_function_type_list (void_type_node,
17764 pchar_type_node, V16QI_type_node, NULL_TREE);
17765
17766 tree v2di_ftype_v2di_unsigned_unsigned
17767 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17768 unsigned_type_node, unsigned_type_node,
17769 NULL_TREE);
17770 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17771 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17772 unsigned_type_node, unsigned_type_node,
17773 NULL_TREE);
17774 tree v2di_ftype_v2di_v16qi
17775 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17776 NULL_TREE);
17777 tree v2df_ftype_v2df_v2df_v2df
17778 = build_function_type_list (V2DF_type_node,
17779 V2DF_type_node, V2DF_type_node,
17780 V2DF_type_node, NULL_TREE);
17781 tree v4sf_ftype_v4sf_v4sf_v4sf
17782 = build_function_type_list (V4SF_type_node,
17783 V4SF_type_node, V4SF_type_node,
17784 V4SF_type_node, NULL_TREE);
17785 tree v8hi_ftype_v16qi
17786 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17787 NULL_TREE);
17788 tree v4si_ftype_v16qi
17789 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17790 NULL_TREE);
17791 tree v2di_ftype_v16qi
17792 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17793 NULL_TREE);
17794 tree v4si_ftype_v8hi
17795 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17796 NULL_TREE);
17797 tree v2di_ftype_v8hi
17798 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17799 NULL_TREE);
17800 tree v2di_ftype_v4si
17801 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17802 NULL_TREE);
17803 tree v2di_ftype_pv2di
17804 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17805 NULL_TREE);
17806 tree v16qi_ftype_v16qi_v16qi_int
17807 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17808 V16QI_type_node, integer_type_node,
17809 NULL_TREE);
17810 tree v16qi_ftype_v16qi_v16qi_v16qi
17811 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17812 V16QI_type_node, V16QI_type_node,
17813 NULL_TREE);
17814 tree v8hi_ftype_v8hi_v8hi_int
17815 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17816 V8HI_type_node, integer_type_node,
17817 NULL_TREE);
17818 tree v4si_ftype_v4si_v4si_int
17819 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17820 V4SI_type_node, integer_type_node,
17821 NULL_TREE);
17822 tree int_ftype_v2di_v2di
17823 = build_function_type_list (integer_type_node,
17824 V2DI_type_node, V2DI_type_node,
17825 NULL_TREE);
17826 tree int_ftype_v16qi_int_v16qi_int_int
17827 = build_function_type_list (integer_type_node,
17828 V16QI_type_node,
17829 integer_type_node,
17830 V16QI_type_node,
17831 integer_type_node,
17832 integer_type_node,
17833 NULL_TREE);
17834 tree v16qi_ftype_v16qi_int_v16qi_int_int
17835 = build_function_type_list (V16QI_type_node,
17836 V16QI_type_node,
17837 integer_type_node,
17838 V16QI_type_node,
17839 integer_type_node,
17840 integer_type_node,
17841 NULL_TREE);
17842 tree int_ftype_v16qi_v16qi_int
17843 = build_function_type_list (integer_type_node,
17844 V16QI_type_node,
17845 V16QI_type_node,
17846 integer_type_node,
17847 NULL_TREE);
17848 tree ftype;
17849
17850 /* The __float80 type. */
17851 if (TYPE_MODE (long_double_type_node) == XFmode)
17852 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17853 "__float80");
17854 else
17855 {
17856 /* The __float80 type. */
17857 tree float80_type_node = make_node (REAL_TYPE);
17858
17859 TYPE_PRECISION (float80_type_node) = 80;
17860 layout_type (float80_type_node);
17861 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17862 "__float80");
17863 }
17864
17865 if (TARGET_64BIT)
17866 {
17867 tree float128_type_node = make_node (REAL_TYPE);
17868
17869 TYPE_PRECISION (float128_type_node) = 128;
17870 layout_type (float128_type_node);
17871 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17872 "__float128");
17873
17874 /* TFmode support builtins. */
17875 ftype = build_function_type (float128_type_node,
17876 void_list_node);
17877 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17878
17879 ftype = build_function_type_list (float128_type_node,
17880 float128_type_node,
17881 NULL_TREE);
17882 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17883
17884 ftype = build_function_type_list (float128_type_node,
17885 float128_type_node,
17886 float128_type_node,
17887 NULL_TREE);
17888 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17889 }
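  /* Illustrative use of the TFmode builtins registered above (a sketch,
     assuming 64-bit user code):

       __float128 x = __builtin_infq ();
       __float128 y = __builtin_fabsq (x);
       __float128 z = __builtin_copysignq (y, x);

     The frontend resolves these calls through the declarations made here.  */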
17890
17891 /* Add all SSE builtins that are more or less simple operations on
17892 three operands. */
17893 for (i = 0, d = bdesc_sse_3arg;
17894 i < ARRAY_SIZE (bdesc_sse_3arg);
17895 i++, d++)
17896 {
17897 /* Pick the prototype from an input operand's mode; the destination
17898 can have a different mode for mask-generating compares. */
17899 enum machine_mode mode;
17900 tree type;
17901
17902 if (d->name == 0)
17903 continue;
17904 mode = insn_data[d->icode].operand[1].mode;
17905
17906 switch (mode)
17907 {
17908 case V16QImode:
17909 type = v16qi_ftype_v16qi_v16qi_int;
17910 break;
17911 case V8HImode:
17912 type = v8hi_ftype_v8hi_v8hi_int;
17913 break;
17914 case V4SImode:
17915 type = v4si_ftype_v4si_v4si_int;
17916 break;
17917 case V2DImode:
17918 type = v2di_ftype_v2di_v2di_int;
17919 break;
17920 case V2DFmode:
17921 type = v2df_ftype_v2df_v2df_int;
17922 break;
17923 case V4SFmode:
17924 type = v4sf_ftype_v4sf_v4sf_int;
17925 break;
17926 default:
17927 gcc_unreachable ();
17928 }
17929
17930 /* Override for variable blends. */
17931 switch (d->icode)
17932 {
17933 case CODE_FOR_sse4_1_blendvpd:
17934 type = v2df_ftype_v2df_v2df_v2df;
17935 break;
17936 case CODE_FOR_sse4_1_blendvps:
17937 type = v4sf_ftype_v4sf_v4sf_v4sf;
17938 break;
17939 case CODE_FOR_sse4_1_pblendvb:
17940 type = v16qi_ftype_v16qi_v16qi_v16qi;
17941 break;
17942 default:
17943 break;
17944 }
17945
17946 def_builtin_const (d->mask, d->name, type, d->code);
17947 }
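  /* For the non-blend forms above the final operand must be an immediate;
     ix86_expand_sse_4_operands_builtin below rejects anything that is not
     an 8-bit (or, for roundsd/roundss, 4-bit) constant.  */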
17948
17949 /* Add all builtins that are more or less simple operations on two
17950 operands. */
17951 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17952 {
17953 /* Pick the prototype from an input operand's mode; the destination
17954 can have a different mode for mask-generating compares. */
17955 enum machine_mode mode;
17956 tree type;
17957
17958 if (d->name == 0)
17959 continue;
17960 mode = insn_data[d->icode].operand[1].mode;
17961
17962 switch (mode)
17963 {
17964 case V16QImode:
17965 type = v16qi_ftype_v16qi_v16qi;
17966 break;
17967 case V8HImode:
17968 type = v8hi_ftype_v8hi_v8hi;
17969 break;
17970 case V4SImode:
17971 type = v4si_ftype_v4si_v4si;
17972 break;
17973 case V2DImode:
17974 type = v2di_ftype_v2di_v2di;
17975 break;
17976 case V2DFmode:
17977 type = v2df_ftype_v2df_v2df;
17978 break;
17979 case V4SFmode:
17980 type = v4sf_ftype_v4sf_v4sf;
17981 break;
17982 case V8QImode:
17983 type = v8qi_ftype_v8qi_v8qi;
17984 break;
17985 case V4HImode:
17986 type = v4hi_ftype_v4hi_v4hi;
17987 break;
17988 case V2SImode:
17989 type = v2si_ftype_v2si_v2si;
17990 break;
17991 case DImode:
17992 type = di_ftype_di_di;
17993 break;
17994
17995 default:
17996 gcc_unreachable ();
17997 }
17998
17999 /* Override for comparisons. */
18000 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18001 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
18002 type = v4si_ftype_v4sf_v4sf;
18003
18004 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
18005 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18006 type = v2di_ftype_v2df_v2df;
18007
18008 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
18009 type = v4si_ftype_v2df_v2df;
18010
18011 def_builtin_const (d->mask, d->name, type, d->code);
18012 }
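  /* The compare overrides above reflect that the SSE compare insns produce
     a full-width element mask, so the builtin returns an integer vector of
     the same width as its inputs (v4si for v4sf compares, v2di for v2df
     compares) rather than the mode-derived floating-point type.  */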
18013
18014 /* Add all builtins that are more or less simple operations on one operand. */
18015 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18016 {
18017 enum machine_mode mode;
18018 tree type;
18019
18020 if (d->name == 0)
18021 continue;
18022 mode = insn_data[d->icode].operand[1].mode;
18023
18024 switch (mode)
18025 {
18026 case V16QImode:
18027 type = v16qi_ftype_v16qi;
18028 break;
18029 case V8HImode:
18030 type = v8hi_ftype_v8hi;
18031 break;
18032 case V4SImode:
18033 type = v4si_ftype_v4si;
18034 break;
18035 case V2DFmode:
18036 type = v2df_ftype_v2df;
18037 break;
18038 case V4SFmode:
18039 type = v4sf_ftype_v4sf;
18040 break;
18041 case V8QImode:
18042 type = v8qi_ftype_v8qi;
18043 break;
18044 case V4HImode:
18045 type = v4hi_ftype_v4hi;
18046 break;
18047 case V2SImode:
18048 type = v2si_ftype_v2si;
18049 break;
18050
18051 default:
18052 gcc_unreachable ();
18053 }
18054
18055 def_builtin_const (d->mask, d->name, type, d->code);
18056 }
18057
18058 /* pcmpestr[im] insns. */
18059 for (i = 0, d = bdesc_pcmpestr;
18060 i < ARRAY_SIZE (bdesc_pcmpestr);
18061 i++, d++)
18062 {
18063 if (d->code == IX86_BUILTIN_PCMPESTRM128)
18064 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
18065 else
18066 ftype = int_ftype_v16qi_int_v16qi_int_int;
18067 def_builtin_const (d->mask, d->name, ftype, d->code);
18068 }
18069
18070 /* pcmpistr[im] insns. */
18071 for (i = 0, d = bdesc_pcmpistr;
18072 i < ARRAY_SIZE (bdesc_pcmpistr);
18073 i++, d++)
18074 {
18075 if (d->code == IX86_BUILTIN_PCMPISTRM128)
18076 ftype = v16qi_ftype_v16qi_v16qi_int;
18077 else
18078 ftype = int_ftype_v16qi_v16qi_int;
18079 def_builtin_const (d->mask, d->name, ftype, d->code);
18080 }
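  /* The "explicit length" pcmpestr* builtins take the two string lengths as
     the int arguments, while the "implicit length" pcmpistr* forms stop at a
     zero element; the *M128 variants return the v16qi result mask and the
     remaining variants return an int, hence the two prototypes per loop.  */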
18081
18082 /* Add the remaining MMX insns with somewhat more complicated types. */
18083 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
18084 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
18085 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
18086 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
18087
18088 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
18089 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
18090 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
18091
18092 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
18093 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
18094
18095 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
18096 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
18097
18098 /* comi/ucomi insns. */
18099 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18100 if (d->mask == OPTION_MASK_ISA_SSE2)
18101 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18102 else
18103 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18104
18105 /* ptest insns. */
18106 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18107 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18108
18109 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18110 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18111 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18112
18113 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18114 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18115 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18116 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18117 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18118 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18119 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18120 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18121 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18122 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18123 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18124
18125 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18126
18127 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18128 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18129
18130 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18131 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18132 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18133 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18134
18135 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18136 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18137 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18138 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18139
18140 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18141
18142 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18143
18144 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18145 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18146 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18147 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18148 ftype = build_function_type_list (float_type_node,
18149 float_type_node,
18150 NULL_TREE);
18151 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18152 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18153 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18154
18155 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18156
18157 /* Original 3DNow! */
18158 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18159 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18160 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18161 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18162 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18163 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18164 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18165 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18166 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18167 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18168 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18169 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18170 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18171 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18172 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18173 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18174 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18175 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18176 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18177 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18178
18179 /* 3DNow! extension as used in the Athlon CPU. */
18180 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18181 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18182 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18183 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18184 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18185 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18186
18187 /* SSE2 */
18188 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18189
18190 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18191 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18192
18193 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18194 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18195
18196 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18197 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18198 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18199 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18200 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18201
18202 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18203 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18204 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18205 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18206
18207 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18208 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18209
18210 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18211
18212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18213 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18214
18215 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18216 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18217 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18218 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18219 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18220
18221 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18222
18223 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18224 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18225 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18226 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18227
18228 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18229 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18231
18232 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18233 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18234 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18235 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18236
18237 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18238 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18239 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18240
18241 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18242 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18243
18244 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18245 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18246
18247 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18248 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18249 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18250 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18251 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18252 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18253 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18254
18255 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18256 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18257 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18258 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18259 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18260 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18261 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18262
18263 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18264 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18265 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18266 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18267
18268 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18269
18270 /* Prescott New Instructions. */
18271 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18272 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18273 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18274
18275 /* SSSE3. */
18276 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18277 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18278
18279 /* SSE4.1. */
18280 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18281 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18282 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18283 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18284 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18285 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18286 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18287 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18288 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18289 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18290 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18291 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18292 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18293 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18294 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18295 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18296 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18297 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18298
18299 /* SSE4.2. */
18300 ftype = build_function_type_list (unsigned_type_node,
18301 unsigned_type_node,
18302 unsigned_char_type_node,
18303 NULL_TREE);
18304 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18305 ftype = build_function_type_list (unsigned_type_node,
18306 unsigned_type_node,
18307 short_unsigned_type_node,
18308 NULL_TREE);
18309 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18310 ftype = build_function_type_list (unsigned_type_node,
18311 unsigned_type_node,
18312 unsigned_type_node,
18313 NULL_TREE);
18314 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18315 ftype = build_function_type_list (long_long_unsigned_type_node,
18316 long_long_unsigned_type_node,
18317 long_long_unsigned_type_node,
18318 NULL_TREE);
18319 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
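  /* A minimal usage sketch for the crc32 builtins registered above
     (illustration only; BYTE and WORD stand for the next input chunks):

       unsigned int c = 0xffffffff;
       c = __builtin_ia32_crc32qi (c, byte);
       c = __builtin_ia32_crc32si (c, word);

     each call folds one more chunk into the running CRC-32C value.  */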
18320
18321 /* AMDFAM10 SSE4A new builtins. */
18322 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18323 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18324 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18325 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18326 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18327 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18328
18329 /* Access to the vec_init patterns. */
18330 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18331 integer_type_node, NULL_TREE);
18332 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18333
18334 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18335 short_integer_type_node,
18336 short_integer_type_node,
18337 short_integer_type_node, NULL_TREE);
18338 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18339
18340 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18341 char_type_node, char_type_node,
18342 char_type_node, char_type_node,
18343 char_type_node, char_type_node,
18344 char_type_node, NULL_TREE);
18345 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
18346
18347 /* Access to the vec_extract patterns. */
18348 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18349 integer_type_node, NULL_TREE);
18350 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18351
18352 ftype = build_function_type_list (long_long_integer_type_node,
18353 V2DI_type_node, integer_type_node,
18354 NULL_TREE);
18355 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18356
18357 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18358 integer_type_node, NULL_TREE);
18359 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18360
18361 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18362 integer_type_node, NULL_TREE);
18363 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18364
18365 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18366 integer_type_node, NULL_TREE);
18367 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18368
18369 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18370 integer_type_node, NULL_TREE);
18371 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18372
18373 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18374 integer_type_node, NULL_TREE);
18375 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18376
18377 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18378 integer_type_node, NULL_TREE);
18379 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
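  /* Each vec_ext builtin above takes the vector and a constant element
     number and returns that element as a scalar, e.g.
     __builtin_ia32_vec_ext_v4sf (x, 0) yields element 0 of X as a float.  */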
18380
18381 /* Access to the vec_set patterns. */
18382 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18383 intDI_type_node,
18384 integer_type_node, NULL_TREE);
18385 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18386
18387 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18388 float_type_node,
18389 integer_type_node, NULL_TREE);
18390 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18391
18392 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18393 intSI_type_node,
18394 integer_type_node, NULL_TREE);
18395 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18396
18397 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18398 intHI_type_node,
18399 integer_type_node, NULL_TREE);
18400 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18401
18402 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18403 intHI_type_node,
18404 integer_type_node, NULL_TREE);
18405 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18406
18407 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18408 intQI_type_node,
18409 integer_type_node, NULL_TREE);
18410 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
18411 }
18412
18413 static void
18414 ix86_init_builtins (void)
18415 {
18416 if (TARGET_MMX)
18417 ix86_init_mmx_sse_builtins ();
18418 }
18419
18420 /* Errors in the source file can cause expand_expr to return const0_rtx
18421 where we expect a vector. To avoid crashing, use one of the vector
18422 clear instructions. */
18423 static rtx
18424 safe_vector_operand (rtx x, enum machine_mode mode)
18425 {
18426 if (x == const0_rtx)
18427 x = CONST0_RTX (mode);
18428 return x;
18429 }
18430
18431 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18432 4 operands. The third argument must either be a constant smaller
18433 than 8 bits or the xmm0 register. */
18434
18435 static rtx
18436 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18437 rtx target)
18438 {
18439 rtx pat;
18440 tree arg0 = CALL_EXPR_ARG (exp, 0);
18441 tree arg1 = CALL_EXPR_ARG (exp, 1);
18442 tree arg2 = CALL_EXPR_ARG (exp, 2);
18443 rtx op0 = expand_normal (arg0);
18444 rtx op1 = expand_normal (arg1);
18445 rtx op2 = expand_normal (arg2);
18446 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18447 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18448 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18449 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18450
18451 if (VECTOR_MODE_P (mode1))
18452 op0 = safe_vector_operand (op0, mode1);
18453 if (VECTOR_MODE_P (mode2))
18454 op1 = safe_vector_operand (op1, mode2);
18455 if (VECTOR_MODE_P (mode3))
18456 op2 = safe_vector_operand (op2, mode3);
18457
18458 if (optimize
18459 || target == 0
18460 || GET_MODE (target) != tmode
18461 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18462 target = gen_reg_rtx (tmode);
18463
18464 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18465 op0 = copy_to_mode_reg (mode1, op0);
18466 if ((optimize && !register_operand (op1, mode2))
18467 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18468 op1 = copy_to_mode_reg (mode2, op1);
18469
18470 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18471 switch (icode)
18472 {
18473 case CODE_FOR_sse4_1_blendvpd:
18474 case CODE_FOR_sse4_1_blendvps:
18475 case CODE_FOR_sse4_1_pblendvb:
18476 op2 = copy_to_mode_reg (mode3, op2);
18477 break;
18478
18479 case CODE_FOR_sse4_1_roundsd:
18480 case CODE_FOR_sse4_1_roundss:
18481 error ("the third argument must be a 4-bit immediate");
18482 return const0_rtx;
18483
18484 default:
18485 error ("the third argument must be an 8-bit immediate");
18486 return const0_rtx;
18487 }
18488
18489 pat = GEN_FCN (icode) (target, op0, op1, op2);
18490 if (! pat)
18491 return 0;
18492 emit_insn (pat);
18493 return target;
18494 }
18495
18496 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18497
18498 static rtx
18499 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18500 {
18501 rtx pat;
18502 tree arg0 = CALL_EXPR_ARG (exp, 0);
18503 tree arg1 = CALL_EXPR_ARG (exp, 1);
18504 rtx op0 = expand_normal (arg0);
18505 rtx op1 = expand_normal (arg1);
18506 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18507 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18508 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18509
18510 if (optimize
18511 || !target
18512 || GET_MODE (target) != tmode
18513 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18514 target = gen_reg_rtx (tmode);
18515
18516 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18517 op0 = copy_to_mode_reg (mode0, op0);
18518 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18519 {
18520 op1 = copy_to_reg (op1);
18521 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18522 }
18523
18524 pat = GEN_FCN (icode) (target, op0, op1);
18525 if (! pat)
18526 return 0;
18527 emit_insn (pat);
18528 return target;
18529 }
18530
18531 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18532
18533 static rtx
18534 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18535 {
18536 rtx pat, xops[3];
18537 tree arg0 = CALL_EXPR_ARG (exp, 0);
18538 tree arg1 = CALL_EXPR_ARG (exp, 1);
18539 rtx op0 = expand_normal (arg0);
18540 rtx op1 = expand_normal (arg1);
18541 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18542 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18543 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18544
18545 if (VECTOR_MODE_P (mode0))
18546 op0 = safe_vector_operand (op0, mode0);
18547 if (VECTOR_MODE_P (mode1))
18548 op1 = safe_vector_operand (op1, mode1);
18549
18550 if (optimize || !target
18551 || GET_MODE (target) != tmode
18552 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18553 target = gen_reg_rtx (tmode);
18554
18555 if (GET_MODE (op1) == SImode && mode1 == TImode)
18556 {
18557 rtx x = gen_reg_rtx (V4SImode);
18558 emit_insn (gen_sse2_loadd (x, op1));
18559 op1 = gen_lowpart (TImode, x);
18560 }
18561
18562 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18563 op0 = copy_to_mode_reg (mode0, op0);
18564 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18565 op1 = copy_to_mode_reg (mode1, op1);
18566
18567 /* ??? Using ix86_fixup_binary_operands is problematic when
18568 we've got mismatched modes. Fake it. */
18569
18570 xops[0] = target;
18571 xops[1] = op0;
18572 xops[2] = op1;
18573
18574 if (tmode == mode0 && tmode == mode1)
18575 {
18576 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18577 op0 = xops[1];
18578 op1 = xops[2];
18579 }
18580 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18581 {
18582 op0 = force_reg (mode0, op0);
18583 op1 = force_reg (mode1, op1);
18584 target = gen_reg_rtx (tmode);
18585 }
18586
18587 pat = GEN_FCN (icode) (target, op0, op1);
18588 if (! pat)
18589 return 0;
18590 emit_insn (pat);
18591 return target;
18592 }
18593
18594 /* Subroutine of ix86_expand_builtin to take care of stores. */
18595
18596 static rtx
18597 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18598 {
18599 rtx pat;
18600 tree arg0 = CALL_EXPR_ARG (exp, 0);
18601 tree arg1 = CALL_EXPR_ARG (exp, 1);
18602 rtx op0 = expand_normal (arg0);
18603 rtx op1 = expand_normal (arg1);
18604 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18605 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18606
18607 if (VECTOR_MODE_P (mode1))
18608 op1 = safe_vector_operand (op1, mode1);
18609
18610 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18611 op1 = copy_to_mode_reg (mode1, op1);
18612
18613 pat = GEN_FCN (icode) (op0, op1);
18614 if (pat)
18615 emit_insn (pat);
18616 return 0;
18617 }
18618
18619 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18620
18621 static rtx
18622 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18623 rtx target, int do_load)
18624 {
18625 rtx pat;
18626 tree arg0 = CALL_EXPR_ARG (exp, 0);
18627 rtx op0 = expand_normal (arg0);
18628 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18629 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18630
18631 if (optimize || !target
18632 || GET_MODE (target) != tmode
18633 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18634 target = gen_reg_rtx (tmode);
18635 if (do_load)
18636 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18637 else
18638 {
18639 if (VECTOR_MODE_P (mode0))
18640 op0 = safe_vector_operand (op0, mode0);
18641
18642 if ((optimize && !register_operand (op0, mode0))
18643 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18644 op0 = copy_to_mode_reg (mode0, op0);
18645 }
18646
18647 switch (icode)
18648 {
18649 case CODE_FOR_sse4_1_roundpd:
18650 case CODE_FOR_sse4_1_roundps:
18651 {
18652 tree arg1 = CALL_EXPR_ARG (exp, 1);
18653 rtx op1 = expand_normal (arg1);
18654 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18655
18656 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18657 {
18658 error ("the second argument must be a 4-bit immediate");
18659 return const0_rtx;
18660 }
18661 pat = GEN_FCN (icode) (target, op0, op1);
18662 }
18663 break;
18664 default:
18665 pat = GEN_FCN (icode) (target, op0);
18666 break;
18667 }
18668
18669 if (! pat)
18670 return 0;
18671 emit_insn (pat);
18672 return target;
18673 }
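
/* A hedged usage sketch for the rounding special case above (the actual
   wrapper lives in smmintrin.h, not in this file): the intrinsic is
   exposed roughly as

     #define _mm_round_pd(V, M) \
       ((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M)))

   so the rounding-mode argument M must be a compile-time constant; a
   non-immediate value fails the operand[2] predicate and triggers the
   4-bit-immediate error above.  */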
18674
18675 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18676 sqrtss, rsqrtss, rcpss. */
18677
18678 static rtx
18679 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18680 {
18681 rtx pat;
18682 tree arg0 = CALL_EXPR_ARG (exp, 0);
18683 rtx op1, op0 = expand_normal (arg0);
18684 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18685 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18686
18687 if (optimize || !target
18688 || GET_MODE (target) != tmode
18689 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18690 target = gen_reg_rtx (tmode);
18691
18692 if (VECTOR_MODE_P (mode0))
18693 op0 = safe_vector_operand (op0, mode0);
18694
18695 if ((optimize && !register_operand (op0, mode0))
18696 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18697 op0 = copy_to_mode_reg (mode0, op0);
18698
18699 op1 = op0;
18700 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18701 op1 = copy_to_mode_reg (mode0, op1);
18702
18703 pat = GEN_FCN (icode) (target, op0, op1);
18704 if (! pat)
18705 return 0;
18706 emit_insn (pat);
18707 return target;
18708 }
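
/* A note on the operand duplication above (a sketch based on the scalar
   SSE semantics): the vm* patterns behind sqrtss/rsqrtss/rcpss take an
   extra operand that supplies elements 1..3 of the result, e.g. for
   _mm_sqrt_ss element 0 becomes the square root of the input while the
   upper elements are passed through unchanged.  Duplicating OP0 into OP1
   makes that pass-through source the same register as the input.  */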
18709
18710 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18711
18712 static rtx
18713 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18714 rtx target)
18715 {
18716 rtx pat;
18717 tree arg0 = CALL_EXPR_ARG (exp, 0);
18718 tree arg1 = CALL_EXPR_ARG (exp, 1);
18719 rtx op0 = expand_normal (arg0);
18720 rtx op1 = expand_normal (arg1);
18721 rtx op2;
18722 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18723 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18724 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18725 enum rtx_code comparison = d->comparison;
18726
18727 if (VECTOR_MODE_P (mode0))
18728 op0 = safe_vector_operand (op0, mode0);
18729 if (VECTOR_MODE_P (mode1))
18730 op1 = safe_vector_operand (op1, mode1);
18731
18732 /* Swap operands if we have a comparison that isn't available in
18733 hardware. */
18734 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18735 {
18736 rtx tmp = gen_reg_rtx (mode1);
18737 emit_move_insn (tmp, op1);
18738 op1 = op0;
18739 op0 = tmp;
18740 }
18741
18742 if (optimize || !target
18743 || GET_MODE (target) != tmode
18744 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18745 target = gen_reg_rtx (tmode);
18746
18747 if ((optimize && !register_operand (op0, mode0))
18748 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18749 op0 = copy_to_mode_reg (mode0, op0);
18750 if ((optimize && !register_operand (op1, mode1))
18751 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18752 op1 = copy_to_mode_reg (mode1, op1);
18753
18754 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18755 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18756 if (! pat)
18757 return 0;
18758 emit_insn (pat);
18759 return target;
18760 }
18761
18762 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18763
18764 static rtx
18765 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18766 rtx target)
18767 {
18768 rtx pat;
18769 tree arg0 = CALL_EXPR_ARG (exp, 0);
18770 tree arg1 = CALL_EXPR_ARG (exp, 1);
18771 rtx op0 = expand_normal (arg0);
18772 rtx op1 = expand_normal (arg1);
18773 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18774 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18775 enum rtx_code comparison = d->comparison;
18776
18777 if (VECTOR_MODE_P (mode0))
18778 op0 = safe_vector_operand (op0, mode0);
18779 if (VECTOR_MODE_P (mode1))
18780 op1 = safe_vector_operand (op1, mode1);
18781
18782 /* Swap operands if we have a comparison that isn't available in
18783 hardware. */
18784 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18785 {
18786 rtx tmp = op1;
18787 op1 = op0;
18788 op0 = tmp;
18789 }
18790
18791 target = gen_reg_rtx (SImode);
18792 emit_move_insn (target, const0_rtx);
18793 target = gen_rtx_SUBREG (QImode, target, 0);
18794
18795 if ((optimize && !register_operand (op0, mode0))
18796 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18797 op0 = copy_to_mode_reg (mode0, op0);
18798 if ((optimize && !register_operand (op1, mode1))
18799 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18800 op1 = copy_to_mode_reg (mode1, op1);
18801
18802 pat = GEN_FCN (d->icode) (op0, op1);
18803 if (! pat)
18804 return 0;
18805 emit_insn (pat);
18806 emit_insn (gen_rtx_SET (VOIDmode,
18807 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18808 gen_rtx_fmt_ee (comparison, QImode,
18809 SET_DEST (pat),
18810 const0_rtx)));
18811
18812 return SUBREG_REG (target);
18813 }
18814
18815 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18816
18817 static rtx
18818 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18819 rtx target)
18820 {
18821 rtx pat;
18822 tree arg0 = CALL_EXPR_ARG (exp, 0);
18823 tree arg1 = CALL_EXPR_ARG (exp, 1);
18824 rtx op0 = expand_normal (arg0);
18825 rtx op1 = expand_normal (arg1);
18826 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18827 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18828 enum rtx_code comparison = d->comparison;
18829
18830 if (VECTOR_MODE_P (mode0))
18831 op0 = safe_vector_operand (op0, mode0);
18832 if (VECTOR_MODE_P (mode1))
18833 op1 = safe_vector_operand (op1, mode1);
18834
18835 target = gen_reg_rtx (SImode);
18836 emit_move_insn (target, const0_rtx);
18837 target = gen_rtx_SUBREG (QImode, target, 0);
18838
18839 if ((optimize && !register_operand (op0, mode0))
18840 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18841 op0 = copy_to_mode_reg (mode0, op0);
18842 if ((optimize && !register_operand (op1, mode1))
18843 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18844 op1 = copy_to_mode_reg (mode1, op1);
18845
18846 pat = GEN_FCN (d->icode) (op0, op1);
18847 if (! pat)
18848 return 0;
18849 emit_insn (pat);
18850 emit_insn (gen_rtx_SET (VOIDmode,
18851 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18852 gen_rtx_fmt_ee (comparison, QImode,
18853 SET_DEST (pat),
18854 const0_rtx)));
18855
18856 return SUBREG_REG (target);
18857 }
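
/* An illustrative sketch of what the expansion above produces: a call
   such as _mm_testz_si128 (a, b), which smmintrin.h is assumed to route
   through __builtin_ia32_ptestz128, emits the ptest insn and then stores
   the flags comparison (EQ against zero, i.e. a sete) into the low byte
   of a fresh SImode pseudo; the SUBREG_REG of that pseudo is returned as
   the int result.  */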
18858
18859 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18860
18861 static rtx
18862 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18863 tree exp, rtx target)
18864 {
18865 rtx pat;
18866 tree arg0 = CALL_EXPR_ARG (exp, 0);
18867 tree arg1 = CALL_EXPR_ARG (exp, 1);
18868 tree arg2 = CALL_EXPR_ARG (exp, 2);
18869 tree arg3 = CALL_EXPR_ARG (exp, 3);
18870 tree arg4 = CALL_EXPR_ARG (exp, 4);
18871 rtx scratch0, scratch1;
18872 rtx op0 = expand_normal (arg0);
18873 rtx op1 = expand_normal (arg1);
18874 rtx op2 = expand_normal (arg2);
18875 rtx op3 = expand_normal (arg3);
18876 rtx op4 = expand_normal (arg4);
18877 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18878
18879 tmode0 = insn_data[d->icode].operand[0].mode;
18880 tmode1 = insn_data[d->icode].operand[1].mode;
18881 modev2 = insn_data[d->icode].operand[2].mode;
18882 modei3 = insn_data[d->icode].operand[3].mode;
18883 modev4 = insn_data[d->icode].operand[4].mode;
18884 modei5 = insn_data[d->icode].operand[5].mode;
18885 modeimm = insn_data[d->icode].operand[6].mode;
18886
18887 if (VECTOR_MODE_P (modev2))
18888 op0 = safe_vector_operand (op0, modev2);
18889 if (VECTOR_MODE_P (modev4))
18890 op2 = safe_vector_operand (op2, modev4);
18891
18892 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18893 op0 = copy_to_mode_reg (modev2, op0);
18894 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18895 op1 = copy_to_mode_reg (modei3, op1);
18896 if ((optimize && !register_operand (op2, modev4))
18897 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18898 op2 = copy_to_mode_reg (modev4, op2);
18899 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18900 op3 = copy_to_mode_reg (modei5, op3);
18901
18902 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18903 {
18904 error ("the fifth argument must be an 8-bit immediate");
18905 return const0_rtx;
18906 }
18907
18908 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18909 {
18910 if (optimize || !target
18911 || GET_MODE (target) != tmode0
18912 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18913 target = gen_reg_rtx (tmode0);
18914
18915 scratch1 = gen_reg_rtx (tmode1);
18916
18917 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18918 }
18919 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18920 {
18921 if (optimize || !target
18922 || GET_MODE (target) != tmode1
18923 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18924 target = gen_reg_rtx (tmode1);
18925
18926 scratch0 = gen_reg_rtx (tmode0);
18927
18928 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18929 }
18930 else
18931 {
18932 gcc_assert (d->flag);
18933
18934 scratch0 = gen_reg_rtx (tmode0);
18935 scratch1 = gen_reg_rtx (tmode1);
18936
18937 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18938 }
18939
18940 if (! pat)
18941 return 0;
18942
18943 emit_insn (pat);
18944
18945 if (d->flag)
18946 {
18947 target = gen_reg_rtx (SImode);
18948 emit_move_insn (target, const0_rtx);
18949 target = gen_rtx_SUBREG (QImode, target, 0);
18950
18951 emit_insn
18952 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18953 gen_rtx_fmt_ee (EQ, QImode,
18954 gen_rtx_REG ((enum machine_mode) d->flag,
18955 FLAGS_REG),
18956 const0_rtx)));
18957 return SUBREG_REG (target);
18958 }
18959 else
18960 return target;
18961 }
18962
18963
18964 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18965
18966 static rtx
18967 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18968 tree exp, rtx target)
18969 {
18970 rtx pat;
18971 tree arg0 = CALL_EXPR_ARG (exp, 0);
18972 tree arg1 = CALL_EXPR_ARG (exp, 1);
18973 tree arg2 = CALL_EXPR_ARG (exp, 2);
18974 rtx scratch0, scratch1;
18975 rtx op0 = expand_normal (arg0);
18976 rtx op1 = expand_normal (arg1);
18977 rtx op2 = expand_normal (arg2);
18978 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18979
18980 tmode0 = insn_data[d->icode].operand[0].mode;
18981 tmode1 = insn_data[d->icode].operand[1].mode;
18982 modev2 = insn_data[d->icode].operand[2].mode;
18983 modev3 = insn_data[d->icode].operand[3].mode;
18984 modeimm = insn_data[d->icode].operand[4].mode;
18985
18986 if (VECTOR_MODE_P (modev2))
18987 op0 = safe_vector_operand (op0, modev2);
18988 if (VECTOR_MODE_P (modev3))
18989 op1 = safe_vector_operand (op1, modev3);
18990
18991 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18992 op0 = copy_to_mode_reg (modev2, op0);
18993 if ((optimize && !register_operand (op1, modev3))
18994 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18995 op1 = copy_to_mode_reg (modev3, op1);
18996
18997 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18998 {
18999 error ("the third argument must be an 8-bit immediate");
19000 return const0_rtx;
19001 }
19002
19003 if (d->code == IX86_BUILTIN_PCMPISTRI128)
19004 {
19005 if (optimize || !target
19006 || GET_MODE (target) != tmode0
19007 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
19008 target = gen_reg_rtx (tmode0);
19009
19010 scratch1 = gen_reg_rtx (tmode1);
19011
19012 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
19013 }
19014 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
19015 {
19016 if (optimize || !target
19017 || GET_MODE (target) != tmode1
19018 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
19019 target = gen_reg_rtx (tmode1);
19020
19021 scratch0 = gen_reg_rtx (tmode0);
19022
19023 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
19024 }
19025 else
19026 {
19027 gcc_assert (d->flag);
19028
19029 scratch0 = gen_reg_rtx (tmode0);
19030 scratch1 = gen_reg_rtx (tmode1);
19031
19032 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
19033 }
19034
19035 if (! pat)
19036 return 0;
19037
19038 emit_insn (pat);
19039
19040 if (d->flag)
19041 {
19042 target = gen_reg_rtx (SImode);
19043 emit_move_insn (target, const0_rtx);
19044 target = gen_rtx_SUBREG (QImode, target, 0);
19045
19046 emit_insn
19047 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
19048 gen_rtx_fmt_ee (EQ, QImode,
19049 gen_rtx_REG ((enum machine_mode) d->flag,
19050 FLAGS_REG),
19051 const0_rtx)));
19052 return SUBREG_REG (target);
19053 }
19054 else
19055 return target;
19056 }
19057
19058 /* Return the integer constant in ARG. Constrain it to be in the range
19059 of the subparts of VEC_TYPE; issue an error if not. */
19060
19061 static int
19062 get_element_number (tree vec_type, tree arg)
19063 {
19064 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
19065
19066 if (!host_integerp (arg, 1)
19067 || (elt = tree_low_cst (arg, 1), elt > max))
19068 {
19069 error ("selector must be an integer constant in the range 0..%wi", max);
19070 return 0;
19071 }
19072
19073 return elt;
19074 }
19075
19076 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19077 ix86_expand_vector_init. We DO have language-level syntax for this, in
19078 the form of (type){ init-list }. Except that since we can't place emms
19079 instructions from inside the compiler, we can't allow the use of MMX
19080 registers unless the user explicitly asks for it. So we do *not* define
19081 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19082 we have builtins invoked by mmintrin.h that give us license to emit
19083 these sorts of instructions. */
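
/* As an illustrative sketch (the actual wrappers live in mmintrin.h, not
   in this file), a user-level call such as

     __m64 v = _mm_set_pi32 (e1, e0);

   is assumed to reach us as __builtin_ia32_vec_init_v2si (e0, e1) and is
   expanded by the function below into a V2SImode PARALLEL handed to
   ix86_expand_vector_init.  */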
19084
19085 static rtx
19086 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
19087 {
19088 enum machine_mode tmode = TYPE_MODE (type);
19089 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
19090 int i, n_elt = GET_MODE_NUNITS (tmode);
19091 rtvec v = rtvec_alloc (n_elt);
19092
19093 gcc_assert (VECTOR_MODE_P (tmode));
19094 gcc_assert (call_expr_nargs (exp) == n_elt);
19095
19096 for (i = 0; i < n_elt; ++i)
19097 {
19098 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19099 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19100 }
19101
19102 if (!target || !register_operand (target, tmode))
19103 target = gen_reg_rtx (tmode);
19104
19105 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19106 return target;
19107 }
19108
19109 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19110 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19111 had a language-level syntax for referencing vector elements. */
19112
19113 static rtx
19114 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19115 {
19116 enum machine_mode tmode, mode0;
19117 tree arg0, arg1;
19118 int elt;
19119 rtx op0;
19120
19121 arg0 = CALL_EXPR_ARG (exp, 0);
19122 arg1 = CALL_EXPR_ARG (exp, 1);
19123
19124 op0 = expand_normal (arg0);
19125 elt = get_element_number (TREE_TYPE (arg0), arg1);
19126
19127 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19128 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19129 gcc_assert (VECTOR_MODE_P (mode0));
19130
19131 op0 = force_reg (mode0, op0);
19132
19133 if (optimize || !target || !register_operand (target, tmode))
19134 target = gen_reg_rtx (tmode);
19135
19136 ix86_expand_vector_extract (true, target, op0, elt);
19137
19138 return target;
19139 }
19140
19141 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19142 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19143 a language-level syntax for referencing vector elements. */
19144
19145 static rtx
19146 ix86_expand_vec_set_builtin (tree exp)
19147 {
19148 enum machine_mode tmode, mode1;
19149 tree arg0, arg1, arg2;
19150 int elt;
19151 rtx op0, op1, target;
19152
19153 arg0 = CALL_EXPR_ARG (exp, 0);
19154 arg1 = CALL_EXPR_ARG (exp, 1);
19155 arg2 = CALL_EXPR_ARG (exp, 2);
19156
19157 tmode = TYPE_MODE (TREE_TYPE (arg0));
19158 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19159 gcc_assert (VECTOR_MODE_P (tmode));
19160
19161 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19162 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19163 elt = get_element_number (TREE_TYPE (arg0), arg2);
19164
19165 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19166 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19167
19168 op0 = force_reg (tmode, op0);
19169 op1 = force_reg (mode1, op1);
19170
19171 /* OP0 is the source of these builtin functions and shouldn't be
19172 modified. Create a copy, use it and return it as target. */
19173 target = gen_reg_rtx (tmode);
19174 emit_move_insn (target, op0);
19175 ix86_expand_vector_set (true, target, op1, elt);
19176
19177 return target;
19178 }
19179
19180 /* Expand an expression EXP that calls a built-in function,
19181 with result going to TARGET if that's convenient
19182 (and in mode MODE if that's convenient).
19183 SUBTARGET may be used as the target for computing one of EXP's operands.
19184 IGNORE is nonzero if the value is to be ignored. */
19185
19186 static rtx
19187 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19188 enum machine_mode mode ATTRIBUTE_UNUSED,
19189 int ignore ATTRIBUTE_UNUSED)
19190 {
19191 const struct builtin_description *d;
19192 size_t i;
19193 enum insn_code icode;
19194 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19195 tree arg0, arg1, arg2, arg3;
19196 rtx op0, op1, op2, op3, pat;
19197 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19198 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19199
19200 switch (fcode)
19201 {
19202 case IX86_BUILTIN_EMMS:
19203 emit_insn (gen_mmx_emms ());
19204 return 0;
19205
19206 case IX86_BUILTIN_SFENCE:
19207 emit_insn (gen_sse_sfence ());
19208 return 0;
19209
19210 case IX86_BUILTIN_MASKMOVQ:
19211 case IX86_BUILTIN_MASKMOVDQU:
19212 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19213 ? CODE_FOR_mmx_maskmovq
19214 : CODE_FOR_sse2_maskmovdqu);
19215 /* Note the arg order is different from the operand order. */
19216 arg1 = CALL_EXPR_ARG (exp, 0);
19217 arg2 = CALL_EXPR_ARG (exp, 1);
19218 arg0 = CALL_EXPR_ARG (exp, 2);
19219 op0 = expand_normal (arg0);
19220 op1 = expand_normal (arg1);
19221 op2 = expand_normal (arg2);
19222 mode0 = insn_data[icode].operand[0].mode;
19223 mode1 = insn_data[icode].operand[1].mode;
19224 mode2 = insn_data[icode].operand[2].mode;
19225
19226 op0 = force_reg (Pmode, op0);
19227 op0 = gen_rtx_MEM (mode1, op0);
19228
19229 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19230 op0 = copy_to_mode_reg (mode0, op0);
19231 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19232 op1 = copy_to_mode_reg (mode1, op1);
19233 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19234 op2 = copy_to_mode_reg (mode2, op2);
19235 pat = GEN_FCN (icode) (op0, op1, op2);
19236 if (! pat)
19237 return 0;
19238 emit_insn (pat);
19239 return 0;
19240
19241 case IX86_BUILTIN_RSQRTF:
19242 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19243
19244 case IX86_BUILTIN_SQRTSS:
19245 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19246 case IX86_BUILTIN_RSQRTSS:
19247 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19248 case IX86_BUILTIN_RCPSS:
19249 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19250
19251 case IX86_BUILTIN_LOADUPS:
19252 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19253
19254 case IX86_BUILTIN_STOREUPS:
19255 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19256
19257 case IX86_BUILTIN_LOADHPS:
19258 case IX86_BUILTIN_LOADLPS:
19259 case IX86_BUILTIN_LOADHPD:
19260 case IX86_BUILTIN_LOADLPD:
19261 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19262 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19263 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19264 : CODE_FOR_sse2_loadlpd);
19265 arg0 = CALL_EXPR_ARG (exp, 0);
19266 arg1 = CALL_EXPR_ARG (exp, 1);
19267 op0 = expand_normal (arg0);
19268 op1 = expand_normal (arg1);
19269 tmode = insn_data[icode].operand[0].mode;
19270 mode0 = insn_data[icode].operand[1].mode;
19271 mode1 = insn_data[icode].operand[2].mode;
19272
19273 op0 = force_reg (mode0, op0);
19274 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19275 if (optimize || target == 0
19276 || GET_MODE (target) != tmode
19277 || !register_operand (target, tmode))
19278 target = gen_reg_rtx (tmode);
19279 pat = GEN_FCN (icode) (target, op0, op1);
19280 if (! pat)
19281 return 0;
19282 emit_insn (pat);
19283 return target;
19284
19285 case IX86_BUILTIN_STOREHPS:
19286 case IX86_BUILTIN_STORELPS:
19287 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19288 : CODE_FOR_sse_storelps);
19289 arg0 = CALL_EXPR_ARG (exp, 0);
19290 arg1 = CALL_EXPR_ARG (exp, 1);
19291 op0 = expand_normal (arg0);
19292 op1 = expand_normal (arg1);
19293 mode0 = insn_data[icode].operand[0].mode;
19294 mode1 = insn_data[icode].operand[1].mode;
19295
19296 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19297 op1 = force_reg (mode1, op1);
19298
19299 pat = GEN_FCN (icode) (op0, op1);
19300 if (! pat)
19301 return 0;
19302 emit_insn (pat);
19303 return const0_rtx;
19304
19305 case IX86_BUILTIN_MOVNTPS:
19306 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19307 case IX86_BUILTIN_MOVNTQ:
19308 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19309
19310 case IX86_BUILTIN_LDMXCSR:
19311 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19312 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19313 emit_move_insn (target, op0);
19314 emit_insn (gen_sse_ldmxcsr (target));
19315 return 0;
19316
19317 case IX86_BUILTIN_STMXCSR:
19318 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19319 emit_insn (gen_sse_stmxcsr (target));
19320 return copy_to_mode_reg (SImode, target);
19321
19322 case IX86_BUILTIN_SHUFPS:
19323 case IX86_BUILTIN_SHUFPD:
19324 icode = (fcode == IX86_BUILTIN_SHUFPS
19325 ? CODE_FOR_sse_shufps
19326 : CODE_FOR_sse2_shufpd);
19327 arg0 = CALL_EXPR_ARG (exp, 0);
19328 arg1 = CALL_EXPR_ARG (exp, 1);
19329 arg2 = CALL_EXPR_ARG (exp, 2);
19330 op0 = expand_normal (arg0);
19331 op1 = expand_normal (arg1);
19332 op2 = expand_normal (arg2);
19333 tmode = insn_data[icode].operand[0].mode;
19334 mode0 = insn_data[icode].operand[1].mode;
19335 mode1 = insn_data[icode].operand[2].mode;
19336 mode2 = insn_data[icode].operand[3].mode;
19337
19338 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19339 op0 = copy_to_mode_reg (mode0, op0);
19340 if ((optimize && !register_operand (op1, mode1))
19341 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19342 op1 = copy_to_mode_reg (mode1, op1);
19343 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19344 {
19345 /* @@@ better error message */
19346 error ("mask must be an immediate");
19347 return gen_reg_rtx (tmode);
19348 }
19349 if (optimize || target == 0
19350 || GET_MODE (target) != tmode
19351 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19352 target = gen_reg_rtx (tmode);
19353 pat = GEN_FCN (icode) (target, op0, op1, op2);
19354 if (! pat)
19355 return 0;
19356 emit_insn (pat);
19357 return target;
19358
19359 case IX86_BUILTIN_PSHUFW:
19360 case IX86_BUILTIN_PSHUFD:
19361 case IX86_BUILTIN_PSHUFHW:
19362 case IX86_BUILTIN_PSHUFLW:
19363 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19364 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19365 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19366 : CODE_FOR_mmx_pshufw);
19367 arg0 = CALL_EXPR_ARG (exp, 0);
19368 arg1 = CALL_EXPR_ARG (exp, 1);
19369 op0 = expand_normal (arg0);
19370 op1 = expand_normal (arg1);
19371 tmode = insn_data[icode].operand[0].mode;
19372 mode1 = insn_data[icode].operand[1].mode;
19373 mode2 = insn_data[icode].operand[2].mode;
19374
19375 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19376 op0 = copy_to_mode_reg (mode1, op0);
19377 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19378 {
19379 /* @@@ better error message */
19380 error ("mask must be an immediate");
19381 return const0_rtx;
19382 }
19383 if (target == 0
19384 || GET_MODE (target) != tmode
19385 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19386 target = gen_reg_rtx (tmode);
19387 pat = GEN_FCN (icode) (target, op0, op1);
19388 if (! pat)
19389 return 0;
19390 emit_insn (pat);
19391 return target;
19392
19393 case IX86_BUILTIN_PSLLWI128:
19394 icode = CODE_FOR_ashlv8hi3;
19395 goto do_pshifti;
19396 case IX86_BUILTIN_PSLLDI128:
19397 icode = CODE_FOR_ashlv4si3;
19398 goto do_pshifti;
19399 case IX86_BUILTIN_PSLLQI128:
19400 icode = CODE_FOR_ashlv2di3;
19401 goto do_pshifti;
19402 case IX86_BUILTIN_PSRAWI128:
19403 icode = CODE_FOR_ashrv8hi3;
19404 goto do_pshifti;
19405 case IX86_BUILTIN_PSRADI128:
19406 icode = CODE_FOR_ashrv4si3;
19407 goto do_pshifti;
19408 case IX86_BUILTIN_PSRLWI128:
19409 icode = CODE_FOR_lshrv8hi3;
19410 goto do_pshifti;
19411 case IX86_BUILTIN_PSRLDI128:
19412 icode = CODE_FOR_lshrv4si3;
19413 goto do_pshifti;
19414 case IX86_BUILTIN_PSRLQI128:
19415 icode = CODE_FOR_lshrv2di3;
19416 goto do_pshifti;
19417 do_pshifti:
19418 arg0 = CALL_EXPR_ARG (exp, 0);
19419 arg1 = CALL_EXPR_ARG (exp, 1);
19420 op0 = expand_normal (arg0);
19421 op1 = expand_normal (arg1);
19422
19423 if (!CONST_INT_P (op1))
19424 {
19425 error ("shift must be an immediate");
19426 return const0_rtx;
19427 }
19428 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19429 op1 = GEN_INT (255);
19430
19431 tmode = insn_data[icode].operand[0].mode;
19432 mode1 = insn_data[icode].operand[1].mode;
19433 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19434 op0 = copy_to_reg (op0);
19435
19436 target = gen_reg_rtx (tmode);
19437 pat = GEN_FCN (icode) (target, op0, op1);
19438 if (!pat)
19439 return 0;
19440 emit_insn (pat);
19441 return target;
19442
19443 case IX86_BUILTIN_PSLLW128:
19444 icode = CODE_FOR_ashlv8hi3;
19445 goto do_pshift;
19446 case IX86_BUILTIN_PSLLD128:
19447 icode = CODE_FOR_ashlv4si3;
19448 goto do_pshift;
19449 case IX86_BUILTIN_PSLLQ128:
19450 icode = CODE_FOR_ashlv2di3;
19451 goto do_pshift;
19452 case IX86_BUILTIN_PSRAW128:
19453 icode = CODE_FOR_ashrv8hi3;
19454 goto do_pshift;
19455 case IX86_BUILTIN_PSRAD128:
19456 icode = CODE_FOR_ashrv4si3;
19457 goto do_pshift;
19458 case IX86_BUILTIN_PSRLW128:
19459 icode = CODE_FOR_lshrv8hi3;
19460 goto do_pshift;
19461 case IX86_BUILTIN_PSRLD128:
19462 icode = CODE_FOR_lshrv4si3;
19463 goto do_pshift;
19464 case IX86_BUILTIN_PSRLQ128:
19465 icode = CODE_FOR_lshrv2di3;
19466 goto do_pshift;
19467 do_pshift:
19468 arg0 = CALL_EXPR_ARG (exp, 0);
19469 arg1 = CALL_EXPR_ARG (exp, 1);
19470 op0 = expand_normal (arg0);
19471 op1 = expand_normal (arg1);
19472
19473 tmode = insn_data[icode].operand[0].mode;
19474 mode1 = insn_data[icode].operand[1].mode;
19475
19476 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19477 op0 = copy_to_reg (op0);
19478
19479 op1 = simplify_gen_subreg (SImode, op1, GET_MODE (op1), 0);
19480 if (! (*insn_data[icode].operand[2].predicate) (op1, SImode))
19481 op1 = copy_to_reg (op1);
19482
19483 target = gen_reg_rtx (tmode);
19484 pat = GEN_FCN (icode) (target, op0, op1);
19485 if (!pat)
19486 return 0;
19487 emit_insn (pat);
19488 return target;
19489
19490 case IX86_BUILTIN_PSLLDQI128:
19491 case IX86_BUILTIN_PSRLDQI128:
19492 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19493 : CODE_FOR_sse2_lshrti3);
19494 arg0 = CALL_EXPR_ARG (exp, 0);
19495 arg1 = CALL_EXPR_ARG (exp, 1);
19496 op0 = expand_normal (arg0);
19497 op1 = expand_normal (arg1);
19498 tmode = insn_data[icode].operand[0].mode;
19499 mode1 = insn_data[icode].operand[1].mode;
19500 mode2 = insn_data[icode].operand[2].mode;
19501
19502 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19503 {
19504 op0 = copy_to_reg (op0);
19505 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19506 }
19507 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19508 {
19509 error ("shift must be an immediate");
19510 return const0_rtx;
19511 }
19512 target = gen_reg_rtx (V2DImode);
19513 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19514 op0, op1);
19515 if (! pat)
19516 return 0;
19517 emit_insn (pat);
19518 return target;
19519
19520 case IX86_BUILTIN_FEMMS:
19521 emit_insn (gen_mmx_femms ());
19522 return NULL_RTX;
19523
19524 case IX86_BUILTIN_PAVGUSB:
19525 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19526
19527 case IX86_BUILTIN_PF2ID:
19528 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19529
19530 case IX86_BUILTIN_PFACC:
19531 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19532
19533 case IX86_BUILTIN_PFADD:
19534 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19535
19536 case IX86_BUILTIN_PFCMPEQ:
19537 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19538
19539 case IX86_BUILTIN_PFCMPGE:
19540 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19541
19542 case IX86_BUILTIN_PFCMPGT:
19543 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19544
19545 case IX86_BUILTIN_PFMAX:
19546 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19547
19548 case IX86_BUILTIN_PFMIN:
19549 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19550
19551 case IX86_BUILTIN_PFMUL:
19552 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19553
19554 case IX86_BUILTIN_PFRCP:
19555 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19556
19557 case IX86_BUILTIN_PFRCPIT1:
19558 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19559
19560 case IX86_BUILTIN_PFRCPIT2:
19561 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19562
19563 case IX86_BUILTIN_PFRSQIT1:
19564 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19565
19566 case IX86_BUILTIN_PFRSQRT:
19567 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19568
19569 case IX86_BUILTIN_PFSUB:
19570 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19571
19572 case IX86_BUILTIN_PFSUBR:
19573 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19574
19575 case IX86_BUILTIN_PI2FD:
19576 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19577
19578 case IX86_BUILTIN_PMULHRW:
19579 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19580
19581 case IX86_BUILTIN_PF2IW:
19582 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19583
19584 case IX86_BUILTIN_PFNACC:
19585 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19586
19587 case IX86_BUILTIN_PFPNACC:
19588 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19589
19590 case IX86_BUILTIN_PI2FW:
19591 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19592
19593 case IX86_BUILTIN_PSWAPDSI:
19594 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19595
19596 case IX86_BUILTIN_PSWAPDSF:
19597 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19598
19599 case IX86_BUILTIN_SQRTSD:
19600 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19601 case IX86_BUILTIN_LOADUPD:
19602 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19603 case IX86_BUILTIN_STOREUPD:
19604 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19605
19606 case IX86_BUILTIN_MFENCE:
19607 emit_insn (gen_sse2_mfence ());
19608 return 0;
19609 case IX86_BUILTIN_LFENCE:
19610 emit_insn (gen_sse2_lfence ());
19611 return 0;
19612
19613 case IX86_BUILTIN_CLFLUSH:
19614 arg0 = CALL_EXPR_ARG (exp, 0);
19615 op0 = expand_normal (arg0);
19616 icode = CODE_FOR_sse2_clflush;
19617 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19618 op0 = copy_to_mode_reg (Pmode, op0);
19619
19620 emit_insn (gen_sse2_clflush (op0));
19621 return 0;
19622
19623 case IX86_BUILTIN_MOVNTPD:
19624 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19625 case IX86_BUILTIN_MOVNTDQ:
19626 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19627 case IX86_BUILTIN_MOVNTI:
19628 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19629
19630 case IX86_BUILTIN_LOADDQU:
19631 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19632 case IX86_BUILTIN_STOREDQU:
19633 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19634
19635 case IX86_BUILTIN_MONITOR:
19636 arg0 = CALL_EXPR_ARG (exp, 0);
19637 arg1 = CALL_EXPR_ARG (exp, 1);
19638 arg2 = CALL_EXPR_ARG (exp, 2);
19639 op0 = expand_normal (arg0);
19640 op1 = expand_normal (arg1);
19641 op2 = expand_normal (arg2);
19642 if (!REG_P (op0))
19643 op0 = copy_to_mode_reg (Pmode, op0);
19644 if (!REG_P (op1))
19645 op1 = copy_to_mode_reg (SImode, op1);
19646 if (!REG_P (op2))
19647 op2 = copy_to_mode_reg (SImode, op2);
19648 if (!TARGET_64BIT)
19649 emit_insn (gen_sse3_monitor (op0, op1, op2));
19650 else
19651 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19652 return 0;
19653
19654 case IX86_BUILTIN_MWAIT:
19655 arg0 = CALL_EXPR_ARG (exp, 0);
19656 arg1 = CALL_EXPR_ARG (exp, 1);
19657 op0 = expand_normal (arg0);
19658 op1 = expand_normal (arg1);
19659 if (!REG_P (op0))
19660 op0 = copy_to_mode_reg (SImode, op0);
19661 if (!REG_P (op1))
19662 op1 = copy_to_mode_reg (SImode, op1);
19663 emit_insn (gen_sse3_mwait (op0, op1));
19664 return 0;
19665
19666 case IX86_BUILTIN_LDDQU:
19667 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19668 target, 1);
19669
19670 case IX86_BUILTIN_PALIGNR:
19671 case IX86_BUILTIN_PALIGNR128:
19672 if (fcode == IX86_BUILTIN_PALIGNR)
19673 {
19674 icode = CODE_FOR_ssse3_palignrdi;
19675 mode = DImode;
19676 }
19677 else
19678 {
19679 icode = CODE_FOR_ssse3_palignrti;
19680 mode = V2DImode;
19681 }
19682 arg0 = CALL_EXPR_ARG (exp, 0);
19683 arg1 = CALL_EXPR_ARG (exp, 1);
19684 arg2 = CALL_EXPR_ARG (exp, 2);
19685 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19686 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19687 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19688 tmode = insn_data[icode].operand[0].mode;
19689 mode1 = insn_data[icode].operand[1].mode;
19690 mode2 = insn_data[icode].operand[2].mode;
19691 mode3 = insn_data[icode].operand[3].mode;
19692
19693 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19694 {
19695 op0 = copy_to_reg (op0);
19696 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19697 }
19698 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19699 {
19700 op1 = copy_to_reg (op1);
19701 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19702 }
19703 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19704 {
19705 error ("shift must be an immediate");
19706 return const0_rtx;
19707 }
19708 target = gen_reg_rtx (mode);
19709 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19710 op0, op1, op2);
19711 if (! pat)
19712 return 0;
19713 emit_insn (pat);
19714 return target;
19715
19716 case IX86_BUILTIN_MOVNTDQA:
19717 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19718 target, 1);
19719
19720 case IX86_BUILTIN_MOVNTSD:
19721 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19722
19723 case IX86_BUILTIN_MOVNTSS:
19724 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19725
19726 case IX86_BUILTIN_INSERTQ:
19727 case IX86_BUILTIN_EXTRQ:
19728 icode = (fcode == IX86_BUILTIN_EXTRQ
19729 ? CODE_FOR_sse4a_extrq
19730 : CODE_FOR_sse4a_insertq);
19731 arg0 = CALL_EXPR_ARG (exp, 0);
19732 arg1 = CALL_EXPR_ARG (exp, 1);
19733 op0 = expand_normal (arg0);
19734 op1 = expand_normal (arg1);
19735 tmode = insn_data[icode].operand[0].mode;
19736 mode1 = insn_data[icode].operand[1].mode;
19737 mode2 = insn_data[icode].operand[2].mode;
19738 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19739 op0 = copy_to_mode_reg (mode1, op0);
19740 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19741 op1 = copy_to_mode_reg (mode2, op1);
19742 if (optimize || target == 0
19743 || GET_MODE (target) != tmode
19744 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19745 target = gen_reg_rtx (tmode);
19746 pat = GEN_FCN (icode) (target, op0, op1);
19747 if (! pat)
19748 return NULL_RTX;
19749 emit_insn (pat);
19750 return target;
19751
19752 case IX86_BUILTIN_EXTRQI:
19753 icode = CODE_FOR_sse4a_extrqi;
19754 arg0 = CALL_EXPR_ARG (exp, 0);
19755 arg1 = CALL_EXPR_ARG (exp, 1);
19756 arg2 = CALL_EXPR_ARG (exp, 2);
19757 op0 = expand_normal (arg0);
19758 op1 = expand_normal (arg1);
19759 op2 = expand_normal (arg2);
19760 tmode = insn_data[icode].operand[0].mode;
19761 mode1 = insn_data[icode].operand[1].mode;
19762 mode2 = insn_data[icode].operand[2].mode;
19763 mode3 = insn_data[icode].operand[3].mode;
19764 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19765 op0 = copy_to_mode_reg (mode1, op0);
19766 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19767 {
19768 error ("index mask must be an immediate");
19769 return gen_reg_rtx (tmode);
19770 }
19771 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19772 {
19773 error ("length mask must be an immediate");
19774 return gen_reg_rtx (tmode);
19775 }
19776 if (optimize || target == 0
19777 || GET_MODE (target) != tmode
19778 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19779 target = gen_reg_rtx (tmode);
19780 pat = GEN_FCN (icode) (target, op0, op1, op2);
19781 if (! pat)
19782 return NULL_RTX;
19783 emit_insn (pat);
19784 return target;
19785
19786 case IX86_BUILTIN_INSERTQI:
19787 icode = CODE_FOR_sse4a_insertqi;
19788 arg0 = CALL_EXPR_ARG (exp, 0);
19789 arg1 = CALL_EXPR_ARG (exp, 1);
19790 arg2 = CALL_EXPR_ARG (exp, 2);
19791 arg3 = CALL_EXPR_ARG (exp, 3);
19792 op0 = expand_normal (arg0);
19793 op1 = expand_normal (arg1);
19794 op2 = expand_normal (arg2);
19795 op3 = expand_normal (arg3);
19796 tmode = insn_data[icode].operand[0].mode;
19797 mode1 = insn_data[icode].operand[1].mode;
19798 mode2 = insn_data[icode].operand[2].mode;
19799 mode3 = insn_data[icode].operand[3].mode;
19800 mode4 = insn_data[icode].operand[4].mode;
19801
19802 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19803 op0 = copy_to_mode_reg (mode1, op0);
19804
19805 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19806 op1 = copy_to_mode_reg (mode2, op1);
19807
19808 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19809 {
19810 error ("index mask must be an immediate");
19811 return gen_reg_rtx (tmode);
19812 }
19813 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19814 {
19815 error ("length mask must be an immediate");
19816 return gen_reg_rtx (tmode);
19817 }
19818 if (optimize || target == 0
19819 || GET_MODE (target) != tmode
19820 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19821 target = gen_reg_rtx (tmode);
19822 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19823 if (! pat)
19824 return NULL_RTX;
19825 emit_insn (pat);
19826 return target;
19827
19828 case IX86_BUILTIN_VEC_INIT_V2SI:
19829 case IX86_BUILTIN_VEC_INIT_V4HI:
19830 case IX86_BUILTIN_VEC_INIT_V8QI:
19831 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19832
19833 case IX86_BUILTIN_VEC_EXT_V2DF:
19834 case IX86_BUILTIN_VEC_EXT_V2DI:
19835 case IX86_BUILTIN_VEC_EXT_V4SF:
19836 case IX86_BUILTIN_VEC_EXT_V4SI:
19837 case IX86_BUILTIN_VEC_EXT_V8HI:
19838 case IX86_BUILTIN_VEC_EXT_V2SI:
19839 case IX86_BUILTIN_VEC_EXT_V4HI:
19840 case IX86_BUILTIN_VEC_EXT_V16QI:
19841 return ix86_expand_vec_ext_builtin (exp, target);
19842
19843 case IX86_BUILTIN_VEC_SET_V2DI:
19844 case IX86_BUILTIN_VEC_SET_V4SF:
19845 case IX86_BUILTIN_VEC_SET_V4SI:
19846 case IX86_BUILTIN_VEC_SET_V8HI:
19847 case IX86_BUILTIN_VEC_SET_V4HI:
19848 case IX86_BUILTIN_VEC_SET_V16QI:
19849 return ix86_expand_vec_set_builtin (exp);
19850
19851 case IX86_BUILTIN_INFQ:
19852 {
19853 REAL_VALUE_TYPE inf;
19854 rtx tmp;
19855
19856 real_inf (&inf);
19857 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19858
19859 tmp = validize_mem (force_const_mem (mode, tmp));
19860
19861 if (target == 0)
19862 target = gen_reg_rtx (mode);
19863
19864 emit_move_insn (target, tmp);
19865 return target;
19866 }
19867
19868 case IX86_BUILTIN_FABSQ:
19869 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19870
19871 case IX86_BUILTIN_COPYSIGNQ:
19872 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19873
19874 default:
19875 break;
19876 }
19877
19878 for (i = 0, d = bdesc_sse_3arg;
19879 i < ARRAY_SIZE (bdesc_sse_3arg);
19880 i++, d++)
19881 if (d->code == fcode)
19882 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19883 target);
19884
19885 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19886 if (d->code == fcode)
19887 {
19888 /* Compares are treated specially. */
19889 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19890 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19891 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19892 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19893 return ix86_expand_sse_compare (d, exp, target);
19894
19895 return ix86_expand_binop_builtin (d->icode, exp, target);
19896 }
19897
19898 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19899 if (d->code == fcode)
19900 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19901
19902 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19903 if (d->code == fcode)
19904 return ix86_expand_sse_comi (d, exp, target);
19905
19906 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19907 if (d->code == fcode)
19908 return ix86_expand_sse_ptest (d, exp, target);
19909
19910 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19911 if (d->code == fcode)
19912 return ix86_expand_crc32 (d->icode, exp, target);
19913
19914 for (i = 0, d = bdesc_pcmpestr;
19915 i < ARRAY_SIZE (bdesc_pcmpestr);
19916 i++, d++)
19917 if (d->code == fcode)
19918 return ix86_expand_sse_pcmpestr (d, exp, target);
19919
19920 for (i = 0, d = bdesc_pcmpistr;
19921 i < ARRAY_SIZE (bdesc_pcmpistr);
19922 i++, d++)
19923 if (d->code == fcode)
19924 return ix86_expand_sse_pcmpistr (d, exp, target);
19925
19926 gcc_unreachable ();
19927 }
19928
19929 /* Returns a function decl for a vectorized version of the builtin function
19930 with builtin function code FN, result vector type TYPE_OUT and input vector type TYPE_IN, or NULL_TREE
19931 if it is not available. */
19932
19933 static tree
19934 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19935 tree type_in)
19936 {
19937 enum machine_mode in_mode, out_mode;
19938 int in_n, out_n;
19939
19940 if (TREE_CODE (type_out) != VECTOR_TYPE
19941 || TREE_CODE (type_in) != VECTOR_TYPE)
19942 return NULL_TREE;
19943
19944 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19945 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19946 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19947 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19948
19949 switch (fn)
19950 {
19951 case BUILT_IN_SQRT:
19952 if (out_mode == DFmode && out_n == 2
19953 && in_mode == DFmode && in_n == 2)
19954 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19955 break;
19956
19957 case BUILT_IN_SQRTF:
19958 if (out_mode == SFmode && out_n == 4
19959 && in_mode == SFmode && in_n == 4)
19960 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19961 break;
19962
19963 case BUILT_IN_LRINT:
19964 if (out_mode == SImode && out_n == 4
19965 && in_mode == DFmode && in_n == 2)
19966 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
19967 break;
19968
19969 case BUILT_IN_LRINTF:
19970 if (out_mode == SImode && out_n == 4
19971 && in_mode == SFmode && in_n == 4)
19972 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19973 break;
19974
19975 default:
19976 ;
19977 }
19978
19979 /* Dispatch to a handler for a vectorization library. */
19980 if (ix86_veclib_handler)
19981 return (*ix86_veclib_handler)(fn, type_out, type_in);
19982
19983 return NULL_TREE;
19984 }
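
/* A worked example of the mapping above: when the vectorizer asks for
   BUILT_IN_SQRT with a V2DF result and a V2DF argument, it gets back the
   decl recorded for IX86_BUILTIN_SQRTPD, so a loop computing
   d[i] = sqrt (s[i]) over doubles can be vectorized two elements at a
   time with sqrtpd.  */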
19985
19986 /* Handler for an ACML-style interface to a library with vectorized
19987 intrinsics. */
19988
19989 static tree
19990 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
19991 {
19992 char name[20] = "__vr.._";
19993 tree fntype, new_fndecl, args;
19994 unsigned arity;
19995 const char *bname;
19996 enum machine_mode el_mode, in_mode;
19997 int n, in_n;
19998
19999 /* The ACML is 64-bit only and suitable only for unsafe math, as
20000 it does not correctly support parts of IEEE with the required
20001 precision, such as denormals. */
20002 if (!TARGET_64BIT
20003 || !flag_unsafe_math_optimizations)
20004 return NULL_TREE;
20005
20006 el_mode = TYPE_MODE (TREE_TYPE (type_out));
20007 n = TYPE_VECTOR_SUBPARTS (type_out);
20008 in_mode = TYPE_MODE (TREE_TYPE (type_in));
20009 in_n = TYPE_VECTOR_SUBPARTS (type_in);
20010 if (el_mode != in_mode
20011 || n != in_n)
20012 return NULL_TREE;
20013
20014 switch (fn)
20015 {
20016 case BUILT_IN_SIN:
20017 case BUILT_IN_COS:
20018 case BUILT_IN_EXP:
20019 case BUILT_IN_LOG:
20020 case BUILT_IN_LOG2:
20021 case BUILT_IN_LOG10:
20022 name[4] = 'd';
20023 name[5] = '2';
20024 if (el_mode != DFmode
20025 || n != 2)
20026 return NULL_TREE;
20027 break;
20028
20029 case BUILT_IN_SINF:
20030 case BUILT_IN_COSF:
20031 case BUILT_IN_EXPF:
20032 case BUILT_IN_POWF:
20033 case BUILT_IN_LOGF:
20034 case BUILT_IN_LOG2F:
20035 case BUILT_IN_LOG10F:
20036 name[4] = 's';
20037 name[5] = '4';
20038 if (el_mode != SFmode
20039 || n != 4)
20040 return NULL_TREE;
20041 break;
20042
20043 default:
20044 return NULL_TREE;
20045 }
20046
20047 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
20048 sprintf (name + 7, "%s", bname+10);
20049
20050 arity = 0;
20051 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
20052 args = TREE_CHAIN (args))
20053 arity++;
20054
20055 if (arity == 1)
20056 fntype = build_function_type_list (type_out, type_in, NULL);
20057 else
20058 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20059
20060 /* Build a function declaration for the vectorized function. */
20061 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
20062 TREE_PUBLIC (new_fndecl) = 1;
20063 DECL_EXTERNAL (new_fndecl) = 1;
20064 DECL_IS_NOVOPS (new_fndecl) = 1;
20065 TREE_READONLY (new_fndecl) = 1;
20066
20067 return new_fndecl;
20068 }
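
/* Worked examples of the name construction above, assuming the scalar
   decl names carry the usual "__builtin_" prefix that the +10 skips:
   BUILT_IN_SIN on V2DFmode turns the "__vr.._" template into "__vrd2_"
   and appends "sin", giving "__vrd2_sin"; BUILT_IN_LOGF on V4SFmode
   gives "__vrs4_logf".  These are the ACML-style vector entry points the
   handler targets.  */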
20069
20070
20071 /* Returns a decl of a function that implements conversion of the
20072 input vector of type TYPE, or NULL_TREE if it is not available. */
20073
20074 static tree
20075 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
20076 {
20077 if (TREE_CODE (type) != VECTOR_TYPE)
20078 return NULL_TREE;
20079
20080 switch (code)
20081 {
20082 case FLOAT_EXPR:
20083 switch (TYPE_MODE (type))
20084 {
20085 case V4SImode:
20086 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
20087 default:
20088 return NULL_TREE;
20089 }
20090
20091 case FIX_TRUNC_EXPR:
20092 switch (TYPE_MODE (type))
20093 {
20094 case V4SFmode:
20095 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
20096 default:
20097 return NULL_TREE;
20098 }
20099 default:
20100 return NULL_TREE;
20101
20102 }
20103 }
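
/* Illustrative sketch: for a loop converting int to float element-wise,
   the vectorizer passes FLOAT_EXPR with a V4SImode input type and gets
   back the decl for IX86_BUILTIN_CVTDQ2PS (the cvtdq2ps insn); the
   truncating float-to-int direction likewise maps a V4SFmode input to
   IX86_BUILTIN_CVTTPS2DQ (cvttps2dq).  */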
20104
20105 /* Returns a decl for a target-specific builtin that implements the
20106 reciprocal of the function FN, or NULL_TREE if not available. */
20107
20108 static tree
20109 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
20110 bool sqrt ATTRIBUTE_UNUSED)
20111 {
20112 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
20113 && flag_finite_math_only && !flag_trapping_math
20114 && flag_unsafe_math_optimizations))
20115 return NULL_TREE;
20116
20117 if (md_fn)
20118 /* Machine dependent builtins. */
20119 switch (fn)
20120 {
20121 /* Vectorized version of sqrt to rsqrt conversion. */
20122 case IX86_BUILTIN_SQRTPS:
20123 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
20124
20125 default:
20126 return NULL_TREE;
20127 }
20128 else
20129 /* Normal builtins. */
20130 switch (fn)
20131 {
20132 /* Sqrt to rsqrt conversion. */
20133 case BUILT_IN_SQRTF:
20134 return ix86_builtins[IX86_BUILTIN_RSQRTF];
20135
20136 default:
20137 return NULL_TREE;
20138 }
20139 }
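
/* Usage sketch (hedged; the caller is the middle-end reciprocal
   optimization): with SSE math, TARGET_RECIP and unsafe math enabled, a
   division whose divisor is a call to sqrtf can be rewritten to use the
   IX86_BUILTIN_RSQRTF decl returned here, i.e. an rsqrtss-based
   approximation (typically refined by a Newton-Raphson step) instead of
   sqrtss followed by a divide.  */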
20140
20141 /* Store OPERAND to memory after reload is completed. This means
20142 that we can't easily use assign_stack_local. */
20143 rtx
20144 ix86_force_to_memory (enum machine_mode mode, rtx operand)
20145 {
20146 rtx result;
20147
20148 gcc_assert (reload_completed);
20149 if (TARGET_RED_ZONE)
20150 {
20151 result = gen_rtx_MEM (mode,
20152 gen_rtx_PLUS (Pmode,
20153 stack_pointer_rtx,
20154 GEN_INT (-RED_ZONE_SIZE)));
20155 emit_move_insn (result, operand);
20156 }
20157 else if (!TARGET_RED_ZONE && TARGET_64BIT)
20158 {
20159 switch (mode)
20160 {
20161 case HImode:
20162 case SImode:
20163 operand = gen_lowpart (DImode, operand);
20164 /* FALLTHRU */
20165 case DImode:
20166 emit_insn (
20167 gen_rtx_SET (VOIDmode,
20168 gen_rtx_MEM (DImode,
20169 gen_rtx_PRE_DEC (DImode,
20170 stack_pointer_rtx)),
20171 operand));
20172 break;
20173 default:
20174 gcc_unreachable ();
20175 }
20176 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20177 }
20178 else
20179 {
20180 switch (mode)
20181 {
20182 case DImode:
20183 {
20184 rtx operands[2];
20185 split_di (&operand, 1, operands, operands + 1);
20186 emit_insn (
20187 gen_rtx_SET (VOIDmode,
20188 gen_rtx_MEM (SImode,
20189 gen_rtx_PRE_DEC (Pmode,
20190 stack_pointer_rtx)),
20191 operands[1]));
20192 emit_insn (
20193 gen_rtx_SET (VOIDmode,
20194 gen_rtx_MEM (SImode,
20195 gen_rtx_PRE_DEC (Pmode,
20196 stack_pointer_rtx)),
20197 operands[0]));
20198 }
20199 break;
20200 case HImode:
20201 /* Store HImodes as SImodes. */
20202 operand = gen_lowpart (SImode, operand);
20203 /* FALLTHRU */
20204 case SImode:
20205 emit_insn (
20206 gen_rtx_SET (VOIDmode,
20207 gen_rtx_MEM (GET_MODE (operand),
20208 gen_rtx_PRE_DEC (SImode,
20209 stack_pointer_rtx)),
20210 operand));
20211 break;
20212 default:
20213 gcc_unreachable ();
20214 }
20215 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20216 }
20217 return result;
20218 }
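
/* A sketch of the red-zone fast path above: for a DImode OPERAND the
   emitted RTL is roughly

     (set (mem:DI (plus:DI (reg sp) (const_int -RED_ZONE_SIZE)))
          (reg:DI operand))

   with no stack adjustment, since the ABI reserves that area below the
   stack pointer for exactly this kind of scratch use.  */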
20219
20220 /* Free operand from memory. */
20221 void
20222 ix86_free_from_memory (enum machine_mode mode)
20223 {
20224 if (!TARGET_RED_ZONE)
20225 {
20226 int size;
20227
20228 if (mode == DImode || TARGET_64BIT)
20229 size = 8;
20230 else
20231 size = 4;
20232 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20233 to a pop or add instruction if registers are available. */
20234 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20235 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20236 GEN_INT (size))));
20237 }
20238 }
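
/* For example, after a DImode ix86_force_to_memory on a target without a
   red zone, the matching call here emits roughly
   (set (reg sp) (plus (reg sp) (const_int 8))) to release the slot.  */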
20239
20240 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20241 QImode must go into class Q_REGS.
20242 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
20243 movdf to do mem-to-mem moves through integer regs. */
20244 enum reg_class
20245 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20246 {
20247 enum machine_mode mode = GET_MODE (x);
20248
20249 /* We're only allowed to return a subclass of REGCLASS. Many of the
20250 following checks fail for NO_REGS, so eliminate that early. */
20251 if (regclass == NO_REGS)
20252 return NO_REGS;
20253
20254 /* All classes can load zeros. */
20255 if (x == CONST0_RTX (mode))
20256 return regclass;
20257
20258 /* Force constants into memory if we are loading a (nonzero) constant into
20259 an MMX or SSE register. This is because there are no MMX/SSE instructions
20260 to load from a constant. */
20261 if (CONSTANT_P (x)
20262 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20263 return NO_REGS;
20264
20265 /* Prefer SSE regs only, if we can use them for math. */
20266 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20267 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20268
20269 /* Floating-point constants need more complex checks. */
20270 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20271 {
20272 /* General regs can load everything. */
20273 if (reg_class_subset_p (regclass, GENERAL_REGS))
20274 return regclass;
20275
20276 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20277 zero above. We only want to wind up preferring 80387 registers if
20278 we plan on doing computation with them. */
20279 if (TARGET_80387
20280 && standard_80387_constant_p (x))
20281 {
20282 /* Limit class to non-sse. */
20283 if (regclass == FLOAT_SSE_REGS)
20284 return FLOAT_REGS;
20285 if (regclass == FP_TOP_SSE_REGS)
20286 return FP_TOP_REG;
20287 if (regclass == FP_SECOND_SSE_REGS)
20288 return FP_SECOND_REG;
20289 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20290 return regclass;
20291 }
20292
20293 return NO_REGS;
20294 }
20295
20296 /* Generally when we see PLUS here, it's the function invariant
20297 (plus soft-fp const_int), which can only be computed into general
20298 regs. */
20299 if (GET_CODE (x) == PLUS)
20300 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20301
20302 /* QImode constants are easy to load, but non-constant QImode data
20303 must go into Q_REGS. */
20304 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20305 {
20306 if (reg_class_subset_p (regclass, Q_REGS))
20307 return regclass;
20308 if (reg_class_subset_p (Q_REGS, regclass))
20309 return Q_REGS;
20310 return NO_REGS;
20311 }
20312
20313 return regclass;
20314 }
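
/* Two concrete consequences of the checks above (illustrative): a nonzero
   constant headed for an MMX or SSE class comes back as NO_REGS, so the
   value is kept in the constant pool instead of being loaded as an
   immediate; and a non-constant QImode value with GENERAL_REGS preferred
   is narrowed to Q_REGS, keeping byte accesses in byte-addressable
   registers.  */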
20315
20316 /* Discourage putting floating-point values in SSE registers unless
20317 SSE math is being used, and likewise for the 387 registers. */
20318 enum reg_class
20319 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20320 {
20321 enum machine_mode mode = GET_MODE (x);
20322
20323 /* Restrict the output reload class to the register bank that we are doing
20324 math on. If we would prefer not to return a subset of REGCLASS, reject this
20325 alternative by returning NO_REGS; if reload cannot do this, it will still use its choice. */
20326 mode = GET_MODE (x);
20327 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20328 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20329
20330 if (X87_FLOAT_MODE_P (mode))
20331 {
20332 if (regclass == FP_TOP_SSE_REGS)
20333 return FP_TOP_REG;
20334 else if (regclass == FP_SECOND_SSE_REGS)
20335 return FP_SECOND_REG;
20336 else
20337 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20338 }
20339
20340 return regclass;
20341 }
20342
20343 /* If we are copying between general and FP registers, we need a memory
20344 location. The same is true for SSE and MMX registers.
20345
20346 To optimize register_move_cost performance, an inline variant is provided.
20347
20348 The macro can't work reliably when one of the CLASSES is a class containing
20349 registers from multiple units (SSE, MMX, integer). We avoid this by never
20350 combining those units in a single alternative in the machine description.
20351 Ensure that this constraint holds to avoid unexpected surprises.
20352
20353 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20354 enforce these sanity checks. */
20355
20356 static inline int
20357 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20358 enum machine_mode mode, int strict)
20359 {
20360 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20361 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20362 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20363 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20364 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20365 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20366 {
20367 gcc_assert (!strict);
20368 return true;
20369 }
20370
20371 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20372 return true;
20373
20374 /* ??? This is a lie. We do have moves between mmx/general and between
20375 mmx/sse2. But by saying we need secondary memory we discourage the
20376 register allocator from using the mmx registers unless needed. */
20377 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20378 return true;
20379
20380 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20381 {
20382 /* SSE1 doesn't have any direct moves from other classes. */
20383 if (!TARGET_SSE2)
20384 return true;
20385
20386 /* If the target says that inter-unit moves are more expensive
20387 than moving through memory, then don't generate them. */
20388 if (!TARGET_INTER_UNIT_MOVES)
20389 return true;
20390
20391 /* Between SSE and general, we have moves no larger than word size. */
20392 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20393 return true;
20394 }
20395
20396 return false;
20397 }
20398
20399 int
20400 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20401 enum machine_mode mode, int strict)
20402 {
20403 return inline_secondary_memory_needed (class1, class2, mode, strict);
20404 }
20405
20406 /* Return true if the registers in CLASS cannot represent the change from
20407 modes FROM to TO. */
20408
20409 bool
20410 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20411 enum reg_class regclass)
20412 {
20413 if (from == to)
20414 return false;
20415
20416 /* x87 registers can't do subreg at all, as all values are reformatted
20417 to extended precision. */
20418 if (MAYBE_FLOAT_CLASS_P (regclass))
20419 return true;
20420
20421 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20422 {
20423 /* Vector registers do not support QI or HImode loads. If we don't
20424 disallow a change to these modes, reload will assume it's ok to
20425 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20426 the vec_dupv4hi pattern. */
20427 if (GET_MODE_SIZE (from) < 4)
20428 return true;
20429
20430 /* Vector registers do not support subreg with nonzero offsets, which
20431 are otherwise valid for integer registers. Since we can't see
20432 whether we have a nonzero offset from here, prohibit all
20433 nonparadoxical subregs changing size. */
20434 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20435 return true;
20436 }
20437
20438 return false;
20439 }
20440
20441 /* Return the cost of moving data of mode M between a
20442 register and memory. A value of 2 is the default; this cost is
20443 relative to those in `REGISTER_MOVE_COST'.
20444
20445 This function is used extensively by register_move_cost, which is used to
20446 build tables at startup. Make it inline in this case.
20447 When IN is 2, return maximum of in and out move cost.
20448
20449 If moving between registers and memory is more expensive than
20450 between two registers, you should define this macro to express the
20451 relative cost.
20452
20453 Also model the increased cost of moving QImode registers in non-Q_REGS
20454 classes.
20455 */
20456 static inline int
20457 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
20458 int in)
20459 {
20460 int cost;
20461 if (FLOAT_CLASS_P (regclass))
20462 {
20463 int index;
20464 switch (mode)
20465 {
20466 case SFmode:
20467 index = 0;
20468 break;
20469 case DFmode:
20470 index = 1;
20471 break;
20472 case XFmode:
20473 index = 2;
20474 break;
20475 default:
20476 return 100;
20477 }
20478 if (in == 2)
20479 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
20480 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20481 }
20482 if (SSE_CLASS_P (regclass))
20483 {
20484 int index;
20485 switch (GET_MODE_SIZE (mode))
20486 {
20487 case 4:
20488 index = 0;
20489 break;
20490 case 8:
20491 index = 1;
20492 break;
20493 case 16:
20494 index = 2;
20495 break;
20496 default:
20497 return 100;
20498 }
20499 if (in == 2)
20500 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
20501 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20502 }
20503 if (MMX_CLASS_P (regclass))
20504 {
20505 int index;
20506 switch (GET_MODE_SIZE (mode))
20507 {
20508 case 4:
20509 index = 0;
20510 break;
20511 case 8:
20512 index = 1;
20513 break;
20514 default:
20515 return 100;
20516 }
20517 if (in == 2)
20518 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
20519 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20520 }
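  /* Integer register classes.  The cost depends on the access size; byte
     accesses are cheap only in Q_REGS (or on 64-bit targets), elsewhere
     stores get an extra penalty.  */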
20521 switch (GET_MODE_SIZE (mode))
20522 {
20523 case 1:
20524 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20525 {
20526 if (!in)
20527 return ix86_cost->int_store[0];
20528 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
20529 cost = ix86_cost->movzbl_load;
20530 else
20531 cost = ix86_cost->int_load[0];
20532 if (in == 2)
20533 return MAX (cost, ix86_cost->int_store[0]);
20534 return cost;
20535 }
20536 else
20537 {
20538 if (in == 2)
20539 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
20540 if (in)
20541 return ix86_cost->movzbl_load;
20542 else
20543 return ix86_cost->int_store[0] + 4;
20544 }
20545 break;
20546 case 2:
20547 if (in == 2)
20548 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
20549 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20550 default:
20551 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20552 if (mode == TFmode)
20553 mode = XFmode;
20554 if (in == 2)
20555 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
20556 else if (in)
20557 cost = ix86_cost->int_load[2];
20558 else
20559 cost = ix86_cost->int_store[2];
20560 return (cost * (((int) GET_MODE_SIZE (mode)
20561 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20562 }
20563 }
20564
20565 int
20566 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20567 {
20568 return inline_memory_move_cost (mode, regclass, in);
20569 }
20570
20571
20572 /* Return the cost of moving data from a register in class CLASS1 to
20573 one in class CLASS2.
20574
20575 It is not required that the cost always equal 2 when FROM is the same as TO;
20576 on some machines it is expensive to move between registers if they are not
20577 general registers. */
20578
20579 int
20580 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20581 enum reg_class class2)
20582 {
20583 /* In case we require secondary memory, compute cost of the store followed
20584 by load. In order to avoid bad register allocation choices, we need
20585 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20586
20587 if (inline_secondary_memory_needed (class1, class2, mode, 0))
20588 {
20589 int cost = 1;
20590
20591 cost += inline_memory_move_cost (mode, class1, 2);
20592 cost += inline_memory_move_cost (mode, class2, 2);
20593
20594 /* In case of copying from a general purpose register we may emit multiple
20595 stores followed by a single load, causing a memory size mismatch stall.
20596 Count this as an arbitrarily high cost of 20.  */
20597 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20598 cost += 20;
20599
20600 /* In the case of FP/MMX moves, the registers actually overlap, and we
20601 have to switch modes in order to treat them differently. */
20602 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20603 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20604 cost += 20;
20605
20606 return cost;
20607 }
20608
20609 /* Moves between SSE/MMX and integer unit are expensive. */
20610 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20611 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20612
20613 /* ??? By keeping returned value relatively high, we limit the number
20614 of moves between integer and MMX/SSE registers for all targets.
20615 Additionally, high value prevents problem with x86_modes_tieable_p(),
20616 where integer modes in MMX/SSE registers are not tieable
20617 because of missing QImode and HImode moves to, from or between
20618 MMX/SSE registers. */
20619 return MAX (ix86_cost->mmxsse_to_integer, 8);
20620
20621 if (MAYBE_FLOAT_CLASS_P (class1))
20622 return ix86_cost->fp_move;
20623 if (MAYBE_SSE_CLASS_P (class1))
20624 return ix86_cost->sse_move;
20625 if (MAYBE_MMX_CLASS_P (class1))
20626 return ix86_cost->mmx_move;
20627 return 2;
20628 }
20629
20630 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20631
20632 bool
20633 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20634 {
20635 /* The flags register, and only the flags register, can hold CCmode values. */
20636 if (CC_REGNO_P (regno))
20637 return GET_MODE_CLASS (mode) == MODE_CC;
20638 if (GET_MODE_CLASS (mode) == MODE_CC
20639 || GET_MODE_CLASS (mode) == MODE_RANDOM
20640 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20641 return 0;
20642 if (FP_REGNO_P (regno))
20643 return VALID_FP_MODE_P (mode);
20644 if (SSE_REGNO_P (regno))
20645 {
20646 /* We implement the move patterns for all vector modes into and
20647 out of SSE registers, even when no operation instructions
20648 are available. */
20649 return (VALID_SSE_REG_MODE (mode)
20650 || VALID_SSE2_REG_MODE (mode)
20651 || VALID_MMX_REG_MODE (mode)
20652 || VALID_MMX_REG_MODE_3DNOW (mode));
20653 }
20654 if (MMX_REGNO_P (regno))
20655 {
20656 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20657 so if the register is available at all, then we can move data of
20658 the given mode into or out of it. */
20659 return (VALID_MMX_REG_MODE (mode)
20660 || VALID_MMX_REG_MODE_3DNOW (mode));
20661 }
20662
20663 if (mode == QImode)
20664 {
20665 /* Take care with QImode values - they can be in non-QI regs,
20666 but then they cause partial register stalls. */
20667 if (regno < 4 || TARGET_64BIT)
20668 return 1;
20669 if (!TARGET_PARTIAL_REG_STALL)
20670 return 1;
20671 return reload_in_progress || reload_completed;
20672 }
20673 /* We handle both integer and floats in the general purpose registers. */
20674 else if (VALID_INT_MODE_P (mode))
20675 return 1;
20676 else if (VALID_FP_MODE_P (mode))
20677 return 1;
20678 else if (VALID_DFP_MODE_P (mode))
20679 return 1;
20680 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20681 on to use that value in smaller contexts, this can easily force a
20682 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20683 supporting DImode, allow it. */
20684 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20685 return 1;
20686
20687 return 0;
20688 }
20689
20690 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20691 tieable integer mode. */
20692
20693 static bool
20694 ix86_tieable_integer_mode_p (enum machine_mode mode)
20695 {
20696 switch (mode)
20697 {
20698 case HImode:
20699 case SImode:
20700 return true;
20701
20702 case QImode:
20703 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20704
20705 case DImode:
20706 return TARGET_64BIT;
20707
20708 default:
20709 return false;
20710 }
20711 }
20712
20713 /* Return true if MODE1 is accessible in a register that can hold MODE2
20714 without copying. That is, all register classes that can hold MODE2
20715 can also hold MODE1. */
20716
20717 bool
20718 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20719 {
20720 if (mode1 == mode2)
20721 return true;
20722
20723 if (ix86_tieable_integer_mode_p (mode1)
20724 && ix86_tieable_integer_mode_p (mode2))
20725 return true;
20726
20727 /* MODE2 being XFmode implies fp stack or general regs, which means we
20728 can tie any smaller floating point modes to it. Note that we do not
20729 tie this with TFmode. */
20730 if (mode2 == XFmode)
20731 return mode1 == SFmode || mode1 == DFmode;
20732
20733 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20734 that we can tie it with SFmode. */
20735 if (mode2 == DFmode)
20736 return mode1 == SFmode;
20737
20738 /* If MODE2 is only appropriate for an SSE register, then tie with
20739 any other mode acceptable to SSE registers. */
20740 if (GET_MODE_SIZE (mode2) == 16
20741 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20742 return (GET_MODE_SIZE (mode1) == 16
20743 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20744
20745 /* If MODE2 is appropriate for an MMX register, then tie
20746 with any other mode acceptable to MMX registers. */
20747 if (GET_MODE_SIZE (mode2) == 8
20748 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20749 return (GET_MODE_SIZE (mode1) == 8
20750 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20751
20752 return false;
20753 }
20754
20755 /* Compute a (partial) cost for rtx X. Return true if the complete
20756 cost has been computed, and false if subexpressions should be
20757 scanned. In either case, *TOTAL contains the cost result. */
20758
20759 static bool
20760 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20761 {
20762 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20763 enum machine_mode mode = GET_MODE (x);
20764
20765 switch (code)
20766 {
20767 case CONST_INT:
20768 case CONST:
20769 case LABEL_REF:
20770 case SYMBOL_REF:
20771 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20772 *total = 3;
20773 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20774 *total = 2;
20775 else if (flag_pic && SYMBOLIC_CONST (x)
20776 && (!TARGET_64BIT
20777 || (GET_CODE (x) != LABEL_REF
20778 && (GET_CODE (x) != SYMBOL_REF
20779 || !SYMBOL_REF_LOCAL_P (x)))))
20780 *total = 1;
20781 else
20782 *total = 0;
20783 return true;
20784
20785 case CONST_DOUBLE:
20786 if (mode == VOIDmode)
20787 *total = 0;
20788 else
20789 switch (standard_80387_constant_p (x))
20790 {
20791 case 1: /* 0.0 */
20792 *total = 1;
20793 break;
20794 default: /* Other constants */
20795 *total = 2;
20796 break;
20797 case 0:
20798 case -1:
20799 /* Start with (MEM (SYMBOL_REF)), since that's where
20800 it'll probably end up. Add a penalty for size. */
20801 *total = (COSTS_N_INSNS (1)
20802 + (flag_pic != 0 && !TARGET_64BIT)
20803 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20804 break;
20805 }
20806 return true;
20807
20808 case ZERO_EXTEND:
20809 /* The zero extension is often completely free on x86_64, so make
20810 it as cheap as possible. */
20811 if (TARGET_64BIT && mode == DImode
20812 && GET_MODE (XEXP (x, 0)) == SImode)
20813 *total = 1;
20814 else if (TARGET_ZERO_EXTEND_WITH_AND)
20815 *total = ix86_cost->add;
20816 else
20817 *total = ix86_cost->movzx;
20818 return false;
20819
20820 case SIGN_EXTEND:
20821 *total = ix86_cost->movsx;
20822 return false;
20823
20824 case ASHIFT:
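      /* A shift left by 1 is as cheap as an add; shifts by 2 or 3 can be
         done with lea when lea is no more expensive than a constant shift.  */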
20825 if (CONST_INT_P (XEXP (x, 1))
20826 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20827 {
20828 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20829 if (value == 1)
20830 {
20831 *total = ix86_cost->add;
20832 return false;
20833 }
20834 if ((value == 2 || value == 3)
20835 && ix86_cost->lea <= ix86_cost->shift_const)
20836 {
20837 *total = ix86_cost->lea;
20838 return false;
20839 }
20840 }
20841 /* FALLTHRU */
20842
20843 case ROTATE:
20844 case ASHIFTRT:
20845 case LSHIFTRT:
20846 case ROTATERT:
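      /* Without 64-bit support a DImode shift is split into a pair of
         word-sized shifts plus fixup code, so charge at least twice the
         cost of a single shift.  */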
20847 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20848 {
20849 if (CONST_INT_P (XEXP (x, 1)))
20850 {
20851 if (INTVAL (XEXP (x, 1)) > 32)
20852 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20853 else
20854 *total = ix86_cost->shift_const * 2;
20855 }
20856 else
20857 {
20858 if (GET_CODE (XEXP (x, 1)) == AND)
20859 *total = ix86_cost->shift_var * 2;
20860 else
20861 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20862 }
20863 }
20864 else
20865 {
20866 if (CONST_INT_P (XEXP (x, 1)))
20867 *total = ix86_cost->shift_const;
20868 else
20869 *total = ix86_cost->shift_var;
20870 }
20871 return false;
20872
20873 case MULT:
20874 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20875 {
20876 /* ??? SSE scalar cost should be used here. */
20877 *total = ix86_cost->fmul;
20878 return false;
20879 }
20880 else if (X87_FLOAT_MODE_P (mode))
20881 {
20882 *total = ix86_cost->fmul;
20883 return false;
20884 }
20885 else if (FLOAT_MODE_P (mode))
20886 {
20887 /* ??? SSE vector cost should be used here. */
20888 *total = ix86_cost->fmul;
20889 return false;
20890 }
20891 else
20892 {
20893 rtx op0 = XEXP (x, 0);
20894 rtx op1 = XEXP (x, 1);
20895 int nbits;
20896 if (CONST_INT_P (XEXP (x, 1)))
20897 {
20898 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20899 for (nbits = 0; value != 0; value &= value - 1)
20900 nbits++;
20901 }
20902 else
20903 /* This is arbitrary. */
20904 nbits = 7;
20905
20906 /* Compute costs correctly for widening multiplication. */
20907 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20908 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20909 == GET_MODE_SIZE (mode))
20910 {
20911 int is_mulwiden = 0;
20912 enum machine_mode inner_mode = GET_MODE (op0);
20913
20914 if (GET_CODE (op0) == GET_CODE (op1))
20915 is_mulwiden = 1, op1 = XEXP (op1, 0);
20916 else if (CONST_INT_P (op1))
20917 {
20918 if (GET_CODE (op0) == SIGN_EXTEND)
20919 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20920 == INTVAL (op1);
20921 else
20922 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20923 }
20924
20925 if (is_mulwiden)
20926 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20927 }
20928
20929 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20930 + nbits * ix86_cost->mult_bit
20931 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20932
20933 return true;
20934 }
20935
20936 case DIV:
20937 case UDIV:
20938 case MOD:
20939 case UMOD:
20940 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20941 /* ??? SSE cost should be used here. */
20942 *total = ix86_cost->fdiv;
20943 else if (X87_FLOAT_MODE_P (mode))
20944 *total = ix86_cost->fdiv;
20945 else if (FLOAT_MODE_P (mode))
20946 /* ??? SSE vector cost should be used here. */
20947 *total = ix86_cost->fdiv;
20948 else
20949 *total = ix86_cost->divide[MODE_INDEX (mode)];
20950 return false;
20951
20952 case PLUS:
20953 if (GET_MODE_CLASS (mode) == MODE_INT
20954 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20955 {
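          /* The forms matched below are those a single lea can compute:
             base + index*scale + displacement.  */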
20956 if (GET_CODE (XEXP (x, 0)) == PLUS
20957 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20958 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20959 && CONSTANT_P (XEXP (x, 1)))
20960 {
20961 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20962 if (val == 2 || val == 4 || val == 8)
20963 {
20964 *total = ix86_cost->lea;
20965 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20966 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20967 outer_code);
20968 *total += rtx_cost (XEXP (x, 1), outer_code);
20969 return true;
20970 }
20971 }
20972 else if (GET_CODE (XEXP (x, 0)) == MULT
20973 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20974 {
20975 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20976 if (val == 2 || val == 4 || val == 8)
20977 {
20978 *total = ix86_cost->lea;
20979 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20980 *total += rtx_cost (XEXP (x, 1), outer_code);
20981 return true;
20982 }
20983 }
20984 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20985 {
20986 *total = ix86_cost->lea;
20987 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20988 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20989 *total += rtx_cost (XEXP (x, 1), outer_code);
20990 return true;
20991 }
20992 }
20993 /* FALLTHRU */
20994
20995 case MINUS:
20996 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20997 {
20998 /* ??? SSE cost should be used here. */
20999 *total = ix86_cost->fadd;
21000 return false;
21001 }
21002 else if (X87_FLOAT_MODE_P (mode))
21003 {
21004 *total = ix86_cost->fadd;
21005 return false;
21006 }
21007 else if (FLOAT_MODE_P (mode))
21008 {
21009 /* ??? SSE vector cost should be used here. */
21010 *total = ix86_cost->fadd;
21011 return false;
21012 }
21013 /* FALLTHRU */
21014
21015 case AND:
21016 case IOR:
21017 case XOR:
21018 if (!TARGET_64BIT && mode == DImode)
21019 {
21020 *total = (ix86_cost->add * 2
21021 + (rtx_cost (XEXP (x, 0), outer_code)
21022 << (GET_MODE (XEXP (x, 0)) != DImode))
21023 + (rtx_cost (XEXP (x, 1), outer_code)
21024 << (GET_MODE (XEXP (x, 1)) != DImode)));
21025 return true;
21026 }
21027 /* FALLTHRU */
21028
21029 case NEG:
21030 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21031 {
21032 /* ??? SSE cost should be used here. */
21033 *total = ix86_cost->fchs;
21034 return false;
21035 }
21036 else if (X87_FLOAT_MODE_P (mode))
21037 {
21038 *total = ix86_cost->fchs;
21039 return false;
21040 }
21041 else if (FLOAT_MODE_P (mode))
21042 {
21043 /* ??? SSE vector cost should be used here. */
21044 *total = ix86_cost->fchs;
21045 return false;
21046 }
21047 /* FALLTHRU */
21048
21049 case NOT:
21050 if (!TARGET_64BIT && mode == DImode)
21051 *total = ix86_cost->add * 2;
21052 else
21053 *total = ix86_cost->add;
21054 return false;
21055
21056 case COMPARE:
21057 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
21058 && XEXP (XEXP (x, 0), 1) == const1_rtx
21059 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
21060 && XEXP (x, 1) == const0_rtx)
21061 {
21062 /* This kind of construct is implemented using test[bwl].
21063 Treat it as if we had an AND. */
21064 *total = (ix86_cost->add
21065 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
21066 + rtx_cost (const1_rtx, outer_code));
21067 return true;
21068 }
21069 return false;
21070
21071 case FLOAT_EXTEND:
21072 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
21073 *total = 0;
21074 return false;
21075
21076 case ABS:
21077 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21078 /* ??? SSE cost should be used here. */
21079 *total = ix86_cost->fabs;
21080 else if (X87_FLOAT_MODE_P (mode))
21081 *total = ix86_cost->fabs;
21082 else if (FLOAT_MODE_P (mode))
21083 /* ??? SSE vector cost should be used here. */
21084 *total = ix86_cost->fabs;
21085 return false;
21086
21087 case SQRT:
21088 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21089 /* ??? SSE cost should be used here. */
21090 *total = ix86_cost->fsqrt;
21091 else if (X87_FLOAT_MODE_P (mode))
21092 *total = ix86_cost->fsqrt;
21093 else if (FLOAT_MODE_P (mode))
21094 /* ??? SSE vector cost should be used here. */
21095 *total = ix86_cost->fsqrt;
21096 return false;
21097
21098 case UNSPEC:
21099 if (XINT (x, 1) == UNSPEC_TP)
21100 *total = 0;
21101 return false;
21102
21103 default:
21104 return false;
21105 }
21106 }
21107
21108 #if TARGET_MACHO
21109
21110 static int current_machopic_label_num;
21111
21112 /* Given a symbol name and its associated stub, write out the
21113 definition of the stub. */
21114
21115 void
21116 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21117 {
21118 unsigned int length;
21119 char *binder_name, *symbol_name, lazy_ptr_name[32];
21120 int label = ++current_machopic_label_num;
21121
21122 /* For 64-bit we shouldn't get here. */
21123 gcc_assert (!TARGET_64BIT);
21124
21125 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21126 symb = (*targetm.strip_name_encoding) (symb);
21127
21128 length = strlen (stub);
21129 binder_name = alloca (length + 32);
21130 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
21131
21132 length = strlen (symb);
21133 symbol_name = alloca (length + 32);
21134 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21135
21136 sprintf (lazy_ptr_name, "L%d$lz", label);
21137
21138 if (MACHOPIC_PURE)
21139 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
21140 else
21141 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
21142
21143 fprintf (file, "%s:\n", stub);
21144 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21145
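  /* For PIC (pure) stubs, obtain the PC in %eax via a call/pop pair and
     load the lazy pointer PC-relatively; otherwise jump through the lazy
     pointer directly.  */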
21146 if (MACHOPIC_PURE)
21147 {
21148 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
21149 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
21150 fprintf (file, "\tjmp\t*%%edx\n");
21151 }
21152 else
21153 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
21154
21155 fprintf (file, "%s:\n", binder_name);
21156
21157 if (MACHOPIC_PURE)
21158 {
21159 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
21160 fprintf (file, "\tpushl\t%%eax\n");
21161 }
21162 else
21163 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21164
21165 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
21166
21167 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21168 fprintf (file, "%s:\n", lazy_ptr_name);
21169 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21170 fprintf (file, "\t.long %s\n", binder_name);
21171 }
21172
21173 void
21174 darwin_x86_file_end (void)
21175 {
21176 darwin_file_end ();
21177 ix86_file_end ();
21178 }
21179 #endif /* TARGET_MACHO */
21180
21181 /* Order the registers for the register allocator. */
21182
21183 void
21184 x86_order_regs_for_local_alloc (void)
21185 {
21186 int pos = 0;
21187 int i;
21188
21189 /* First allocate the local general purpose registers. */
21190 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21191 if (GENERAL_REGNO_P (i) && call_used_regs[i])
21192 reg_alloc_order [pos++] = i;
21193
21194 /* Global general purpose registers. */
21195 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21196 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
21197 reg_alloc_order [pos++] = i;
21198
21199 /* x87 registers come first in case we are doing FP math
21200 using them. */
21201 if (!TARGET_SSE_MATH)
21202 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21203 reg_alloc_order [pos++] = i;
21204
21205 /* SSE registers. */
21206 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21207 reg_alloc_order [pos++] = i;
21208 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21209 reg_alloc_order [pos++] = i;
21210
21211 /* x87 registers. */
21212 if (TARGET_SSE_MATH)
21213 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21214 reg_alloc_order [pos++] = i;
21215
21216 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21217 reg_alloc_order [pos++] = i;
21218
21219 /* Initialize the rest of the array, as we do not allocate some registers
21220 at all. */
21221 while (pos < FIRST_PSEUDO_REGISTER)
21222 reg_alloc_order [pos++] = 0;
21223 }
21224
21225 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21226 struct attribute_spec.handler. */
21227 static tree
21228 ix86_handle_struct_attribute (tree *node, tree name,
21229 tree args ATTRIBUTE_UNUSED,
21230 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
21231 {
21232 tree *type = NULL;
21233 if (DECL_P (*node))
21234 {
21235 if (TREE_CODE (*node) == TYPE_DECL)
21236 type = &TREE_TYPE (*node);
21237 }
21238 else
21239 type = node;
21240
21241 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21242 || TREE_CODE (*type) == UNION_TYPE)))
21243 {
21244 warning (OPT_Wattributes, "%qs attribute ignored",
21245 IDENTIFIER_POINTER (name));
21246 *no_add_attrs = true;
21247 }
21248
21249 else if ((is_attribute_p ("ms_struct", name)
21250 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21251 || ((is_attribute_p ("gcc_struct", name)
21252 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21253 {
21254 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21255 IDENTIFIER_POINTER (name));
21256 *no_add_attrs = true;
21257 }
21258
21259 return NULL_TREE;
21260 }
21261
21262 static bool
21263 ix86_ms_bitfield_layout_p (const_tree record_type)
21264 {
21265 return (TARGET_MS_BITFIELD_LAYOUT &&
21266 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21267 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
21268 }
21269
21270 /* Returns an expression indicating where the this parameter is
21271 located on entry to the FUNCTION. */
21272
21273 static rtx
21274 x86_this_parameter (tree function)
21275 {
21276 tree type = TREE_TYPE (function);
21277 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21278
21279 if (TARGET_64BIT)
21280 {
21281 const int *parm_regs;
21282
21283 if (TARGET_64BIT_MS_ABI)
21284 parm_regs = x86_64_ms_abi_int_parameter_registers;
21285 else
21286 parm_regs = x86_64_int_parameter_registers;
21287 return gen_rtx_REG (DImode, parm_regs[aggr]);
21288 }
21289
21290 if (ix86_function_regparm (type, function) > 0
21291 && !type_has_variadic_args_p (type))
21292 {
21293 int regno = 0;
21294 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21295 regno = 2;
21296 return gen_rtx_REG (SImode, regno);
21297 }
21298
21299 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21300 }
21301
21302 /* Determine whether x86_output_mi_thunk can succeed. */
21303
21304 static bool
21305 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
21306 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21307 HOST_WIDE_INT vcall_offset, const_tree function)
21308 {
21309 /* 64-bit can handle anything. */
21310 if (TARGET_64BIT)
21311 return true;
21312
21313 /* For 32-bit, everything's fine if we have one free register. */
21314 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21315 return true;
21316
21317 /* Need a free register for vcall_offset. */
21318 if (vcall_offset)
21319 return false;
21320
21321 /* Need a free register for GOT references. */
21322 if (flag_pic && !(*targetm.binds_local_p) (function))
21323 return false;
21324
21325 /* Otherwise ok. */
21326 return true;
21327 }
21328
21329 /* Output the assembler code for a thunk function. THUNK_DECL is the
21330 declaration for the thunk function itself, FUNCTION is the decl for
21331 the target function. DELTA is an immediate constant offset to be
21332 added to THIS. If VCALL_OFFSET is nonzero, the word at
21333 *(*this + vcall_offset) should be added to THIS. */
21334
21335 static void
21336 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21337 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21338 HOST_WIDE_INT vcall_offset, tree function)
21339 {
21340 rtx xops[3];
21341 rtx this_param = x86_this_parameter (function);
21342 rtx this_reg, tmp;
21343
21344 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21345 pull it in now and let DELTA benefit. */
21346 if (REG_P (this_param))
21347 this_reg = this_param;
21348 else if (vcall_offset)
21349 {
21350 /* Put the this parameter into %eax. */
21351 xops[0] = this_param;
21352 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21353 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21354 }
21355 else
21356 this_reg = NULL_RTX;
21357
21358 /* Adjust the this parameter by a fixed constant. */
21359 if (delta)
21360 {
21361 xops[0] = GEN_INT (delta);
21362 xops[1] = this_reg ? this_reg : this_param;
21363 if (TARGET_64BIT)
21364 {
21365 if (!x86_64_general_operand (xops[0], DImode))
21366 {
21367 tmp = gen_rtx_REG (DImode, R10_REG);
21368 xops[1] = tmp;
21369 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21370 xops[0] = tmp;
21371 xops[1] = this_param;
21372 }
21373 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21374 }
21375 else
21376 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21377 }
21378
21379 /* Adjust the this parameter by a value stored in the vtable. */
21380 if (vcall_offset)
21381 {
21382 if (TARGET_64BIT)
21383 tmp = gen_rtx_REG (DImode, R10_REG);
21384 else
21385 {
21386 int tmp_regno = 2 /* ECX */;
21387 if (lookup_attribute ("fastcall",
21388 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21389 tmp_regno = 0 /* EAX */;
21390 tmp = gen_rtx_REG (SImode, tmp_regno);
21391 }
21392
21393 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21394 xops[1] = tmp;
21395 if (TARGET_64BIT)
21396 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21397 else
21398 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21399
21400 /* Adjust the this parameter. */
21401 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21402 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21403 {
21404 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21405 xops[0] = GEN_INT (vcall_offset);
21406 xops[1] = tmp2;
21407 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21408 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21409 }
21410 xops[1] = this_reg;
21411 if (TARGET_64BIT)
21412 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21413 else
21414 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21415 }
21416
21417 /* If necessary, drop THIS back to its stack slot. */
21418 if (this_reg && this_reg != this_param)
21419 {
21420 xops[0] = this_reg;
21421 xops[1] = this_param;
21422 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21423 }
21424
21425 xops[0] = XEXP (DECL_RTL (function), 0);
21426 if (TARGET_64BIT)
21427 {
21428 if (!flag_pic || (*targetm.binds_local_p) (function))
21429 output_asm_insn ("jmp\t%P0", xops);
21430 /* All thunks should be in the same object as their target,
21431 and thus binds_local_p should be true. */
21432 else if (TARGET_64BIT_MS_ABI)
21433 gcc_unreachable ();
21434 else
21435 {
21436 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21437 tmp = gen_rtx_CONST (Pmode, tmp);
21438 tmp = gen_rtx_MEM (QImode, tmp);
21439 xops[0] = tmp;
21440 output_asm_insn ("jmp\t%A0", xops);
21441 }
21442 }
21443 else
21444 {
21445 if (!flag_pic || (*targetm.binds_local_p) (function))
21446 output_asm_insn ("jmp\t%P0", xops);
21447 else
21448 #if TARGET_MACHO
21449 if (TARGET_MACHO)
21450 {
21451 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21452 tmp = (gen_rtx_SYMBOL_REF
21453 (Pmode,
21454 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21455 tmp = gen_rtx_MEM (QImode, tmp);
21456 xops[0] = tmp;
21457 output_asm_insn ("jmp\t%0", xops);
21458 }
21459 else
21460 #endif /* TARGET_MACHO */
21461 {
21462 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21463 output_set_got (tmp, NULL_RTX);
21464
21465 xops[1] = tmp;
21466 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21467 output_asm_insn ("jmp\t{*}%1", xops);
21468 }
21469 }
21470 }
21471
21472 static void
21473 x86_file_start (void)
21474 {
21475 default_file_start ();
21476 #if TARGET_MACHO
21477 darwin_file_start ();
21478 #endif
21479 if (X86_FILE_START_VERSION_DIRECTIVE)
21480 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21481 if (X86_FILE_START_FLTUSED)
21482 fputs ("\t.global\t__fltused\n", asm_out_file);
21483 if (ix86_asm_dialect == ASM_INTEL)
21484 fputs ("\t.intel_syntax\n", asm_out_file);
21485 }
21486
21487 int
21488 x86_field_alignment (tree field, int computed)
21489 {
21490 enum machine_mode mode;
21491 tree type = TREE_TYPE (field);
21492
21493 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21494 return computed;
21495 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21496 ? get_inner_array_type (type) : type);
21497 if (mode == DFmode || mode == DCmode
21498 || GET_MODE_CLASS (mode) == MODE_INT
21499 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21500 return MIN (32, computed);
21501 return computed;
21502 }
21503
21504 /* Output assembler code to FILE to increment profiler label # LABELNO
21505 for profiling a function entry. */
21506 void
21507 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21508 {
21509 if (TARGET_64BIT)
21510 {
21511 #ifndef NO_PROFILE_COUNTERS
21512 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21513 #endif
21514
21515 if (!TARGET_64BIT_MS_ABI && flag_pic)
21516 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21517 else
21518 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21519 }
21520 else if (flag_pic)
21521 {
21522 #ifndef NO_PROFILE_COUNTERS
21523 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21524 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21525 #endif
21526 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21527 }
21528 else
21529 {
21530 #ifndef NO_PROFILE_COUNTERS
21531 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21532 PROFILE_COUNT_REGISTER);
21533 #endif
21534 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21535 }
21536 }
21537
21538 /* We don't have exact information about the insn sizes, but we may assume
21539 quite safely that we are informed about all 1 byte insns and memory
21540 address sizes. This is enough to eliminate unnecessary padding in
21541 99% of cases. */
21542
21543 static int
21544 min_insn_size (rtx insn)
21545 {
21546 int l = 0;
21547
21548 if (!INSN_P (insn) || !active_insn_p (insn))
21549 return 0;
21550
21551 /* Discard alignments we've emitted and jump instructions. */
21552 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21553 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21554 return 0;
21555 if (JUMP_P (insn)
21556 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21557 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21558 return 0;
21559
21560 /* Important case - calls are always 5 bytes.
21561 It is common to have many calls in a row. */
21562 if (CALL_P (insn)
21563 && symbolic_reference_mentioned_p (PATTERN (insn))
21564 && !SIBLING_CALL_P (insn))
21565 return 5;
21566 if (get_attr_length (insn) <= 1)
21567 return 1;
21568
21569 /* For normal instructions we may rely on the sizes of addresses
21570 and the presence of a symbol to require 4 bytes of encoding.
21571 This is not the case for jumps, where references are PC relative. */
21572 if (!JUMP_P (insn))
21573 {
21574 l = get_attr_length_address (insn);
21575 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21576 l = 4;
21577 }
21578 if (l)
21579 return 1+l;
21580 else
21581 return 2;
21582 }
21583
21584 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in a 16 byte
21585 window. */
21586
21587 static void
21588 ix86_avoid_jump_misspredicts (void)
21589 {
21590 rtx insn, start = get_insns ();
21591 int nbytes = 0, njumps = 0;
21592 int isjump = 0;
21593
21594 /* Look for all minimal intervals of instructions containing 4 jumps.
21595 The intervals are bounded by START and INSN. NBYTES is the total
21596 size of instructions in the interval including INSN and not including
21597 START. When the NBYTES is smaller than 16 bytes, it is possible
21598 that the end of START and INSN ends up in the same 16byte page.
21599
21600 The smallest offset in the page INSN can start is the case where START
21601 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
21602 We add p2align to 16byte window with maxskip 15 - NBYTES + sizeof (INSN).
21603 */
21604 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21605 {
21606
21607 nbytes += min_insn_size (insn);
21608 if (dump_file)
21609 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21610 INSN_UID (insn), min_insn_size (insn));
21611 if ((JUMP_P (insn)
21612 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21613 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21614 || CALL_P (insn))
21615 njumps++;
21616 else
21617 continue;
21618
21619 while (njumps > 3)
21620 {
21621 start = NEXT_INSN (start);
21622 if ((JUMP_P (start)
21623 && GET_CODE (PATTERN (start)) != ADDR_VEC
21624 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21625 || CALL_P (start))
21626 njumps--, isjump = 1;
21627 else
21628 isjump = 0;
21629 nbytes -= min_insn_size (start);
21630 }
21631 gcc_assert (njumps >= 0);
21632 if (dump_file)
21633 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21634 INSN_UID (start), INSN_UID (insn), nbytes);
21635
21636 if (njumps == 3 && isjump && nbytes < 16)
21637 {
21638 int padsize = 15 - nbytes + min_insn_size (insn);
21639
21640 if (dump_file)
21641 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21642 INSN_UID (insn), padsize);
21643 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21644 }
21645 }
21646 }
21647
21648 /* AMD Athlon works faster
21649 when RET is not the destination of a conditional jump or directly preceded
21650 by another jump instruction. We avoid the penalty by inserting a NOP just
21651 before the RET instructions in such cases. */
21652 static void
21653 ix86_pad_returns (void)
21654 {
21655 edge e;
21656 edge_iterator ei;
21657
21658 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21659 {
21660 basic_block bb = e->src;
21661 rtx ret = BB_END (bb);
21662 rtx prev;
21663 bool replace = false;
21664
21665 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21666 || !maybe_hot_bb_p (bb))
21667 continue;
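      /* Walk backwards to the nearest active insn or label preceding the
         return.  */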
21668 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21669 if (active_insn_p (prev) || LABEL_P (prev))
21670 break;
21671 if (prev && LABEL_P (prev))
21672 {
21673 edge e;
21674 edge_iterator ei;
21675
21676 FOR_EACH_EDGE (e, ei, bb->preds)
21677 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21678 && !(e->flags & EDGE_FALLTHRU))
21679 replace = true;
21680 }
21681 if (!replace)
21682 {
21683 prev = prev_active_insn (ret);
21684 if (prev
21685 && ((JUMP_P (prev) && any_condjump_p (prev))
21686 || CALL_P (prev)))
21687 replace = true;
21688 /* Empty functions get a branch mispredict even when the jump destination
21689 is not visible to us. */
21690 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21691 replace = true;
21692 }
21693 if (replace)
21694 {
21695 emit_insn_before (gen_return_internal_long (), ret);
21696 delete_insn (ret);
21697 }
21698 }
21699 }
21700
21701 /* Implement machine specific optimizations. We implement padding of returns
21702 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
21703 static void
21704 ix86_reorg (void)
21705 {
21706 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21707 ix86_pad_returns ();
21708 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21709 ix86_avoid_jump_misspredicts ();
21710 }
21711
21712 /* Return nonzero when a QImode register that must be represented via a REX
21713 prefix is used. */
21714 bool
21715 x86_extended_QIreg_mentioned_p (rtx insn)
21716 {
21717 int i;
21718 extract_insn_cached (insn);
21719 for (i = 0; i < recog_data.n_operands; i++)
21720 if (REG_P (recog_data.operand[i])
21721 && REGNO (recog_data.operand[i]) >= 4)
21722 return true;
21723 return false;
21724 }
21725
21726 /* Return nonzero when P points to a register encoded via a REX prefix.
21727 Called via for_each_rtx. */
21728 static int
21729 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21730 {
21731 unsigned int regno;
21732 if (!REG_P (*p))
21733 return 0;
21734 regno = REGNO (*p);
21735 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21736 }
21737
21738 /* Return true when INSN mentions register that must be encoded using REX
21739 prefix. */
21740 bool
21741 x86_extended_reg_mentioned_p (rtx insn)
21742 {
21743 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21744 }
21745
21746 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21747 optabs would emit if we didn't have TFmode patterns. */
21748
21749 void
21750 x86_emit_floatuns (rtx operands[2])
21751 {
21752 rtx neglab, donelab, i0, i1, f0, in, out;
21753 enum machine_mode mode, inmode;
21754
21755 inmode = GET_MODE (operands[1]);
21756 gcc_assert (inmode == SImode || inmode == DImode);
21757
21758 out = operands[0];
21759 in = force_reg (inmode, operands[1]);
21760 mode = GET_MODE (out);
21761 neglab = gen_label_rtx ();
21762 donelab = gen_label_rtx ();
21763 f0 = gen_reg_rtx (mode);
21764
21765 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21766
21767 expand_float (out, in, 0);
21768
21769 emit_jump_insn (gen_jump (donelab));
21770 emit_barrier ();
21771
21772 emit_label (neglab);
21773
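  /* The input has its sign bit set.  Halve it, OR-ing the dropped low bit
     back in so that the final rounding is correct, convert to FP, and then
     double the result.  */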
21774 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21775 1, OPTAB_DIRECT);
21776 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21777 1, OPTAB_DIRECT);
21778 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21779
21780 expand_float (f0, i0, 0);
21781
21782 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21783
21784 emit_label (donelab);
21785 }
21786 \f
21787 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21788 with all elements equal to VAR. Return true if successful. */
21789
21790 static bool
21791 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21792 rtx target, rtx val)
21793 {
21794 enum machine_mode smode, wsmode, wvmode;
21795 rtx x;
21796
21797 switch (mode)
21798 {
21799 case V2SImode:
21800 case V2SFmode:
21801 if (!mmx_ok)
21802 return false;
21803 /* FALLTHRU */
21804
21805 case V2DFmode:
21806 case V2DImode:
21807 case V4SFmode:
21808 case V4SImode:
21809 val = force_reg (GET_MODE_INNER (mode), val);
21810 x = gen_rtx_VEC_DUPLICATE (mode, val);
21811 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21812 return true;
21813
21814 case V4HImode:
21815 if (!mmx_ok)
21816 return false;
21817 if (TARGET_SSE || TARGET_3DNOW_A)
21818 {
21819 val = gen_lowpart (SImode, val);
21820 x = gen_rtx_TRUNCATE (HImode, val);
21821 x = gen_rtx_VEC_DUPLICATE (mode, x);
21822 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21823 return true;
21824 }
21825 else
21826 {
21827 smode = HImode;
21828 wsmode = SImode;
21829 wvmode = V2SImode;
21830 goto widen;
21831 }
21832
21833 case V8QImode:
21834 if (!mmx_ok)
21835 return false;
21836 smode = QImode;
21837 wsmode = HImode;
21838 wvmode = V4HImode;
21839 goto widen;
21840 case V8HImode:
21841 if (TARGET_SSE2)
21842 {
21843 rtx tmp1, tmp2;
21844 /* Extend HImode to SImode using a paradoxical SUBREG. */
21845 tmp1 = gen_reg_rtx (SImode);
21846 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21847 /* Insert the SImode value as low element of V4SImode vector. */
21848 tmp2 = gen_reg_rtx (V4SImode);
21849 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21850 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21851 CONST0_RTX (V4SImode),
21852 const1_rtx);
21853 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21854 /* Cast the V4SImode vector back to a V8HImode vector. */
21855 tmp1 = gen_reg_rtx (V8HImode);
21856 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21857 /* Duplicate the low short through the whole low SImode word. */
21858 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21859 /* Cast the V8HImode vector back to a V4SImode vector. */
21860 tmp2 = gen_reg_rtx (V4SImode);
21861 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21862 /* Replicate the low element of the V4SImode vector. */
21863 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21864 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21865 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21866 return true;
21867 }
21868 smode = HImode;
21869 wsmode = SImode;
21870 wvmode = V4SImode;
21871 goto widen;
21872 case V16QImode:
21873 if (TARGET_SSE2)
21874 {
21875 rtx tmp1, tmp2;
21876 /* Extend QImode to SImode using a paradoxical SUBREG. */
21877 tmp1 = gen_reg_rtx (SImode);
21878 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21879 /* Insert the SImode value as low element of V4SImode vector. */
21880 tmp2 = gen_reg_rtx (V4SImode);
21881 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21882 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21883 CONST0_RTX (V4SImode),
21884 const1_rtx);
21885 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21886 /* Cast the V4SImode vector back to a V16QImode vector. */
21887 tmp1 = gen_reg_rtx (V16QImode);
21888 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21889 /* Duplicate the low byte through the whole low SImode word. */
21890 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21891 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21892 /* Cast the V16QImode vector back to a V4SImode vector. */
21893 tmp2 = gen_reg_rtx (V4SImode);
21894 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21895 /* Replicate the low element of the V4SImode vector. */
21896 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21897 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21898 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21899 return true;
21900 }
21901 smode = QImode;
21902 wsmode = HImode;
21903 wvmode = V8HImode;
21904 goto widen;
21905 widen:
21906 /* Replicate the value once into the next wider mode and recurse. */
21907 val = convert_modes (wsmode, smode, val, true);
21908 x = expand_simple_binop (wsmode, ASHIFT, val,
21909 GEN_INT (GET_MODE_BITSIZE (smode)),
21910 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21911 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21912
21913 x = gen_reg_rtx (wvmode);
21914 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21915 gcc_unreachable ();
21916 emit_move_insn (target, gen_lowpart (mode, x));
21917 return true;
21918
21919 default:
21920 return false;
21921 }
21922 }
21923
21924 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21925 whose ONE_VAR element is VAR, and other elements are zero. Return true
21926 if successful. */
21927
21928 static bool
21929 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21930 rtx target, rtx var, int one_var)
21931 {
21932 enum machine_mode vsimode;
21933 rtx new_target;
21934 rtx x, tmp;
21935
21936 switch (mode)
21937 {
21938 case V2SFmode:
21939 case V2SImode:
21940 if (!mmx_ok)
21941 return false;
21942 /* FALLTHRU */
21943
21944 case V2DFmode:
21945 case V2DImode:
21946 if (one_var != 0)
21947 return false;
21948 var = force_reg (GET_MODE_INNER (mode), var);
21949 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21950 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21951 return true;
21952
21953 case V4SFmode:
21954 case V4SImode:
21955 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21956 new_target = gen_reg_rtx (mode);
21957 else
21958 new_target = target;
21959 var = force_reg (GET_MODE_INNER (mode), var);
21960 x = gen_rtx_VEC_DUPLICATE (mode, var);
21961 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21962 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21963 if (one_var != 0)
21964 {
21965 /* We need to shuffle the value to the correct position, so
21966 create a new pseudo to store the intermediate result. */
21967
21968 /* With SSE2, we can use the integer shuffle insns. */
21969 if (mode != V4SFmode && TARGET_SSE2)
21970 {
21971 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21972 GEN_INT (1),
21973 GEN_INT (one_var == 1 ? 0 : 1),
21974 GEN_INT (one_var == 2 ? 0 : 1),
21975 GEN_INT (one_var == 3 ? 0 : 1)));
21976 if (target != new_target)
21977 emit_move_insn (target, new_target);
21978 return true;
21979 }
21980
21981 /* Otherwise convert the intermediate result to V4SFmode and
21982 use the SSE1 shuffle instructions. */
21983 if (mode != V4SFmode)
21984 {
21985 tmp = gen_reg_rtx (V4SFmode);
21986 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21987 }
21988 else
21989 tmp = new_target;
21990
21991 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21992 GEN_INT (1),
21993 GEN_INT (one_var == 1 ? 0 : 1),
21994 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21995 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21996
21997 if (mode != V4SFmode)
21998 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21999 else if (tmp != target)
22000 emit_move_insn (target, tmp);
22001 }
22002 else if (target != new_target)
22003 emit_move_insn (target, new_target);
22004 return true;
22005
22006 case V8HImode:
22007 case V16QImode:
22008 vsimode = V4SImode;
22009 goto widen;
22010 case V4HImode:
22011 case V8QImode:
22012 if (!mmx_ok)
22013 return false;
22014 vsimode = V2SImode;
22015 goto widen;
22016 widen:
22017 if (one_var != 0)
22018 return false;
22019
22020 /* Zero extend the variable element to SImode and recurse. */
22021 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
22022
22023 x = gen_reg_rtx (vsimode);
22024 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
22025 var, one_var))
22026 gcc_unreachable ();
22027
22028 emit_move_insn (target, gen_lowpart (mode, x));
22029 return true;
22030
22031 default:
22032 return false;
22033 }
22034 }
22035
22036 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22037 consisting of the values in VALS. It is known that all elements
22038 except ONE_VAR are constants. Return true if successful. */
22039
22040 static bool
22041 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
22042 rtx target, rtx vals, int one_var)
22043 {
22044 rtx var = XVECEXP (vals, 0, one_var);
22045 enum machine_mode wmode;
22046 rtx const_vec, x;
22047
22048 const_vec = copy_rtx (vals);
22049 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
22050 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
22051
22052 switch (mode)
22053 {
22054 case V2DFmode:
22055 case V2DImode:
22056 case V2SFmode:
22057 case V2SImode:
22058 /* For the two element vectors, it's just as easy to use
22059 the general case. */
22060 return false;
22061
22062 case V4SFmode:
22063 case V4SImode:
22064 case V8HImode:
22065 case V4HImode:
22066 break;
22067
22068 case V16QImode:
22069 wmode = V8HImode;
22070 goto widen;
22071 case V8QImode:
22072 wmode = V4HImode;
22073 goto widen;
22074 widen:
22075 /* There's no way to set one QImode entry easily. Combine
22076 the variable value with its adjacent constant value, and
22077 promote to an HImode set. */
22078 x = XVECEXP (vals, 0, one_var ^ 1);
22079 if (one_var & 1)
22080 {
22081 var = convert_modes (HImode, QImode, var, true);
22082 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
22083 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22084 x = GEN_INT (INTVAL (x) & 0xff);
22085 }
22086 else
22087 {
22088 var = convert_modes (HImode, QImode, var, true);
22089 x = gen_int_mode (INTVAL (x) << 8, HImode);
22090 }
22091 if (x != const0_rtx)
22092 var = expand_simple_binop (HImode, IOR, var, x, var,
22093 1, OPTAB_LIB_WIDEN);
22094
22095 x = gen_reg_rtx (wmode);
22096 emit_move_insn (x, gen_lowpart (wmode, const_vec));
22097 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
22098
22099 emit_move_insn (target, gen_lowpart (mode, x));
22100 return true;
22101
22102 default:
22103 return false;
22104 }
22105
22106 emit_move_insn (target, const_vec);
22107 ix86_expand_vector_set (mmx_ok, target, var, one_var);
22108 return true;
22109 }
22110
22111 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
22112 all values variable, and none identical. */
22113
22114 static void
22115 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
22116 rtx target, rtx vals)
22117 {
22118 enum machine_mode half_mode = GET_MODE_INNER (mode);
22119 rtx op0 = NULL, op1 = NULL;
22120 bool use_vec_concat = false;
22121
22122 switch (mode)
22123 {
22124 case V2SFmode:
22125 case V2SImode:
22126 if (!mmx_ok && !TARGET_SSE)
22127 break;
22128 /* FALLTHRU */
22129
22130 case V2DFmode:
22131 case V2DImode:
22132 /* For the two element vectors, we always implement VEC_CONCAT. */
22133 op0 = XVECEXP (vals, 0, 0);
22134 op1 = XVECEXP (vals, 0, 1);
22135 use_vec_concat = true;
22136 break;
22137
22138 case V4SFmode:
22139 half_mode = V2SFmode;
22140 goto half;
22141 case V4SImode:
22142 half_mode = V2SImode;
22143 goto half;
22144 half:
22145 {
22146 rtvec v;
22147
22148 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22149 Recurse to load the two halves. */
22150
22151 op0 = gen_reg_rtx (half_mode);
22152 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
22153 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
22154
22155 op1 = gen_reg_rtx (half_mode);
22156 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
22157 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
22158
22159 use_vec_concat = true;
22160 }
22161 break;
22162
22163 case V8HImode:
22164 case V16QImode:
22165 case V4HImode:
22166 case V8QImode:
22167 break;
22168
22169 default:
22170 gcc_unreachable ();
22171 }
22172
22173 if (use_vec_concat)
22174 {
22175 if (!register_operand (op0, half_mode))
22176 op0 = force_reg (half_mode, op0);
22177 if (!register_operand (op1, half_mode))
22178 op1 = force_reg (half_mode, op1);
22179
22180 emit_insn (gen_rtx_SET (VOIDmode, target,
22181 gen_rtx_VEC_CONCAT (mode, op0, op1)));
22182 }
22183 else
22184 {
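      /* Build each word of the vector in an integer register by shifting
         and OR-ing the elements together, then assemble the words into the
         vector.  */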
22185 int i, j, n_elts, n_words, n_elt_per_word;
22186 enum machine_mode inner_mode;
22187 rtx words[4], shift;
22188
22189 inner_mode = GET_MODE_INNER (mode);
22190 n_elts = GET_MODE_NUNITS (mode);
22191 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22192 n_elt_per_word = n_elts / n_words;
22193 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
22194
22195 for (i = 0; i < n_words; ++i)
22196 {
22197 rtx word = NULL_RTX;
22198
22199 for (j = 0; j < n_elt_per_word; ++j)
22200 {
22201 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
22202 elt = convert_modes (word_mode, inner_mode, elt, true);
22203
22204 if (j == 0)
22205 word = elt;
22206 else
22207 {
22208 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
22209 word, 1, OPTAB_LIB_WIDEN);
22210 word = expand_simple_binop (word_mode, IOR, word, elt,
22211 word, 1, OPTAB_LIB_WIDEN);
22212 }
22213 }
22214
22215 words[i] = word;
22216 }
22217
22218 if (n_words == 1)
22219 emit_move_insn (target, gen_lowpart (mode, words[0]));
22220 else if (n_words == 2)
22221 {
22222 rtx tmp = gen_reg_rtx (mode);
22223 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
22224 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
22225 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
22226 emit_move_insn (target, tmp);
22227 }
22228 else if (n_words == 4)
22229 {
22230 rtx tmp = gen_reg_rtx (V4SImode);
22231 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
22232 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
22233 emit_move_insn (target, gen_lowpart (mode, tmp));
22234 }
22235 else
22236 gcc_unreachable ();
22237 }
22238 }
22239
22240 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22241 instructions unless MMX_OK is true. */
22242
22243 void
22244 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22245 {
22246 enum machine_mode mode = GET_MODE (target);
22247 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22248 int n_elts = GET_MODE_NUNITS (mode);
22249 int n_var = 0, one_var = -1;
22250 bool all_same = true, all_const_zero = true;
22251 int i;
22252 rtx x;
22253
22254 for (i = 0; i < n_elts; ++i)
22255 {
22256 x = XVECEXP (vals, 0, i);
22257 if (!CONSTANT_P (x))
22258 n_var++, one_var = i;
22259 else if (x != CONST0_RTX (inner_mode))
22260 all_const_zero = false;
22261 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22262 all_same = false;
22263 }
22264
22265 /* Constants are best loaded from the constant pool. */
22266 if (n_var == 0)
22267 {
22268 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22269 return;
22270 }
22271
22272 /* If all values are identical, broadcast the value. */
22273 if (all_same
22274 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22275 XVECEXP (vals, 0, 0)))
22276 return;
22277
22278 /* Values where only one field is non-constant are best loaded from
22279 the pool and overwritten via move later. */
22280 if (n_var == 1)
22281 {
22282 if (all_const_zero
22283 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22284 XVECEXP (vals, 0, one_var),
22285 one_var))
22286 return;
22287
22288 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
22289 return;
22290 }
22291
22292 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
22293 }
22294
22295 void
22296 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22297 {
22298 enum machine_mode mode = GET_MODE (target);
22299 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22300 bool use_vec_merge = false;
22301 rtx tmp;
22302
22303 switch (mode)
22304 {
22305 case V2SFmode:
22306 case V2SImode:
22307 if (mmx_ok)
22308 {
22309 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22310 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22311 if (elt == 0)
22312 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22313 else
22314 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22315 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22316 return;
22317 }
22318 break;
22319
22320 case V2DImode:
22321 use_vec_merge = TARGET_SSE4_1;
22322 if (use_vec_merge)
22323 break;
22324
22325 case V2DFmode:
22326 {
22327 rtx op0, op1;
22328
22329 /* For the two element vectors, we implement a VEC_CONCAT with
22330 the extraction of the other element. */
22331
22332 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22333 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22334
22335 if (elt == 0)
22336 op0 = val, op1 = tmp;
22337 else
22338 op0 = tmp, op1 = val;
22339
22340 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22341 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22342 }
22343 return;
22344
22345 case V4SFmode:
22346 use_vec_merge = TARGET_SSE4_1;
22347 if (use_vec_merge)
22348 break;
22349
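      /* The sse_shufps_1 selectors below index the 8-lane concatenation
	 of the two source operands: values 0-3 pick a lane from the
	 first source, values 4-7 (hence the "+4") pick a lane from the
	 second.  shufps requires the first two result lanes to come
	 from the first source and the last two from the second.  */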
22350 switch (elt)
22351 {
22352 case 0:
22353 use_vec_merge = true;
22354 break;
22355
22356 case 1:
22357 /* tmp = target = A B C D */
22358 tmp = copy_to_reg (target);
22359 /* target = A A B B */
22360 emit_insn (gen_sse_unpcklps (target, target, target));
22361 /* target = X A B B */
22362 ix86_expand_vector_set (false, target, val, 0);
22363 /* target = A X C D */
22364 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22365 GEN_INT (1), GEN_INT (0),
22366 GEN_INT (2+4), GEN_INT (3+4)));
22367 return;
22368
22369 case 2:
22370 /* tmp = target = A B C D */
22371 tmp = copy_to_reg (target);
22372 /* tmp = X B C D */
22373 ix86_expand_vector_set (false, tmp, val, 0);
22374 /* target = A B X D */
22375 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22376 GEN_INT (0), GEN_INT (1),
22377 GEN_INT (0+4), GEN_INT (3+4)));
22378 return;
22379
22380 case 3:
22381 /* tmp = target = A B C D */
22382 tmp = copy_to_reg (target);
22383 /* tmp = X B C D */
22384 ix86_expand_vector_set (false, tmp, val, 0);
22387 22385 	  /* target = A B C X */
22386 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22387 GEN_INT (0), GEN_INT (1),
22388 GEN_INT (2+4), GEN_INT (0+4)));
22389 return;
22390
22391 default:
22392 gcc_unreachable ();
22393 }
22394 break;
22395
22396 case V4SImode:
22397 use_vec_merge = TARGET_SSE4_1;
22398 if (use_vec_merge)
22399 break;
22400
22401 /* Element 0 handled by vec_merge below. */
22402 if (elt == 0)
22403 {
22404 use_vec_merge = true;
22405 break;
22406 }
22407
22408 if (TARGET_SSE2)
22409 {
22410 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22411 store into element 0, then shuffle them back. */
22412
22413 rtx order[4];
22414
22415 order[0] = GEN_INT (elt);
22416 order[1] = const1_rtx;
22417 order[2] = const2_rtx;
22418 order[3] = GEN_INT (3);
22419 order[elt] = const0_rtx;
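	  /* E.g. for ELT == 2 the permutation is { 2, 1, 0, 3 }: it just
	     swaps lanes 0 and ELT, so emitting the same pshufd again
	     after the element store restores the original lane order.  */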
22420
22421 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22422 order[1], order[2], order[3]));
22423
22424 ix86_expand_vector_set (false, target, val, 0);
22425
22426 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22427 order[1], order[2], order[3]));
22428 }
22429 else
22430 {
22431 /* For SSE1, we have to reuse the V4SF code. */
22432 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22433 gen_lowpart (SFmode, val), elt);
22434 }
22435 return;
22436
22437 case V8HImode:
22438 use_vec_merge = TARGET_SSE2;
22439 break;
22440 case V4HImode:
22441 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22442 break;
22443
22444 case V16QImode:
22445 use_vec_merge = TARGET_SSE4_1;
22446 break;
22447
22448 case V8QImode:
22449 default:
22450 break;
22451 }
22452
22453 if (use_vec_merge)
22454 {
22455 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22456 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22457 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22458 }
22459 else
22460 {
22461 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22462
22463 emit_move_insn (mem, target);
22464
22465 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22466 emit_move_insn (tmp, val);
22467
22468 emit_move_insn (target, mem);
22469 }
22470 }
22471
22472 void
22473 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22474 {
22475 enum machine_mode mode = GET_MODE (vec);
22476 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22477 bool use_vec_extr = false;
22478 rtx tmp;
22479
22480 switch (mode)
22481 {
22482 case V2SImode:
22483 case V2SFmode:
22484 if (!mmx_ok)
22485 break;
22486 /* FALLTHRU */
22487
22488 case V2DFmode:
22489 case V2DImode:
22490 use_vec_extr = true;
22491 break;
22492
22493 case V4SFmode:
22494 use_vec_extr = TARGET_SSE4_1;
22495 if (use_vec_extr)
22496 break;
22497
22498 switch (elt)
22499 {
22500 case 0:
22501 tmp = vec;
22502 break;
22503
22504 case 1:
22505 case 3:
22506 tmp = gen_reg_rtx (mode);
22507 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22508 GEN_INT (elt), GEN_INT (elt),
22509 GEN_INT (elt+4), GEN_INT (elt+4)));
22510 break;
22511
22512 case 2:
22513 tmp = gen_reg_rtx (mode);
22514 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22515 break;
22516
22517 default:
22518 gcc_unreachable ();
22519 }
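	/* In every case above the requested element has been moved into
	   lane 0 of TMP, so the generic vec_select of element 0 emitted
	   at the bottom of this function finishes the extraction.  */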
22520 vec = tmp;
22521 use_vec_extr = true;
22522 elt = 0;
22523 break;
22524
22525 case V4SImode:
22526 use_vec_extr = TARGET_SSE4_1;
22527 if (use_vec_extr)
22528 break;
22529
22530 if (TARGET_SSE2)
22531 {
22532 switch (elt)
22533 {
22534 case 0:
22535 tmp = vec;
22536 break;
22537
22538 case 1:
22539 case 3:
22540 tmp = gen_reg_rtx (mode);
22541 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22542 GEN_INT (elt), GEN_INT (elt),
22543 GEN_INT (elt), GEN_INT (elt)));
22544 break;
22545
22546 case 2:
22547 tmp = gen_reg_rtx (mode);
22548 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22549 break;
22550
22551 default:
22552 gcc_unreachable ();
22553 }
22554 vec = tmp;
22555 use_vec_extr = true;
22556 elt = 0;
22557 }
22558 else
22559 {
22560 /* For SSE1, we have to reuse the V4SF code. */
22561 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22562 gen_lowpart (V4SFmode, vec), elt);
22563 return;
22564 }
22565 break;
22566
22567 case V8HImode:
22568 use_vec_extr = TARGET_SSE2;
22569 break;
22570 case V4HImode:
22571 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22572 break;
22573
22574 case V16QImode:
22575 use_vec_extr = TARGET_SSE4_1;
22576 break;
22577
22578 case V8QImode:
22579 /* ??? Could extract the appropriate HImode element and shift. */
22580 default:
22581 break;
22582 }
22583
22584 if (use_vec_extr)
22585 {
22586 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22587 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22588
22589 /* Let the rtl optimizers know about the zero extension performed. */
22590 if (inner_mode == QImode || inner_mode == HImode)
22591 {
22592 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22593 target = gen_lowpart (SImode, target);
22594 }
22595
22596 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22597 }
22598 else
22599 {
22600 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22601
22602 emit_move_insn (mem, vec);
22603
22604 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22605 emit_move_insn (target, tmp);
22606 }
22607 }
22608
22609 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22610 pattern to reduce; DEST is the destination; IN is the input vector. */
22611
22612 void
22613 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22614 {
22615 rtx tmp1, tmp2, tmp3;
22616
22617 tmp1 = gen_reg_rtx (V4SFmode);
22618 tmp2 = gen_reg_rtx (V4SFmode);
22619 tmp3 = gen_reg_rtx (V4SFmode);
22620
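  /* This is a two-step reduction tree: movhlps copies lanes 2 and 3 of
     IN down onto lanes 0 and 1, FN combines the halves pairwise, the
     shufps then broadcasts lane 1 of that partial result so FN can fold
     it into lane 0.  Only lane 0 of DEST holds the full reduction.  */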
22621 emit_insn (gen_sse_movhlps (tmp1, in, in));
22622 emit_insn (fn (tmp2, tmp1, in));
22623
22624 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22625 GEN_INT (1), GEN_INT (1),
22626 GEN_INT (1+4), GEN_INT (1+4)));
22627 emit_insn (fn (dest, tmp2, tmp3));
22628 }
22629 \f
22630 /* Target hook for scalar_mode_supported_p. */
22631 static bool
22632 ix86_scalar_mode_supported_p (enum machine_mode mode)
22633 {
22634 if (DECIMAL_FLOAT_MODE_P (mode))
22635 return true;
22636 else if (mode == TFmode)
22637 return TARGET_64BIT;
22638 else
22639 return default_scalar_mode_supported_p (mode);
22640 }
22641
22642 /* Implements target hook vector_mode_supported_p. */
22643 static bool
22644 ix86_vector_mode_supported_p (enum machine_mode mode)
22645 {
22646 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22647 return true;
22648 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22649 return true;
22650 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22651 return true;
22652 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22653 return true;
22654 return false;
22655 }
22656
22657 /* Target hook for c_mode_for_suffix. */
22658 static enum machine_mode
22659 ix86_c_mode_for_suffix (char suffix)
22660 {
22661 if (TARGET_64BIT && suffix == 'q')
22662 return TFmode;
22663 if (TARGET_MMX && suffix == 'w')
22664 return XFmode;
22665
22666 return VOIDmode;
22667 }
22668
22669 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22670
22671 We do this in the new i386 backend to maintain source compatibility
22672 with the old cc0-based compiler. */
22673
22674 static tree
22675 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22676 tree inputs ATTRIBUTE_UNUSED,
22677 tree clobbers)
22678 {
22679 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22680 clobbers);
22681 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22682 clobbers);
22683 return clobbers;
22684 }
22685
22688 22686 /* Implements the target hook targetm.asm.encode_section_info.  This
22689 22687    is not used by NetWare.  */
22688
22689 static void ATTRIBUTE_UNUSED
22690 ix86_encode_section_info (tree decl, rtx rtl, int first)
22691 {
22692 default_encode_section_info (decl, rtl, first);
22693
22694 if (TREE_CODE (decl) == VAR_DECL
22695 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22696 && ix86_in_large_data_p (decl))
22697 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22698 }
22699
22700 /* Worker function for REVERSE_CONDITION. */
22701
22702 enum rtx_code
22703 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22704 {
22705 return (mode != CCFPmode && mode != CCFPUmode
22706 ? reverse_condition (code)
22707 : reverse_condition_maybe_unordered (code));
22708 }
22709
22710 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22711 to OPERANDS[0]. */
22712
22713 const char *
22714 output_387_reg_move (rtx insn, rtx *operands)
22715 {
22716 if (REG_P (operands[0]))
22717 {
22718 if (REG_P (operands[1])
22719 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22720 {
22721 if (REGNO (operands[0]) == FIRST_STACK_REG)
22722 return output_387_ffreep (operands, 0);
22723 return "fstp\t%y0";
22724 }
22725 if (STACK_TOP_P (operands[0]))
22726 return "fld%z1\t%y1";
22727 return "fst\t%y0";
22728 }
22729 else if (MEM_P (operands[0]))
22730 {
22731 gcc_assert (REG_P (operands[1]));
22732 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22733 return "fstp%z0\t%y0";
22734 else
22735 {
22736 /* There is no non-popping store to memory for XFmode.
22737 So if we need one, follow the store with a load. */
22738 if (GET_MODE (operands[0]) == XFmode)
22739 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22740 else
22741 return "fst%z0\t%y0";
22742 }
22743 }
22744 else
22747 22745     gcc_unreachable ();
22746 }
22747
22748 /* Output code to perform a conditional jump to LABEL, if C2 flag in
22749 FP status register is set. */
22750
22751 void
22752 ix86_emit_fp_unordered_jump (rtx label)
22753 {
22754 rtx reg = gen_reg_rtx (HImode);
22755 rtx temp;
22756
22757 emit_insn (gen_x86_fnstsw_1 (reg));
22758
22759 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22760 {
22761 emit_insn (gen_x86_sahf_1 (reg));
22762
22763 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22764 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22765 }
22766 else
22767 {
22768 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22769
22770 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22771 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22772 }
22773
22774 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22775 gen_rtx_LABEL_REF (VOIDmode, label),
22776 pc_rtx);
22777 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22778
22779 emit_jump_insn (temp);
22780 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22781 }
22782
22783 /* Output code to perform a log1p XFmode calculation. */
22784
22785 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22786 {
22787 rtx label1 = gen_label_rtx ();
22788 rtx label2 = gen_label_rtx ();
22789
22790 rtx tmp = gen_reg_rtx (XFmode);
22791 rtx tmp2 = gen_reg_rtx (XFmode);
22792
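  /* fyl2xp1 computes ST(1) * log2 (ST(0) + 1), but the instruction is
     only specified for |ST(0)| smaller than 1 - sqrt(2)/2, which is the
     constant compared against below; larger arguments fall back to
     fyl2x on 1 + op1.  In both paths ST(1) is loaded with ln(2)
     (fldln2), converting the base-2 logarithm into the natural one.  */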
22793 emit_insn (gen_absxf2 (tmp, op1));
22794 emit_insn (gen_cmpxf (tmp,
22795 CONST_DOUBLE_FROM_REAL_VALUE (
22796 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22797 XFmode)));
22798 emit_jump_insn (gen_bge (label1));
22799
22800 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22801 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22802 emit_jump (label2);
22803
22804 emit_label (label1);
22805 emit_move_insn (tmp, CONST1_RTX (XFmode));
22806 emit_insn (gen_addxf3 (tmp, op1, tmp));
22807 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22808 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22809
22810 emit_label (label2);
22811 }
22812
22815 22813 /* Output code to perform a Newton-Raphson approximation of a single precision
22814 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22815
22816 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22817 {
22818 rtx x0, x1, e0, e1, two;
22819
22820 x0 = gen_reg_rtx (mode);
22821 e0 = gen_reg_rtx (mode);
22822 e1 = gen_reg_rtx (mode);
22823 x1 = gen_reg_rtx (mode);
22824
22825 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22826
22827 if (VECTOR_MODE_P (mode))
22828 two = ix86_build_const_vector (SFmode, true, two);
22829
22830 two = force_reg (mode, two);
22831
22832 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22833
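  /* x1 = x0 * (2 - b * x0) is one Newton-Raphson step for
     f (x) = 1/x - b: if x0 = (1/b) * (1 + e), then x1 = (1/b) * (1 - e*e),
     so the roughly 12-bit relative error of the rcp estimate is squared
     by the single step, giving close to (but not quite) full single
     precision after the final multiply by a.  */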
22834 /* x0 = 1./b estimate */
22835 emit_insn (gen_rtx_SET (VOIDmode, x0,
22836 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
22837 UNSPEC_RCP)));
22838 /* e0 = x0 * b */
22839 emit_insn (gen_rtx_SET (VOIDmode, e0,
22840 gen_rtx_MULT (mode, x0, b)));
22841 /* e1 = 2. - e0 */
22842 emit_insn (gen_rtx_SET (VOIDmode, e1,
22843 gen_rtx_MINUS (mode, two, e0)));
22844 /* x1 = x0 * e1 */
22845 emit_insn (gen_rtx_SET (VOIDmode, x1,
22846 gen_rtx_MULT (mode, x0, e1)));
22847 /* res = a * x1 */
22848 emit_insn (gen_rtx_SET (VOIDmode, res,
22849 gen_rtx_MULT (mode, a, x1)));
22850 }
22851
22854 22852 /* Output code to perform a Newton-Raphson approximation of a
22853 single precision floating point [reciprocal] square root. */
22854
22855 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
22856 bool recip)
22857 {
22858 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
22859
22860 x0 = gen_reg_rtx (mode);
22861 e0 = gen_reg_rtx (mode);
22862 e1 = gen_reg_rtx (mode);
22863 e2 = gen_reg_rtx (mode);
22864 e3 = gen_reg_rtx (mode);
22865
22866 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
22867 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
22868
22869 mask = gen_reg_rtx (mode);
22870
22871 if (VECTOR_MODE_P (mode))
22872 {
22873 three = ix86_build_const_vector (SFmode, true, three);
22874 half = ix86_build_const_vector (SFmode, true, half);
22875 }
22876
22877 three = force_reg (mode, three);
22878 half = force_reg (mode, half);
22879
22880 zero = force_reg (mode, CONST0_RTX(mode));
22881
22882 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22883 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22884
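  /* x1 = 0.5 * x0 * (3 - a * x0 * x0) is one Newton-Raphson step for
     f (x) = 1/(x*x) - a.  Masking the estimate with (a != 0) below
     zeroes it for a == 0, since rsqrt(0) is +Inf and the later
     multiplies would otherwise produce Inf * 0 = NaN; the a == 0 result
     then becomes 0, which is presumably acceptable under the
     unsafe-math options that enable this expansion.  */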
22885 /* Compare a to zero. */
22886 emit_insn (gen_rtx_SET (VOIDmode, mask,
22887 gen_rtx_NE (mode, a, zero)));
22888
22889 /* x0 = 1./sqrt(a) estimate */
22890 emit_insn (gen_rtx_SET (VOIDmode, x0,
22891 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
22892 UNSPEC_RSQRT)));
22893 /* Filter out infinity. */
22894 if (VECTOR_MODE_P (mode))
22895 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
22896 gen_rtx_AND (mode,
22897 gen_lowpart (V4SFmode, x0),
22898 gen_lowpart (V4SFmode, mask))));
22899 else
22900 emit_insn (gen_rtx_SET (VOIDmode, x0,
22901 gen_rtx_AND (mode, x0, mask)));
22902
22903 /* e0 = x0 * a */
22904 emit_insn (gen_rtx_SET (VOIDmode, e0,
22905 gen_rtx_MULT (mode, x0, a)));
22906 /* e1 = e0 * x0 */
22907 emit_insn (gen_rtx_SET (VOIDmode, e1,
22908 gen_rtx_MULT (mode, e0, x0)));
22909 /* e2 = 3. - e1 */
22910 emit_insn (gen_rtx_SET (VOIDmode, e2,
22911 gen_rtx_MINUS (mode, three, e1)));
22912 if (recip)
22913 /* e3 = .5 * x0 */
22914 emit_insn (gen_rtx_SET (VOIDmode, e3,
22915 gen_rtx_MULT (mode, half, x0)));
22916 else
22917 /* e3 = .5 * e0 */
22918 emit_insn (gen_rtx_SET (VOIDmode, e3,
22919 gen_rtx_MULT (mode, half, e0)));
22920 /* ret = e2 * e3 */
22921 emit_insn (gen_rtx_SET (VOIDmode, res,
22922 gen_rtx_MULT (mode, e2, e3)));
22923 }
22924
22925 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22926
22927 static void ATTRIBUTE_UNUSED
22928 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22929 tree decl)
22930 {
22931 /* With Binutils 2.15, the "@unwind" marker must be specified on
22932 every occurrence of the ".eh_frame" section, not just the first
22933 one. */
22934 if (TARGET_64BIT
22935 && strcmp (name, ".eh_frame") == 0)
22936 {
22937 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22938 flags & SECTION_WRITE ? "aw" : "a");
22939 return;
22940 }
22941 default_elf_asm_named_section (name, flags, decl);
22942 }
22943
22944 /* Return the mangling of TYPE if it is an extended fundamental type. */
22945
22946 static const char *
22947 ix86_mangle_type (const_tree type)
22948 {
22949 type = TYPE_MAIN_VARIANT (type);
22950
22951 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
22952 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
22953 return NULL;
22954
22955 switch (TYPE_MODE (type))
22956 {
22957 case TFmode:
22958 /* __float128 is "g". */
22959 return "g";
22960 case XFmode:
22961 /* "long double" or __float80 is "e". */
22962 return "e";
22963 default:
22964 return NULL;
22965 }
22966 }
22967
22968 /* For 32-bit code we can save PIC register setup by using
22969 __stack_chk_fail_local hidden function instead of calling
22972 22970    __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
22971 register, so it is better to call __stack_chk_fail directly. */
22972
22973 static tree
22974 ix86_stack_protect_fail (void)
22975 {
22976 return TARGET_64BIT
22977 ? default_external_stack_protect_fail ()
22978 : default_hidden_stack_protect_fail ();
22979 }
22980
22981 /* Select a format to encode pointers in exception handling data. CODE
22982 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22983 true if the symbol may be affected by dynamic relocations.
22984
22985 ??? All x86 object file formats are capable of representing this.
22986 After all, the relocation needed is the same as for the call insn.
22987 Whether or not a particular assembler allows us to enter such, I
22988 guess we'll have to see. */
22989 int
22990 asm_preferred_eh_data_format (int code, int global)
22991 {
22992 if (flag_pic)
22993 {
22994 int type = DW_EH_PE_sdata8;
22995 if (!TARGET_64BIT
22996 || ix86_cmodel == CM_SMALL_PIC
22997 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22998 type = DW_EH_PE_sdata4;
22999 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
23000 }
23001 if (ix86_cmodel == CM_SMALL
23002 || (ix86_cmodel == CM_MEDIUM && code))
23003 return DW_EH_PE_udata4;
23004 return DW_EH_PE_absptr;
23005 }
23006 \f
23007 /* Expand copysign from SIGN to the positive value ABS_VALUE
23008 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
23009 the sign-bit. */
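/* The result is ABS_VALUE | (SIGN & sign-bit-mask); ABS_VALUE is
   assumed to already have its sign bit clear.  The callers below that
   pass a non-null MASK pass the inverted mask built by
   ix86_expand_sse_fabs (all bits except the sign bit), which is why it
   is complemented again before the AND.  */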
23010 static void
23011 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
23012 {
23013 enum machine_mode mode = GET_MODE (sign);
23014 rtx sgn = gen_reg_rtx (mode);
23015 if (mask == NULL_RTX)
23016 {
23017 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
23018 if (!VECTOR_MODE_P (mode))
23019 {
23020 /* We need to generate a scalar mode mask in this case. */
23021 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
23022 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
23023 mask = gen_reg_rtx (mode);
23024 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
23025 }
23026 }
23027 else
23028 mask = gen_rtx_NOT (mode, mask);
23029 emit_insn (gen_rtx_SET (VOIDmode, sgn,
23030 gen_rtx_AND (mode, mask, sign)));
23031 emit_insn (gen_rtx_SET (VOIDmode, result,
23032 gen_rtx_IOR (mode, abs_value, sgn)));
23033 }
23034
23035 /* Expand fabs (OP0) and return a new rtx that holds the result. The
23036 mask for masking out the sign-bit is stored in *SMASK, if that is
23037 non-null. */
23038 static rtx
23039 ix86_expand_sse_fabs (rtx op0, rtx *smask)
23040 {
23041 enum machine_mode mode = GET_MODE (op0);
23042 rtx xa, mask;
23043
23044 xa = gen_reg_rtx (mode);
23045 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
23046 if (!VECTOR_MODE_P (mode))
23047 {
23048 /* We need to generate a scalar mode mask in this case. */
23049 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
23050 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
23051 mask = gen_reg_rtx (mode);
23052 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
23053 }
23054 emit_insn (gen_rtx_SET (VOIDmode, xa,
23055 gen_rtx_AND (mode, op0, mask)));
23056
23057 if (smask)
23058 *smask = mask;
23059
23060 return xa;
23061 }
23062
23063 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
23064 swapping the operands if SWAP_OPERANDS is true. The expanded
23065 code is a forward jump to a newly created label in case the
23066 comparison is true. The generated label rtx is returned. */
23067 static rtx
23068 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
23069 bool swap_operands)
23070 {
23071 rtx label, tmp;
23072
23073 if (swap_operands)
23074 {
23075 tmp = op0;
23076 op0 = op1;
23077 op1 = tmp;
23078 }
23079
23080 label = gen_label_rtx ();
23081 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
23082 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23083 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
23084 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
23085 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23086 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
23087 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23088 JUMP_LABEL (tmp) = label;
23089
23090 return label;
23091 }
23092
23093 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
23094 using comparison code CODE. Operands are swapped for the comparison if
23095 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
23096 static rtx
23097 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
23098 bool swap_operands)
23099 {
23100 enum machine_mode mode = GET_MODE (op0);
23101 rtx mask = gen_reg_rtx (mode);
23102
23103 if (swap_operands)
23104 {
23105 rtx tmp = op0;
23106 op0 = op1;
23107 op1 = tmp;
23108 }
23109
23110 if (mode == DFmode)
23111 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
23112 gen_rtx_fmt_ee (code, mode, op0, op1)));
23113 else
23114 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
23115 gen_rtx_fmt_ee (code, mode, op0, op1)));
23116
23117 return mask;
23118 }
23119
23120 /* Generate and return a rtx of mode MODE for 2**n where n is the number
23121 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
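/* Adding and then subtracting this constant rounds any value of
   magnitude below 2**p to an integer under the default round-to-nearest
   mode, because once the intermediate sum lies in [2**p, 2**(p+1)) its
   unit in the last place is exactly 1.0; the round/floor/ceil/trunc
   expansions below all rely on this.  */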
23122 static rtx
23123 ix86_gen_TWO52 (enum machine_mode mode)
23124 {
23125 REAL_VALUE_TYPE TWO52r;
23126 rtx TWO52;
23127
23128 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
23129 TWO52 = const_double_from_real_value (TWO52r, mode);
23130 TWO52 = force_reg (mode, TWO52);
23131
23132 return TWO52;
23133 }
23134
23135 /* Expand SSE sequence for computing lround from OP1 storing
23136 into OP0. */
23137 void
23138 ix86_expand_lround (rtx op0, rtx op1)
23139 {
23140 /* C code for the stuff we're doing below:
23141 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23142 return (long)tmp;
23143 */
23144 enum machine_mode mode = GET_MODE (op1);
23145 const struct real_format *fmt;
23146 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23147 rtx adj;
23148
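  /* pred_half below is the largest representable value strictly less
     than 0.5.  Using it instead of 0.5 avoids cases such as (for
     DFmode) op1 == 0.5 - 2**-54, where op1 + 0.5 would round up to
     exactly 1.0 (a ties-to-even case) and the truncation would then
     yield 1 instead of the correct 0.  */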
23149 /* load nextafter (0.5, 0.0) */
23150 fmt = REAL_MODE_FORMAT (mode);
23151 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
23152 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23153
23154 /* adj = copysign (0.5, op1) */
23155 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
23156 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
23157
23158 /* adj = op1 + adj */
23159 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
23160
23161 /* op0 = (imode)adj */
23162 expand_fix (op0, adj, 0);
23163 }
23164
23167 23165 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
23168 23166    into OP0.  */
23167 void
23168 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
23169 {
23170 /* C code for the stuff we're doing below (for do_floor):
23171 xi = (long)op1;
23172 xi -= (double)xi > op1 ? 1 : 0;
23173 return xi;
23174 */
23175 enum machine_mode fmode = GET_MODE (op1);
23176 enum machine_mode imode = GET_MODE (op0);
23177 rtx ireg, freg, label, tmp;
23178
23179 /* reg = (long)op1 */
23180 ireg = gen_reg_rtx (imode);
23181 expand_fix (ireg, op1, 0);
23182
23183 /* freg = (double)reg */
23184 freg = gen_reg_rtx (fmode);
23185 expand_float (freg, ireg, 0);
23186
23187 /* ireg = (freg > op1) ? ireg - 1 : ireg */
23188 label = ix86_expand_sse_compare_and_jump (UNLE,
23189 freg, op1, !do_floor);
23190 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
23191 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
23192 emit_move_insn (ireg, tmp);
23193
23194 emit_label (label);
23195 LABEL_NUSES (label) = 1;
23196
23197 emit_move_insn (op0, ireg);
23198 }
23199
23200 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23201 result in OPERAND0. */
23202 void
23203 ix86_expand_rint (rtx operand0, rtx operand1)
23204 {
23205 /* C code for the stuff we're doing below:
23206 xa = fabs (operand1);
23207 if (!isless (xa, 2**52))
23208 return operand1;
23209 xa = xa + 2**52 - 2**52;
23210 return copysign (xa, operand1);
23211 */
23212 enum machine_mode mode = GET_MODE (operand0);
23213 rtx res, xa, label, TWO52, mask;
23214
23215 res = gen_reg_rtx (mode);
23216 emit_move_insn (res, operand1);
23217
23218 /* xa = abs (operand1) */
23219 xa = ix86_expand_sse_fabs (res, &mask);
23220
23221 /* if (!isless (xa, TWO52)) goto label; */
23222 TWO52 = ix86_gen_TWO52 (mode);
23223 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23224
23225 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23226 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23227
23228 ix86_sse_copysign_to_positive (res, xa, res, mask);
23229
23230 emit_label (label);
23231 LABEL_NUSES (label) = 1;
23232
23233 emit_move_insn (operand0, res);
23234 }
23235
23236 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23237 into OPERAND0. */
23238 void
23239 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
23240 {
23241 /* C code for the stuff we expand below.
23242 double xa = fabs (x), x2;
23243 if (!isless (xa, TWO52))
23244 return x;
23245 xa = xa + TWO52 - TWO52;
23246 x2 = copysign (xa, x);
23247 Compensate. Floor:
23248 if (x2 > x)
23249 x2 -= 1;
23250 Compensate. Ceil:
23251 if (x2 < x)
23252 x2 -= -1;
23253 return x2;
23254 */
23255 enum machine_mode mode = GET_MODE (operand0);
23256 rtx xa, TWO52, tmp, label, one, res, mask;
23257
23258 TWO52 = ix86_gen_TWO52 (mode);
23259
23260 /* Temporary for holding the result, initialized to the input
23261 operand to ease control flow. */
23262 res = gen_reg_rtx (mode);
23263 emit_move_insn (res, operand1);
23264
23265 /* xa = abs (operand1) */
23266 xa = ix86_expand_sse_fabs (res, &mask);
23267
23268 /* if (!isless (xa, TWO52)) goto label; */
23269 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23270
23271 /* xa = xa + TWO52 - TWO52; */
23272 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23273 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23274
23275 /* xa = copysign (xa, operand1) */
23276 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23277
23278 /* generate 1.0 or -1.0 */
23279 one = force_reg (mode,
23280 const_double_from_real_value (do_floor
23281 ? dconst1 : dconstm1, mode));
23282
23283 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23284 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23285 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23286 gen_rtx_AND (mode, one, tmp)));
23287 /* We always need to subtract here to preserve signed zero. */
23288 tmp = expand_simple_binop (mode, MINUS,
23289 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23290 emit_move_insn (res, tmp);
23291
23292 emit_label (label);
23293 LABEL_NUSES (label) = 1;
23294
23295 emit_move_insn (operand0, res);
23296 }
23297
23298 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23299 into OPERAND0. */
23300 void
23301 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23302 {
23303 /* C code for the stuff we expand below.
23304 double xa = fabs (x), x2;
23305 if (!isless (xa, TWO52))
23306 return x;
23307 x2 = (double)(long)x;
23308 Compensate. Floor:
23309 if (x2 > x)
23310 x2 -= 1;
23311 Compensate. Ceil:
23312 if (x2 < x)
23313 x2 += 1;
23314 if (HONOR_SIGNED_ZEROS (mode))
23315 return copysign (x2, x);
23316 return x2;
23317 */
23318 enum machine_mode mode = GET_MODE (operand0);
23319 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23320
23321 TWO52 = ix86_gen_TWO52 (mode);
23322
23323 /* Temporary for holding the result, initialized to the input
23324 operand to ease control flow. */
23325 res = gen_reg_rtx (mode);
23326 emit_move_insn (res, operand1);
23327
23328 /* xa = abs (operand1) */
23329 xa = ix86_expand_sse_fabs (res, &mask);
23330
23331 /* if (!isless (xa, TWO52)) goto label; */
23332 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23333
23334 /* xa = (double)(long)x */
23335 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23336 expand_fix (xi, res, 0);
23337 expand_float (xa, xi, 0);
23338
23339 /* generate 1.0 */
23340 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23341
23342 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23343 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23344 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23345 gen_rtx_AND (mode, one, tmp)));
23346 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23347 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23348 emit_move_insn (res, tmp);
23349
23350 if (HONOR_SIGNED_ZEROS (mode))
23351 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23352
23353 emit_label (label);
23354 LABEL_NUSES (label) = 1;
23355
23356 emit_move_insn (operand0, res);
23357 }
23358
23359 /* Expand SSE sequence for computing round from OPERAND1 storing
23362 23360    into OPERAND0.  This sequence works without relying on the DImode
23363 23361    truncation via cvttsd2siq that is only available on 64-bit targets.  */
23362 void
23363 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23364 {
23365 /* C code for the stuff we expand below.
23366 double xa = fabs (x), xa2, x2;
23367 if (!isless (xa, TWO52))
23368 return x;
23369 Using the absolute value and copying back sign makes
23370 -0.0 -> -0.0 correct.
23371 xa2 = xa + TWO52 - TWO52;
23372 Compensate.
23373 dxa = xa2 - xa;
23374 if (dxa <= -0.5)
23375 xa2 += 1;
23376 else if (dxa > 0.5)
23377 xa2 -= 1;
23378 x2 = copysign (xa2, x);
23379 return x2;
23380 */
23381 enum machine_mode mode = GET_MODE (operand0);
23382 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23383
23384 TWO52 = ix86_gen_TWO52 (mode);
23385
23386 /* Temporary for holding the result, initialized to the input
23387 operand to ease control flow. */
23388 res = gen_reg_rtx (mode);
23389 emit_move_insn (res, operand1);
23390
23391 /* xa = abs (operand1) */
23392 xa = ix86_expand_sse_fabs (res, &mask);
23393
23394 /* if (!isless (xa, TWO52)) goto label; */
23395 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23396
23397 /* xa2 = xa + TWO52 - TWO52; */
23398 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23399 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23400
23401 /* dxa = xa2 - xa; */
23402 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23403
23404 /* generate 0.5, 1.0 and -0.5 */
23405 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23406 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23407 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
23408 0, OPTAB_DIRECT);
23409
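  /* Example: for xa == 2.5 the TWO52 trick rounds to even, so
     xa2 == 2.0 and dxa == -0.5; the second adjustment below then adds
     1, giving 3.0, so halfway cases end up rounded away from zero as
     round () requires.  */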
23410 /* Compensate. */
23411 tmp = gen_reg_rtx (mode);
23412 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23413 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23414 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23415 gen_rtx_AND (mode, one, tmp)));
23416 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23417 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23418 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23419 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23420 gen_rtx_AND (mode, one, tmp)));
23421 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23422
23423 /* res = copysign (xa2, operand1) */
23424 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23425
23426 emit_label (label);
23427 LABEL_NUSES (label) = 1;
23428
23429 emit_move_insn (operand0, res);
23430 }
23431
23432 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23433 into OPERAND0. */
23434 void
23435 ix86_expand_trunc (rtx operand0, rtx operand1)
23436 {
23437 /* C code for SSE variant we expand below.
23438 double xa = fabs (x), x2;
23439 if (!isless (xa, TWO52))
23440 return x;
23441 x2 = (double)(long)x;
23442 if (HONOR_SIGNED_ZEROS (mode))
23443 return copysign (x2, x);
23444 return x2;
23445 */
23446 enum machine_mode mode = GET_MODE (operand0);
23447 rtx xa, xi, TWO52, label, res, mask;
23448
23449 TWO52 = ix86_gen_TWO52 (mode);
23450
23451 /* Temporary for holding the result, initialized to the input
23452 operand to ease control flow. */
23453 res = gen_reg_rtx (mode);
23454 emit_move_insn (res, operand1);
23455
23456 /* xa = abs (operand1) */
23457 xa = ix86_expand_sse_fabs (res, &mask);
23458
23459 /* if (!isless (xa, TWO52)) goto label; */
23460 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23461
23462 /* x = (double)(long)x */
23463 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23464 expand_fix (xi, res, 0);
23465 expand_float (res, xi, 0);
23466
23467 if (HONOR_SIGNED_ZEROS (mode))
23468 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23469
23470 emit_label (label);
23471 LABEL_NUSES (label) = 1;
23472
23473 emit_move_insn (operand0, res);
23474 }
23475
23476 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23477 into OPERAND0. */
23478 void
23479 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23480 {
23481 enum machine_mode mode = GET_MODE (operand0);
23482 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23483
23484 /* C code for SSE variant we expand below.
23485 double xa = fabs (x), x2;
23486 if (!isless (xa, TWO52))
23487 return x;
23488 xa2 = xa + TWO52 - TWO52;
23489 Compensate:
23490 if (xa2 > xa)
23491 xa2 -= 1.0;
23492 x2 = copysign (xa2, x);
23493 return x2;
23494 */
23495
23496 TWO52 = ix86_gen_TWO52 (mode);
23497
23498 /* Temporary for holding the result, initialized to the input
23499 operand to ease control flow. */
23500 res = gen_reg_rtx (mode);
23501 emit_move_insn (res, operand1);
23502
23503 /* xa = abs (operand1) */
23504 xa = ix86_expand_sse_fabs (res, &smask);
23505
23506 /* if (!isless (xa, TWO52)) goto label; */
23507 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23508
23509 /* res = xa + TWO52 - TWO52; */
23510 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23511 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23512 emit_move_insn (res, tmp);
23513
23514 /* generate 1.0 */
23515 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23516
23517 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23518 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23519 emit_insn (gen_rtx_SET (VOIDmode, mask,
23520 gen_rtx_AND (mode, mask, one)));
23521 tmp = expand_simple_binop (mode, MINUS,
23522 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23523 emit_move_insn (res, tmp);
23524
23525 /* res = copysign (res, operand1) */
23526 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23527
23528 emit_label (label);
23529 LABEL_NUSES (label) = 1;
23530
23531 emit_move_insn (operand0, res);
23532 }
23533
23534 /* Expand SSE sequence for computing round from OPERAND1 storing
23535 into OPERAND0. */
23536 void
23537 ix86_expand_round (rtx operand0, rtx operand1)
23538 {
23539 /* C code for the stuff we're doing below:
23540 double xa = fabs (x);
23541 if (!isless (xa, TWO52))
23542 return x;
23543 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23544 return copysign (xa, x);
23545 */
23546 enum machine_mode mode = GET_MODE (operand0);
23547 rtx res, TWO52, xa, label, xi, half, mask;
23548 const struct real_format *fmt;
23549 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23550
23551 /* Temporary for holding the result, initialized to the input
23552 operand to ease control flow. */
23553 res = gen_reg_rtx (mode);
23554 emit_move_insn (res, operand1);
23555
23556 TWO52 = ix86_gen_TWO52 (mode);
23557 xa = ix86_expand_sse_fabs (res, &mask);
23558 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23559
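  /* As in ix86_expand_lround above, pred_half is the largest value
     strictly below 0.5; adding it instead of 0.5 keeps arguments just
     under one half from being rounded up across an integer boundary.  */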
23560 /* load nextafter (0.5, 0.0) */
23561 fmt = REAL_MODE_FORMAT (mode);
23562 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
23563 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23564
23565 /* xa = xa + 0.5 */
23566 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23567 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23568
23569 /* xa = (double)(int64_t)xa */
23570 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23571 expand_fix (xi, xa, 0);
23572 expand_float (xa, xi, 0);
23573
23574 /* res = copysign (xa, operand1) */
23575 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23576
23577 emit_label (label);
23578 LABEL_NUSES (label) = 1;
23579
23580 emit_move_insn (operand0, res);
23581 }
23582
23583 \f
23584 /* Table of valid machine attributes. */
23585 static const struct attribute_spec ix86_attribute_table[] =
23586 {
23587 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23588 /* Stdcall attribute says callee is responsible for popping arguments
23589 if they are not variable. */
23590 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23591 /* Fastcall attribute says callee is responsible for popping arguments
23592 if they are not variable. */
23593 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23594 /* Cdecl attribute says the callee is a normal C declaration */
23595 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23596 /* Regparm attribute specifies how many integer arguments are to be
23597 passed in registers. */
23598 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23599 /* Sseregparm attribute says we are using x86_64 calling conventions
23600 for FP arguments. */
23601 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23602 /* force_align_arg_pointer says this function realigns the stack at entry. */
23603 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23604 false, true, true, ix86_handle_cconv_attribute },
23605 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23606 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23607 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23608 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23609 #endif
23610 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23611 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23612 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23613 SUBTARGET_ATTRIBUTE_TABLE,
23614 #endif
23615 { NULL, 0, 0, false, false, false, NULL }
23616 };
23617
23618 /* Initialize the GCC target structure. */
23619 #undef TARGET_ATTRIBUTE_TABLE
23620 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23621 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23622 # undef TARGET_MERGE_DECL_ATTRIBUTES
23623 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23624 #endif
23625
23626 #undef TARGET_COMP_TYPE_ATTRIBUTES
23627 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23628
23629 #undef TARGET_INIT_BUILTINS
23630 #define TARGET_INIT_BUILTINS ix86_init_builtins
23631 #undef TARGET_EXPAND_BUILTIN
23632 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23633
23634 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23635 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23636 ix86_builtin_vectorized_function
23637
23638 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23639 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23640
23641 #undef TARGET_BUILTIN_RECIPROCAL
23642 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23643
23644 #undef TARGET_ASM_FUNCTION_EPILOGUE
23645 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23646
23647 #undef TARGET_ENCODE_SECTION_INFO
23648 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23649 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23650 #else
23651 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23652 #endif
23653
23654 #undef TARGET_ASM_OPEN_PAREN
23655 #define TARGET_ASM_OPEN_PAREN ""
23656 #undef TARGET_ASM_CLOSE_PAREN
23657 #define TARGET_ASM_CLOSE_PAREN ""
23658
23659 #undef TARGET_ASM_ALIGNED_HI_OP
23660 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23661 #undef TARGET_ASM_ALIGNED_SI_OP
23662 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23663 #ifdef ASM_QUAD
23664 #undef TARGET_ASM_ALIGNED_DI_OP
23665 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23666 #endif
23667
23668 #undef TARGET_ASM_UNALIGNED_HI_OP
23669 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23670 #undef TARGET_ASM_UNALIGNED_SI_OP
23671 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23672 #undef TARGET_ASM_UNALIGNED_DI_OP
23673 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23674
23675 #undef TARGET_SCHED_ADJUST_COST
23676 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23677 #undef TARGET_SCHED_ISSUE_RATE
23678 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23679 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23680 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23681 ia32_multipass_dfa_lookahead
23682
23683 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23684 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23685
23686 #ifdef HAVE_AS_TLS
23687 #undef TARGET_HAVE_TLS
23688 #define TARGET_HAVE_TLS true
23689 #endif
23690 #undef TARGET_CANNOT_FORCE_CONST_MEM
23691 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23692 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23693 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23694
23695 #undef TARGET_DELEGITIMIZE_ADDRESS
23696 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23697
23698 #undef TARGET_MS_BITFIELD_LAYOUT_P
23699 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23700
23701 #if TARGET_MACHO
23702 #undef TARGET_BINDS_LOCAL_P
23703 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23704 #endif
23705 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23706 #undef TARGET_BINDS_LOCAL_P
23707 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23708 #endif
23709
23710 #undef TARGET_ASM_OUTPUT_MI_THUNK
23711 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23712 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23713 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23714
23715 #undef TARGET_ASM_FILE_START
23716 #define TARGET_ASM_FILE_START x86_file_start
23717
23718 #undef TARGET_DEFAULT_TARGET_FLAGS
23719 #define TARGET_DEFAULT_TARGET_FLAGS \
23720 (TARGET_DEFAULT \
23721 | TARGET_SUBTARGET_DEFAULT \
23722 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23723
23724 #undef TARGET_HANDLE_OPTION
23725 #define TARGET_HANDLE_OPTION ix86_handle_option
23726
23727 #undef TARGET_RTX_COSTS
23728 #define TARGET_RTX_COSTS ix86_rtx_costs
23729 #undef TARGET_ADDRESS_COST
23730 #define TARGET_ADDRESS_COST ix86_address_cost
23731
23732 #undef TARGET_FIXED_CONDITION_CODE_REGS
23733 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23734 #undef TARGET_CC_MODES_COMPATIBLE
23735 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23736
23737 #undef TARGET_MACHINE_DEPENDENT_REORG
23738 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23739
23740 #undef TARGET_BUILD_BUILTIN_VA_LIST
23741 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23742
23743 #undef TARGET_MD_ASM_CLOBBERS
23744 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23745
23746 #undef TARGET_PROMOTE_PROTOTYPES
23747 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23748 #undef TARGET_STRUCT_VALUE_RTX
23749 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23750 #undef TARGET_SETUP_INCOMING_VARARGS
23751 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23752 #undef TARGET_MUST_PASS_IN_STACK
23753 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23754 #undef TARGET_PASS_BY_REFERENCE
23755 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23756 #undef TARGET_INTERNAL_ARG_POINTER
23757 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23758 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23759 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23760 #undef TARGET_STRICT_ARGUMENT_NAMING
23761 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23762
23763 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23764 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23765
23766 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23767 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23768
23769 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23770 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23771
23772 #undef TARGET_C_MODE_FOR_SUFFIX
23773 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23774
23775 #ifdef HAVE_AS_TLS
23776 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23777 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23778 #endif
23779
23780 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23781 #undef TARGET_INSERT_ATTRIBUTES
23782 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23783 #endif
23784
23785 #undef TARGET_MANGLE_TYPE
23786 #define TARGET_MANGLE_TYPE ix86_mangle_type
23787
23788 #undef TARGET_STACK_PROTECT_FAIL
23789 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23790
23791 #undef TARGET_FUNCTION_VALUE
23792 #define TARGET_FUNCTION_VALUE ix86_function_value
23793
23794 struct gcc_target targetm = TARGET_INITIALIZER;
23795 \f
23796 #include "gt-i386.h"